From b8ead3dd9fc6337331ce7a79e2cef90adafc0e52 Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Mon, 23 Aug 2010 12:20:57 -0400
Subject: [PATCH 01/20] Updated to the latest pNFS tag:
 pnfs-all-2.6.35-2010-08-19

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 config-generic                       |    12 +
 kernel.spec                          |    15 +-
 linux-2.6-pnfs-compile.patch         |    13 +
 linux-2.6.35-inline.patch            |    11 +
 nfs-35-fc.patch                      |  7235 ++++++
 nfsd-35-fc.patch                     |  1808 ++
 pnfs-all-2.6.35-2010-08-19-f13.patch | 31788 +++++++++++++++++++++++++
 7 files changed, 40880 insertions(+), 2 deletions(-)
 create mode 100644 linux-2.6-pnfs-compile.patch
 create mode 100644 linux-2.6.35-inline.patch
 create mode 100644 nfs-35-fc.patch
 create mode 100644 nfsd-35-fc.patch
 create mode 100644 pnfs-all-2.6.35-2010-08-19-f13.patch

diff --git a/config-generic b/config-generic
index 3b23aabcc..76379c8eb 100644
--- a/config-generic
+++ b/config-generic
@@ -3322,6 +3322,18 @@ CONFIG_NFSD_V3=y
 CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFS_FSCACHE=y
+# Enable pNFS
+CONFIG_PNFS=y
+CONFIG_PNFSD=y
+CONFIG_PNFSD_LOCAL_EXPORT=y
+CONFIG_SPNFS=y
+CONFIG_SPNFS_LAYOUTSEGMENTS=y
+CONFIG_SPNFS_BLOCK=y
+CONFIG_PNFS_OBJLAYOUT=m
+CONFIG_PNFS_BLOCK=m
+CONFIG_PNFS_PANLAYOUT=m
+CONFIG_PNFS_FILE_LAYOUT=m
+# End of pNFS options
 CONFIG_LOCKD=m
 CONFIG_LOCKD_V4=y
 CONFIG_EXPORTFS=m
diff --git a/kernel.spec b/kernel.spec
index 3a5dbce65..70b6f45c8 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -23,7 +23,7 @@ Summary: The Linux kernel
 #
 # (Uncomment the '#' and both spaces below to set the buildid.)
 #
-# % define buildid .local
+%define buildid .pnfs_all_2.6.35_2010_08_19
 ###################################################################
 
 # The buildid can also be specified on the rpmbuild command line
@@ -107,7 +107,7 @@ Summary: The Linux kernel
 # kernel-headers
 %define with_headers   %{?_without_headers:   0} %{?!_without_headers:   1}
 # kernel-firmware
-%define with_firmware  %{?_with_firmware:     1} %{?!_with_firmware:     0}
+%define with_firmware  %{?_with_firmware:     1} %{?!_with_firmware:     1}
 # tools/perf
 %define with_perftool  %{?_without_perftool:  0} %{?!_without_perftool:  1}
 # perf noarch subpkg
@@ -764,6 +764,12 @@ Patch12440: direct-io-move-aio_complete-into-end_io.patch
 Patch12450: ext4-move-aio-completion-after-unwritten-extent-conversion.patch
 Patch12460: xfs-move-aio-completion-after-unwritten-extent-conversion.patch
 
+Patch30000: nfs-35-fc.patch
+Patch30001: nfsd-35-fc.patch
+Patch30002: pnfs-all-2.6.35-2010-08-19-f13.patch
+Patch30003: linux-2.6-pnfs-compile.patch
+Patch30004: linux-2.6.35-inline.patch
+
 %endif
 
 BuildRoot: %{_tmppath}/kernel-%{KVERREL}-root
@@ -1419,6 +1425,11 @@ ApplyPatch direct-io-move-aio_complete-into-end_io.patch
 ApplyPatch ext4-move-aio-completion-after-unwritten-extent-conversion.patch
 ApplyPatch xfs-move-aio-completion-after-unwritten-extent-conversion.patch
 
+ApplyPatch nfs-35-fc.patch  
+ApplyPatch nfsd-35-fc.patch  
+ApplyPatch pnfs-all-2.6.35-2010-08-19-f13.patch
+ApplyPatch linux-2.6-pnfs-compile.patch
+ApplyPatch linux-2.6.35-inline.patch
 # END OF PATCH APPLICATIONS
 
 %endif
diff --git a/linux-2.6-pnfs-compile.patch b/linux-2.6-pnfs-compile.patch
new file mode 100644
index 000000000..7c8cc4248
--- /dev/null
+++ b/linux-2.6-pnfs-compile.patch
@@ -0,0 +1,13 @@
+diff -up linux-2.6.32.x86_64/fs/nfs/objlayout/pnfs_osd_xdr.h.orig linux-2.6.32.x86_64/fs/nfs/objlayout/pnfs_osd_xdr.h
+diff -up linux-2.6.32.x86_64/include/net/inet_connection_sock.h.orig linux-2.6.32.x86_64/include/net/inet_connection_sock.h
+--- linux-2.6.32.x86_64/include/net/inet_connection_sock.h.orig	2009-12-02 22:51:21.000000000 -0500
++++ linux-2.6.32.x86_64/include/net/inet_connection_sock.h	2010-04-21 14:26:24.475659551 -0400
+@@ -23,7 +23,7 @@
+ #include <net/inet_sock.h>
+ #include <net/request_sock.h>
+ 
+-#define INET_CSK_DEBUG 1
++//#define INET_CSK_DEBUG 1
+ 
+ /* Cancel timers, when they are not required. */
+ #undef INET_CSK_CLEAR_TIMERS
diff --git a/linux-2.6.35-inline.patch b/linux-2.6.35-inline.patch
new file mode 100644
index 000000000..c56d8da5e
--- /dev/null
+++ b/linux-2.6.35-inline.patch
@@ -0,0 +1,11 @@
+diff -up linux-2.6.34.noarch/arch/x86/Makefile.orig linux-2.6.34.noarch/arch/x86/Makefile
+--- linux-2.6.34.noarch/arch/x86/Makefile.orig	2010-07-01 13:33:21.859627499 -0400
++++ linux-2.6.34.noarch/arch/x86/Makefile	2010-07-01 13:36:26.751576450 -0400
+@@ -81,6 +81,7 @@ ifdef CONFIG_CC_STACKPROTECTOR
+                 $(warning stack protector enabled but no compiler support)
+         endif
+ endif
++KBUILD_CFLAGS += -fno-inline-functions-called-once
+ 
+ # Don't unroll struct assignments with kmemcheck enabled
+ ifeq ($(CONFIG_KMEMCHECK),y)
diff --git a/nfs-35-fc.patch b/nfs-35-fc.patch
new file mode 100644
index 000000000..c3ad25f65
--- /dev/null
+++ b/nfs-35-fc.patch
@@ -0,0 +1,7235 @@
+diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/client.c
+--- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-23 11:01:00.352376393 -0400
+@@ -934,7 +934,6 @@ static int nfs_probe_fsinfo(struct nfs_s
+ 	}
+ 
+ 	fsinfo.fattr = fattr;
+-	nfs_fattr_init(fattr);
+ 	error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
+ 	if (error < 0)
+ 		goto out_error;
+@@ -1047,13 +1046,18 @@ struct nfs_server *nfs_create_server(con
+ 				     struct nfs_fh *mntfh)
+ {
+ 	struct nfs_server *server;
+-	struct nfs_fattr fattr;
++	struct nfs_fattr *fattr;
+ 	int error;
+ 
+ 	server = nfs_alloc_server();
+ 	if (!server)
+ 		return ERR_PTR(-ENOMEM);
+ 
++	error = -ENOMEM;
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto error;
++
+ 	/* Get a client representation */
+ 	error = nfs_init_server(server, data);
+ 	if (error < 0)
+@@ -1064,7 +1068,7 @@ struct nfs_server *nfs_create_server(con
+ 	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+ 
+ 	/* Probe the root fh to retrieve its FSID */
+-	error = nfs_probe_fsinfo(server, mntfh, &fattr);
++	error = nfs_probe_fsinfo(server, mntfh, fattr);
+ 	if (error < 0)
+ 		goto error;
+ 	if (server->nfs_client->rpc_ops->version == 3) {
+@@ -1077,14 +1081,14 @@ struct nfs_server *nfs_create_server(con
+ 			server->namelen = NFS2_MAXNAMLEN;
+ 	}
+ 
+-	if (!(fattr.valid & NFS_ATTR_FATTR)) {
+-		error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
++	if (!(fattr->valid & NFS_ATTR_FATTR)) {
++		error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
+ 		if (error < 0) {
+ 			dprintk("nfs_create_server: getattr error = %d\n", -error);
+ 			goto error;
+ 		}
+ 	}
+-	memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
++	memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
+ 
+ 	dprintk("Server FSID: %llx:%llx\n",
+ 		(unsigned long long) server->fsid.major,
+@@ -1096,9 +1100,11 @@ struct nfs_server *nfs_create_server(con
+ 	spin_unlock(&nfs_client_lock);
+ 
+ 	server->mount_time = jiffies;
++	nfs_free_fattr(fattr);
+ 	return server;
+ 
+ error:
++	nfs_free_fattr(fattr);
+ 	nfs_free_server(server);
+ 	return ERR_PTR(error);
+ }
+@@ -1340,7 +1346,7 @@ error:
+ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
+ 				      struct nfs_fh *mntfh)
+ {
+-	struct nfs_fattr fattr;
++	struct nfs_fattr *fattr;
+ 	struct nfs_server *server;
+ 	int error;
+ 
+@@ -1350,6 +1356,11 @@ struct nfs_server *nfs4_create_server(co
+ 	if (!server)
+ 		return ERR_PTR(-ENOMEM);
+ 
++	error = -ENOMEM;
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto error;
++
+ 	/* set up the general RPC client */
+ 	error = nfs4_init_server(server, data);
+ 	if (error < 0)
+@@ -1364,7 +1375,7 @@ struct nfs_server *nfs4_create_server(co
+ 		goto error;
+ 
+ 	/* Probe the root fh to retrieve its FSID */
+-	error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path);
++	error = nfs4_get_rootfh(server, mntfh);
+ 	if (error < 0)
+ 		goto error;
+ 
+@@ -1375,7 +1386,7 @@ struct nfs_server *nfs4_create_server(co
+ 
+ 	nfs4_session_set_rwsize(server);
+ 
+-	error = nfs_probe_fsinfo(server, mntfh, &fattr);
++	error = nfs_probe_fsinfo(server, mntfh, fattr);
+ 	if (error < 0)
+ 		goto error;
+ 
+@@ -1389,9 +1400,11 @@ struct nfs_server *nfs4_create_server(co
+ 
+ 	server->mount_time = jiffies;
+ 	dprintk("<-- nfs4_create_server() = %p\n", server);
++	nfs_free_fattr(fattr);
+ 	return server;
+ 
+ error:
++	nfs_free_fattr(fattr);
+ 	nfs_free_server(server);
+ 	dprintk("<-- nfs4_create_server() = error %d\n", error);
+ 	return ERR_PTR(error);
+@@ -1405,7 +1418,7 @@ struct nfs_server *nfs4_create_referral_
+ {
+ 	struct nfs_client *parent_client;
+ 	struct nfs_server *server, *parent_server;
+-	struct nfs_fattr fattr;
++	struct nfs_fattr *fattr;
+ 	int error;
+ 
+ 	dprintk("--> nfs4_create_referral_server()\n");
+@@ -1414,6 +1427,11 @@ struct nfs_server *nfs4_create_referral_
+ 	if (!server)
+ 		return ERR_PTR(-ENOMEM);
+ 
++	error = -ENOMEM;
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto error;
++
+ 	parent_server = NFS_SB(data->sb);
+ 	parent_client = parent_server->nfs_client;
+ 
+@@ -1443,12 +1461,12 @@ struct nfs_server *nfs4_create_referral_
+ 	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+ 
+ 	/* Probe the root fh to retrieve its FSID and filehandle */
+-	error = nfs4_path_walk(server, mntfh, data->mnt_path);
++	error = nfs4_get_rootfh(server, mntfh);
+ 	if (error < 0)
+ 		goto error;
+ 
+ 	/* probe the filesystem info for this server filesystem */
+-	error = nfs_probe_fsinfo(server, mntfh, &fattr);
++	error = nfs_probe_fsinfo(server, mntfh, fattr);
+ 	if (error < 0)
+ 		goto error;
+ 
+@@ -1466,10 +1484,12 @@ struct nfs_server *nfs4_create_referral_
+ 
+ 	server->mount_time = jiffies;
+ 
++	nfs_free_fattr(fattr);
+ 	dprintk("<-- nfs_create_referral_server() = %p\n", server);
+ 	return server;
+ 
+ error:
++	nfs_free_fattr(fattr);
+ 	nfs_free_server(server);
+ 	dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
+ 	return ERR_PTR(error);
+@@ -1485,7 +1505,7 @@ struct nfs_server *nfs_clone_server(stru
+ 				    struct nfs_fattr *fattr)
+ {
+ 	struct nfs_server *server;
+-	struct nfs_fattr fattr_fsinfo;
++	struct nfs_fattr *fattr_fsinfo;
+ 	int error;
+ 
+ 	dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
+@@ -1496,6 +1516,11 @@ struct nfs_server *nfs_clone_server(stru
+ 	if (!server)
+ 		return ERR_PTR(-ENOMEM);
+ 
++	error = -ENOMEM;
++	fattr_fsinfo = nfs_alloc_fattr();
++	if (fattr_fsinfo == NULL)
++		goto out_free_server;
++
+ 	/* Copy data from the source */
+ 	server->nfs_client = source->nfs_client;
+ 	atomic_inc(&server->nfs_client->cl_count);
+@@ -1512,7 +1537,7 @@ struct nfs_server *nfs_clone_server(stru
+ 		nfs_init_server_aclclient(server);
+ 
+ 	/* probe the filesystem info for this server filesystem */
+-	error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo);
++	error = nfs_probe_fsinfo(server, fh, fattr_fsinfo);
+ 	if (error < 0)
+ 		goto out_free_server;
+ 
+@@ -1534,10 +1559,12 @@ struct nfs_server *nfs_clone_server(stru
+ 
+ 	server->mount_time = jiffies;
+ 
++	nfs_free_fattr(fattr_fsinfo);
+ 	dprintk("<-- nfs_clone_server() = %p\n", server);
+ 	return server;
+ 
+ out_free_server:
++	nfs_free_fattr(fattr_fsinfo);
+ 	nfs_free_server(server);
+ 	dprintk("<-- nfs_clone_server() = error %d\n", error);
+ 	return ERR_PTR(error);
+diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs/delegation.c
+--- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-23 11:01:00.352376393 -0400
+@@ -213,7 +213,7 @@ int nfs_inode_set_delegation(struct inod
+ 	struct nfs_delegation *freeme = NULL;
+ 	int status = 0;
+ 
+-	delegation = kmalloc(sizeof(*delegation), GFP_KERNEL);
++	delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
+ 	if (delegation == NULL)
+ 		return -ENOMEM;
+ 	memcpy(delegation->stateid.data, res->delegation.data,
+diff -up linux-2.6.34.noarch/fs/nfs/dir.c.orig linux-2.6.34.noarch/fs/nfs/dir.c
+--- linux-2.6.34.noarch/fs/nfs/dir.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/dir.c	2010-08-23 11:01:00.353376419 -0400
+@@ -530,9 +530,7 @@ static int nfs_readdir(struct file *filp
+ 	nfs_readdir_descriptor_t my_desc,
+ 			*desc = &my_desc;
+ 	struct nfs_entry my_entry;
+-	struct nfs_fh	 fh;
+-	struct nfs_fattr fattr;
+-	long		res;
++	int res = -ENOMEM;
+ 
+ 	dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
+ 			dentry->d_parent->d_name.name, dentry->d_name.name,
+@@ -554,9 +552,11 @@ static int nfs_readdir(struct file *filp
+ 
+ 	my_entry.cookie = my_entry.prev_cookie = 0;
+ 	my_entry.eof = 0;
+-	my_entry.fh = &fh;
+-	my_entry.fattr = &fattr;
+-	nfs_fattr_init(&fattr);
++	my_entry.fh = nfs_alloc_fhandle();
++	my_entry.fattr = nfs_alloc_fattr();
++	if (my_entry.fh == NULL || my_entry.fattr == NULL)
++		goto out_alloc_failed;
++
+ 	desc->entry = &my_entry;
+ 
+ 	nfs_block_sillyrename(dentry);
+@@ -598,7 +598,10 @@ out:
+ 	nfs_unblock_sillyrename(dentry);
+ 	if (res > 0)
+ 		res = 0;
+-	dfprintk(FILE, "NFS: readdir(%s/%s) returns %ld\n",
++out_alloc_failed:
++	nfs_free_fattr(my_entry.fattr);
++	nfs_free_fhandle(my_entry.fh);
++	dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n",
+ 			dentry->d_parent->d_name.name, dentry->d_name.name,
+ 			res);
+ 	return res;
+@@ -776,9 +779,9 @@ static int nfs_lookup_revalidate(struct 
+ 	struct inode *dir;
+ 	struct inode *inode;
+ 	struct dentry *parent;
++	struct nfs_fh *fhandle = NULL;
++	struct nfs_fattr *fattr = NULL;
+ 	int error;
+-	struct nfs_fh fhandle;
+-	struct nfs_fattr fattr;
+ 
+ 	parent = dget_parent(dentry);
+ 	dir = parent->d_inode;
+@@ -811,14 +814,22 @@ static int nfs_lookup_revalidate(struct 
+ 	if (NFS_STALE(inode))
+ 		goto out_bad;
+ 
+-	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
++	error = -ENOMEM;
++	fhandle = nfs_alloc_fhandle();
++	fattr = nfs_alloc_fattr();
++	if (fhandle == NULL || fattr == NULL)
++		goto out_error;
++
++	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
+ 	if (error)
+ 		goto out_bad;
+-	if (nfs_compare_fh(NFS_FH(inode), &fhandle))
++	if (nfs_compare_fh(NFS_FH(inode), fhandle))
+ 		goto out_bad;
+-	if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
++	if ((error = nfs_refresh_inode(inode, fattr)) != 0)
+ 		goto out_bad;
+ 
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fhandle);
+ out_set_verifier:
+ 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+  out_valid:
+@@ -842,11 +853,21 @@ out_zap_parent:
+ 		shrink_dcache_parent(dentry);
+ 	}
+ 	d_drop(dentry);
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fhandle);
+ 	dput(parent);
+ 	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
+ 			__func__, dentry->d_parent->d_name.name,
+ 			dentry->d_name.name);
+ 	return 0;
++out_error:
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fhandle);
++	dput(parent);
++	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n",
++			__func__, dentry->d_parent->d_name.name,
++			dentry->d_name.name, error);
++	return error;
+ }
+ 
+ /*
+@@ -911,9 +932,9 @@ static struct dentry *nfs_lookup(struct 
+ 	struct dentry *res;
+ 	struct dentry *parent;
+ 	struct inode *inode = NULL;
++	struct nfs_fh *fhandle = NULL;
++	struct nfs_fattr *fattr = NULL;
+ 	int error;
+-	struct nfs_fh fhandle;
+-	struct nfs_fattr fattr;
+ 
+ 	dfprintk(VFS, "NFS: lookup(%s/%s)\n",
+ 		dentry->d_parent->d_name.name, dentry->d_name.name);
+@@ -923,7 +944,6 @@ static struct dentry *nfs_lookup(struct 
+ 	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
+ 		goto out;
+ 
+-	res = ERR_PTR(-ENOMEM);
+ 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+ 
+ 	/*
+@@ -936,17 +956,23 @@ static struct dentry *nfs_lookup(struct 
+ 		goto out;
+ 	}
+ 
++	res = ERR_PTR(-ENOMEM);
++	fhandle = nfs_alloc_fhandle();
++	fattr = nfs_alloc_fattr();
++	if (fhandle == NULL || fattr == NULL)
++		goto out;
++
+ 	parent = dentry->d_parent;
+ 	/* Protect against concurrent sillydeletes */
+ 	nfs_block_sillyrename(parent);
+-	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
++	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
+ 	if (error == -ENOENT)
+ 		goto no_entry;
+ 	if (error < 0) {
+ 		res = ERR_PTR(error);
+ 		goto out_unblock_sillyrename;
+ 	}
+-	inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
++	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
+ 	res = (struct dentry *)inode;
+ 	if (IS_ERR(res))
+ 		goto out_unblock_sillyrename;
+@@ -962,6 +988,8 @@ no_entry:
+ out_unblock_sillyrename:
+ 	nfs_unblock_sillyrename(parent);
+ out:
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fhandle);
+ 	return res;
+ }
+ 
+@@ -1669,28 +1697,33 @@ static void nfs_access_free_entry(struct
+ 	smp_mb__after_atomic_dec();
+ }
+ 
++static void nfs_access_free_list(struct list_head *head)
++{
++	struct nfs_access_entry *cache;
++
++	while (!list_empty(head)) {
++		cache = list_entry(head->next, struct nfs_access_entry, lru);
++		list_del(&cache->lru);
++		nfs_access_free_entry(cache);
++	}
++}
++
+ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+ {
+ 	LIST_HEAD(head);
+ 	struct nfs_inode *nfsi;
+ 	struct nfs_access_entry *cache;
+ 
+-restart:
++	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
++		return (nr_to_scan == 0) ? 0 : -1;
++
+ 	spin_lock(&nfs_access_lru_lock);
+ 	list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
+-		struct rw_semaphore *s_umount;
+ 		struct inode *inode;
+ 
+ 		if (nr_to_scan-- == 0)
+ 			break;
+-		s_umount = &nfsi->vfs_inode.i_sb->s_umount;
+-		if (!down_read_trylock(s_umount))
+-			continue;
+-		inode = igrab(&nfsi->vfs_inode);
+-		if (inode == NULL) {
+-			up_read(s_umount);
+-			continue;
+-		}
++		inode = &nfsi->vfs_inode;
+ 		spin_lock(&inode->i_lock);
+ 		if (list_empty(&nfsi->access_cache_entry_lru))
+ 			goto remove_lru_entry;
+@@ -1704,61 +1737,47 @@ restart:
+ 		else {
+ remove_lru_entry:
+ 			list_del_init(&nfsi->access_cache_inode_lru);
++			smp_mb__before_clear_bit();
+ 			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
++			smp_mb__after_clear_bit();
+ 		}
+-		spin_unlock(&inode->i_lock);
+-		spin_unlock(&nfs_access_lru_lock);
+-		iput(inode);
+-		up_read(s_umount);
+-		goto restart;
+ 	}
+ 	spin_unlock(&nfs_access_lru_lock);
+-	while (!list_empty(&head)) {
+-		cache = list_entry(head.next, struct nfs_access_entry, lru);
+-		list_del(&cache->lru);
+-		nfs_access_free_entry(cache);
+-	}
++	nfs_access_free_list(&head);
+ 	return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
+ }
+ 
+-static void __nfs_access_zap_cache(struct inode *inode)
++static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
+ {
+-	struct nfs_inode *nfsi = NFS_I(inode);
+ 	struct rb_root *root_node = &nfsi->access_cache;
+-	struct rb_node *n, *dispose = NULL;
++	struct rb_node *n;
+ 	struct nfs_access_entry *entry;
+ 
+ 	/* Unhook entries from the cache */
+ 	while ((n = rb_first(root_node)) != NULL) {
+ 		entry = rb_entry(n, struct nfs_access_entry, rb_node);
+ 		rb_erase(n, root_node);
+-		list_del(&entry->lru);
+-		n->rb_left = dispose;
+-		dispose = n;
++		list_move(&entry->lru, head);
+ 	}
+ 	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+-	spin_unlock(&inode->i_lock);
+-
+-	/* Now kill them all! */
+-	while (dispose != NULL) {
+-		n = dispose;
+-		dispose = n->rb_left;
+-		nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node));
+-	}
+ }
+ 
+ void nfs_access_zap_cache(struct inode *inode)
+ {
++	LIST_HEAD(head);
++
++	if (test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags) == 0)
++		return;
+ 	/* Remove from global LRU init */
+-	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
+-		spin_lock(&nfs_access_lru_lock);
++	spin_lock(&nfs_access_lru_lock);
++	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
+ 		list_del_init(&NFS_I(inode)->access_cache_inode_lru);
+-		spin_unlock(&nfs_access_lru_lock);
+-	}
+ 
+ 	spin_lock(&inode->i_lock);
+-	/* This will release the spinlock */
+-	__nfs_access_zap_cache(inode);
++	__nfs_access_zap_cache(NFS_I(inode), &head);
++	spin_unlock(&inode->i_lock);
++	spin_unlock(&nfs_access_lru_lock);
++	nfs_access_free_list(&head);
+ }
+ 
+ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
+@@ -1809,8 +1828,8 @@ out_stale:
+ 	nfs_access_free_entry(cache);
+ 	return -ENOENT;
+ out_zap:
+-	/* This will release the spinlock */
+-	__nfs_access_zap_cache(inode);
++	spin_unlock(&inode->i_lock);
++	nfs_access_zap_cache(inode);
+ 	return -ENOENT;
+ }
+ 
+@@ -1865,9 +1884,11 @@ static void nfs_access_add_cache(struct 
+ 	smp_mb__after_atomic_inc();
+ 
+ 	/* Add inode to global LRU list */
+-	if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
++	if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
+ 		spin_lock(&nfs_access_lru_lock);
+-		list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list);
++		if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
++			list_add_tail(&NFS_I(inode)->access_cache_inode_lru,
++					&nfs_access_lru_list);
+ 		spin_unlock(&nfs_access_lru_lock);
+ 	}
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.c
+--- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-23 11:00:23.790502081 -0400
++++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-23 11:01:00.354376416 -0400
+@@ -162,14 +162,17 @@ static int nfs_revalidate_file_size(stru
+ 	struct nfs_server *server = NFS_SERVER(inode);
+ 	struct nfs_inode *nfsi = NFS_I(inode);
+ 
+-	if (server->flags & NFS_MOUNT_NOAC)
+-		goto force_reval;
++	if (nfs_have_delegated_attributes(inode))
++		goto out_noreval;
++
+ 	if (filp->f_flags & O_DIRECT)
+ 		goto force_reval;
+-	if (nfsi->npages != 0)
+-		return 0;
+-	if (!(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode))
+-		return 0;
++	if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
++		goto force_reval;
++	if (nfs_attribute_timeout(inode))
++		goto force_reval;
++out_noreval:
++	return 0;
+ force_reval:
+ 	return __nfs_revalidate_inode(server, inode);
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/fscache.c.orig linux-2.6.34.noarch/fs/nfs/fscache.c
+--- linux-2.6.34.noarch/fs/nfs/fscache.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/fscache.c	2010-08-23 11:01:00.355376416 -0400
+@@ -467,7 +467,8 @@ int __nfs_readpages_from_fscache(struct 
+ 				 struct list_head *pages,
+ 				 unsigned *nr_pages)
+ {
+-	int ret, npages = *nr_pages;
++	unsigned npages = *nr_pages;
++	int ret;
+ 
+ 	dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n",
+ 		 NFS_I(inode)->fscache, npages, inode);
+diff -up linux-2.6.34.noarch/fs/nfs/getroot.c.orig linux-2.6.34.noarch/fs/nfs/getroot.c
+--- linux-2.6.34.noarch/fs/nfs/getroot.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/getroot.c	2010-08-23 11:01:00.356376417 -0400
+@@ -78,159 +78,94 @@ struct dentry *nfs_get_root(struct super
+ {
+ 	struct nfs_server *server = NFS_SB(sb);
+ 	struct nfs_fsinfo fsinfo;
+-	struct nfs_fattr fattr;
+-	struct dentry *mntroot;
++	struct dentry *ret;
+ 	struct inode *inode;
+ 	int error;
+ 
+ 	/* get the actual root for this mount */
+-	fsinfo.fattr = &fattr;
++	fsinfo.fattr = nfs_alloc_fattr();
++	if (fsinfo.fattr == NULL)
++		return ERR_PTR(-ENOMEM);
+ 
+ 	error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+ 	if (error < 0) {
+ 		dprintk("nfs_get_root: getattr error = %d\n", -error);
+-		return ERR_PTR(error);
++		ret = ERR_PTR(error);
++		goto out;
+ 	}
+ 
+ 	inode = nfs_fhget(sb, mntfh, fsinfo.fattr);
+ 	if (IS_ERR(inode)) {
+ 		dprintk("nfs_get_root: get root inode failed\n");
+-		return ERR_CAST(inode);
++		ret = ERR_CAST(inode);
++		goto out;
+ 	}
+ 
+ 	error = nfs_superblock_set_dummy_root(sb, inode);
+-	if (error != 0)
+-		return ERR_PTR(error);
++	if (error != 0) {
++		ret = ERR_PTR(error);
++		goto out;
++	}
+ 
+ 	/* root dentries normally start off anonymous and get spliced in later
+ 	 * if the dentry tree reaches them; however if the dentry already
+ 	 * exists, we'll pick it up at this point and use it as the root
+ 	 */
+-	mntroot = d_obtain_alias(inode);
+-	if (IS_ERR(mntroot)) {
++	ret = d_obtain_alias(inode);
++	if (IS_ERR(ret)) {
+ 		dprintk("nfs_get_root: get root dentry failed\n");
+-		return mntroot;
++		goto out;
+ 	}
+ 
+-	security_d_instantiate(mntroot, inode);
+-
+-	if (!mntroot->d_op)
+-		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
++	security_d_instantiate(ret, inode);
+ 
+-	return mntroot;
++	if (ret->d_op == NULL)
++		ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
++out:
++	nfs_free_fattr(fsinfo.fattr);
++	return ret;
+ }
+ 
+ #ifdef CONFIG_NFS_V4
+ 
+-/*
+- * Do a simple pathwalk from the root FH of the server to the nominated target
+- * of the mountpoint
+- * - give error on symlinks
+- * - give error on ".." occurring in the path
+- * - follow traversals
+- */
+-int nfs4_path_walk(struct nfs_server *server,
+-		   struct nfs_fh *mntfh,
+-		   const char *path)
++int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
+ {
+ 	struct nfs_fsinfo fsinfo;
+-	struct nfs_fattr fattr;
+-	struct nfs_fh lastfh;
+-	struct qstr name;
+-	int ret;
+-
+-	dprintk("--> nfs4_path_walk(,,%s)\n", path);
+-
+-	fsinfo.fattr = &fattr;
+-	nfs_fattr_init(&fattr);
+-
+-	/* Eat leading slashes */
+-	while (*path == '/')
+-		path++;
++	int ret = -ENOMEM;
++
++	dprintk("--> nfs4_get_rootfh()\n");
++
++	fsinfo.fattr = nfs_alloc_fattr();
++	if (fsinfo.fattr == NULL)
++		goto out;
+ 
+ 	/* Start by getting the root filehandle from the server */
+ 	ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+ 	if (ret < 0) {
+-		dprintk("nfs4_get_root: getroot error = %d\n", -ret);
+-		return ret;
++		dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
++		goto out;
+ 	}
+ 
+-	if (!S_ISDIR(fattr.mode)) {
+-		printk(KERN_ERR "nfs4_get_root:"
++	if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_MODE)
++			|| !S_ISDIR(fsinfo.fattr->mode)) {
++		printk(KERN_ERR "nfs4_get_rootfh:"
+ 		       " getroot encountered non-directory\n");
+-		return -ENOTDIR;
++		ret = -ENOTDIR;
++		goto out;
+ 	}
+ 
+-	/* FIXME: It is quite valid for the server to return a referral here */
+-	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+-		printk(KERN_ERR "nfs4_get_root:"
++	if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
++		printk(KERN_ERR "nfs4_get_rootfh:"
+ 		       " getroot obtained referral\n");
+-		return -EREMOTE;
++		ret = -EREMOTE;
++		goto out;
+ 	}
+ 
+-next_component:
+-	dprintk("Next: %s\n", path);
+-
+-	/* extract the next bit of the path */
+-	if (!*path)
+-		goto path_walk_complete;
+-
+-	name.name = path;
+-	while (*path && *path != '/')
+-		path++;
+-	name.len = path - (const char *) name.name;
+-
+-	if (name.len > NFS4_MAXNAMLEN)
+-		return -ENAMETOOLONG;
+-
+-eat_dot_dir:
+-	while (*path == '/')
+-		path++;
+-
+-	if (path[0] == '.' && (path[1] == '/' || !path[1])) {
+-		path += 2;
+-		goto eat_dot_dir;
+-	}
+-
+-	/* FIXME: Why shouldn't the user be able to use ".." in the path? */
+-	if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2])
+-	    ) {
+-		printk(KERN_ERR "nfs4_get_root:"
+-		       " Mount path contains reference to \"..\"\n");
+-		return -EINVAL;
+-	}
+-
+-	/* lookup the next FH in the sequence */
+-	memcpy(&lastfh, mntfh, sizeof(lastfh));
+-
+-	dprintk("LookupFH: %*.*s [%s]\n", name.len, name.len, name.name, path);
+-
+-	ret = server->nfs_client->rpc_ops->lookupfh(server, &lastfh, &name,
+-						    mntfh, &fattr);
+-	if (ret < 0) {
+-		dprintk("nfs4_get_root: getroot error = %d\n", -ret);
+-		return ret;
+-	}
+-
+-	if (!S_ISDIR(fattr.mode)) {
+-		printk(KERN_ERR "nfs4_get_root:"
+-		       " lookupfh encountered non-directory\n");
+-		return -ENOTDIR;
+-	}
+-
+-	/* FIXME: Referrals are quite valid here too */
+-	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+-		printk(KERN_ERR "nfs4_get_root:"
+-		       " lookupfh obtained referral\n");
+-		return -EREMOTE;
+-	}
+-
+-	goto next_component;
+-
+-path_walk_complete:
+-	memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
+-	dprintk("<-- nfs4_path_walk() = 0\n");
+-	return 0;
++	memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
++out:
++	nfs_free_fattr(fsinfo.fattr);
++	dprintk("<-- nfs4_get_rootfh() = %d\n", ret);
++	return ret;
+ }
+ 
+ /*
+@@ -239,8 +174,8 @@ path_walk_complete:
+ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
+ {
+ 	struct nfs_server *server = NFS_SB(sb);
+-	struct nfs_fattr fattr;
+-	struct dentry *mntroot;
++	struct nfs_fattr *fattr = NULL;
++	struct dentry *ret;
+ 	struct inode *inode;
+ 	int error;
+ 
+@@ -254,40 +189,50 @@ struct dentry *nfs4_get_root(struct supe
+ 		return ERR_PTR(error);
+ 	}
+ 
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		return ERR_PTR(-ENOMEM);
++
+ 	/* get the actual root for this mount */
+-	error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
++	error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
+ 	if (error < 0) {
+ 		dprintk("nfs_get_root: getattr error = %d\n", -error);
+-		return ERR_PTR(error);
++		ret = ERR_PTR(error);
++		goto out;
+ 	}
+ 
+-	inode = nfs_fhget(sb, mntfh, &fattr);
++	inode = nfs_fhget(sb, mntfh, fattr);
+ 	if (IS_ERR(inode)) {
+ 		dprintk("nfs_get_root: get root inode failed\n");
+-		return ERR_CAST(inode);
++		ret = ERR_CAST(inode);
++		goto out;
+ 	}
+ 
+ 	error = nfs_superblock_set_dummy_root(sb, inode);
+-	if (error != 0)
+-		return ERR_PTR(error);
++	if (error != 0) {
++		ret = ERR_PTR(error);
++		goto out;
++	}
+ 
+ 	/* root dentries normally start off anonymous and get spliced in later
+ 	 * if the dentry tree reaches them; however if the dentry already
+ 	 * exists, we'll pick it up at this point and use it as the root
+ 	 */
+-	mntroot = d_obtain_alias(inode);
+-	if (IS_ERR(mntroot)) {
++	ret = d_obtain_alias(inode);
++	if (IS_ERR(ret)) {
+ 		dprintk("nfs_get_root: get root dentry failed\n");
+-		return mntroot;
++		goto out;
+ 	}
+ 
+-	security_d_instantiate(mntroot, inode);
++	security_d_instantiate(ret, inode);
+ 
+-	if (!mntroot->d_op)
+-		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
++	if (ret->d_op == NULL)
++		ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
+ 
++out:
++	nfs_free_fattr(fattr);
+ 	dprintk("<-- nfs4_get_root()\n");
+-	return mntroot;
++	return ret;
+ }
+ 
+ #endif /* CONFIG_NFS_V4 */
+diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inode.c
+--- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-23 11:01:00.357376378 -0400
+@@ -393,8 +393,8 @@ int
+ nfs_setattr(struct dentry *dentry, struct iattr *attr)
+ {
+ 	struct inode *inode = dentry->d_inode;
+-	struct nfs_fattr fattr;
+-	int error;
++	struct nfs_fattr *fattr;
++	int error = -ENOMEM;
+ 
+ 	nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
+ 
+@@ -417,14 +417,20 @@ nfs_setattr(struct dentry *dentry, struc
+ 		filemap_write_and_wait(inode->i_mapping);
+ 		nfs_wb_all(inode);
+ 	}
++
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto out;
+ 	/*
+ 	 * Return any delegations if we're going to change ACLs
+ 	 */
+ 	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
+ 		nfs_inode_return_delegation(inode);
+-	error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
++	error = NFS_PROTO(inode)->setattr(dentry, fattr, attr);
+ 	if (error == 0)
+-		nfs_refresh_inode(inode, &fattr);
++		nfs_refresh_inode(inode, fattr);
++	nfs_free_fattr(fattr);
++out:
+ 	return error;
+ }
+ 
+@@ -682,7 +688,7 @@ int
+ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
+ {
+ 	int		 status = -ESTALE;
+-	struct nfs_fattr fattr;
++	struct nfs_fattr *fattr = NULL;
+ 	struct nfs_inode *nfsi = NFS_I(inode);
+ 
+ 	dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
+@@ -693,8 +699,13 @@ __nfs_revalidate_inode(struct nfs_server
+ 	if (NFS_STALE(inode))
+ 		goto out;
+ 
++	status = -ENOMEM;
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto out;
++
+ 	nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
+-	status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
++	status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr);
+ 	if (status != 0) {
+ 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
+ 			 inode->i_sb->s_id,
+@@ -707,7 +718,7 @@ __nfs_revalidate_inode(struct nfs_server
+ 		goto out;
+ 	}
+ 
+-	status = nfs_refresh_inode(inode, &fattr);
++	status = nfs_refresh_inode(inode, fattr);
+ 	if (status) {
+ 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
+ 			 inode->i_sb->s_id,
+@@ -723,6 +734,7 @@ __nfs_revalidate_inode(struct nfs_server
+ 		(long long)NFS_FILEID(inode));
+ 
+  out:
++	nfs_free_fattr(fattr);
+ 	return status;
+ }
+ 
+@@ -730,9 +742,14 @@ int nfs_attribute_timeout(struct inode *
+ {
+ 	struct nfs_inode *nfsi = NFS_I(inode);
+ 
++	return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
++}
++
++static int nfs_attribute_cache_expired(struct inode *inode)
++{
+ 	if (nfs_have_delegated_attributes(inode))
+ 		return 0;
+-	return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
++	return nfs_attribute_timeout(inode);
+ }
+ 
+ /**
+@@ -745,7 +762,7 @@ int nfs_attribute_timeout(struct inode *
+ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
+ {
+ 	if (!(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR)
+-			&& !nfs_attribute_timeout(inode))
++			&& !nfs_attribute_cache_expired(inode))
+ 		return NFS_STALE(inode) ? -ESTALE : 0;
+ 	return __nfs_revalidate_inode(server, inode);
+ }
+@@ -782,7 +799,8 @@ int nfs_revalidate_mapping(struct inode 
+ 	int ret = 0;
+ 
+ 	if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
+-			|| nfs_attribute_timeout(inode) || NFS_STALE(inode)) {
++			|| nfs_attribute_cache_expired(inode)
++			|| NFS_STALE(inode)) {
+ 		ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ 		if (ret < 0)
+ 			goto out;
+@@ -916,6 +934,26 @@ void nfs_fattr_init(struct nfs_fattr *fa
+ 	fattr->gencount = nfs_inc_attr_generation_counter();
+ }
+ 
++struct nfs_fattr *nfs_alloc_fattr(void)
++{
++	struct nfs_fattr *fattr;
++
++	fattr = kmalloc(sizeof(*fattr), GFP_NOFS);
++	if (fattr != NULL)
++		nfs_fattr_init(fattr);
++	return fattr;
++}
++
++struct nfs_fh *nfs_alloc_fhandle(void)
++{
++	struct nfs_fh *fh;
++
++	fh = kmalloc(sizeof(struct nfs_fh), GFP_NOFS);
++	if (fh != NULL)
++		fh->size = 0;
++	return fh;
++}
++
+ /**
+  * nfs_inode_attrs_need_update - check if the inode attributes need updating
+  * @inode - pointer to inode
+diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/internal.h
+--- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-23 11:01:00.358564151 -0400
+@@ -244,9 +244,7 @@ extern struct dentry *nfs_get_root(struc
+ #ifdef CONFIG_NFS_V4
+ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *);
+ 
+-extern int nfs4_path_walk(struct nfs_server *server,
+-			  struct nfs_fh *mntfh,
+-			  const char *path);
++extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
+ #endif
+ 
+ /* read.c */
+diff -up linux-2.6.34.noarch/fs/nfs/iostat.h.orig linux-2.6.34.noarch/fs/nfs/iostat.h
+--- linux-2.6.34.noarch/fs/nfs/iostat.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/iostat.h	2010-08-23 11:01:00.358564151 -0400
+@@ -36,14 +36,14 @@ static inline void nfs_inc_stats(const s
+ 
+ static inline void nfs_add_server_stats(const struct nfs_server *server,
+ 					enum nfs_stat_bytecounters stat,
+-					unsigned long addend)
++					long addend)
+ {
+ 	this_cpu_add(server->io_stats->bytes[stat], addend);
+ }
+ 
+ static inline void nfs_add_stats(const struct inode *inode,
+ 				 enum nfs_stat_bytecounters stat,
+-				 unsigned long addend)
++				 long addend)
+ {
+ 	nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
+ }
+@@ -51,7 +51,7 @@ static inline void nfs_add_stats(const s
+ #ifdef CONFIG_NFS_FSCACHE
+ static inline void nfs_add_fscache_stats(struct inode *inode,
+ 					 enum nfs_stat_fscachecounters stat,
+-					 unsigned long addend)
++					 long addend)
+ {
+ 	this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend);
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/namespace.c.orig linux-2.6.34.noarch/fs/nfs/namespace.c
+--- linux-2.6.34.noarch/fs/nfs/namespace.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/namespace.c	2010-08-23 11:01:00.359420147 -0400
+@@ -105,8 +105,8 @@ static void * nfs_follow_mountpoint(stru
+ 	struct vfsmount *mnt;
+ 	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+ 	struct dentry *parent;
+-	struct nfs_fh fh;
+-	struct nfs_fattr fattr;
++	struct nfs_fh *fh = NULL;
++	struct nfs_fattr *fattr = NULL;
+ 	int err;
+ 
+ 	dprintk("--> nfs_follow_mountpoint()\n");
+@@ -115,6 +115,12 @@ static void * nfs_follow_mountpoint(stru
+ 	if (IS_ROOT(dentry))
+ 		goto out_err;
+ 
++	err = -ENOMEM;
++	fh = nfs_alloc_fhandle();
++	fattr = nfs_alloc_fattr();
++	if (fh == NULL || fattr == NULL)
++		goto out_err;
++
+ 	dprintk("%s: enter\n", __func__);
+ 	dput(nd->path.dentry);
+ 	nd->path.dentry = dget(dentry);
+@@ -123,16 +129,16 @@ static void * nfs_follow_mountpoint(stru
+ 	parent = dget_parent(nd->path.dentry);
+ 	err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
+ 						  &nd->path.dentry->d_name,
+-						  &fh, &fattr);
++						  fh, fattr);
+ 	dput(parent);
+ 	if (err != 0)
+ 		goto out_err;
+ 
+-	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL)
++	if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
+ 		mnt = nfs_do_refmount(nd->path.mnt, nd->path.dentry);
+ 	else
+-		mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, &fh,
+-				      &fattr);
++		mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, fh,
++				      fattr);
+ 	err = PTR_ERR(mnt);
+ 	if (IS_ERR(mnt))
+ 		goto out_err;
+@@ -151,6 +157,8 @@ static void * nfs_follow_mountpoint(stru
+ 	nd->path.dentry = dget(mnt->mnt_root);
+ 	schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
+ out:
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fh);
+ 	dprintk("%s: done, returned %d\n", __func__, err);
+ 
+ 	dprintk("<-- nfs_follow_mountpoint() = %d\n", err);
+diff -up linux-2.6.34.noarch/fs/nfs/nfs3acl.c.orig linux-2.6.34.noarch/fs/nfs/nfs3acl.c
+--- linux-2.6.34.noarch/fs/nfs/nfs3acl.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3acl.c	2010-08-23 11:01:00.359420147 -0400
+@@ -185,7 +185,6 @@ static void nfs3_cache_acls(struct inode
+ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
+ {
+ 	struct nfs_server *server = NFS_SERVER(inode);
+-	struct nfs_fattr fattr;
+ 	struct page *pages[NFSACL_MAXPAGES] = { };
+ 	struct nfs3_getaclargs args = {
+ 		.fh = NFS_FH(inode),
+@@ -193,7 +192,7 @@ struct posix_acl *nfs3_proc_getacl(struc
+ 		.pages = pages,
+ 	};
+ 	struct nfs3_getaclres res = {
+-		.fattr =	&fattr,
++		0
+ 	};
+ 	struct rpc_message msg = {
+ 		.rpc_argp	= &args,
+@@ -228,7 +227,10 @@ struct posix_acl *nfs3_proc_getacl(struc
+ 
+ 	dprintk("NFS call getacl\n");
+ 	msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL];
+-	nfs_fattr_init(&fattr);
++	res.fattr = nfs_alloc_fattr();
++	if (res.fattr == NULL)
++		return ERR_PTR(-ENOMEM);
++
+ 	status = rpc_call_sync(server->client_acl, &msg, 0);
+ 	dprintk("NFS reply getacl: %d\n", status);
+ 
+@@ -238,7 +240,7 @@ struct posix_acl *nfs3_proc_getacl(struc
+ 
+ 	switch (status) {
+ 		case 0:
+-			status = nfs_refresh_inode(inode, &fattr);
++			status = nfs_refresh_inode(inode, res.fattr);
+ 			break;
+ 		case -EPFNOSUPPORT:
+ 		case -EPROTONOSUPPORT:
+@@ -278,6 +280,7 @@ struct posix_acl *nfs3_proc_getacl(struc
+ getout:
+ 	posix_acl_release(res.acl_access);
+ 	posix_acl_release(res.acl_default);
++	nfs_free_fattr(res.fattr);
+ 
+ 	if (status != 0) {
+ 		posix_acl_release(acl);
+@@ -290,7 +293,7 @@ static int nfs3_proc_setacls(struct inod
+ 		  struct posix_acl *dfacl)
+ {
+ 	struct nfs_server *server = NFS_SERVER(inode);
+-	struct nfs_fattr fattr;
++	struct nfs_fattr *fattr;
+ 	struct page *pages[NFSACL_MAXPAGES];
+ 	struct nfs3_setaclargs args = {
+ 		.inode = inode,
+@@ -335,8 +338,13 @@ static int nfs3_proc_setacls(struct inod
+ 	}
+ 
+ 	dprintk("NFS call setacl\n");
++	status = -ENOMEM;
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto out_freepages;
++
+ 	msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
+-	nfs_fattr_init(&fattr);
++	msg.rpc_resp = fattr;
+ 	status = rpc_call_sync(server->client_acl, &msg, 0);
+ 	nfs_access_zap_cache(inode);
+ 	nfs_zap_acl_cache(inode);
+@@ -344,7 +352,7 @@ static int nfs3_proc_setacls(struct inod
+ 
+ 	switch (status) {
+ 		case 0:
+-			status = nfs_refresh_inode(inode, &fattr);
++			status = nfs_refresh_inode(inode, fattr);
+ 			nfs3_cache_acls(inode, acl, dfacl);
+ 			break;
+ 		case -EPFNOSUPPORT:
+@@ -355,6 +363,7 @@ static int nfs3_proc_setacls(struct inod
+ 		case -ENOTSUPP:
+ 			status = -EOPNOTSUPP;
+ 	}
++	nfs_free_fattr(fattr);
+ out_freepages:
+ 	while (args.npages != 0) {
+ 		args.npages--;
+diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs3proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-23 11:01:00.360574301 -0400
+@@ -144,14 +144,12 @@ static int
+ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
+ 		 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+ {
+-	struct nfs_fattr	dir_attr;
+ 	struct nfs3_diropargs	arg = {
+ 		.fh		= NFS_FH(dir),
+ 		.name		= name->name,
+ 		.len		= name->len
+ 	};
+ 	struct nfs3_diropres	res = {
+-		.dir_attr	= &dir_attr,
+ 		.fh		= fhandle,
+ 		.fattr		= fattr
+ 	};
+@@ -163,29 +161,30 @@ nfs3_proc_lookup(struct inode *dir, stru
+ 	int			status;
+ 
+ 	dprintk("NFS call  lookup %s\n", name->name);
+-	nfs_fattr_init(&dir_attr);
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.dir_attr == NULL)
++		return -ENOMEM;
++
+ 	nfs_fattr_init(fattr);
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+-	nfs_refresh_inode(dir, &dir_attr);
++	nfs_refresh_inode(dir, res.dir_attr);
+ 	if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
+ 		msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+ 		msg.rpc_argp = fhandle;
+ 		msg.rpc_resp = fattr;
+ 		status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	}
++	nfs_free_fattr(res.dir_attr);
+ 	dprintk("NFS reply lookup: %d\n", status);
+ 	return status;
+ }
+ 
+ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+ {
+-	struct nfs_fattr	fattr;
+ 	struct nfs3_accessargs	arg = {
+ 		.fh		= NFS_FH(inode),
+ 	};
+-	struct nfs3_accessres	res = {
+-		.fattr		= &fattr,
+-	};
++	struct nfs3_accessres	res;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs3_procedures[NFS3PROC_ACCESS],
+ 		.rpc_argp	= &arg,
+@@ -193,7 +192,7 @@ static int nfs3_proc_access(struct inode
+ 		.rpc_cred	= entry->cred,
+ 	};
+ 	int mode = entry->mask;
+-	int status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  access\n");
+ 
+@@ -210,9 +209,13 @@ static int nfs3_proc_access(struct inode
+ 		if (mode & MAY_EXEC)
+ 			arg.access |= NFS3_ACCESS_EXECUTE;
+ 	}
+-	nfs_fattr_init(&fattr);
++
++	res.fattr = nfs_alloc_fattr();
++	if (res.fattr == NULL)
++		goto out;
++
+ 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+-	nfs_refresh_inode(inode, &fattr);
++	nfs_refresh_inode(inode, res.fattr);
+ 	if (status == 0) {
+ 		entry->mask = 0;
+ 		if (res.access & NFS3_ACCESS_READ)
+@@ -222,6 +225,8 @@ static int nfs3_proc_access(struct inode
+ 		if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
+ 			entry->mask |= MAY_EXEC;
+ 	}
++	nfs_free_fattr(res.fattr);
++out:
+ 	dprintk("NFS reply access: %d\n", status);
+ 	return status;
+ }
+@@ -229,7 +234,7 @@ static int nfs3_proc_access(struct inode
+ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
+ 		unsigned int pgbase, unsigned int pglen)
+ {
+-	struct nfs_fattr	fattr;
++	struct nfs_fattr	*fattr;
+ 	struct nfs3_readlinkargs args = {
+ 		.fh		= NFS_FH(inode),
+ 		.pgbase		= pgbase,
+@@ -239,14 +244,19 @@ static int nfs3_proc_readlink(struct ino
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs3_procedures[NFS3PROC_READLINK],
+ 		.rpc_argp	= &args,
+-		.rpc_resp	= &fattr,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  readlink\n");
+-	nfs_fattr_init(&fattr);
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto out;
++	msg.rpc_resp = fattr;
++
+ 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+-	nfs_refresh_inode(inode, &fattr);
++	nfs_refresh_inode(inode, fattr);
++	nfs_free_fattr(fattr);
++out:
+ 	dprintk("NFS reply readlink: %d\n", status);
+ 	return status;
+ }
+@@ -396,12 +406,17 @@ nfs3_proc_remove(struct inode *dir, stru
+ 		.rpc_argp = &arg,
+ 		.rpc_resp = &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  remove %s\n", name->name);
+-	nfs_fattr_init(&res.dir_attr);
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.dir_attr == NULL)
++		goto out;
++
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+-	nfs_post_op_update_inode(dir, &res.dir_attr);
++	nfs_post_op_update_inode(dir, res.dir_attr);
++	nfs_free_fattr(res.dir_attr);
++out:
+ 	dprintk("NFS reply remove: %d\n", status);
+ 	return status;
+ }
+@@ -419,7 +434,7 @@ nfs3_proc_unlink_done(struct rpc_task *t
+ 	if (nfs3_async_handle_jukebox(task, dir))
+ 		return 0;
+ 	res = task->tk_msg.rpc_resp;
+-	nfs_post_op_update_inode(dir, &res->dir_attr);
++	nfs_post_op_update_inode(dir, res->dir_attr);
+ 	return 1;
+ }
+ 
+@@ -427,7 +442,6 @@ static int
+ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
+ 		 struct inode *new_dir, struct qstr *new_name)
+ {
+-	struct nfs_fattr	old_dir_attr, new_dir_attr;
+ 	struct nfs3_renameargs	arg = {
+ 		.fromfh		= NFS_FH(old_dir),
+ 		.fromname	= old_name->name,
+@@ -436,23 +450,27 @@ nfs3_proc_rename(struct inode *old_dir, 
+ 		.toname		= new_name->name,
+ 		.tolen		= new_name->len
+ 	};
+-	struct nfs3_renameres	res = {
+-		.fromattr	= &old_dir_attr,
+-		.toattr		= &new_dir_attr
+-	};
++	struct nfs3_renameres res;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs3_procedures[NFS3PROC_RENAME],
+ 		.rpc_argp	= &arg,
+ 		.rpc_resp	= &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
+-	nfs_fattr_init(&old_dir_attr);
+-	nfs_fattr_init(&new_dir_attr);
++
++	res.fromattr = nfs_alloc_fattr();
++	res.toattr = nfs_alloc_fattr();
++	if (res.fromattr == NULL || res.toattr == NULL)
++		goto out;
++
+ 	status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
+-	nfs_post_op_update_inode(old_dir, &old_dir_attr);
+-	nfs_post_op_update_inode(new_dir, &new_dir_attr);
++	nfs_post_op_update_inode(old_dir, res.fromattr);
++	nfs_post_op_update_inode(new_dir, res.toattr);
++out:
++	nfs_free_fattr(res.toattr);
++	nfs_free_fattr(res.fromattr);
+ 	dprintk("NFS reply rename: %d\n", status);
+ 	return status;
+ }
+@@ -460,30 +478,32 @@ nfs3_proc_rename(struct inode *old_dir, 
+ static int
+ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
+ {
+-	struct nfs_fattr	dir_attr, fattr;
+ 	struct nfs3_linkargs	arg = {
+ 		.fromfh		= NFS_FH(inode),
+ 		.tofh		= NFS_FH(dir),
+ 		.toname		= name->name,
+ 		.tolen		= name->len
+ 	};
+-	struct nfs3_linkres	res = {
+-		.dir_attr	= &dir_attr,
+-		.fattr		= &fattr
+-	};
++	struct nfs3_linkres	res;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs3_procedures[NFS3PROC_LINK],
+ 		.rpc_argp	= &arg,
+ 		.rpc_resp	= &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  link %s\n", name->name);
+-	nfs_fattr_init(&dir_attr);
+-	nfs_fattr_init(&fattr);
++	res.fattr = nfs_alloc_fattr();
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.fattr == NULL || res.dir_attr == NULL)
++		goto out;
++
+ 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+-	nfs_post_op_update_inode(dir, &dir_attr);
+-	nfs_post_op_update_inode(inode, &fattr);
++	nfs_post_op_update_inode(dir, res.dir_attr);
++	nfs_post_op_update_inode(inode, res.fattr);
++out:
++	nfs_free_fattr(res.dir_attr);
++	nfs_free_fattr(res.fattr);
+ 	dprintk("NFS reply link: %d\n", status);
+ 	return status;
+ }
+@@ -554,7 +574,7 @@ out:
+ static int
+ nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
+ {
+-	struct nfs_fattr	dir_attr;
++	struct nfs_fattr	*dir_attr;
+ 	struct nfs3_diropargs	arg = {
+ 		.fh		= NFS_FH(dir),
+ 		.name		= name->name,
+@@ -563,14 +583,19 @@ nfs3_proc_rmdir(struct inode *dir, struc
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs3_procedures[NFS3PROC_RMDIR],
+ 		.rpc_argp	= &arg,
+-		.rpc_resp	= &dir_attr,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  rmdir %s\n", name->name);
+-	nfs_fattr_init(&dir_attr);
++	dir_attr = nfs_alloc_fattr();
++	if (dir_attr == NULL)
++		goto out;
++
++	msg.rpc_resp = dir_attr;
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+-	nfs_post_op_update_inode(dir, &dir_attr);
++	nfs_post_op_update_inode(dir, dir_attr);
++	nfs_free_fattr(dir_attr);
++out:
+ 	dprintk("NFS reply rmdir: %d\n", status);
+ 	return status;
+ }
+@@ -589,7 +614,6 @@ nfs3_proc_readdir(struct dentry *dentry,
+ 		  u64 cookie, struct page *page, unsigned int count, int plus)
+ {
+ 	struct inode		*dir = dentry->d_inode;
+-	struct nfs_fattr	dir_attr;
+ 	__be32			*verf = NFS_COOKIEVERF(dir);
+ 	struct nfs3_readdirargs	arg = {
+ 		.fh		= NFS_FH(dir),
+@@ -600,7 +624,6 @@ nfs3_proc_readdir(struct dentry *dentry,
+ 		.pages		= &page
+ 	};
+ 	struct nfs3_readdirres	res = {
+-		.dir_attr	= &dir_attr,
+ 		.verf		= verf,
+ 		.plus		= plus
+ 	};
+@@ -610,7 +633,7 @@ nfs3_proc_readdir(struct dentry *dentry,
+ 		.rpc_resp	= &res,
+ 		.rpc_cred	= cred
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	if (plus)
+ 		msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS];
+@@ -618,12 +641,17 @@ nfs3_proc_readdir(struct dentry *dentry,
+ 	dprintk("NFS call  readdir%s %d\n",
+ 			plus? "plus" : "", (unsigned int) cookie);
+ 
+-	nfs_fattr_init(&dir_attr);
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.dir_attr == NULL)
++		goto out;
++
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 
+ 	nfs_invalidate_atime(dir);
++	nfs_refresh_inode(dir, res.dir_attr);
+ 
+-	nfs_refresh_inode(dir, &dir_attr);
++	nfs_free_fattr(res.dir_attr);
++out:
+ 	dprintk("NFS reply readdir: %d\n", status);
+ 	return status;
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/nfs3xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs3xdr.c
+--- linux-2.6.34.noarch/fs/nfs/nfs3xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3xdr.c	2010-08-23 11:01:00.361593802 -0400
+@@ -762,7 +762,7 @@ nfs3_xdr_wccstat(struct rpc_rqst *req, _
+ static int
+ nfs3_xdr_removeres(struct rpc_rqst *req, __be32 *p, struct nfs_removeres *res)
+ {
+-	return nfs3_xdr_wccstat(req, p, &res->dir_attr);
++	return nfs3_xdr_wccstat(req, p, res->dir_attr);
+ }
+ 
+ /*
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nfs4_fs.h
+--- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-23 11:01:00.362574935 -0400
+@@ -206,14 +206,14 @@ extern ssize_t nfs4_listxattr(struct den
+ 
+ 
+ /* nfs4proc.c */
+-extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *);
+-extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
++extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
++extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
+ extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
+ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
+ extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
+-extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait);
++extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait);
+ extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
+ extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
+ extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
+@@ -286,7 +286,7 @@ extern void nfs4_put_lock_state(struct n
+ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
+ extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
+ 
+-extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter);
++extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
+ extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
+ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
+ extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4namespace.c.orig linux-2.6.34.noarch/fs/nfs/nfs4namespace.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4namespace.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4namespace.c	2010-08-23 11:01:00.363574219 -0400
+@@ -115,6 +115,7 @@ static struct vfsmount *try_location(str
+ 				     char *page, char *page2,
+ 				     const struct nfs4_fs_location *location)
+ {
++	const size_t addr_bufsize = sizeof(struct sockaddr_storage);
+ 	struct vfsmount *mnt = ERR_PTR(-ENOENT);
+ 	char *mnt_path;
+ 	unsigned int maxbuflen;
+@@ -126,9 +127,12 @@ static struct vfsmount *try_location(str
+ 	mountdata->mnt_path = mnt_path;
+ 	maxbuflen = mnt_path - 1 - page2;
+ 
++	mountdata->addr = kmalloc(addr_bufsize, GFP_KERNEL);
++	if (mountdata->addr == NULL)
++		return ERR_PTR(-ENOMEM);
++
+ 	for (s = 0; s < location->nservers; s++) {
+ 		const struct nfs4_string *buf = &location->servers[s];
+-		struct sockaddr_storage addr;
+ 
+ 		if (buf->len <= 0 || buf->len >= maxbuflen)
+ 			continue;
+@@ -137,11 +141,10 @@ static struct vfsmount *try_location(str
+ 			continue;
+ 
+ 		mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len,
+-				(struct sockaddr *)&addr, sizeof(addr));
++				mountdata->addr, addr_bufsize);
+ 		if (mountdata->addrlen == 0)
+ 			continue;
+ 
+-		mountdata->addr = (struct sockaddr *)&addr;
+ 		rpc_set_port(mountdata->addr, NFS_PORT);
+ 
+ 		memcpy(page2, buf->data, buf->len);
+@@ -156,6 +159,7 @@ static struct vfsmount *try_location(str
+ 		if (!IS_ERR(mnt))
+ 			break;
+ 	}
++	kfree(mountdata->addr);
+ 	return mnt;
+ }
+ 
+@@ -221,8 +225,8 @@ out:
+ 
+ /*
+  * nfs_do_refmount - handle crossing a referral on server
++ * @mnt_parent - mountpoint of referral
+  * @dentry - dentry of referral
+- * @nd - nameidata info
+  *
+  */
+ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry)
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-23 11:01:00.365544029 -0400
+@@ -70,6 +70,9 @@ static int nfs4_do_fsinfo(struct nfs_ser
+ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
+ static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
+ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
++static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
++			    struct nfs_fattr *fattr, struct iattr *sattr,
++			    struct nfs4_state *state);
+ 
+ /* Prevent leaks of NFSv4 errors into userland */
+ static int nfs4_map_errors(int err)
+@@ -714,17 +717,18 @@ static void nfs4_init_opendata_res(struc
+ 
+ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
+ 		struct nfs4_state_owner *sp, fmode_t fmode, int flags,
+-		const struct iattr *attrs)
++		const struct iattr *attrs,
++		gfp_t gfp_mask)
+ {
+ 	struct dentry *parent = dget_parent(path->dentry);
+ 	struct inode *dir = parent->d_inode;
+ 	struct nfs_server *server = NFS_SERVER(dir);
+ 	struct nfs4_opendata *p;
+ 
+-	p = kzalloc(sizeof(*p), GFP_KERNEL);
++	p = kzalloc(sizeof(*p), gfp_mask);
+ 	if (p == NULL)
+ 		goto err;
+-	p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
++	p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask);
+ 	if (p->o_arg.seqid == NULL)
+ 		goto err_free;
+ 	path_get(path);
+@@ -1060,7 +1064,7 @@ static struct nfs4_opendata *nfs4_open_r
+ {
+ 	struct nfs4_opendata *opendata;
+ 
+-	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL);
++	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL, GFP_NOFS);
+ 	if (opendata == NULL)
+ 		return ERR_PTR(-ENOMEM);
+ 	opendata->state = state;
+@@ -1648,7 +1652,7 @@ static int _nfs4_do_open(struct inode *d
+ 	if (path->dentry->d_inode != NULL)
+ 		nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode);
+ 	status = -ENOMEM;
+-	opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr);
++	opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr, GFP_KERNEL);
+ 	if (opendata == NULL)
+ 		goto err_put_state_owner;
+ 
+@@ -1659,15 +1663,24 @@ static int _nfs4_do_open(struct inode *d
+ 	if (status != 0)
+ 		goto err_opendata_put;
+ 
+-	if (opendata->o_arg.open_flags & O_EXCL)
+-		nfs4_exclusive_attrset(opendata, sattr);
+-
+ 	state = nfs4_opendata_to_nfs4_state(opendata);
+ 	status = PTR_ERR(state);
+ 	if (IS_ERR(state))
+ 		goto err_opendata_put;
+ 	if (server->caps & NFS_CAP_POSIX_LOCK)
+ 		set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
++
++	if (opendata->o_arg.open_flags & O_EXCL) {
++		nfs4_exclusive_attrset(opendata, sattr);
++
++		nfs_fattr_init(opendata->o_res.f_attr);
++		status = nfs4_do_setattr(state->inode, cred,
++				opendata->o_res.f_attr, sattr,
++				state);
++		if (status == 0)
++			nfs_setattr_update_inode(state->inode, sattr);
++		nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
++	}
+ 	nfs4_opendata_put(opendata);
+ 	nfs4_put_state_owner(sp);
+ 	*res = state;
+@@ -1914,7 +1927,7 @@ static const struct rpc_call_ops nfs4_cl
+  *
+  * NOTE: Caller must be holding the sp->so_owner semaphore!
+  */
+-int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
++int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait)
+ {
+ 	struct nfs_server *server = NFS_SERVER(state->inode);
+ 	struct nfs4_closedata *calldata;
+@@ -1933,7 +1946,7 @@ int nfs4_do_close(struct path *path, str
+ 	};
+ 	int status = -ENOMEM;
+ 
+-	calldata = kzalloc(sizeof(*calldata), GFP_KERNEL);
++	calldata = kzalloc(sizeof(*calldata), gfp_mask);
+ 	if (calldata == NULL)
+ 		goto out;
+ 	calldata->inode = state->inode;
+@@ -1941,7 +1954,7 @@ int nfs4_do_close(struct path *path, str
+ 	calldata->arg.fh = NFS_FH(state->inode);
+ 	calldata->arg.stateid = &state->open_stateid;
+ 	/* Serialization for the sequence id */
+-	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
++	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid, gfp_mask);
+ 	if (calldata->arg.seqid == NULL)
+ 		goto out_free_calldata;
+ 	calldata->arg.fmode = 0;
+@@ -2404,14 +2417,12 @@ static int nfs4_proc_lookup(struct inode
+ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+ {
+ 	struct nfs_server *server = NFS_SERVER(inode);
+-	struct nfs_fattr fattr;
+ 	struct nfs4_accessargs args = {
+ 		.fh = NFS_FH(inode),
+ 		.bitmask = server->attr_bitmask,
+ 	};
+ 	struct nfs4_accessres res = {
+ 		.server = server,
+-		.fattr = &fattr,
+ 	};
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS],
+@@ -2438,7 +2449,11 @@ static int _nfs4_proc_access(struct inod
+ 		if (mode & MAY_EXEC)
+ 			args.access |= NFS4_ACCESS_EXECUTE;
+ 	}
+-	nfs_fattr_init(&fattr);
++
++	res.fattr = nfs_alloc_fattr();
++	if (res.fattr == NULL)
++		return -ENOMEM;
++
+ 	status = nfs4_call_sync(server, &msg, &args, &res, 0);
+ 	if (!status) {
+ 		entry->mask = 0;
+@@ -2448,8 +2463,9 @@ static int _nfs4_proc_access(struct inod
+ 			entry->mask |= MAY_WRITE;
+ 		if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE))
+ 			entry->mask |= MAY_EXEC;
+-		nfs_refresh_inode(inode, &fattr);
++		nfs_refresh_inode(inode, res.fattr);
+ 	}
++	nfs_free_fattr(res.fattr);
+ 	return status;
+ }
+ 
+@@ -2562,13 +2578,6 @@ nfs4_proc_create(struct inode *dir, stru
+ 	}
+ 	d_add(dentry, igrab(state->inode));
+ 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+-	if (flags & O_EXCL) {
+-		struct nfs_fattr fattr;
+-		status = nfs4_do_setattr(state->inode, cred, &fattr, sattr, state);
+-		if (status == 0)
+-			nfs_setattr_update_inode(state->inode, sattr);
+-		nfs_post_op_update_inode(state->inode, &fattr);
+-	}
+ 	if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
+ 		status = nfs4_intent_set_file(nd, &path, state, fmode);
+ 	else
+@@ -2596,14 +2605,19 @@ static int _nfs4_proc_remove(struct inod
+ 		.rpc_argp = &args,
+ 		.rpc_resp = &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
++
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.dir_attr == NULL)
++		goto out;
+ 
+-	nfs_fattr_init(&res.dir_attr);
+ 	status = nfs4_call_sync(server, &msg, &args, &res, 1);
+ 	if (status == 0) {
+ 		update_changeattr(dir, &res.cinfo);
+-		nfs_post_op_update_inode(dir, &res.dir_attr);
++		nfs_post_op_update_inode(dir, res.dir_attr);
+ 	}
++	nfs_free_fattr(res.dir_attr);
++out:
+ 	return status;
+ }
+ 
+@@ -2638,7 +2652,7 @@ static int nfs4_proc_unlink_done(struct 
+ 	if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
+ 		return 0;
+ 	update_changeattr(dir, &res->cinfo);
+-	nfs_post_op_update_inode(dir, &res->dir_attr);
++	nfs_post_op_update_inode(dir, res->dir_attr);
+ 	return 1;
+ }
+ 
+@@ -2653,29 +2667,31 @@ static int _nfs4_proc_rename(struct inod
+ 		.new_name = new_name,
+ 		.bitmask = server->attr_bitmask,
+ 	};
+-	struct nfs_fattr old_fattr, new_fattr;
+ 	struct nfs4_rename_res res = {
+ 		.server = server,
+-		.old_fattr = &old_fattr,
+-		.new_fattr = &new_fattr,
+ 	};
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME],
+ 		.rpc_argp = &arg,
+ 		.rpc_resp = &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 	
+-	nfs_fattr_init(res.old_fattr);
+-	nfs_fattr_init(res.new_fattr);
+-	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
++	res.old_fattr = nfs_alloc_fattr();
++	res.new_fattr = nfs_alloc_fattr();
++	if (res.old_fattr == NULL || res.new_fattr == NULL)
++		goto out;
+ 
++	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
+ 	if (!status) {
+ 		update_changeattr(old_dir, &res.old_cinfo);
+ 		nfs_post_op_update_inode(old_dir, res.old_fattr);
+ 		update_changeattr(new_dir, &res.new_cinfo);
+ 		nfs_post_op_update_inode(new_dir, res.new_fattr);
+ 	}
++out:
++	nfs_free_fattr(res.new_fattr);
++	nfs_free_fattr(res.old_fattr);
+ 	return status;
+ }
+ 
+@@ -2702,28 +2718,30 @@ static int _nfs4_proc_link(struct inode 
+ 		.name   = name,
+ 		.bitmask = server->attr_bitmask,
+ 	};
+-	struct nfs_fattr fattr, dir_attr;
+ 	struct nfs4_link_res res = {
+ 		.server = server,
+-		.fattr = &fattr,
+-		.dir_attr = &dir_attr,
+ 	};
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK],
+ 		.rpc_argp = &arg,
+ 		.rpc_resp = &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
++
++	res.fattr = nfs_alloc_fattr();
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.fattr == NULL || res.dir_attr == NULL)
++		goto out;
+ 
+-	nfs_fattr_init(res.fattr);
+-	nfs_fattr_init(res.dir_attr);
+ 	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
+ 	if (!status) {
+ 		update_changeattr(dir, &res.cinfo);
+ 		nfs_post_op_update_inode(dir, res.dir_attr);
+ 		nfs_post_op_update_inode(inode, res.fattr);
+ 	}
+-
++out:
++	nfs_free_fattr(res.dir_attr);
++	nfs_free_fattr(res.fattr);
+ 	return status;
+ }
+ 
+@@ -3146,23 +3164,31 @@ static void nfs4_proc_commit_setup(struc
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
+ }
+ 
++struct nfs4_renewdata {
++	struct nfs_client	*client;
++	unsigned long		timestamp;
++};
++
+ /*
+  * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special
+  * standalone procedure for queueing an asynchronous RENEW.
+  */
+-static void nfs4_renew_release(void *data)
++static void nfs4_renew_release(void *calldata)
+ {
+-	struct nfs_client *clp = data;
++	struct nfs4_renewdata *data = calldata;
++	struct nfs_client *clp = data->client;
+ 
+ 	if (atomic_read(&clp->cl_count) > 1)
+ 		nfs4_schedule_state_renewal(clp);
+ 	nfs_put_client(clp);
++	kfree(data);
+ }
+ 
+-static void nfs4_renew_done(struct rpc_task *task, void *data)
++static void nfs4_renew_done(struct rpc_task *task, void *calldata)
+ {
+-	struct nfs_client *clp = data;
+-	unsigned long timestamp = task->tk_start;
++	struct nfs4_renewdata *data = calldata;
++	struct nfs_client *clp = data->client;
++	unsigned long timestamp = data->timestamp;
+ 
+ 	if (task->tk_status < 0) {
+ 		/* Unless we're shutting down, schedule state recovery! */
+@@ -3188,11 +3214,17 @@ int nfs4_proc_async_renew(struct nfs_cli
+ 		.rpc_argp	= clp,
+ 		.rpc_cred	= cred,
+ 	};
++	struct nfs4_renewdata *data;
+ 
+ 	if (!atomic_inc_not_zero(&clp->cl_count))
+ 		return -EIO;
++	data = kmalloc(sizeof(*data), GFP_KERNEL);
++	if (data == NULL)
++		return -ENOMEM;
++	data->client = clp;
++	data->timestamp = jiffies;
+ 	return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
+-			&nfs4_renew_ops, clp);
++			&nfs4_renew_ops, data);
+ }
+ 
+ int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
+@@ -3494,7 +3526,9 @@ nfs4_async_handle_error(struct rpc_task 
+ 	return _nfs4_async_handle_error(task, server, server->nfs_client, state);
+ }
+ 
+-int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
++int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
++		unsigned short port, struct rpc_cred *cred,
++		struct nfs4_setclientid_res *res)
+ {
+ 	nfs4_verifier sc_verifier;
+ 	struct nfs4_setclientid setclientid = {
+@@ -3504,7 +3538,7 @@ int nfs4_proc_setclientid(struct nfs_cli
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
+ 		.rpc_argp = &setclientid,
+-		.rpc_resp = clp,
++		.rpc_resp = res,
+ 		.rpc_cred = cred,
+ 	};
+ 	__be32 *p;
+@@ -3547,12 +3581,14 @@ int nfs4_proc_setclientid(struct nfs_cli
+ 	return status;
+ }
+ 
+-static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
++static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp,
++		struct nfs4_setclientid_res *arg,
++		struct rpc_cred *cred)
+ {
+ 	struct nfs_fsinfo fsinfo;
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM],
+-		.rpc_argp = clp,
++		.rpc_argp = arg,
+ 		.rpc_resp = &fsinfo,
+ 		.rpc_cred = cred,
+ 	};
+@@ -3570,12 +3606,14 @@ static int _nfs4_proc_setclientid_confir
+ 	return status;
+ }
+ 
+-int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
++int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
++		struct nfs4_setclientid_res *arg,
++		struct rpc_cred *cred)
+ {
+ 	long timeout = 0;
+ 	int err;
+ 	do {
+-		err = _nfs4_proc_setclientid_confirm(clp, cred);
++		err = _nfs4_proc_setclientid_confirm(clp, arg, cred);
+ 		switch (err) {
+ 			case 0:
+ 				return err;
+@@ -3667,7 +3705,7 @@ static int _nfs4_proc_delegreturn(struct
+ 	};
+ 	int status = 0;
+ 
+-	data = kzalloc(sizeof(*data), GFP_KERNEL);
++	data = kzalloc(sizeof(*data), GFP_NOFS);
+ 	if (data == NULL)
+ 		return -ENOMEM;
+ 	data->args.fhandle = &data->fh;
+@@ -3823,7 +3861,7 @@ static struct nfs4_unlockdata *nfs4_allo
+ 	struct nfs4_unlockdata *p;
+ 	struct inode *inode = lsp->ls_state->inode;
+ 
+-	p = kzalloc(sizeof(*p), GFP_KERNEL);
++	p = kzalloc(sizeof(*p), GFP_NOFS);
+ 	if (p == NULL)
+ 		return NULL;
+ 	p->arg.fh = NFS_FH(inode);
+@@ -3961,7 +3999,7 @@ static int nfs4_proc_unlck(struct nfs4_s
+ 	if (test_bit(NFS_DELEGATED_STATE, &state->flags))
+ 		goto out;
+ 	lsp = request->fl_u.nfs4_fl.owner;
+-	seqid = nfs_alloc_seqid(&lsp->ls_seqid);
++	seqid = nfs_alloc_seqid(&lsp->ls_seqid, GFP_KERNEL);
+ 	status = -ENOMEM;
+ 	if (seqid == NULL)
+ 		goto out;
+@@ -3989,22 +4027,23 @@ struct nfs4_lockdata {
+ };
+ 
+ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
+-		struct nfs_open_context *ctx, struct nfs4_lock_state *lsp)
++		struct nfs_open_context *ctx, struct nfs4_lock_state *lsp,
++		gfp_t gfp_mask)
+ {
+ 	struct nfs4_lockdata *p;
+ 	struct inode *inode = lsp->ls_state->inode;
+ 	struct nfs_server *server = NFS_SERVER(inode);
+ 
+-	p = kzalloc(sizeof(*p), GFP_KERNEL);
++	p = kzalloc(sizeof(*p), gfp_mask);
+ 	if (p == NULL)
+ 		return NULL;
+ 
+ 	p->arg.fh = NFS_FH(inode);
+ 	p->arg.fl = &p->fl;
+-	p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid);
++	p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid, gfp_mask);
+ 	if (p->arg.open_seqid == NULL)
+ 		goto out_free;
+-	p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
++	p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid, gfp_mask);
+ 	if (p->arg.lock_seqid == NULL)
+ 		goto out_free_seqid;
+ 	p->arg.lock_stateid = &lsp->ls_stateid;
+@@ -4158,7 +4197,8 @@ static int _nfs4_do_setlk(struct nfs4_st
+ 
+ 	dprintk("%s: begin!\n", __func__);
+ 	data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
+-			fl->fl_u.nfs4_fl.owner);
++			fl->fl_u.nfs4_fl.owner,
++			recovery_type == NFS_LOCK_NEW ? GFP_KERNEL : GFP_NOFS);
+ 	if (data == NULL)
+ 		return -ENOMEM;
+ 	if (IS_SETLKW(cmd))
+@@ -4647,7 +4687,7 @@ static int nfs4_reset_slot_table(struct 
+ 	if (max_reqs != tbl->max_slots) {
+ 		ret = -ENOMEM;
+ 		new = kmalloc(max_reqs * sizeof(struct nfs4_slot),
+-			      GFP_KERNEL);
++			      GFP_NOFS);
+ 		if (!new)
+ 			goto out;
+ 		ret = 0;
+@@ -4712,7 +4752,7 @@ static int nfs4_init_slot_table(struct n
+ 
+ 	dprintk("--> %s: max_reqs=%u\n", __func__, max_slots);
+ 
+-	slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_KERNEL);
++	slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_NOFS);
+ 	if (!slot)
+ 		goto out;
+ 	ret = 0;
+@@ -4761,7 +4801,7 @@ struct nfs4_session *nfs4_alloc_session(
+ 	struct nfs4_session *session;
+ 	struct nfs4_slot_table *tbl;
+ 
+-	session = kzalloc(sizeof(struct nfs4_session), GFP_KERNEL);
++	session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
+ 	if (!session)
+ 		return NULL;
+ 
+@@ -5105,8 +5145,8 @@ static int nfs41_proc_async_sequence(str
+ 
+ 	if (!atomic_inc_not_zero(&clp->cl_count))
+ 		return -EIO;
+-	args = kzalloc(sizeof(*args), GFP_KERNEL);
+-	res = kzalloc(sizeof(*res), GFP_KERNEL);
++	args = kzalloc(sizeof(*args), GFP_NOFS);
++	res = kzalloc(sizeof(*res), GFP_NOFS);
+ 	if (!args || !res) {
+ 		kfree(args);
+ 		kfree(res);
+@@ -5207,7 +5247,7 @@ static int nfs41_proc_reclaim_complete(s
+ 	int status = -ENOMEM;
+ 
+ 	dprintk("--> %s\n", __func__);
+-	calldata = kzalloc(sizeof(*calldata), GFP_KERNEL);
++	calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
+ 	if (calldata == NULL)
+ 		goto out;
+ 	calldata->clp = clp;
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-23 11:01:00.367574218 -0400
+@@ -62,6 +62,7 @@ static LIST_HEAD(nfs4_clientid_list);
+ 
+ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
+ {
++	struct nfs4_setclientid_res clid;
+ 	unsigned short port;
+ 	int status;
+ 
+@@ -69,11 +70,15 @@ int nfs4_init_clientid(struct nfs_client
+ 	if (clp->cl_addr.ss_family == AF_INET6)
+ 		port = nfs_callback_tcpport6;
+ 
+-	status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred);
+-	if (status == 0)
+-		status = nfs4_proc_setclientid_confirm(clp, cred);
+-	if (status == 0)
+-		nfs4_schedule_state_renewal(clp);
++	status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
++	if (status != 0)
++		goto out;
++	status = nfs4_proc_setclientid_confirm(clp, &clid, cred);
++	if (status != 0)
++		goto out;
++	clp->cl_clientid = clid.clientid;
++	nfs4_schedule_state_renewal(clp);
++out:
+ 	return status;
+ }
+ 
+@@ -361,7 +366,7 @@ nfs4_alloc_state_owner(void)
+ {
+ 	struct nfs4_state_owner *sp;
+ 
+-	sp = kzalloc(sizeof(*sp),GFP_KERNEL);
++	sp = kzalloc(sizeof(*sp),GFP_NOFS);
+ 	if (!sp)
+ 		return NULL;
+ 	spin_lock_init(&sp->so_lock);
+@@ -435,7 +440,7 @@ nfs4_alloc_open_state(void)
+ {
+ 	struct nfs4_state *state;
+ 
+-	state = kzalloc(sizeof(*state), GFP_KERNEL);
++	state = kzalloc(sizeof(*state), GFP_NOFS);
+ 	if (!state)
+ 		return NULL;
+ 	atomic_set(&state->count, 1);
+@@ -537,7 +542,8 @@ void nfs4_put_open_state(struct nfs4_sta
+ /*
+  * Close the current file.
+  */
+-static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fmode, int wait)
++static void __nfs4_close(struct path *path, struct nfs4_state *state,
++		fmode_t fmode, gfp_t gfp_mask, int wait)
+ {
+ 	struct nfs4_state_owner *owner = state->owner;
+ 	int call_close = 0;
+@@ -578,17 +584,17 @@ static void __nfs4_close(struct path *pa
+ 		nfs4_put_open_state(state);
+ 		nfs4_put_state_owner(owner);
+ 	} else
+-		nfs4_do_close(path, state, wait);
++		nfs4_do_close(path, state, gfp_mask, wait);
+ }
+ 
+ void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
+ {
+-	__nfs4_close(path, state, fmode, 0);
++	__nfs4_close(path, state, fmode, GFP_NOFS, 0);
+ }
+ 
+ void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode)
+ {
+-	__nfs4_close(path, state, fmode, 1);
++	__nfs4_close(path, state, fmode, GFP_KERNEL, 1);
+ }
+ 
+ /*
+@@ -618,7 +624,7 @@ static struct nfs4_lock_state *nfs4_allo
+ 	struct nfs4_lock_state *lsp;
+ 	struct nfs_client *clp = state->owner->so_client;
+ 
+-	lsp = kzalloc(sizeof(*lsp), GFP_KERNEL);
++	lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
+ 	if (lsp == NULL)
+ 		return NULL;
+ 	rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue");
+@@ -754,11 +760,11 @@ void nfs4_copy_stateid(nfs4_stateid *dst
+ 	nfs4_put_lock_state(lsp);
+ }
+ 
+-struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
++struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask)
+ {
+ 	struct nfs_seqid *new;
+ 
+-	new = kmalloc(sizeof(*new), GFP_KERNEL);
++	new = kmalloc(sizeof(*new), gfp_mask);
+ 	if (new != NULL) {
+ 		new->sequence = counter;
+ 		INIT_LIST_HEAD(&new->list);
+@@ -1347,7 +1353,7 @@ static int nfs4_recall_slot(struct nfs_c
+ 
+ 	nfs4_begin_drain_session(clp);
+ 	new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot),
+-		      GFP_KERNEL);
++		      GFP_NOFS);
+         if (!new)
+ 		return -ENOMEM;
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-23 11:00:23.792491380 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-23 11:01:00.369544055 -0400
+@@ -1504,14 +1504,14 @@ static void encode_setclientid(struct xd
+ 	hdr->replen += decode_setclientid_maxsz;
+ }
+ 
+-static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state, struct compound_hdr *hdr)
++static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr)
+ {
+ 	__be32 *p;
+ 
+ 	p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE);
+ 	*p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM);
+-	p = xdr_encode_hyper(p, client_state->cl_clientid);
+-	xdr_encode_opaque_fixed(p, client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
++	p = xdr_encode_hyper(p, arg->clientid);
++	xdr_encode_opaque_fixed(p, arg->confirm.data, NFS4_VERIFIER_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_setclientid_confirm_maxsz;
+ }
+@@ -2324,7 +2324,7 @@ static int nfs4_xdr_enc_setclientid(stru
+ /*
+  * a SETCLIENTID_CONFIRM request
+  */
+-static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp)
++static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid_res *arg)
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+@@ -2334,7 +2334,7 @@ static int nfs4_xdr_enc_setclientid_conf
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ 	encode_compound_hdr(&xdr, req, &hdr);
+-	encode_setclientid_confirm(&xdr, clp, &hdr);
++	encode_setclientid_confirm(&xdr, arg, &hdr);
+ 	encode_putrootfh(&xdr, &hdr);
+ 	encode_fsinfo(&xdr, lease_bitmap, &hdr);
+ 	encode_nops(&hdr);
+@@ -4397,7 +4397,7 @@ out_overflow:
+ 	return -EIO;
+ }
+ 
+-static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
++static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_res *res)
+ {
+ 	__be32 *p;
+ 	uint32_t opnum;
+@@ -4417,8 +4417,8 @@ static int decode_setclientid(struct xdr
+ 		p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE);
+ 		if (unlikely(!p))
+ 			goto out_overflow;
+-		p = xdr_decode_hyper(p, &clp->cl_clientid);
+-		memcpy(clp->cl_confirm.data, p, NFS4_VERIFIER_SIZE);
++		p = xdr_decode_hyper(p, &res->clientid);
++		memcpy(res->confirm.data, p, NFS4_VERIFIER_SIZE);
+ 	} else if (nfserr == NFSERR_CLID_INUSE) {
+ 		uint32_t len;
+ 
+@@ -4815,7 +4815,7 @@ static int nfs4_xdr_dec_remove(struct rp
+ 		goto out;
+ 	if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
+ 		goto out;
+-	decode_getfattr(&xdr, &res->dir_attr, res->server,
++	decode_getfattr(&xdr, res->dir_attr, res->server,
+ 			!RPC_IS_ASYNC(rqstp->rq_task));
+ out:
+ 	return status;
+@@ -5498,7 +5498,7 @@ static int nfs4_xdr_dec_renew(struct rpc
+  * Decode SETCLIENTID response
+  */
+ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
+-		struct nfs_client *clp)
++		struct nfs4_setclientid_res *res)
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr;
+@@ -5507,7 +5507,7 @@ static int nfs4_xdr_dec_setclientid(stru
+ 	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+ 	status = decode_compound_hdr(&xdr, &hdr);
+ 	if (!status)
+-		status = decode_setclientid(&xdr, clp);
++		status = decode_setclientid(&xdr, res);
+ 	return status;
+ }
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/nfsroot.c.orig linux-2.6.34.noarch/fs/nfs/nfsroot.c
+--- linux-2.6.34.noarch/fs/nfs/nfsroot.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfsroot.c	2010-08-23 11:01:00.371574358 -0400
+@@ -488,7 +488,6 @@ static int __init root_nfs_ports(void)
+  */
+ static int __init root_nfs_get_handle(void)
+ {
+-	struct nfs_fh fh;
+ 	struct sockaddr_in sin;
+ 	unsigned int auth_flav_len = 0;
+ 	struct nfs_mount_request request = {
+@@ -499,21 +498,24 @@ static int __init root_nfs_get_handle(vo
+ 					NFS_MNT3_VERSION : NFS_MNT_VERSION,
+ 		.protocol	= (nfs_data.flags & NFS_MOUNT_TCP) ?
+ 					XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP,
+-		.fh		= &fh,
+ 		.auth_flav_len	= &auth_flav_len,
+ 	};
+-	int status;
++	int status = -ENOMEM;
+ 
++	request.fh = nfs_alloc_fhandle();
++	if (!request.fh)
++		goto out;
+ 	set_sockaddr(&sin, servaddr, htons(mount_port));
+ 	status = nfs_mount(&request);
+ 	if (status < 0)
+ 		printk(KERN_ERR "Root-NFS: Server returned error %d "
+ 				"while mounting %s\n", status, nfs_export_path);
+ 	else {
+-		nfs_data.root.size = fh.size;
+-		memcpy(nfs_data.root.data, fh.data, fh.size);
++		nfs_data.root.size = request.fh->size;
++		memcpy(&nfs_data.root.data, request.fh->data, request.fh->size);
+ 	}
+-
++	nfs_free_fhandle(request.fh);
++out:
+ 	return status;
+ }
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/pagelist.c
+--- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-23 11:01:00.371574358 -0400
+@@ -60,16 +60,10 @@ nfs_create_request(struct nfs_open_conte
+ {
+ 	struct nfs_page		*req;
+ 
+-	for (;;) {
+-		/* try to allocate the request struct */
+-		req = nfs_page_alloc();
+-		if (req != NULL)
+-			break;
+-
+-		if (fatal_signal_pending(current))
+-			return ERR_PTR(-ERESTARTSYS);
+-		yield();
+-	}
++	/* try to allocate the request struct */
++	req = nfs_page_alloc();
++	if (req == NULL)
++		return ERR_PTR(-ENOMEM);
+ 
+ 	/* Initialize the request struct. Initially, we assume a
+ 	 * long write-back delay. This will be adjusted in
+diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.c
+--- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-23 11:01:00.372574292 -0400
+@@ -224,35 +224,60 @@ static int nfs_proc_readlink(struct inod
+ 	return status;
+ }
+ 
++struct nfs_createdata {
++	struct nfs_createargs arg;
++	struct nfs_diropok res;
++	struct nfs_fh fhandle;
++	struct nfs_fattr fattr;
++};
++
++static struct nfs_createdata *nfs_alloc_createdata(struct inode *dir,
++		struct dentry *dentry, struct iattr *sattr)
++{
++	struct nfs_createdata *data;
++
++	data = kmalloc(sizeof(*data), GFP_KERNEL);
++
++	if (data != NULL) {
++		data->arg.fh = NFS_FH(dir);
++		data->arg.name = dentry->d_name.name;
++		data->arg.len = dentry->d_name.len;
++		data->arg.sattr = sattr;
++		nfs_fattr_init(&data->fattr);
++		data->fhandle.size = 0;
++		data->res.fh = &data->fhandle;
++		data->res.fattr = &data->fattr;
++	}
++	return data;
++};
++
++static void nfs_free_createdata(const struct nfs_createdata *data)
++{
++	kfree(data);
++}
++
+ static int
+ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+ 		int flags, struct nameidata *nd)
+ {
+-	struct nfs_fh		fhandle;
+-	struct nfs_fattr	fattr;
+-	struct nfs_createargs	arg = {
+-		.fh		= NFS_FH(dir),
+-		.name		= dentry->d_name.name,
+-		.len		= dentry->d_name.len,
+-		.sattr		= sattr
+-	};
+-	struct nfs_diropok	res = {
+-		.fh		= &fhandle,
+-		.fattr		= &fattr
+-	};
++	struct nfs_createdata *data;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs_procedures[NFSPROC_CREATE],
+-		.rpc_argp	= &arg,
+-		.rpc_resp	= &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+-	nfs_fattr_init(&fattr);
+ 	dprintk("NFS call  create %s\n", dentry->d_name.name);
++	data = nfs_alloc_createdata(dir, dentry, sattr);
++	if (data == NULL)
++		goto out;
++	msg.rpc_argp = &data->arg;
++	msg.rpc_resp = &data->res;
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+ 	if (status == 0)
+-		status = nfs_instantiate(dentry, &fhandle, &fattr);
++		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
++	nfs_free_createdata(data);
++out:
+ 	dprintk("NFS reply create: %d\n", status);
+ 	return status;
+ }
+@@ -264,24 +289,12 @@ static int
+ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+ 	       dev_t rdev)
+ {
+-	struct nfs_fh fhandle;
+-	struct nfs_fattr fattr;
+-	struct nfs_createargs	arg = {
+-		.fh		= NFS_FH(dir),
+-		.name		= dentry->d_name.name,
+-		.len		= dentry->d_name.len,
+-		.sattr		= sattr
+-	};
+-	struct nfs_diropok	res = {
+-		.fh		= &fhandle,
+-		.fattr		= &fattr
+-	};
++	struct nfs_createdata *data;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs_procedures[NFSPROC_CREATE],
+-		.rpc_argp	= &arg,
+-		.rpc_resp	= &res,
+ 	};
+-	int status, mode;
++	umode_t mode;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  mknod %s\n", dentry->d_name.name);
+ 
+@@ -294,17 +307,24 @@ nfs_proc_mknod(struct inode *dir, struct
+ 		sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */
+ 	}
+ 
+-	nfs_fattr_init(&fattr);
++	data = nfs_alloc_createdata(dir, dentry, sattr);
++	if (data == NULL)
++		goto out;
++	msg.rpc_argp = &data->arg;
++	msg.rpc_resp = &data->res;
++
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+ 
+ 	if (status == -EINVAL && S_ISFIFO(mode)) {
+ 		sattr->ia_mode = mode;
+-		nfs_fattr_init(&fattr);
++		nfs_fattr_init(data->res.fattr);
+ 		status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	}
+ 	if (status == 0)
+-		status = nfs_instantiate(dentry, &fhandle, &fattr);
++		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
++	nfs_free_createdata(data);
++out:
+ 	dprintk("NFS reply mknod: %d\n", status);
+ 	return status;
+ }
+@@ -398,8 +418,8 @@ static int
+ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
+ 		 unsigned int len, struct iattr *sattr)
+ {
+-	struct nfs_fh fhandle;
+-	struct nfs_fattr fattr;
++	struct nfs_fh *fh;
++	struct nfs_fattr *fattr;
+ 	struct nfs_symlinkargs	arg = {
+ 		.fromfh		= NFS_FH(dir),
+ 		.fromname	= dentry->d_name.name,
+@@ -412,12 +432,18 @@ nfs_proc_symlink(struct inode *dir, stru
+ 		.rpc_proc	= &nfs_procedures[NFSPROC_SYMLINK],
+ 		.rpc_argp	= &arg,
+ 	};
+-	int			status;
++	int status = -ENAMETOOLONG;
++
++	dprintk("NFS call  symlink %s\n", dentry->d_name.name);
+ 
+ 	if (len > NFS2_MAXPATHLEN)
+-		return -ENAMETOOLONG;
++		goto out;
+ 
+-	dprintk("NFS call  symlink %s\n", dentry->d_name.name);
++	fh = nfs_alloc_fhandle();
++	fattr = nfs_alloc_fattr();
++	status = -ENOMEM;
++	if (fh == NULL || fattr == NULL)
++		goto out;
+ 
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+@@ -427,12 +453,12 @@ nfs_proc_symlink(struct inode *dir, stru
+ 	 * filehandle size to zero indicates to nfs_instantiate that it
+ 	 * should fill in the data with a LOOKUP call on the wire.
+ 	 */
+-	if (status == 0) {
+-		nfs_fattr_init(&fattr);
+-		fhandle.size = 0;
+-		status = nfs_instantiate(dentry, &fhandle, &fattr);
+-	}
++	if (status == 0)
++		status = nfs_instantiate(dentry, fh, fattr);
+ 
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fh);
++out:
+ 	dprintk("NFS reply symlink: %d\n", status);
+ 	return status;
+ }
+@@ -440,31 +466,25 @@ nfs_proc_symlink(struct inode *dir, stru
+ static int
+ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
+ {
+-	struct nfs_fh fhandle;
+-	struct nfs_fattr fattr;
+-	struct nfs_createargs	arg = {
+-		.fh		= NFS_FH(dir),
+-		.name		= dentry->d_name.name,
+-		.len		= dentry->d_name.len,
+-		.sattr		= sattr
+-	};
+-	struct nfs_diropok	res = {
+-		.fh		= &fhandle,
+-		.fattr		= &fattr
+-	};
++	struct nfs_createdata *data;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs_procedures[NFSPROC_MKDIR],
+-		.rpc_argp	= &arg,
+-		.rpc_resp	= &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
+-	nfs_fattr_init(&fattr);
++	data = nfs_alloc_createdata(dir, dentry, sattr);
++	if (data == NULL)
++		goto out;
++	msg.rpc_argp = &data->arg;
++	msg.rpc_resp = &data->res;
++
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+ 	if (status == 0)
+-		status = nfs_instantiate(dentry, &fhandle, &fattr);
++		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
++	nfs_free_createdata(data);
++out:
+ 	dprintk("NFS reply mkdir: %d\n", status);
+ 	return status;
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.c
+--- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-23 11:01:00.373574317 -0400
+@@ -40,7 +40,7 @@ static mempool_t *nfs_rdata_mempool;
+ 
+ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+ {
+-	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);
++	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_KERNEL);
+ 
+ 	if (p) {
+ 		memset(p, 0, sizeof(*p));
+@@ -50,7 +50,7 @@ struct nfs_read_data *nfs_readdata_alloc
+ 		if (pagecount <= ARRAY_SIZE(p->page_array))
+ 			p->pagevec = p->page_array;
+ 		else {
+-			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
++			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+ 			if (!p->pagevec) {
+ 				mempool_free(p, nfs_rdata_mempool);
+ 				p = NULL;
+diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/super.c
+--- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-23 11:00:23.794511661 -0400
++++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-23 11:01:00.374564179 -0400
+@@ -141,7 +141,6 @@ static const match_table_t nfs_mount_opt
+ 	{ Opt_resvport, "resvport" },
+ 	{ Opt_noresvport, "noresvport" },
+ 	{ Opt_fscache, "fsc" },
+-	{ Opt_fscache_uniq, "fsc=%s" },
+ 	{ Opt_nofscache, "nofsc" },
+ 
+ 	{ Opt_port, "port=%s" },
+@@ -171,6 +170,7 @@ static const match_table_t nfs_mount_opt
+ 	{ Opt_mountaddr, "mountaddr=%s" },
+ 
+ 	{ Opt_lookupcache, "lookupcache=%s" },
++	{ Opt_fscache_uniq, "fsc=%s" },
+ 
+ 	{ Opt_err, NULL }
+ };
+@@ -423,15 +423,19 @@ static int nfs_statfs(struct dentry *den
+ 	unsigned char blockbits;
+ 	unsigned long blockres;
+ 	struct nfs_fh *fh = NFS_FH(dentry->d_inode);
+-	struct nfs_fattr fattr;
+-	struct nfs_fsstat res = {
+-			.fattr = &fattr,
+-	};
+-	int error;
++	struct nfs_fsstat res;
++	int error = -ENOMEM;
++
++	res.fattr = nfs_alloc_fattr();
++	if (res.fattr == NULL)
++		goto out_err;
+ 
+ 	error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
++
++	nfs_free_fattr(res.fattr);
+ 	if (error < 0)
+ 		goto out_err;
++
+ 	buf->f_type = NFS_SUPER_MAGIC;
+ 
+ 	/*
+@@ -1060,14 +1064,6 @@ static int nfs_parse_mount_options(char 
+ 			kfree(mnt->fscache_uniq);
+ 			mnt->fscache_uniq = NULL;
+ 			break;
+-		case Opt_fscache_uniq:
+-			string = match_strdup(args);
+-			if (!string)
+-				goto out_nomem;
+-			kfree(mnt->fscache_uniq);
+-			mnt->fscache_uniq = string;
+-			mnt->options |= NFS_OPTION_FSCACHE;
+-			break;
+ 
+ 		/*
+ 		 * options that take numeric values
+@@ -1398,6 +1394,14 @@ static int nfs_parse_mount_options(char 
+ 					return 0;
+ 			};
+ 			break;
++		case Opt_fscache_uniq:
++			string = match_strdup(args);
++			if (string == NULL)
++				goto out_nomem;
++			kfree(mnt->fscache_uniq);
++			mnt->fscache_uniq = string;
++			mnt->options |= NFS_OPTION_FSCACHE;
++			break;
+ 
+ 		/*
+ 		 * Special options
+@@ -2186,7 +2190,7 @@ static int nfs_get_sb(struct file_system
+ 	int error = -ENOMEM;
+ 
+ 	data = nfs_alloc_parsed_mount_data(3);
+-	mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
++	mntfh = nfs_alloc_fhandle();
+ 	if (data == NULL || mntfh == NULL)
+ 		goto out_free_fh;
+ 
+@@ -2261,7 +2265,7 @@ out:
+ 	kfree(data->fscache_uniq);
+ 	security_free_mnt_opts(&data->lsm_opts);
+ out_free_fh:
+-	kfree(mntfh);
++	nfs_free_fhandle(mntfh);
+ 	kfree(data);
+ 	return error;
+ 
+@@ -2570,7 +2574,7 @@ static int nfs4_remote_get_sb(struct fil
+ 	};
+ 	int error = -ENOMEM;
+ 
+-	mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
++	mntfh = nfs_alloc_fhandle();
+ 	if (data == NULL || mntfh == NULL)
+ 		goto out_free_fh;
+ 
+@@ -2628,7 +2632,7 @@ static int nfs4_remote_get_sb(struct fil
+ out:
+ 	security_free_mnt_opts(&data->lsm_opts);
+ out_free_fh:
+-	kfree(mntfh);
++	nfs_free_fhandle(mntfh);
+ 	return error;
+ 
+ out_free:
+@@ -2683,41 +2687,120 @@ out_freepage:
+ 	free_page((unsigned long)page);
+ }
+ 
++struct nfs_referral_count {
++	struct list_head list;
++	const struct task_struct *task;
++	unsigned int referral_count;
++};
++
++static LIST_HEAD(nfs_referral_count_list);
++static DEFINE_SPINLOCK(nfs_referral_count_list_lock);
++
++static struct nfs_referral_count *nfs_find_referral_count(void)
++{
++	struct nfs_referral_count *p;
++
++	list_for_each_entry(p, &nfs_referral_count_list, list) {
++		if (p->task == current)
++			return p;
++	}
++	return NULL;
++}
++
++#define NFS_MAX_NESTED_REFERRALS 2
++
++static int nfs_referral_loop_protect(void)
++{
++	struct nfs_referral_count *p, *new;
++	int ret = -ENOMEM;
++
++	new = kmalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		goto out;
++	new->task = current;
++	new->referral_count = 1;
++
++	ret = 0;
++	spin_lock(&nfs_referral_count_list_lock);
++	p = nfs_find_referral_count();
++	if (p != NULL) {
++		if (p->referral_count >= NFS_MAX_NESTED_REFERRALS)
++			ret = -ELOOP;
++		else
++			p->referral_count++;
++	} else {
++		list_add(&new->list, &nfs_referral_count_list);
++		new = NULL;
++	}
++	spin_unlock(&nfs_referral_count_list_lock);
++	kfree(new);
++out:
++	return ret;
++}
++
++static void nfs_referral_loop_unprotect(void)
++{
++	struct nfs_referral_count *p;
++
++	spin_lock(&nfs_referral_count_list_lock);
++	p = nfs_find_referral_count();
++	p->referral_count--;
++	if (p->referral_count == 0)
++		list_del(&p->list);
++	else
++		p = NULL;
++	spin_unlock(&nfs_referral_count_list_lock);
++	kfree(p);
++}
++
+ static int nfs_follow_remote_path(struct vfsmount *root_mnt,
+ 		const char *export_path, struct vfsmount *mnt_target)
+ {
++	struct nameidata *nd = NULL;
+ 	struct mnt_namespace *ns_private;
+-	struct nameidata nd;
+ 	struct super_block *s;
+ 	int ret;
+ 
++	nd = kmalloc(sizeof(*nd), GFP_KERNEL);
++	if (nd == NULL)
++		return -ENOMEM;
++
+ 	ns_private = create_mnt_ns(root_mnt);
+ 	ret = PTR_ERR(ns_private);
+ 	if (IS_ERR(ns_private))
+ 		goto out_mntput;
+ 
++	ret = nfs_referral_loop_protect();
++	if (ret != 0)
++		goto out_put_mnt_ns;
++
+ 	ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
+-			export_path, LOOKUP_FOLLOW, &nd);
++			export_path, LOOKUP_FOLLOW, nd);
+ 
++	nfs_referral_loop_unprotect();
+ 	put_mnt_ns(ns_private);
+ 
+ 	if (ret != 0)
+ 		goto out_err;
+ 
+-	s = nd.path.mnt->mnt_sb;
++	s = nd->path.mnt->mnt_sb;
+ 	atomic_inc(&s->s_active);
+ 	mnt_target->mnt_sb = s;
+-	mnt_target->mnt_root = dget(nd.path.dentry);
++	mnt_target->mnt_root = dget(nd->path.dentry);
+ 
+ 	/* Correct the device pathname */
+-	nfs_fix_devname(&nd.path, mnt_target);
++	nfs_fix_devname(&nd->path, mnt_target);
+ 
+-	path_put(&nd.path);
++	path_put(&nd->path);
++	kfree(nd);
+ 	down_write(&s->s_umount);
+ 	return 0;
++out_put_mnt_ns:
++	put_mnt_ns(ns_private);
+ out_mntput:
+ 	mntput(root_mnt);
+ out_err:
++	kfree(nd);
+ 	return ret;
+ }
+ 
+@@ -2888,17 +2971,21 @@ static int nfs4_remote_referral_get_sb(s
+ 	struct super_block *s;
+ 	struct nfs_server *server;
+ 	struct dentry *mntroot;
+-	struct nfs_fh mntfh;
++	struct nfs_fh *mntfh;
+ 	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
+ 	struct nfs_sb_mountdata sb_mntdata = {
+ 		.mntflags = flags,
+ 	};
+-	int error;
++	int error = -ENOMEM;
+ 
+ 	dprintk("--> nfs4_referral_get_sb()\n");
+ 
++	mntfh = nfs_alloc_fhandle();
++	if (mntfh == NULL)
++		goto out_err_nofh;
++
+ 	/* create a new volume representation */
+-	server = nfs4_create_referral_server(data, &mntfh);
++	server = nfs4_create_referral_server(data, mntfh);
+ 	if (IS_ERR(server)) {
+ 		error = PTR_ERR(server);
+ 		goto out_err_noserver;
+@@ -2930,7 +3017,7 @@ static int nfs4_remote_referral_get_sb(s
+ 		nfs_fscache_get_super_cookie(s, NULL, data);
+ 	}
+ 
+-	mntroot = nfs4_get_root(s, &mntfh);
++	mntroot = nfs4_get_root(s, mntfh);
+ 	if (IS_ERR(mntroot)) {
+ 		error = PTR_ERR(mntroot);
+ 		goto error_splat_super;
+@@ -2947,12 +3034,15 @@ static int nfs4_remote_referral_get_sb(s
+ 
+ 	security_sb_clone_mnt_opts(data->sb, s);
+ 
++	nfs_free_fhandle(mntfh);
+ 	dprintk("<-- nfs4_referral_get_sb() = 0\n");
+ 	return 0;
+ 
+ out_err_nosb:
+ 	nfs_free_server(server);
+ out_err_noserver:
++	nfs_free_fhandle(mntfh);
++out_err_nofh:
+ 	dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
+ 	return error;
+ 
+@@ -2961,6 +3051,7 @@ error_splat_super:
+ 		bdi_unregister(&server->backing_dev_info);
+ error_splat_bdi:
+ 	deactivate_locked_super(s);
++	nfs_free_fhandle(mntfh);
+ 	dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
+ 	return error;
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unlink.c
+--- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-23 11:01:00.375554592 -0400
+@@ -23,6 +23,7 @@ struct nfs_unlinkdata {
+ 	struct nfs_removeres res;
+ 	struct inode *dir;
+ 	struct rpc_cred	*cred;
++	struct nfs_fattr dir_attr;
+ };
+ 
+ /**
+@@ -169,7 +170,7 @@ static int nfs_do_call_unlink(struct den
+ 	}
+ 	nfs_sb_active(dir->i_sb);
+ 	data->args.fh = NFS_FH(dir);
+-	nfs_fattr_init(&data->res.dir_attr);
++	nfs_fattr_init(data->res.dir_attr);
+ 
+ 	NFS_PROTO(dir)->unlink_setup(&msg, dir);
+ 
+@@ -259,6 +260,7 @@ nfs_async_unlink(struct inode *dir, stru
+ 		goto out_free;
+ 	}
+ 	data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	data->res.dir_attr = &data->dir_attr;
+ 
+ 	status = -EBUSY;
+ 	spin_lock(&dentry->d_lock);
+diff -up linux-2.6.34.noarch/include/linux/ktime.h.orig linux-2.6.34.noarch/include/linux/ktime.h
+--- linux-2.6.34.noarch/include/linux/ktime.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/ktime.h	2010-08-23 11:01:00.377554285 -0400
+@@ -130,7 +130,7 @@ static inline ktime_t timeval_to_ktime(s
+ /* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */
+ #define ktime_to_ns(kt)			((kt).tv64)
+ 
+-#else
++#else	/* !((BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR)) */
+ 
+ /*
+  * Helper macros/inlines to get the ktime_t math right in the timespec
+@@ -275,7 +275,7 @@ static inline s64 ktime_to_ns(const ktim
+ 	return (s64) kt.tv.sec * NSEC_PER_SEC + kt.tv.nsec;
+ }
+ 
+-#endif
++#endif	/* !((BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR)) */
+ 
+ /**
+  * ktime_equal - Compares two ktime_t variables to see if they are equal
+@@ -295,6 +295,12 @@ static inline s64 ktime_to_us(const ktim
+ 	return (s64) tv.tv_sec * USEC_PER_SEC + tv.tv_usec;
+ }
+ 
++static inline s64 ktime_to_ms(const ktime_t kt)
++{
++	struct timeval tv = ktime_to_timeval(kt);
++	return (s64) tv.tv_sec * MSEC_PER_SEC + tv.tv_usec / USEC_PER_MSEC;
++}
++
+ static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier)
+ {
+        return ktime_to_us(ktime_sub(later, earlier));
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/include/linux/nfs_fs.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-23 11:00:23.822502111 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-23 11:01:00.378563926 -0400
+@@ -356,6 +356,20 @@ extern struct nfs_open_context *nfs_find
+ extern u64 nfs_compat_user_ino64(u64 fileid);
+ extern void nfs_fattr_init(struct nfs_fattr *fattr);
+ 
++extern struct nfs_fattr *nfs_alloc_fattr(void);
++
++static inline void nfs_free_fattr(const struct nfs_fattr *fattr)
++{
++	kfree(fattr);
++}
++
++extern struct nfs_fh *nfs_alloc_fhandle(void);
++
++static inline void nfs_free_fhandle(const struct nfs_fh *fh)
++{
++	kfree(fh);
++}
++
+ /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
+ extern __be32 root_nfs_parse_addr(char *name); /*__init*/
+ extern unsigned long nfs_inc_attr_generation_counter(void);
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/include/linux/nfs_fs_sb.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-23 11:01:00.380553887 -0400
+@@ -44,7 +44,6 @@ struct nfs_client {
+ 
+ #ifdef CONFIG_NFS_V4
+ 	u64			cl_clientid;	/* constant */
+-	nfs4_verifier		cl_confirm;
+ 	unsigned long		cl_state;
+ 
+ 	struct rb_root		cl_openowner_id;
+diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/include/linux/nfs_xdr.h
+--- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-23 11:01:00.381564072 -0400
+@@ -386,8 +386,8 @@ struct nfs_removeargs {
+ 
+ struct nfs_removeres {
+ 	const struct nfs_server *server;
++	struct nfs_fattr	*dir_attr;
+ 	struct nfs4_change_info	cinfo;
+-	struct nfs_fattr	dir_attr;
+ 	struct nfs4_sequence_res 	seq_res;
+ };
+ 
+@@ -824,6 +824,11 @@ struct nfs4_setclientid {
+ 	u32				sc_cb_ident;
+ };
+ 
++struct nfs4_setclientid_res {
++	u64				clientid;
++	nfs4_verifier			confirm;
++};
++
+ struct nfs4_statfs_arg {
+ 	const struct nfs_fh *		fh;
+ 	const u32 *			bitmask;
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/auth_gss.h.orig linux-2.6.34.noarch/include/linux/sunrpc/auth_gss.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/auth_gss.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/auth_gss.h	2010-08-23 11:01:00.382564026 -0400
+@@ -82,6 +82,7 @@ struct gss_cred {
+ 	enum rpc_gss_svc	gc_service;
+ 	struct gss_cl_ctx	*gc_ctx;
+ 	struct gss_upcall_msg	*gc_upcall;
++	unsigned long		gc_upcall_timestamp;
+ 	unsigned char		gc_machine_cred : 1;
+ };
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/auth.h.orig linux-2.6.34.noarch/include/linux/sunrpc/auth.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/auth.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/auth.h	2010-08-23 11:01:00.382564026 -0400
+@@ -54,6 +54,7 @@ struct rpc_cred {
+ #define RPCAUTH_CRED_NEW	0
+ #define RPCAUTH_CRED_UPTODATE	1
+ #define RPCAUTH_CRED_HASHED	2
++#define RPCAUTH_CRED_NEGATIVE	3
+ 
+ #define RPCAUTH_CRED_MAGIC	0x0f4aa4f0
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/gss_api.h.orig linux-2.6.34.noarch/include/linux/sunrpc/gss_api.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/gss_api.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/gss_api.h	2010-08-23 11:01:00.383574314 -0400
+@@ -35,7 +35,8 @@ int gss_import_sec_context(
+ 		const void*		input_token,
+ 		size_t			bufsize,
+ 		struct gss_api_mech	*mech,
+-		struct gss_ctx		**ctx_id);
++		struct gss_ctx		**ctx_id,
++		gfp_t			gfp_mask);
+ u32 gss_get_mic(
+ 		struct gss_ctx		*ctx_id,
+ 		struct xdr_buf		*message,
+@@ -80,6 +81,8 @@ struct gss_api_mech {
+ 	/* pseudoflavors supported by this mechanism: */
+ 	int			gm_pf_num;
+ 	struct pf_desc *	gm_pfs;
++	/* Should the following be a callback operation instead? */
++	const char		*gm_upcall_enctypes;
+ };
+ 
+ /* and must provide the following operations: */
+@@ -87,7 +90,8 @@ struct gss_api_ops {
+ 	int (*gss_import_sec_context)(
+ 			const void		*input_token,
+ 			size_t			bufsize,
+-			struct gss_ctx		*ctx_id);
++			struct gss_ctx		*ctx_id,
++			gfp_t			gfp_mask);
+ 	u32 (*gss_get_mic)(
+ 			struct gss_ctx		*ctx_id,
+ 			struct xdr_buf		*message,
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/gss_krb5.h.orig linux-2.6.34.noarch/include/linux/sunrpc/gss_krb5.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/gss_krb5.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/gss_krb5.h	2010-08-23 11:01:00.383574314 -0400
+@@ -4,7 +4,7 @@
+  *  Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h,
+  *  lib/gssapi/krb5/gssapiP_krb5.h, and others
+  *
+- *  Copyright (c) 2000 The Regents of the University of Michigan.
++ *  Copyright (c) 2000-2008 The Regents of the University of Michigan.
+  *  All rights reserved.
+  *
+  *  Andy Adamson   <andros@umich.edu>
+@@ -36,17 +36,86 @@
+  *
+  */
+ 
++#include <linux/crypto.h>
+ #include <linux/sunrpc/auth_gss.h>
+ #include <linux/sunrpc/gss_err.h>
+ #include <linux/sunrpc/gss_asn1.h>
+ 
++/* Length of constant used in key derivation */
++#define GSS_KRB5_K5CLENGTH (5)
++
++/* Maximum key length (in bytes) for the supported crypto algorithms */
++#define GSS_KRB5_MAX_KEYLEN (32)
++
++/* Maximum checksum function output for the supported crypto algorithms */
++#define GSS_KRB5_MAX_CKSUM_LEN  (20)
++
++/* Maximum blocksize for the supported crypto algorithms */
++#define GSS_KRB5_MAX_BLOCKSIZE  (16)
++
++struct krb5_ctx;
++
++struct gss_krb5_enctype {
++	const u32		etype;		/* encryption (key) type */
++	const u32		ctype;		/* checksum type */
++	const char		*name;		/* "friendly" name */
++	const char		*encrypt_name;	/* crypto encrypt name */
++	const char		*cksum_name;	/* crypto checksum name */
++	const u16		signalg;	/* signing algorithm */
++	const u16		sealalg;	/* sealing algorithm */
++	const u32		blocksize;	/* encryption blocksize */
++	const u32		conflen;	/* confounder length
++						   (normally the same as
++						   the blocksize) */
++	const u32		cksumlength;	/* checksum length */
++	const u32		keyed_cksum;	/* is it a keyed cksum? */
++	const u32		keybytes;	/* raw key len, in bytes */
++	const u32		keylength;	/* final key len, in bytes */
++	u32 (*encrypt) (struct crypto_blkcipher *tfm,
++			void *iv, void *in, void *out,
++			int length);		/* encryption function */
++	u32 (*decrypt) (struct crypto_blkcipher *tfm,
++			void *iv, void *in, void *out,
++			int length);		/* decryption function */
++	u32 (*mk_key) (const struct gss_krb5_enctype *gk5e,
++		       struct xdr_netobj *in,
++		       struct xdr_netobj *out);	/* complete key generation */
++	u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset,
++			   struct xdr_buf *buf, int ec,
++			   struct page **pages); /* v2 encryption function */
++	u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset,
++			   struct xdr_buf *buf, u32 *headskip,
++			   u32 *tailskip);	/* v2 decryption function */
++};
++
++/* krb5_ctx flags definitions */
++#define KRB5_CTX_FLAG_INITIATOR         0x00000001
++#define KRB5_CTX_FLAG_CFX               0x00000002
++#define KRB5_CTX_FLAG_ACCEPTOR_SUBKEY   0x00000004
++
+ struct krb5_ctx {
+ 	int			initiate; /* 1 = initiating, 0 = accepting */
++	u32			enctype;
++	u32			flags;
++	const struct gss_krb5_enctype *gk5e; /* enctype-specific info */
+ 	struct crypto_blkcipher	*enc;
+ 	struct crypto_blkcipher	*seq;
++	struct crypto_blkcipher *acceptor_enc;
++	struct crypto_blkcipher *initiator_enc;
++	struct crypto_blkcipher *acceptor_enc_aux;
++	struct crypto_blkcipher *initiator_enc_aux;
++	u8			Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */
++	u8			cksum[GSS_KRB5_MAX_KEYLEN];
+ 	s32			endtime;
+ 	u32			seq_send;
++	u64			seq_send64;
+ 	struct xdr_netobj	mech_used;
++	u8			initiator_sign[GSS_KRB5_MAX_KEYLEN];
++	u8			acceptor_sign[GSS_KRB5_MAX_KEYLEN];
++	u8			initiator_seal[GSS_KRB5_MAX_KEYLEN];
++	u8			acceptor_seal[GSS_KRB5_MAX_KEYLEN];
++	u8			initiator_integ[GSS_KRB5_MAX_KEYLEN];
++	u8			acceptor_integ[GSS_KRB5_MAX_KEYLEN];
+ };
+ 
+ extern spinlock_t krb5_seq_lock;
+@@ -57,6 +126,18 @@ extern spinlock_t krb5_seq_lock;
+ #define KG_TOK_MIC_MSG    0x0101
+ #define KG_TOK_WRAP_MSG   0x0201
+ 
++#define KG2_TOK_INITIAL     0x0101
++#define KG2_TOK_RESPONSE    0x0202
++#define KG2_TOK_MIC         0x0404
++#define KG2_TOK_WRAP        0x0504
++
++#define KG2_TOKEN_FLAG_SENTBYACCEPTOR   0x01
++#define KG2_TOKEN_FLAG_SEALED           0x02
++#define KG2_TOKEN_FLAG_ACCEPTORSUBKEY   0x04
++
++#define KG2_RESP_FLAG_ERROR             0x0001
++#define KG2_RESP_FLAG_DELEG_OK          0x0002
++
+ enum sgn_alg {
+ 	SGN_ALG_DES_MAC_MD5 = 0x0000,
+ 	SGN_ALG_MD2_5 = 0x0001,
+@@ -81,6 +162,9 @@ enum seal_alg {
+ #define CKSUMTYPE_RSA_MD5_DES		0x0008
+ #define CKSUMTYPE_NIST_SHA		0x0009
+ #define CKSUMTYPE_HMAC_SHA1_DES3	0x000c
++#define CKSUMTYPE_HMAC_SHA1_96_AES128   0x000f
++#define CKSUMTYPE_HMAC_SHA1_96_AES256   0x0010
++#define CKSUMTYPE_HMAC_MD5_ARCFOUR      -138 /* Microsoft md5 hmac cksumtype */
+ 
+ /* from gssapi_err_krb5.h */
+ #define KG_CCACHE_NOMATCH                        (39756032L)
+@@ -111,11 +195,56 @@ enum seal_alg {
+ #define ENCTYPE_DES3_CBC_RAW    0x0006	/* DES-3 cbc mode raw */
+ #define ENCTYPE_DES_HMAC_SHA1   0x0008
+ #define ENCTYPE_DES3_CBC_SHA1   0x0010
++#define ENCTYPE_AES128_CTS_HMAC_SHA1_96 0x0011
++#define ENCTYPE_AES256_CTS_HMAC_SHA1_96 0x0012
++#define ENCTYPE_ARCFOUR_HMAC            0x0017
++#define ENCTYPE_ARCFOUR_HMAC_EXP        0x0018
+ #define ENCTYPE_UNKNOWN         0x01ff
+ 
+-s32
+-make_checksum(char *, char *header, int hdrlen, struct xdr_buf *body,
+-		   int body_offset, struct xdr_netobj *cksum);
++/*
++ * Constants used for key derivation
++ */
++/* for 3DES */
++#define KG_USAGE_SEAL (22)
++#define KG_USAGE_SIGN (23)
++#define KG_USAGE_SEQ  (24)
++
++/* from rfc3961 */
++#define KEY_USAGE_SEED_CHECKSUM         (0x99)
++#define KEY_USAGE_SEED_ENCRYPTION       (0xAA)
++#define KEY_USAGE_SEED_INTEGRITY        (0x55)
++
++/* from rfc4121 */
++#define KG_USAGE_ACCEPTOR_SEAL  (22)
++#define KG_USAGE_ACCEPTOR_SIGN  (23)
++#define KG_USAGE_INITIATOR_SEAL (24)
++#define KG_USAGE_INITIATOR_SIGN (25)
++
++/*
++ * This compile-time check verifies that we will not exceed the
++ * slack space allotted by the client and server auth_gss code
++ * before they call gss_wrap().
++ */
++#define GSS_KRB5_MAX_SLACK_NEEDED \
++	(GSS_KRB5_TOK_HDR_LEN     /* gss token header */         \
++	+ GSS_KRB5_MAX_CKSUM_LEN  /* gss token checksum */       \
++	+ GSS_KRB5_MAX_BLOCKSIZE  /* confounder */               \
++	+ GSS_KRB5_MAX_BLOCKSIZE  /* possible padding */         \
++	+ GSS_KRB5_TOK_HDR_LEN    /* encrypted hdr in v2 token */\
++	+ GSS_KRB5_MAX_CKSUM_LEN  /* encryption hmac */          \
++	+ 4 + 4                   /* RPC verifier */             \
++	+ GSS_KRB5_TOK_HDR_LEN                                   \
++	+ GSS_KRB5_MAX_CKSUM_LEN)
++
++u32
++make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
++		struct xdr_buf *body, int body_offset, u8 *cksumkey,
++		unsigned int usage, struct xdr_netobj *cksumout);
++
++u32
++make_checksum_v2(struct krb5_ctx *, char *header, int hdrlen,
++		 struct xdr_buf *body, int body_offset, u8 *key,
++		 unsigned int usage, struct xdr_netobj *cksum);
+ 
+ u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *,
+ 		struct xdr_netobj *);
+@@ -149,11 +278,54 @@ gss_decrypt_xdr_buf(struct crypto_blkcip
+ 		    int offset);
+ 
+ s32
+-krb5_make_seq_num(struct crypto_blkcipher *key,
++krb5_make_seq_num(struct krb5_ctx *kctx,
++		struct crypto_blkcipher *key,
+ 		int direction,
+ 		u32 seqnum, unsigned char *cksum, unsigned char *buf);
+ 
+ s32
+-krb5_get_seq_num(struct crypto_blkcipher *key,
++krb5_get_seq_num(struct krb5_ctx *kctx,
+ 	       unsigned char *cksum,
+ 	       unsigned char *buf, int *direction, u32 *seqnum);
++
++int
++xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen);
++
++u32
++krb5_derive_key(const struct gss_krb5_enctype *gk5e,
++		const struct xdr_netobj *inkey,
++		struct xdr_netobj *outkey,
++		const struct xdr_netobj *in_constant,
++		gfp_t gfp_mask);
++
++u32
++gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e,
++		       struct xdr_netobj *randombits,
++		       struct xdr_netobj *key);
++
++u32
++gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e,
++		      struct xdr_netobj *randombits,
++		      struct xdr_netobj *key);
++
++u32
++gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
++		     struct xdr_buf *buf, int ec,
++		     struct page **pages);
++
++u32
++gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset,
++		     struct xdr_buf *buf, u32 *plainoffset,
++		     u32 *plainlen);
++
++int
++krb5_rc4_setup_seq_key(struct krb5_ctx *kctx,
++		       struct crypto_blkcipher *cipher,
++		       unsigned char *cksum);
++
++int
++krb5_rc4_setup_enc_key(struct krb5_ctx *kctx,
++		       struct crypto_blkcipher *cipher,
++		       s32 seqnum);
++void
++gss_krb5_make_confounder(char *p, u32 conflen);
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/metrics.h.orig linux-2.6.34.noarch/include/linux/sunrpc/metrics.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/metrics.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/metrics.h	2010-08-23 11:01:00.384611889 -0400
+@@ -26,6 +26,7 @@
+ #define _LINUX_SUNRPC_METRICS_H
+ 
+ #include <linux/seq_file.h>
++#include <linux/ktime.h>
+ 
+ #define RPC_IOSTATS_VERS	"1.0"
+ 
+@@ -58,9 +59,9 @@ struct rpc_iostats {
+ 	 * and the total time the request spent from init to release
+ 	 * are measured.
+ 	 */
+-	unsigned long long	om_queue,	/* jiffies queued for xmit */
+-				om_rtt,		/* jiffies for RPC RTT */
+-				om_execute;	/* jiffies for RPC execution */
++	ktime_t			om_queue,	/* queued for xmit */
++				om_rtt,		/* RPC RTT */
++				om_execute;	/* RPC execution */
+ } ____cacheline_aligned;
+ 
+ struct rpc_task;
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/sched.h.orig linux-2.6.34.noarch/include/linux/sunrpc/sched.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/sched.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/sched.h	2010-08-23 11:01:00.385361873 -0400
+@@ -10,6 +10,7 @@
+ #define _LINUX_SUNRPC_SCHED_H_
+ 
+ #include <linux/timer.h>
++#include <linux/ktime.h>
+ #include <linux/sunrpc/types.h>
+ #include <linux/spinlock.h>
+ #include <linux/wait.h>
+@@ -40,21 +41,15 @@ struct rpc_wait {
+  * This is the RPC task struct
+  */
+ struct rpc_task {
+-#ifdef RPC_DEBUG
+-	unsigned long		tk_magic;	/* 0xf00baa */
+-#endif
+ 	atomic_t		tk_count;	/* Reference count */
+ 	struct list_head	tk_task;	/* global list of tasks */
+ 	struct rpc_clnt *	tk_client;	/* RPC client */
+ 	struct rpc_rqst *	tk_rqstp;	/* RPC request */
+-	int			tk_status;	/* result of last operation */
+ 
+ 	/*
+ 	 * RPC call state
+ 	 */
+ 	struct rpc_message	tk_msg;		/* RPC call info */
+-	__u8			tk_garb_retry;
+-	__u8			tk_cred_retry;
+ 
+ 	/*
+ 	 * callback	to be executed after waking up
+@@ -67,7 +62,6 @@ struct rpc_task {
+ 	void *			tk_calldata;
+ 
+ 	unsigned long		tk_timeout;	/* timeout for rpc_sleep() */
+-	unsigned short		tk_flags;	/* misc flags */
+ 	unsigned long		tk_runstate;	/* Task run status */
+ 	struct workqueue_struct	*tk_workqueue;	/* Normally rpciod, but could
+ 						 * be any workqueue
+@@ -78,17 +72,19 @@ struct rpc_task {
+ 		struct rpc_wait		tk_wait;	/* RPC wait */
+ 	} u;
+ 
+-	unsigned short		tk_timeouts;	/* maj timeouts */
+-	size_t			tk_bytes_sent;	/* total bytes sent */
+-	unsigned long		tk_start;	/* RPC task init timestamp */
+-	long			tk_rtt;		/* round-trip time (jiffies) */
++	ktime_t			tk_start;	/* RPC task init timestamp */
+ 
+ 	pid_t			tk_owner;	/* Process id for batching tasks */
+-	unsigned char		tk_priority : 2;/* Task priority */
++	int			tk_status;	/* result of last operation */
++	unsigned short		tk_flags;	/* misc flags */
++	unsigned short		tk_timeouts;	/* maj timeouts */
+ 
+ #ifdef RPC_DEBUG
+ 	unsigned short		tk_pid;		/* debugging aid */
+ #endif
++	unsigned char		tk_priority : 2,/* Task priority */
++				tk_garb_retry : 2,
++				tk_cred_retry : 2;
+ };
+ #define tk_xprt			tk_client->cl_xprt
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xdr.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-23 11:01:00.385361873 -0400
+@@ -1,7 +1,10 @@
+ /*
+- * include/linux/sunrpc/xdr.h
++ * XDR standard data types and function declarations
+  *
+  * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
++ *
++ * Based on:
++ *   RFC 4506 "XDR: External Data Representation Standard", May 2006
+  */
+ 
+ #ifndef _SUNRPC_XDR_H_
+@@ -62,7 +65,6 @@ struct xdr_buf {
+ 
+ 	unsigned int	buflen,		/* Total length of storage buffer */
+ 			len;		/* Length of XDR encoded message */
+-
+ };
+ 
+ /*
+@@ -178,7 +180,7 @@ struct xdr_array2_desc {
+ };
+ 
+ extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
+-                             struct xdr_array2_desc *desc);
++			     struct xdr_array2_desc *desc);
+ extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
+ 			     struct xdr_array2_desc *desc);
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/xprt.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xprt.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/xprt.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xprt.h	2010-08-23 11:01:00.386574704 -0400
+@@ -13,6 +13,7 @@
+ #include <linux/socket.h>
+ #include <linux/in.h>
+ #include <linux/kref.h>
++#include <linux/ktime.h>
+ #include <linux/sunrpc/sched.h>
+ #include <linux/sunrpc/xdr.h>
+ #include <linux/sunrpc/msg_prot.h>
+@@ -65,8 +66,6 @@ struct rpc_rqst {
+ 	struct rpc_task *	rq_task;	/* RPC task data */
+ 	__be32			rq_xid;		/* request XID */
+ 	int			rq_cong;	/* has incremented xprt->cong */
+-	int			rq_reply_bytes_recvd;	/* number of reply */
+-							/* bytes received */
+ 	u32			rq_seqno;	/* gss seq no. used on req. */
+ 	int			rq_enc_pages_num;
+ 	struct page		**rq_enc_pages;	/* scratch pages for use by
+@@ -77,12 +76,16 @@ struct rpc_rqst {
+ 	__u32 *			rq_buffer;	/* XDR encode buffer */
+ 	size_t			rq_callsize,
+ 				rq_rcvsize;
++	size_t			rq_xmit_bytes_sent;	/* total bytes sent */
++	size_t			rq_reply_bytes_recvd;	/* total reply bytes */
++							/* received */
+ 
+ 	struct xdr_buf		rq_private_buf;		/* The receive buffer
+ 							 * used in the softirq.
+ 							 */
+ 	unsigned long		rq_majortimeo;	/* major timeout alarm */
+ 	unsigned long		rq_timeout;	/* Current timeout value */
++	ktime_t			rq_rtt;		/* round-trip time */
+ 	unsigned int		rq_retries;	/* # of retries */
+ 	unsigned int		rq_connect_cookie;
+ 						/* A cookie used to track the
+@@ -94,7 +97,7 @@ struct rpc_rqst {
+ 	 */
+ 	u32			rq_bytes_sent;	/* Bytes we have sent */
+ 
+-	unsigned long		rq_xtime;	/* when transmitted */
++	ktime_t			rq_xtime;	/* transmit time stamp */
+ 	int			rq_ntrans;
+ 
+ #if defined(CONFIG_NFS_V4_1)
+@@ -174,8 +177,7 @@ struct rpc_xprt {
+ 	/*
+ 	 * Connection of transports
+ 	 */
+-	unsigned long		connect_timeout,
+-				bind_timeout,
++	unsigned long		bind_timeout,
+ 				reestablish_timeout;
+ 	unsigned int		connect_cookie;	/* A cookie that gets bumped
+ 						   every time the transport
+@@ -294,7 +296,6 @@ void			xprt_set_retrans_timeout_rtt(stru
+ void			xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
+ void			xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action);
+ void			xprt_write_space(struct rpc_xprt *xprt);
+-void			xprt_update_rtt(struct rpc_task *task);
+ void			xprt_adjust_cwnd(struct rpc_task *task, int result);
+ struct rpc_rqst *	xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid);
+ void			xprt_complete_rqst(struct rpc_task *task, int copied);
+diff -up linux-2.6.34.noarch/net/sunrpc/auth.c.orig linux-2.6.34.noarch/net/sunrpc/auth.c
+--- linux-2.6.34.noarch/net/sunrpc/auth.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth.c	2010-08-23 11:01:00.387574079 -0400
+@@ -236,10 +236,15 @@ rpcauth_prune_expired(struct list_head *
+ 
+ 	list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
+ 
+-		/* Enforce a 60 second garbage collection moratorium */
++		if (nr_to_scan-- == 0)
++			break;
++		/*
++		 * Enforce a 60 second garbage collection moratorium
++		 * Note that the cred_unused list must be time-ordered.
++		 */
+ 		if (time_in_range(cred->cr_expire, expired, jiffies) &&
+ 		    test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
+-			continue;
++			return 0;
+ 
+ 		list_del_init(&cred->cr_lru);
+ 		number_cred_unused--;
+@@ -252,13 +257,10 @@ rpcauth_prune_expired(struct list_head *
+ 			get_rpccred(cred);
+ 			list_add_tail(&cred->cr_lru, free);
+ 			rpcauth_unhash_cred_locked(cred);
+-			nr_to_scan--;
+ 		}
+ 		spin_unlock(cache_lock);
+-		if (nr_to_scan == 0)
+-			break;
+ 	}
+-	return nr_to_scan;
++	return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
+ }
+ 
+ /*
+@@ -270,11 +272,12 @@ rpcauth_cache_shrinker(int nr_to_scan, g
+ 	LIST_HEAD(free);
+ 	int res;
+ 
++	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
++		return (nr_to_scan == 0) ? 0 : -1;
+ 	if (list_empty(&cred_unused))
+ 		return 0;
+ 	spin_lock(&rpc_credcache_lock);
+-	nr_to_scan = rpcauth_prune_expired(&free, nr_to_scan);
+-	res = (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
++	res = rpcauth_prune_expired(&free, nr_to_scan);
+ 	spin_unlock(&rpc_credcache_lock);
+ 	rpcauth_destroy_credlist(&free);
+ 	return res;
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/auth_gss.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/auth_gss.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/auth_gss.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/auth_gss.c	2010-08-23 11:01:00.388574680 -0400
+@@ -57,11 +57,14 @@ static const struct rpc_authops authgss_
+ static const struct rpc_credops gss_credops;
+ static const struct rpc_credops gss_nullops;
+ 
++#define GSS_RETRY_EXPIRED 5
++static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED;
++
+ #ifdef RPC_DEBUG
+ # define RPCDBG_FACILITY	RPCDBG_AUTH
+ #endif
+ 
+-#define GSS_CRED_SLACK		1024
++#define GSS_CRED_SLACK		(RPC_MAX_AUTH_SIZE * 2)
+ /* length of a krb5 verifier (48), plus data added before arguments when
+  * using integrity (two 4-byte integers): */
+ #define GSS_VERF_SLACK		100
+@@ -229,7 +232,7 @@ gss_fill_context(const void *p, const vo
+ 		p = ERR_PTR(-EFAULT);
+ 		goto err;
+ 	}
+-	ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx);
++	ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, GFP_NOFS);
+ 	if (ret < 0) {
+ 		p = ERR_PTR(ret);
+ 		goto err;
+@@ -350,6 +353,24 @@ gss_unhash_msg(struct gss_upcall_msg *gs
+ }
+ 
+ static void
++gss_handle_downcall_result(struct gss_cred *gss_cred, struct gss_upcall_msg *gss_msg)
++{
++	switch (gss_msg->msg.errno) {
++	case 0:
++		if (gss_msg->ctx == NULL)
++			break;
++		clear_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags);
++		gss_cred_set_ctx(&gss_cred->gc_base, gss_msg->ctx);
++		break;
++	case -EKEYEXPIRED:
++		set_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags);
++	}
++	gss_cred->gc_upcall_timestamp = jiffies;
++	gss_cred->gc_upcall = NULL;
++	rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
++}
++
++static void
+ gss_upcall_callback(struct rpc_task *task)
+ {
+ 	struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred,
+@@ -358,13 +379,9 @@ gss_upcall_callback(struct rpc_task *tas
+ 	struct inode *inode = &gss_msg->inode->vfs_inode;
+ 
+ 	spin_lock(&inode->i_lock);
+-	if (gss_msg->ctx)
+-		gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx);
+-	else
+-		task->tk_status = gss_msg->msg.errno;
+-	gss_cred->gc_upcall = NULL;
+-	rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
++	gss_handle_downcall_result(gss_cred, gss_msg);
+ 	spin_unlock(&inode->i_lock);
++	task->tk_status = gss_msg->msg.errno;
+ 	gss_release_msg(gss_msg);
+ }
+ 
+@@ -377,11 +394,12 @@ static void gss_encode_v0_msg(struct gss
+ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
+ 				struct rpc_clnt *clnt, int machine_cred)
+ {
++	struct gss_api_mech *mech = gss_msg->auth->mech;
+ 	char *p = gss_msg->databuf;
+ 	int len = 0;
+ 
+ 	gss_msg->msg.len = sprintf(gss_msg->databuf, "mech=%s uid=%d ",
+-				   gss_msg->auth->mech->gm_name,
++				   mech->gm_name,
+ 				   gss_msg->uid);
+ 	p += gss_msg->msg.len;
+ 	if (clnt->cl_principal) {
+@@ -398,6 +416,11 @@ static void gss_encode_v1_msg(struct gss
+ 		p += len;
+ 		gss_msg->msg.len += len;
+ 	}
++	if (mech->gm_upcall_enctypes) {	/* optional; appended verbatim */
++		len = sprintf(p, "%s", mech->gm_upcall_enctypes);
++		p += len;
++		gss_msg->msg.len += len;
++	}
+ 	len = sprintf(p, "\n");
+ 	gss_msg->msg.len += len;
+ 
+@@ -507,18 +530,16 @@ gss_refresh_upcall(struct rpc_task *task
+ 	spin_lock(&inode->i_lock);
+ 	if (gss_cred->gc_upcall != NULL)
+ 		rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL);
+-	else if (gss_msg->ctx != NULL) {
+-		gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx);
+-		gss_cred->gc_upcall = NULL;
+-		rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
+-	} else if (gss_msg->msg.errno >= 0) {
++	else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
+ 		task->tk_timeout = 0;
+ 		gss_cred->gc_upcall = gss_msg;
+ 		/* gss_upcall_callback will release the reference to gss_upcall_msg */
+ 		atomic_inc(&gss_msg->count);
+ 		rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback);
+-	} else
++	} else {
++		gss_handle_downcall_result(gss_cred, gss_msg);
+ 		err = gss_msg->msg.errno;
++	}
+ 	spin_unlock(&inode->i_lock);
+ 	gss_release_msg(gss_msg);
+ out:
+@@ -1117,6 +1138,23 @@ static int gss_renew_cred(struct rpc_tas
+ 	return 0;
+ }
+ 
++static int gss_cred_is_negative_entry(struct rpc_cred *cred)
++{
++	if (test_bit(RPCAUTH_CRED_NEGATIVE, &cred->cr_flags)) {
++		unsigned long now = jiffies;
++		unsigned long begin, expire;
++		struct gss_cred *gss_cred; 
++
++		gss_cred = container_of(cred, struct gss_cred, gc_base);
++		begin = gss_cred->gc_upcall_timestamp;
++		expire = begin + gss_expired_cred_retry_delay * HZ;
++
++		if (time_in_range_open(now, begin, expire))
++			return 1;
++	}
++	return 0;
++}
++
+ /*
+ * Refresh credentials. XXX - finish
+ */
+@@ -1126,6 +1164,9 @@ gss_refresh(struct rpc_task *task)
+ 	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ 	int ret = 0;
+ 
++	if (gss_cred_is_negative_entry(cred))
++		return -EKEYEXPIRED;
++
+ 	if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) &&
+ 			!test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) {
+ 		ret = gss_renew_cred(task);
+@@ -1316,15 +1357,21 @@ gss_wrap_req_priv(struct rpc_cred *cred,
+ 	inpages = snd_buf->pages + first;
+ 	snd_buf->pages = rqstp->rq_enc_pages;
+ 	snd_buf->page_base -= first << PAGE_CACHE_SHIFT;
+-	/* Give the tail its own page, in case we need extra space in the
+-	 * head when wrapping: */
++	/*
++	 * Give the tail its own page, in case we need extra space in the
++	 * head when wrapping:
++	 *
++	 * call_allocate() allocates twice the slack space required
++	 * by the authentication flavor to rq_callsize.
++	 * For GSS, slack is GSS_CRED_SLACK.
++	 */
+ 	if (snd_buf->page_len || snd_buf->tail[0].iov_len) {
+ 		tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]);
+ 		memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
+ 		snd_buf->tail[0].iov_base = tmp;
+ 	}
+ 	maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
+-	/* RPC_SLACK_SPACE should prevent this ever happening: */
++	/* slack space should prevent this ever happening: */
+ 	BUG_ON(snd_buf->len > snd_buf->buflen);
+ 	status = -EIO;
+ 	/* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
+@@ -1573,5 +1620,11 @@ static void __exit exit_rpcsec_gss(void)
+ }
+ 
+ MODULE_LICENSE("GPL");
++module_param_named(expired_cred_retry_delay,
++		   gss_expired_cred_retry_delay,
++		   uint, 0644);
++MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until "
++		"the RPC engine retries an expired credential");
++
+ module_init(init_rpcsec_gss)
+ module_exit(exit_rpcsec_gss)
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_crypto.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_crypto.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_crypto.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_crypto.c	2010-08-23 11:01:00.390553891 -0400
+@@ -1,7 +1,7 @@
+ /*
+  *  linux/net/sunrpc/gss_krb5_crypto.c
+  *
+- *  Copyright (c) 2000 The Regents of the University of Michigan.
++ *  Copyright (c) 2000-2008 The Regents of the University of Michigan.
+  *  All rights reserved.
+  *
+  *  Andy Adamson   <andros@umich.edu>
+@@ -41,6 +41,7 @@
+ #include <linux/crypto.h>
+ #include <linux/highmem.h>
+ #include <linux/pagemap.h>
++#include <linux/random.h>
+ #include <linux/sunrpc/gss_krb5.h>
+ #include <linux/sunrpc/xdr.h>
+ 
+@@ -58,13 +59,13 @@ krb5_encrypt(
+ {
+ 	u32 ret = -EINVAL;
+ 	struct scatterlist sg[1];
+-	u8 local_iv[16] = {0};
++	u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
+ 	struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
+ 
+ 	if (length % crypto_blkcipher_blocksize(tfm) != 0)
+ 		goto out;
+ 
+-	if (crypto_blkcipher_ivsize(tfm) > 16) {
++	if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
+ 		dprintk("RPC:       gss_k5encrypt: tfm iv size too large %d\n",
+ 			crypto_blkcipher_ivsize(tfm));
+ 		goto out;
+@@ -92,13 +93,13 @@ krb5_decrypt(
+ {
+ 	u32 ret = -EINVAL;
+ 	struct scatterlist sg[1];
+-	u8 local_iv[16] = {0};
++	u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
+ 	struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
+ 
+ 	if (length % crypto_blkcipher_blocksize(tfm) != 0)
+ 		goto out;
+ 
+-	if (crypto_blkcipher_ivsize(tfm) > 16) {
++	if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
+ 		dprintk("RPC:       gss_k5decrypt: tfm iv size too large %d\n",
+ 			crypto_blkcipher_ivsize(tfm));
+ 		goto out;
+@@ -123,21 +124,155 @@ checksummer(struct scatterlist *sg, void
+ 	return crypto_hash_update(desc, sg, sg->length);
+ }
+ 
+-/* checksum the plaintext data and hdrlen bytes of the token header */
+-s32
+-make_checksum(char *cksumname, char *header, int hdrlen, struct xdr_buf *body,
+-		   int body_offset, struct xdr_netobj *cksum)
++static int
++arcfour_hmac_md5_usage_to_salt(unsigned int usage, u8 salt[4])
++{
++	unsigned int ms_usage;
++	/* Map the GSS usage onto its Microsoft number; -EINVAL if unknown */
++	switch (usage) {
++	case KG_USAGE_SIGN:
++		ms_usage = 15;
++		break;
++	case KG_USAGE_SEAL:
++		ms_usage = 13;
++		break;
++	default:
++		return -EINVAL;
++	}
++	salt[0] = (ms_usage >> 0) & 0xff;	/* least significant byte first */
++	salt[1] = (ms_usage >> 8) & 0xff;
++	salt[2] = (ms_usage >> 16) & 0xff;
++	salt[3] = (ms_usage >> 24) & 0xff;
++
++	return 0;
++}
++
++static u32
++make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
++		       struct xdr_buf *body, int body_offset, u8 *cksumkey,
++		       unsigned int usage, struct xdr_netobj *cksumout)
+ {
+-	struct hash_desc                desc; /* XXX add to ctx? */
++	struct hash_desc                desc;
+ 	struct scatterlist              sg[1];
+ 	int err;
++	u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	u8 rc4salt[4];
++	struct crypto_hash *md5;
++	struct crypto_hash *hmac_md5;
++
++	if (cksumkey == NULL)
++		return GSS_S_FAILURE;
++
++	if (cksumout->len < kctx->gk5e->cksumlength) {
++		dprintk("%s: checksum buffer length, %u, too small for %s\n",
++			__func__, cksumout->len, kctx->gk5e->name);
++		return GSS_S_FAILURE;
++	}
++
++	if (arcfour_hmac_md5_usage_to_salt(usage, rc4salt)) {
++		dprintk("%s: invalid usage value %u\n", __func__, usage);
++		return GSS_S_FAILURE;
++	}
++
++	md5 = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
++	if (IS_ERR(md5))
++		return GSS_S_FAILURE;
++
++	hmac_md5 = crypto_alloc_hash(kctx->gk5e->cksum_name, 0,
++				     CRYPTO_ALG_ASYNC);
++	if (IS_ERR(hmac_md5)) {
++		crypto_free_hash(md5);
++		return GSS_S_FAILURE;
++	}
++
++	desc.tfm = md5;
++	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
++
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out;
++	sg_init_one(sg, rc4salt, 4);
++	err = crypto_hash_update(&desc, sg, 4);
++	if (err)
++		goto out;
++
++	sg_init_one(sg, header, hdrlen);
++	err = crypto_hash_update(&desc, sg, hdrlen);
++	if (err)
++		goto out;
++	err = xdr_process_buf(body, body_offset, body->len - body_offset,
++			      checksummer, &desc);
++	if (err)
++		goto out;
++	err = crypto_hash_final(&desc, checksumdata);
++	if (err)
++		goto out;
++
++	desc.tfm = hmac_md5;
++	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
++
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out;
++	err = crypto_hash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength);
++	if (err)
++		goto out;
++
++	sg_init_one(sg, checksumdata, crypto_hash_digestsize(md5));
++	err = crypto_hash_digest(&desc, sg, crypto_hash_digestsize(md5),
++				 checksumdata);
++	if (err)
++		goto out;
++
++	memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
++	cksumout->len = kctx->gk5e->cksumlength;
++out:
++	crypto_free_hash(md5);
++	crypto_free_hash(hmac_md5);
++	return err ? GSS_S_FAILURE : 0;
++}
++
++/*
++ * checksum the plaintext data and hdrlen bytes of the token header
++ * The checksum is performed over the first 8 bytes of the
++ * gss token header and then over the data body
++ */
++u32
++make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
++	      struct xdr_buf *body, int body_offset, u8 *cksumkey,
++	      unsigned int usage, struct xdr_netobj *cksumout)
++{
++	struct hash_desc                desc;
++	struct scatterlist              sg[1];
++	int err;
++	u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	unsigned int checksumlen;
++
++	if (kctx->gk5e->ctype == CKSUMTYPE_HMAC_MD5_ARCFOUR)
++		return make_checksum_hmac_md5(kctx, header, hdrlen,
++					      body, body_offset,
++					      cksumkey, usage, cksumout);
++
++	if (cksumout->len < kctx->gk5e->cksumlength) {
++		dprintk("%s: checksum buffer length, %u, too small for %s\n",
++			__func__, cksumout->len, kctx->gk5e->name);
++		return GSS_S_FAILURE;
++	}
+ 
+-	desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
++	desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+ 	if (IS_ERR(desc.tfm))
+ 		return GSS_S_FAILURE;
+-	cksum->len = crypto_hash_digestsize(desc.tfm);
+ 	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ 
++	checksumlen = crypto_hash_digestsize(desc.tfm);
++
++	if (cksumkey != NULL) {
++		err = crypto_hash_setkey(desc.tfm, cksumkey,
++					 kctx->gk5e->keylength);
++		if (err)
++			goto out;
++	}
++
+ 	err = crypto_hash_init(&desc);
+ 	if (err)
+ 		goto out;
+@@ -149,15 +284,109 @@ make_checksum(char *cksumname, char *hea
+ 			      checksummer, &desc);
+ 	if (err)
+ 		goto out;
+-	err = crypto_hash_final(&desc, cksum->data);
++	err = crypto_hash_final(&desc, checksumdata);
++	if (err)
++		goto out;
+ 
++	switch (kctx->gk5e->ctype) {
++	case CKSUMTYPE_RSA_MD5:
++		err = kctx->gk5e->encrypt(kctx->seq, NULL, checksumdata,
++					  checksumdata, checksumlen);
++		if (err)
++			goto out;
++		memcpy(cksumout->data,
++		       checksumdata + checksumlen - kctx->gk5e->cksumlength,
++		       kctx->gk5e->cksumlength);
++		break;
++	case CKSUMTYPE_HMAC_SHA1_DES3:
++		memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
++		break;
++	default:
++		BUG();
++		break;
++	}
++	cksumout->len = kctx->gk5e->cksumlength;
++out:
++	crypto_free_hash(desc.tfm);
++	return err ? GSS_S_FAILURE : 0;
++}
++
++/*
++ * checksum the plaintext data and hdrlen bytes of the token header
++ * Per rfc4121, sec. 4.2.4, the checksum is performed over the data
++ * body then over the first 16 octets of the MIC token
++ * Inclusion of the header data in the calculation of the
++ * checksum is optional.
++ */
++u32
++make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
++		 struct xdr_buf *body, int body_offset, u8 *cksumkey,
++		 unsigned int usage, struct xdr_netobj *cksumout)
++{
++	struct hash_desc desc;
++	struct scatterlist sg[1];
++	int err;
++	u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	unsigned int checksumlen;
++
++	if (kctx->gk5e->keyed_cksum == 0) {
++		dprintk("%s: expected keyed hash for %s\n",
++			__func__, kctx->gk5e->name);
++		return GSS_S_FAILURE;
++	}
++	if (cksumkey == NULL) {
++		dprintk("%s: no key supplied for %s\n",
++			__func__, kctx->gk5e->name);
++		return GSS_S_FAILURE;
++	}
++
++	desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0,
++							CRYPTO_ALG_ASYNC);
++	if (IS_ERR(desc.tfm))
++		return GSS_S_FAILURE;
++	checksumlen = crypto_hash_digestsize(desc.tfm);
++	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
++
++	err = crypto_hash_setkey(desc.tfm, cksumkey, kctx->gk5e->keylength);
++	if (err)
++		goto out;
++
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out;
++	err = xdr_process_buf(body, body_offset, body->len - body_offset,
++			      checksummer, &desc);
++	if (err)
++		goto out;
++	if (header != NULL) {
++		sg_init_one(sg, header, hdrlen);
++		err = crypto_hash_update(&desc, sg, hdrlen);
++		if (err)
++			goto out;
++	}
++	err = crypto_hash_final(&desc, checksumdata);
++	if (err)
++		goto out;
++
++	cksumout->len = kctx->gk5e->cksumlength;
++
++	switch (kctx->gk5e->ctype) {
++	case CKSUMTYPE_HMAC_SHA1_96_AES128:
++	case CKSUMTYPE_HMAC_SHA1_96_AES256:
++		/* note that this truncates the hash */
++		memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
++		break;
++	default:
++		BUG();
++		break;
++	}
+ out:
+ 	crypto_free_hash(desc.tfm);
+ 	return err ? GSS_S_FAILURE : 0;
+ }
+ 
+ struct encryptor_desc {
+-	u8 iv[8]; /* XXX hard-coded blocksize */
++	u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
+ 	struct blkcipher_desc desc;
+ 	int pos;
+ 	struct xdr_buf *outbuf;
+@@ -198,7 +427,7 @@ encryptor(struct scatterlist *sg, void *
+ 	desc->fraglen += sg->length;
+ 	desc->pos += sg->length;
+ 
+-	fraglen = thislen & 7; /* XXX hardcoded blocksize */
++	fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1);
+ 	thislen -= fraglen;
+ 
+ 	if (thislen == 0)
+@@ -256,7 +485,7 @@ gss_encrypt_xdr_buf(struct crypto_blkcip
+ }
+ 
+ struct decryptor_desc {
+-	u8 iv[8]; /* XXX hard-coded blocksize */
++	u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
+ 	struct blkcipher_desc desc;
+ 	struct scatterlist frags[4];
+ 	int fragno;
+@@ -278,7 +507,7 @@ decryptor(struct scatterlist *sg, void *
+ 	desc->fragno++;
+ 	desc->fraglen += sg->length;
+ 
+-	fraglen = thislen & 7; /* XXX hardcoded blocksize */
++	fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1);
+ 	thislen -= fraglen;
+ 
+ 	if (thislen == 0)
+@@ -325,3 +554,437 @@ gss_decrypt_xdr_buf(struct crypto_blkcip
+ 
+ 	return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
+ }
++
++/*
++ * This function makes the assumption that it was ultimately called
++ * from gss_wrap().
++ *
++ * The client auth_gss code moves any existing tail data into a
++ * separate page before calling gss_wrap.
++ * The server svcauth_gss code ensures that both the head and the
++ * tail have slack space of RPC_MAX_AUTH_SIZE before calling gss_wrap.
++ *
++ * Even with that guarantee, this function may be called more than
++ * once in the processing of gss_wrap().  The best we can do is
++ * verify at compile-time (see GSS_KRB5_SLACK_CHECK) that the
++ * largest expected shift will fit within RPC_MAX_AUTH_SIZE.
++ * At run-time we can verify that a single invocation of this
++ * function doesn't attempt to use more than RPC_MAX_AUTH_SIZE.
++ */
++
++int
++xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen)
++{
++	u8 *p;
++
++	if (shiftlen == 0)
++		return 0;
++
++	BUILD_BUG_ON(GSS_KRB5_MAX_SLACK_NEEDED > RPC_MAX_AUTH_SIZE);
++	BUG_ON(shiftlen > RPC_MAX_AUTH_SIZE);
++
++	p = buf->head[0].iov_base + base;
++
++	memmove(p + shiftlen, p, buf->head[0].iov_len - base);
++
++	buf->head[0].iov_len += shiftlen;
++	buf->len += shiftlen;
++
++	return 0;
++}
++
++static u32
++gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf,
++		   u32 offset, u8 *iv, struct page **pages, int encrypt)
++{
++	u32 ret;
++	struct scatterlist sg[1];
++	struct blkcipher_desc desc = { .tfm = cipher, .info = iv };
++	u8 data[GSS_KRB5_MAX_BLOCKSIZE * 2];	/* fixed bound: no VLA on stack */
++	struct page **save_pages;
++	u32 len = buf->len - offset;
++	/* Only the trailing (at most two) cipher blocks are handled here */
++	BUG_ON(len > crypto_blkcipher_blocksize(cipher) * 2);
++
++	/*
++	 * For encryption, we want to read from the cleartext
++	 * page cache pages, and write the encrypted data to
++	 * the supplied xdr_buf pages.
++	 */
++	save_pages = buf->pages;
++	if (encrypt)
++		buf->pages = pages;
++
++	ret = read_bytes_from_xdr_buf(buf, offset, data, len);
++	buf->pages = save_pages;
++	if (ret)
++		goto out;
++
++	sg_init_one(sg, data, len);
++
++	if (encrypt)
++		ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, len);
++	else
++		ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, len);
++
++	if (ret)
++		goto out;
++
++	ret = write_bytes_to_xdr_buf(buf, offset, data, len);
++
++out:
++	return ret;
++}
++
++u32
++gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
++		     struct xdr_buf *buf, int ec, struct page **pages)
++{
++	u32 err;
++	struct xdr_netobj hmac;
++	u8 *cksumkey;
++	u8 *ecptr;
++	struct crypto_blkcipher *cipher, *aux_cipher;
++	int blocksize;
++	struct page **save_pages;
++	int nblocks, nbytes;
++	struct encryptor_desc desc;
++	u32 cbcbytes;
++	unsigned int usage;
++
++	if (kctx->initiate) {
++		cipher = kctx->initiator_enc;
++		aux_cipher = kctx->initiator_enc_aux;
++		cksumkey = kctx->initiator_integ;
++		usage = KG_USAGE_INITIATOR_SEAL;
++	} else {
++		cipher = kctx->acceptor_enc;
++		aux_cipher = kctx->acceptor_enc_aux;
++		cksumkey = kctx->acceptor_integ;
++		usage = KG_USAGE_ACCEPTOR_SEAL;
++	}
++	blocksize = crypto_blkcipher_blocksize(cipher);
++
++	/* hide the gss token header and insert the confounder */
++	offset += GSS_KRB5_TOK_HDR_LEN;
++	if (xdr_extend_head(buf, offset, kctx->gk5e->conflen))
++		return GSS_S_FAILURE;
++	gss_krb5_make_confounder(buf->head[0].iov_base + offset, kctx->gk5e->conflen);
++	offset -= GSS_KRB5_TOK_HDR_LEN;
++
++	if (buf->tail[0].iov_base != NULL) {
++		ecptr = buf->tail[0].iov_base + buf->tail[0].iov_len;
++	} else {
++		buf->tail[0].iov_base = buf->head[0].iov_base
++							+ buf->head[0].iov_len;
++		buf->tail[0].iov_len = 0;
++		ecptr = buf->tail[0].iov_base;
++	}
++
++	memset(ecptr, 'X', ec);
++	buf->tail[0].iov_len += ec;
++	buf->len += ec;
++
++	/* copy plaintext gss token header after filler (if any) */
++	memcpy(ecptr + ec, buf->head[0].iov_base + offset,
++						GSS_KRB5_TOK_HDR_LEN);
++	buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN;
++	buf->len += GSS_KRB5_TOK_HDR_LEN;
++
++	/* Do the HMAC */
++	hmac.len = GSS_KRB5_MAX_CKSUM_LEN;
++	hmac.data = buf->tail[0].iov_base + buf->tail[0].iov_len;
++
++	/*
++	 * When we are called, pages points to the real page cache
++	 * data -- which we can't go and encrypt!  buf->pages points
++	 * to scratch pages which we are going to send off to the
++	 * client/server.  Swap in the plaintext pages to calculate
++	 * the hmac.
++	 */
++	save_pages = buf->pages;
++	buf->pages = pages;
++
++	err = make_checksum_v2(kctx, NULL, 0, buf,
++			       offset + GSS_KRB5_TOK_HDR_LEN,
++			       cksumkey, usage, &hmac);
++	buf->pages = save_pages;
++	if (err)
++		return GSS_S_FAILURE;
++
++	nbytes = buf->len - offset - GSS_KRB5_TOK_HDR_LEN;
++	nblocks = (nbytes + blocksize - 1) / blocksize;
++	cbcbytes = 0;
++	if (nblocks > 2)
++		cbcbytes = (nblocks - 2) * blocksize;
++
++	memset(desc.iv, 0, sizeof(desc.iv));
++
++	if (cbcbytes) {
++		desc.pos = offset + GSS_KRB5_TOK_HDR_LEN;
++		desc.fragno = 0;
++		desc.fraglen = 0;
++		desc.pages = pages;
++		desc.outbuf = buf;
++		desc.desc.info = desc.iv;
++		desc.desc.flags = 0;
++		desc.desc.tfm = aux_cipher;
++
++		sg_init_table(desc.infrags, 4);
++		sg_init_table(desc.outfrags, 4);
++
++		err = xdr_process_buf(buf, offset + GSS_KRB5_TOK_HDR_LEN,
++				      cbcbytes, encryptor, &desc);
++		if (err)
++			goto out_err;
++	}
++
++	/* Make sure IV carries forward from any CBC results. */
++	err = gss_krb5_cts_crypt(cipher, buf,
++				 offset + GSS_KRB5_TOK_HDR_LEN + cbcbytes,
++				 desc.iv, pages, 1);
++	if (err) {
++		err = GSS_S_FAILURE;
++		goto out_err;
++	}
++
++	/* Now update buf to account for HMAC */
++	buf->tail[0].iov_len += kctx->gk5e->cksumlength;
++	buf->len += kctx->gk5e->cksumlength;
++
++out_err:
++	if (err)
++		err = GSS_S_FAILURE;
++	return err;
++}
++
++u32
++gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
++		     u32 *headskip, u32 *tailskip)
++{
++	struct xdr_buf subbuf;
++	u32 ret = 0;
++	u8 *cksum_key;
++	struct crypto_blkcipher *cipher, *aux_cipher;
++	struct xdr_netobj our_hmac_obj;
++	u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
++	u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
++	int nblocks, blocksize, cbcbytes;
++	struct decryptor_desc desc;
++	unsigned int usage;
++
++	if (kctx->initiate) {
++		cipher = kctx->acceptor_enc;
++		aux_cipher = kctx->acceptor_enc_aux;
++		cksum_key = kctx->acceptor_integ;
++		usage = KG_USAGE_ACCEPTOR_SEAL;
++	} else {
++		cipher = kctx->initiator_enc;
++		aux_cipher = kctx->initiator_enc_aux;
++		cksum_key = kctx->initiator_integ;
++		usage = KG_USAGE_INITIATOR_SEAL;
++	}
++	blocksize = crypto_blkcipher_blocksize(cipher);
++
++
++	/* create a segment skipping the header and leaving out the checksum */
++	xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN,
++				    (buf->len - offset - GSS_KRB5_TOK_HDR_LEN -
++				     kctx->gk5e->cksumlength));
++
++	nblocks = (subbuf.len + blocksize - 1) / blocksize;
++
++	cbcbytes = 0;
++	if (nblocks > 2)
++		cbcbytes = (nblocks - 2) * blocksize;
++
++	memset(desc.iv, 0, sizeof(desc.iv));
++
++	if (cbcbytes) {
++		desc.fragno = 0;
++		desc.fraglen = 0;
++		desc.desc.info = desc.iv;
++		desc.desc.flags = 0;
++		desc.desc.tfm = aux_cipher;
++
++		sg_init_table(desc.frags, 4);
++
++		ret = xdr_process_buf(&subbuf, 0, cbcbytes, decryptor, &desc);
++		if (ret)
++			goto out_err;
++	}
++
++	/* Make sure IV carries forward from any CBC results. */
++	ret = gss_krb5_cts_crypt(cipher, &subbuf, cbcbytes, desc.iv, NULL, 0);
++	if (ret)
++		goto out_err;
++
++
++	/* Calculate our hmac over the plaintext data */
++	our_hmac_obj.len = sizeof(our_hmac);
++	our_hmac_obj.data = our_hmac;
++
++	ret = make_checksum_v2(kctx, NULL, 0, &subbuf, 0,
++			       cksum_key, usage, &our_hmac_obj);
++	if (ret)
++		goto out_err;
++
++	/* Get the packet's hmac value */
++	ret = read_bytes_from_xdr_buf(buf, buf->len - kctx->gk5e->cksumlength,
++				      pkt_hmac, kctx->gk5e->cksumlength);
++	if (ret)
++		goto out_err;
++
++	if (memcmp(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
++		ret = GSS_S_BAD_SIG;
++		goto out_err;
++	}
++	*headskip = kctx->gk5e->conflen;
++	*tailskip = kctx->gk5e->cksumlength;
++out_err:
++	if (ret && ret != GSS_S_BAD_SIG)
++		ret = GSS_S_FAILURE;
++	return ret;
++}
++
++/*
++ * Compute Kseq given the initial session key and the checksum.
++ * Set the key of the given cipher.
++ */
++int
++krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
++		       unsigned char *cksum)
++{
++	struct crypto_hash *hmac;
++	struct hash_desc desc;
++	struct scatterlist sg[1];
++	u8 Kseq[GSS_KRB5_MAX_KEYLEN];
++	u32 zeroconstant = 0;
++	int err;
++
++	dprintk("%s: entered\n", __func__);
++
++	hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
++	if (IS_ERR(hmac)) {
++		dprintk("%s: error %ld, allocating hash '%s'\n",
++			__func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
++		return PTR_ERR(hmac);
++	}
++
++	desc.tfm = hmac;
++	desc.flags = 0;
++
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out_err;
++
++	/* Compute intermediate Kseq from session key */
++	err = crypto_hash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	sg_init_table(sg, 1);
++	sg_set_buf(sg, &zeroconstant, 4);
++
++	err = crypto_hash_digest(&desc, sg, 4, Kseq);
++	if (err)
++		goto out_err;
++
++	/* Compute final Kseq from the checksum and intermediate Kseq */
++	err = crypto_hash_setkey(hmac, Kseq, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	sg_set_buf(sg, cksum, 8);
++
++	err = crypto_hash_digest(&desc, sg, 8, Kseq);
++	if (err)
++		goto out_err;
++
++	err = crypto_blkcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	err = 0;
++
++out_err:
++	crypto_free_hash(hmac);
++	dprintk("%s: returning %d\n", __func__, err);
++	return err;
++}
++
++/*
++ * Compute Kcrypt given the initial session key and the plaintext seqnum.
++ * Set the key of the given cipher.
++ */
++int
++krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
++		       s32 seqnum)
++{
++	struct crypto_hash *hmac;
++	struct hash_desc desc;
++	struct scatterlist sg[1];
++	u8 Kcrypt[GSS_KRB5_MAX_KEYLEN];
++	u8 zeroconstant[4] = {0};
++	u8 seqnumarray[4];
++	int err, i;
++
++	dprintk("%s: entered, seqnum %u\n", __func__, seqnum);
++
++	hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
++	if (IS_ERR(hmac)) {
++		dprintk("%s: error %ld, allocating hash '%s'\n",
++			__func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
++		return PTR_ERR(hmac);
++	}
++
++	desc.tfm = hmac;
++	desc.flags = 0;
++
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out_err;
++
++	/* Compute intermediate Kcrypt from session key */
++	for (i = 0; i < kctx->gk5e->keylength; i++)
++		Kcrypt[i] = kctx->Ksess[i] ^ 0xf0;
++
++	err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	sg_init_table(sg, 1);
++	sg_set_buf(sg, zeroconstant, 4);
++
++	err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
++	if (err)
++		goto out_err;
++
++	/* Compute final Kcrypt from the seqnum and intermediate Kcrypt */
++	err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	seqnumarray[0] = (unsigned char) ((seqnum >> 24) & 0xff);
++	seqnumarray[1] = (unsigned char) ((seqnum >> 16) & 0xff);
++	seqnumarray[2] = (unsigned char) ((seqnum >> 8) & 0xff);
++	seqnumarray[3] = (unsigned char) ((seqnum >> 0) & 0xff);
++
++	sg_set_buf(sg, seqnumarray, 4);
++
++	err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
++	if (err)
++		goto out_err;
++
++	err = crypto_blkcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	err = 0;
++
++out_err:
++	crypto_free_hash(hmac);
++	dprintk("%s: returning %d\n", __func__, err);
++	return err;
++}
++
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_keys.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_keys.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_keys.c.orig	2010-08-23 11:01:00.390553891 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_keys.c	2010-08-23 11:01:00.391564137 -0400
+@@ -0,0 +1,336 @@
++/*
++ * COPYRIGHT (c) 2008
++ * The Regents of the University of Michigan
++ * ALL RIGHTS RESERVED
++ *
++ * Permission is granted to use, copy, create derivative works
++ * and redistribute this software and such derivative works
++ * for any purpose, so long as the name of The University of
++ * Michigan is not used in any advertising or publicity
++ * pertaining to the use of distribution of this software
++ * without specific, written prior authorization.  If the
++ * above copyright notice or any other identification of the
++ * University of Michigan is included in any copy of any
++ * portion of this software, then the disclaimer below must
++ * also be included.
++ *
++ * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
++ * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
++ * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
++ * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
++ * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
++ * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
++ * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
++ * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
++ * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
++ * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGES.
++ */
++
++/*
++ * Copyright (C) 1998 by the FundsXpress, INC.
++ *
++ * All rights reserved.
++ *
++ * Export of this software from the United States of America may require
++ * a specific license from the United States Government.  It is the
++ * responsibility of any person or organization contemplating export to
++ * obtain such a license before exporting.
++ *
++ * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
++ * distribute this software and its documentation for any purpose and
++ * without fee is hereby granted, provided that the above copyright
++ * notice appear in all copies and that both that copyright notice and
++ * this permission notice appear in supporting documentation, and that
++ * the name of FundsXpress. not be used in advertising or publicity pertaining
++ * to distribution of the software without specific, written prior
++ * permission.  FundsXpress makes no representations about the suitability of
++ * this software for any purpose.  It is provided "as is" without express
++ * or implied warranty.
++ *
++ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
++ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
++ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
++ */
++
++#include <linux/err.h>
++#include <linux/types.h>
++#include <linux/crypto.h>
++#include <linux/sunrpc/gss_krb5.h>
++#include <linux/sunrpc/xdr.h>
++
++#ifdef RPC_DEBUG
++# define RPCDBG_FACILITY        RPCDBG_AUTH
++#endif
++
++/*
++ * This is the n-fold function as described in rfc3961, sec 5.1
++ * Taken from MIT Kerberos and modified.
++ */
++
++static void krb5_nfold(u32 inbits, const u8 *in,
++		       u32 outbits, u8 *out)
++{
++	int a, b, c, lcm;
++	int byte, i, msbit;
++
++	/* the code below is more readable if I make these bytes
++	   instead of bits */
++
++	inbits >>= 3;
++	outbits >>= 3;
++
++	/* first compute lcm(n,k) */
++
++	a = outbits;
++	b = inbits;
++
++	while (b != 0) {
++		c = b;
++		b = a%b;
++		a = c;
++	}
++
++	lcm = outbits*inbits/a;
++
++	/* now do the real work */
++
++	memset(out, 0, outbits);
++	byte = 0;
++
++	/* this will end up cycling through k lcm(k,n)/k times, which
++	   is correct */
++	for (i = lcm-1; i >= 0; i--) {
++		/* compute the msbit in k which gets added into this byte */
++		msbit = (
++			/* first, start with the msbit in the first,
++			 * unrotated byte */
++			 ((inbits << 3) - 1)
++			 /* then, for each byte, shift to the right
++			  * for each repetition */
++			 + (((inbits << 3) + 13) * (i/inbits))
++			 /* last, pick out the correct byte within
++			  * that shifted repetition */
++			 + ((inbits - (i % inbits)) << 3)
++			 ) % (inbits << 3);
++
++		/* pull out the byte value itself */
++		byte += (((in[((inbits - 1) - (msbit >> 3)) % inbits] << 8)|
++				  (in[((inbits) - (msbit >> 3)) % inbits]))
++				 >> ((msbit & 7) + 1)) & 0xff;
++
++		/* do the addition */
++		byte += out[i % outbits];
++		out[i % outbits] = byte & 0xff;
++
++		/* keep around the carry bit, if any */
++		byte >>= 8;
++
++	}
++
++	/* if there's a carry bit left over, add it back in */
++	if (byte) {
++		for (i = outbits - 1; i >= 0; i--) {
++			/* do the addition */
++			byte += out[i];
++			out[i] = byte & 0xff;
++
++			/* keep around the carry bit, if any */
++			byte >>= 8;
++		}
++	}
++}
++
++/*
++ * This is the DK (derive_key) function as described in rfc3961, sec 5.1
++ * Taken from MIT Kerberos and modified.  Returns 0 on success, else a
++ * positive EINVAL/ENOMEM or the non-zero result of gk5e->mk_key.
++ */
++u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
++		    const struct xdr_netobj *inkey,
++		    struct xdr_netobj *outkey,
++		    const struct xdr_netobj *in_constant,
++		    gfp_t gfp_mask)
++{
++	size_t blocksize, keybytes, keylength, n;
++	unsigned char *inblockdata, *outblockdata, *rawkey;
++	struct xdr_netobj inblock, outblock;
++	struct crypto_blkcipher *cipher;
++	u32 ret = EINVAL;
++
++	blocksize = gk5e->blocksize;
++	keybytes = gk5e->keybytes;
++	keylength = gk5e->keylength;
++
++	if ((inkey->len != keylength) || (outkey->len != keylength))
++		goto err_return;
++
++	cipher = crypto_alloc_blkcipher(gk5e->encrypt_name, 0,
++					CRYPTO_ALG_ASYNC);
++	if (IS_ERR(cipher))
++		goto err_return;
++	if (crypto_blkcipher_setkey(cipher, inkey->data, inkey->len))
++		goto err_free_cipher;	/* cipher is allocated: don't leak it */
++
++	/* allocate and set up buffers */
++
++	ret = ENOMEM;
++	inblockdata = kmalloc(blocksize, gfp_mask);
++	if (inblockdata == NULL)
++		goto err_free_cipher;
++
++	outblockdata = kmalloc(blocksize, gfp_mask);
++	if (outblockdata == NULL)
++		goto err_free_in;
++
++	rawkey = kmalloc(keybytes, gfp_mask);
++	if (rawkey == NULL)
++		goto err_free_out;
++
++	inblock.data = (char *) inblockdata;
++	inblock.len = blocksize;
++
++	outblock.data = (char *) outblockdata;
++	outblock.len = blocksize;
++
++	/* initialize the input block */
++
++	if (in_constant->len == inblock.len) {
++		memcpy(inblock.data, in_constant->data, inblock.len);
++	} else {
++		krb5_nfold(in_constant->len * 8, in_constant->data,
++			   inblock.len * 8, inblock.data);
++	}
++
++	/* loop encrypting the blocks until enough key bytes are generated */
++
++	n = 0;
++	while (n < keybytes) {
++		(*(gk5e->encrypt))(cipher, NULL, inblock.data,
++				   outblock.data, inblock.len);
++
++		if ((keybytes - n) <= outblock.len) {
++			memcpy(rawkey + n, outblock.data, (keybytes - n));
++			break;
++		}
++
++		memcpy(rawkey + n, outblock.data, outblock.len);
++		memcpy(inblock.data, outblock.data, outblock.len);
++		n += outblock.len;
++	}
++
++	/* postprocess the key */
++
++	inblock.data = (char *) rawkey;
++	inblock.len = keybytes;
++
++	BUG_ON(gk5e->mk_key == NULL);
++	ret = (*(gk5e->mk_key))(gk5e, &inblock, outkey);
++	if (ret) {
++		dprintk("%s: got %d from mk_key function for '%s'\n",
++			__func__, ret, gk5e->encrypt_name);
++		goto err_free_raw;
++	}
++
++	/* clean memory, free resources and exit */
++
++	ret = 0;
++
++err_free_raw:
++	memset(rawkey, 0, keybytes);
++	kfree(rawkey);
++err_free_out:
++	memset(outblockdata, 0, blocksize);
++	kfree(outblockdata);
++err_free_in:
++	memset(inblockdata, 0, blocksize);
++	kfree(inblockdata);
++err_free_cipher:
++	crypto_free_blkcipher(cipher);
++err_return:
++	return ret;
++}
++
++#define smask(step) ((1<<step)-1)
++#define pstep(x, step) (((x)&smask(step))^(((x)>>step)&smask(step)))
++#define parity_char(x) pstep(pstep(pstep((x), 4), 2), 1)
++
++static void mit_des_fixup_key_parity(u8 key[8])
++{
++	int i;
++	for (i = 0; i < 8; i++) {
++		key[i] &= 0xfe;
++		key[i] |= 1^parity_char(key[i]);
++	}
++}
++
++/*
++ * This is the des3 key derivation postprocess function
++ */
++u32 gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e,
++			   struct xdr_netobj *randombits,
++			   struct xdr_netobj *key)
++{
++	int i;
++	u32 ret = EINVAL;
++
++	if (key->len != 24) {
++		dprintk("%s: key->len is %d\n", __func__, key->len);
++		goto err_out;
++	}
++	if (randombits->len != 21) {
++		dprintk("%s: randombits->len is %d\n",
++			__func__, randombits->len);
++		goto err_out;
++	}
++
++	/* take the seven bytes, move them around into the top 7 bits of the
++	   8 key bytes, then compute the parity bits.  Do this three times. */
++
++	for (i = 0; i < 3; i++) {
++		memcpy(key->data + i*8, randombits->data + i*7, 7);
++		key->data[i*8+7] = (((key->data[i*8]&1)<<1) |
++				    ((key->data[i*8+1]&1)<<2) |
++				    ((key->data[i*8+2]&1)<<3) |
++				    ((key->data[i*8+3]&1)<<4) |
++				    ((key->data[i*8+4]&1)<<5) |
++				    ((key->data[i*8+5]&1)<<6) |
++				    ((key->data[i*8+6]&1)<<7));
++
++		mit_des_fixup_key_parity(key->data + i*8);
++	}
++	ret = 0;
++err_out:
++	return ret;
++}
++
++/*
++ * This is the aes key derivation postprocess function
++ */
++u32 gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e,
++			  struct xdr_netobj *randombits,
++			  struct xdr_netobj *key)
++{
++	u32 ret = EINVAL;
++
++	if (key->len != 16 && key->len != 32) {
++		dprintk("%s: key->len is %d\n", __func__, key->len);
++		goto err_out;
++	}
++	if (randombits->len != 16 && randombits->len != 32) {
++		dprintk("%s: randombits->len is %d\n",
++			__func__, randombits->len);
++		goto err_out;
++	}
++	if (randombits->len != key->len) {
++		dprintk("%s: randombits->len is %d, key->len is %d\n",
++			__func__, randombits->len, key->len);
++		goto err_out;
++	}
++	memcpy(key->data, randombits->data, key->len);
++	ret = 0;
++err_out:
++	return ret;
++}
++
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_mech.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_mech.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_mech.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_mech.c	2010-08-23 11:01:00.392564136 -0400
+@@ -1,7 +1,7 @@
+ /*
+  *  linux/net/sunrpc/gss_krb5_mech.c
+  *
+- *  Copyright (c) 2001 The Regents of the University of Michigan.
++ *  Copyright (c) 2001-2008 The Regents of the University of Michigan.
+  *  All rights reserved.
+  *
+  *  Andy Adamson <andros@umich.edu>
+@@ -48,6 +48,143 @@
+ # define RPCDBG_FACILITY	RPCDBG_AUTH
+ #endif
+ 
++static struct gss_api_mech gss_kerberos_mech;	/* forward declaration */
++
++static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
++	/*
++	 * DES (All DES enctypes are mapped to the same gss functionality)
++	 */
++	{
++	  .etype = ENCTYPE_DES_CBC_RAW,
++	  .ctype = CKSUMTYPE_RSA_MD5,
++	  .name = "des-cbc-crc",
++	  .encrypt_name = "cbc(des)",
++	  .cksum_name = "md5",
++	  .encrypt = krb5_encrypt,
++	  .decrypt = krb5_decrypt,
++	  .mk_key = NULL,
++	  .signalg = SGN_ALG_DES_MAC_MD5,
++	  .sealalg = SEAL_ALG_DES,
++	  .keybytes = 7,
++	  .keylength = 8,
++	  .blocksize = 8,
++	  .conflen = 8,
++	  .cksumlength = 8,
++	  .keyed_cksum = 0,
++	},
++	/*
++	 * RC4-HMAC
++	 */
++	{
++	  .etype = ENCTYPE_ARCFOUR_HMAC,
++	  .ctype = CKSUMTYPE_HMAC_MD5_ARCFOUR,
++	  .name = "rc4-hmac",
++	  .encrypt_name = "ecb(arc4)",
++	  .cksum_name = "hmac(md5)",
++	  .encrypt = krb5_encrypt,
++	  .decrypt = krb5_decrypt,
++	  .mk_key = NULL,
++	  .signalg = SGN_ALG_HMAC_MD5,
++	  .sealalg = SEAL_ALG_MICROSOFT_RC4,
++	  .keybytes = 16,
++	  .keylength = 16,
++	  .blocksize = 1,
++	  .conflen = 8,
++	  .cksumlength = 8,
++	  .keyed_cksum = 1,
++	},
++	/*
++	 * 3DES
++	 */
++	{
++	  .etype = ENCTYPE_DES3_CBC_RAW,
++	  .ctype = CKSUMTYPE_HMAC_SHA1_DES3,
++	  .name = "des3-hmac-sha1",
++	  .encrypt_name = "cbc(des3_ede)",
++	  .cksum_name = "hmac(sha1)",
++	  .encrypt = krb5_encrypt,
++	  .decrypt = krb5_decrypt,
++	  .mk_key = gss_krb5_des3_make_key,
++	  .signalg = SGN_ALG_HMAC_SHA1_DES3_KD,
++	  .sealalg = SEAL_ALG_DES3KD,
++	  .keybytes = 21,
++	  .keylength = 24,
++	  .blocksize = 8,
++	  .conflen = 8,
++	  .cksumlength = 20,
++	  .keyed_cksum = 1,
++	},
++	/*
++	 * AES128
++	 */
++	{
++	  .etype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
++	  .ctype = CKSUMTYPE_HMAC_SHA1_96_AES128,
++	  .name = "aes128-cts",
++	  .encrypt_name = "cts(cbc(aes))",
++	  .cksum_name = "hmac(sha1)",
++	  .encrypt = krb5_encrypt,
++	  .decrypt = krb5_decrypt,
++	  .mk_key = gss_krb5_aes_make_key,
++	  .encrypt_v2 = gss_krb5_aes_encrypt,
++	  .decrypt_v2 = gss_krb5_aes_decrypt,
++	  .signalg = -1,
++	  .sealalg = -1,
++	  .keybytes = 16,
++	  .keylength = 16,
++	  .blocksize = 16,
++	  .conflen = 16,
++	  .cksumlength = 12,
++	  .keyed_cksum = 1,
++	},
++	/*
++	 * AES256
++	 */
++	{
++	  .etype = ENCTYPE_AES256_CTS_HMAC_SHA1_96,
++	  .ctype = CKSUMTYPE_HMAC_SHA1_96_AES256,
++	  .name = "aes256-cts",
++	  .encrypt_name = "cts(cbc(aes))",
++	  .cksum_name = "hmac(sha1)",
++	  .encrypt = krb5_encrypt,
++	  .decrypt = krb5_decrypt,
++	  .mk_key = gss_krb5_aes_make_key,
++	  .encrypt_v2 = gss_krb5_aes_encrypt,
++	  .decrypt_v2 = gss_krb5_aes_decrypt,
++	  .signalg = -1,
++	  .sealalg = -1,
++	  .keybytes = 32,
++	  .keylength = 32,
++	  .blocksize = 16,
++	  .conflen = 16,
++	  .cksumlength = 12,
++	  .keyed_cksum = 1,
++	},
++};
++
++static const int num_supported_enctypes =
++	ARRAY_SIZE(supported_gss_krb5_enctypes);
++
++static int
++supported_gss_krb5_enctype(int etype)
++{
++	int i;
++	for (i = 0; i < num_supported_enctypes; i++)
++		if (supported_gss_krb5_enctypes[i].etype == etype)
++			return 1;
++	return 0;
++}
++
++static const struct gss_krb5_enctype *
++get_gss_krb5_enctype(int etype)
++{
++	int i;
++	for (i = 0; i < num_supported_enctypes; i++)
++		if (supported_gss_krb5_enctypes[i].etype == etype)
++			return &supported_gss_krb5_enctypes[i];
++	return NULL;
++}
++
+ static const void *
+ simple_get_bytes(const void *p, const void *end, void *res, int len)
+ {
+@@ -78,35 +215,45 @@ simple_get_netobj(const void *p, const v
+ }
+ 
+ static inline const void *
+-get_key(const void *p, const void *end, struct crypto_blkcipher **res)
++get_key(const void *p, const void *end,
++	struct krb5_ctx *ctx, struct crypto_blkcipher **res)
+ {
+ 	struct xdr_netobj	key;
+ 	int			alg;
+-	char			*alg_name;
+ 
+ 	p = simple_get_bytes(p, end, &alg, sizeof(alg));
+ 	if (IS_ERR(p))
+ 		goto out_err;
++
++	switch (alg) {
++	case ENCTYPE_DES_CBC_CRC:
++	case ENCTYPE_DES_CBC_MD4:
++	case ENCTYPE_DES_CBC_MD5:
++		/* Map all these key types to ENCTYPE_DES_CBC_RAW */
++		alg = ENCTYPE_DES_CBC_RAW;
++		break;
++	}
++
++	if (!supported_gss_krb5_enctype(alg)) {
++		printk(KERN_WARNING "gss_kerberos_mech: unsupported "
++			"encryption key algorithm %d\n", alg);
++		goto out_err;
++	}
+ 	p = simple_get_netobj(p, end, &key);
+ 	if (IS_ERR(p))
+ 		goto out_err;
+ 
+-	switch (alg) {
+-		case ENCTYPE_DES_CBC_RAW:
+-			alg_name = "cbc(des)";
+-			break;
+-		default:
+-			printk("gss_kerberos_mech: unsupported algorithm %d\n", alg);
+-			goto out_err_free_key;
+-	}
+-	*res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC);
++	*res = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
++							CRYPTO_ALG_ASYNC);
+ 	if (IS_ERR(*res)) {
+-		printk("gss_kerberos_mech: unable to initialize crypto algorithm %s\n", alg_name);
++		printk(KERN_WARNING "gss_kerberos_mech: unable to initialize "
++			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
+ 		*res = NULL;
+ 		goto out_err_free_key;
+ 	}
+ 	if (crypto_blkcipher_setkey(*res, key.data, key.len)) {
+-		printk("gss_kerberos_mech: error setting key for crypto algorithm %s\n", alg_name);
++		printk(KERN_WARNING "gss_kerberos_mech: error setting key for "
++			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
+ 		goto out_err_free_tfm;
+ 	}
+ 
+@@ -123,56 +270,55 @@ out_err:
+ }
+ 
+ static int
+-gss_import_sec_context_kerberos(const void *p,
+-				size_t len,
+-				struct gss_ctx *ctx_id)
++gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
+ {
+-	const void *end = (const void *)((const char *)p + len);
+-	struct	krb5_ctx *ctx;
+ 	int tmp;
+ 
+-	if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) {
+-		p = ERR_PTR(-ENOMEM);
+-		goto out_err;
+-	}
+-
+ 	p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
++		goto out_err;
++
++	/* Old format supports only DES!  Any other enctype uses new format */
++	ctx->enctype = ENCTYPE_DES_CBC_RAW;
++
++	ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
++	if (ctx->gk5e == NULL)
++		goto out_err;
++
+ 	/* The downcall format was designed before we completely understood
+ 	 * the uses of the context fields; so it includes some stuff we
+ 	 * just give some minimal sanity-checking, and some we ignore
+ 	 * completely (like the next twenty bytes): */
+ 	if (unlikely(p + 20 > end || p + 20 < p))
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	p += 20;
+ 	p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	if (tmp != SGN_ALG_DES_MAC_MD5) {
+ 		p = ERR_PTR(-ENOSYS);
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	}
+ 	p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	if (tmp != SEAL_ALG_DES) {
+ 		p = ERR_PTR(-ENOSYS);
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	}
+ 	p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	p = simple_get_bytes(p, end, &ctx->seq_send, sizeof(ctx->seq_send));
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	p = simple_get_netobj(p, end, &ctx->mech_used);
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
+-	p = get_key(p, end, &ctx->enc);
++		goto out_err;
++	p = get_key(p, end, ctx, &ctx->enc);
+ 	if (IS_ERR(p))
+ 		goto out_err_free_mech;
+-	p = get_key(p, end, &ctx->seq);
++	p = get_key(p, end, ctx, &ctx->seq);
+ 	if (IS_ERR(p))
+ 		goto out_err_free_key1;
+ 	if (p != end) {
+@@ -180,9 +326,6 @@ gss_import_sec_context_kerberos(const vo
+ 		goto out_err_free_key2;
+ 	}
+ 
+-	ctx_id->internal_ctx_id = ctx;
+-
+-	dprintk("RPC:       Successfully imported new context.\n");
+ 	return 0;
+ 
+ out_err_free_key2:
+@@ -191,18 +334,378 @@ out_err_free_key1:
+ 	crypto_free_blkcipher(ctx->enc);
+ out_err_free_mech:
+ 	kfree(ctx->mech_used.data);
+-out_err_free_ctx:
+-	kfree(ctx);
+ out_err:
+ 	return PTR_ERR(p);
+ }
+ 
++struct crypto_blkcipher *
++context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key)
++{
++	struct crypto_blkcipher *cp;
++
++	cp = crypto_alloc_blkcipher(cname, 0, CRYPTO_ALG_ASYNC);
++	if (IS_ERR(cp)) {
++		dprintk("gss_kerberos_mech: unable to initialize "
++			"crypto algorithm %s\n", cname);
++		return NULL;
++	}
++	if (crypto_blkcipher_setkey(cp, key, ctx->gk5e->keylength)) {
++		dprintk("gss_kerberos_mech: error setting key for "
++			"crypto algorithm %s\n", cname);
++		crypto_free_blkcipher(cp);
++		return NULL;
++	}
++	return cp;
++}
++
++static inline void
++set_cdata(u8 cdata[GSS_KRB5_K5CLENGTH], u32 usage, u8 seed)
++{
++	cdata[0] = (usage>>24)&0xff;
++	cdata[1] = (usage>>16)&0xff;
++	cdata[2] = (usage>>8)&0xff;
++	cdata[3] = usage&0xff;
++	cdata[4] = seed;
++}
++
++static int
++context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask)
++{
++	struct xdr_netobj c, keyin, keyout;
++	u8 cdata[GSS_KRB5_K5CLENGTH];
++	u32 err;
++
++	c.len = GSS_KRB5_K5CLENGTH;
++	c.data = cdata;
++
++	keyin.data = ctx->Ksess;
++	keyin.len = ctx->gk5e->keylength;
++	keyout.len = ctx->gk5e->keylength;
++
++	/* seq uses the raw key */
++	ctx->seq = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
++					   ctx->Ksess);
++	if (ctx->seq == NULL)
++		goto out_err;
++
++	ctx->enc = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
++					   ctx->Ksess);
++	if (ctx->enc == NULL)
++		goto out_free_seq;
++
++	/* derive cksum */
++	set_cdata(cdata, KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM);
++	keyout.data = ctx->cksum;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving cksum key\n",
++			__func__, err);
++		goto out_free_enc;
++	}
++
++	return 0;
++
++out_free_enc:
++	crypto_free_blkcipher(ctx->enc);
++out_free_seq:
++	crypto_free_blkcipher(ctx->seq);
++out_err:
++	return -EINVAL;
++}
++
++/*
++ * Note that RC4 depends on deriving keys using the sequence
++ * number or the checksum of a token.  Therefore, the final keys
++ * cannot be calculated until the token is being constructed!
++ */
++static int
++context_derive_keys_rc4(struct krb5_ctx *ctx)
++{
++	struct crypto_hash *hmac;
++	char sigkeyconstant[] = "signaturekey";
++	int slen = strlen(sigkeyconstant) + 1;	/* include null terminator */
++	struct hash_desc desc;
++	struct scatterlist sg[1];
++	int err;
++
++	dprintk("RPC:       %s: entered\n", __func__);
++	/*
++	 * derive cksum (aka Ksign) key: hash of "signaturekey\0" keyed by Ksess
++	 */
++	hmac = crypto_alloc_hash(ctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
++	if (IS_ERR(hmac)) {
++		dprintk("%s: error %ld allocating hash '%s'\n",
++			__func__, PTR_ERR(hmac), ctx->gk5e->cksum_name);
++		err = PTR_ERR(hmac);
++		goto out_err;
++	}
++
++	err = crypto_hash_setkey(hmac, ctx->Ksess, ctx->gk5e->keylength);
++	if (err)
++		goto out_err_free_hmac;
++
++	sg_init_table(sg, 1);
++	sg_set_buf(sg, sigkeyconstant, slen);
++
++	desc.tfm = hmac;
++	desc.flags = 0;
++
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out_err_free_hmac;
++
++	err = crypto_hash_digest(&desc, sg, slen, ctx->cksum);
++	if (err)
++		goto out_err_free_hmac;
++	/*
++	 * allocate the blkciphers for data and seqnum encryption (not keyed here)
++	 */
++	ctx->enc = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
++					  CRYPTO_ALG_ASYNC);
++	if (IS_ERR(ctx->enc)) {
++		err = PTR_ERR(ctx->enc);
++		goto out_err_free_hmac;
++	}
++
++	ctx->seq = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
++					  CRYPTO_ALG_ASYNC);
++	if (IS_ERR(ctx->seq)) {
++		crypto_free_blkcipher(ctx->enc);
++		err = PTR_ERR(ctx->seq);
++		goto out_err_free_hmac;
++	}
++
++	dprintk("RPC:       %s: returning success\n", __func__);
++
++	err = 0;
++
++out_err_free_hmac:
++	crypto_free_hash(hmac);	/* reached on success too: hmac is temporary */
++out_err:
++	dprintk("RPC:       %s: returning %d\n", __func__, err);
++	return err;
++}
++
++static int
++context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
++{
++	struct xdr_netobj c, keyin, keyout;
++	u8 cdata[GSS_KRB5_K5CLENGTH];
++	u32 err;
++
++	c.len = GSS_KRB5_K5CLENGTH;
++	c.data = cdata;
++
++	keyin.data = ctx->Ksess;
++	keyin.len = ctx->gk5e->keylength;
++	keyout.len = ctx->gk5e->keylength;
++
++	/* initiator seal encryption */
++	set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
++	keyout.data = ctx->initiator_seal;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving initiator_seal key\n",
++			__func__, err);
++		goto out_err;
++	}
++	ctx->initiator_enc = context_v2_alloc_cipher(ctx,
++						     ctx->gk5e->encrypt_name,
++						     ctx->initiator_seal);
++	if (ctx->initiator_enc == NULL)
++		goto out_err;
++
++	/* acceptor seal encryption */
++	set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
++	keyout.data = ctx->acceptor_seal;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving acceptor_seal key\n",
++			__func__, err);
++		goto out_free_initiator_enc;
++	}
++	ctx->acceptor_enc = context_v2_alloc_cipher(ctx,
++						    ctx->gk5e->encrypt_name,
++						    ctx->acceptor_seal);
++	if (ctx->acceptor_enc == NULL)
++		goto out_free_initiator_enc;
++
++	/* initiator sign checksum */
++	set_cdata(cdata, KG_USAGE_INITIATOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
++	keyout.data = ctx->initiator_sign;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving initiator_sign key\n",
++			__func__, err);
++		goto out_free_acceptor_enc;
++	}
++
++	/* acceptor sign checksum */
++	set_cdata(cdata, KG_USAGE_ACCEPTOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
++	keyout.data = ctx->acceptor_sign;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving acceptor_sign key\n",
++			__func__, err);
++		goto out_free_acceptor_enc;
++	}
++
++	/* initiator seal integrity */
++	set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
++	keyout.data = ctx->initiator_integ;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving initiator_integ key\n",
++			__func__, err);
++		goto out_free_acceptor_enc;
++	}
++
++	/* acceptor seal integrity */
++	set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
++	keyout.data = ctx->acceptor_integ;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving acceptor_integ key\n",
++			__func__, err);
++		goto out_free_acceptor_enc;
++	}
++
++	switch (ctx->enctype) {
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		ctx->initiator_enc_aux =
++			context_v2_alloc_cipher(ctx, "cbc(aes)",
++						ctx->initiator_seal);
++		if (ctx->initiator_enc_aux == NULL)
++			goto out_free_acceptor_enc;
++		ctx->acceptor_enc_aux =
++			context_v2_alloc_cipher(ctx, "cbc(aes)",
++						ctx->acceptor_seal);
++		if (ctx->acceptor_enc_aux == NULL) {
++			crypto_free_blkcipher(ctx->initiator_enc_aux);
++			goto out_free_acceptor_enc;
++		}
++	}
++
++	return 0;
++
++out_free_acceptor_enc:
++	crypto_free_blkcipher(ctx->acceptor_enc);
++out_free_initiator_enc:
++	crypto_free_blkcipher(ctx->initiator_enc);
++out_err:
++	return -EINVAL;
++}
++
++/* Parse a new-format context blob: flags, endtime, seq_send64, enctype and
++ * session key, then derive the per-enctype keys.  Returns 0 or -errno. */
++static int
++gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
++		gfp_t gfp_mask)
++{
++	int keylen;
++
++	p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
++	if (IS_ERR(p))
++		goto out_err;
++	ctx->initiate = ctx->flags & KRB5_CTX_FLAG_INITIATOR;
++	p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
++	if (IS_ERR(p))
++		goto out_err;
++	p = simple_get_bytes(p, end, &ctx->seq_send64, sizeof(ctx->seq_send64));
++	if (IS_ERR(p))
++		goto out_err;
++	/* set seq_send for use by "older" enctypes */
++	ctx->seq_send = ctx->seq_send64;
++	if (ctx->seq_send64 != ctx->seq_send) {
++		dprintk("%s: seq_send64 %lx, seq_send %x overflow?\n", __func__,
++			(long unsigned)ctx->seq_send64, ctx->seq_send);
++		p = ERR_PTR(-EINVAL);	/* p is a valid pointer here, not an errno */
++		goto out_err;
++	}
++	p = simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype));
++	if (IS_ERR(p))
++		goto out_err;
++	/* Map ENCTYPE_DES3_CBC_SHA1 to ENCTYPE_DES3_CBC_RAW */
++	if (ctx->enctype == ENCTYPE_DES3_CBC_SHA1)
++		ctx->enctype = ENCTYPE_DES3_CBC_RAW;
++	ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
++	if (ctx->gk5e == NULL) {
++		dprintk("gss_kerberos_mech: unsupported krb5 enctype %u\n",
++			ctx->enctype);
++		p = ERR_PTR(-EINVAL);
++		goto out_err;
++	}
++	keylen = ctx->gk5e->keylength;
++	p = simple_get_bytes(p, end, ctx->Ksess, keylen);
++	if (IS_ERR(p))
++		goto out_err;
++	if (p != end) {
++		p = ERR_PTR(-EINVAL);
++		goto out_err;
++	}
++
++	ctx->mech_used.data = kmemdup(gss_kerberos_mech.gm_oid.data,
++				      gss_kerberos_mech.gm_oid.len, gfp_mask);
++	if (unlikely(ctx->mech_used.data == NULL)) {
++		p = ERR_PTR(-ENOMEM);
++		goto out_err;
++	}
++	ctx->mech_used.len = gss_kerberos_mech.gm_oid.len;
++	/* NOTE(review): mech_used.data is not freed if derivation below fails */
++	switch (ctx->enctype) {
++	case ENCTYPE_DES3_CBC_RAW:
++		return context_derive_keys_des3(ctx, gfp_mask);
++	case ENCTYPE_ARCFOUR_HMAC:
++		return context_derive_keys_rc4(ctx);
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		return context_derive_keys_new(ctx, gfp_mask);
++	default:
++		return -EINVAL;
++	}
++
++out_err:
++	return PTR_ERR(p);
++}
++
++static int
++gss_import_sec_context_kerberos(const void *p, size_t len,
++				struct gss_ctx *ctx_id,
++				gfp_t gfp_mask)
++{
++	const void *end = (const void *)((const char *)p + len);
++	struct  krb5_ctx *ctx;
++	int ret;
++
++	ctx = kzalloc(sizeof(*ctx), gfp_mask);
++	if (ctx == NULL)
++		return -ENOMEM;
++
++	if (len == 85)
++		ret = gss_import_v1_context(p, end, ctx);
++	else
++		ret = gss_import_v2_context(p, end, ctx, gfp_mask);
++
++	if (ret == 0)
++		ctx_id->internal_ctx_id = ctx;
++	else
++		kfree(ctx);
++
++	dprintk("RPC:       %s: returning %d\n", __func__, ret);
++	return ret;
++}
++
+ static void
+ gss_delete_sec_context_kerberos(void *internal_ctx) {
+ 	struct krb5_ctx *kctx = internal_ctx;
+ 
+ 	crypto_free_blkcipher(kctx->seq);
+ 	crypto_free_blkcipher(kctx->enc);
++	crypto_free_blkcipher(kctx->acceptor_enc);
++	crypto_free_blkcipher(kctx->initiator_enc);
++	crypto_free_blkcipher(kctx->acceptor_enc_aux);
++	crypto_free_blkcipher(kctx->initiator_enc_aux);
+ 	kfree(kctx->mech_used.data);
+ 	kfree(kctx);
+ }
+@@ -241,6 +744,7 @@ static struct gss_api_mech gss_kerberos_
+ 	.gm_ops		= &gss_kerberos_ops,
+ 	.gm_pf_num	= ARRAY_SIZE(gss_kerberos_pfs),
+ 	.gm_pfs		= gss_kerberos_pfs,
++	.gm_upcall_enctypes = "enctypes=18,17,16,23,3,1,2 ",
+ };
+ 
+ static int __init init_kerberos_module(void)
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seal.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seal.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seal.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seal.c	2010-08-23 11:01:00.392564136 -0400
+@@ -3,7 +3,7 @@
+  *
+  *  Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5seal.c
+  *
+- *  Copyright (c) 2000 The Regents of the University of Michigan.
++ *  Copyright (c) 2000-2008 The Regents of the University of Michigan.
+  *  All rights reserved.
+  *
+  *  Andy Adamson	<andros@umich.edu>
+@@ -70,53 +70,154 @@
+ 
+ DEFINE_SPINLOCK(krb5_seq_lock);
+ 
+-u32
+-gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
++static char *
++setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
++{
++	__be16 *ptr, *krb5_hdr;
++	int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
++
++	token->len = g_token_size(&ctx->mech_used, body_size);
++
++	ptr = (__be16 *)token->data;
++	g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr);
++
++	/* ptr now at start of header described in rfc 1964, section 1.2.1: */
++	krb5_hdr = ptr;
++	*ptr++ = KG_TOK_MIC_MSG;
++	*ptr++ = cpu_to_le16(ctx->gk5e->signalg);
++	*ptr++ = SEAL_ALG_NONE;
++	*ptr++ = 0xffff;
++
++	return (char *)krb5_hdr;
++}
++
++static void *
++setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
++{
++	__be16 *ptr, *krb5_hdr;
++	u8 *p, flags = 0x00;
++
++	if ((ctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
++		flags |= 0x01;
++	if (ctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY)
++		flags |= 0x04;
++
++	/* Per rfc 4121, sec 4.2.6.1, there is no header,
++	 * just start the token */
++	krb5_hdr = ptr = (__be16 *)token->data;
++
++	*ptr++ = KG2_TOK_MIC;
++	p = (u8 *)ptr;
++	*p++ = flags;
++	*p++ = 0xff;
++	ptr = (__be16 *)p;
++	*ptr++ = 0xffff;
++	*ptr++ = 0xffff;
++
++	token->len = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
++	return krb5_hdr;
++}
++
++static u32
++gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
+ 		struct xdr_netobj *token)
+ {
+-	struct krb5_ctx		*ctx = gss_ctx->internal_ctx_id;
+-	char			cksumdata[16];
+-	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
+-	unsigned char		*ptr, *msg_start;
++	char			cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj	md5cksum = {.len = sizeof(cksumdata),
++					    .data = cksumdata};
++	void			*ptr;
+ 	s32			now;
+ 	u32			seq_send;
++	u8			*cksumkey;
+ 
+-	dprintk("RPC:       gss_krb5_seal\n");
++	dprintk("RPC:       %s\n", __func__);
+ 	BUG_ON(ctx == NULL);
+ 
+ 	now = get_seconds();
+ 
+-	token->len = g_token_size(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8);
++	ptr = setup_token(ctx, token);
+ 
+-	ptr = token->data;
+-	g_make_token_header(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8, &ptr);
++	if (ctx->gk5e->keyed_cksum)
++		cksumkey = ctx->cksum;
++	else
++		cksumkey = NULL;
+ 
+-	/* ptr now at header described in rfc 1964, section 1.2.1: */
+-	ptr[0] = (unsigned char) ((KG_TOK_MIC_MSG >> 8) & 0xff);
+-	ptr[1] = (unsigned char) (KG_TOK_MIC_MSG & 0xff);
++	if (make_checksum(ctx, ptr, 8, text, 0, cksumkey,
++			  KG_USAGE_SIGN, &md5cksum))
++		return GSS_S_FAILURE;
+ 
+-	msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8;
++	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
+ 
+-	*(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
+-	memset(ptr + 4, 0xff, 4);
++	spin_lock(&krb5_seq_lock);
++	seq_send = ctx->seq_send++;
++	spin_unlock(&krb5_seq_lock);
+ 
+-	if (make_checksum("md5", ptr, 8, text, 0, &md5cksum))
++	if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
++			      seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
+ 		return GSS_S_FAILURE;
+ 
+-	if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
+-			  md5cksum.data, md5cksum.len))
+-		return GSS_S_FAILURE;
++	return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
++}
++
++u32
++gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
++		struct xdr_netobj *token)
++{
++	char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj cksumobj = { .len = sizeof(cksumdata),
++				       .data = cksumdata};
++	void *krb5_hdr;
++	s32 now;
++	u64 seq_send;
++	u8 *cksumkey;
++	unsigned int cksum_usage;
++
++	dprintk("RPC:       %s\n", __func__);
+ 
+-	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
++	krb5_hdr = setup_token_v2(ctx, token);
+ 
++	/* Set up the sequence number. Now 64-bits in clear
++	 * text and w/o direction indicator */
+ 	spin_lock(&krb5_seq_lock);
+-	seq_send = ctx->seq_send++;
++	seq_send = ctx->seq_send64++;
+ 	spin_unlock(&krb5_seq_lock);
++	*((u64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send);
+ 
+-	if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
+-			      seq_send, ptr + GSS_KRB5_TOK_HDR_LEN,
+-			      ptr + 8))
++	if (ctx->initiate) {
++		cksumkey = ctx->initiator_sign;
++		cksum_usage = KG_USAGE_INITIATOR_SIGN;
++	} else {
++		cksumkey = ctx->acceptor_sign;
++		cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
++	}
++
++	if (make_checksum_v2(ctx, krb5_hdr, GSS_KRB5_TOK_HDR_LEN,
++			     text, 0, cksumkey, cksum_usage, &cksumobj))
+ 		return GSS_S_FAILURE;
+ 
++	memcpy(krb5_hdr + GSS_KRB5_TOK_HDR_LEN, cksumobj.data, cksumobj.len);
++
++	now = get_seconds();
++
+ 	return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
+ }
++
++u32
++gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
++		     struct xdr_netobj *token)
++{
++	struct krb5_ctx		*ctx = gss_ctx->internal_ctx_id;
++
++	switch (ctx->enctype) {
++	default:
++		BUG();
++	case ENCTYPE_DES_CBC_RAW:
++	case ENCTYPE_DES3_CBC_RAW:
++	case ENCTYPE_ARCFOUR_HMAC:
++		return gss_get_mic_v1(ctx, text, token);
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		return gss_get_mic_v2(ctx, text, token);
++	}
++}
++
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seqnum.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seqnum.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seqnum.c	2010-08-23 11:01:00.393496180 -0400
+@@ -39,14 +39,51 @@
+ # define RPCDBG_FACILITY        RPCDBG_AUTH
+ #endif
+ 
++static s32
++krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
++		      unsigned char *cksum, unsigned char *buf)
++{
++	struct crypto_blkcipher *cipher;
++	unsigned char plain[8];
++	s32 code;
++
++	dprintk("RPC:       %s:\n", __func__);
++	cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
++					CRYPTO_ALG_ASYNC);
++	if (IS_ERR(cipher))
++		return PTR_ERR(cipher);
++
++	plain[0] = (unsigned char) ((seqnum >> 24) & 0xff);
++	plain[1] = (unsigned char) ((seqnum >> 16) & 0xff);
++	plain[2] = (unsigned char) ((seqnum >> 8) & 0xff);
++	plain[3] = (unsigned char) ((seqnum >> 0) & 0xff);
++	plain[4] = direction;
++	plain[5] = direction;
++	plain[6] = direction;
++	plain[7] = direction;
++
++	code = krb5_rc4_setup_seq_key(kctx, cipher, cksum);
++	if (code)
++		goto out;
++
++	code = krb5_encrypt(cipher, cksum, plain, buf, 8);
++out:
++	crypto_free_blkcipher(cipher);
++	return code;
++}
+ s32
+-krb5_make_seq_num(struct crypto_blkcipher *key,
++krb5_make_seq_num(struct krb5_ctx *kctx,
++		struct crypto_blkcipher *key,
+ 		int direction,
+ 		u32 seqnum,
+ 		unsigned char *cksum, unsigned char *buf)
+ {
+ 	unsigned char plain[8];
+ 
++	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
++		return krb5_make_rc4_seq_num(kctx, direction, seqnum,
++					     cksum, buf);
++
+ 	plain[0] = (unsigned char) (seqnum & 0xff);
+ 	plain[1] = (unsigned char) ((seqnum >> 8) & 0xff);
+ 	plain[2] = (unsigned char) ((seqnum >> 16) & 0xff);
+@@ -60,17 +97,59 @@ krb5_make_seq_num(struct crypto_blkciphe
+ 	return krb5_encrypt(key, cksum, plain, buf, 8);
+ }
+ 
++static s32
++krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
++		     unsigned char *buf, int *direction, s32 *seqnum)
++{
++	struct crypto_blkcipher *cipher;
++	unsigned char plain[8];
++	s32 code;
++
++	dprintk("RPC:       %s:\n", __func__);
++	cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
++					CRYPTO_ALG_ASYNC);
++	if (IS_ERR(cipher))
++		return PTR_ERR(cipher);
++
++	code = krb5_rc4_setup_seq_key(kctx, cipher, cksum);
++	if (code)
++		goto out;
++
++	code = krb5_decrypt(cipher, cksum, buf, plain, 8);
++	if (code)
++		goto out;
++
++	if ((plain[4] != plain[5]) || (plain[4] != plain[6])
++				   || (plain[4] != plain[7])) {
++		code = (s32)KG_BAD_SEQ;
++		goto out;
++	}
++
++	*direction = plain[4];
++
++	*seqnum = ((plain[0] << 24) | (plain[1] << 16) |
++					(plain[2] << 8) | (plain[3]));
++out:
++	crypto_free_blkcipher(cipher);
++	return code;
++}
++
+ s32
+-krb5_get_seq_num(struct crypto_blkcipher *key,
++krb5_get_seq_num(struct krb5_ctx *kctx,
+ 	       unsigned char *cksum,
+ 	       unsigned char *buf,
+ 	       int *direction, u32 *seqnum)
+ {
+ 	s32 code;
+ 	unsigned char plain[8];
++	struct crypto_blkcipher *key = kctx->seq;
+ 
+ 	dprintk("RPC:       krb5_get_seq_num:\n");
+ 
++	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
++		return krb5_get_rc4_seq_num(kctx, cksum, buf,
++					    direction, seqnum);
++
+ 	if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
+ 		return code;
+ 
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_unseal.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_unseal.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_unseal.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_unseal.c	2010-08-23 11:01:00.393496180 -0400
+@@ -3,7 +3,7 @@
+  *
+  *  Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5unseal.c
+  *
+- *  Copyright (c) 2000 The Regents of the University of Michigan.
++ *  Copyright (c) 2000-2008 The Regents of the University of Michigan.
+  *  All rights reserved.
+  *
+  *  Andy Adamson   <andros@umich.edu>
+@@ -70,20 +70,21 @@
+ /* read_token is a mic token, and message_buffer is the data that the mic was
+  * supposedly taken over. */
+ 
+-u32
+-gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
++static u32
++gss_verify_mic_v1(struct krb5_ctx *ctx,
+ 		struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
+ {
+-	struct krb5_ctx		*ctx = gss_ctx->internal_ctx_id;
+ 	int			signalg;
+ 	int			sealalg;
+-	char			cksumdata[16];
+-	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
++	char			cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj	md5cksum = {.len = sizeof(cksumdata),
++					    .data = cksumdata};
+ 	s32			now;
+ 	int			direction;
+ 	u32			seqnum;
+ 	unsigned char		*ptr = (unsigned char *)read_token->data;
+ 	int			bodysize;
++	u8			*cksumkey;
+ 
+ 	dprintk("RPC:       krb5_read_token\n");
+ 
+@@ -98,7 +99,7 @@ gss_verify_mic_kerberos(struct gss_ctx *
+ 	/* XXX sanity-check bodysize?? */
+ 
+ 	signalg = ptr[2] + (ptr[3] << 8);
+-	if (signalg != SGN_ALG_DES_MAC_MD5)
++	if (signalg != ctx->gk5e->signalg)
+ 		return GSS_S_DEFECTIVE_TOKEN;
+ 
+ 	sealalg = ptr[4] + (ptr[5] << 8);
+@@ -108,13 +109,17 @@ gss_verify_mic_kerberos(struct gss_ctx *
+ 	if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
+ 		return GSS_S_DEFECTIVE_TOKEN;
+ 
+-	if (make_checksum("md5", ptr, 8, message_buffer, 0, &md5cksum))
+-		return GSS_S_FAILURE;
++	if (ctx->gk5e->keyed_cksum)
++		cksumkey = ctx->cksum;
++	else
++		cksumkey = NULL;
+ 
+-	if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16))
++	if (make_checksum(ctx, ptr, 8, message_buffer, 0,
++			  cksumkey, KG_USAGE_SIGN, &md5cksum))
+ 		return GSS_S_FAILURE;
+ 
+-	if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
++	if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
++					ctx->gk5e->cksumlength))
+ 		return GSS_S_BAD_SIG;
+ 
+ 	/* it got through unscathed.  Make sure the context is unexpired */
+@@ -126,7 +131,8 @@ gss_verify_mic_kerberos(struct gss_ctx *
+ 
+ 	/* do sequencing checks */
+ 
+-	if (krb5_get_seq_num(ctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, &direction, &seqnum))
++	if (krb5_get_seq_num(ctx, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
++			     &direction, &seqnum))
+ 		return GSS_S_FAILURE;
+ 
+ 	if ((ctx->initiate && direction != 0xff) ||
+@@ -135,3 +141,86 @@ gss_verify_mic_kerberos(struct gss_ctx *
+ 
+ 	return GSS_S_COMPLETE;
+ }
++
++/* Verify a v2 MIC token against message_buffer; returns a GSS_S_* code. */
++static u32
++gss_verify_mic_v2(struct krb5_ctx *ctx,
++		struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
++{
++	char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj cksumobj = {.len = sizeof(cksumdata),
++				      .data = cksumdata};
++	s32 now;
++	u64 seqnum;
++	u8 *ptr = read_token->data;
++	u8 *cksumkey;
++	u8 flags;
++	int i;
++	unsigned int cksum_usage;
++
++	dprintk("RPC:       %s\n", __func__);
++
++	if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_MIC)
++		return GSS_S_DEFECTIVE_TOKEN;
++
++	flags = ptr[2];
++	if ((!ctx->initiate && (flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)) ||
++	    (ctx->initiate && !(flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)))
++		return GSS_S_BAD_SIG;
++
++	if (flags & KG2_TOKEN_FLAG_SEALED) {
++		dprintk("%s: token has unexpected sealed flag\n", __func__);
++		return GSS_S_FAILURE;
++	}
++
++	for (i = 3; i < 8; i++)
++		if (ptr[i] != 0xff)
++			return GSS_S_DEFECTIVE_TOKEN;
++
++	if (ctx->initiate) {
++		cksumkey = ctx->acceptor_sign;
++		cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
++	} else {
++		cksumkey = ctx->initiator_sign;
++		cksum_usage = KG_USAGE_INITIATOR_SIGN;
++	}
++
++	if (make_checksum_v2(ctx, ptr, GSS_KRB5_TOK_HDR_LEN, message_buffer, 0,
++			     cksumkey, cksum_usage, &cksumobj))
++		return GSS_S_FAILURE;
++
++	if (memcmp(cksumobj.data, ptr + GSS_KRB5_TOK_HDR_LEN,
++				ctx->gk5e->cksumlength))
++		return GSS_S_BAD_SIG;
++
++	/* it got through unscathed.  Make sure the context is unexpired */
++	now = get_seconds();
++	if (now > ctx->endtime)
++		return GSS_S_CONTEXT_EXPIRED;
++
++	/* The 64-bit big-endian sequence number sits at byte offset 8 of the
++	 * token.  It is read here, but no sequencing check is performed. */
++	seqnum = be64_to_cpup((__be64 *)(ptr + 8));
++	return GSS_S_COMPLETE;
++}
++
++u32
++gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
++			struct xdr_buf *message_buffer,
++			struct xdr_netobj *read_token)
++{
++	struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
++
++	switch (ctx->enctype) {
++	default:
++		BUG();
++	case ENCTYPE_DES_CBC_RAW:
++	case ENCTYPE_DES3_CBC_RAW:
++	case ENCTYPE_ARCFOUR_HMAC:
++		return gss_verify_mic_v1(ctx, message_buffer, read_token);
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		return gss_verify_mic_v2(ctx, message_buffer, read_token);
++	}
++}
++
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_wrap.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_wrap.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_wrap.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_wrap.c	2010-08-23 11:01:00.394576083 -0400
+@@ -1,3 +1,33 @@
++/*
++ * COPYRIGHT (c) 2008
++ * The Regents of the University of Michigan
++ * ALL RIGHTS RESERVED
++ *
++ * Permission is granted to use, copy, create derivative works
++ * and redistribute this software and such derivative works
++ * for any purpose, so long as the name of The University of
++ * Michigan is not used in any advertising or publicity
++ * pertaining to the use of distribution of this software
++ * without specific, written prior authorization.  If the
++ * above copyright notice or any other identification of the
++ * University of Michigan is included in any copy of any
++ * portion of this software, then the disclaimer below must
++ * also be included.
++ *
++ * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
++ * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
++ * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
++ * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
++ * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
++ * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
++ * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
++ * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
++ * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
++ * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGES.
++ */
++
+ #include <linux/types.h>
+ #include <linux/jiffies.h>
+ #include <linux/sunrpc/gss_krb5.h>
+@@ -12,10 +42,7 @@
+ static inline int
+ gss_krb5_padding(int blocksize, int length)
+ {
+-	/* Most of the code is block-size independent but currently we
+-	 * use only 8: */
+-	BUG_ON(blocksize != 8);
+-	return 8 - (length & 7);
++	return blocksize - (length % blocksize);
+ }
+ 
+ static inline void
+@@ -86,8 +113,8 @@ out:
+ 	return 0;
+ }
+ 
+-static void
+-make_confounder(char *p, u32 conflen)
++void
++gss_krb5_make_confounder(char *p, u32 conflen)
+ {
+ 	static u64 i = 0;
+ 	u64 *q = (u64 *)p;
+@@ -127,69 +154,73 @@ make_confounder(char *p, u32 conflen)
+ 
+ /* XXX factor out common code with seal/unseal. */
+ 
+-u32
+-gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
++static u32
++gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
+ 		struct xdr_buf *buf, struct page **pages)
+ {
+-	struct krb5_ctx		*kctx = ctx->internal_ctx_id;
+-	char			cksumdata[16];
+-	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
++	char			cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj	md5cksum = {.len = sizeof(cksumdata),
++					    .data = cksumdata};
+ 	int			blocksize = 0, plainlen;
+ 	unsigned char		*ptr, *msg_start;
+ 	s32			now;
+ 	int			headlen;
+ 	struct page		**tmp_pages;
+ 	u32			seq_send;
++	u8			*cksumkey;
++	u32			conflen = kctx->gk5e->conflen;
+ 
+-	dprintk("RPC:       gss_wrap_kerberos\n");
++	dprintk("RPC:       %s\n", __func__);
+ 
+ 	now = get_seconds();
+ 
+ 	blocksize = crypto_blkcipher_blocksize(kctx->enc);
+ 	gss_krb5_add_padding(buf, offset, blocksize);
+ 	BUG_ON((buf->len - offset) % blocksize);
+-	plainlen = blocksize + buf->len - offset;
++	plainlen = conflen + buf->len - offset;
+ 
+-	headlen = g_token_size(&kctx->mech_used, 24 + plainlen) -
+-						(buf->len - offset);
++	headlen = g_token_size(&kctx->mech_used,
++		GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength + plainlen) -
++		(buf->len - offset);
+ 
+ 	ptr = buf->head[0].iov_base + offset;
+ 	/* shift data to make room for header. */
++	xdr_extend_head(buf, offset, headlen);
++
+ 	/* XXX Would be cleverer to encrypt while copying. */
+-	/* XXX bounds checking, slack, etc. */
+-	memmove(ptr + headlen, ptr, buf->head[0].iov_len - offset);
+-	buf->head[0].iov_len += headlen;
+-	buf->len += headlen;
+ 	BUG_ON((buf->len - offset - headlen) % blocksize);
+ 
+ 	g_make_token_header(&kctx->mech_used,
+-				GSS_KRB5_TOK_HDR_LEN + 8 + plainlen, &ptr);
++				GSS_KRB5_TOK_HDR_LEN +
++				kctx->gk5e->cksumlength + plainlen, &ptr);
+ 
+ 
+ 	/* ptr now at header described in rfc 1964, section 1.2.1: */
+ 	ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff);
+ 	ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff);
+ 
+-	msg_start = ptr + 24;
++	msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength;
+ 
+-	*(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
++	*(__be16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg);
+ 	memset(ptr + 4, 0xff, 4);
+-	*(__be16 *)(ptr + 4) = htons(SEAL_ALG_DES);
++	*(__be16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg);
+ 
+-	make_confounder(msg_start, blocksize);
++	gss_krb5_make_confounder(msg_start, conflen);
++
++	if (kctx->gk5e->keyed_cksum)
++		cksumkey = kctx->cksum;
++	else
++		cksumkey = NULL;
+ 
+ 	/* XXXJBF: UGH!: */
+ 	tmp_pages = buf->pages;
+ 	buf->pages = pages;
+-	if (make_checksum("md5", ptr, 8, buf,
+-				offset + headlen - blocksize, &md5cksum))
++	if (make_checksum(kctx, ptr, 8, buf, offset + headlen - conflen,
++					cksumkey, KG_USAGE_SEAL, &md5cksum))
+ 		return GSS_S_FAILURE;
+ 	buf->pages = tmp_pages;
+ 
+-	if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
+-			  md5cksum.data, md5cksum.len))
+-		return GSS_S_FAILURE;
+-	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
++	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
+ 
+ 	spin_lock(&krb5_seq_lock);
+ 	seq_send = kctx->seq_send++;
+@@ -197,25 +228,42 @@ gss_wrap_kerberos(struct gss_ctx *ctx, i
+ 
+ 	/* XXX would probably be more efficient to compute checksum
+ 	 * and encrypt at the same time: */
+-	if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
++	if ((krb5_make_seq_num(kctx, kctx->seq, kctx->initiate ? 0 : 0xff,
+ 			       seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
+ 		return GSS_S_FAILURE;
+ 
+-	if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
+-									pages))
+-		return GSS_S_FAILURE;
++	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
++		struct crypto_blkcipher *cipher;
++		int err;
++		cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
++						CRYPTO_ALG_ASYNC);
++		if (IS_ERR(cipher))
++			return GSS_S_FAILURE;
++
++		krb5_rc4_setup_enc_key(kctx, cipher, seq_send);
++
++		err = gss_encrypt_xdr_buf(cipher, buf,
++					  offset + headlen - conflen, pages);
++		crypto_free_blkcipher(cipher);
++		if (err)
++			return GSS_S_FAILURE;
++	} else {
++		if (gss_encrypt_xdr_buf(kctx->enc, buf,
++					offset + headlen - conflen, pages))
++			return GSS_S_FAILURE;
++	}
+ 
+ 	return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
+ }
+ 
+-u32
+-gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
++static u32
++gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
+ {
+-	struct krb5_ctx		*kctx = ctx->internal_ctx_id;
+ 	int			signalg;
+ 	int			sealalg;
+-	char			cksumdata[16];
+-	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
++	char			cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj	md5cksum = {.len = sizeof(cksumdata),
++					    .data = cksumdata};
+ 	s32			now;
+ 	int			direction;
+ 	s32			seqnum;
+@@ -224,6 +272,9 @@ gss_unwrap_kerberos(struct gss_ctx *ctx,
+ 	void			*data_start, *orig_start;
+ 	int			data_len;
+ 	int			blocksize;
++	u32			conflen = kctx->gk5e->conflen;
++	int			crypt_offset;
++	u8			*cksumkey;
+ 
+ 	dprintk("RPC:       gss_unwrap_kerberos\n");
+ 
+@@ -241,29 +292,65 @@ gss_unwrap_kerberos(struct gss_ctx *ctx,
+ 	/* get the sign and seal algorithms */
+ 
+ 	signalg = ptr[2] + (ptr[3] << 8);
+-	if (signalg != SGN_ALG_DES_MAC_MD5)
++	if (signalg != kctx->gk5e->signalg)
+ 		return GSS_S_DEFECTIVE_TOKEN;
+ 
+ 	sealalg = ptr[4] + (ptr[5] << 8);
+-	if (sealalg != SEAL_ALG_DES)
++	if (sealalg != kctx->gk5e->sealalg)
+ 		return GSS_S_DEFECTIVE_TOKEN;
+ 
+ 	if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
+ 		return GSS_S_DEFECTIVE_TOKEN;
+ 
+-	if (gss_decrypt_xdr_buf(kctx->enc, buf,
+-			ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base))
+-		return GSS_S_DEFECTIVE_TOKEN;
++	/*
++	 * Data starts after token header and checksum.  ptr points
++	 * to the beginning of the token header
++	 */
++	crypt_offset = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) -
++					(unsigned char *)buf->head[0].iov_base;
++
++	/*
++	 * Need plaintext seqnum to derive encryption key for arcfour-hmac
++	 */
++	if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN,
++			     ptr + 8, &direction, &seqnum))
++		return GSS_S_BAD_SIG;
+ 
+-	if (make_checksum("md5", ptr, 8, buf,
+-		 ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base, &md5cksum))
+-		return GSS_S_FAILURE;
++	if ((kctx->initiate && direction != 0xff) ||
++	    (!kctx->initiate && direction != 0))
++		return GSS_S_BAD_SIG;
++
++	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
++		struct crypto_blkcipher *cipher;
++		int err;
++
++		cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
++						CRYPTO_ALG_ASYNC);
++		if (IS_ERR(cipher))
++			return GSS_S_FAILURE;
++
++		krb5_rc4_setup_enc_key(kctx, cipher, seqnum);
++
++		err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset);
++		crypto_free_blkcipher(cipher);
++		if (err)
++			return GSS_S_DEFECTIVE_TOKEN;
++	} else {
++		if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset))
++			return GSS_S_DEFECTIVE_TOKEN;
++	}
+ 
+-	if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
+-			   md5cksum.data, md5cksum.len))
++	if (kctx->gk5e->keyed_cksum)
++		cksumkey = kctx->cksum;
++	else
++		cksumkey = NULL;
++
++	if (make_checksum(kctx, ptr, 8, buf, crypt_offset,
++					cksumkey, KG_USAGE_SEAL, &md5cksum))
+ 		return GSS_S_FAILURE;
+ 
+-	if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
++	if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
++						kctx->gk5e->cksumlength))
+ 		return GSS_S_BAD_SIG;
+ 
+ 	/* it got through unscathed.  Make sure the context is unexpired */
+@@ -275,19 +362,12 @@ gss_unwrap_kerberos(struct gss_ctx *ctx,
+ 
+ 	/* do sequencing checks */
+ 
+-	if (krb5_get_seq_num(kctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
+-				    &direction, &seqnum))
+-		return GSS_S_BAD_SIG;
+-
+-	if ((kctx->initiate && direction != 0xff) ||
+-	    (!kctx->initiate && direction != 0))
+-		return GSS_S_BAD_SIG;
+-
+ 	/* Copy the data back to the right position.  XXX: Would probably be
+ 	 * better to copy and encrypt at the same time. */
+ 
+ 	blocksize = crypto_blkcipher_blocksize(kctx->enc);
+-	data_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8 + blocksize;
++	data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
++					conflen;
+ 	orig_start = buf->head[0].iov_base + offset;
+ 	data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
+ 	memmove(orig_start, data_start, data_len);
+@@ -299,3 +379,209 @@ gss_unwrap_kerberos(struct gss_ctx *ctx,
+ 
+ 	return GSS_S_COMPLETE;
+ }
++
++/*
++ * We cannot currently handle tokens with rotated data (that needs a
++ * generalized in-place rotate).  Return 0 if the effective rotation is
++ * zero, else 1; a header-only token must not cause a divide by zero.
++ */
++static u32
++rotate_left(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, u16 rrc)
++{
++	unsigned int datalen = buf->len - offset - GSS_KRB5_TOK_HDR_LEN;
++	unsigned int realrrc = datalen ? rrc % datalen : rrc;
++
++	if (realrrc == 0)
++		return 0;
++	dprintk("%s: cannot process token with rotated data: "
++		"rrc %u, realrrc %u\n", __func__, rrc, realrrc);
++	return 1;
++}
++
++/* Wrap buf from "offset" in a sealed v2 (KG2_TOK_WRAP) token.  Returns a
++ * GSS_S_* status, or whatever non-zero error the encrypt_v2 hook reports. */
++static u32
++gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
++		     struct xdr_buf *buf, struct page **pages)
++{
++	u8		*ptr;
++	s32		now;
++	u8		flags = 0x00;
++	u16		ec = 0;
++	__be16		*be16ptr;
++	__be64		*be64ptr;
++	u32		err;
++
++	dprintk("RPC:       %s\n", __func__);
++
++	if (kctx->gk5e->encrypt_v2 == NULL)
++		return GSS_S_FAILURE;
++
++	/* make room for gss token header */
++	if (xdr_extend_head(buf, offset, GSS_KRB5_TOK_HDR_LEN))
++		return GSS_S_FAILURE;
++
++	/* construct gss token header */
++	ptr = buf->head[0].iov_base + offset;
++	*ptr++ = (unsigned char) ((KG2_TOK_WRAP>>8) & 0xff);
++	*ptr++ = (unsigned char) (KG2_TOK_WRAP & 0xff);
++
++	if ((kctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
++		flags |= KG2_TOKEN_FLAG_SENTBYACCEPTOR;
++	if ((kctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY) != 0)
++		flags |= KG2_TOKEN_FLAG_ACCEPTORSUBKEY;
++	/* We always do confidentiality in wrap tokens */
++	flags |= KG2_TOKEN_FLAG_SEALED;
++
++	*ptr++ = flags;
++	*ptr++ = 0xff;
++	be16ptr = (__be16 *)ptr;
++	*be16ptr++ = cpu_to_be16(ec);
++	/* "inner" token header always uses 0 for RRC */
++	*be16ptr++ = cpu_to_be16(0);
++
++	be64ptr = (__be64 *)be16ptr;
++	spin_lock(&krb5_seq_lock);
++	*be64ptr = cpu_to_be64(kctx->seq_send64++);
++	spin_unlock(&krb5_seq_lock);
++
++	err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, ec, pages);
++	if (err)
++		return err;
++
++	now = get_seconds();
++	return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
++}
++
++static u32
++gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
++{
++	s32		now;
++	u64		seqnum;
++	u8		*ptr;
++	u8		flags = 0x00;
++	u16		ec, rrc;
++	int		err;
++	u32		headskip, tailskip;
++	u8		decrypted_hdr[GSS_KRB5_TOK_HDR_LEN];
++	unsigned int	movelen;
++
++	/* check the v2 wrap header, decrypt, then strip header + headskip */
++	dprintk("RPC:       %s\n", __func__);
++
++	if (kctx->gk5e->decrypt_v2 == NULL)
++		return GSS_S_FAILURE;
++
++	ptr = buf->head[0].iov_base + offset;
++
++	if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_WRAP)
++		return GSS_S_DEFECTIVE_TOKEN;
++
++	flags = ptr[2];
++	if ((!kctx->initiate && (flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)) ||
++	    (kctx->initiate && !(flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)))
++		return GSS_S_BAD_SIG;
++
++	if ((flags & KG2_TOKEN_FLAG_SEALED) == 0) {
++		dprintk("%s: token missing expected sealed flag\n", __func__);
++		return GSS_S_DEFECTIVE_TOKEN;
++	}
++
++	if (ptr[3] != 0xff)
++		return GSS_S_DEFECTIVE_TOKEN;
++
++	ec = be16_to_cpup((__be16 *)(ptr + 4));
++	rrc = be16_to_cpup((__be16 *)(ptr + 6));
++
++	seqnum = be64_to_cpup((__be64 *)(ptr + 8));
++
++	if (rrc != 0) {
++		err = rotate_left(kctx, offset, buf, rrc);
++		if (err)
++			return GSS_S_FAILURE;
++	}
++
++	err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf,
++					&headskip, &tailskip);
++	if (err)
++		return GSS_S_FAILURE;
++
++	/*
++	 * Retrieve the decrypted gss token header and verify
++	 * it against the original
++	 */
++	err = read_bytes_from_xdr_buf(buf,
++				buf->len - GSS_KRB5_TOK_HDR_LEN - tailskip,
++				decrypted_hdr, GSS_KRB5_TOK_HDR_LEN);
++	if (err) {
++		dprintk("%s: error %u getting decrypted_hdr\n", __func__, err);
++		return GSS_S_FAILURE;
++	}
++	if (memcmp(ptr, decrypted_hdr, 6)
++				|| memcmp(ptr + 8, decrypted_hdr + 8, 8)) {
++		dprintk("%s: token hdr, plaintext hdr mismatch!\n", __func__);
++		return GSS_S_FAILURE;
++	}
++
++	/* NOTE(review): seqnum is decoded above but never checked */
++
++	/* it got through unscathed.  Make sure the context is unexpired */
++	now = get_seconds();
++	if (now > kctx->endtime)
++		return GSS_S_CONTEXT_EXPIRED;
++
++	/*
++	 * Move the head data back to the right position in xdr_buf.
++	 * We ignore any "ec" data since it might be in the head or
++	 * the tail, and we really don't need to deal with it.
++	 * Note that buf->head[0].iov_len may indicate the available
++	 * head buffer space rather than that actually occupied.
++	 */
++	movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len);
++	movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip;
++	BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
++							buf->head[0].iov_len);
++	memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
++	buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
++	buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
++
++	return GSS_S_COMPLETE;
++}
++
++u32
++gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
++		  struct xdr_buf *buf, struct page **pages)
++{
++	struct krb5_ctx	*kctx = gctx->internal_ctx_id;
++
++	switch (kctx->enctype) {
++	default:
++		BUG();
++	case ENCTYPE_DES_CBC_RAW:
++	case ENCTYPE_DES3_CBC_RAW:
++	case ENCTYPE_ARCFOUR_HMAC:
++		return gss_wrap_kerberos_v1(kctx, offset, buf, pages);
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		return gss_wrap_kerberos_v2(kctx, offset, buf, pages);
++	}
++}
++
++u32
++gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
++{
++	struct krb5_ctx	*kctx = gctx->internal_ctx_id;
++
++	switch (kctx->enctype) {
++	default:
++		BUG();
++	case ENCTYPE_DES_CBC_RAW:
++	case ENCTYPE_DES3_CBC_RAW:
++	case ENCTYPE_ARCFOUR_HMAC:
++		return gss_unwrap_kerberos_v1(kctx, offset, buf);
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		return gss_unwrap_kerberos_v2(kctx, offset, buf);
++	}
++}
++
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_mech_switch.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_mech_switch.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_mech_switch.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_mech_switch.c	2010-08-23 11:01:00.395574706 -0400
+@@ -249,14 +249,15 @@ EXPORT_SYMBOL_GPL(gss_mech_put);
+ int
+ gss_import_sec_context(const void *input_token, size_t bufsize,
+ 		       struct gss_api_mech	*mech,
+-		       struct gss_ctx		**ctx_id)
++		       struct gss_ctx		**ctx_id,
++		       gfp_t gfp_mask)
+ {
+-	if (!(*ctx_id = kzalloc(sizeof(**ctx_id), GFP_KERNEL)))
++	if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask)))
+ 		return -ENOMEM;
+ 	(*ctx_id)->mech_type = gss_mech_get(mech);
+ 
+ 	return mech->gm_ops
+-		->gss_import_sec_context(input_token, bufsize, *ctx_id);
++		->gss_import_sec_context(input_token, bufsize, *ctx_id, gfp_mask);
+ }
+ 
+ /* gss_get_mic: compute a mic over message and return mic_token. */
+@@ -285,6 +286,20 @@ gss_verify_mic(struct gss_ctx		*context_
+ 				 mic_token);
+ }
+ 
++/*
++ * This function is called from both the client and server code.
++ * Each makes guarantees about how much "slack" space is available
++ * for the underlying function in "buf"'s head and tail while
++ * performing the wrap.
++ *
++ * The client and server code allocate RPC_MAX_AUTH_SIZE extra
++ * space in both the head and tail which is available for use by
++ * the wrap function.
++ *
++ * Underlying functions should verify they do not use more than
++ * RPC_MAX_AUTH_SIZE of extra space in either the head or tail
++ * when performing the wrap.
++ */
+ u32
+ gss_wrap(struct gss_ctx	*ctx_id,
+ 	 int		offset,
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_spkm3_mech.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_spkm3_mech.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_spkm3_mech.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_spkm3_mech.c	2010-08-23 11:01:00.396574085 -0400
+@@ -84,13 +84,14 @@ simple_get_netobj(const void *p, const v
+ 
+ static int
+ gss_import_sec_context_spkm3(const void *p, size_t len,
+-				struct gss_ctx *ctx_id)
++				struct gss_ctx *ctx_id,
++				gfp_t gfp_mask)
+ {
+ 	const void *end = (const void *)((const char *)p + len);
+ 	struct	spkm3_ctx *ctx;
+ 	int	version;
+ 
+-	if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS)))
++	if (!(ctx = kzalloc(sizeof(*ctx), gfp_mask)))
+ 		goto out_err;
+ 
+ 	p = simple_get_bytes(p, end, &version, sizeof(version));
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/Makefile.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/Makefile
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/Makefile	2010-08-23 11:01:00.387574079 -0400
+@@ -10,7 +10,7 @@ auth_rpcgss-objs := auth_gss.o gss_gener
+ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
+ 
+ rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
+-	gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o
++	gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
+ 
+ obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
+ 
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/svcauth_gss.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/svcauth_gss.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/svcauth_gss.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/svcauth_gss.c	2010-08-23 11:01:00.396574085 -0400
+@@ -494,7 +494,7 @@ static int rsc_parse(struct cache_detail
+ 		len = qword_get(&mesg, buf, mlen);
+ 		if (len < 0)
+ 			goto out;
+-		status = gss_import_sec_context(buf, len, gm, &rsci.mechctx);
++		status = gss_import_sec_context(buf, len, gm, &rsci.mechctx, GFP_KERNEL);
+ 		if (status)
+ 			goto out;
+ 
+@@ -1315,6 +1315,14 @@ svcauth_gss_wrap_resp_priv(struct svc_rq
+ 	inpages = resbuf->pages;
+ 	/* XXX: Would be better to write some xdr helper functions for
+ 	 * nfs{2,3,4}xdr.c that place the data right, instead of copying: */
++
++	/*
++	 * If there is currently tail data, make sure there is
++	 * room for the head, tail, and 2 * RPC_MAX_AUTH_SIZE in
++	 * the page, and move the current tail data such that
++	 * there is RPC_MAX_AUTH_SIZE slack space available in
++	 * both the head and tail.
++	 */
+ 	if (resbuf->tail[0].iov_base) {
+ 		BUG_ON(resbuf->tail[0].iov_base >= resbuf->head[0].iov_base
+ 							+ PAGE_SIZE);
+@@ -1327,6 +1335,13 @@ svcauth_gss_wrap_resp_priv(struct svc_rq
+ 			resbuf->tail[0].iov_len);
+ 		resbuf->tail[0].iov_base += RPC_MAX_AUTH_SIZE;
+ 	}
++	/*
++	 * If there is no current tail data, make sure there is
++	 * room for the head data, and 2 * RPC_MAX_AUTH_SIZE in the
++	 * allotted page, and set up tail information such that there
++	 * is RPC_MAX_AUTH_SIZE slack space available in both the
++	 * head and tail.
++	 */
+ 	if (resbuf->tail[0].iov_base == NULL) {
+ 		if (resbuf->head[0].iov_len + 2*RPC_MAX_AUTH_SIZE > PAGE_SIZE)
+ 			return -ENOMEM;
+diff -up linux-2.6.34.noarch/net/sunrpc/clnt.c.orig linux-2.6.34.noarch/net/sunrpc/clnt.c
+--- linux-2.6.34.noarch/net/sunrpc/clnt.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/clnt.c	2010-08-23 11:01:00.397622347 -0400
+@@ -556,26 +556,16 @@ static const struct rpc_call_ops rpc_def
+  */
+ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
+ {
+-	struct rpc_task *task, *ret;
++	struct rpc_task *task;
+ 
+ 	task = rpc_new_task(task_setup_data);
+-	if (task == NULL) {
+-		rpc_release_calldata(task_setup_data->callback_ops,
+-				task_setup_data->callback_data);
+-		ret = ERR_PTR(-ENOMEM);
++	if (IS_ERR(task))
+ 		goto out;
+-	}
+ 
+-	if (task->tk_status != 0) {
+-		ret = ERR_PTR(task->tk_status);
+-		rpc_put_task(task);
+-		goto out;
+-	}
+ 	atomic_inc(&task->tk_count);
+ 	rpc_execute(task);
+-	ret = task;
+ out:
+-	return ret;
++	return task;
+ }
+ EXPORT_SYMBOL_GPL(rpc_run_task);
+ 
+@@ -657,9 +647,8 @@ struct rpc_task *rpc_run_bc_task(struct 
+ 	 * Create an rpc_task to send the data
+ 	 */
+ 	task = rpc_new_task(&task_setup_data);
+-	if (!task) {
++	if (IS_ERR(task)) {
+ 		xprt_free_bc_request(req);
+-		task = ERR_PTR(-ENOMEM);
+ 		goto out;
+ 	}
+ 	task->tk_rqstp = req;
+diff -up linux-2.6.34.noarch/net/sunrpc/sched.c.orig linux-2.6.34.noarch/net/sunrpc/sched.c
+--- linux-2.6.34.noarch/net/sunrpc/sched.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/sched.c	2010-08-23 11:01:00.398564598 -0400
+@@ -25,7 +25,6 @@
+ 
+ #ifdef RPC_DEBUG
+ #define RPCDBG_FACILITY		RPCDBG_SCHED
+-#define RPC_TASK_MAGIC_ID	0xf00baa
+ #endif
+ 
+ /*
+@@ -237,7 +236,6 @@ static void rpc_task_set_debuginfo(struc
+ {
+ 	static atomic_t rpc_pid;
+ 
+-	task->tk_magic = RPC_TASK_MAGIC_ID;
+ 	task->tk_pid = atomic_inc_return(&rpc_pid);
+ }
+ #else
+@@ -360,9 +358,6 @@ static void __rpc_do_wake_up_task(struct
+ 	dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n",
+ 			task->tk_pid, jiffies);
+ 
+-#ifdef RPC_DEBUG
+-	BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
+-#endif
+ 	/* Has the task been executed yet? If not, we cannot wake it up! */
+ 	if (!RPC_IS_ACTIVATED(task)) {
+ 		printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
+@@ -834,7 +829,7 @@ static void rpc_init_task(struct rpc_tas
+ 	}
+ 
+ 	/* starting timestamp */
+-	task->tk_start = jiffies;
++	task->tk_start = ktime_get();
+ 
+ 	dprintk("RPC:       new task initialized, procpid %u\n",
+ 				task_pid_nr(current));
+@@ -856,16 +851,23 @@ struct rpc_task *rpc_new_task(const stru
+ 
+ 	if (task == NULL) {
+ 		task = rpc_alloc_task();
+-		if (task == NULL)
+-			goto out;
++		if (task == NULL) {
++			rpc_release_calldata(setup_data->callback_ops,
++					setup_data->callback_data);
++			return ERR_PTR(-ENOMEM);
++		}
+ 		flags = RPC_TASK_DYNAMIC;
+ 	}
+ 
+ 	rpc_init_task(task, setup_data);
++	if (task->tk_status < 0) {
++		int err = task->tk_status;
++		rpc_put_task(task);
++		return ERR_PTR(err);
++	}
+ 
+ 	task->tk_flags |= flags;
+ 	dprintk("RPC:       allocated task %p\n", task);
+-out:
+ 	return task;
+ }
+ 
+@@ -909,9 +911,6 @@ EXPORT_SYMBOL_GPL(rpc_put_task);
+ 
+ static void rpc_release_task(struct rpc_task *task)
+ {
+-#ifdef RPC_DEBUG
+-	BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
+-#endif
+ 	dprintk("RPC: %5u release task\n", task->tk_pid);
+ 
+ 	if (!list_empty(&task->tk_task)) {
+@@ -923,9 +922,6 @@ static void rpc_release_task(struct rpc_
+ 	}
+ 	BUG_ON (RPC_IS_QUEUED(task));
+ 
+-#ifdef RPC_DEBUG
+-	task->tk_magic = 0;
+-#endif
+ 	/* Wake up anyone who is waiting for task completion */
+ 	rpc_mark_complete_task(task);
+ 
+diff -up linux-2.6.34.noarch/net/sunrpc/stats.c.orig linux-2.6.34.noarch/net/sunrpc/stats.c
+--- linux-2.6.34.noarch/net/sunrpc/stats.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/stats.c	2010-08-23 11:01:00.399574225 -0400
+@@ -144,7 +144,7 @@ void rpc_count_iostats(struct rpc_task *
+ 	struct rpc_rqst *req = task->tk_rqstp;
+ 	struct rpc_iostats *stats;
+ 	struct rpc_iostats *op_metrics;
+-	long rtt, execute, queue;
++	ktime_t delta;
+ 
+ 	if (!task->tk_client || !task->tk_client->cl_metrics || !req)
+ 		return;
+@@ -156,23 +156,16 @@ void rpc_count_iostats(struct rpc_task *
+ 	op_metrics->om_ntrans += req->rq_ntrans;
+ 	op_metrics->om_timeouts += task->tk_timeouts;
+ 
+-	op_metrics->om_bytes_sent += task->tk_bytes_sent;
++	op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent;
+ 	op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
+ 
+-	queue = (long)req->rq_xtime - task->tk_start;
+-	if (queue < 0)
+-		queue = -queue;
+-	op_metrics->om_queue += queue;
+-
+-	rtt = task->tk_rtt;
+-	if (rtt < 0)
+-		rtt = -rtt;
+-	op_metrics->om_rtt += rtt;
+-
+-	execute = (long)jiffies - task->tk_start;
+-	if (execute < 0)
+-		execute = -execute;
+-	op_metrics->om_execute += execute;
++	delta = ktime_sub(req->rq_xtime, task->tk_start);
++	op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
++
++	op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt);
++
++	delta = ktime_sub(ktime_get(), task->tk_start);
++	op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta);
+ }
+ 
+ static void _print_name(struct seq_file *seq, unsigned int op,
+@@ -186,8 +179,6 @@ static void _print_name(struct seq_file 
+ 		seq_printf(seq, "\t%12u: ", op);
+ }
+ 
+-#define MILLISECS_PER_JIFFY	(1000 / HZ)
+-
+ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
+ {
+ 	struct rpc_iostats *stats = clnt->cl_metrics;
+@@ -214,9 +205,9 @@ void rpc_print_iostats(struct seq_file *
+ 				metrics->om_timeouts,
+ 				metrics->om_bytes_sent,
+ 				metrics->om_bytes_recv,
+-				metrics->om_queue * MILLISECS_PER_JIFFY,
+-				metrics->om_rtt * MILLISECS_PER_JIFFY,
+-				metrics->om_execute * MILLISECS_PER_JIFFY);
++				ktime_to_ms(metrics->om_queue),
++				ktime_to_ms(metrics->om_rtt),
++				ktime_to_ms(metrics->om_execute));
+ 	}
+ }
+ EXPORT_SYMBOL_GPL(rpc_print_iostats);
+diff -up linux-2.6.34.noarch/net/sunrpc/xdr.c.orig linux-2.6.34.noarch/net/sunrpc/xdr.c
+--- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-23 11:01:00.400574086 -0400
+@@ -762,6 +762,7 @@ int write_bytes_to_xdr_buf(struct xdr_bu
+ 	__write_bytes_to_xdr_buf(&subbuf, obj, len);
+ 	return 0;
+ }
++EXPORT_SYMBOL_GPL(write_bytes_to_xdr_buf);
+ 
+ int
+ xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj)
+diff -up linux-2.6.34.noarch/net/sunrpc/xprt.c.orig linux-2.6.34.noarch/net/sunrpc/xprt.c
+--- linux-2.6.34.noarch/net/sunrpc/xprt.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xprt.c	2010-08-23 11:01:00.401372963 -0400
+@@ -43,6 +43,7 @@
+ #include <linux/interrupt.h>
+ #include <linux/workqueue.h>
+ #include <linux/net.h>
++#include <linux/ktime.h>
+ 
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/sunrpc/metrics.h>
+@@ -62,7 +63,6 @@
+  * Local functions
+  */
+ static void	xprt_request_init(struct rpc_task *, struct rpc_xprt *);
+-static inline void	do_xprt_reserve(struct rpc_task *);
+ static void	xprt_connect_status(struct rpc_task *task);
+ static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
+ 
+@@ -711,12 +711,16 @@ void xprt_connect(struct rpc_task *task)
+ 		if (task->tk_rqstp)
+ 			task->tk_rqstp->rq_bytes_sent = 0;
+ 
+-		task->tk_timeout = xprt->connect_timeout;
++		task->tk_timeout = task->tk_rqstp->rq_timeout;
+ 		rpc_sleep_on(&xprt->pending, task, xprt_connect_status);
++
++		if (test_bit(XPRT_CLOSING, &xprt->state))
++			return;
++		if (xprt_test_and_set_connecting(xprt))
++			return;
+ 		xprt->stat.connect_start = jiffies;
+ 		xprt->ops->connect(task);
+ 	}
+-	return;
+ }
+ 
+ static void xprt_connect_status(struct rpc_task *task)
+@@ -771,25 +775,19 @@ struct rpc_rqst *xprt_lookup_rqst(struct
+ }
+ EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
+ 
+-/**
+- * xprt_update_rtt - update an RPC client's RTT state after receiving a reply
+- * @task: RPC request that recently completed
+- *
+- */
+-void xprt_update_rtt(struct rpc_task *task)
++static void xprt_update_rtt(struct rpc_task *task)
+ {
+ 	struct rpc_rqst *req = task->tk_rqstp;
+ 	struct rpc_rtt *rtt = task->tk_client->cl_rtt;
+ 	unsigned timer = task->tk_msg.rpc_proc->p_timer;
++	long m = usecs_to_jiffies(ktime_to_us(req->rq_rtt));
+ 
+ 	if (timer) {
+ 		if (req->rq_ntrans == 1)
+-			rpc_update_rtt(rtt, timer,
+-					(long)jiffies - req->rq_xtime);
++			rpc_update_rtt(rtt, timer, m);
+ 		rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
+ 	}
+ }
+-EXPORT_SYMBOL_GPL(xprt_update_rtt);
+ 
+ /**
+  * xprt_complete_rqst - called when reply processing is complete
+@@ -807,7 +805,9 @@ void xprt_complete_rqst(struct rpc_task 
+ 			task->tk_pid, ntohl(req->rq_xid), copied);
+ 
+ 	xprt->stat.recvs++;
+-	task->tk_rtt = (long)jiffies - req->rq_xtime;
++	req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime);
++	if (xprt->ops->timer != NULL)
++		xprt_update_rtt(task);
+ 
+ 	list_del_init(&req->rq_list);
+ 	req->rq_private_buf.len = copied;
+@@ -906,7 +906,7 @@ void xprt_transmit(struct rpc_task *task
+ 		return;
+ 
+ 	req->rq_connect_cookie = xprt->connect_cookie;
+-	req->rq_xtime = jiffies;
++	req->rq_xtime = ktime_get();
+ 	status = xprt->ops->send_request(task);
+ 	if (status != 0) {
+ 		task->tk_status = status;
+@@ -935,7 +935,7 @@ void xprt_transmit(struct rpc_task *task
+ 	spin_unlock_bh(&xprt->transport_lock);
+ }
+ 
+-static inline void do_xprt_reserve(struct rpc_task *task)
++static void xprt_alloc_slot(struct rpc_task *task)
+ {
+ 	struct rpc_xprt	*xprt = task->tk_xprt;
+ 
+@@ -955,6 +955,16 @@ static inline void do_xprt_reserve(struc
+ 	rpc_sleep_on(&xprt->backlog, task, NULL);
+ }
+ 
++static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
++{
++	memset(req, 0, sizeof(*req));	/* mark unused */
++
++	spin_lock(&xprt->reserve_lock);
++	list_add(&req->rq_list, &xprt->free);
++	rpc_wake_up_next(&xprt->backlog);
++	spin_unlock(&xprt->reserve_lock);
++}
++
+ /**
+  * xprt_reserve - allocate an RPC request slot
+  * @task: RPC task requesting a slot allocation
+@@ -968,7 +978,7 @@ void xprt_reserve(struct rpc_task *task)
+ 
+ 	task->tk_status = -EIO;
+ 	spin_lock(&xprt->reserve_lock);
+-	do_xprt_reserve(task);
++	xprt_alloc_slot(task);
+ 	spin_unlock(&xprt->reserve_lock);
+ }
+ 
+@@ -1006,14 +1016,10 @@ void xprt_release(struct rpc_task *task)
+ {
+ 	struct rpc_xprt	*xprt;
+ 	struct rpc_rqst	*req;
+-	int is_bc_request;
+ 
+ 	if (!(req = task->tk_rqstp))
+ 		return;
+ 
+-	/* Preallocated backchannel request? */
+-	is_bc_request = bc_prealloc(req);
+-
+ 	xprt = req->rq_xprt;
+ 	rpc_count_iostats(task);
+ 	spin_lock_bh(&xprt->transport_lock);
+@@ -1027,21 +1033,16 @@ void xprt_release(struct rpc_task *task)
+ 		mod_timer(&xprt->timer,
+ 				xprt->last_used + xprt->idle_timeout);
+ 	spin_unlock_bh(&xprt->transport_lock);
+-	if (!bc_prealloc(req))
++	if (req->rq_buffer)
+ 		xprt->ops->buf_free(req->rq_buffer);
+ 	task->tk_rqstp = NULL;
+ 	if (req->rq_release_snd_buf)
+ 		req->rq_release_snd_buf(req);
+ 
+ 	dprintk("RPC: %5u release request %p\n", task->tk_pid, req);
+-	if (likely(!is_bc_request)) {
+-		memset(req, 0, sizeof(*req));	/* mark unused */
+-
+-		spin_lock(&xprt->reserve_lock);
+-		list_add(&req->rq_list, &xprt->free);
+-		rpc_wake_up_next(&xprt->backlog);
+-		spin_unlock(&xprt->reserve_lock);
+-	} else
++	if (likely(!bc_prealloc(req)))
++		xprt_free_slot(xprt, req);
++	else
+ 		xprt_free_bc_request(req);
+ }
+ 
+diff -up linux-2.6.34.noarch/net/sunrpc/xprtrdma/transport.c.orig linux-2.6.34.noarch/net/sunrpc/xprtrdma/transport.c
+--- linux-2.6.34.noarch/net/sunrpc/xprtrdma/transport.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xprtrdma/transport.c	2010-08-23 11:01:00.402563985 -0400
+@@ -305,7 +305,6 @@ xprt_setup_rdma(struct xprt_create *args
+ 	/* 60 second timeout, no retries */
+ 	xprt->timeout = &xprt_rdma_default_timeout;
+ 	xprt->bind_timeout = (60U * HZ);
+-	xprt->connect_timeout = (60U * HZ);
+ 	xprt->reestablish_timeout = (5U * HZ);
+ 	xprt->idle_timeout = (5U * 60 * HZ);
+ 
+@@ -449,21 +448,19 @@ xprt_rdma_connect(struct rpc_task *task)
+ 	struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt;
+ 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ 
+-	if (!xprt_test_and_set_connecting(xprt)) {
+-		if (r_xprt->rx_ep.rep_connected != 0) {
+-			/* Reconnect */
+-			schedule_delayed_work(&r_xprt->rdma_connect,
+-				xprt->reestablish_timeout);
+-			xprt->reestablish_timeout <<= 1;
+-			if (xprt->reestablish_timeout > (30 * HZ))
+-				xprt->reestablish_timeout = (30 * HZ);
+-			else if (xprt->reestablish_timeout < (5 * HZ))
+-				xprt->reestablish_timeout = (5 * HZ);
+-		} else {
+-			schedule_delayed_work(&r_xprt->rdma_connect, 0);
+-			if (!RPC_IS_ASYNC(task))
+-				flush_scheduled_work();
+-		}
++	if (r_xprt->rx_ep.rep_connected != 0) {
++		/* Reconnect */
++		schedule_delayed_work(&r_xprt->rdma_connect,
++			xprt->reestablish_timeout);
++		xprt->reestablish_timeout <<= 1;
++		if (xprt->reestablish_timeout > (30 * HZ))
++			xprt->reestablish_timeout = (30 * HZ);
++		else if (xprt->reestablish_timeout < (5 * HZ))
++			xprt->reestablish_timeout = (5 * HZ);
++	} else {
++		schedule_delayed_work(&r_xprt->rdma_connect, 0);
++		if (!RPC_IS_ASYNC(task))
++			flush_scheduled_work();
+ 	}
+ }
+ 
+@@ -677,7 +674,7 @@ xprt_rdma_send_request(struct rpc_task *
+ 	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
+ 		goto drop_connection;
+ 
+-	task->tk_bytes_sent += rqst->rq_snd_buf.len;
++	rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
+ 	rqst->rq_bytes_sent = 0;
+ 	return 0;
+ 
+diff -up linux-2.6.34.noarch/net/sunrpc/xprtsock.c.orig linux-2.6.34.noarch/net/sunrpc/xprtsock.c
+--- linux-2.6.34.noarch/net/sunrpc/xprtsock.c.orig	2010-08-23 11:00:23.890501549 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xprtsock.c	2010-08-23 11:01:00.403564023 -0400
+@@ -138,20 +138,6 @@ static ctl_table sunrpc_table[] = {
+ #endif
+ 
+ /*
+- * Time out for an RPC UDP socket connect.  UDP socket connects are
+- * synchronous, but we set a timeout anyway in case of resource
+- * exhaustion on the local host.
+- */
+-#define XS_UDP_CONN_TO		(5U * HZ)
+-
+-/*
+- * Wait duration for an RPC TCP connection to be established.  Solaris
+- * NFS over TCP uses 60 seconds, for example, which is in line with how
+- * long a server takes to reboot.
+- */
+-#define XS_TCP_CONN_TO		(60U * HZ)
+-
+-/*
+  * Wait duration for a reply from the RPC portmapper.
+  */
+ #define XS_BIND_TO		(60U * HZ)
+@@ -543,7 +529,7 @@ static int xs_udp_send_request(struct rp
+ 			xdr->len - req->rq_bytes_sent, status);
+ 
+ 	if (status >= 0) {
+-		task->tk_bytes_sent += status;
++		req->rq_xmit_bytes_sent += status;
+ 		if (status >= req->rq_slen)
+ 			return 0;
+ 		/* Still some bytes left; set up for a retry later. */
+@@ -639,7 +625,7 @@ static int xs_tcp_send_request(struct rp
+ 		/* If we've sent the entire packet, immediately
+ 		 * reset the count of bytes sent. */
+ 		req->rq_bytes_sent += status;
+-		task->tk_bytes_sent += status;
++		req->rq_xmit_bytes_sent += status;
+ 		if (likely(req->rq_bytes_sent >= req->rq_slen)) {
+ 			req->rq_bytes_sent = 0;
+ 			return 0;
+@@ -859,7 +845,6 @@ static void xs_udp_data_ready(struct soc
+ 	dst_confirm(skb_dst(skb));
+ 
+ 	xprt_adjust_cwnd(task, copied);
+-	xprt_update_rtt(task);
+ 	xprt_complete_rqst(task, copied);
+ 
+  out_unlock:
+@@ -2022,9 +2007,6 @@ static void xs_connect(struct rpc_task *
+ 	struct rpc_xprt *xprt = task->tk_xprt;
+ 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ 
+-	if (xprt_test_and_set_connecting(xprt))
+-		return;
+-
+ 	if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) {
+ 		dprintk("RPC:       xs_connect delayed xprt %p for %lu "
+ 				"seconds\n",
+@@ -2044,16 +2026,6 @@ static void xs_connect(struct rpc_task *
+ 	}
+ }
+ 
+-static void xs_tcp_connect(struct rpc_task *task)
+-{
+-	struct rpc_xprt *xprt = task->tk_xprt;
+-
+-	/* Exit if we need to wait for socket shutdown to complete */
+-	if (test_bit(XPRT_CLOSING, &xprt->state))
+-		return;
+-	xs_connect(task);
+-}
+-
+ /**
+  * xs_udp_print_stats - display UDP socket-specifc stats
+  * @xprt: rpc_xprt struct containing statistics
+@@ -2252,7 +2224,7 @@ static struct rpc_xprt_ops xs_tcp_ops = 
+ 	.release_xprt		= xs_tcp_release_xprt,
+ 	.rpcbind		= rpcb_getport_async,
+ 	.set_port		= xs_set_port,
+-	.connect		= xs_tcp_connect,
++	.connect		= xs_connect,
+ 	.buf_alloc		= rpc_malloc,
+ 	.buf_free		= rpc_free,
+ 	.send_request		= xs_tcp_send_request,
+@@ -2343,7 +2315,6 @@ static struct rpc_xprt *xs_setup_udp(str
+ 	xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
+ 
+ 	xprt->bind_timeout = XS_BIND_TO;
+-	xprt->connect_timeout = XS_UDP_CONN_TO;
+ 	xprt->reestablish_timeout = XS_UDP_REEST_TO;
+ 	xprt->idle_timeout = XS_IDLE_DISC_TO;
+ 
+@@ -2418,7 +2389,6 @@ static struct rpc_xprt *xs_setup_tcp(str
+ 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
+ 
+ 	xprt->bind_timeout = XS_BIND_TO;
+-	xprt->connect_timeout = XS_TCP_CONN_TO;
+ 	xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+ 	xprt->idle_timeout = XS_IDLE_DISC_TO;
+ 
+@@ -2478,9 +2448,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(
+ 	struct sock_xprt *transport;
+ 	struct svc_sock *bc_sock;
+ 
+-	if (!args->bc_xprt)
+-		ERR_PTR(-EINVAL);
+-
+ 	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+ 	if (IS_ERR(xprt))
+ 		return xprt;
+@@ -2494,7 +2461,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(
+ 	/* backchannel */
+ 	xprt_set_bound(xprt);
+ 	xprt->bind_timeout = 0;
+-	xprt->connect_timeout = 0;
+ 	xprt->reestablish_timeout = 0;
+ 	xprt->idle_timeout = 0;
+ 
diff --git a/nfsd-35-fc.patch b/nfsd-35-fc.patch
new file mode 100644
index 000000000..ef99b4995
--- /dev/null
+++ b/nfsd-35-fc.patch
@@ -0,0 +1,1808 @@
+diff -up linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt
+--- linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt	2010-08-23 09:57:18.233564439 -0400
+@@ -137,7 +137,7 @@ NS*| OPENATTR             | OPT        |
+    | READ                 | REQ        |              | Section 18.22  |
+    | READDIR              | REQ        |              | Section 18.23  |
+    | READLINK             | OPT        |              | Section 18.24  |
+-NS | RECLAIM_COMPLETE     | REQ        |              | Section 18.51  |
++   | RECLAIM_COMPLETE     | REQ        |              | Section 18.51  |
+    | RELEASE_LOCKOWNER    | MNI        |              | N/A            |
+    | REMOVE               | REQ        |              | Section 18.25  |
+    | RENAME               | REQ        |              | Section 18.26  |
+diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
+--- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 09:57:18.234564075 -0400
+@@ -259,10 +259,9 @@ static struct cache_detail svc_expkey_ca
+ 	.alloc		= expkey_alloc,
+ };
+ 
+-static struct svc_expkey *
+-svc_expkey_lookup(struct svc_expkey *item)
++static int
++svc_expkey_hash(struct svc_expkey *item)
+ {
+-	struct cache_head *ch;
+ 	int hash = item->ek_fsidtype;
+ 	char * cp = (char*)item->ek_fsid;
+ 	int len = key_len(item->ek_fsidtype);
+@@ -270,6 +269,14 @@ svc_expkey_lookup(struct svc_expkey *ite
+ 	hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
+ 	hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS);
+ 	hash &= EXPKEY_HASHMASK;
++	return hash;
++}
++
++static struct svc_expkey *
++svc_expkey_lookup(struct svc_expkey *item)
++{
++	struct cache_head *ch;
++	int hash = svc_expkey_hash(item);
+ 
+ 	ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h,
+ 				 hash);
+@@ -283,13 +290,7 @@ static struct svc_expkey *
+ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
+ {
+ 	struct cache_head *ch;
+-	int hash = new->ek_fsidtype;
+-	char * cp = (char*)new->ek_fsid;
+-	int len = key_len(new->ek_fsidtype);
+-
+-	hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
+-	hash ^= hash_ptr(new->ek_client, EXPKEY_HASHBITS);
+-	hash &= EXPKEY_HASHMASK;
++	int hash = svc_expkey_hash(new);
+ 
+ 	ch = sunrpc_cache_update(&svc_expkey_cache, &new->h,
+ 				 &old->h, hash);
+@@ -738,14 +739,22 @@ struct cache_detail svc_export_cache = {
+ 	.alloc		= svc_export_alloc,
+ };
+ 
+-static struct svc_export *
+-svc_export_lookup(struct svc_export *exp)
++static int
++svc_export_hash(struct svc_export *exp)
+ {
+-	struct cache_head *ch;
+ 	int hash;
++
+ 	hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS);
+ 	hash ^= hash_ptr(exp->ex_path.dentry, EXPORT_HASHBITS);
+ 	hash ^= hash_ptr(exp->ex_path.mnt, EXPORT_HASHBITS);
++	return hash;
++}
++
++static struct svc_export *
++svc_export_lookup(struct svc_export *exp)
++{
++	struct cache_head *ch;
++	int hash = svc_export_hash(exp);
+ 
+ 	ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h,
+ 				 hash);
+@@ -759,10 +768,7 @@ static struct svc_export *
+ svc_export_update(struct svc_export *new, struct svc_export *old)
+ {
+ 	struct cache_head *ch;
+-	int hash;
+-	hash = hash_ptr(old->ex_client, EXPORT_HASHBITS);
+-	hash ^= hash_ptr(old->ex_path.dentry, EXPORT_HASHBITS);
+-	hash ^= hash_ptr(old->ex_path.mnt, EXPORT_HASHBITS);
++	int hash = svc_export_hash(old);
+ 
+ 	ch = sunrpc_cache_update(&svc_export_cache, &new->h,
+ 				 &old->h,
+@@ -1071,9 +1077,9 @@ exp_export(struct nfsctl_export *nxp)
+ 		err = 0;
+ finish:
+ 	kfree(new.ex_pathname);
+-	if (exp)
++	if (!IS_ERR_OR_NULL(exp))
+ 		exp_put(exp);
+-	if (fsid_key && !IS_ERR(fsid_key))
++	if (!IS_ERR_OR_NULL(fsid_key))
+ 		cache_put(&fsid_key->h, &svc_expkey_cache);
+ 	path_put(&path);
+ out_put_clp:
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 10:00:37.257414684 -0400
+@@ -79,11 +79,6 @@ enum nfs_cb_opnum4 {
+ 					cb_sequence_dec_sz +            \
+ 					op_dec_sz)
+ 
+-struct nfs4_rpc_args {
+-	void				*args_op;
+-	struct nfsd4_cb_sequence	args_seq;
+-};
+-
+ /*
+ * Generic encode routines from fs/nfs/nfs4xdr.c
+ */
+@@ -428,13 +423,19 @@ static struct rpc_procinfo     nfs4_cb_p
+ };
+ 
+ static struct rpc_version       nfs_cb_version4 = {
++/*
++ * Note on the callback rpc program version number: despite language in rfc
++ * 5661 section 18.36.3 requiring servers to use 4 in this field, the
++ * official xdr descriptions for both 4.0 and 4.1 specify version 1, and
++ * in practice that appears to be what implementations use.  The section
++ * 18.36.3 language is expected to be fixed in an erratum.
++ */
+         .number                 = 1,
+         .nrprocs                = ARRAY_SIZE(nfs4_cb_procedures),
+         .procs                  = nfs4_cb_procedures
+ };
+ 
+ static struct rpc_version *	nfs_cb_version[] = {
+-	NULL,
+ 	&nfs_cb_version4,
+ };
+ 
+@@ -456,15 +457,14 @@ static struct rpc_program cb_program = {
+ 
+ static int max_cb_time(void)
+ {
+-	return max(NFSD_LEASE_TIME/10, (time_t)1) * HZ;
++	return max(nfsd4_lease/10, (time_t)1) * HZ;
+ }
+ 
+ /* Reference counting, callback cleanup, etc., all look racy as heck.
+- * And why is cb_set an atomic? */
++ * And why is cl_cb_set an atomic? */
+ 
+-int setup_callback_client(struct nfs4_client *clp)
++int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
+ {
+-	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
+ 	struct rpc_timeout	timeparms = {
+ 		.to_initval	= max_cb_time(),
+ 		.to_retries	= 0,
+@@ -476,7 +476,7 @@ int setup_callback_client(struct nfs4_cl
+ 		.timeout	= &timeparms,
+ 		.program	= &cb_program,
+ 		.prognumber	= cb->cb_prog,
+-		.version	= nfs_cb_version[1]->number,
++		.version	= 0,
+ 		.authflavor	= clp->cl_flavor,
+ 		.flags		= (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
+ 		.client_name    = clp->cl_principal,
+@@ -486,7 +486,7 @@ int setup_callback_client(struct nfs4_cl
+ 	if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
+ 		return -EINVAL;
+ 	if (cb->cb_minorversion) {
+-		args.bc_xprt = clp->cl_cb_xprt;
++		args.bc_xprt = cb->cb_xprt;
+ 		args.protocol = XPRT_TRANSPORT_BC_TCP;
+ 	}
+ 	/* Create RPC client */
+@@ -496,7 +496,7 @@ int setup_callback_client(struct nfs4_cl
+ 			PTR_ERR(client));
+ 		return PTR_ERR(client);
+ 	}
+-	cb->cb_client = client;
++	nfsd4_set_callback_client(clp, client);
+ 	return 0;
+ 
+ }
+@@ -514,8 +514,7 @@ static void nfsd4_cb_probe_done(struct r
+ 	if (task->tk_status)
+ 		warn_no_callback_path(clp, task->tk_status);
+ 	else
+-		atomic_set(&clp->cl_cb_conn.cb_set, 1);
+-	put_nfs4_client(clp);
++		atomic_set(&clp->cl_cb_set, 1);
+ }
+ 
+ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
+@@ -537,7 +536,6 @@ int set_callback_cred(void)
+ 
+ void do_probe_callback(struct nfs4_client *clp)
+ {
+-	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
+ 	struct rpc_message msg = {
+ 		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
+ 		.rpc_argp       = clp,
+@@ -545,34 +543,28 @@ void do_probe_callback(struct nfs4_clien
+ 	};
+ 	int status;
+ 
+-	status = rpc_call_async(cb->cb_client, &msg,
++	status = rpc_call_async(clp->cl_cb_client, &msg,
+ 				RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
+ 				&nfsd4_cb_probe_ops, (void *)clp);
+-	if (status) {
++	if (status)
+ 		warn_no_callback_path(clp, status);
+-		put_nfs4_client(clp);
+-	}
+ }
+ 
+ /*
+  * Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
+  */
+-void
+-nfsd4_probe_callback(struct nfs4_client *clp)
++void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
+ {
+ 	int status;
+ 
+-	BUG_ON(atomic_read(&clp->cl_cb_conn.cb_set));
++	BUG_ON(atomic_read(&clp->cl_cb_set));
+ 
+-	status = setup_callback_client(clp);
++	status = setup_callback_client(clp, cb);
+ 	if (status) {
+ 		warn_no_callback_path(clp, status);
+ 		return;
+ 	}
+ 
+-	/* the task holds a reference to the nfs4_client struct */
+-	atomic_inc(&clp->cl_count);
+-
+ 	do_probe_callback(clp);
+ }
+ 
+@@ -658,18 +650,32 @@ static void nfsd4_cb_done(struct rpc_tas
+ 	}
+ }
+ 
++
+ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs4_delegation *dp = calldata;
+ 	struct nfs4_client *clp = dp->dl_client;
++	struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
+ 
+ 	nfsd4_cb_done(task, calldata);
+ 
++	if (current_rpc_client == NULL) {
++		/* We're shutting down; give up. */
++		/* XXX: err, or is it ok just to fall through
++		 * and rpc_restart_call? */
++		return;
++	}
++
+ 	switch (task->tk_status) {
+ 	case -EIO:
+ 		/* Network partition? */
+-		atomic_set(&clp->cl_cb_conn.cb_set, 0);
++		atomic_set(&clp->cl_cb_set, 0);
+ 		warn_no_callback_path(clp, task->tk_status);
++		if (current_rpc_client != task->tk_client) {
++			/* queue a callback on the new connection: */
++			nfsd4_cb_recall(dp);
++			return;
++		}
+ 	case -EBADHANDLE:
+ 	case -NFS4ERR_BAD_STATEID:
+ 		/* Race: client probably got cb_recall
+@@ -677,7 +683,7 @@ static void nfsd4_cb_recall_done(struct 
+ 		break;
+ 	default:
+ 		/* success, or error we can't handle */
+-		goto done;
++		return;
+ 	}
+ 	if (dp->dl_retries--) {
+ 		rpc_delay(task, 2*HZ);
+@@ -685,20 +691,16 @@ static void nfsd4_cb_recall_done(struct 
+ 		rpc_restart_call(task);
+ 		return;
+ 	} else {
+-		atomic_set(&clp->cl_cb_conn.cb_set, 0);
++		atomic_set(&clp->cl_cb_set, 0);
+ 		warn_no_callback_path(clp, task->tk_status);
+ 	}
+-done:
+-	kfree(task->tk_msg.rpc_argp);
+ }
+ 
+ static void nfsd4_cb_recall_release(void *calldata)
+ {
+ 	struct nfs4_delegation *dp = calldata;
+-	struct nfs4_client *clp = dp->dl_client;
+ 
+ 	nfs4_put_delegation(dp);
+-	put_nfs4_client(clp);
+ }
+ 
+ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
+@@ -707,33 +709,75 @@ static const struct rpc_call_ops nfsd4_c
+ 	.rpc_release = nfsd4_cb_recall_release,
+ };
+ 
++static struct workqueue_struct *callback_wq;
++
++int nfsd4_create_callback_queue(void)
++{
++	callback_wq = create_singlethread_workqueue("nfsd4_callbacks");
++	if (!callback_wq)
++		return -ENOMEM;
++	return 0;
++}
++
++void nfsd4_destroy_callback_queue(void)
++{
++	destroy_workqueue(callback_wq);
++}
++
++/* must be called under the state lock */
++void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new)
++{
++	struct rpc_clnt *old = clp->cl_cb_client;
++
++	clp->cl_cb_client = new;
++	/*
++	 * After this, any work that saw the old value of cl_cb_client will
++	 * be gone:
++	 */
++	flush_workqueue(callback_wq);
++	/* So we can safely shut it down: */
++	if (old)
++		rpc_shutdown_client(old);
++}
++
+ /*
+  * called with dp->dl_count inc'ed.
+  */
+-void
+-nfsd4_cb_recall(struct nfs4_delegation *dp)
++static void _nfsd4_cb_recall(struct nfs4_delegation *dp)
+ {
+ 	struct nfs4_client *clp = dp->dl_client;
+-	struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
+-	struct nfs4_rpc_args *args;
++	struct rpc_clnt *clnt = clp->cl_cb_client;
++	struct nfs4_rpc_args *args = &dp->dl_recall.cb_args;
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
+ 		.rpc_cred = callback_cred
+ 	};
+-	int status = -ENOMEM;
++	int status;
++
++	if (clnt == NULL)
++		return; /* Client is shutting down; give up. */
+ 
+-	args = kzalloc(sizeof(*args), GFP_KERNEL);
+-	if (!args)
+-		goto out;
+ 	args->args_op = dp;
+ 	msg.rpc_argp = args;
+ 	dp->dl_retries = 1;
+ 	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
+ 				&nfsd4_cb_recall_ops, dp);
+-out:
+-	if (status) {
+-		kfree(args);
+-		put_nfs4_client(clp);
++	if (status)
+ 		nfs4_put_delegation(dp);
+-	}
++}
++
++void nfsd4_do_callback_rpc(struct work_struct *w)
++{
++	/* XXX: for now, just send off delegation recall. */
++	/* In future, generalize to handle any sort of callback. */
++	struct nfsd4_callback *c = container_of(w, struct nfsd4_callback, cb_work);
++	struct nfs4_delegation *dp = container_of(c, struct nfs4_delegation, dl_recall);
++
++	_nfsd4_cb_recall(dp);
++}
++
++
++void nfsd4_cb_recall(struct nfs4_delegation *dp)
++{
++	queue_work(callback_wq, &dp->dl_recall.cb_work);
+ }
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 09:57:18.237376763 -0400
+@@ -969,20 +969,36 @@ static struct nfsd4_operation nfsd4_ops[
+ static const char *nfsd4_op_name(unsigned opnum);
+ 
+ /*
+- * Enforce NFSv4.1 COMPOUND ordering rules.
++ * Enforce NFSv4.1 COMPOUND ordering rules:
+  *
+- * TODO:
+- * - enforce NFS4ERR_NOT_ONLY_OP,
+- * - DESTROY_SESSION MUST be the final operation in the COMPOUND request.
++ * Also note, enforced elsewhere:
++ *	- SEQUENCE other than as first op results in
++ *	  NFS4ERR_SEQUENCE_POS. (Enforced in nfsd4_sequence().)
++ *	- BIND_CONN_TO_SESSION must be the only op in its compound
++ *	  (Will be enforced in nfsd4_bind_conn_to_session().)
++ *	- DESTROY_SESSION must be the final operation in a compound, if
++ *	  sessionid's in SEQUENCE and DESTROY_SESSION are the same.
++ *	  (Enforced in nfsd4_destroy_session().)
+  */
+-static bool nfs41_op_ordering_ok(struct nfsd4_compoundargs *args)
++static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
+ {
+-	if (args->minorversion && args->opcnt > 0) {
+-		struct nfsd4_op *op = &args->ops[0];
+-		return (op->status == nfserr_op_illegal) ||
+-		       (nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP);
+-	}
+-	return true;
++	struct nfsd4_op *op = &args->ops[0];
++
++	/* These ordering requirements don't apply to NFSv4.0: */
++	if (args->minorversion == 0)
++		return nfs_ok;
++	/* This is weird, but OK, not our problem: */
++	if (args->opcnt == 0)
++		return nfs_ok;
++	if (op->status == nfserr_op_illegal)
++		return nfs_ok;
++	if (!(nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP))
++		return nfserr_op_not_in_session;
++	if (op->opnum == OP_SEQUENCE)
++		return nfs_ok;
++	if (args->opcnt != 1)
++		return nfserr_not_only_op;
++	return nfs_ok;
+ }
+ 
+ /*
+@@ -1012,6 +1028,7 @@ nfsd4_proc_compound(struct svc_rqst *rqs
+ 	resp->rqstp = rqstp;
+ 	resp->cstate.minorversion = args->minorversion;
+ 	resp->cstate.replay_owner = NULL;
++	resp->cstate.session = NULL;
+ 	fh_init(&resp->cstate.current_fh, NFS4_FHSIZE);
+ 	fh_init(&resp->cstate.save_fh, NFS4_FHSIZE);
+ 	/* Use the deferral mechanism only for NFSv4.0 compounds */
+@@ -1024,13 +1041,13 @@ nfsd4_proc_compound(struct svc_rqst *rqs
+ 	if (args->minorversion > nfsd_supported_minorversion)
+ 		goto out;
+ 
+-	if (!nfs41_op_ordering_ok(args)) {
++	status = nfs41_check_op_ordering(args);
++	if (status) {
+ 		op = &args->ops[0];
+-		op->status = nfserr_sequence_pos;
++		op->status = status;
+ 		goto encode_op;
+ 	}
+ 
+-	status = nfs_ok;
+ 	while (!status && resp->opcnt < args->opcnt) {
+ 		op = &args->ops[resp->opcnt++];
+ 
+@@ -1295,6 +1312,11 @@ static struct nfsd4_operation nfsd4_ops[
+ 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
+ 		.op_name = "OP_SEQUENCE",
+ 	},
++	[OP_RECLAIM_COMPLETE] = {
++		.op_func = (nfsd4op_func)nfsd4_reclaim_complete,
++		.op_flags = ALLOWED_WITHOUT_FH,
++		.op_name = "OP_RECLAIM_COMPLETE",
++	},
+ };
+ 
+ static const char *nfsd4_op_name(unsigned opnum)
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 09:57:18.240356512 -0400
+@@ -45,8 +45,8 @@
+ #define NFSDDBG_FACILITY                NFSDDBG_PROC
+ 
+ /* Globals */
+-static time_t lease_time = 90;     /* default lease time */
+-static time_t user_lease_time = 90;
++time_t nfsd4_lease = 90;     /* default lease time */
++time_t nfsd4_grace = 90;
+ static time_t boot_time;
+ static u32 current_ownerid = 1;
+ static u32 current_fileid = 1;
+@@ -190,7 +190,7 @@ alloc_init_deleg(struct nfs4_client *clp
+ 	dp->dl_vfs_file = stp->st_vfs_file;
+ 	dp->dl_type = type;
+ 	dp->dl_ident = cb->cb_ident;
+-	dp->dl_stateid.si_boot = get_seconds();
++	dp->dl_stateid.si_boot = boot_time;
+ 	dp->dl_stateid.si_stateownerid = current_delegid++;
+ 	dp->dl_stateid.si_fileid = 0;
+ 	dp->dl_stateid.si_generation = 0;
+@@ -199,6 +199,7 @@ alloc_init_deleg(struct nfs4_client *clp
+ 	atomic_set(&dp->dl_count, 1);
+ 	list_add(&dp->dl_perfile, &fp->fi_delegations);
+ 	list_add(&dp->dl_perclnt, &clp->cl_delegations);
++	INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
+ 	return dp;
+ }
+ 
+@@ -249,6 +250,9 @@ unhash_delegation(struct nfs4_delegation
+  * SETCLIENTID state 
+  */
+ 
++/* client_lock protects the client lru list and session hash table */
++static DEFINE_SPINLOCK(client_lock);
++
+ /* Hash tables for nfs4_clientid state */
+ #define CLIENT_HASH_BITS                 4
+ #define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
+@@ -367,7 +371,6 @@ static void release_openowner(struct nfs
+ 	nfs4_put_stateowner(sop);
+ }
+ 
+-static DEFINE_SPINLOCK(sessionid_lock);
+ #define SESSION_HASH_SIZE	512
+ static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE];
+ 
+@@ -565,10 +568,10 @@ alloc_init_session(struct svc_rqst *rqst
+ 
+ 	new->se_flags = cses->flags;
+ 	kref_init(&new->se_ref);
+-	spin_lock(&sessionid_lock);
++	spin_lock(&client_lock);
+ 	list_add(&new->se_hash, &sessionid_hashtbl[idx]);
+ 	list_add(&new->se_perclnt, &clp->cl_sessions);
+-	spin_unlock(&sessionid_lock);
++	spin_unlock(&client_lock);
+ 
+ 	status = nfs_ok;
+ out:
+@@ -579,7 +582,7 @@ out_free:
+ 	goto out;
+ }
+ 
+-/* caller must hold sessionid_lock */
++/* caller must hold client_lock */
+ static struct nfsd4_session *
+ find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
+ {
+@@ -602,7 +605,7 @@ find_in_sessionid_hashtbl(struct nfs4_se
+ 	return NULL;
+ }
+ 
+-/* caller must hold sessionid_lock */
++/* caller must hold client_lock */
+ static void
+ unhash_session(struct nfsd4_session *ses)
+ {
+@@ -610,15 +613,6 @@ unhash_session(struct nfsd4_session *ses
+ 	list_del(&ses->se_perclnt);
+ }
+ 
+-static void
+-release_session(struct nfsd4_session *ses)
+-{
+-	spin_lock(&sessionid_lock);
+-	unhash_session(ses);
+-	spin_unlock(&sessionid_lock);
+-	nfsd4_put_session(ses);
+-}
+-
+ void
+ free_session(struct kref *kref)
+ {
+@@ -634,9 +628,18 @@ free_session(struct kref *kref)
+ 	kfree(ses);
+ }
+ 
++/* must be called under the client_lock */
+ static inline void
+-renew_client(struct nfs4_client *clp)
++renew_client_locked(struct nfs4_client *clp)
+ {
++	if (is_client_expired(clp)) {
++		dprintk("%s: client (clientid %08x/%08x) already expired\n",
++			__func__,
++			clp->cl_clientid.cl_boot,
++			clp->cl_clientid.cl_id);
++		return;
++	}
++
+ 	/*
+ 	* Move client to the end to the LRU list.
+ 	*/
+@@ -647,6 +650,14 @@ renew_client(struct nfs4_client *clp)
+ 	clp->cl_time = get_seconds();
+ }
+ 
++static inline void
++renew_client(struct nfs4_client *clp)
++{
++	spin_lock(&client_lock);
++	renew_client_locked(clp);
++	spin_unlock(&client_lock);
++}
++
+ /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
+ static int
+ STALE_CLIENTID(clientid_t *clid)
+@@ -680,27 +691,9 @@ static struct nfs4_client *alloc_client(
+ 	return clp;
+ }
+ 
+-static void
+-shutdown_callback_client(struct nfs4_client *clp)
+-{
+-	struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
+-
+-	if (clnt) {
+-		/*
+-		 * Callback threads take a reference on the client, so there
+-		 * should be no outstanding callbacks at this point.
+-		 */
+-		clp->cl_cb_conn.cb_client = NULL;
+-		rpc_shutdown_client(clnt);
+-	}
+-}
+-
+ static inline void
+ free_client(struct nfs4_client *clp)
+ {
+-	shutdown_callback_client(clp);
+-	if (clp->cl_cb_xprt)
+-		svc_xprt_put(clp->cl_cb_xprt);
+ 	if (clp->cl_cred.cr_group_info)
+ 		put_group_info(clp->cl_cred.cr_group_info);
+ 	kfree(clp->cl_principal);
+@@ -709,10 +702,34 @@ free_client(struct nfs4_client *clp)
+ }
+ 
+ void
+-put_nfs4_client(struct nfs4_client *clp)
++release_session_client(struct nfsd4_session *session)
+ {
+-	if (atomic_dec_and_test(&clp->cl_count))
++	struct nfs4_client *clp = session->se_client;
++
++	if (!atomic_dec_and_lock(&clp->cl_refcount, &client_lock))
++		return;
++	if (is_client_expired(clp)) {
+ 		free_client(clp);
++		session->se_client = NULL;
++	} else
++		renew_client_locked(clp);
++	spin_unlock(&client_lock);
++	nfsd4_put_session(session);
++}
++
++/* must be called under the client_lock */
++static inline void
++unhash_client_locked(struct nfs4_client *clp)
++{
++	mark_client_expired(clp);
++	list_del(&clp->cl_lru);
++	while (!list_empty(&clp->cl_sessions)) {
++		struct nfsd4_session  *ses;
++		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
++				 se_perclnt);
++		unhash_session(ses);
++		nfsd4_put_session(ses);
++	}
+ }
+ 
+ static void
+@@ -722,9 +739,6 @@ expire_client(struct nfs4_client *clp)
+ 	struct nfs4_delegation *dp;
+ 	struct list_head reaplist;
+ 
+-	dprintk("NFSD: expire_client cl_count %d\n",
+-	                    atomic_read(&clp->cl_count));
+-
+ 	INIT_LIST_HEAD(&reaplist);
+ 	spin_lock(&recall_lock);
+ 	while (!list_empty(&clp->cl_delegations)) {
+@@ -740,20 +754,20 @@ expire_client(struct nfs4_client *clp)
+ 		list_del_init(&dp->dl_recall_lru);
+ 		unhash_delegation(dp);
+ 	}
+-	list_del(&clp->cl_idhash);
+-	list_del(&clp->cl_strhash);
+-	list_del(&clp->cl_lru);
+ 	while (!list_empty(&clp->cl_openowners)) {
+ 		sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
+ 		release_openowner(sop);
+ 	}
+-	while (!list_empty(&clp->cl_sessions)) {
+-		struct nfsd4_session  *ses;
+-		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
+-				 se_perclnt);
+-		release_session(ses);
+-	}
+-	put_nfs4_client(clp);
++	nfsd4_set_callback_client(clp, NULL);
++	if (clp->cl_cb_conn.cb_xprt)
++		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
++	list_del(&clp->cl_idhash);
++	list_del(&clp->cl_strhash);
++	spin_lock(&client_lock);
++	unhash_client_locked(clp);
++	if (atomic_read(&clp->cl_refcount) == 0)
++		free_client(clp);
++	spin_unlock(&client_lock);
+ }
+ 
+ static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
+@@ -839,14 +853,15 @@ static struct nfs4_client *create_client
+ 	}
+ 
+ 	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
+-	atomic_set(&clp->cl_count, 1);
+-	atomic_set(&clp->cl_cb_conn.cb_set, 0);
++	atomic_set(&clp->cl_refcount, 0);
++	atomic_set(&clp->cl_cb_set, 0);
+ 	INIT_LIST_HEAD(&clp->cl_idhash);
+ 	INIT_LIST_HEAD(&clp->cl_strhash);
+ 	INIT_LIST_HEAD(&clp->cl_openowners);
+ 	INIT_LIST_HEAD(&clp->cl_delegations);
+ 	INIT_LIST_HEAD(&clp->cl_sessions);
+ 	INIT_LIST_HEAD(&clp->cl_lru);
++	clp->cl_time = get_seconds();
+ 	clear_bit(0, &clp->cl_cb_slot_busy);
+ 	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
+ 	copy_verf(clp, verf);
+@@ -877,8 +892,7 @@ add_to_unconfirmed(struct nfs4_client *c
+ 	list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]);
+ 	idhashval = clientid_hashval(clp->cl_clientid.cl_id);
+ 	list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]);
+-	list_add_tail(&clp->cl_lru, &client_lru);
+-	clp->cl_time = get_seconds();
++	renew_client(clp);
+ }
+ 
+ static void
+@@ -888,10 +902,9 @@ move_to_confirmed(struct nfs4_client *cl
+ 	unsigned int strhashval;
+ 
+ 	dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
+-	list_del_init(&clp->cl_strhash);
+ 	list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
+ 	strhashval = clientstr_hashval(clp->cl_recdir);
+-	list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
++	list_move(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
+ 	renew_client(clp);
+ }
+ 
+@@ -1327,15 +1340,9 @@ nfsd4_create_session(struct svc_rqst *rq
+ 		cs_slot->sl_seqid++; /* from 0 to 1 */
+ 		move_to_confirmed(unconf);
+ 
+-		/*
+-		 * We do not support RDMA or persistent sessions
+-		 */
+-		cr_ses->flags &= ~SESSION4_PERSIST;
+-		cr_ses->flags &= ~SESSION4_RDMA;
+-
+ 		if (cr_ses->flags & SESSION4_BACK_CHAN) {
+-			unconf->cl_cb_xprt = rqstp->rq_xprt;
+-			svc_xprt_get(unconf->cl_cb_xprt);
++			unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
++			svc_xprt_get(rqstp->rq_xprt);
+ 			rpc_copy_addr(
+ 				(struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
+ 				sa);
+@@ -1344,7 +1351,7 @@ nfsd4_create_session(struct svc_rqst *rq
+ 				cstate->minorversion;
+ 			unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
+ 			unconf->cl_cb_seq_nr = 1;
+-			nfsd4_probe_callback(unconf);
++			nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
+ 		}
+ 		conf = unconf;
+ 	} else {
+@@ -1352,6 +1359,12 @@ nfsd4_create_session(struct svc_rqst *rq
+ 		goto out;
+ 	}
+ 
++	/*
++	 * We do not support RDMA or persistent sessions
++	 */
++	cr_ses->flags &= ~SESSION4_PERSIST;
++	cr_ses->flags &= ~SESSION4_RDMA;
++
+ 	status = alloc_init_session(rqstp, conf, cr_ses);
+ 	if (status)
+ 		goto out;
+@@ -1369,6 +1382,21 @@ out:
+ 	return status;
+ }
+ 
++static bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
++{
++	struct nfsd4_compoundres *resp = rqstp->rq_resp;
++	struct nfsd4_compoundargs *argp = rqstp->rq_argp;
++
++	return argp->opcnt == resp->opcnt;
++}
++
++static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
++{
++	if (!session)
++		return false;	/* no session to compare the sessionid against */
++	return !memcmp(sid, &session->se_sessionid, sizeof(*sid));
++}
++
+ __be32
+ nfsd4_destroy_session(struct svc_rqst *r,
+ 		      struct nfsd4_compound_state *cstate,
+@@ -1384,19 +1412,25 @@ nfsd4_destroy_session(struct svc_rqst *r
+ 	 * - Do we need to clear any callback info from previous session?
+ 	 */
+ 
++	if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) {
++		if (!nfsd4_last_compound_op(r))
++			return nfserr_not_only_op;
++	}
+ 	dump_sessionid(__func__, &sessionid->sessionid);
+-	spin_lock(&sessionid_lock);
++	spin_lock(&client_lock);
+ 	ses = find_in_sessionid_hashtbl(&sessionid->sessionid);
+ 	if (!ses) {
+-		spin_unlock(&sessionid_lock);
++		spin_unlock(&client_lock);
+ 		goto out;
+ 	}
+ 
+ 	unhash_session(ses);
+-	spin_unlock(&sessionid_lock);
++	spin_unlock(&client_lock);
+ 
++	nfs4_lock_state();
+ 	/* wait for callbacks */
+-	shutdown_callback_client(ses->se_client);
++	nfsd4_set_callback_client(ses->se_client, NULL);
++	nfs4_unlock_state();
+ 	nfsd4_put_session(ses);
+ 	status = nfs_ok;
+ out:
+@@ -1417,7 +1451,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
+ 	if (resp->opcnt != 1)
+ 		return nfserr_sequence_pos;
+ 
+-	spin_lock(&sessionid_lock);
++	spin_lock(&client_lock);
+ 	status = nfserr_badsession;
+ 	session = find_in_sessionid_hashtbl(&seq->sessionid);
+ 	if (!session)
+@@ -1456,23 +1490,47 @@ nfsd4_sequence(struct svc_rqst *rqstp,
+ 	cstate->slot = slot;
+ 	cstate->session = session;
+ 
+-	/* Hold a session reference until done processing the compound:
+-	 * nfsd4_put_session called only if the cstate slot is set.
+-	 */
+-	nfsd4_get_session(session);
+ out:
+-	spin_unlock(&sessionid_lock);
+-	/* Renew the clientid on success and on replay */
++	/* Hold a session reference until done processing the compound. */
+ 	if (cstate->session) {
+-		nfs4_lock_state();
+-		renew_client(session->se_client);
+-		nfs4_unlock_state();
++		nfsd4_get_session(cstate->session);
++		atomic_inc(&session->se_client->cl_refcount);
+ 	}
++	spin_unlock(&client_lock);
+ 	dprintk("%s: return %d\n", __func__, ntohl(status));
+ 	return status;
+ }
+ 
+ __be32
++nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc)
++{
++	if (rc->rca_one_fs) {
++		if (!cstate->current_fh.fh_dentry)
++			return nfserr_nofilehandle;
++		/*
++		 * We don't take advantage of the rca_one_fs case.
++		 * That's OK, it's optional, we can safely ignore it.
++		 */
++		return nfs_ok;
++	}
++	nfs4_lock_state();
++	if (is_client_expired(cstate->session->se_client)) {
++		nfs4_unlock_state();
++		/*
++		 * The following error isn't really legal.
++		 * But we only get here if the client just explicitly
++		 * destroyed the client.  Surely it no longer cares what
++		 * error it gets back on an operation for the dead
++		 * client.
++		 */
++		return nfserr_stale_clientid;
++	}
++	nfsd4_create_clid_dir(cstate->session->se_client);
++	nfs4_unlock_state();
++	return nfs_ok;
++}
++
++__be32
+ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ 		  struct nfsd4_setclientid *setclid)
+ {
+@@ -1631,9 +1689,8 @@ nfsd4_setclientid_confirm(struct svc_rqs
+ 		if (!same_creds(&conf->cl_cred, &unconf->cl_cred))
+ 			status = nfserr_clid_inuse;
+ 		else {
+-			/* XXX: We just turn off callbacks until we can handle
+-			  * change request correctly. */
+-			atomic_set(&conf->cl_cb_conn.cb_set, 0);
++			atomic_set(&conf->cl_cb_set, 0);
++			nfsd4_probe_callback(conf, &unconf->cl_cb_conn);
+ 			expire_client(unconf);
+ 			status = nfs_ok;
+ 
+@@ -1667,7 +1724,7 @@ nfsd4_setclientid_confirm(struct svc_rqs
+ 			}
+ 			move_to_confirmed(unconf);
+ 			conf = unconf;
+-			nfsd4_probe_callback(conf);
++			nfsd4_probe_callback(conf, &conf->cl_cb_conn);
+ 			status = nfs_ok;
+ 		}
+ 	} else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
+@@ -1700,12 +1757,12 @@ alloc_init_file(struct inode *ino)
+ 		INIT_LIST_HEAD(&fp->fi_hash);
+ 		INIT_LIST_HEAD(&fp->fi_stateids);
+ 		INIT_LIST_HEAD(&fp->fi_delegations);
+-		spin_lock(&recall_lock);
+-		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
+-		spin_unlock(&recall_lock);
+ 		fp->fi_inode = igrab(ino);
+ 		fp->fi_id = current_fileid++;
+ 		fp->fi_had_conflict = false;
++		spin_lock(&recall_lock);
++		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
++		spin_unlock(&recall_lock);
+ 		return fp;
+ 	}
+ 	return NULL;
+@@ -1827,7 +1884,7 @@ init_stateid(struct nfs4_stateid *stp, s
+ 	stp->st_stateowner = sop;
+ 	get_nfs4_file(fp);
+ 	stp->st_file = fp;
+-	stp->st_stateid.si_boot = get_seconds();
++	stp->st_stateid.si_boot = boot_time;
+ 	stp->st_stateid.si_stateownerid = sop->so_id;
+ 	stp->st_stateid.si_fileid = fp->fi_id;
+ 	stp->st_stateid.si_generation = 0;
+@@ -2028,7 +2085,6 @@ void nfsd_break_deleg_cb(struct file_loc
+ 	 * lock) we know the server hasn't removed the lease yet, we know
+ 	 * it's safe to take a reference: */
+ 	atomic_inc(&dp->dl_count);
+-	atomic_inc(&dp->dl_client->cl_count);
+ 
+ 	spin_lock(&recall_lock);
+ 	list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
+@@ -2347,7 +2403,7 @@ nfs4_open_delegation(struct svc_fh *fh, 
+ {
+ 	struct nfs4_delegation *dp;
+ 	struct nfs4_stateowner *sop = stp->st_stateowner;
+-	struct nfs4_cb_conn *cb = &sop->so_client->cl_cb_conn;
++	int cb_up = atomic_read(&sop->so_client->cl_cb_set);
+ 	struct file_lock fl, *flp = &fl;
+ 	int status, flag = 0;
+ 
+@@ -2355,7 +2411,7 @@ nfs4_open_delegation(struct svc_fh *fh, 
+ 	open->op_recall = 0;
+ 	switch (open->op_claim_type) {
+ 		case NFS4_OPEN_CLAIM_PREVIOUS:
+-			if (!atomic_read(&cb->cb_set))
++			if (!cb_up)
+ 				open->op_recall = 1;
+ 			flag = open->op_delegate_type;
+ 			if (flag == NFS4_OPEN_DELEGATE_NONE)
+@@ -2366,7 +2422,7 @@ nfs4_open_delegation(struct svc_fh *fh, 
+ 			 * had the chance to reclaim theirs.... */
+ 			if (locks_in_grace())
+ 				goto out;
+-			if (!atomic_read(&cb->cb_set) || !sop->so_confirmed)
++			if (!cb_up || !sop->so_confirmed)
+ 				goto out;
+ 			if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+ 				flag = NFS4_OPEN_DELEGATE_WRITE;
+@@ -2483,10 +2539,8 @@ nfsd4_process_open2(struct svc_rqst *rqs
+ 	}
+ 	memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t));
+ 
+-	if (nfsd4_has_session(&resp->cstate)) {
++	if (nfsd4_has_session(&resp->cstate))
+ 		open->op_stateowner->so_confirmed = 1;
+-		nfsd4_create_clid_dir(open->op_stateowner->so_client);
+-	}
+ 
+ 	/*
+ 	* Attempt to hand out a delegation. No error return, because the
+@@ -2537,7 +2591,7 @@ nfsd4_renew(struct svc_rqst *rqstp, stru
+ 	renew_client(clp);
+ 	status = nfserr_cb_path_down;
+ 	if (!list_empty(&clp->cl_delegations)
+-			&& !atomic_read(&clp->cl_cb_conn.cb_set))
++			&& !atomic_read(&clp->cl_cb_set))
+ 		goto out;
+ 	status = nfs_ok;
+ out:
+@@ -2554,6 +2608,12 @@ nfsd4_end_grace(void)
+ 	dprintk("NFSD: end of grace period\n");
+ 	nfsd4_recdir_purge_old();
+ 	locks_end_grace(&nfsd4_manager);
++	/*
++	 * Now that every NFSv4 client has had the chance to recover and
++	 * to see the (possibly new, possibly shorter) lease time, we
++	 * can safely set the next grace time to the current lease time:
++	 */
++	nfsd4_grace = nfsd4_lease;
+ }
+ 
+ static time_t
+@@ -2563,15 +2623,17 @@ nfs4_laundromat(void)
+ 	struct nfs4_stateowner *sop;
+ 	struct nfs4_delegation *dp;
+ 	struct list_head *pos, *next, reaplist;
+-	time_t cutoff = get_seconds() - NFSD_LEASE_TIME;
+-	time_t t, clientid_val = NFSD_LEASE_TIME;
+-	time_t u, test_val = NFSD_LEASE_TIME;
++	time_t cutoff = get_seconds() - nfsd4_lease;
++	time_t t, clientid_val = nfsd4_lease;
++	time_t u, test_val = nfsd4_lease;
+ 
+ 	nfs4_lock_state();
+ 
+ 	dprintk("NFSD: laundromat service - starting\n");
+ 	if (locks_in_grace())
+ 		nfsd4_end_grace();
++	INIT_LIST_HEAD(&reaplist);
++	spin_lock(&client_lock);
+ 	list_for_each_safe(pos, next, &client_lru) {
+ 		clp = list_entry(pos, struct nfs4_client, cl_lru);
+ 		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
+@@ -2580,12 +2642,22 @@ nfs4_laundromat(void)
+ 				clientid_val = t;
+ 			break;
+ 		}
++		if (atomic_read(&clp->cl_refcount)) {
++			dprintk("NFSD: client in use (clientid %08x)\n",
++				clp->cl_clientid.cl_id);
++			continue;
++		}
++		unhash_client_locked(clp);
++		list_add(&clp->cl_lru, &reaplist);
++	}
++	spin_unlock(&client_lock);
++	list_for_each_safe(pos, next, &reaplist) {
++		clp = list_entry(pos, struct nfs4_client, cl_lru);
+ 		dprintk("NFSD: purging unused client (clientid %08x)\n",
+ 			clp->cl_clientid.cl_id);
+ 		nfsd4_remove_clid_dir(clp);
+ 		expire_client(clp);
+ 	}
+-	INIT_LIST_HEAD(&reaplist);
+ 	spin_lock(&recall_lock);
+ 	list_for_each_safe(pos, next, &del_recall_lru) {
+ 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+@@ -2605,7 +2677,7 @@ nfs4_laundromat(void)
+ 		list_del_init(&dp->dl_recall_lru);
+ 		unhash_delegation(dp);
+ 	}
+-	test_val = NFSD_LEASE_TIME;
++	test_val = nfsd4_lease;
+ 	list_for_each_safe(pos, next, &close_lru) {
+ 		sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
+ 		if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
+@@ -2661,39 +2733,11 @@ nfs4_check_fh(struct svc_fh *fhp, struct
+ static int
+ STALE_STATEID(stateid_t *stateid)
+ {
+-	if (time_after((unsigned long)boot_time,
+-			(unsigned long)stateid->si_boot)) {
+-		dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
+-			STATEID_VAL(stateid));
+-		return 1;
+-	}
+-	return 0;
+-}
+-
+-static int
+-EXPIRED_STATEID(stateid_t *stateid)
+-{
+-	if (time_before((unsigned long)boot_time,
+-			((unsigned long)stateid->si_boot)) &&
+-	    time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
+-		dprintk("NFSD: expired stateid " STATEID_FMT "!\n",
+-			STATEID_VAL(stateid));
+-		return 1;
+-	}
+-	return 0;
+-}
+-
+-static __be32
+-stateid_error_map(stateid_t *stateid)
+-{
+-	if (STALE_STATEID(stateid))
+-		return nfserr_stale_stateid;
+-	if (EXPIRED_STATEID(stateid))
+-		return nfserr_expired;
+-
+-	dprintk("NFSD: bad stateid " STATEID_FMT "!\n",
++	if (stateid->si_boot == boot_time)
++		return 0;
++	dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
+ 		STATEID_VAL(stateid));
+-	return nfserr_bad_stateid;
++	return 1;
+ }
+ 
+ static inline int
+@@ -2817,10 +2861,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_
+ 	status = nfserr_bad_stateid;
+ 	if (is_delegation_stateid(stateid)) {
+ 		dp = find_delegation_stateid(ino, stateid);
+-		if (!dp) {
+-			status = stateid_error_map(stateid);
++		if (!dp)
+ 			goto out;
+-		}
+ 		status = check_stateid_generation(stateid, &dp->dl_stateid,
+ 						  flags);
+ 		if (status)
+@@ -2833,10 +2875,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_
+ 			*filpp = dp->dl_vfs_file;
+ 	} else { /* open or lock stateid */
+ 		stp = find_stateid(stateid, flags);
+-		if (!stp) {
+-			status = stateid_error_map(stateid);
++		if (!stp)
+ 			goto out;
+-		}
+ 		if (nfs4_check_fh(current_fh, stp))
+ 			goto out;
+ 		if (!stp->st_stateowner->so_confirmed)
+@@ -2908,7 +2948,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_co
+ 		 */
+ 		sop = search_close_lru(stateid->si_stateownerid, flags);
+ 		if (sop == NULL)
+-			return stateid_error_map(stateid);
++			return nfserr_bad_stateid;
+ 		*sopp = sop;
+ 		goto check_replay;
+ 	}
+@@ -3175,10 +3215,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp
+ 	if (!is_delegation_stateid(stateid))
+ 		goto out;
+ 	dp = find_delegation_stateid(inode, stateid);
+-	if (!dp) {
+-		status = stateid_error_map(stateid);
++	if (!dp)
+ 		goto out;
+-	}
+ 	status = check_stateid_generation(stateid, &dp->dl_stateid, flags);
+ 	if (status)
+ 		goto out;
+@@ -3404,7 +3442,7 @@ alloc_init_lock_stateid(struct nfs4_stat
+ 	stp->st_stateowner = sop;
+ 	get_nfs4_file(fp);
+ 	stp->st_file = fp;
+-	stp->st_stateid.si_boot = get_seconds();
++	stp->st_stateid.si_boot = boot_time;
+ 	stp->st_stateid.si_stateownerid = sop->so_id;
+ 	stp->st_stateid.si_fileid = fp->fi_id;
+ 	stp->st_stateid.si_generation = 0;
+@@ -3976,12 +4014,6 @@ nfsd4_load_reboot_recovery_data(void)
+ 		printk("NFSD: Failure reading reboot recovery data\n");
+ }
+ 
+-unsigned long
+-get_nfs4_grace_period(void)
+-{
+-	return max(user_lease_time, lease_time) * HZ;
+-}
+-
+ /*
+  * Since the lifetime of a delegation isn't limited to that of an open, a
+  * client may quite reasonably hang on to a delegation as long as it has
+@@ -4008,20 +4040,27 @@ set_max_delegations(void)
+ static int
+ __nfs4_state_start(void)
+ {
+-	unsigned long grace_time;
++	int ret;
+ 
+ 	boot_time = get_seconds();
+-	grace_time = get_nfs4_grace_period();
+-	lease_time = user_lease_time;
+ 	locks_start_grace(&nfsd4_manager);
+ 	printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
+-	       grace_time/HZ);
++	       nfsd4_grace);
++	ret = set_callback_cred();
++	if (ret)
++		return ret;	/* propagate set_callback_cred()'s own error code */
+ 	laundry_wq = create_singlethread_workqueue("nfsd4");
+ 	if (laundry_wq == NULL)
+ 		return -ENOMEM;
+-	queue_delayed_work(laundry_wq, &laundromat_work, grace_time);
++	ret = nfsd4_create_callback_queue();
++	if (ret)
++		goto out_free_laundry;
++	queue_delayed_work(laundry_wq, &laundromat_work, nfsd4_grace * HZ);
+ 	set_max_delegations();
+-	return set_callback_cred();
++	return 0;
++out_free_laundry:
++	destroy_workqueue(laundry_wq);
++	return ret;
+ }
+ 
+ int
+@@ -4039,12 +4078,6 @@ nfs4_state_start(void)
+ 	return 0;
+ }
+ 
+-time_t
+-nfs4_lease_time(void)
+-{
+-	return lease_time;
+-}
+-
+ static void
+ __nfs4_state_shutdown(void)
+ {
+@@ -4089,6 +4122,7 @@ nfs4_state_shutdown(void)
+ 	nfs4_lock_state();
+ 	nfs4_release_reclaim();
+ 	__nfs4_state_shutdown();
++	nfsd4_destroy_callback_queue();
+ 	nfs4_unlock_state();
+ }
+ 
+@@ -4128,21 +4162,3 @@ nfs4_recoverydir(void)
+ {
+ 	return user_recovery_dirname;
+ }
+-
+-/*
+- * Called when leasetime is changed.
+- *
+- * The only way the protocol gives us to handle on-the-fly lease changes is to
+- * simulate a reboot.  Instead of doing that, we just wait till the next time
+- * we start to register any changes in lease time.  If the administrator
+- * really wants to change the lease time *now*, they can go ahead and bring
+- * nfsd down and then back up again after changing the lease time.
+- *
+- * user_lease_time is protected by nfsd_mutex since it's only really accessed
+- * when nfsd is starting
+- */
+-void
+-nfs4_reset_lease(time_t leasetime)
+-{
+-	user_lease_time = leasetime;
+-}
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
+--- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 09:57:20.629370282 -0400
+@@ -46,6 +46,7 @@ enum {
+ 	 */
+ #ifdef CONFIG_NFSD_V4
+ 	NFSD_Leasetime,
++	NFSD_Gracetime,
+ 	NFSD_RecoveryDir,
+ #endif
+ };
+@@ -70,6 +71,7 @@ static ssize_t write_ports(struct file *
+ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
+ #ifdef CONFIG_NFSD_V4
+ static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
++static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
+ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
+ #endif
+ 
+@@ -91,6 +93,7 @@ static ssize_t (*write_op[])(struct file
+ 	[NFSD_MaxBlkSize] = write_maxblksize,
+ #ifdef CONFIG_NFSD_V4
+ 	[NFSD_Leasetime] = write_leasetime,
++	[NFSD_Gracetime] = write_gracetime,
+ 	[NFSD_RecoveryDir] = write_recoverydir,
+ #endif
+ };
+@@ -1204,29 +1207,45 @@ static ssize_t write_maxblksize(struct f
+ }
+ 
+ #ifdef CONFIG_NFSD_V4
+-extern time_t nfs4_leasetime(void);
+-
+-static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
++static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
+ {
+-	/* if size > 10 seconds, call
+-	 * nfs4_reset_lease() then write out the new lease (seconds) as reply
+-	 */
+ 	char *mesg = buf;
+-	int rv, lease;
++	int rv, i;
+ 
+ 	if (size > 0) {
+ 		if (nfsd_serv)
+ 			return -EBUSY;
+-		rv = get_int(&mesg, &lease);
++		rv = get_int(&mesg, &i);
+ 		if (rv)
+ 			return rv;
+-		if (lease < 10 || lease > 3600)
++		/*
++		 * Some sanity checking.  We don't have a reason for
++		 * these particular numbers, but problems with the
++		 * extremes are:
++		 *	- Too short: the briefest network outage may
++		 *	  cause clients to lose all their locks.  Also,
++		 *	  the frequent polling may be wasteful.
++		 *	- Too long: do you really want reboot recovery
++		 *	  to take more than an hour?  Or to make other
++		 *	  clients wait an hour before being able to
++		 *	  revoke a dead client's locks?
++		 */
++		if (i < 10 || i > 3600)
+ 			return -EINVAL;
+-		nfs4_reset_lease(lease);
++		*time = i;
+ 	}
+ 
+-	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n",
+-							nfs4_lease_time());
++	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time);
++}
++
++static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
++{
++	ssize_t rv;
++
++	mutex_lock(&nfsd_mutex);
++	rv = __nfsd4_write_time(file, buf, size, time);
++	mutex_unlock(&nfsd_mutex);
++	return rv;
+ }
+ 
+ /**
+@@ -1252,12 +1271,22 @@ static ssize_t __write_leasetime(struct 
+  */
+ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
+ {
+-	ssize_t rv;
++	return nfsd4_write_time(file, buf, size, &nfsd4_lease);
++}
+ 
+-	mutex_lock(&nfsd_mutex);
+-	rv = __write_leasetime(file, buf, size);
+-	mutex_unlock(&nfsd_mutex);
+-	return rv;
++/**
++ * write_gracetime - Set or report current NFSv4 grace period time
++ *
++ * As above, but sets the time of the NFSv4 grace period.
++ *
++ * Note this should never be set to less than the *previous*
++ * lease-period time, but we don't try to enforce this.  (In the common
++ * case (a new boot), we don't know what the previous lease time was
++ * anyway.)
++ */
++static ssize_t write_gracetime(struct file *file, char *buf, size_t size)
++{
++	return nfsd4_write_time(file, buf, size, &nfsd4_grace);
+ }
+ 
+ extern char *nfs4_recoverydir(void);
+@@ -1351,6 +1380,7 @@ static int nfsd_fill_super(struct super_
+ 		[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
+ #ifdef CONFIG_NFSD_V4
+ 		[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
++		[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
+ 		[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
+ #endif
+ 		/* last one */ {""}
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
+--- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 09:57:20.629370282 -0400
+@@ -82,7 +82,6 @@ int nfs4_state_init(void);
+ void nfsd4_free_slabs(void);
+ int nfs4_state_start(void);
+ void nfs4_state_shutdown(void);
+-time_t nfs4_lease_time(void);
+ void nfs4_reset_lease(time_t leasetime);
+ int nfs4_reset_recoverydir(char *recdir);
+ #else
+@@ -90,7 +89,6 @@ static inline int nfs4_state_init(void) 
+ static inline void nfsd4_free_slabs(void) { }
+ static inline int nfs4_state_start(void) { return 0; }
+ static inline void nfs4_state_shutdown(void) { }
+-static inline time_t nfs4_lease_time(void) { return 0; }
+ static inline void nfs4_reset_lease(time_t leasetime) { }
+ static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
+ #endif
+@@ -229,6 +227,9 @@ extern struct timeval	nfssvc_boot;
+ 
+ #ifdef CONFIG_NFSD_V4
+ 
++extern time_t nfsd4_lease;
++extern time_t nfsd4_grace;
++
+ /* before processing a COMPOUND operation, we have to check that there
+  * is enough space in the buffer for XDR encode to succeed.  otherwise,
+  * we might process an operation with side effects, and be unable to
+@@ -247,7 +248,6 @@ extern struct timeval	nfssvc_boot;
+ #define	COMPOUND_SLACK_SPACE		140    /* OP_GETFH */
+ #define COMPOUND_ERR_SLACK_SPACE	12     /* OP_SETATTR */
+ 
+-#define NFSD_LEASE_TIME                 (nfs4_lease_time())
+ #define NFSD_LAUNDROMAT_MINTIMEOUT      10   /* seconds */
+ 
+ /*
+diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
+--- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 09:57:21.807501619 -0400
+@@ -70,6 +70,16 @@ struct nfsd4_cb_sequence {
+ 	struct nfs4_client	*cbs_clp;
+ };
+ 
++struct nfs4_rpc_args {
++	void				*args_op;
++	struct nfsd4_cb_sequence	args_seq;
++};
++
++struct nfsd4_callback {
++	struct nfs4_rpc_args cb_args;
++	struct work_struct cb_work;
++};
++
+ struct nfs4_delegation {
+ 	struct list_head	dl_perfile;
+ 	struct list_head	dl_perclnt;
+@@ -86,6 +96,7 @@ struct nfs4_delegation {
+ 	stateid_t		dl_stateid;
+ 	struct knfsd_fh		dl_fh;
+ 	int			dl_retries;
++	struct nfsd4_callback	dl_recall;
+ };
+ 
+ /* client delegation callback info */
+@@ -96,9 +107,7 @@ struct nfs4_cb_conn {
+ 	u32                     cb_prog;
+ 	u32			cb_minorversion;
+ 	u32                     cb_ident;	/* minorversion 0 only */
+-	/* RPC client info */
+-	atomic_t		cb_set;     /* successful CB_NULL call */
+-	struct rpc_clnt *       cb_client;
++	struct svc_xprt		*cb_xprt;	/* minorversion 1 only */
+ };
+ 
+ /* Maximum number of slots per session. 160 is useful for long haul TCP */
+@@ -157,7 +166,7 @@ struct nfsd4_session {
+ 	struct list_head	se_hash;	/* hash by sessionid */
+ 	struct list_head	se_perclnt;
+ 	u32			se_flags;
+-	struct nfs4_client	*se_client;	/* for expire_client */
++	struct nfs4_client	*se_client;
+ 	struct nfs4_sessionid	se_sessionid;
+ 	struct nfsd4_channel_attrs se_fchannel;
+ 	struct nfsd4_channel_attrs se_bchannel;
+@@ -212,25 +221,41 @@ struct nfs4_client {
+ 	struct svc_cred		cl_cred; 	/* setclientid principal */
+ 	clientid_t		cl_clientid;	/* generated by server */
+ 	nfs4_verifier		cl_confirm;	/* generated by server */
+-	struct nfs4_cb_conn	cl_cb_conn;     /* callback info */
+-	atomic_t		cl_count;	/* ref count */
+ 	u32			cl_firststate;	/* recovery dir creation */
+ 
++	/* for v4.0 and v4.1 callbacks: */
++	struct nfs4_cb_conn	cl_cb_conn;
++	struct rpc_clnt		*cl_cb_client;
++	atomic_t		cl_cb_set;
++
+ 	/* for nfs41 */
+ 	struct list_head	cl_sessions;
+ 	struct nfsd4_clid_slot	cl_cs_slot;	/* create_session slot */
+ 	u32			cl_exchange_flags;
+ 	struct nfs4_sessionid	cl_sessionid;
++	/* number of rpc's in progress over an associated session: */
++	atomic_t		cl_refcount;
+ 
+ 	/* for nfs41 callbacks */
+ 	/* We currently support a single back channel with a single slot */
+ 	unsigned long		cl_cb_slot_busy;
+ 	u32			cl_cb_seq_nr;
+-	struct svc_xprt		*cl_cb_xprt;	/* 4.1 callback transport */
+ 	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
+ 						/* wait here for slots */
+ };
+ 
++static inline void
++mark_client_expired(struct nfs4_client *clp)
++{
++	clp->cl_time = 0;
++}
++
++static inline bool
++is_client_expired(struct nfs4_client *clp)
++{
++	return clp->cl_time == 0;
++}
++
+ /* struct nfs4_client_reset
+  * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl
+  * upon lease reset, or from upcall to state_daemon (to read in state
+@@ -377,11 +402,14 @@ extern void nfs4_lock_state(void);
+ extern void nfs4_unlock_state(void);
+ extern int nfs4_in_grace(void);
+ extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
+-extern void put_nfs4_client(struct nfs4_client *clp);
+ extern void nfs4_free_stateowner(struct kref *kref);
+ extern int set_callback_cred(void);
+-extern void nfsd4_probe_callback(struct nfs4_client *clp);
++extern void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
++extern void nfsd4_do_callback_rpc(struct work_struct *);
+ extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
++extern int nfsd4_create_callback_queue(void);
++extern void nfsd4_destroy_callback_queue(void);
++extern void nfsd4_set_callback_client(struct nfs4_client *, struct rpc_clnt *);
+ extern void nfs4_put_delegation(struct nfs4_delegation *dp);
+ extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
+ extern void nfsd4_init_recdir(char *recdir_name);
+@@ -392,6 +420,7 @@ extern int nfs4_has_reclaimed_state(cons
+ extern void nfsd4_recdir_purge_old(void);
+ extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
+ extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
++extern void release_session_client(struct nfsd4_session *);
+ 
+ static inline void
+ nfs4_put_stateowner(struct nfs4_stateowner *so)
+diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
+--- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 09:57:23.994379831 -0400
+@@ -381,6 +381,10 @@ struct nfsd4_destroy_session {
+ 	struct nfs4_sessionid	sessionid;
+ };
+ 
++struct nfsd4_reclaim_complete {
++	u32 rca_one_fs;
++};
++
+ struct nfsd4_op {
+ 	int					opnum;
+ 	__be32					status;
+@@ -421,6 +425,7 @@ struct nfsd4_op {
+ 		struct nfsd4_create_session	create_session;
+ 		struct nfsd4_destroy_session	destroy_session;
+ 		struct nfsd4_sequence		sequence;
++		struct nfsd4_reclaim_complete	reclaim_complete;
+ 	} u;
+ 	struct nfs4_replay *			replay;
+ };
+@@ -513,9 +518,8 @@ extern void nfsd4_store_cache_entry(stru
+ extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
+ 		struct nfsd4_sequence *seq);
+ extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
+-		struct nfsd4_compound_state *,
+-struct nfsd4_exchange_id *);
+-		extern __be32 nfsd4_create_session(struct svc_rqst *,
++		struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
++extern __be32 nfsd4_create_session(struct svc_rqst *,
+ 		struct nfsd4_compound_state *,
+ 		struct nfsd4_create_session *);
+ extern __be32 nfsd4_sequence(struct svc_rqst *,
+@@ -524,6 +528,7 @@ extern __be32 nfsd4_sequence(struct svc_
+ extern __be32 nfsd4_destroy_session(struct svc_rqst *,
+ 		struct nfsd4_compound_state *,
+ 		struct nfsd4_destroy_session *);
++__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *);
+ extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
+ 		struct nfsd4_open *open);
+ extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h	2010-08-23 09:57:23.994379831 -0400
+@@ -40,12 +40,12 @@ struct nfs_fhbase_old {
+  * This is the new flexible, extensible style NFSv2/v3 file handle.
+  * by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
+  *
+- * The file handle is seens as a list of 4byte words.
+- * The first word contains a version number (1) and four descriptor bytes
++ * The file handle starts with a sequence of four-byte words.
++ * The first word contains a version number (1) and three descriptor bytes
+  * that tell how the remaining 3 variable length fields should be handled.
+  * These three bytes are auth_type, fsid_type and fileid_type.
+  *
+- * All 4byte values are in host-byte-order.
++ * All four-byte values are in host-byte-order.
+  *
+  * The auth_type field specifies how the filehandle can be authenticated
+  * This might allow a file to be confirmed to be in a writable part of a
+diff -up linux-2.6.34.noarch/net/sunrpc/cache.c.orig linux-2.6.34.noarch/net/sunrpc/cache.c
+--- linux-2.6.34.noarch/net/sunrpc/cache.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/cache.c	2010-08-23 09:57:23.995376793 -0400
+@@ -49,11 +49,17 @@ static void cache_init(struct cache_head
+ 	h->last_refresh = now;
+ }
+ 
++static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
++{
++	return h->expiry_time < get_seconds() ||	/* past its expiry time */
++	       detail->flush_time > h->last_refresh;	/* flushed since last refresh */
++}
++
+ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
+ 				       struct cache_head *key, int hash)
+ {
+ 	struct cache_head **head,  **hp;
+-	struct cache_head *new = NULL;
++	struct cache_head *new = NULL, *freeme = NULL;
+ 
+ 	head = &detail->hash_table[hash];
+ 
+@@ -62,6 +68,9 @@ struct cache_head *sunrpc_cache_lookup(s
+ 	for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
+ 		struct cache_head *tmp = *hp;
+ 		if (detail->match(tmp, key)) {
++			if (cache_is_expired(detail, tmp))
++				/* This entry is expired, we will discard it. */
++				break;
+ 			cache_get(tmp);
+ 			read_unlock(&detail->hash_lock);
+ 			return tmp;
+@@ -86,6 +95,13 @@ struct cache_head *sunrpc_cache_lookup(s
+ 	for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
+ 		struct cache_head *tmp = *hp;
+ 		if (detail->match(tmp, key)) {
++			if (cache_is_expired(detail, tmp)) {
++				*hp = tmp->next;
++				tmp->next = NULL;
++				detail->entries --;
++				freeme = tmp;
++				break;
++			}
+ 			cache_get(tmp);
+ 			write_unlock(&detail->hash_lock);
+ 			cache_put(new, detail);
+@@ -98,6 +114,8 @@ struct cache_head *sunrpc_cache_lookup(s
+ 	cache_get(new);
+ 	write_unlock(&detail->hash_lock);
+ 
++	if (freeme)
++		cache_put(freeme, detail);
+ 	return new;
+ }
+ EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
+@@ -183,10 +201,7 @@ static int cache_make_upcall(struct cach
+ 
+ static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h)
+ {
+-	if (!test_bit(CACHE_VALID, &h->flags) ||
+-	    h->expiry_time < get_seconds())
+-		return -EAGAIN;
+-	else if (detail->flush_time > h->last_refresh)
++	if (!test_bit(CACHE_VALID, &h->flags))
+ 		return -EAGAIN;
+ 	else {
+ 		/* entry is valid */
+diff -up linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig linux-2.6.34.noarch/net/sunrpc/svcsock.c
+--- linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/svcsock.c	2010-08-23 09:57:23.997368707 -0400
+@@ -547,7 +547,6 @@ static int svc_udp_recvfrom(struct svc_r
+ 			dprintk("svc: recvfrom returned error %d\n", -err);
+ 			set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
+ 		}
+-		svc_xprt_received(&svsk->sk_xprt);
+ 		return -EAGAIN;
+ 	}
+ 	len = svc_addr_len(svc_addr(rqstp));
+@@ -562,11 +561,6 @@ static int svc_udp_recvfrom(struct svc_r
+ 	svsk->sk_sk->sk_stamp = skb->tstamp;
+ 	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
+ 
+-	/*
+-	 * Maybe more packets - kick another thread ASAP.
+-	 */
+-	svc_xprt_received(&svsk->sk_xprt);
+-
+ 	len  = skb->len - sizeof(struct udphdr);
+ 	rqstp->rq_arg.len = len;
+ 
+@@ -917,7 +911,6 @@ static int svc_tcp_recv_record(struct sv
+ 		if (len < want) {
+ 			dprintk("svc: short recvfrom while reading record "
+ 				"length (%d of %d)\n", len, want);
+-			svc_xprt_received(&svsk->sk_xprt);
+ 			goto err_again; /* record header not complete */
+ 		}
+ 
+@@ -953,7 +946,6 @@ static int svc_tcp_recv_record(struct sv
+ 	if (len < svsk->sk_reclen) {
+ 		dprintk("svc: incomplete TCP record (%d of %d)\n",
+ 			len, svsk->sk_reclen);
+-		svc_xprt_received(&svsk->sk_xprt);
+ 		goto err_again;	/* record not complete */
+ 	}
+ 	len = svsk->sk_reclen;
+@@ -961,10 +953,8 @@ static int svc_tcp_recv_record(struct sv
+ 
+ 	return len;
+  error:
+-	if (len == -EAGAIN) {
++	if (len == -EAGAIN)
+ 		dprintk("RPC: TCP recv_record got EAGAIN\n");
+-		svc_xprt_received(&svsk->sk_xprt);
+-	}
+ 	return len;
+  err_delete:
+ 	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+@@ -1110,7 +1100,6 @@ out:
+ 	svsk->sk_tcplen = 0;
+ 
+ 	svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
+-	svc_xprt_received(&svsk->sk_xprt);
+ 	if (serv->sv_stats)
+ 		serv->sv_stats->nettcpcnt++;
+ 
+@@ -1119,7 +1108,6 @@ out:
+ err_again:
+ 	if (len == -EAGAIN) {
+ 		dprintk("RPC: TCP recvfrom got EAGAIN\n");
+-		svc_xprt_received(&svsk->sk_xprt);
+ 		return len;
+ 	}
+ error:
+diff -up linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig linux-2.6.34.noarch/net/sunrpc/svc_xprt.c
+--- linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/svc_xprt.c	2010-08-23 09:57:23.996377209 -0400
+@@ -744,8 +744,10 @@ int svc_recv(struct svc_rqst *rqstp, lon
+ 		if (rqstp->rq_deferred) {
+ 			svc_xprt_received(xprt);
+ 			len = svc_deferred_recv(rqstp);
+-		} else
++		} else {
+ 			len = xprt->xpt_ops->xpo_recvfrom(rqstp);
++			svc_xprt_received(xprt);
++		}
+ 		dprintk("svc: got len=%d\n", len);
+ 	}
+ 
+@@ -893,12 +895,12 @@ void svc_delete_xprt(struct svc_xprt *xp
+ 	 */
+ 	if (test_bit(XPT_TEMP, &xprt->xpt_flags))
+ 		serv->sv_tmpcnt--;
++	spin_unlock_bh(&serv->sv_lock);
+ 
+ 	while ((dr = svc_deferred_dequeue(xprt)) != NULL)
+ 		kfree(dr);
+ 
+ 	svc_xprt_put(xprt);
+-	spin_unlock_bh(&serv->sv_lock);
+ }
+ 
+ void svc_close_xprt(struct svc_xprt *xprt)
+diff -up linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c.orig linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+--- linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	2010-08-23 09:57:23.998377481 -0400
+@@ -566,7 +566,6 @@ static int rdma_read_complete(struct svc
+ 		ret, rqstp->rq_arg.len,	rqstp->rq_arg.head[0].iov_base,
+ 		rqstp->rq_arg.head[0].iov_len);
+ 
+-	svc_xprt_received(rqstp->rq_xprt);
+ 	return ret;
+ }
+ 
+@@ -665,7 +664,6 @@ int svc_rdma_recvfrom(struct svc_rqst *r
+ 		rqstp->rq_arg.head[0].iov_len);
+ 	rqstp->rq_prot = IPPROTO_MAX;
+ 	svc_xprt_copy_addrs(rqstp, xprt);
+-	svc_xprt_received(xprt);
+ 	return ret;
+ 
+  close_out:
+@@ -678,6 +676,5 @@ int svc_rdma_recvfrom(struct svc_rqst *r
+ 	 */
+ 	set_bit(XPT_CLOSE, &xprt->xpt_flags);
+ defer:
+-	svc_xprt_received(xprt);
+ 	return 0;
+ }
diff --git a/pnfs-all-2.6.35-2010-08-19-f13.patch b/pnfs-all-2.6.35-2010-08-19-f13.patch
new file mode 100644
index 000000000..a9d78ba0e
--- /dev/null
+++ b/pnfs-all-2.6.35-2010-08-19-f13.patch
@@ -0,0 +1,31788 @@
+diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arch/um/os-Linux/mem.c
+--- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-08-23 12:08:27.310584826 -0400
++++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-08-23 12:09:03.273553977 -0400
+@@ -13,6 +13,7 @@
+ #include <sys/stat.h>
+ #include <sys/mman.h>
+ #include <sys/param.h>
++#include <sys/stat.h>
+ #include "init.h"
+ #include "kern_constants.h"
+ #include "os.h"
+diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.c
+--- linux-2.6.34.noarch/block/genhd.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/block/genhd.c	2010-08-23 12:09:03.273553977 -0400
+@@ -1009,6 +1009,7 @@ static void disk_release(struct device *
+ struct class block_class = {
+ 	.name		= "block",
+ };
++EXPORT_SYMBOL(block_class);
+ 
+ static char *block_devnode(struct device *dev, mode_t *mode)
+ {
+diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt
+--- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-08-23 12:09:03.274563927 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-08-23 12:09:03.274563927 -0400
+@@ -0,0 +1,211 @@
++(c) 2007 Network Appliance Inc.
++
++spNFS
++-----
++
++An spNFS system consists of a Meta Data Server (MDS), a number of Client machines (C) and a number of Data Servers (DS).
++
++A file system is mounted by the clients from the MDS, and all file data
++is striped across the DSs.
++
++Identify the machines that will be filling each of these roles.
++
++The spnfs kernel will be installed on all machines: clients, the MDS and DSs.
++
++
++Building and installing the spNFS kernel
++----------------------------------------
++
++Get the spNFS kernel from:
++
++	git://linux-nfs.org/~bhalevy/linux-pnfs.git
++
++Use the pnfs-all-latest branch and add these options to your .config file
++
++	CONFIG_NETWORK_FILESYSTEMS=y
++	CONFIG_NFS_FS=m
++	CONFIG_NFS_V4=y
++	CONFIG_NFS_V4_1=y
++	CONFIG_PNFS=y
++	CONFIG_NFSD=m
++	CONFIG_PNFSD=y
++	# CONFIG_PNFSD_LOCAL_EXPORT is not set
++	CONFIG_SPNFS=y
++
++By default, spNFS uses whole-file layouts.  Layout segments can be enabled
++by adding:
++
++	CONFIG_SPNFS_LAYOUTSEGMENTS=y
++
++to your .config file.
++
++Building and installation of kernel+modules is as usual.
++This kernel should be installed and booted on the client, MDS and DSs.
++
++Note that CONFIG_PNFSD_LOCAL_EXPORT must be disabled for spnfs as it
++takes over the pnfs export interface.
++
++Building nfs-utils
++------------------
++
++Get the nfs-utils package containing spnfsd from:
++
++	git://linux-nfs.org/~bhalevy/pnfs-nfs-utils.git
++
++Follow the standard instructions for building nfs-utils.
++
++After building, the spnfsd daemon will be located in utils/spnfsd.  The spnfsd
++daemon will only be needed on the MDS.
++
++
++Installation
++------------
++
++The nfs-utils package contains a default spnfsd.conf file in
++utils/spnfsd/spnfsd.conf.  Copy this file to /etc/spnfsd.conf.
++
++By default, the DS-Mount-Directory is set to /spnfs (see spnfsd.conf).  Under
++this directory, mount points must be created for each DS to
++be used for pNFS data stripes.  These mount points are named by the ip address
++of the corresponding DS.  In the sample spnfsd.conf, there are two
++DSs defined (172.16.28.134 and 172.16.28.141).
++
++Following the sample spnfsd.conf,
++
++	mkdir /spnfs
++
++on the MDS (corresponding to DS-Mount-Directory).  Then
++
++	mkdir /spnfs/172.16.28.134
++	mkdir /spnfs/172.16.28.141
++
++to create the mount points for the DSs.
++
++On the DSs, choose a directory where data stripes will be created by the MDS.
++For the sample file, this directory is /pnfs, so on each DS execute:
++
++	mkdir /pnfs
++
++This directory is specified in the spnfsd.conf file by the DS*_ROOT option
++(where * is replaced by the DS number).  DS_ROOT is specified relative to
++the directory being exported by the DSs.  In our example, our DSs are exporting
++the root directory (/) and therefore our DS_ROOT is /pnfs.  On the DSs, we have
++the following entry in /etc/exports:
++
++	/ *(rw,fsid=0,insecure,no_root_squash,sync,no_subtree_check)
++
++N.B. If we had created a /exports directory and a /pnfs directory under
++/exports, and if we were exporting /exports, then DS_ROOT would still be /pnfs
++(not /exports/pnfs).
++
++It may be useful to add entries to /etc/fstab on the MDS to automatically
++mount the DS_ROOT file systems.  For this example, our MDS fstab would
++contain:
++
++	172.16.28.134:/pnfs /spnfs/172.16.28.134 nfs    defaults        1 2
++	172.16.28.141:/pnfs /spnfs/172.16.28.141 nfs    defaults        1 2
++
++The DS mounts must be performed manually or via fstab at this time (automatic
++mounting, directory creation, etc. are on the todo list).  To perform I/O
++through the MDS, the DS mounts MUST use NFSv3 at this time (this restriction
++will eventually be removed).
++
++
++On the MDS, choose a file system to use with spNFS and export it, e.g.:
++
++	/ *(rw,fsid=0,insecure,no_root_squash,sync,no_subtree_check,pnfs)
++
++Make sure nfsd and all supporting processes are running on the MDS and DSs.
++
++
++Running
++-------
++
++If rpc_pipefs is not already mounted (if you're running idmapd it probably is),
++you may want to add the following line to /etc/fstab:
++
++	rpc_pipefs    /var/lib/nfs/rpc_pipefs rpc_pipefs defaults     0 0
++
++to automatically mount rpc_pipefs.
++
++With spnfsd.conf configured for your environment and the mounts mounted as
++described above, spnfsd can now be started.
++
++On the MDS, execute spnfsd:
++
++	spnfsd
++
++The executable is located in the directory where it was built, and
++may also have been installed elsewhere depending on how you built nfs-utils.
++It will run in the foreground by default, and in fact will do so despite
++any options suggesting the contrary (it's still a debugging build).
++
++On the client, make sure the nfslayoutdriver module is loaded:
++
++	modprobe nfslayoutdriver
++
++Then mount the file system from the MDS:
++
++	mount -t nfs4 -o minorversion=1 mds:/ /mnt
++
++I/O through the MDS is now supported.  To use it, do not load the
++nfslayoutdriver on the client, and mount the MDS using NFSv4 or 4.1
++(NFSv2 and v3 are not yet supported).
++
++You may now use spNFS by performing file system activities in /mnt.
++If you create files in /mnt, you should see stripe files corresponding to
++new files being created on the DSs.  The current implementation names the
++stripe files based on the inode number of the file on the MDS.  For example,
++if you create a file foo in /mnt and do an 'ls -li /mnt/foo':
++
++	# ls -li foo
++	1233 -rw-r--r-- 1 root root 0 Nov 29 15:54 foo
++
++You should see stripe files on each DS under /pnfs (per the sample) named
++1233.  The file /pnfs/1233 on DS1 will contain the first <stripe size> bytes
++of data written to foo, DS2 will contain the next <stripe size> bytes, etc.
++Removing /mnt/foo will remove the corresponding stripe files on the DSs.
++Other file system operations should behave (mostly :-) as expected.
++
++
++Layout Segments
++---------------
++
++If the kernel is compiled to support layout segments, there will
++be two files created under /proc/fs/spnfs for controlling layout
++segment functionality.
++
++To enable layout segments, write a '1' to /proc/fs/spnfs/layoutseg, e.g.:
++
++	echo 1 > /proc/fs/spnfs/layoutseg
++
++Layout segments can be disabled (returning to whole-file layouts) by
++writing a '0' to /proc/fs/spnfs/layoutseg:
++
++	echo 0 > /proc/fs/spnfs/layoutseg
++
++When layout segments are enabled, the size of the layouts returned can
++be specified by writing a decimal number (ascii representation) to
++/proc/fs/spnfs/layoutsegsize:
++
++	echo 1024 > /proc/fs/spnfs/layoutsegsize
++
++The value '0' has a special meaning--it causes the server to return a
++layout that is exactly the size requested by the client:
++
++	echo 0 > /proc/fs/spnfs/layoutsegsize
++
++
++Troubleshooting
++---------------
++
++If you see data being written to the files on the MDS rather than
++the stripe files, make sure the nfslayoutdriver is loaded on the client
++(see above).
++
++If you get a "permission denied" error, make sure mountd is running on the MDS
++(it occasionally fails to start).
++
++Bugs, enhancements, compliments, complaints to: dmuntz@netapp.com
++
++
+diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/drivers/md/dm-ioctl.c
+--- linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-08-23 12:09:03.275584050 -0400
+@@ -651,6 +651,12 @@ static int dev_create(struct dm_ioctl *p
+ 	return r;
+ }
+ 
++int dm_dev_create(struct dm_ioctl *param)
++{
++	return dev_create(param, sizeof(*param));
++}
++EXPORT_SYMBOL(dm_dev_create);
++
+ /*
+  * Always use UUID for lookups if it's present, otherwise use name or dev.
+  */
+@@ -745,6 +751,12 @@ static int dev_remove(struct dm_ioctl *p
+ 	return 0;
+ }
+ 
++int dm_dev_remove(struct dm_ioctl *param)
++{
++	return dev_remove(param, sizeof(*param));
++}
++EXPORT_SYMBOL(dm_dev_remove);
++
+ /*
+  * Check a string doesn't overrun the chunk of
+  * memory we copied from userland.
+@@ -917,6 +929,12 @@ static int do_resume(struct dm_ioctl *pa
+ 	return r;
+ }
+ 
++int dm_do_resume(struct dm_ioctl *param)
++{
++	return do_resume(param);
++}
++EXPORT_SYMBOL(dm_do_resume);
++
+ /*
+  * Set or unset the suspension state of a device.
+  * If the device already is in the requested state we just return its status.
+@@ -1194,6 +1212,12 @@ out:
+ 	return r;
+ }
+ 
++int dm_table_load(struct dm_ioctl *param, size_t param_size)
++{
++	return table_load(param, param_size);
++}
++EXPORT_SYMBOL(dm_table_load);
++
+ static int table_clear(struct dm_ioctl *param, size_t param_size)
+ {
+ 	int r;
+diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drivers/scsi/hosts.c
+--- linux-2.6.34.noarch/drivers/scsi/hosts.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-08-23 12:09:03.276563906 -0400
+@@ -49,7 +49,7 @@ static void scsi_host_cls_release(struct
+ 	put_device(&class_to_shost(dev)->shost_gendev);
+ }
+ 
+-static struct class shost_class = {
++struct class shost_class = {
+ 	.name		= "scsi_host",
+ 	.dev_release	= scsi_host_cls_release,
+ };
+diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/exofs.h
+--- linux-2.6.34.noarch/fs/exofs/exofs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-08-23 12:09:03.277563890 -0400
+@@ -36,13 +36,9 @@
+ #include <linux/fs.h>
+ #include <linux/time.h>
+ #include <linux/backing-dev.h>
++#include <linux/pnfs_osd_xdr.h>
+ #include "common.h"
+ 
+-/* FIXME: Remove once pnfs hits mainline
+- * #include <linux/exportfs/pnfs_osd_xdr.h>
+- */
+-#include "pnfs.h"
+-
+ #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
+ 
+ #ifdef CONFIG_EXOFS_DEBUG
+@@ -103,6 +99,7 @@ struct exofs_sb_info {
+ struct exofs_i_info {
+ 	struct inode   vfs_inode;          /* normal in-memory inode          */
+ 	wait_queue_head_t i_wq;            /* wait queue for inode            */
++	spinlock_t     i_layout_lock;      /* lock for layout/return/recall   */
+ 	unsigned long  i_flags;            /* various atomic flags            */
+ 	uint32_t       i_data[EXOFS_IDATA];/*short symlink names and device #s*/
+ 	uint32_t       i_dir_start_lookup; /* which page to start lookup      */
+@@ -166,6 +163,9 @@ static inline unsigned exofs_io_state_si
+  */
+ #define OBJ_2BCREATED	0	/* object will be created soon*/
+ #define OBJ_CREATED	1	/* object has been created on the osd*/
++/* Below are not used atomically but reuse the same i_flags */
++#define OBJ_LAYOUT_IS_GIVEN  2  /* inode has given layouts to clients*/
++#define OBJ_IN_LAYOUT_RECALL 3  /* inode is in the middle of a layout recall*/
+ 
+ static inline int obj_2bcreated(struct exofs_i_info *oi)
+ {
+@@ -304,4 +304,20 @@ extern const struct inode_operations exo
+ extern const struct inode_operations exofs_symlink_inode_operations;
+ extern const struct inode_operations exofs_fast_symlink_inode_operations;
+ 
++/* export.c */
++typedef int (exofs_recall_fn)(struct inode *inode);
++#ifdef CONFIG_PNFSD
++int exofs_inode_recall_layout(struct inode *inode, enum pnfs_iomode iomode,
++			      exofs_recall_fn todo);
++void exofs_init_export(struct super_block *sb);
++#else
++static inline int exofs_inode_recall_layout(struct inode *inode,
++				enum pnfs_iomode iomode, exofs_recall_fn todo)
++{
++	return todo(inode);
++}
++
++static inline void exofs_init_export(struct super_block *sb) {}
++#endif
++
+ #endif
+diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs/export.c
+--- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-08-23 12:09:03.278386746 -0400
++++ linux-2.6.34.noarch/fs/exofs/export.c	2010-08-23 12:09:03.278386746 -0400
+@@ -0,0 +1,396 @@
++/*
++ * export.c - Implementation of the pnfs_export_operations
++ *
++ * Copyright (C) 2009 Panasas Inc.
++ * All rights reserved.
++ *
++ * Boaz Harrosh <bharrosh@panasas.com>
++ *
++ * This file is part of exofs.
++ *
++ * exofs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation.  Since it is based on ext2, and the only
++ * valid version of GPL for the Linux kernel is version 2, the only valid
++ * version of GPL for exofs is version 2.
++ *
++ * exofs is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with exofs; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
++ */
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include "exofs.h"
++
++static int exofs_layout_type(struct super_block *sb)
++{
++	return LAYOUT_OSD2_OBJECTS;
++}
++
++static void set_dev_id(struct pnfs_deviceid *pnfs_devid, u64 sbid, u64 devid)
++{
++	struct nfsd4_pnfs_deviceid *dev_id =
++		(struct nfsd4_pnfs_deviceid *)pnfs_devid;
++
++	dev_id->sbid  = sbid;
++	dev_id->devid = devid;
++}
++
++static int cb_layout_recall(struct inode *inode, enum pnfs_iomode iomode,
++			    u64 offset, u64 length, void *cookie)
++{
++	struct nfsd4_pnfs_cb_layout cbl;
++	struct pnfsd_cb_ctl cb_ctl;
++	int status;
++
++	memset(&cb_ctl, 0, sizeof(cb_ctl));
++	status = pnfsd_get_cb_op(&cb_ctl);
++	if (unlikely(status)) {
++		EXOFS_ERR("%s: nfsd unloaded!! inode (0x%lx) status=%d\n",
++			  __func__, inode->i_ino, status);
++		goto err;
++	}
++
++	memset(&cbl, 0, sizeof(cbl));
++	cbl.cbl_recall_type = RETURN_FILE;
++	cbl.cbl_seg.layout_type = LAYOUT_OSD2_OBJECTS;
++	cbl.cbl_seg.iomode = iomode;
++	cbl.cbl_seg.offset = offset;
++	cbl.cbl_seg.length = length;
++	cbl.cbl_cookie = cookie;
++
++	status = cb_ctl.cb_op->cb_layout_recall(inode->i_sb, inode, &cbl);
++	pnfsd_put_cb_op(&cb_ctl);
++
++err:
++	return status;
++}
++
++static enum nfsstat4 exofs_layout_get(
++	struct inode *inode,
++	struct exp_xdr_stream *xdr,
++	const struct nfsd4_pnfs_layoutget_arg *args,
++	struct nfsd4_pnfs_layoutget_res *res)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
++	struct exofs_layout *el = &sbi->layout;
++	struct pnfs_osd_object_cred *creds = NULL;
++	struct pnfs_osd_layout layout;
++	__be32 *start;
++	bool in_recall;
++	int i, err;
++	enum nfsstat4 nfserr;
++
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;
++	res->lg_seg.iomode = IOMODE_RW;
++	res->lg_return_on_close = true; /* TODO: unused but will be soon */
++
++	/* skip opaque size, will be filled-in later */
++	start = exp_xdr_reserve_qwords(xdr, 1);
++	if (!start) {
++		nfserr = NFS4ERR_TOOSMALL;
++		goto out;
++	}
++
++	creds = kcalloc(el->s_numdevs, sizeof(*creds), GFP_KERNEL);
++	if (!creds) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto out;
++	}
++
++	/* Fill in a pnfs_osd_layout struct */
++	layout.olo_map = sbi->data_map;
++
++	for (i = 0; i < el->s_numdevs; i++) {
++		struct pnfs_osd_object_cred *cred = &creds[i];
++		osd_id id = exofs_oi_objno(oi);
++		unsigned dev = exofs_layout_od_id(el, id, i);
++
++		set_dev_id(&cred->oc_object_id.oid_device_id, args->lg_sbid,
++			   dev);
++		cred->oc_object_id.oid_partition_id = el->s_pid;
++		cred->oc_object_id.oid_object_id = id;
++		cred->oc_osd_version = osd_dev_is_ver1(el->s_ods[dev]) ?
++						PNFS_OSD_VERSION_1 :
++						PNFS_OSD_VERSION_2;
++		cred->oc_cap_key_sec = PNFS_OSD_CAP_KEY_SEC_NONE;
++
++		cred->oc_cap_key.cred_len	= 0;
++		cred->oc_cap_key.cred		= NULL;
++
++		cred->oc_cap.cred_len	= OSD_CAP_LEN;
++		cred->oc_cap.cred	= oi->i_cred;
++	}
++
++	layout.olo_comps_index = 0;
++	layout.olo_num_comps = el->s_numdevs;
++	layout.olo_comps = creds;
++
++	err = pnfs_osd_xdr_encode_layout(xdr, &layout);
++	if (err) {
++		nfserr = NFS4ERR_TOOSMALL; /* FIXME: Change osd_xdr error codes */
++		goto out;
++	}
++
++	exp_xdr_encode_opaque_len(start, xdr->p);
++
++	spin_lock(&oi->i_layout_lock);
++	in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	if (!in_recall) {
++		__set_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++		nfserr = NFS4_OK;
++	} else {
++		nfserr = NFS4ERR_RECALLCONFLICT;
++	}
++	spin_unlock(&oi->i_layout_lock);
++
++out:
++	kfree(creds);
++	EXOFS_DBGMSG("(0x%lx) nfserr=%u xdr_bytes=%zu\n",
++		     inode->i_ino, nfserr, exp_xdr_qbytes(xdr->p - start));
++	return nfserr;
++}
++
++/* NOTE: inode mutex must NOT be held */
++static int exofs_layout_commit(
++	struct inode *inode,
++	const struct nfsd4_pnfs_layoutcommit_arg *args,
++	struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	struct timespec mtime;
++	loff_t i_size;
++	int in_recall;
++
++	/* In case of a recall we ignore the new size and mtime since they
++	 * are going to be changed again by truncate, and since we cannot take
++	 * the inode lock in that case.
++	 */
++	spin_lock(&oi->i_layout_lock);
++	in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++	if (in_recall) {
++		EXOFS_DBGMSG("(0x%lx) commit was called during recall\n",
++			     inode->i_ino);
++		return 0;
++	}
++
++	/* NOTE: I would love to call inode_setattr here
++	 *	 but i cannot since this will cause an eventual vmtruncate,
++	 *	 which will cause a layout_recall. So open code the i_size
++	 *	 and mtime/atime changes under i_mutex.
++	 */
++	mutex_lock_nested(&inode->i_mutex, I_MUTEX_NORMAL);
++
++	if (args->lc_mtime.seconds) {
++		mtime.tv_sec = args->lc_mtime.seconds;
++		mtime.tv_nsec = args->lc_mtime.nseconds;
++
++		/* layout commit may only make time bigger, since there might
++		 * be reordering of the notifications and it might arrive after
++		 * a local change; a client mtime older than ours is ignored.
++		 * TODO: if mtime > ctime then we know set_attr did an mtime
++		 * in the future. and we can let this update through
++		 */
++		if (timespec_compare(&mtime, &inode->i_mtime) < 0)
++			mtime = inode->i_mtime;
++	} else {
++		mtime = current_fs_time(inode->i_sb);
++	}
++
++	/* TODO: Will below work? since mark_inode_dirty has its own
++	 *       Time handling
++	 */
++	inode->i_atime = inode->i_mtime = mtime;
++
++	i_size = i_size_read(inode);
++	if (args->lc_newoffset) {
++		loff_t new_size = args->lc_last_wr + 1;
++
++		if (i_size < new_size) {
++			i_size_write(inode, i_size = new_size);
++			res->lc_size_chg = 1;
++			res->lc_newsize = new_size;
++		}
++	}
++	/* TODO: else { i_size = osd_get_object_length() } */
++
++/* TODO: exofs does not currently use the osd_xdr part of the layout_commit */
++
++	mark_inode_dirty_sync(inode);
++
++	mutex_unlock(&inode->i_mutex);
++	EXOFS_DBGMSG("(0x%lx) i_size=0x%llx lcp->off=0x%llx\n",
++		     inode->i_ino, i_size, args->lc_last_wr);
++	return 0;
++}
++
++static void exofs_handle_error(struct pnfs_osd_ioerr *ioerr)
++{
++	EXOFS_ERR("exofs_handle_error: errno=%d is_write=%d obj=0x%llx "
++		  "offset=0x%llx length=0x%llx\n",
++		  ioerr->oer_errno, ioerr->oer_iswrite,
++		  _LLU(ioerr->oer_component.oid_object_id),
++		  _LLU(ioerr->oer_comp_offset),
++		  _LLU(ioerr->oer_comp_length));
++}
++
++static int exofs_layout_return(
++	struct inode *inode,
++	const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	__be32 *p = args->lrf_body;
++	unsigned len = exp_xdr_qwords(args->lrf_body_len);
++
++	EXOFS_DBGMSG("(0x%lx) cookie %p xdr_len %d\n",
++		     inode->i_ino, args->lr_cookie, len);
++
++	while (len >= pnfs_osd_ioerr_xdr_sz()) {
++		struct pnfs_osd_ioerr ioerr;
++
++		p = pnfs_osd_xdr_decode_ioerr(&ioerr, p);
++		len -= pnfs_osd_ioerr_xdr_sz();
++		exofs_handle_error(&ioerr);
++	}
++
++	if (args->lr_cookie) {
++		struct exofs_i_info *oi = exofs_i(inode);
++		bool in_recall;
++
++		spin_lock(&oi->i_layout_lock);
++		in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++		__clear_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++		spin_unlock(&oi->i_layout_lock);
++
++		/* TODO: how to communicate cookie with the waiter */
++		if (in_recall)
++			wake_up(&oi->i_wq); /* wakeup any recalls */
++	}
++
++	return 0;
++}
++
++int exofs_get_device_info(struct super_block *sb, struct exp_xdr_stream *xdr,
++			  u32 layout_type,
++			  const struct nfsd4_pnfs_deviceid *devid)
++{
++	struct exofs_sb_info *sbi = sb->s_fs_info;
++	struct pnfs_osd_deviceaddr devaddr;
++	const struct osd_dev_info *odi;
++	u64 devno = devid->devid;
++	__be32 *start;
++	int err;
++
++	memset(&devaddr, 0, sizeof(devaddr));
++
++	if (unlikely(devno >= sbi->layout.s_numdevs))
++		return -ENODEV;
++
++	odi = osduld_device_info(sbi->layout.s_ods[devno]);
++
++	devaddr.oda_systemid.len = odi->systemid_len;
++	devaddr.oda_systemid.data = (void *)odi->systemid; /* !const cast */
++
++	devaddr.oda_osdname.len = odi->osdname_len ;
++	devaddr.oda_osdname.data = (void *)odi->osdname;/* !const cast */
++
++	/* skip opaque size, will be filled-in later */
++	start = exp_xdr_reserve_qwords(xdr, 1);
++	if (!start) {
++		err = -E2BIG;
++		goto err;
++	}
++
++	err = pnfs_osd_xdr_encode_deviceaddr(xdr, &devaddr);
++	if (err)
++		goto err;
++
++	exp_xdr_encode_opaque_len(start, xdr->p);
++
++	EXOFS_DBGMSG("xdr_bytes=%Zu devno=%lld osdname-%s\n",
++		     exp_xdr_qbytes(xdr->p - start), devno, odi->osdname);
++	return 0;
++
++err:
++	EXOFS_DBGMSG("Error: err=%d at_byte=%zu\n",
++		     err, exp_xdr_qbytes(xdr->p - start));
++	return err;
++}
++
++struct pnfs_export_operations exofs_pnfs_ops = {
++	.layout_type	= exofs_layout_type,
++	.layout_get	= exofs_layout_get,
++	.layout_commit	= exofs_layout_commit,
++	.layout_return	= exofs_layout_return,
++	.get_device_info = exofs_get_device_info,
++};
++
++static bool is_layout_returned(struct exofs_i_info *oi)
++{
++	bool layout_given;
++
++	spin_lock(&oi->i_layout_lock);
++	layout_given = test_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++
++	return !layout_given;
++}
++
++int exofs_inode_recall_layout(struct inode *inode, enum pnfs_iomode iomode,
++			      exofs_recall_fn todo)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	int layout_given;
++	int error = 0;
++
++	spin_lock(&oi->i_layout_lock);
++	layout_given = test_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++	__set_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++
++	if (!layout_given)
++		goto exec;
++
++	for (;;) {
++		EXOFS_DBGMSG("(0x%lx) has_layout issue a recall\n",
++			     inode->i_ino);
++		error = cb_layout_recall(inode, iomode, 0, NFS4_MAX_UINT64,
++					 &oi->i_wq);
++		switch (error) {
++		case 0:
++		case -EAGAIN:
++			break;
++		case -ENOENT:
++			goto exec;
++		default:
++			goto err;
++		}
++
++		error = wait_event_interruptible(oi->i_wq,
++						 is_layout_returned(oi));
++		if (error)
++			goto err;
++	}
++
++exec:
++	error = todo(inode);
++
++err:
++	spin_lock(&oi->i_layout_lock);
++	__clear_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++	EXOFS_DBGMSG("(0x%lx) return=>%d\n", inode->i_ino, error);
++	return error;
++}
++
++void exofs_init_export(struct super_block *sb)
++{
++	sb->s_pnfs_op = &exofs_pnfs_ops;
++}
+diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/inode.c
+--- linux-2.6.34.noarch/fs/exofs/inode.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-08-23 12:09:03.279502002 -0400
+@@ -833,7 +833,7 @@ void exofs_truncate(struct inode *inode)
+ 	if (unlikely(wait_obj_created(oi)))
+ 		goto fail;
+ 
+-	ret = _do_truncate(inode);
++	ret = exofs_inode_recall_layout(inode, IOMODE_ANY, _do_truncate);
+ 	if (ret)
+ 		goto fail;
+ 
+@@ -964,6 +964,7 @@ static void __oi_init(struct exofs_i_inf
+ {
+ 	init_waitqueue_head(&oi->i_wq);
+ 	oi->i_flags = 0;
++	spin_lock_init(&oi->i_layout_lock);
+ }
+ /*
+  * Fill in an inode read from the OSD and set it up for use
+diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/Kbuild
+--- linux-2.6.34.noarch/fs/exofs/Kbuild.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-08-23 12:09:03.279502002 -0400
+@@ -13,4 +13,5 @@
+ #
+ 
+ exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o
++exofs-$(CONFIG_PNFSD) +=  export.o
+ obj-$(CONFIG_EXOFS_FS) += exofs.o
+diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/Kconfig
+--- linux-2.6.34.noarch/fs/exofs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-08-23 12:09:03.280553663 -0400
+@@ -1,6 +1,7 @@
+ config EXOFS_FS
+ 	tristate "exofs: OSD based file system support"
+ 	depends on SCSI_OSD_ULD
++	select EXPORTFS_OSD_LAYOUT if PNFSD
+ 	help
+ 	  EXOFS is a file system that uses an OSD storage device,
+ 	  as its backing storage.
+diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/super.c
+--- linux-2.6.34.noarch/fs/exofs/super.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/super.c	2010-08-23 12:09:03.281511951 -0400
+@@ -621,6 +621,7 @@ static int exofs_fill_super(struct super
+ 	sb->s_fs_info = sbi;
+ 	sb->s_op = &exofs_sops;
+ 	sb->s_export_op = &exofs_export_ops;
++	exofs_init_export(sb);
+ 	root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF);
+ 	if (IS_ERR(root)) {
+ 		EXOFS_ERR("ERROR: exofs_iget failed\n");
+diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exportfs/expfs.c
+--- linux-2.6.34.noarch/fs/exportfs/expfs.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-08-23 12:09:03.282511528 -0400
+@@ -16,6 +16,13 @@
+ #include <linux/namei.h>
+ #include <linux/sched.h>
+ 
++#if defined(CONFIG_PNFSD)
++struct pnfsd_cb_ctl pnfsd_cb_ctl = {	/* global instance, exported below */
++	.lock = __SPIN_LOCK_UNLOCKED(pnfsd_cb_ctl.lock)	/* static lock init */
++};
++EXPORT_SYMBOL(pnfsd_cb_ctl);
++#endif /* CONFIG_PNFSD */
++
+ #define dprintk(fmt, args...) do{}while(0)
+ 
+ 
+diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/exportfs/Makefile
+--- linux-2.6.34.noarch/fs/exportfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-08-23 12:09:03.282511528 -0400
+@@ -3,4 +3,7 @@
+ 
+ obj-$(CONFIG_EXPORTFS) += exportfs.o
+ 
+-exportfs-objs := expfs.o
++exportfs-y				:= expfs.o
++exportfs-$(CONFIG_EXPORTFS_FILE_LAYOUT)	+= nfs4filelayoutxdr.o
++exportfs-$(CONFIG_EXPORTFS_OSD_LAYOUT)	+= pnfs_osd_xdr_srv.o
++exportfs-$(CONFIG_EXPORTFS_BLOCK_LAYOUT) += nfs4blocklayoutxdr.o
+diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c
+--- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-08-23 12:09:03.283511561 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-08-23 12:09:03.283511561 -0400
+@@ -0,0 +1,158 @@
++/*
++ *  linux/fs/exportfs/nfs4blocklayoutxdr.c
++ *
++ *
++ *  Created by Rick McNeal on 3/31/08.
++ *  Copyright 2008 __MyCompanyName__. All rights reserved.
++ *
++ */
++#include <linux/module.h>
++#include <linux/sunrpc/svc.h>
++#include <linux/nfs4.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++static int
++bl_encode_simple(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	__be32 *p = exp_xdr_reserve_space(xdr,
++					  12 + 4 + bld->u.simple.bld_sig_len);
++
++	if (!p)
++		return -ETOOSMALL;
++	/* 12 fixed bytes (u32 + u64), then the length-prefixed signature. */
++	p = exp_xdr_encode_u32(p, 1);	/* presumably a count -- TODO confirm */
++	p = exp_xdr_encode_u64(p, bld->u.simple.bld_offset);
++	exp_xdr_encode_opaque(p, bld->u.simple.bld_sig,
++			      bld->u.simple.bld_sig_len);
++
++	return 0;
++}
++
++static int
++bl_encode_slice(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 2 + 2 + 1);
++
++	if (!p)
++		return -ETOOSMALL;
++	/* Five words: 64-bit start, 64-bit length, 32-bit volume index. */
++	p = exp_xdr_encode_u64(p, bld->u.slice.bld_start);
++	p = exp_xdr_encode_u64(p, bld->u.slice.bld_len);
++	exp_xdr_encode_u32(p, bld->u.slice.bld_index);
++
++	return 0;
++}
++
++static int
++bl_encode_concat(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	return -ENOTSUPP;	/* nothing is encoded; both arguments unused */
++}
++
++static int
++bl_encode_stripe(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	/* In 32-bit words: chunk size (2) + stripe count (1) + the indices. */
++	int i, nwords = 2 + 1 + bld->u.stripe.bld_stripes;
++	__be32 *p = exp_xdr_reserve_qwords(xdr, nwords);
++	if (!p)		/* same failure code as the other bl_encode_*() */
++		return -ETOOSMALL;
++	p = exp_xdr_encode_u64(p, bld->u.stripe.bld_chunk_size);
++	p = exp_xdr_encode_u32(p, bld->u.stripe.bld_stripes);
++	for (i = 0; i < bld->u.stripe.bld_stripes; i++)
++		p = exp_xdr_encode_u32(p, bld->u.stripe.bld_stripe_indexs[i]);
++	return 0;
++}
++
++int
++blocklayout_encode_devinfo(struct exp_xdr_stream *xdr,
++			   const struct list_head *volumes)
++{
++	u32				num_vols	= 0,
++					*layoutlen_p	= xdr->p;
++	pnfs_blocklayout_devinfo_t	*bld;
++	int				status		= 0;
++	__be32 *p;
++	/* Two leading words are back-filled at the end: byte length, count. */
++	p = exp_xdr_reserve_qwords(xdr, 2);
++	if (!p)
++		return -ETOOSMALL;
++	p += 2;
++
++	/*
++	 * All simple volumes with their signature are required to be listed
++	 * first.
++	 */
++	list_for_each_entry(bld, volumes, bld_list) {
++		num_vols++;
++		p = exp_xdr_reserve_qwords(xdr, 1);
++		if (!p)
++			return -ETOOSMALL;
++		p = exp_xdr_encode_u32(p, bld->bld_type);
++		switch (bld->bld_type) {
++			case PNFS_BLOCK_VOLUME_SIMPLE:
++				status = bl_encode_simple(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_SLICE:
++				status = bl_encode_slice(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_CONCAT:
++				status = bl_encode_concat(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_STRIPE:
++				status = bl_encode_stripe(xdr, bld);
++				break;
++			default:
++				BUG();
++		}
++		if (status)
++			goto error;	/* leading two words stay unfilled */
++	}
++
++	/* ---- Fill in the overall length and number of volumes ---- */
++	p = exp_xdr_encode_u32(layoutlen_p, (xdr->p - layoutlen_p - 1) * 4);
++	exp_xdr_encode_u32(p, num_vols);
++
++error:
++	return status;
++}
++EXPORT_SYMBOL_GPL(blocklayout_encode_devinfo);
++
++enum nfsstat4
++blocklayout_encode_layout(struct exp_xdr_stream *xdr,
++			  const struct list_head *bl_head)
++{
++	struct pnfs_blocklayout_layout	*b;
++	u32				*layoutlen_p	= xdr->p,
++					extents		= 0;
++	__be32 *p;
++
++	/*
++	 * Save spot for opaque block layout length and number of extents,
++	 * fill-in later.
++	 */
++	p = exp_xdr_reserve_qwords(xdr, 2);
++	if (!p)
++		return NFS4ERR_TOOSMALL;
++	p += 2;
++	/* Each extent takes 11 words: five 64-bit fields plus bll_es. */
++	list_for_each_entry(b, bl_head, bll_list) {
++		extents++;
++		p = exp_xdr_reserve_qwords(xdr, 5 * 2 + 1);
++		if (!p)
++			return NFS4ERR_TOOSMALL;
++		p = exp_xdr_encode_u64(p, b->bll_vol_id.sbid);
++		p = exp_xdr_encode_u64(p, b->bll_vol_id.devid);
++		p = exp_xdr_encode_u64(p, b->bll_foff);
++		p = exp_xdr_encode_u64(p, b->bll_len);
++		p = exp_xdr_encode_u64(p, b->bll_soff);
++		p = exp_xdr_encode_u32(p, b->bll_es);
++	}
++
++	/* ---- Fill in the overall length and number of extents ---- */
++	p = exp_xdr_encode_u32(layoutlen_p, (p - layoutlen_p - 1) * 4);
++	exp_xdr_encode_u32(p, extents);
++
++	return NFS4_OK;
++}
++EXPORT_SYMBOL_GPL(blocklayout_encode_layout);
+diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c
+--- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-08-23 12:09:03.283511561 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-08-23 12:09:03.283511561 -0400
+@@ -0,0 +1,218 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#include <linux/exp_xdr.h>
++#include <linux/module.h>
++#include <linux/nfs4.h>
++#include <linux/nfsd/nfsfh.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++/* Local no-op dprintk, so filesystems are not dependent on sunrpc */
++#ifdef dprintk
++#undef dprintk
++#endif
++#define dprintk(fmt, args...)	do { } while (0)
++
++/* Calculate the XDR length (32-bit words) of the GETDEVICEINFO4resok body,
++ * excluding gdir_notification and the gdir_device_addr da_layout_type.
++ * Each multipath address is sized on its own; they may differ in length. */
++static int fl_devinfo_xdr_words(const struct pnfs_filelayout_device *fdev)
++{
++	struct pnfs_filelayout_devaddr *fl_addr;
++	struct pnfs_filelayout_multipath *mp;
++	int i, j, nwords;
++
++	/* da_addr_body length, indice length, indices,
++	 * multipath_list4 length */
++	nwords = 1 + 1 + fdev->fl_stripeindices_length + 1;
++	for (i = 0; i < fdev->fl_device_length; i++) {
++		mp = &fdev->fl_device_list[i];
++		nwords++; /* multipath list length */
++		for (j = 0; j < mp->fl_multipath_length; j++) {
++			fl_addr = &mp->fl_multipath_list[j];
++			nwords += 1 + exp_xdr_qwords(fl_addr->r_netid.len);
++			nwords += 1 + exp_xdr_qwords(fl_addr->r_addr.len);
++		}
++	}
++	dprintk("<-- %s nwords %d\n", __func__, nwords);
++	return nwords;
++}
++
++/* Encodes the nfsv4_1_file_layout_ds_addr4 structure from draft 13
++ * on the response stream, after reserving fl_devinfo_xdr_words() words.
++ * Returns 0, or -ETOOSMALL if the reservation fails: linux error codes
++ * (not nfs) since these values are being returned to the file system.
++ */
++int
++filelayout_encode_devinfo(struct exp_xdr_stream *xdr,
++			  const struct pnfs_filelayout_device *fdev)
++{
++	unsigned int i, j, len = 0, opaque_words;
++	u32 *p_in;
++	u32 index_count = fdev->fl_stripeindices_length;
++	u32 dev_count = fdev->fl_device_length;
++	int error = 0;
++	__be32 *p;
++
++	opaque_words = fl_devinfo_xdr_words(fdev);
++	dprintk("%s: Begin indx_cnt: %u dev_cnt: %u total size %u\n",
++		__func__,
++		index_count,
++		dev_count,
++		opaque_words*4);
++
++	/* check space for opaque length */
++	p = p_in = exp_xdr_reserve_qwords(xdr, opaque_words);
++	if (!p) {
++		error =  -ETOOSMALL;
++		goto out;
++	}
++
++	/* Fill in length later */
++	p++;
++
++	/* encode device list indices */
++	p = exp_xdr_encode_u32(p, index_count);
++	for (i = 0; i < index_count; i++)
++		p = exp_xdr_encode_u32(p, fdev->fl_stripeindices_list[i]);
++
++	/* encode device list */
++	p = exp_xdr_encode_u32(p, dev_count);
++	for (i = 0; i < dev_count; i++) {
++		struct pnfs_filelayout_multipath *mp = &fdev->fl_device_list[i];
++
++		p = exp_xdr_encode_u32(p, mp->fl_multipath_length);
++		for (j = 0; j < mp->fl_multipath_length; j++) {
++			struct pnfs_filelayout_devaddr *da =
++						&mp->fl_multipath_list[j];
++
++			/* Encode device info */
++			p = exp_xdr_encode_opaque(p, da->r_netid.data,
++						     da->r_netid.len);
++			p = exp_xdr_encode_opaque(p, da->r_addr.data,
++						     da->r_addr.len);
++		}
++	}
++
++	/* backfill in length. Subtract 4 for da_addr_body size */
++	len = (char *)p - (char *)p_in;
++	exp_xdr_encode_u32(p_in, len - 4);
++
++	error = 0;
++out:
++	dprintk("%s: End err %d xdrlen %d\n",
++		__func__, error, len);
++	return error;
++}
++EXPORT_SYMBOL(filelayout_encode_devinfo);
++
++/* Encodes the loc_body structure from draft 13
++ * on the response stream.
++ * Unlike filelayout_encode_devinfo(), this returns an nfsstat4 value:
++ * NFS4_OK, NFS4ERR_LAYOUTUNAVAILABLE or NFS4ERR_TOOSMALL.
++ */
++enum nfsstat4
++filelayout_encode_layout(struct exp_xdr_stream *xdr,
++			 const struct pnfs_filelayout_layout *flp)
++{
++	u32 len = 0, nfl_util, fhlen, i;
++	u32 *layoutlen_p;
++	enum nfsstat4 nfserr;
++	__be32 *p;
++
++	dprintk("%s: device_id %llx:%llx fsi %u, numfh %u\n",
++		__func__,
++		flp->device_id.pnfs_fsid,
++		flp->device_id.pnfs_devid,
++		flp->lg_first_stripe_index,
++		flp->lg_fh_length);
++
++	/* Ensure file system added at least one file handle */
++	if (flp->lg_fh_length <= 0) {
++		dprintk("%s: File Layout has no file handles!!\n", __func__);
++		nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		goto out;
++	}
++
++	/* Ensure room for len, devid, util, first_stripe_index,
++	 * pattern_offset, number of filehandles */
++	p = layoutlen_p = exp_xdr_reserve_qwords(xdr, 1+2+2+1+1+2+1);
++	if (!p) {
++		nfserr = NFS4ERR_TOOSMALL;
++		goto out;
++	}
++
++	/* save spot for opaque file layout length, fill-in later*/
++	p++;
++
++	/* encode device id */
++	p = exp_xdr_encode_u64(p, flp->device_id.sbid);
++	p = exp_xdr_encode_u64(p, flp->device_id.devid);
++
++	/* set and encode flags */
++	nfl_util = flp->lg_stripe_unit;
++	if (flp->lg_commit_through_mds)
++		nfl_util |= NFL4_UFLG_COMMIT_THRU_MDS;
++	if (flp->lg_stripe_type == STRIPE_DENSE)
++		nfl_util |= NFL4_UFLG_DENSE;
++	p = exp_xdr_encode_u32(p, nfl_util);
++
++	/* encode first stripe index */
++	p = exp_xdr_encode_u32(p, flp->lg_first_stripe_index);
++
++	/* encode striping pattern start */
++	p = exp_xdr_encode_u64(p, flp->lg_pattern_offset);
++
++	/* encode number of file handles */
++	p = exp_xdr_encode_u32(p, flp->lg_fh_length);
++
++	/* encode file handles */
++	for (i = 0; i < flp->lg_fh_length; i++) {
++		fhlen = flp->lg_fh_list[i].fh_size;
++		p = exp_xdr_reserve_space(xdr, 4 + fhlen);
++		if (!p) {
++			nfserr = NFS4ERR_TOOSMALL;
++			goto out;
++		}
++		p = exp_xdr_encode_opaque(p, &flp->lg_fh_list[i].fh_base, fhlen);
++	}
++
++	/* Set number of bytes encoded =  total_bytes_encoded - length var */
++	len = (char *)p - (char *)layoutlen_p;
++	exp_xdr_encode_u32(layoutlen_p, len - 4);
++
++	nfserr = NFS4_OK;
++out:
++	dprintk("%s: End err %u xdrlen %d\n",
++		__func__, nfserr, len);
++	return nfserr;
++}
++EXPORT_SYMBOL(filelayout_encode_layout);
+diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c
+--- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-08-23 12:09:03.284511493 -0400
++++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-08-23 12:09:03.284511493 -0400
+@@ -0,0 +1,289 @@
++/*
++ *  pnfs_osd_xdr_srv.c
++ *
++ *  Object-Based pNFS Layout XDR layer
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/pnfs_osd_xdr.h>
++
++/*
++ * struct pnfs_osd_data_map {
++ * 	u32	odm_num_comps;
++ * 	u64	odm_stripe_unit;
++ * 	u32	odm_group_width;
++ * 	u32	odm_group_depth;
++ * 	u32	odm_mirror_cnt;
++ * 	u32	odm_raid_algorithm;
++ * };
++ */
++static int pnfs_osd_xdr_encode_data_map(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_data_map *data_map)
++{
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 1+2+1+1+1+1);
++
++	if (!p)
++		return -E2BIG;
++	/* Seven words in all; odm_stripe_unit is the only 64-bit field. */
++	p = exp_xdr_encode_u32(p, data_map->odm_num_comps);
++	p = exp_xdr_encode_u64(p, data_map->odm_stripe_unit);
++	p = exp_xdr_encode_u32(p, data_map->odm_group_width);
++	p = exp_xdr_encode_u32(p, data_map->odm_group_depth);
++	p = exp_xdr_encode_u32(p, data_map->odm_mirror_cnt);
++	p = exp_xdr_encode_u32(p, data_map->odm_raid_algorithm);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline int pnfs_osd_xdr_encode_objid(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_objid *object_id)
++{
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 2+2+2+2);
++	struct nfsd4_pnfs_deviceid *dev_id =
++		(struct nfsd4_pnfs_deviceid *)&object_id->oid_device_id;
++
++	if (!p)
++		return -E2BIG;
++	/* The device id goes out as two 64-bit halves (sbid, devid). */
++	p = exp_xdr_encode_u64(p, dev_id->sbid);
++	p = exp_xdr_encode_u64(p, dev_id->devid);
++	p = exp_xdr_encode_u64(p, object_id->oid_partition_id);
++	p = exp_xdr_encode_u64(p, object_id->oid_object_id);
++
++	return 0;
++}
++
++/*
++ * enum pnfs_osd_cap_key_sec4 {
++ * 	PNFS_OSD_CAP_KEY_SEC_NONE = 0,
++ * 	PNFS_OSD_CAP_KEY_SEC_SSV  = 1
++ * };
++ *
++ * struct pnfs_osd_object_cred {
++ * 	struct pnfs_osd_objid		oc_object_id;
++ * 	u32				oc_osd_version;
++ * 	u32				oc_cap_key_sec;
++ * 	struct pnfs_osd_opaque_cred	oc_cap_key
++ * 	struct pnfs_osd_opaque_cred	oc_cap;
++ * };
++ */
++static int pnfs_osd_xdr_encode_object_cred(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_object_cred *olo_comp)
++{
++	__be32 *p;
++	int err;
++
++	err = pnfs_osd_xdr_encode_objid(xdr, &olo_comp->oc_object_id);
++	if (err)
++		return err;
++	/* Three fixed words, then the length-prefixed oc_cap credential. */
++	p = exp_xdr_reserve_space(xdr, 3*4 + 4+olo_comp->oc_cap.cred_len);
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u32(p, olo_comp->oc_osd_version);
++
++	/* No sec for now */
++	p = exp_xdr_encode_u32(p, PNFS_OSD_CAP_KEY_SEC_NONE);
++	p = exp_xdr_encode_u32(p, 0); /* opaque oc_capability_key<> */
++
++	exp_xdr_encode_opaque(p, olo_comp->oc_cap.cred,
++			      olo_comp->oc_cap.cred_len);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_layout {
++ * 	struct pnfs_osd_data_map	olo_map;
++ * 	u32				olo_comps_index;
++ * 	u32				olo_num_comps;
++ * 	struct pnfs_osd_object_cred	*olo_comps;
++ * };
++ */
++int pnfs_osd_xdr_encode_layout(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_layout *pol)
++{
++	__be32 *p;
++	u32 i;
++	int err;
++
++	err = pnfs_osd_xdr_encode_data_map(xdr, &pol->olo_map);
++	if (err)
++		return err;
++	/* olo_comps_index and olo_num_comps, then one cred per component. */
++	p = exp_xdr_reserve_qwords(xdr, 2);
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u32(p, pol->olo_comps_index);
++	p = exp_xdr_encode_u32(p, pol->olo_num_comps);
++
++	for (i = 0; i < pol->olo_num_comps; i++) {
++		err = pnfs_osd_xdr_encode_object_cred(xdr, &pol->olo_comps[i]);
++		if (err)
++			return err;	/* first failing component aborts */
++	}
++
++	return 0;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_encode_layout);
++
++static int _encode_string(struct exp_xdr_stream *xdr,
++			  const struct nfs4_string *str)
++{
++	__be32 *p = exp_xdr_reserve_space(xdr, 4 + str->len);
++	/* Reserved: a 4-byte length word plus the str->len data bytes. */
++	if (!p)
++		return -E2BIG;
++	exp_xdr_encode_opaque(p, str->data, str->len);
++	return 0;
++}
++
++/* struct pnfs_osd_deviceaddr {
++ * 	struct pnfs_osd_targetid	oda_targetid;
++ * 	struct pnfs_osd_targetaddr	oda_targetaddr;
++ * 	u8				oda_lun[8];
++ * 	struct nfs4_string		oda_systemid;
++ * 	struct pnfs_osd_object_cred	oda_root_obj_cred;
++ * 	struct nfs4_string		oda_osdname;
++ * };
++ */
++int pnfs_osd_xdr_encode_deviceaddr(
++	struct exp_xdr_stream *xdr, struct pnfs_osd_deviceaddr *devaddr)
++{
++	__be32 *p;
++	int err;
++	/* Fixed-size head first; the variable-length fields follow it. */
++	p = exp_xdr_reserve_space(xdr, 4 + 4 + sizeof(devaddr->oda_lun));
++	if (!p)
++		return -E2BIG;
++
++	/* Empty oda_targetid */
++	p = exp_xdr_encode_u32(p, OBJ_TARGET_ANON);
++
++	/* Empty oda_targetaddr for now */
++	p = exp_xdr_encode_u32(p, 0);
++
++	/* oda_lun */
++	exp_xdr_encode_bytes(p, devaddr->oda_lun, sizeof(devaddr->oda_lun));
++
++	err = _encode_string(xdr, &devaddr->oda_systemid);
++	if (err)
++		return err;
++
++	err = pnfs_osd_xdr_encode_object_cred(xdr,
++					      &devaddr->oda_root_obj_cred);
++	if (err)
++		return err;
++
++	err = _encode_string(xdr, &devaddr->oda_osdname);
++	if (err)
++		return err;
++
++	return 0;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_encode_deviceaddr);
++
++/*
++ * struct pnfs_osd_layoutupdate {
++ * 	u32	dsu_valid;
++ * 	s64	dsu_delta;
++ * 	u32	olu_ioerr_flag;
++ * };
++ */
++__be32 *
++pnfs_osd_xdr_decode_layoutupdate(struct pnfs_osd_layoutupdate *lou, __be32 *p)
++{
++	lou->dsu_valid = be32_to_cpu(*p++);
++	if (lou->dsu_valid)	/* dsu_delta is only read when flagged valid */
++		p = xdr_decode_hyper(p, &lou->dsu_delta);
++	lou->olu_ioerr_flag = be32_to_cpu(*p++);
++	return p;	/* first word after the decoded update */
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_decode_layoutupdate);
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline __be32 *
++pnfs_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid)
++{
++	/* FIXME: p = xdr_decode_fixed(...) */
++	memcpy(objid->oid_device_id.data, p, sizeof(objid->oid_device_id.data));
++	p += XDR_QUADLEN(sizeof(objid->oid_device_id.data));
++	/* The raw device id bytes are followed by two 64-bit ids. */
++	p = xdr_decode_hyper(p, &objid->oid_partition_id);
++	p = xdr_decode_hyper(p, &objid->oid_object_id);
++	return p;
++}
++
++/*
++ * struct pnfs_osd_ioerr {
++ * 	struct pnfs_osd_objid	oer_component;
++ * 	u64			oer_comp_offset;
++ * 	u64			oer_comp_length;
++ * 	u32			oer_iswrite;
++ * 	u32			oer_errno;
++ * };
++ */
++__be32 *
++pnfs_osd_xdr_decode_ioerr(struct pnfs_osd_ioerr *ioerr, __be32 *p)
++{
++	p = pnfs_osd_xdr_decode_objid(p, &ioerr->oer_component);
++	p = xdr_decode_hyper(p, &ioerr->oer_comp_offset);
++	p = xdr_decode_hyper(p, &ioerr->oer_comp_length);
++	ioerr->oer_iswrite = be32_to_cpu(*p++);
++	ioerr->oer_errno = be32_to_cpu(*p++);
++	return p;	/* first word after the decoded ioerr */
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_decode_ioerr);
+diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gfs2/ops_fstype.c
+--- linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-08-23 12:09:03.285539075 -0400
+@@ -19,6 +19,7 @@
+ #include <linux/gfs2_ondisk.h>
+ #include <linux/slow-work.h>
+ #include <linux/quotaops.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
+ 
+ #include "gfs2.h"
+ #include "incore.h"
+@@ -1146,6 +1147,9 @@ static int fill_super(struct super_block
+ 	sb->s_magic = GFS2_MAGIC;
+ 	sb->s_op = &gfs2_super_ops;
+ 	sb->s_export_op = &gfs2_export_ops;
++#if defined(CONFIG_PNFSD)
++	sb->s_pnfs_op = &pnfs_dlm_export_ops;
++#endif /* CONFIG_PNFSD */
+ 	sb->s_xattr = gfs2_xattr_handlers;
+ 	sb->s_qcop = &gfs2_quotactl_ops;
+ 	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
+--- linux-2.6.34.noarch/fs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/Kconfig	2010-08-23 12:09:03.286512316 -0400
+@@ -224,6 +224,31 @@ config LOCKD_V4
+ config EXPORTFS
+ 	tristate
+ 
++config EXPORTFS_FILE_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 files layout type.
++	  Must be automatically selected by supporting filesystems.
++
++config EXPORTFS_OSD_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 objects layout type.
++	  Must be automatically selected by supporting osd
++	  filesystems.
++
++	  If unsure, say N.
++
++config EXPORTFS_BLOCK_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 blocks layout type.
++	  Must be automatically selected by supporting filesystems.
++
++
+ config NFS_ACL_SUPPORT
+ 	tristate
+ 	select FS_POSIX_ACL
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-08-23 12:09:03.287381619 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-08-23 12:09:03.287381619 -0400
+@@ -0,0 +1,66 @@
++#include <linux/module.h>
++#include <linux/uaccess.h>
++#include <linux/proc_fs.h>
++#include <linux/string.h>
++#include <linux/slab.h>
++#include <linux/ctype.h>
++#include <linux/sched.h>
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY NFSDBG_PNFS_LD
++
++struct pipefs_list bl_device_list;
++struct dentry *bl_device_pipe;
++
++ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, size_t len)
++{
++	int err;
++	struct pipefs_hdr *msg;
++
++	dprintk("Entering %s...\n", __func__);
++	/* Read the message from src; a failure is returned as its errno. */
++	msg = pipefs_readmsg(filp, src, len);
++	if (IS_ERR(msg)) {
++		dprintk("ERROR: unable to read pipefs message.\n");
++		return PTR_ERR(msg);
++	}
++
++	/* now assign the result, which wakes the blocked thread */
++	err = pipefs_assign_upcall_reply(msg, &bl_device_list);
++	if (err) {
++		dprintk("ERROR: failed to assign upcall with id %u\n",
++			msg->msgid);
++		kfree(msg);
++	}
++	return len;	/* len is reported even when the assign failed */
++}
++
++static const struct rpc_pipe_ops bl_pipe_ops = {
++	.upcall         = pipefs_generic_upcall,
++	.downcall       = bl_pipe_downcall,	/* the only local handler */
++	.destroy_msg    = pipefs_generic_destroy_msg,
++};
++
++int bl_pipe_init(void)
++{
++	dprintk("%s: block_device pipefs registering...\n", __func__);
++	/* List first, so it is valid before the pipe exists and on failure. */
++	pipefs_init_list(&bl_device_list);
++	bl_device_pipe = pipefs_mkpipe("bl_device_pipe", &bl_pipe_ops, 1);
++	if (IS_ERR(bl_device_pipe))
++		dprintk("ERROR, unable to make block_device pipe\n");
++	else if (!bl_device_pipe)
++		dprintk("bl_device_pipe is NULL!\n");
++	else
++		dprintk("bl_device_pipe created!\n");
++	return IS_ERR(bl_device_pipe) ? PTR_ERR(bl_device_pipe) : 0;
++}
++
++void bl_pipe_exit(void)
++{
++	dprintk("%s: block_device pipefs unregistering...\n", __func__);
++
++	/* Nothing to close if bl_device_pipe holds an error pointer. */
++	if (!IS_ERR(bl_device_pipe))
++		pipefs_closepipe(bl_device_pipe);
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-08-23 12:09:03.288501648 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-08-23 12:09:03.288501648 -0400
+@@ -0,0 +1,1160 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayout.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++
++#include <linux/buffer_head.h> /* various write calls */
++#include <linux/bio.h> /* struct bio */
++#include <linux/vmalloc.h>
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
++MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
++
++/* Callback operations to the pNFS client */
++static struct pnfs_client_operations *pnfs_block_callback_ops;
++
++static void print_page(struct page *page)
++{	/* debug aid: dumps the page pointer and eight of its flag tests */
++	dprintk("PRINTPAGE page %p\n", page);
++	dprintk("        PagePrivate %d\n", PagePrivate(page));
++	dprintk("        PageUptodate %d\n", PageUptodate(page));
++	dprintk("        PageError %d\n", PageError(page));
++	dprintk("        PageDirty %d\n", PageDirty(page));
++	dprintk("        PageReferenced %d\n", PageReferenced(page));
++	dprintk("        PageLocked %d\n", PageLocked(page));
++	dprintk("        PageWriteback %d\n", PageWriteback(page));
++	dprintk("        PageMappedToDisk %d\n", PageMappedToDisk(page));
++	dprintk("\n");
++}
++
++/* Given the extent be that covers isect, tell whether the page data there
++ * needs to be initialized: always for NONE_DATA, for INVALID_DATA only
++ * while the sector is still uninitialized, never for any other state.
++ */
++static int is_hole(struct pnfs_block_extent *be, sector_t isect)
++{
++	if (be->be_state == PNFS_BLOCK_NONE_DATA)
++		return 1;
++	if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++		return !is_sector_initialized(be->be_inval, isect);
++	return 0;
++}
++
++/* Given the extent be that covers isect, tell whether page data can be
++ * written to disk: always for READWRITE_DATA, for INVALID_DATA only once
++ * the sector has been initialized, never for any other state.
++ */
++static int is_writable(struct pnfs_block_extent *be, sector_t isect)
++{
++	if (be->be_state == PNFS_BLOCK_READWRITE_DATA)
++		return 1;
++	if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++		return is_sector_initialized(be->be_inval, isect);
++	return 0;
++}
++
++static int
++dont_like_caller(struct nfs_page *req)
++{
++	/*
++	 * A non-zero wb_complete count means we were called by _multi (1);
++	 * a zero count means we were called by _one (0).
++	 */
++	int from_multi = atomic_read(&req->wb_complete) ? 1 : 0;
++
++	return from_multi;
++}
++
++static enum pnfs_try_status
++bl_commit(struct nfs_write_data *nfs_data,
++	  int sync)
++{
++	dprintk("%s enter\n", __func__);
++	return PNFS_NOT_ATTEMPTED;	/* both arguments are ignored */
++}
++
++/* The data we are handed might be spread across several bios.  We need
++ * to track when the last one is finished.
++ */
++struct parallel_io {
++	struct kref refcnt;	/* bl_submit_bio() takes one per bio */
++	struct rpc_call_ops call_ops;
++	void (*pnfs_callback) (void *data);	/* run by destroy_parallel() */
++	void *data;	/* argument handed to pnfs_callback */
++};
++
++static inline struct parallel_io *alloc_parallel(void *data)
++{
++	/* Only refcnt and data are set; the other fields stay untouched. */
++	struct parallel_io *par = kmalloc(sizeof(*par), GFP_KERNEL);
++
++	if (!par)
++		return NULL;
++	kref_init(&par->refcnt);
++	par->data = data;
++	return par;
++}
++
++static inline void get_parallel(struct parallel_io *p)
++{
++	kref_get(&p->refcnt);	/* paired with put_parallel() */
++}
++
++static void destroy_parallel(struct kref *kref)
++{
++	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
++	/* kref release hook: run the completion callback, then free p. */
++	dprintk("%s enter\n", __func__);
++	p->pnfs_callback(p->data);
++	kfree(p);
++}
++
++static inline void put_parallel(struct parallel_io *p)
++{
++	kref_put(&p->refcnt, destroy_parallel);	/* last put destroys p */
++}
++
++static struct bio *
++bl_submit_bio(int rw, struct bio *bio)
++{
++	if (!bio)
++		return NULL;
++	get_parallel(bio->bi_private);	/* reference held for this bio */
++	dprintk("%s submitting %s bio %u@%llu\n", __func__,
++		rw == READ ? "read" : "write",
++		bio->bi_size, (u64)bio->bi_sector);
++	submit_bio(rw, bio);
++	return NULL;	/* always NULL, submitted or not */
++}
++
++static inline void
++bl_done_with_rpage(struct page *page, const int ok)
++{
++	if (!ok) {
++		ClearPageUptodate(page);
++		SetPageError(page);
++		SetPagePnfsErr(page);
++		return;
++	}
++	ClearPagePnfsErr(page);
++	SetPageUptodate(page);
++	/* Page is unlocked via rpc_release.  Should really be done here. */
++}
++
++/* This is basically copied from mpage_end_io_read */
++static void bl_end_io_read(struct bio *bio, int err)
++{
++	void *data = bio->bi_private;
++	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
++	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
++
++	do {
++		struct page *page = bvec->bv_page;
++		/* Walks the vector backwards, prefetching the next flags. */
++		if (--bvec >= bio->bi_io_vec)
++			prefetchw(&bvec->bv_page->flags);
++		bl_done_with_rpage(page, uptodate);
++	} while (bvec >= bio->bi_io_vec);
++	bio_put(bio);
++	put_parallel(data);	/* drops the ref bl_submit_bio() took */
++}
++
++static void bl_read_cleanup(struct work_struct *work)
++{	/* work handler installed by bl_end_par_io_read() */
++	struct rpc_task *task;
++	struct nfs_read_data *rdata;
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	rdata = container_of(task, struct nfs_read_data, task);
++	pnfs_block_callback_ops->nfs_readlist_complete(rdata);
++}
++
++static void
++bl_end_par_io_read(void *data)
++{
++	struct nfs_read_data *rdata = data;
++	/* Defer completion to a work item that runs bl_read_cleanup(). */
++	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
++	schedule_work(&rdata->task.u.tk_work);
++}
++
++/* The normal .rpc_call_done callback must not be used for this I/O,
++ * so the copied rpc_call_ops get this no-op stub in its place.
++ */
++static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata)
++{
++	/* intentionally empty */
++}
++
++/* Read the pages of @rdata directly from the layout's block devices;
++ * PNFS_NOT_ATTEMPTED tells the caller to go through the MDS instead. */
++static enum pnfs_try_status
++bl_read_pagelist(struct nfs_read_data *rdata, unsigned nr_pages)
++{
++	int i, hole;
++	struct bio *bio = NULL;
++	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
++	sector_t isect, extent_length = 0;
++	struct parallel_io *par;
++	loff_t f_offset = rdata->args.offset;
++	size_t count = rdata->args.count;
++	struct page **pages = rdata->args.pages;
++	int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
++
++	dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__,
++	       nr_pages, f_offset, count);
++
++	if (dont_like_caller(rdata->req)) {
++		dprintk("%s dont_like_caller failed\n", __func__);
++		goto use_mds;
++	}
++	if ((nr_pages == 1) && PagePnfsErr(rdata->req->wb_page)) {
++		/* Fall back to mds for a read_page after a failed read_pages */
++		dprintk("%s PG_pnfserr set\n", __func__);
++		goto use_mds;
++	}
++	par = alloc_parallel(rdata);
++	if (!par)
++		goto use_mds;
++	par->call_ops = *rdata->pdata.call_ops;
++	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
++	par->pnfs_callback = bl_end_par_io_read;
++	/* At this point, we can no longer jump to use_mds */
++
++	isect = (sector_t) (f_offset >> 9);
++	/* Code assumes extents are page-aligned */
++	for (i = pg_index; i < nr_pages; i++) {
++		if (!extent_length) {
++			/* We've used up the previous extent */
++			put_extent(be);
++			put_extent(cow_read);
++			bio = bl_submit_bio(READ, bio);
++			/* Get the next one */
++			be = find_get_extent(BLK_LSEG2EXT(rdata->pdata.lseg),
++					     isect, &cow_read);
++			if (!be) {
++				/* Error out this page */
++				bl_done_with_rpage(pages[i], 0);
++				break;
++			}
++			extent_length = be->be_length -
++				(isect - be->be_f_offset);
++			if (cow_read) {
++				sector_t cow_length = cow_read->be_length -
++					(isect - cow_read->be_f_offset);
++				extent_length = min(extent_length, cow_length);
++			}
++		}
++		hole = is_hole(be, isect);
++		if (hole && !cow_read) {
++			bio = bl_submit_bio(READ, bio);
++			/* Fill hole w/ zeroes w/o accessing device */
++			dprintk("%s Zeroing page for hole\n", __func__);
++			zero_user(pages[i], 0,
++				  min_t(int, PAGE_CACHE_SIZE, count));
++			print_page(pages[i]);
++			bl_done_with_rpage(pages[i], 1);
++		} else {
++			struct pnfs_block_extent *be_read;
++
++			be_read = (hole && cow_read) ? cow_read : be;
++			for (;;) {
++				if (!bio) {
++					bio = bio_alloc(GFP_NOIO, nr_pages - i);
++					if (!bio) {
++						/* Error out this page */
++						bl_done_with_rpage(pages[i], 0);
++						break;
++					}
++					bio->bi_sector = isect -
++						be_read->be_f_offset +
++						be_read->be_v_offset;
++					bio->bi_bdev = be_read->be_mdev;
++					bio->bi_end_io = bl_end_io_read;
++					bio->bi_private = par;
++				}
++				if (bio_add_page(bio, pages[i], PAGE_SIZE, 0))
++					break;
++				bio = bl_submit_bio(READ, bio);
++			}
++		}
++		isect += PAGE_CACHE_SIZE >> 9;
++		extent_length -= PAGE_CACHE_SIZE >> 9;
++	}
++	/* Widen before shifting: sector_t can be only 32 bits wide */
++	if (((loff_t)isect << 9) >= rdata->inode->i_size) {
++		rdata->res.eof = 1;
++		rdata->res.count = rdata->inode->i_size - f_offset;
++	} else {
++		rdata->res.count = ((loff_t)isect << 9) - f_offset;
++	}
++	put_extent(be);
++	put_extent(cow_read);
++	bl_submit_bio(READ, bio);
++	put_parallel(par);	/* drop the ref held since alloc_parallel() */
++	return PNFS_ATTEMPTED;
++
++ use_mds:
++	dprintk("Giving up and using normal NFS\n");
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static void mark_extents_written(struct pnfs_block_layout *bl,
++				 __u64 offset, __u32 count)
++{
++	sector_t isect, end;
++	struct pnfs_block_extent *be;
++	__u64 last = offset + count + PAGE_CACHE_SIZE - 1;
++	/* Page-align [offset, offset + count), mark its INVALID_DATA extents */
++	dprintk("%s(%llu, %u)\n", __func__, offset, count);
++	if (count == 0)
++		return;
++	isect = (offset & (long)(PAGE_CACHE_MASK)) >> 9;
++	end = (last & (long)(PAGE_CACHE_MASK)) >> 9;	/* shift in 64 bits */
++	while (isect < end) {
++		sector_t len;
++		be = find_get_extent(bl, isect, NULL);
++		BUG_ON(!be); /* FIXME */
++		len = min(end, be->be_f_offset + be->be_length) - isect;
++		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++			mark_for_commit(be, isect, len); /* What if fails? */
++		isect += len;
++		put_extent(be);
++	}
++}
++
++/* STUB - this needs thought.  Flags @page after a failed write. */
++static inline void
++bl_done_with_wpage(struct page *page, const int ok)
++{
++	if (!ok) {
++		SetPageError(page);
++		SetPagePnfsErr(page);
++		/* This is an inline copy of nfs_zap_mapping */
++		/* This is oh so fishy, and needs deep thought */
++		if (page->mapping->nrpages != 0) {
++			struct inode *inode = page->mapping->host;
++			spin_lock(&inode->i_lock);
++			NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
++			spin_unlock(&inode->i_lock);
++		}
++	}
++	/* end_page_writeback called in rpc_release.  Should be done here. */
++}
++
++/* Write bio completion; mirrors bl_end_io_read (cf. mpage_end_io_read) */
++static void bl_end_io_write(struct bio *bio, int err)
++{
++	void *data = bio->bi_private;
++	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
++	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
++
++	do {	/* walk the bio's pages from last to first */
++		struct page *page = bvec->bv_page;
++
++		if (--bvec >= bio->bi_io_vec)
++			prefetchw(&bvec->bv_page->flags);
++		bl_done_with_wpage(page, uptodate);
++	} while (bvec >= bio->bi_io_vec);
++	bio_put(bio);
++	put_parallel(data);	/* ref taken in bl_submit_bio() */
++}
++
++/* Workqueue half of write completion, queued by bl_end_par_io_write():
++ * marks the written sectors for LAYOUTCOMMIT, then completes the request.
++ */
++static void bl_write_cleanup(struct work_struct *work)
++{
++	struct rpc_task *tk = container_of(work, struct rpc_task, u.tk_work);
++	struct nfs_write_data *wd;
++
++	dprintk("%s enter\n", __func__);
++	wd = container_of(tk, struct nfs_write_data, task);
++	if (wd->task.tk_status == 0) {
++		/* BUG - this should be called after each bio, not after
++		 * all finish, unless have some way of storing success/failure
++		 */
++		struct pnfs_block_layout *bl = BLK_LSEG2EXT(wd->pdata.lseg);
++
++		mark_extents_written(bl, wd->args.offset, wd->args.count);
++	}
++	pnfs_block_callback_ops->nfs_writelist_complete(wd);
++}
++
++/* Called when last of bios associated with a bl_write_pagelist call finishes */
++static void
++bl_end_par_io_write(void *data)
++{
++	struct nfs_write_data *wdata = data;
++
++	/* STUB - ignoring error handling */
++	wdata->task.tk_status = 0;	/* so success is always reported */
++	wdata->verf.committed = NFS_FILE_SYNC;
++	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
++	schedule_work(&wdata->task.u.tk_work);	/* runs bl_write_cleanup */
++}
++
++/* Write the pages of @wdata directly to the layout's block devices. */
++static enum pnfs_try_status
++bl_write_pagelist(struct nfs_write_data *wdata, unsigned nr_pages, int sync)
++{
++	int i;
++	struct bio *bio = NULL;
++	struct pnfs_block_extent *be = NULL;
++	sector_t isect, extent_length = 0;
++	struct parallel_io *par;
++	loff_t offset = wdata->args.offset;
++	size_t count = wdata->args.count;
++	struct page **pages = wdata->args.pages;
++	int pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
++
++	dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
++	if (!wdata->req->wb_lseg) {
++		dprintk("%s no lseg, falling back to MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	if (dont_like_caller(wdata->req)) {
++		dprintk("%s dont_like_caller failed\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	/* At this point, wdata->pages is a (sequential) list of nfs_pages.
++	 * We want to write each, and if there is an error remove it from
++	 * list and call
++	 * nfs_retry_request(req) to have it redone using nfs.
++	 * QUEST? Do as block or per req?  Think have to do per block
++	 * as part of end_bio
++	 */
++	par = alloc_parallel(wdata);
++	if (!par)
++		return PNFS_NOT_ATTEMPTED;
++	par->call_ops = *wdata->pdata.call_ops;
++	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
++	par->pnfs_callback = bl_end_par_io_write;
++	/* At this point, have to be more careful with error handling */
++
++	isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> 9);
++	for (i = pg_index; i < nr_pages; i++) {
++		if (!extent_length) {
++			/* We've used up the previous extent */
++			put_extent(be);
++			bio = bl_submit_bio(WRITE, bio);
++			/* Get the next one */
++			be = find_get_extent(BLK_LSEG2EXT(wdata->pdata.lseg),
++					     isect, NULL);
++			if (!be || !is_writable(be, isect)) {
++				/* FIXME */
++				bl_done_with_wpage(pages[i], 0);
++				break;
++			}
++			extent_length = be->be_length -
++				(isect - be->be_f_offset);
++		}
++		for (;;) {
++			if (!bio) {
++				bio = bio_alloc(GFP_NOIO, nr_pages - i);
++				if (!bio) {
++					/* Error out this page */
++					/* FIXME */
++					bl_done_with_wpage(pages[i], 0);
++					break;
++				}
++				bio->bi_sector = isect - be->be_f_offset +
++					be->be_v_offset;
++				bio->bi_bdev = be->be_mdev;
++				bio->bi_end_io = bl_end_io_write;
++				bio->bi_private = par;
++			}
++			if (bio_add_page(bio, pages[i], PAGE_SIZE, 0))
++				break;
++			bio = bl_submit_bio(WRITE, bio);
++		}
++		isect += PAGE_CACHE_SIZE >> 9;
++		extent_length -= PAGE_CACHE_SIZE >> 9;
++	}
++	wdata->res.count = ((loff_t)isect << 9) -	/* widen, then shift */
++		(offset & (long)PAGE_CACHE_MASK);
++	put_extent(be);
++	bl_submit_bio(WRITE, bio);
++	put_parallel(par);
++	return PNFS_ATTEMPTED;
++}
++
++/* FIXME - range ignored; every extent on every list is dropped */
++static void release_extents(struct pnfs_block_layout *bl,
++			    struct nfs4_pnfs_layout_segment *range)
++{
++	struct list_head *head;
++	struct pnfs_block_extent *ext;
++	int idx;
++
++	spin_lock(&bl->bl_ext_lock);
++	for (idx = 0; idx < EXTENT_LISTS; idx++) {
++		head = &bl->bl_extents[idx];
++		while (!list_empty(head)) {
++			ext = list_first_entry(head, struct pnfs_block_extent,
++					       be_node);
++			list_del(&ext->be_node);
++			put_extent(ext);
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++}
++
++/* Free every pnfs_inval_tracking entry queued on @marks' mtt_stub list */
++static void
++release_inval_marks(struct pnfs_inval_markings *marks)
++{
++	struct pnfs_inval_tracking *cur, *next;
++
++	list_for_each_entry_safe(cur, next, &marks->im_tree.mtt_stub, it_link) {
++		list_del(&cur->it_link);
++		kfree(cur);
++	}
++}
++
++/* Note we are relying on caller locking to prevent nasty races. */
++static void
++bl_free_layout(struct pnfs_layout_type *lo)
++{
++	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
++
++	dprintk("%s enter\n", __func__);
++	release_extents(bl, NULL);	/* range is ignored: drops them all */
++	release_inval_marks(&bl->bl_inval);
++	kfree(bl);	/* allocated by bl_alloc_layout() */
++}
++
++static struct pnfs_layout_type *
++bl_alloc_layout(struct inode *inode)
++{
++	struct pnfs_block_layout	*bl;
++	int i;
++
++	dprintk("%s enter\n", __func__);
++	bl = kzalloc(sizeof(*bl), GFP_KERNEL);	/* zeroed: bl_count is 0 */
++	if (!bl)
++		return NULL;
++	spin_lock_init(&bl->bl_ext_lock);
++	for (i = 0; i < EXTENT_LISTS; i++)	/* as in release_extents() */
++		INIT_LIST_HEAD(&bl->bl_extents[i]);
++	INIT_LIST_HEAD(&bl->bl_commit);
++	bl->bl_blocksize = NFS_SERVER(inode)->pnfs_blksize >> 9;
++	INIT_INVAL_MARKS(&bl->bl_inval, bl->bl_blocksize);
++	return &bl->bl_layout;
++}
++
++static void
++bl_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	dprintk("%s enter\n", __func__);
++	kfree(lseg);	/* allocated by bl_alloc_lseg() */
++}
++
++/* Because the generic infrastructure does not correctly merge layouts,
++ * we pretty much ignore lseg, and store all data layout wide, so we
++ * can correctly merge.  Eventually we should push some correct merge
++ * behavior up to the generic code, as the current behavior tends to
++ * cause lots of unnecessary overlapping LAYOUTGET requests.
++ */
++static struct pnfs_layout_segment *
++bl_alloc_lseg(struct pnfs_layout_type *lo,
++	      struct nfs4_pnfs_layoutget_res *lgr)
++{
++	struct pnfs_layout_segment *seg;
++	int err;
++
++	dprintk("%s enter\n", __func__);
++	/* NULL if out of memory, ERR_PTR() if processing the reply fails */
++	seg = kzalloc(sizeof(*seg), GFP_KERNEL);
++	if (!seg)
++		return NULL;
++	err = nfs4_blk_process_layoutget(lo, lgr);
++	if (err == 0)
++		return seg;
++	/* We don't want to call the full-blown bl_free_lseg,
++	 * since on error extents were not touched.
++	 */
++	/* STUB - we really want to distinguish between 2 error
++	 * conditions here.  This lseg failed, but lo data structures
++	 * are OK, or we hosed the lo data structures.  The calling
++	 * code probably needs to distinguish this too.
++	 */
++	kfree(seg);
++	return ERR_PTR(err);
++}
++
++static int
++bl_setup_layoutcommit(struct pnfs_layout_type *lo,
++		      struct pnfs_layoutcommit_arg *arg)
++{
++	struct nfs_server *nfss = PNFS_NFS_SERVER(lo);
++	struct bl_layoutupdate_data *layoutupdate_data;
++
++	dprintk("%s enter\n", __func__);
++	/* Need to ensure commit is block-size aligned */
++	if (nfss->pnfs_blksize) {
++		u64 mask = nfss->pnfs_blksize - 1; /* NOTE(review): 2^n only? */
++		u64 offset = arg->lseg.offset & mask;
++		/* Move the start down and round the length up to the mask */
++		arg->lseg.offset -= offset;
++		arg->lseg.length += offset + mask;
++		arg->lseg.length &= ~mask;
++	}
++
++	layoutupdate_data = kmalloc(sizeof(struct bl_layoutupdate_data),
++					 GFP_KERNEL);
++	if (unlikely(!layoutupdate_data))
++		return -ENOMEM;
++	INIT_LIST_HEAD(&layoutupdate_data->ranges);
++	arg->layoutdriver_data = layoutupdate_data; /* kfree'd in cleanup */
++
++	return 0;
++}
++
++static void
++bl_encode_layoutcommit(struct pnfs_layout_type *lo, struct xdr_stream *xdr,
++		       const struct pnfs_layoutcommit_arg *arg)
++{
++	dprintk("%s enter\n", __func__);	/* thin wrapper, see below */
++	encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg);
++}
++
++static void
++bl_cleanup_layoutcommit(struct pnfs_layout_type *lo,
++			struct pnfs_layoutcommit_arg *arg, int status)
++{
++	dprintk("%s enter\n", __func__);
++	clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), arg, status);
++	kfree(arg->layoutdriver_data);	/* from bl_setup_layoutcommit() */
++}
++
++static void free_blk_mountid(struct block_mount_id *mid)
++{
++	struct pnfs_block_dev *dev;
++
++	if (!mid)
++		return;
++	spin_lock(&mid->bm_lock);
++	while (!list_empty(&mid->bm_devlist)) {
++		dev = list_first_entry(&mid->bm_devlist,
++				       struct pnfs_block_dev, bm_node);
++		list_del(&dev->bm_node);
++		free_block_dev(dev);	/* NOTE(review): under a spinlock */
++	}
++	spin_unlock(&mid->bm_lock);
++	kfree(mid);
++}
++
++/* This is mostly copied from the filelayout's get_device_info function.
++ * It seems much of this should be at the generic pnfs level.
++ */
++static struct pnfs_block_dev *
++nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
++			struct pnfs_deviceid *d_id, struct list_head *sdlist)
++{
++	struct pnfs_device *dev;
++	struct pnfs_block_dev *rv = NULL;
++	u32 max_resp_sz;
++	int max_pages;
++	struct page **pages = NULL;
++	int i, rc;
++
++	/*
++	 * Use the session max response size as the basis for setting
++	 * GETDEVICEINFO's maxcount
++	 */
++	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
++	max_pages = max_resp_sz >> PAGE_SHIFT;
++	dprintk("%s max_resp_sz %u max_pages %d\n",
++		__func__, max_resp_sz, max_pages);
++
++	dev = kzalloc(sizeof(*dev), GFP_KERNEL); /* out_free tests ->area */
++	if (!dev) {
++		dprintk("%s kmalloc failed\n", __func__);
++		return NULL;
++	}
++
++	pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
++	if (pages == NULL) {
++		kfree(dev);
++		return NULL;
++	}
++	for (i = 0; i < max_pages; i++) {
++		pages[i] = alloc_page(GFP_KERNEL);
++		if (!pages[i])
++			goto out_free;
++	}
++
++	/* Map the pages into one contiguous buffer (dev->area) */
++	dev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
++	if (!dev->area)
++		goto out_free;
++
++	memcpy(&dev->dev_id, d_id, sizeof(*d_id));
++	dev->layout_type = LAYOUT_BLOCK_VOLUME;
++	dev->dev_notify_types = 0;
++	dev->pages = pages;
++	dev->pgbase = 0;
++	dev->pglen = PAGE_SIZE * max_pages;
++	dev->mincount = 0;
++
++	dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
++	rc = pnfs_block_callback_ops->nfs_getdeviceinfo(server, dev);
++	dprintk("%s getdevice info returns %d\n", __func__, rc);
++	if (rc)
++		goto out_free;
++
++	rv = nfs4_blk_decode_device(server, dev, sdlist);
++ out_free:
++	if (dev->area != NULL)
++		vunmap(dev->area);
++	for (i = 0; i < max_pages; i++)
++		if (pages[i])	/* allocation may have stopped early */
++			__free_page(pages[i]);
++	kfree(pages);
++	kfree(dev);
++	return rv;
++}
++
++
++/* Build the mount's block device list; returns 0 or a negative errno. */
++static int
++bl_initialize_mountpoint(struct nfs_server *server, const struct nfs_fh *fh)
++{
++	struct block_mount_id *b_mt_id = NULL;
++	struct pnfs_devicelist *dlist = NULL;
++	struct pnfs_block_dev *bdev;
++	LIST_HEAD(block_disklist);
++	int status = 0, i;
++
++	dprintk("%s enter\n", __func__);
++
++	if (server->pnfs_blksize == 0) {
++		dprintk("%s Server did not return blksize\n", __func__);
++		return -EINVAL;
++	}
++	b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_KERNEL);
++	if (!b_mt_id) {
++		status = -ENOMEM;
++		goto out_error;
++	}
++	/* Initialize nfs4 block layout mount id */
++	spin_lock_init(&b_mt_id->bm_lock);
++	INIT_LIST_HEAD(&b_mt_id->bm_devlist);
++
++	dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_KERNEL);
++	if (!dlist) {
++		status = -ENOMEM;	/* was 0: would report success */
++		goto out_error;
++	}
++	dlist->eof = 0;
++	while (!dlist->eof) {
++		status = pnfs_block_callback_ops->nfs_getdevicelist(
++							server, fh, dlist);
++		if (status)
++			goto out_error;
++		dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n",
++			__func__, dlist->num_devs, dlist->eof);
++		/* For each device returned in dlist, call GETDEVICEINFO, and
++		 * decode the opaque topology encoding to create a flat
++		 * volume topology, matching VOLUME_SIMPLE disk signatures
++		 * to disks in the visible block disk list.
++		 * Construct an LVM meta device from the flat volume topology.
++		 */
++		for (i = 0; i < dlist->num_devs; i++) {
++			bdev = nfs4_blk_get_deviceinfo(server, fh,
++						     &dlist->dev_id[i],
++						     &block_disklist);
++			if (!bdev) {
++				status = -EIO;	/* status is still 0 here */
++				goto out_error;
++			}
++			spin_lock(&b_mt_id->bm_lock);
++			list_add(&bdev->bm_node, &b_mt_id->bm_devlist);
++			spin_unlock(&b_mt_id->bm_lock);
++		}
++	}
++	dprintk("%s SUCCESS\n", __func__);
++	server->pnfs_ld_data = b_mt_id;
++
++ out_return:
++	kfree(dlist);
++	return status;
++
++ out_error:
++	free_blk_mountid(b_mt_id);
++	goto out_return;
++}
++
++static int
++bl_uninitialize_mountpoint(struct nfs_server *server)
++{
++	struct block_mount_id *b_mt_id = server->pnfs_ld_data;
++
++	dprintk("%s enter\n", __func__);
++	free_blk_mountid(b_mt_id); /* NOTE(review): pnfs_ld_data left stale */
++	dprintk("%s RETURNS\n", __func__);
++	return 0;
++}
++
++/* STUB - mark intersection of layout and page as bad, so is not
++ * used again.
++ */
++static void mark_bad_read(void)
++{
++	/* nothing implemented yet */
++}
++
++/* Copied from buffer.c: record the read result on @bh, then unlock it */
++static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
++{
++	if (uptodate) {
++		set_buffer_uptodate(bh);
++	} else {
++		/* This happens, due to failed READA attempts. */
++		clear_buffer_uptodate(bh);
++	}
++	unlock_buffer(bh);
++}
++
++/* Copied from buffer.c; installed as b_end_io by init_page_for_write */
++static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
++{
++	__end_buffer_read_notouch(bh, uptodate);
++}
++
++/*
++ * map_block:  map a requested I/O block (isect) into an offset in the LVM
++ * meta block_device
++ */
++static void
++map_block(sector_t isect, struct pnfs_block_extent *be, struct buffer_head *bh)
++{
++	struct block_device *mdev = be->be_mdev;
++
++	dprintk("%s enter be=%p\n", __func__, be);
++	set_buffer_mapped(bh);
++	bh->b_bdev = mdev;
++	/* Sector on the device, scaled down by the device's block size */
++	bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >>
++		(mdev->bd_inode->i_blkbits - 9);
++
++	dprintk("%s isect %ld, bh->b_blocknr %ld, using bsize %Zd\n",
++				__func__, (long)isect, (long)bh->b_blocknr,
++				bh->b_size);
++}
++
++/* Given an unmapped page, zero it (or read in page for COW),
++ * and set appropriate flags/markings, but it is safe to not initialize
++ * the range given in [from, to).  Loosely based on nobh_write_begin.
++ * Returns 0 on success, -ENOMEM or -EIO (or the error from
++ * mark_initialized_sectors()) on failure.
++ */
++static int
++init_page_for_write(struct pnfs_block_layout *bl, struct page *page,
++		    unsigned from, unsigned to, sector_t **pages_to_mark)
++{
++	struct buffer_head *bh;
++	int inval, ret = -EIO;
++	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
++	sector_t isect;
++
++	dprintk("%s enter, %p\n", __func__, page);
++	bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0);
++	if (!bh) {
++		ret = -ENOMEM;
++		goto cleanup;
++	}
++
++	isect = (sector_t)page->index << (PAGE_CACHE_SHIFT - 9);
++	be = find_get_extent(bl, isect, &cow_read);
++	if (!be)
++		goto cleanup;
++	inval = is_hole(be, isect);
++	dprintk("%s inval=%i, from=%u, to=%u\n", __func__, inval, from, to);
++	if (inval) {
++		if (be->be_state == PNFS_BLOCK_NONE_DATA) {
++			dprintk("%s PANIC - got NONE_DATA extent %p\n",
++				__func__, be);
++			goto cleanup;
++		}
++		map_block(isect, be, bh);
++		unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
++	}
++	if (PageUptodate(page)) {
++		/* Do nothing */
++	} else if (inval && !cow_read) {
++		zero_user_segments(page, 0, from, to, PAGE_CACHE_SIZE);
++	} else if (0 < from || PAGE_CACHE_SIZE > to) {
++		struct pnfs_block_extent *read_extent;
++
++		read_extent = (inval && cow_read) ? cow_read : be;
++		map_block(isect, read_extent, bh);
++		lock_buffer(bh);
++		bh->b_end_io = end_buffer_read_nobh;
++		submit_bh(READ, bh);
++		dprintk("%s: Waiting for buffer read\n", __func__);
++		/* XXX Don't really want to hold layout lock here */
++		wait_on_buffer(bh);
++		if (!buffer_uptodate(bh))
++			goto cleanup;
++	}
++	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
++		/* There is a BUG here if is a short copy after write_begin,
++		 * but I think this is a generic fs bug.  The problem is that
++		 * we have marked the page as initialized, but it is possible
++		 * that the section not copied may never get copied.
++		 */
++		ret = mark_initialized_sectors(be->be_inval, isect,
++					       PAGE_CACHE_SECTORS,
++					       pages_to_mark);
++		/* Want to preallocate mem so above can't fail */
++		if (ret)
++			goto cleanup;
++	}
++	SetPageMappedToDisk(page);
++	ret = 0;
++
++cleanup:
++	if (bh)		/* NULL when alloc_page_buffers() failed */
++		free_buffer_head(bh);
++	put_extent(be);
++	put_extent(cow_read);
++	if (ret) {
++		/* Need to mark layout with bad read; should now just use nfs4 */
++		mark_bad_read();
++	}
++	return ret;
++}
++
++static int
++bl_write_begin(struct pnfs_layout_segment *lseg, struct page *page, loff_t pos,
++	       unsigned count, struct pnfs_fsdata *fsdata)
++{
++	unsigned from, to;
++	int ret;
++	sector_t *pages_to_mark = NULL;
++	struct pnfs_block_layout *bl = BLK_LSEG2EXT(lseg);
++
++	dprintk("%s enter, %u@%lld\n", __func__, count, pos);
++	print_page(page);
++	/* The following code assumes blocksize >= PAGE_CACHE_SIZE */
++	if (bl->bl_blocksize < (PAGE_CACHE_SIZE >> 9)) {
++		dprintk("%s Can't handle blocksize %llu\n", __func__,
++			(u64)bl->bl_blocksize);
++		put_lseg(fsdata->lseg);
++		fsdata->lseg = NULL;
++		return 0;
++	}
++	if (PageMappedToDisk(page)) {
++		/* Basically, this is a flag that says we have
++		 * successfully called write_begin already on this page.
++		 */
++		/* NOTE - there are cache consistency issues here.
++		 * For example, what if the layout is recalled, then regained?
++		 * If the file is closed and reopened, will the page flags
++		 * be reset?  If not, we'll have to use layout info instead of
++		 * the page flag.
++		 */
++		return 0;
++	}
++	from = pos & (PAGE_CACHE_SIZE - 1);	/* offset within the page */
++	to = from + count;
++	ret = init_page_for_write(bl, page, from, to, &pages_to_mark);
++	if (ret) {
++		dprintk("%s init page failed with %i\n", __func__, ret);
++		/* Revert back to plain NFS and just continue on with
++		 * write.  This assumes there is no request attached, which
++		 * should be true if we get here.
++		 */
++		BUG_ON(PagePrivate(page));
++		put_lseg(fsdata->lseg);
++		fsdata->lseg = NULL;
++		kfree(pages_to_mark);
++		ret = 0;
++	} else {
++		fsdata->private = pages_to_mark; /* for bl_write_end_cleanup */
++	}
++	return ret;
++}
++
++/* CAREFUL - what happens if copied < count??? */
++static int
++bl_write_end(struct inode *inode, struct page *page, loff_t pos,
++	     unsigned count, unsigned copied, struct pnfs_layout_segment *lseg)
++{
++	dprintk("%s enter, %u@%lld, lseg=%p\n", __func__, count, pos, lseg);
++	print_page(page);
++	if (lseg)	/* presumably NULL after write_begin's NFS fallback */
++		SetPageUptodate(page);
++	return 0;
++}
++
++/* Return any memory allocated to fsdata->private, and take advantage
++ * of no page locks to mark pages noted in write_begin as needing
++ * initialization.
++ */
++static void
++bl_write_end_cleanup(struct file *filp, struct pnfs_fsdata *fsdata)
++{
++	struct page *page;
++	pgoff_t index;
++	sector_t *pos;
++	struct address_space *mapping = filp->f_mapping;
++	struct pnfs_fsdata *fake_data;
++	struct pnfs_layout_segment *lseg;
++
++	if (!fsdata)
++		return;
++	lseg = fsdata->lseg;
++	if (!lseg)
++		return;
++	pos = fsdata->private;
++	if (!pos)
++		return;
++	dprintk("%s enter with pos=%llu\n", __func__, (u64)(*pos));
++	for (; *pos != ~0; pos++) {
++		index = *pos >> (PAGE_CACHE_SHIFT - 9);
++		/* XXX How do we properly deal with failures here??? */
++		page = grab_cache_page_write_begin(mapping, index, 0);
++		if (!page) {
++			printk(KERN_ERR "%s BUG BUG BUG NoMem\n", __func__);
++			continue;
++		}
++		dprintk("%s: Examining block page\n", __func__);
++		print_page(page);
++		if (PageMappedToDisk(page))
++			goto release;
++		/* XXX How do we properly deal with failures here??? */
++		dprintk("%s Marking block page\n", __func__);
++		init_page_for_write(BLK_LSEG2EXT(fsdata->lseg), page,
++				    PAGE_CACHE_SIZE, PAGE_CACHE_SIZE, NULL);
++		print_page(page);
++		fake_data = kzalloc(sizeof(*fake_data), GFP_KERNEL);
++		if (!fake_data) {
++			printk(KERN_ERR "%s BUG BUG BUG NoMem\n", __func__);
++			goto release;
++		}
++		get_lseg(lseg);
++		fake_data->lseg = lseg;
++		fake_data->bypass_eof = 1;
++		mapping->a_ops->write_end(filp, mapping,
++					  (loff_t)index << PAGE_CACHE_SHIFT,
++					  PAGE_CACHE_SIZE, PAGE_CACHE_SIZE,
++					  page, fake_data);
++		/* Note fake_data is freed by nfs_write_end */
++		continue;
++release:
++		/* Not handed to write_end: drop the lock and page reference */
++		unlock_page(page);
++		page_cache_release(page);
++	}
++	kfree(fsdata->private);
++	fsdata->private = NULL;
++}
++
++static ssize_t
++bl_get_stripesize(struct pnfs_layout_type *lo)
++{
++	dprintk("%s enter\n", __func__);
++	return 0;	/* constant: @lo is not consulted */
++}
++
++/* This is called by nfs_can_coalesce_requests via nfs_pageio_do_add_request.
++ * Should return False if there is a reason requests can not be coalesced,
++ * otherwise, should default to returning True.
++ */
++static int
++bl_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
++	   struct nfs_page *req)
++{
++	dprintk("%s enter\n", __func__);
++	/* Reads always coalesce; writes only within one layout segment */
++	if (!pgio->pg_iswrite)
++		return 1;
++	return prev->wb_lseg == req->wb_lseg;
++}
++
++static struct layoutdriver_io_operations blocklayout_io_operations = {
++	.commit				= bl_commit,
++	.read_pagelist			= bl_read_pagelist,
++	.write_pagelist			= bl_write_pagelist,
++	.write_begin			= bl_write_begin,
++	.write_end			= bl_write_end,
++	.write_end_cleanup		= bl_write_end_cleanup,
++	.alloc_layout			= bl_alloc_layout,
++	.free_layout			= bl_free_layout,
++	.alloc_lseg			= bl_alloc_lseg,
++	.free_lseg			= bl_free_lseg,
++	.setup_layoutcommit		= bl_setup_layoutcommit,
++	.encode_layoutcommit		= bl_encode_layoutcommit,
++	.cleanup_layoutcommit		= bl_cleanup_layoutcommit,
++	.initialize_mountpoint		= bl_initialize_mountpoint,
++	.uninitialize_mountpoint	= bl_uninitialize_mountpoint,
++};	/* referenced through blocklayout_type.ld_io_ops */
++
++static struct layoutdriver_policy_operations blocklayout_policy_operations = {
++	.get_stripesize			= bl_get_stripesize,
++	.pg_test			= bl_pg_test,
++};	/* referenced through blocklayout_type.ld_policy_ops */
++
++static struct pnfs_layoutdriver_type blocklayout_type = {
++	.id = LAYOUT_BLOCK_VOLUME,
++	.name = "LAYOUT_BLOCK_VOLUME",
++	.ld_io_ops = &blocklayout_io_operations,
++	.ld_policy_ops = &blocklayout_policy_operations,
++};	/* (un)registered in nfs4blocklayout_init()/_exit() */
++
++static int __init nfs4blocklayout_init(void)
++{
++	dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
++	/* NOTE(review): neither call's result is checked here - confirm */
++	pnfs_block_callback_ops = pnfs_register_layoutdriver(&blocklayout_type);
++	bl_pipe_init();
++	return 0;
++}
++
++static void __exit nfs4blocklayout_exit(void)
++{
++	dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
++	       __func__);
++	/* Same order as in init: layout driver first, then the pipe */
++	pnfs_unregister_layoutdriver(&blocklayout_type);
++	bl_pipe_exit();
++}
++
++module_init(nfs4blocklayout_init);
++module_exit(nfs4blocklayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-08-23 12:09:03.289501933 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-08-23 12:09:03.289501933 -0400
+@@ -0,0 +1,335 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayoutdev.c
++ *
++ *  Device operations for the pnfs nfs4 block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#include <linux/module.h>
++#include <linux/buffer_head.h> /* __bread */
++
++#include <linux/genhd.h>
++#include <linux/blkdev.h>
++#include <linux/hash.h>
++
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++uint32_t *blk_overflow(uint32_t *p, uint32_t *end, size_t nbytes)
++{
++	uint32_t *tail = p + XDR_QUADLEN(nbytes);
++
++	/* NULL if @nbytes will not fit before @end (or wraps), else @p */
++	return unlikely(tail > end || tail < p) ? NULL : p;
++}
++EXPORT_SYMBOL(blk_overflow);
++
++/* Open a block_device by device number.  Returns NULL (never an ERR_PTR)
++ * on failure.
++ */
++struct block_device *nfs4_blkdev_get(dev_t dev)
++{
++	struct block_device *bd;
++
++	dprintk("%s enter\n", __func__);
++	bd = open_by_devnum(dev, FMODE_READ);
++	if (!IS_ERR(bd))
++		return bd;
++	dprintk("%s failed to open device : %ld\n",
++			__func__, PTR_ERR(bd));
++	return NULL;
++}
++
++/* Release the block device.
++ * NOTE(review): bd_release() has no matching bd_claim() in view - confirm.
++ */
++int nfs4_blkdev_put(struct block_device *bdev)
++{
++	dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev),
++			MINOR(bdev->bd_dev));
++	bd_release(bdev);
++	return blkdev_put(bdev, FMODE_READ);	/* mode as in nfs4_blkdev_get */
++}
++
++/* Decodes pnfs_block_deviceaddr4 (draft-8), XDR encoded in dev->area, by
++ * handing it to the userspace daemon; returns the new device or NULL.
++ */
++struct pnfs_block_dev *
++nfs4_blk_decode_device(struct nfs_server *server,
++		       struct pnfs_device *dev,
++		       struct list_head *sdlist)
++{
++	struct pnfs_block_dev *rv = NULL;
++	struct block_device *bd = NULL;
++	struct pipefs_hdr *msg = NULL, *reply = NULL;
++	uint32_t major, minor;
++
++	dprintk("%s enter\n", __func__);
++
++	if (IS_ERR(bl_device_pipe))
++		return NULL;
++	dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
++	dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
++		dev->mincount);
++	msg = pipefs_alloc_init_msg(0, BL_DEVICE_MOUNT, 0, dev->area,
++				    dev->mincount);
++	if (IS_ERR(msg)) {
++		dprintk("ERROR: couldn't make pipefs message.\n");
++		goto out_err;
++	}
++	msg->msgid = hash_ptr(&msg, sizeof(msg->msgid) * 8);
++	msg->status = BL_DEVICE_REQUEST_INIT;
++
++	dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
++	reply = pipefs_queue_upcall_waitreply(bl_device_pipe, msg,
++					      &bl_device_list, 0, 0);
++
++	if (IS_ERR(reply)) {
++		dprintk("ERROR: upcall_waitreply failed\n");
++		goto out_err;
++	}
++	if (reply->status != BL_DEVICE_REQUEST_PROC) {
++		dprintk("%s failed to open device: %ld\n",
++			__func__, PTR_ERR(bd));
++		goto out_err;
++	}
++	memcpy(&major, (uint32_t *)(payload_of(reply)), sizeof(uint32_t));
++	memcpy(&minor, (uint32_t *)(payload_of(reply) + sizeof(uint32_t)),
++		sizeof(uint32_t));
++	bd = nfs4_blkdev_get(MKDEV(major, minor));
++	if (!bd || IS_ERR(bd)) {	/* nfs4_blkdev_get() fails with NULL */
++		dprintk("%s failed to open device : %ld\n",
++			__func__, PTR_ERR(bd));
++		goto out_err;
++	}
++
++	rv = kzalloc(sizeof(*rv), GFP_KERNEL);
++	if (!rv) {
++		nfs4_blkdev_put(bd);	/* do not leak the opened device */
++		goto out_err;
++	}
++
++	rv->bm_mdev = bd;
++	memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct pnfs_deviceid));
++	dprintk("%s Created device %s with bd_block_size %u\n",
++		__func__, bd->bd_disk->disk_name, bd->bd_block_size);
++	kfree(reply);
++	kfree(msg);
++	return rv;
++
++out_err:
++	kfree(rv);
++	if (!IS_ERR(reply))
++		kfree(reply);
++	if (!IS_ERR(msg))
++		kfree(msg);
++	return NULL;
++}
++
++/* Map deviceid returned by the server to constructed block_device */
++static struct block_device *translate_devid(struct pnfs_layout_type *lo,
++					    struct pnfs_deviceid *id)
++{
++	struct block_device *rv = NULL;
++	struct block_mount_id *mid;
++	struct pnfs_block_dev *dev;
++
++	dprintk("%s enter, lo=%p, id=%p\n", __func__, lo, id);
++	mid = BLK_ID(lo);
++	spin_lock(&mid->bm_lock);
++	list_for_each_entry(dev, &mid->bm_devlist, bm_node) {
++		if (memcmp(id->data, dev->bm_mdevid.data,
++			   NFS4_PNFS_DEVICEID4_SIZE) == 0) {
++			rv = dev->bm_mdev;
++			goto out;
++		}
++	}
++ out:
++	spin_unlock(&mid->bm_lock);
++	dprintk("%s returning %p\n", __func__, rv);
++	return rv;
++}
++
++/* Tracks info needed to ensure extents in layout obey constraints of spec */
++struct layout_verification {
++	u32 mode;	/* R or RW */
++	u64 start;	/* Expected start of next non-COW extent */
++	u64 inval;	/* Start of INVAL coverage */
++	u64 cowread;	/* End of COW read coverage */
++};
++
++/* Verify the extent meets the layout requirements of the pnfs-block draft,
++ * section 2.3.1.
++ */
++static int verify_extent(struct pnfs_block_extent *be,
++			 struct layout_verification *lv)
++{
++	if (lv->mode == IOMODE_READ) {
++		if (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
++		    be->be_state == PNFS_BLOCK_INVALID_DATA)
++			return -EIO;
++		if (be->be_f_offset != lv->start)
++			return -EIO;
++		lv->start += be->be_length;
++		return 0;
++	}
++	/* lv->mode == IOMODE_RW */
++	if (be->be_state == PNFS_BLOCK_READWRITE_DATA) {
++		if (be->be_f_offset != lv->start)
++			return -EIO;
++		if (lv->cowread > lv->start)
++			return -EIO;
++		lv->start += be->be_length;
++		lv->inval = lv->start;
++		return 0;
++	} else if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
++		if (be->be_f_offset != lv->start)
++			return -EIO;
++		lv->start += be->be_length;
++		return 0;
++	} else if (be->be_state == PNFS_BLOCK_READ_DATA) {
++		if (be->be_f_offset > lv->start)
++			return -EIO;
++		if (be->be_f_offset < lv->inval)
++			return -EIO;
++		if (be->be_f_offset < lv->cowread)
++			return -EIO;
++		/* It looks like you might want to min this with lv->start,
++		 * but you really don't.
++		 */
++		lv->inval = lv->inval + be->be_length;
++		lv->cowread = be->be_f_offset + be->be_length;
++		return 0;
++	} else
++		return -EIO;
++}
++
++/* XDR decode pnfs_block_layout4, verify it and merge its extents into lo */
++int
++nfs4_blk_process_layoutget(struct pnfs_layout_type *lo,
++			   struct nfs4_pnfs_layoutget_res *lgr)
++{
++	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
++	uint32_t *p = (uint32_t *)lgr->layout.buf;
++	uint32_t *end = (uint32_t *)((char *)lgr->layout.buf + lgr->layout.len);
++	int i, status = -EIO;
++	uint32_t count;
++	struct pnfs_block_extent *be = NULL, *save;
++	uint64_t tmp; /* Used by READSECTOR */
++	struct layout_verification lv = {
++		.mode = lgr->lseg.iomode,
++		.start = lgr->lseg.offset >> 9,
++		.inval = lgr->lseg.offset >> 9,
++		.cowread = lgr->lseg.offset >> 9,
++	};
++	LIST_HEAD(extents);
++
++	BLK_READBUF(p, end, 4);
++	READ32(count);
++	dprintk("%s enter, number of extents %u\n", __func__, count);
++	/* count is server supplied: bound it so the size below cannot wrap */
++	if (count > lgr->layout.len / (28 + NFS4_PNFS_DEVICEID4_SIZE))
++		goto out_err;
++	BLK_READBUF(p, end, (size_t)(28 + NFS4_PNFS_DEVICEID4_SIZE) * count);
++
++	/* Decode individual extents, putting them in temporary staging
++	 * area until whole layout is decoded to make error recovery easier.
++	 */
++	for (i = 0; i < count; i++) {
++		be = alloc_extent();
++		if (!be) {
++			status = -ENOMEM;
++			goto out_err;
++		}
++		READ_DEVID(&be->be_devid);
++		be->be_mdev = translate_devid(lo, &be->be_devid);
++		if (!be->be_mdev)
++			goto out_err;
++		/* The next three values are read in as bytes,
++		 * but stored as 512-byte sector lengths
++		 */
++		READ_SECTOR(be->be_f_offset);
++		READ_SECTOR(be->be_length);
++		READ_SECTOR(be->be_v_offset);
++		READ32(be->be_state);
++		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++			be->be_inval = &bl->bl_inval;
++		if (verify_extent(be, &lv)) {
++			dprintk("%s verify failed\n", __func__);
++			goto out_err;
++		}
++		list_add_tail(&be->be_node, &extents);
++	}
++	if (p != end) {
++		dprintk("%s Undecoded cruft at end of opaque\n", __func__);
++		be = NULL;
++		goto out_err;
++	}
++	if (lgr->lseg.offset + lgr->lseg.length != lv.start << 9) {
++		dprintk("%s Final length mismatch\n", __func__);
++		be = NULL;
++		goto out_err;
++	}
++	if (lv.start < lv.cowread) {
++		dprintk("%s Final uncovered COW extent\n", __func__);
++		be = NULL;
++		goto out_err;
++	}
++	/* Extents decoded properly, now try to merge them in to
++	 * existing layout extents.
++	 */
++	spin_lock(&bl->bl_ext_lock);
++	list_for_each_entry_safe(be, save, &extents, be_node) {
++		list_del(&be->be_node);
++		status = add_and_merge_extent(bl, be);
++		if (status) {
++			spin_unlock(&bl->bl_ext_lock);
++			/* This is a fairly catastrophic error, as the
++			 * entire layout extent lists are now corrupted.
++			 * We should have some way to distinguish this.
++			 */
++			be = NULL;
++			goto out_err;
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++	status = 0;
++ out:
++	dprintk("%s returns %i\n", __func__, status);
++	return status;
++
++ out_err:
++	put_extent(be);
++	while (!list_empty(&extents)) {
++		be = list_first_entry(&extents, struct pnfs_block_extent,
++				      be_node);
++		list_del(&be->be_node);
++		put_extent(be);
++	}
++	goto out;
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-08-23 12:09:03.290395707 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-08-23 12:09:03.290395707 -0400
+@@ -0,0 +1,120 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayoutdm.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2007 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Fred Isaman <iisaman@umich.edu>
++ *  Andy Adamson <andros@citi.umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++
++#include <linux/genhd.h> /* gendisk - used in a dprintk*/
++#include <linux/sched.h>
++#include <linux/hash.h>
++
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/* Defines used for calculating memory usage in nfs4_blk_flatten() */
++#define ARGSIZE   24    /* Max bytes needed for linear target arg string */
++#define SPECSIZE (sizeof8(struct dm_target_spec) + ARGSIZE)
++#define SPECS_PER_PAGE (PAGE_SIZE / SPECSIZE)
++#define SPEC_HEADER_ADJUST (SPECS_PER_PAGE - \
++			    (PAGE_SIZE - sizeof8(struct dm_ioctl)) / SPECSIZE)
++#define roundup8(x) (((x)+7) & ~7)
++#define sizeof8(x) roundup8(sizeof(x))
++
++static int dev_remove(dev_t dev)
++{
++	int ret = 1;
++	struct pipefs_hdr *msg = NULL, *reply = NULL;
++	uint64_t bl_dev;
++	uint32_t major = MAJOR(dev), minor = MINOR(dev);
++
++	dprintk("Entering %s\n", __func__);
++
++	if (IS_ERR(bl_device_pipe))
++		return ret;
++
++	memcpy((void *)&bl_dev, &major, sizeof(uint32_t));
++	memcpy((void *)&bl_dev + sizeof(uint32_t), &minor, sizeof(uint32_t));
++	msg = pipefs_alloc_init_msg(0, BL_DEVICE_UMOUNT, 0, (void *)&bl_dev,
++				    sizeof(uint64_t));
++	if (IS_ERR(msg)) {
++		dprintk("ERROR: couldn't make pipefs message.\n");
++		goto out;
++	}
++	msg->msgid = hash_ptr(&msg, sizeof(msg->msgid) * 8);
++	msg->status = BL_DEVICE_REQUEST_INIT;
++
++	reply = pipefs_queue_upcall_waitreply(bl_device_pipe, msg,
++					      &bl_device_list, 0, 0);
++	if (IS_ERR(reply)) {
++		dprintk("ERROR: upcall_waitreply failed\n");
++		goto out;
++	}
++
++	if (reply->status == BL_DEVICE_REQUEST_PROC)
++		ret = 0; /*TODO: what to return*/
++out:
++	if (!IS_ERR(reply))
++		kfree(reply);
++	if (!IS_ERR(msg))
++		kfree(msg);
++	return ret;
++}
++
++/*
++ * Release meta device
++ */
++static int nfs4_blk_metadev_release(struct pnfs_block_dev *bdev)
++{
++	dev_t dev = bdev->bm_mdev->bd_dev; /* saved: put may free it */
++	int rv;
++
++	dprintk("%s Releasing\n", __func__);
++	/* XXX Check return? */
++	rv = nfs4_blkdev_put(bdev->bm_mdev);
++	dprintk("%s nfs4_blkdev_put returns %d\n", __func__, rv);
++	rv = dev_remove(dev);
++	dprintk("%s Returns %d\n", __func__, rv);
++	return rv;
++}
++
++void free_block_dev(struct pnfs_block_dev *bdev)
++{
++	if (bdev) {
++		if (bdev->bm_mdev) {
++			dprintk("%s Removing DM device: %d:%d\n",
++				__func__,
++				MAJOR(bdev->bm_mdev->bd_dev),
++				MINOR(bdev->bm_mdev->bd_dev));
++			/* XXX Check status ?? */
++			nfs4_blk_metadev_release(bdev);
++		}
++		kfree(bdev);
++	}
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-08-23 12:09:03.290395707 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-08-23 12:09:03.291501560 -0400
+@@ -0,0 +1,303 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayout.h
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#ifndef FS_NFS_NFS4BLOCKLAYOUT_H
++#define FS_NFS_NFS4BLOCKLAYOUT_H
++
++#include <linux/nfs_fs.h>
++#include <linux/pnfs_xdr.h> /* Needed by nfs4_pnfs.h */
++#include <linux/nfs4_pnfs.h>
++#include <linux/dm-ioctl.h> /* Needed for struct dm_ioctl*/
++
++#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> 9)
++
++#define PG_pnfserr PG_owner_priv_1
++#define PagePnfsErr(page)	test_bit(PG_pnfserr, &(page)->flags)
++#define SetPagePnfsErr(page)	set_bit(PG_pnfserr, &(page)->flags)
++#define ClearPagePnfsErr(page)	clear_bit(PG_pnfserr, &(page)->flags)
++
++extern int dm_dev_create(struct dm_ioctl *param); /* from dm-ioctl.c */
++extern int dm_dev_remove(struct dm_ioctl *param); /* from dm-ioctl.c */
++extern int dm_do_resume(struct dm_ioctl *param);
++extern int dm_table_load(struct dm_ioctl *param, size_t param_size);
++
++struct block_mount_id {
++	spinlock_t			bm_lock;    /* protects list */
++	struct list_head		bm_devlist; /* holds pnfs_block_dev */
++};
++
++struct pnfs_block_dev {
++	struct list_head		bm_node;
++	struct pnfs_deviceid		bm_mdevid;    /* associated devid */
++	struct block_device		*bm_mdev;     /* meta device itself */
++};
++
++/* holds visible disks that can be matched against VOLUME_SIMPLE signatures */
++struct visible_block_device {
++	struct list_head	vi_node;
++	struct block_device	*vi_bdev;
++	int			vi_mapped;
++	int			vi_put_done;
++};
++
++enum blk_vol_type {
++	PNFS_BLOCK_VOLUME_SIMPLE   = 0,	/* maps to a single LU */
++	PNFS_BLOCK_VOLUME_SLICE    = 1,	/* slice of another volume */
++	PNFS_BLOCK_VOLUME_CONCAT   = 2,	/* concatenation of multiple volumes */
++	PNFS_BLOCK_VOLUME_STRIPE   = 3	/* striped across multiple volumes */
++};
++
++/* All disk offset/lengths are stored in 512-byte sectors */
++struct pnfs_blk_volume {
++	uint32_t		bv_type;
++	sector_t 		bv_size;
++	struct pnfs_blk_volume 	**bv_vols;
++	int 			bv_vol_n;
++	union {
++		dev_t			bv_dev;
++		sector_t		bv_stripe_unit;
++		sector_t 		bv_offset;
++	};
++};
++
++/* Since components need not be aligned, cannot use sector_t */
++struct pnfs_blk_sig_comp {
++	int64_t 	bs_offset;  /* In bytes */
++	uint32_t   	bs_length;  /* In bytes */
++	char 		*bs_string;
++};
++
++/* Maximum number of signatures components in a simple volume */
++# define PNFS_BLOCK_MAX_SIG_COMP 16
++
++struct pnfs_blk_sig {
++	int 				si_num_comps;
++	struct pnfs_blk_sig_comp	si_comps[PNFS_BLOCK_MAX_SIG_COMP];
++};
++
++enum exstate4 {
++	PNFS_BLOCK_READWRITE_DATA	= 0,
++	PNFS_BLOCK_READ_DATA		= 1,
++	PNFS_BLOCK_INVALID_DATA		= 2, /* mapped, but data is invalid */
++	PNFS_BLOCK_NONE_DATA		= 3  /* unmapped, it's a hole */
++};
++
++#define MY_MAX_TAGS (15) /* tag bitnums used must be less than this */
++
++struct my_tree_t {
++	sector_t		mtt_step_size;	/* Internal sector alignment */
++	struct list_head	mtt_stub; /* Should be a radix tree */
++};
++
++struct pnfs_inval_markings {
++	spinlock_t	im_lock;
++	struct my_tree_t im_tree;	/* Sectors that need LAYOUTCOMMIT */
++	sector_t	im_block_size;	/* Server blocksize in sectors */
++};
++
++struct pnfs_inval_tracking {
++	struct list_head it_link;
++	int		 it_sector;
++	int		 it_tags;
++};
++
++/* sector_t fields are all in 512-byte sectors */
++struct pnfs_block_extent {
++	struct kref	be_refcnt;
++	struct list_head be_node;	/* link into lseg list */
++	struct pnfs_deviceid be_devid;  /* STUB - removable??? */
++	struct block_device *be_mdev;
++	sector_t	be_f_offset;	/* the starting offset in the file */
++	sector_t	be_length;	/* the size of the extent */
++	sector_t	be_v_offset;	/* the starting offset in the volume */
++	enum exstate4	be_state;	/* the state of this extent */
++	struct pnfs_inval_markings *be_inval; /* tracks INVAL->RW transition */
++};
++
++/* Shortened extent used by LAYOUTCOMMIT */
++struct pnfs_block_short_extent {
++	struct list_head bse_node;
++	struct pnfs_deviceid bse_devid;	/* STUB - removable??? */
++	struct block_device *bse_mdev;
++	sector_t	bse_f_offset;	/* the starting offset in the file */
++	sector_t	bse_length;	/* the size of the extent */
++};
++
++static inline void
++INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
++{
++	spin_lock_init(&marks->im_lock);
++	INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
++	marks->im_block_size = blocksize;
++	marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
++					   blocksize);
++}
++
++enum extentclass4 {
++	RW_EXTENT	= 0, /* READWRITE and INVAL */
++	RO_EXTENT	= 1, /* READ and NONE */
++	EXTENT_LISTS	= 2, /* number of lists; sizes bl_extents[] */
++};
++
++static inline int choose_list(enum exstate4 state)
++{
++	if (state == PNFS_BLOCK_READ_DATA || state == PNFS_BLOCK_NONE_DATA)
++		return RO_EXTENT;
++	else
++		return RW_EXTENT;
++}
++
++struct pnfs_block_layout {
++	struct pnfs_layout_type bl_layout;
++	struct pnfs_inval_markings bl_inval; /* tracks INVAL->RW transition */
++	spinlock_t		bl_ext_lock;   /* Protects list manipulation */
++	struct list_head	bl_extents[EXTENT_LISTS]; /* R and RW extents */
++	struct list_head	bl_commit;	/* Needs layout commit */
++	unsigned int		bl_count;	/* entries in bl_commit */
++	sector_t		bl_blocksize;  /* Server blocksize in sectors */
++};
++
++/* this struct is communicated between:
++ * bl_setup_layoutcommit && bl_encode_layoutcommit && bl_cleanup_layoutcommit
++ */
++struct bl_layoutupdate_data {
++	struct list_head ranges;
++};
++
++#define BLK_ID(lo) ((struct block_mount_id *)(PNFS_NFS_SERVER(lo)->pnfs_ld_data))
++
++static inline struct pnfs_block_layout *
++BLK_LO2EXT(struct pnfs_layout_type *lo)
++{
++	return container_of(lo, struct pnfs_block_layout, bl_layout);
++}
++
++static inline struct pnfs_block_layout *
++BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
++{
++	return BLK_LO2EXT(lseg->layout);
++}
++
++uint32_t *blk_overflow(uint32_t *p, uint32_t *end, size_t nbytes);
++
++#define BLK_READBUF(p, e, nbytes)  do { \
++	p = blk_overflow(p, e, nbytes); \
++	if (!p) { \
++		printk(KERN_WARNING \
++			"%s: reply buffer overflowed in line %d.\n", \
++			__func__, __LINE__); \
++		goto out_err; \
++	} \
++} while (0)
++
++#define READ32(x)         (x) = ntohl(*p++)
++#define READ64(x)         do {                  \
++	(x) = (uint64_t)ntohl(*p++) << 32;           \
++	(x) |= ntohl(*p++);                     \
++} while (0)
++#define COPYMEM(x, nbytes) do {                 \
++	memcpy((x), p, nbytes);                 \
++	p += XDR_QUADLEN(nbytes);               \
++} while (0)
++#define READ_DEVID(x)	COPYMEM((x)->data, NFS4_PNFS_DEVICEID4_SIZE)
++#define READ_SECTOR(x)     do { \
++	READ64(tmp); \
++	if (tmp & 0x1ff) { \
++		printk(KERN_WARNING \
++		       "%s Value not 512-byte aligned at line %d\n", \
++		       __func__, __LINE__);			     \
++		goto out_err; \
++	} \
++	(x) = tmp >> 9; \
++} while (0)
++
++#define WRITE32(n)               do { \
++	*p++ = htonl(n); \
++	} while (0)
++#define WRITE64(n)               do {                           \
++	*p++ = htonl((uint32_t)((n) >> 32));			\
++	*p++ = htonl((uint32_t)(n));				\
++} while (0)
++#define WRITEMEM(ptr, nbytes)     do {                          \
++	p = xdr_encode_opaque_fixed(p, ptr, nbytes);	\
++} while (0)
++#define WRITE_DEVID(x)  WRITEMEM((x)->data, NFS4_PNFS_DEVICEID4_SIZE)
++
++/* blocklayoutdev.c */
++struct block_device *nfs4_blkdev_get(dev_t dev);
++int nfs4_blkdev_put(struct block_device *bdev);
++struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server,
++					      struct pnfs_device *dev,
++					      struct list_head *sdlist);
++int nfs4_blk_process_layoutget(struct pnfs_layout_type *lo,
++			       struct nfs4_pnfs_layoutget_res *lgr);
++int nfs4_blk_create_block_disk_list(struct list_head *);
++void nfs4_blk_destroy_disk_list(struct list_head *);
++/* blocklayoutdm.c */
++int nfs4_blk_flatten(struct pnfs_blk_volume *, int, struct pnfs_block_dev *);
++void free_block_dev(struct pnfs_block_dev *bdev);
++/* extents.c */
++struct pnfs_block_extent *
++find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
++		struct pnfs_block_extent **cow_read);
++int mark_initialized_sectors(struct pnfs_inval_markings *marks,
++			     sector_t offset, sector_t length,
++			     sector_t **pages);
++void put_extent(struct pnfs_block_extent *be);
++struct pnfs_block_extent *alloc_extent(void);
++struct pnfs_block_extent *get_extent(struct pnfs_block_extent *be);
++int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect);
++int encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++				   struct xdr_stream *xdr,
++				   const struct pnfs_layoutcommit_arg *arg);
++void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++				   const struct pnfs_layoutcommit_arg *arg,
++				   int status);
++int add_and_merge_extent(struct pnfs_block_layout *bl,
++			 struct pnfs_block_extent *new);
++int mark_for_commit(struct pnfs_block_extent *be,
++		    sector_t offset, sector_t length);
++
++#include <linux/sunrpc/simple_rpc_pipefs.h>
++
++extern struct pipefs_list bl_device_list;
++extern struct dentry *bl_device_pipe;
++
++int bl_pipe_init(void);
++void bl_pipe_exit(void);
++
++#define BL_DEVICE_UMOUNT               0x0 /* Umount--delete devices */
++#define BL_DEVICE_MOUNT                0x1 /* Mount--create devices*/
++#define BL_DEVICE_REQUEST_INIT         0x0 /* Start request */
++#define BL_DEVICE_REQUEST_PROC         0x1 /* User level process succeeds */
++#define BL_DEVICE_REQUEST_ERR          0x2 /* User level process fails */
++
++#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-08-23 12:09:03.292511531 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-08-23 12:09:03.292511531 -0400
+@@ -0,0 +1,948 @@
++/*
++ *  linux/fs/nfs/blocklayout/extents.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++
++#include "blocklayout.h"
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/* Bit numbers */
++#define EXTENT_INITIALIZED 0
++#define EXTENT_WRITTEN     1
++#define EXTENT_IN_COMMIT   2
++#define INTERNAL_EXISTS    MY_MAX_TAGS
++#define INTERNAL_MASK      ((1 << INTERNAL_EXISTS) - 1)
++
++/* Returns largest t<=s s.t. t%base==0 */
++static inline sector_t normalize(sector_t s, int base)
++{
++	sector_t tmp = s; /* Since do_div modifies its argument */
++	return s - do_div(tmp, base);
++}
++
++static inline sector_t normalize_up(sector_t s, int base)
++{
++	return normalize(s + base - 1, base);
++}
++
++/* Complete stub using list while determine API wanted */
++
++/* Returns tags, or negative */
++static int32_t _find_entry(struct my_tree_t *tree, u64 s)
++{
++	struct pnfs_inval_tracking *pos;
++
++	dprintk("%s(%llu) enter\n", __func__, s);
++	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
++		if (pos->it_sector > s)
++			continue;
++		else if (pos->it_sector == s)
++			return pos->it_tags & INTERNAL_MASK;
++		else
++			break;
++	}
++	return -ENOENT;
++}
++
++static inline
++int _has_tag(struct my_tree_t *tree, u64 s, int32_t tag)
++{
++	int32_t tags;
++
++	dprintk("%s(%llu, %i) enter\n", __func__, s, tag);
++	s = normalize(s, tree->mtt_step_size);
++	tags = _find_entry(tree, s);
++	if ((tags < 0) || !(tags & (1 << tag)))
++		return 0;
++	else
++		return 1;
++}
++
++/* Creates entry with tag, or if entry already exists, unions tag to it.
++ * If storage is not NULL, newly created entry will use it.
++ * Returns number of entries added, or negative on error.
++ */
++static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag,
++		      struct pnfs_inval_tracking *storage)
++{
++	int found = 0;
++	struct pnfs_inval_tracking *pos;
++
++	dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage);
++	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
++		if (pos->it_sector > s)
++			continue;
++		else if (pos->it_sector == s) {
++			found = 1;
++			break;
++		} else
++			break;
++	}
++	if (found) {
++		pos->it_tags |= (1 << tag);
++		return 0;
++	} else {
++		struct pnfs_inval_tracking *new;
++		if (storage)
++			new = storage;
++		else {
++			new = kmalloc(sizeof(*new), GFP_KERNEL);
++			if (!new)
++				return -ENOMEM;
++		}
++		new->it_sector = s;
++		new->it_tags = (1 << tag);
++		list_add(&new->it_link, &pos->it_link);
++		return 1;
++	}
++}
++
++/* Over range, unions tag with existing entries, else creates entry with tag.
++ * Returns 0 or -ENOMEM.  XXXX Really want option to not create */
++static int _set_range(struct my_tree_t *tree, int32_t tag, u64 s, u64 length)
++{
++	u64 i;
++
++	dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length);
++	for (i = normalize(s, tree->mtt_step_size); i < s + length;
++	     i += tree->mtt_step_size)
++		if (_add_entry(tree, i, tag, NULL) < 0) /* 1 means "added" */
++			return -ENOMEM;
++	return 0;
++}
++
++/* Ensure that future operations on given range of tree will not malloc */
++static int _preload_range(struct my_tree_t *tree, u64 offset, u64 length)
++{
++	u64 start, end, s;
++	int count, i, used = 0, status = -ENOMEM;
++	struct pnfs_inval_tracking **storage;
++
++	dprintk("%s(%llu, %llu) enter\n", __func__, offset, length);
++	start = normalize(offset, tree->mtt_step_size);
++	end = normalize_up(offset + length, tree->mtt_step_size);
++	count = (int)(end - start) / (int)tree->mtt_step_size;
++
++	/* Pre-malloc what memory we might need */
++	storage = kmalloc(sizeof(*storage) * count, GFP_KERNEL);
++	if (!storage)
++		return -ENOMEM;
++	for (i = 0; i < count; i++) {
++		storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking),
++				     GFP_KERNEL);
++		if (!storage[i])
++			goto out_cleanup;
++	}
++
++	/* Now need lock - HOW??? */
++
++	for (s = start; s < end; s += tree->mtt_step_size)
++		used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);
++
++	/* Unlock - HOW??? */
++	status = 0;
++
++ out_cleanup:
++	for (i = used; i < count; i++) {
++		if (!storage[i])
++			break;
++		kfree(storage[i]);
++	}
++	kfree(storage);
++	return status;
++}
++
++static void set_needs_init(sector_t *array, sector_t offset)
++{
++	sector_t *p = array;
++
++	dprintk("%s enter\n", __func__);
++	if (!p)
++		return;
++	while (*p < offset)
++		p++;
++	if (*p == offset)
++		return;
++	else if (*p == ~0) {
++		*p++ = offset;
++		*p = ~0;
++		return;
++	} else {
++		sector_t *save = p;
++		dprintk("%s Adding %llu\n", __func__, (u64)offset);
++		while (*p != ~0)
++			p++;
++		p++;
++		memmove(save + 1, save, (char *)p - (char *)save);
++		*save = offset;
++		return;
++	}
++}
++
++/* We are relying on page lock to serialize this */
++int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect)
++{
++	int rv;
++
++	spin_lock(&marks->im_lock);
++	rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
++	spin_unlock(&marks->im_lock);
++	return rv;
++}
++
++/* Returns 1 if every step in [start, end) has an entry carrying tag, else 0.
++ * Assume start, end already sector aligned.
++ */
++static int
++_range_has_tag(struct my_tree_t *tree, u64 start, u64 end, int32_t tag)
++{
++	struct pnfs_inval_tracking *pos;
++	/* Sector at which the next (lower) entry we visit must end.  Tracking
++	 * the end rather than the start avoids computing "sector - step",
++	 * which wraps around for the entry at sector 0.
++	 */
++	u64 expect_end = end;
++
++	dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag);
++	/* _add_entry keeps the list ascending, so this walks high to low */
++	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
++		if (pos->it_sector >= end)
++			continue;
++		/* Entries must be contiguous and each must carry the tag */
++		if ((u64)pos->it_sector + tree->mtt_step_size != expect_end ||
++		    !(pos->it_tags & (1 << tag)))
++			return 0;
++		expect_end = pos->it_sector;
++		/* Reached the bottom of the range without finding a gap */
++		if (expect_end <= start)
++			return 1;
++	}
++	/* Ran out of entries before covering start */
++	return 0;
++}
++
++static int is_range_written(struct pnfs_inval_markings *marks,
++			    sector_t start, sector_t end)
++{
++	int rv;
++
++	spin_lock(&marks->im_lock);
++	rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN);
++	spin_unlock(&marks->im_lock);
++	return rv;
++}
++
++/* Marks sectors in [offset, offset+length) as having been initialized.
++ * All lengths are step-aligned, where step is min(pagesize, blocksize).
++ * Notes where partial block is initialized, and helps prepare it for
++ * complete initialization later.
++ */
++/* Currently assumes offset is page-aligned */
++int mark_initialized_sectors(struct pnfs_inval_markings *marks,
++			     sector_t offset, sector_t length,
++			     sector_t **pages)
++{
++	sector_t s, start, end;
++	sector_t *array = NULL; /* Pages to mark */
++
++	dprintk("%s(offset=%llu,len=%llu) enter\n",
++		__func__, (u64)offset, (u64)length);
++	s = max((sector_t) 3,
++		2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
++	dprintk("%s set max=%llu\n", __func__, (u64)s);
++	if (pages) {
++		array = kmalloc(s * sizeof(sector_t), GFP_KERNEL);
++		if (!array)
++			goto outerr;
++		array[0] = ~0;
++	}
++
++	start = normalize(offset, marks->im_block_size);
++	end = normalize_up(offset + length, marks->im_block_size);
++	if (_preload_range(&marks->im_tree, start, end - start))
++		goto outerr;
++
++	spin_lock(&marks->im_lock);
++
++	for (s = normalize_up(start, PAGE_CACHE_SECTORS);
++	     s < offset; s += PAGE_CACHE_SECTORS) {
++		dprintk("%s pre-area pages\n", __func__);
++		/* Portion of used block is not initialized */
++		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
++			set_needs_init(array, s);
++	}
++	if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
++		goto out_unlock;
++	for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS);
++	     s < end; s += PAGE_CACHE_SECTORS) {
++		dprintk("%s post-area pages\n", __func__);
++		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
++			set_needs_init(array, s);
++	}
++
++	spin_unlock(&marks->im_lock);
++
++	if (pages) {
++		if (array[0] == ~0) {
++			kfree(array);
++			*pages = NULL;
++		} else
++			*pages = array;
++	}
++	return 0;
++
++ out_unlock:
++	spin_unlock(&marks->im_lock);
++ outerr:
++	if (pages) {
++		kfree(array);
++		*pages = NULL;
++	}
++	return -ENOMEM;
++}
++
++/* Marks sectors in [offset, offset+length) as having been written to disk.
++ * All lengths should be block aligned.
++ */
++int mark_written_sectors(struct pnfs_inval_markings *marks,
++			 sector_t offset, sector_t length)
++{
++	int status;
++
++	dprintk("%s(offset=%llu,len=%llu) enter\n", __func__,
++		(u64)offset, (u64)length);
++	spin_lock(&marks->im_lock);
++	status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length);
++	spin_unlock(&marks->im_lock);
++	return status;
++}
++
++static void print_short_extent(struct pnfs_block_short_extent *be)
++{
++	dprintk("PRINT SHORT EXTENT extent %p\n", be);
++	if (be) {
++		dprintk("        be_f_offset %llu\n", (u64)be->bse_f_offset);
++		dprintk("        be_length   %llu\n", (u64)be->bse_length);
++	}
++}
++
++void print_clist(struct list_head *list, unsigned int count)
++{
++	struct pnfs_block_short_extent *be;
++	unsigned int i = 0;
++
++	dprintk("****************\n");
++	dprintk("Extent list looks like:\n");
++	list_for_each_entry(be, list, bse_node) {
++		i++;
++		print_short_extent(be);
++	}
++	if (i != count)
++		dprintk("\n\nExpected %u entries\n\n\n", count);
++	dprintk("****************\n");
++}
++
++/* Note: In theory, we should do more checking that devid's match between
++ * old and new, but if they don't, the lists are too corrupt to salvage anyway.
++ */
++/* Note this is very similar to add_and_merge_extent */
++static void add_to_commitlist(struct pnfs_block_layout *bl,
++			      struct pnfs_block_short_extent *new)
++{
++	struct list_head *clist = &bl->bl_commit;
++	struct pnfs_block_short_extent *old, *save;
++	sector_t end = new->bse_f_offset + new->bse_length;
++
++	dprintk("%s enter\n", __func__);
++	print_short_extent(new);
++	print_clist(clist, bl->bl_count);
++	bl->bl_count++;
++	/* Scan for proper place to insert, extending new to the left
++	 * as much as possible.
++	 */
++	list_for_each_entry_safe(old, save, clist, bse_node) {
++		if (new->bse_f_offset < old->bse_f_offset)
++			break;
++		if (end <= old->bse_f_offset + old->bse_length) {
++			/* Range is already in list */
++			bl->bl_count--;
++			kfree(new);
++			return;
++		} else if (new->bse_f_offset <=
++				old->bse_f_offset + old->bse_length) {
++			/* new overlaps or abuts existing be */
++			if (new->bse_mdev == old->bse_mdev) {
++				/* extend new to fully replace old */
++				new->bse_length += new->bse_f_offset -
++						old->bse_f_offset;
++				new->bse_f_offset = old->bse_f_offset;
++				list_del(&old->bse_node);
++				bl->bl_count--;
++				kfree(old);
++			}
++		}
++	}
++	/* Note that if we never hit the above break, old will not point to a
++	 * valid extent.  However, in that case &old->bse_node==list.
++	 */
++	list_add_tail(&new->bse_node, &old->bse_node);
++	/* Scan forward for overlaps.  If we find any, extend new and
++	 * remove the overlapped extent.
++	 */
++	old = list_prepare_entry(new, clist, bse_node);
++	list_for_each_entry_safe_continue(old, save, clist, bse_node) {
++		if (end < old->bse_f_offset)
++			break;
++		/* new overlaps or abuts old */
++		if (new->bse_mdev == old->bse_mdev) {
++			if (end < old->bse_f_offset + old->bse_length) {
++				/* extend new to fully cover old */
++				end = old->bse_f_offset + old->bse_length;
++				new->bse_length = end - new->bse_f_offset;
++			}
++			list_del(&old->bse_node);
++			bl->bl_count--;
++			kfree(old);
++		}
++	}
++	dprintk("%s: after merging\n", __func__);
++	print_clist(clist, bl->bl_count);
++}
++
++/* Note the range described by offset, length is guaranteed to be contained
++ * within be.
++ */
++int mark_for_commit(struct pnfs_block_extent *be,
++		    sector_t offset, sector_t length)
++{
++	sector_t new_end, end = offset + length;
++	struct pnfs_block_short_extent *new;
++	struct pnfs_block_layout *bl = container_of(be->be_inval,
++						    struct pnfs_block_layout,
++						    bl_inval);
++
++	new = kmalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return -ENOMEM;
++
++	mark_written_sectors(be->be_inval, offset, length);
++	/* We want to add the range to commit list, but it must be
++	 * block-normalized, and verified that the normalized range has
++	 * been entirely written to disk.
++	 */
++	new->bse_f_offset = offset;
++	offset = normalize(offset, bl->bl_blocksize);
++	if (offset < new->bse_f_offset) {
++		if (is_range_written(be->be_inval, offset, new->bse_f_offset))
++			new->bse_f_offset = offset;
++		else
++			new->bse_f_offset = offset + bl->bl_blocksize;
++	}
++	new_end = normalize_up(end, bl->bl_blocksize);
++	if (end < new_end) {
++		if (is_range_written(be->be_inval, end, new_end))
++			end = new_end;
++		else
++			end = new_end - bl->bl_blocksize;
++	}
++	if (end <= new->bse_f_offset) {
++		kfree(new);
++		return 0;
++	}
++	new->bse_length = end - new->bse_f_offset;
++	new->bse_devid = be->be_devid;
++	new->bse_mdev = be->be_mdev;
++
++	spin_lock(&bl->bl_ext_lock);
++	/* new will be freed, either by add_to_commitlist if it decides not
++	 * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
++	 */
++	add_to_commitlist(bl, new);
++	spin_unlock(&bl->bl_ext_lock);
++	return 0;
++}
++
++static void print_bl_extent(struct pnfs_block_extent *be)
++{
++	dprintk("PRINT EXTENT extent %p\n", be);
++	if (be) {
++		dprintk("        be_f_offset %llu\n", (u64)be->be_f_offset);
++		dprintk("        be_length   %llu\n", (u64)be->be_length);
++		dprintk("        be_v_offset %llu\n", (u64)be->be_v_offset);
++		dprintk("        be_state    %d\n", be->be_state);
++	}
++}
++
++static void
++destroy_extent(struct kref *kref)
++{
++	struct pnfs_block_extent *be;
++
++	be = container_of(kref, struct pnfs_block_extent, be_refcnt);
++	dprintk("%s be=%p\n", __func__, be);
++	kfree(be);
++}
++
++void
++put_extent(struct pnfs_block_extent *be)
++{
++	if (be) {
++		dprintk("%s enter %p (%i)\n", __func__, be,
++			atomic_read(&be->be_refcnt.refcount));
++		kref_put(&be->be_refcnt, destroy_extent);
++	}
++}
++
++struct pnfs_block_extent *alloc_extent(void)
++{
++	struct pnfs_block_extent *be;
++
++	be = kmalloc(sizeof(struct pnfs_block_extent), GFP_KERNEL);
++	if (!be)
++		return NULL;
++	INIT_LIST_HEAD(&be->be_node);
++	kref_init(&be->be_refcnt);
++	be->be_inval = NULL;
++	return be;
++}
++
++struct pnfs_block_extent *
++get_extent(struct pnfs_block_extent *be)
++{
++	if (be)
++		kref_get(&be->be_refcnt);
++	return be;
++}
++
++void print_elist(struct list_head *list)
++{
++	struct pnfs_block_extent *be;
++	dprintk("****************\n");
++	dprintk("Extent list looks like:\n");
++	list_for_each_entry(be, list, be_node) {
++		print_bl_extent(be);
++	}
++	dprintk("****************\n");
++}
++
++static inline int
++extents_consistent(struct pnfs_block_extent *old, struct pnfs_block_extent *new)
++{
++	/* Note this assumes new->be_f_offset >= old->be_f_offset */
++	return (new->be_state == old->be_state) &&
++		((new->be_state == PNFS_BLOCK_NONE_DATA) ||
++		 ((new->be_v_offset - old->be_v_offset ==
++		   new->be_f_offset - old->be_f_offset) &&
++		  new->be_mdev == old->be_mdev));
++}
++
++/* Adds new to appropriate list in bl, modifying new and removing existing
++ * extents as appropriate to deal with overlaps.
++ *
++ * See find_get_extent for list constraints.
++ *
++ * Refcount on new is already set.  If we end up not using it, or error out,
++ * need to put the reference.
++ *
++ * Lock is held by caller.
++ */
++int
++add_and_merge_extent(struct pnfs_block_layout *bl,
++		     struct pnfs_block_extent *new)
++{
++	struct pnfs_block_extent *be, *tmp;
++	sector_t end = new->be_f_offset + new->be_length;
++	struct list_head *list;
++
++	dprintk("%s enter with be=%p\n", __func__, new);
++	print_bl_extent(new);
++	list = &bl->bl_extents[choose_list(new->be_state)];
++	print_elist(list);
++
++	/* Scan for proper place to insert, extending new to the left
++	 * as much as possible.
++	 */
++	list_for_each_entry_safe_reverse(be, tmp, list, be_node) {
++		if (new->be_f_offset >= be->be_f_offset + be->be_length)
++			break;
++		if (new->be_f_offset >= be->be_f_offset) {
++			if (end <= be->be_f_offset + be->be_length) {
++				/* new is a subset of existing be*/
++				if (extents_consistent(be, new)) {
++					dprintk("%s: new is subset, ignoring\n",
++						__func__);
++					put_extent(new);
++					return 0;
++				} else {
++					goto out_err;
++				}
++			} else {
++				/* |<--   be   -->|
++				 *          |<--   new   -->| */
++				if (extents_consistent(be, new)) {
++					/* extend new to fully replace be */
++					new->be_length += new->be_f_offset -
++						be->be_f_offset;
++					new->be_f_offset = be->be_f_offset;
++					new->be_v_offset = be->be_v_offset;
++					dprintk("%s: removing %p\n", __func__, be);
++					list_del(&be->be_node);
++					put_extent(be);
++				} else {
++					goto out_err;
++				}
++			}
++		} else if (end >= be->be_f_offset + be->be_length) {
++			/* new extent overlap existing be */
++			if (extents_consistent(be, new)) {
++				/* extend new to fully replace be */
++				dprintk("%s: removing %p\n", __func__, be);
++				list_del(&be->be_node);
++				put_extent(be);
++			} else {
++				goto out_err;
++			}
++		} else if (end > be->be_f_offset) {
++			/*           |<--   be   -->|
++			 *|<--   new   -->| */
++			if (extents_consistent(new, be)) {
++				/* extend new to fully replace be */
++				new->be_length += be->be_f_offset + be->be_length -
++					new->be_f_offset - new->be_length;
++				dprintk("%s: removing %p\n", __func__, be);
++				list_del(&be->be_node);
++				put_extent(be);
++			} else {
++				goto out_err;
++			}
++		}
++	}
++	/* Note that if we never hit the above break, be will not point to a
++	 * valid extent.  However, in that case &be->be_node==list.
++	 */
++	list_add(&new->be_node, &be->be_node);
++	dprintk("%s: inserting new\n", __func__);
++	print_elist(list);
++	/* STUB - The per-list consistency checks have all been done,
++	 * should now check cross-list consistency.
++	 */
++	return 0;
++
++ out_err:
++	put_extent(new);
++	return -EIO;
++}
++
++/* Returns extent, or NULL.  If a second READ extent exists, it is returned
++ * in cow_read, if given.
++ *
++ * The extents are kept in two separate ordered lists, one for READ and NONE,
++ * one for READWRITE and INVALID.  Within each list, we assume:
++ * 1. Extents are ordered by file offset.
++ * 2. For any given isect, there is at most one extent that matches.
++ */
++struct pnfs_block_extent *
++find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
++	    struct pnfs_block_extent **cow_read)
++{
++	struct pnfs_block_extent *be, *cow, *ret;
++	int i;
++
++	dprintk("%s enter with isect %llu\n", __func__, (u64)isect);
++	cow = ret = NULL;
++	spin_lock(&bl->bl_ext_lock);
++	for (i = 0; i < EXTENT_LISTS; i++) {
++		if (ret &&
++		    (!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA))
++			break;
++		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
++			if (isect >= be->be_f_offset + be->be_length)
++				break;
++			if (isect >= be->be_f_offset) {
++				/* We have found an extent */
++				dprintk("%s Get %p (%i)\n", __func__, be,
++					atomic_read(&be->be_refcnt.refcount));
++				kref_get(&be->be_refcnt);
++				if (!ret)
++					ret = be;
++				else if (be->be_state != PNFS_BLOCK_READ_DATA)
++					put_extent(be);
++				else
++					cow = be;
++				break;
++			}
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++	if (cow_read)
++		*cow_read = cow;
++	print_bl_extent(ret);
++	return ret;
++}
++
++/* Similar to find_get_extent, but called with lock held, and ignores cow */
++static struct pnfs_block_extent *
++find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect)
++{
++	struct pnfs_block_extent *be, *ret = NULL;
++	int i;
++
++	dprintk("%s enter with isect %llu\n", __func__, (u64)isect);
++	for (i = 0; i < EXTENT_LISTS; i++) {
++		if (ret)
++			break;
++		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
++			if (isect >= be->be_f_offset + be->be_length)
++				break;
++			if (isect >= be->be_f_offset) {
++				/* We have found an extent */
++				dprintk("%s Get %p (%i)\n", __func__, be,
++					atomic_read(&be->be_refcnt.refcount));
++				kref_get(&be->be_refcnt);
++				ret = be;
++				break;
++			}
++		}
++	}
++	print_bl_extent(ret);
++	return ret;
++}
++
++int
++encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++			       struct xdr_stream *xdr,
++			       const struct pnfs_layoutcommit_arg *arg)
++{
++	sector_t start, end;
++	struct pnfs_block_short_extent *lce, *save;
++	unsigned int count = 0;
++	struct bl_layoutupdate_data *bld = arg->layoutdriver_data;
++	struct list_head *ranges = &bld->ranges;
++	__be32 *p, *xdr_start;
++
++	dprintk("%s enter\n", __func__);
++	start = arg->lseg.offset >> 9;
++	end = start + (arg->lseg.length >> 9);
++	dprintk("%s set start=%llu, end=%llu\n",
++		__func__, (u64)start, (u64)end);
++
++	/* BUG - creation of bl_commit is buggy - need to wait for
++	 * entire block to be marked WRITTEN before it can be added.
++	 */
++	spin_lock(&bl->bl_ext_lock);
++	/* Want to adjust for possible truncate */
++	/* We now want to adjust argument range */
++
++	/* XDR encode the ranges found */
++	xdr_start = xdr_reserve_space(xdr, 8);
++	if (!xdr_start)
++		goto out;
++	list_for_each_entry_safe(lce, save, &bl->bl_commit, bse_node) {
++		p = xdr_reserve_space(xdr, 7 * 4 + sizeof(lce->bse_devid.data));
++		if (!p)
++			break;
++		WRITE_DEVID(&lce->bse_devid);
++		WRITE64(lce->bse_f_offset << 9);
++		WRITE64(lce->bse_length << 9);
++		WRITE64(0LL);
++		WRITE32(PNFS_BLOCK_READWRITE_DATA);
++		list_del(&lce->bse_node);
++		list_add_tail(&lce->bse_node, ranges);
++		bl->bl_count--;
++		count++;
++	}
++	xdr_start[0] = cpu_to_be32((xdr->p - xdr_start - 1) * 4);
++	xdr_start[1] = cpu_to_be32(count);
++out:
++	spin_unlock(&bl->bl_ext_lock);
++	dprintk("%s found %i ranges\n", __func__, count);
++	return 0;
++}
++
++/* Helper function to set_to_rw that initializes a new extent */
++static void
++_prep_new_extent(struct pnfs_block_extent *new,
++		 struct pnfs_block_extent *orig,
++		 sector_t offset, sector_t length, int state)
++{
++	kref_init(&new->be_refcnt);
++	/* don't need to INIT_LIST_HEAD(&new->be_node) */
++	memcpy(&new->be_devid, &orig->be_devid, sizeof(struct pnfs_deviceid));
++	new->be_mdev = orig->be_mdev;
++	new->be_f_offset = offset;
++	new->be_length = length;
++	new->be_v_offset = orig->be_v_offset - orig->be_f_offset + offset;
++	new->be_state = state;
++	new->be_inval = orig->be_inval;
++}
++
++/* Tries to merge be with extent in front of it in list.
++ * Frees storage if not used.
++ */
++static struct pnfs_block_extent *
++_front_merge(struct pnfs_block_extent *be, struct list_head *head,
++	     struct pnfs_block_extent *storage)
++{
++	struct pnfs_block_extent *prev;
++
++	if (!storage)
++		goto no_merge;
++	if (&be->be_node == head || be->be_node.prev == head)
++		goto no_merge;
++	prev = list_entry(be->be_node.prev, struct pnfs_block_extent, be_node);
++	if ((prev->be_f_offset + prev->be_length != be->be_f_offset) ||
++	    !extents_consistent(prev, be))
++		goto no_merge;
++	_prep_new_extent(storage, prev, prev->be_f_offset,
++			 prev->be_length + be->be_length, prev->be_state);
++	list_replace(&prev->be_node, &storage->be_node);
++	put_extent(prev);
++	list_del(&be->be_node);
++	put_extent(be);
++	return storage;
++
++ no_merge:
++	kfree(storage);
++	return be;
++}
++
++static u64
++set_to_rw(struct pnfs_block_layout *bl, u64 offset, u64 length)
++{
++	u64 rv = offset + length;
++	struct pnfs_block_extent *be, *e1, *e2, *e3, *new, *old;
++	struct pnfs_block_extent *children[3];
++	struct pnfs_block_extent *merge1 = NULL, *merge2 = NULL;
++	int i = 0, j;
++
++	dprintk("%s(%llu, %llu)\n", __func__, offset, length);
++	/* Create storage for up to three new extents e1, e2, e3 */
++	e1 = kmalloc(sizeof(*e1), GFP_KERNEL);
++	e2 = kmalloc(sizeof(*e2), GFP_KERNEL);
++	e3 = kmalloc(sizeof(*e3), GFP_KERNEL);
++	/* BUG - we are ignoring any failure */
++	if (!e1 || !e2 || !e3)
++		goto out_nosplit;
++
++	spin_lock(&bl->bl_ext_lock);
++	be = find_get_extent_locked(bl, offset);
++	rv = be->be_f_offset + be->be_length;
++	if (be->be_state != PNFS_BLOCK_INVALID_DATA) {
++		spin_unlock(&bl->bl_ext_lock);
++		goto out_nosplit;
++	}
++	/* Add e* to children, bumping e*'s krefs */
++	if (be->be_f_offset != offset) {
++		_prep_new_extent(e1, be, be->be_f_offset,
++				 offset - be->be_f_offset,
++				 PNFS_BLOCK_INVALID_DATA);
++		children[i++] = e1;
++		print_bl_extent(e1);
++	} else
++		merge1 = e1;
++	_prep_new_extent(e2, be, offset,
++			 min(length, be->be_f_offset + be->be_length - offset),
++			 PNFS_BLOCK_READWRITE_DATA);
++	children[i++] = e2;
++	print_bl_extent(e2);
++	if (offset + length < be->be_f_offset + be->be_length) {
++		_prep_new_extent(e3, be, e2->be_f_offset + e2->be_length,
++				 be->be_f_offset + be->be_length -
++				 offset - length,
++				 PNFS_BLOCK_INVALID_DATA);
++		children[i++] = e3;
++		print_bl_extent(e3);
++	} else
++		merge2 = e3;
++
++	/* Remove be from list, and insert the e* */
++	/* We don't get refs on e*, since this list is the base reference
++	 * set when init'ed.
++	 */
++	if (i < 3)
++		children[i] = NULL;
++	new = children[0];
++	list_replace(&be->be_node, &new->be_node);
++	put_extent(be);
++	new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge1);
++	for (j = 1; j < i; j++) {
++		old = new;
++		new = children[j];
++		list_add(&new->be_node, &old->be_node);
++	}
++	if (merge2) {
++		/* This is a HACK, should just create a _back_merge function */
++		new = list_entry(new->be_node.next,
++				 struct pnfs_block_extent, be_node);
++		new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge2);
++	}
++	spin_unlock(&bl->bl_ext_lock);
++
++	/* Since we removed the base reference above, be is now scheduled for
++	 * destruction.
++	 */
++	put_extent(be);
++	dprintk("%s returns %llu after split\n", __func__, rv);
++	return rv;
++
++ out_nosplit:
++	kfree(e1);
++	kfree(e2);
++	kfree(e3);
++	dprintk("%s returns %llu without splitting\n", __func__, rv);
++	return rv;
++}
++
++void
++clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++			      const struct pnfs_layoutcommit_arg *arg,
++			      int status)
++{
++	struct bl_layoutupdate_data *bld = arg->layoutdriver_data;
++	struct pnfs_block_short_extent *lce, *save;
++
++	dprintk("%s status %d\n", __func__, status);
++	list_for_each_entry_safe_reverse(lce, save, &bld->ranges, bse_node) {
++		if (likely(!status)) {
++			u64 offset = lce->bse_f_offset;
++			u64 end = offset + lce->bse_length;
++
++			do {
++				offset = set_to_rw(bl, offset, end - offset);
++			} while (offset < end);
++
++			kfree(lce);
++		} else {
++			spin_lock(&bl->bl_ext_lock);
++			add_to_commitlist(bl, lce);
++			spin_unlock(&bl->bl_ext_lock);
++		}
++	}
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-08-23 12:09:03.292511531 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-08-23 12:09:03.293491476 -0400
+@@ -0,0 +1,6 @@
++#
++# Makefile for the pNFS block layout driver kernel module
++#
++obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o
++blocklayoutdriver-objs := blocklayout.o blocklayoutdev.o blocklayoutdm.o \
++			extents.o block-device-discovery-pipe.o
+diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/callback.h
+--- linux-2.6.34.noarch/fs/nfs/callback.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-08-23 12:09:03.293491476 -0400
+@@ -8,6 +8,8 @@
+ #ifndef __LINUX_FS_NFS_CALLBACK_H
+ #define __LINUX_FS_NFS_CALLBACK_H
+ 
++#include <linux/pnfs_xdr.h>
++
+ #define NFS4_CALLBACK 0x40000000
+ #define NFS4_CALLBACK_XDRSIZE 2048
+ #define NFS4_CALLBACK_BUFSIZE (1024 + NFS4_CALLBACK_XDRSIZE)
+@@ -72,6 +74,8 @@ struct cb_recallargs {
+ 
+ #if defined(CONFIG_NFS_V4_1)
+ 
++#include <linux/pnfs_xdr.h>
++
+ struct referring_call {
+ 	uint32_t			rc_sequenceid;
+ 	uint32_t			rc_slotid;
+@@ -111,6 +115,13 @@ extern int nfs41_validate_delegation_sta
+ 
+ #define RCA4_TYPE_MASK_RDATA_DLG	0
+ #define RCA4_TYPE_MASK_WDATA_DLG	1
++#define RCA4_TYPE_MASK_DIR_DLG         2
++#define RCA4_TYPE_MASK_FILE_LAYOUT     3
++#define RCA4_TYPE_MASK_BLK_LAYOUT      4
++#define RCA4_TYPE_MASK_OBJ_LAYOUT_MIN  8
++#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX  9
++#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12
++#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15
+ 
+ struct cb_recallanyargs {
+ 	struct sockaddr	*craa_addr;
+@@ -127,6 +138,37 @@ struct cb_recallslotargs {
+ extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args,
+ 					  void *dummy);
+ 
++struct cb_pnfs_layoutrecallargs {
++	struct sockaddr		*cbl_addr;
++	struct nfs_fh		cbl_fh;
++	struct nfs4_pnfs_layout_segment cbl_seg;
++	struct nfs_fsid		cbl_fsid;
++	uint32_t		cbl_recall_type;
++	uint32_t		cbl_layout_type;
++	uint32_t		cbl_layoutchanged;
++	nfs4_stateid		cbl_stateid;
++};
++
++extern unsigned pnfs_cb_layoutrecall(struct cb_pnfs_layoutrecallargs *args,
++				     void *dummy);
++
++struct cb_pnfs_devicenotifyitem {
++	uint32_t		cbd_notify_type;
++	uint32_t		cbd_layout_type;
++	struct pnfs_deviceid	cbd_dev_id;
++	uint32_t		cbd_immediate;
++};
++
++/* XXX: Should be dynamic up to max compound size */
++#define NFS4_DEV_NOTIFY_MAXENTRIES 10
++struct cb_pnfs_devicenotifyargs {
++	struct sockaddr			*addr;
++	int				 ndevs;
++	struct cb_pnfs_devicenotifyitem	 devs[NFS4_DEV_NOTIFY_MAXENTRIES];
++};
++
++extern unsigned pnfs_cb_devicenotify(struct cb_pnfs_devicenotifyargs *args,
++				     void *dummy);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
+diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/nfs/callback_proc.c
+--- linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-08-23 12:09:03.294522414 -0400
+@@ -8,10 +8,15 @@
+ #include <linux/nfs4.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/slab.h>
++#include <linux/kthread.h>
++#include <linux/module.h>
++#include <linux/writeback.h>
++#include <linux/nfs4_pnfs.h>
+ #include "nfs4_fs.h"
+ #include "callback.h"
+ #include "delegation.h"
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ #ifdef NFS_DEBUG
+ #define NFSDBG_FACILITY NFSDBG_CALLBACK
+@@ -62,16 +67,6 @@ out:
+ 	return res->status;
+ }
+ 
+-static int (*nfs_validate_delegation_stateid(struct nfs_client *clp))(struct nfs_delegation *, const nfs4_stateid *)
+-{
+-#if defined(CONFIG_NFS_V4_1)
+-	if (clp->cl_minorversion > 0)
+-		return nfs41_validate_delegation_stateid;
+-#endif
+-	return nfs4_validate_delegation_stateid;
+-}
+-
+-
+ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
+ {
+ 	struct nfs_client *clp;
+@@ -92,8 +87,7 @@ __be32 nfs4_callback_recall(struct cb_re
+ 		inode = nfs_delegation_find_inode(clp, &args->fh);
+ 		if (inode != NULL) {
+ 			/* Set up a helper thread to actually return the delegation */
+-			switch (nfs_async_inode_return_delegation(inode, &args->stateid,
+-								  nfs_validate_delegation_stateid(clp))) {
++			switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
+ 				case 0:
+ 					res = 0;
+ 					break;
+@@ -116,24 +110,364 @@ out:
+ 
+ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
+ {
+-	if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data,
+-					 sizeof(delegation->stateid.data)) != 0)
++	if (delegation == NULL || memcmp(delegation->stateid.u.data,
++					 stateid->u.data,
++					 sizeof(delegation->stateid.u.data)))
+ 		return 0;
+ 	return 1;
+ }
+ 
+ #if defined(CONFIG_NFS_V4_1)
+ 
++static bool
++pnfs_is_next_layout_stateid(const struct pnfs_layout_type *lo,
++			    const nfs4_stateid stateid)
++{
++	int seqlock;
++	bool res;
++	u32 oldseqid, newseqid;
++
++	do {
++		seqlock = read_seqbegin(&lo->seqlock);
++		oldseqid = be32_to_cpu(lo->stateid.u.stateid.seqid);
++		newseqid = be32_to_cpu(stateid.u.stateid.seqid);
++		res = !memcmp(lo->stateid.u.stateid.other,
++			      stateid.u.stateid.other,
++			      NFS4_STATEID_OTHER_SIZE);
++		if (res) { /* comparing layout stateids */
++			if (oldseqid == ~0)
++				res = (newseqid == 1);
++			else
++				res = (newseqid == oldseqid + 1);
++		} else { /* open stateid */
++			res = !memcmp(lo->stateid.u.data,
++				      &zero_stateid,
++				      NFS4_STATEID_SIZE);
++			if (res)
++				res = (newseqid == 1);
++		}
++	} while (read_seqretry(&lo->seqlock, seqlock));
++
++	return res;
++}
++
++/*
++ * Retrieve an inode based on layout recall parameters
++ *
++ * Note: caller must iput(inode) to dereference the inode.
++ */
++static struct inode *
++nfs_layoutrecall_find_inode(struct nfs_client *clp,
++			    const struct cb_pnfs_layoutrecallargs *args)
++{
++	struct nfs_inode *nfsi;
++	struct pnfs_layout_type *layout;
++	struct nfs_server *server;
++	struct inode *ino = NULL;
++
++	dprintk("%s: Begin recall_type=%d clp %p\n",
++		__func__, args->cbl_recall_type, clp);
++
++	spin_lock(&clp->cl_lock);
++	list_for_each_entry(layout, &clp->cl_layouts, lo_layouts) {
++		nfsi = PNFS_NFS_INODE(layout);
++		if (!nfsi)
++			continue;
++
++		dprintk("%s: Searching inode=%lu\n",
++			__func__, nfsi->vfs_inode.i_ino);
++
++		if (args->cbl_recall_type == RETURN_FILE) {
++		    if (nfs_compare_fh(&args->cbl_fh, &nfsi->fh))
++			continue;
++		} else if (args->cbl_recall_type == RETURN_FSID) {
++			server = NFS_SERVER(&nfsi->vfs_inode);
++			if (server->fsid.major != args->cbl_fsid.major ||
++			    server->fsid.minor != args->cbl_fsid.minor)
++				continue;
++		}
++
++		/* Make sure client didn't clean up layout without
++		 * telling the server */
++		if (!has_layout(nfsi))
++			continue;
++
++		ino = igrab(&nfsi->vfs_inode);
++		dprintk("%s: Found inode=%p\n", __func__, ino);
++		break;
++	}
++	spin_unlock(&clp->cl_lock);
++	return ino;
++}
++
++struct recall_layout_threadargs {
++	struct inode *inode;
++	struct nfs_client *clp;
++	struct completion started;
++	struct cb_pnfs_layoutrecallargs *rl;
++	int result;
++};
++
++static int pnfs_recall_layout(void *data)
++{
++	struct inode *inode, *ino;
++	struct nfs_client *clp;
++	struct cb_pnfs_layoutrecallargs rl;
++	struct nfs4_pnfs_layoutreturn *lrp;
++	struct recall_layout_threadargs *args =
++		(struct recall_layout_threadargs *)data;
++	int status = 0;
++
++	daemonize("nfsv4-layoutreturn");
++
++	dprintk("%s: recall_type=%d fsid 0x%llx-0x%llx start\n",
++		__func__, args->rl->cbl_recall_type,
++		args->rl->cbl_fsid.major, args->rl->cbl_fsid.minor);
++
++	clp = args->clp;
++	inode = args->inode;
++	rl = *args->rl;
++
++	/* support whole file layouts only */
++	rl.cbl_seg.offset = 0;
++	rl.cbl_seg.length = NFS4_MAX_UINT64;
++
++	if (rl.cbl_recall_type == RETURN_FILE) {
++		if (pnfs_is_next_layout_stateid(NFS_I(inode)->layout,
++						rl.cbl_stateid))
++			status = pnfs_return_layout(inode, &rl.cbl_seg,
++						    &rl.cbl_stateid, RETURN_FILE,
++						    false);
++		else
++			status = cpu_to_be32(NFS4ERR_DELAY);
++		if (status)
++			dprintk("%s RETURN_FILE error: %d\n", __func__, status);
++		else
++			status =  cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
++		args->result = status;
++		complete(&args->started);
++		goto out;
++	}
++
++	status = cpu_to_be32(NFS4_OK);
++	args->result = status;
++	complete(&args->started);
++	args = NULL;
++
++	/* IMPROVEME: This loop is inefficient, running in O(|s_inodes|^2) */
++	while ((ino = nfs_layoutrecall_find_inode(clp, &rl)) != NULL) {
++		/* FIXME: need to check status on pnfs_return_layout */
++		pnfs_return_layout(ino, &rl.cbl_seg, NULL, RETURN_FILE, false);
++		iput(ino);
++	}
++
++	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
++	if (!lrp) {
++		dprintk("%s: allocation failed. Cannot send last LAYOUTRETURN\n",
++			__func__);
++		goto out;
++	}
++
++	/* send final layoutreturn */
++	lrp->args.reclaim = 0;
++	lrp->args.layout_type = rl.cbl_layout_type;
++	lrp->args.return_type = rl.cbl_recall_type;
++	lrp->args.lseg = rl.cbl_seg;
++	lrp->args.inode = inode;
++	pnfs4_proc_layoutreturn(lrp, true);
++
++out:
++	clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
++	nfs_put_client(clp);
++	module_put_and_exit(0);
++	dprintk("%s: exit status %d\n", __func__, 0);
++	return 0;
++}
++
++/*
++ * Asynchronous layout recall!
++ */
++static int pnfs_async_return_layout(struct nfs_client *clp, struct inode *inode,
++				    struct cb_pnfs_layoutrecallargs *rl)
++{
++	struct recall_layout_threadargs data = {
++		.clp = clp,
++		.inode = inode,
++		.rl = rl,
++	};
++	struct task_struct *t;
++	int status = -EAGAIN;
++
++	dprintk("%s: -->\n", __func__);
++
++	/* FIXME: do not allow two concurrent layout recalls */
++	if (test_and_set_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state))
++		return status;
++
++	init_completion(&data.started);
++	__module_get(THIS_MODULE);
++	if (!atomic_inc_not_zero(&clp->cl_count))
++		goto out_put_no_client;
++
++	t = kthread_run(pnfs_recall_layout, &data, "%s", "pnfs_recall_layout");
++	if (IS_ERR(t)) {
++		printk(KERN_INFO "NFS: Layout recall callback thread failed "
++			"for client (clientid %08x/%08x)\n",
++			(unsigned)(clp->cl_clientid >> 32),
++			(unsigned)(clp->cl_clientid));
++		status = PTR_ERR(t);
++		goto out_module_put;
++	}
++	wait_for_completion(&data.started);
++	return data.result;
++out_module_put:
++	nfs_put_client(clp);
++out_put_no_client:
++	clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
++	module_put(THIS_MODULE);
++	return status;
++}
++
++static int pnfs_recall_all_layouts(struct nfs_client *clp)
++{
++	struct cb_pnfs_layoutrecallargs rl;
++	struct inode *inode;
++	int status = 0;
++
++	rl.cbl_recall_type = RETURN_ALL;
++	rl.cbl_seg.iomode = IOMODE_ANY;
++	rl.cbl_seg.offset = 0;
++	rl.cbl_seg.length = NFS4_MAX_UINT64;
++
++	/* we need the inode to get the nfs_server struct */
++	inode = nfs_layoutrecall_find_inode(clp, &rl);
++	if (!inode)
++		return status;
++	status = pnfs_async_return_layout(clp, inode, &rl);
++	iput(inode);
++
++	return status;
++}
++/* CB_LAYOUTRECALL handler: call pnfs_async_return_layout() per MDS client. */
++__be32 pnfs_cb_layoutrecall(struct cb_pnfs_layoutrecallargs *args,
++			    void *dummy)
++{
++	struct nfs_client *clp;
++	struct inode *inode = NULL;
++	__be32 res;
++	int status;
++	unsigned int num_client = 0;
++
++	dprintk("%s: -->\n", __func__);
++	/* Reply used when no nfs_client is found for args->cbl_addr. */
++	res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
++	clp  = nfs_find_client(args->cbl_addr, 4);
++	if (clp == NULL) {
++		dprintk("%s: no client for addr %u.%u.%u.%u\n",
++			__func__, NIPQUAD(args->cbl_addr));
++		goto out;
++	}
++	/* NOTE(review): below, res only ever changes to NFS4ERR_DELAY - confirm. */
++	res = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
++	do {
++		struct nfs_client *prev = clp;
++		num_client++;
++		/* the callback must come from the MDS personality */
++		if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
++			goto loop;
++		if (args->cbl_recall_type == RETURN_FILE) {
++			inode = nfs_layoutrecall_find_inode(clp, args);
++			if (inode != NULL) {
++				status = pnfs_async_return_layout(clp, inode,
++								  args);
++				if (status)
++					res = cpu_to_be32(NFS4ERR_DELAY);
++				iput(inode);
++			}
++		} else { /* _ALL or _FSID */
++			/* we need the inode to get the nfs_server struct */
++			inode = nfs_layoutrecall_find_inode(clp, args);
++			if (!inode)
++				goto loop;
++			status = pnfs_async_return_layout(clp, inode, args);
++			if (status)
++				res = cpu_to_be32(NFS4ERR_DELAY);
++			iput(inode);
++		}
++loop:
++		clp = nfs_find_client_next(prev);	/* looked up before prev is put */
++		nfs_put_client(prev);
++	} while (clp != NULL);
++
++out:
++	dprintk("%s: exit with status = %d numclient %u\n",
++		__func__, ntohl(res), num_client);
++	return res;
++}
++
++/* Delete each NOTIFY_DEVICEID4_DELETE device from clp's cache; returns 0. */
++static __be32 pnfs_devicenotify_client(struct nfs_client *clp,
++				       struct cb_pnfs_devicenotifyargs *args)
++{
++	uint32_t type;
++	int i;
++
++	dprintk("%s: --> clp %p\n", __func__, clp);
++	/* CHANGE notifications are only logged; other types are skipped. */
++	for (i = 0; i < args->ndevs; i++) {
++		struct cb_pnfs_devicenotifyitem *dev = &args->devs[i];
++		type = dev->cbd_notify_type;
++		if (type == NOTIFY_DEVICEID4_DELETE && clp->cl_devid_cache)
++			nfs4_delete_device(clp->cl_devid_cache,
++					   &dev->cbd_dev_id);
++		else if (type == NOTIFY_DEVICEID4_CHANGE)
++			printk(KERN_ERR "%s: NOTIFY_DEVICEID4_CHANGE "
++					"not supported\n", __func__);
++	}
++	return 0;
++}
++/* CB_NOTIFY_DEVICEID handler: apply args to every client found at args->addr. */
++__be32 pnfs_cb_devicenotify(struct cb_pnfs_devicenotifyargs *args,
++			    void *dummy)
++{
++	struct nfs_client *clp;
++	__be32 res = 0;
++	unsigned int num_client = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	res = __constant_htonl(NFS4ERR_INVAL);
++	clp = nfs_find_client(args->addr, 4);
++	if (clp == NULL) {
++		dprintk("%s: no client for addr %u.%u.%u.%u\n",
++			__func__, NIPQUAD(args->addr));
++		goto out;
++	}
++	/* res ends up holding the result for the last client visited. */
++	do {
++		struct nfs_client *prev = clp;
++		num_client++;
++		res = pnfs_devicenotify_client(clp, args);
++		clp = nfs_find_client_next(prev);
++		nfs_put_client(prev);
++	} while (clp != NULL);
++
++out:
++	dprintk("%s: exit with status = %d numclient %u\n",
++		__func__, ntohl(res), num_client);
++	return res;
++}
++
+ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
+ {
+ 	if (delegation == NULL)
+ 		return 0;
+ 
+-	/* seqid is 4-bytes long */
+-	if (((u32 *) &stateid->data)[0] != 0)
++	if (stateid->u.stateid.seqid != 0)
+ 		return 0;
+-	if (memcmp(&delegation->stateid.data[4], &stateid->data[4],
+-		   sizeof(stateid->data)-4))
++	if (memcmp(&delegation->stateid.u.stateid.other,
++		   &stateid->u.stateid.other,
++		   NFS4_STATEID_OTHER_SIZE))
+ 		return 0;
+ 
+ 	return 1;
+@@ -335,13 +669,37 @@ out:
+ 	return status;
+ }
+ 
++static inline bool
++validate_bitmap_values(const unsigned long *mask)
++{
++	int i;
++	/* True for an empty *mask or when any known RCA4_TYPE_MASK_* bit is set. */
++	if (*mask == 0)
++		return true;
++	if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, mask) ||
++	    test_bit(RCA4_TYPE_MASK_WDATA_DLG, mask) ||
++	    test_bit(RCA4_TYPE_MASK_DIR_DLG, mask) ||
++	    test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, mask) ||
++	    test_bit(RCA4_TYPE_MASK_BLK_LAYOUT, mask))
++		return true;
++	for (i = RCA4_TYPE_MASK_OBJ_LAYOUT_MIN;
++	     i <= RCA4_TYPE_MASK_OBJ_LAYOUT_MAX; i++)
++		if (test_bit(i, mask))
++			return true;
++	for (i = RCA4_TYPE_MASK_OTHER_LAYOUT_MIN;
++	     i <= RCA4_TYPE_MASK_OTHER_LAYOUT_MAX; i++)
++		if (test_bit(i, mask))
++			return true;
++	return false;
++}
++
+ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
+ {
+ 	struct nfs_client *clp;
+ 	__be32 status;
+ 	fmode_t flags = 0;
+ 
+-	status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
++	status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
+ 	clp = nfs_find_client(args->craa_addr, 4);
+ 	if (clp == NULL)
+ 		goto out;
+@@ -349,16 +707,25 @@ __be32 nfs4_callback_recallany(struct cb
+ 	dprintk("NFS: RECALL_ANY callback request from %s\n",
+ 		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+ 
++	status = cpu_to_be32(NFS4ERR_INVAL);
++	if (!validate_bitmap_values((const unsigned long *)
++				    &args->craa_type_mask))
++		return status;
++
++	status = cpu_to_be32(NFS4_OK);
+ 	if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
+ 		     &args->craa_type_mask))
+ 		flags = FMODE_READ;
+ 	if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
+ 		     &args->craa_type_mask))
+ 		flags |= FMODE_WRITE;
++	if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
++		     &args->craa_type_mask))
++		if (pnfs_recall_all_layouts(clp) == -EAGAIN)
++			status = cpu_to_be32(NFS4ERR_DELAY);
+ 
+ 	if (flags)
+ 		nfs_expire_all_delegation_types(clp, flags);
+-	status = htonl(NFS4_OK);
+ out:
+ 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
+ 	return status;
+diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/nfs/callback_xdr.c
+--- linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-08-23 12:09:03.295502055 -0400
+@@ -22,6 +22,8 @@
+ #define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+ 
+ #if defined(CONFIG_NFS_V4_1)
++#define CB_OP_LAYOUTRECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
++#define CB_OP_DEVICENOTIFY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+ #define CB_OP_SEQUENCE_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ + \
+ 					4 + 1 + 3)
+ #define CB_OP_RECALLANY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+@@ -136,7 +138,7 @@ static __be32 decode_stateid(struct xdr_
+ 	p = read_buf(xdr, 16);
+ 	if (unlikely(p == NULL))
+ 		return htonl(NFS4ERR_RESOURCE);
+-	memcpy(stateid->data, p, 16);
++	memcpy(stateid->u.data, p, 16);
+ 	return 0;
+ }
+ 
+@@ -220,6 +222,148 @@ out:
+ 
+ #if defined(CONFIG_NFS_V4_1)
+ 
++static __be32 decode_pnfs_layoutrecall_args(struct svc_rqst *rqstp,
++					    struct xdr_stream *xdr,
++					    struct cb_pnfs_layoutrecallargs *args)
++{
++	__be32 *p;
++	__be32 status = 0;
++	/* Decode CB_LAYOUTRECALL args; returns 0 or a network-order NFS4ERR_*. */
++	args->cbl_addr = svc_addr(rqstp);
++	p = read_buf(xdr, 4 * sizeof(uint32_t));
++	if (unlikely(p == NULL)) {
++		status = htonl(NFS4ERR_BADXDR);
++		goto out;
++	}
++	/* Fixed-size head: layout type, iomode, layoutchanged, recall type. */
++	args->cbl_layout_type = ntohl(*p++);
++	args->cbl_seg.iomode = ntohl(*p++);
++	args->cbl_layoutchanged = ntohl(*p++);
++	args->cbl_recall_type = ntohl(*p++);
++
++	if (likely(args->cbl_recall_type == RETURN_FILE)) {
++		status = decode_fh(xdr, &args->cbl_fh);
++		if (unlikely(status != 0))
++			goto out;
++
++		p = read_buf(xdr, 2 * sizeof(uint64_t));
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_BADXDR);
++			goto out;
++		}
++		p = xdr_decode_hyper(p, &args->cbl_seg.offset);
++		p = xdr_decode_hyper(p, &args->cbl_seg.length);
++		status = decode_stateid(xdr, &args->cbl_stateid);
++		if (unlikely(status != 0))
++			goto out;
++	} else if (args->cbl_recall_type == RETURN_FSID) {
++		p = read_buf(xdr, 2 * sizeof(uint64_t));
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_BADXDR);
++			goto out;
++		}
++		p = xdr_decode_hyper(p, &args->cbl_fsid.major);
++		p = xdr_decode_hyper(p, &args->cbl_fsid.minor);
++	}	/* any other recall type: nothing further is decoded here */
++	dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d "
++		"fsid %llx-%llx fhsize %d\n", __func__,
++		args->cbl_layout_type, args->cbl_seg.iomode,
++		args->cbl_layoutchanged, args->cbl_recall_type,
++		args->cbl_fsid.major, args->cbl_fsid.minor,
++		args->cbl_fh.size);
++out:
++	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
++	return status;
++}
++/* Decode CB_NOTIFY_DEVICEID args into args->devs[] and count them in ndevs. */
++static
++__be32 decode_pnfs_devicenotify_args(struct svc_rqst *rqstp,
++				     struct xdr_stream *xdr,
++				     struct cb_pnfs_devicenotifyargs *args)
++{
++	__be32 *p;
++	__be32 status = 0;
++	u32 tmp;
++	int n, i;
++	args->ndevs = 0;
++
++	args->addr = svc_addr(rqstp);
++
++	/* Num of device notifications */
++	p = read_buf(xdr, sizeof(uint32_t));
++	if (unlikely(p == NULL)) {
++		status = htonl(NFS4ERR_RESOURCE);
++		goto out;
++	}
++	n = ntohl(*p++);
++	if (n <= 0)
++		goto out;
++
++	/* XXX: need to possibly return error in this case */
++	if (n > NFS4_DEV_NOTIFY_MAXENTRIES) {
++		dprintk("%s: Processing (%d) notifications out of (%d)\n",
++			__func__, NFS4_DEV_NOTIFY_MAXENTRIES, n);
++		n = NFS4_DEV_NOTIFY_MAXENTRIES;
++	}
++
++	/* Decode each dev notification */
++	for (i = 0; i < n; i++) {
++		struct cb_pnfs_devicenotifyitem *dev = &args->devs[i];
++
++		p = read_buf(xdr, (4 * sizeof(uint32_t))
++			     + NFS4_PNFS_DEVICEID4_SIZE);
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_RESOURCE);
++			goto out;
++		}
++
++		tmp = ntohl(*p++);	/* bitmap size */
++		if (tmp != 1) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++		dev->cbd_notify_type = ntohl(*p++);
++		if (dev->cbd_notify_type != NOTIFY_DEVICEID4_CHANGE &&
++		    dev->cbd_notify_type != NOTIFY_DEVICEID4_DELETE) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++
++		tmp = ntohl(*p++);	/* opaque size */
++		if (((dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) &&
++		     (tmp != NFS4_PNFS_DEVICEID4_SIZE + 8)) ||
++		    ((dev->cbd_notify_type == NOTIFY_DEVICEID4_DELETE) &&
++		     (tmp != NFS4_PNFS_DEVICEID4_SIZE + 4))) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++		dev->cbd_layout_type = ntohl(*p++);
++		memcpy(dev->cbd_dev_id.data, p, NFS4_PNFS_DEVICEID4_SIZE);
++		p += XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE);
++		/* Only CHANGE carries the extra "immediate" word (size + 8 above). */
++		if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) {
++			p = read_buf(xdr, sizeof(uint32_t));
++			if (unlikely(p == NULL)) {
++				status = htonl(NFS4ERR_DELAY);
++				goto out;
++			}
++			dev->cbd_immediate = ntohl(*p++);
++		} else {
++			dev->cbd_immediate = 0;
++		}
++
++		args->ndevs++;
++
++		dprintk("%s: type %d layout 0x%x immediate %d\n",
++			__func__, dev->cbd_notify_type, dev->cbd_layout_type,
++			dev->cbd_immediate);
++	}
++out:
++	dprintk("%s: status %d ndevs %d\n",
++		__func__, ntohl(status), args->ndevs);
++	return status;
++}
++
+ static __be32 decode_sessionid(struct xdr_stream *xdr,
+ 				 struct nfs4_sessionid *sid)
+ {
+@@ -574,11 +718,11 @@ preprocess_nfs41_op(int nop, unsigned in
+ 	case OP_CB_SEQUENCE:
+ 	case OP_CB_RECALL_ANY:
+ 	case OP_CB_RECALL_SLOT:
++	case OP_CB_LAYOUTRECALL:
++	case OP_CB_NOTIFY_DEVICEID:
+ 		*op = &callback_ops[op_nr];
+ 		break;
+ 
+-	case OP_CB_LAYOUTRECALL:
+-	case OP_CB_NOTIFY_DEVICEID:
+ 	case OP_CB_NOTIFY:
+ 	case OP_CB_PUSH_DELEG:
+ 	case OP_CB_RECALLABLE_OBJ_AVAIL:
+@@ -739,6 +883,18 @@ static struct callback_op callback_ops[]
+ 		.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
+ 	},
+ #if defined(CONFIG_NFS_V4_1)
++	[OP_CB_LAYOUTRECALL] = {
++		.process_op = (callback_process_op_t)pnfs_cb_layoutrecall,
++		.decode_args =
++			(callback_decode_arg_t)decode_pnfs_layoutrecall_args,
++		.res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
++	},
++	[OP_CB_NOTIFY_DEVICEID] = {
++		.process_op = (callback_process_op_t)pnfs_cb_devicenotify,
++		.decode_args =
++			(callback_decode_arg_t)decode_pnfs_devicenotify_args,
++		.res_maxsize = CB_OP_DEVICENOTIFY_RES_MAXSZ,
++	},
+ 	[OP_CB_SEQUENCE] = {
+ 		.process_op = (callback_process_op_t)nfs4_callback_sequence,
+ 		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
+diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/client.c
+--- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-08-23 12:08:29.037481540 -0400
++++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-23 12:09:03.297501650 -0400
+@@ -39,6 +39,7 @@
+ #include <net/ipv6.h>
+ #include <linux/nfs_xdr.h>
+ #include <linux/sunrpc/bc_xprt.h>
++#include <linux/nfs4_pnfs.h>
+ 
+ #include <asm/system.h>
+ 
+@@ -48,6 +49,7 @@
+ #include "iostat.h"
+ #include "internal.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_CLIENT
+ 
+@@ -150,11 +152,14 @@ static struct nfs_client *nfs_alloc_clie
+ 	clp->cl_boot_time = CURRENT_TIME;
+ 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
+ 	clp->cl_minorversion = cl_init->minorversion;
++	clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
+ #endif
+ 	cred = rpc_lookup_machine_cred();
+ 	if (!IS_ERR(cred))
+ 		clp->cl_machine_cred = cred;
+-
++#if defined(CONFIG_NFS_V4_1)
++	INIT_LIST_HEAD(&clp->cl_layouts);
++#endif
+ 	nfs_fscache_get_client_cookie(clp);
+ 
+ 	return clp;
+@@ -178,7 +183,7 @@ static void nfs4_clear_client_minor_vers
+ 		clp->cl_session = NULL;
+ 	}
+ 
+-	clp->cl_call_sync = _nfs4_call_sync;
++	clp->cl_mvops = nfs_v4_minor_ops[0];
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
+@@ -188,7 +193,7 @@ static void nfs4_clear_client_minor_vers
+ static void nfs4_destroy_callback(struct nfs_client *clp)
+ {
+ 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+-		nfs_callback_down(clp->cl_minorversion);
++		nfs_callback_down(clp->cl_mvops->minor_version);
+ }
+ 
+ static void nfs4_shutdown_client(struct nfs_client *clp)
+@@ -251,6 +256,7 @@ void nfs_put_client(struct nfs_client *c
+ 		nfs_free_client(clp);
+ 	}
+ }
++EXPORT_SYMBOL(nfs_put_client);
+ 
+ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ /*
+@@ -343,7 +349,7 @@ static int nfs_sockaddr_match_ipaddr(con
+  * Test if two socket addresses represent the same actual socket,
+  * by comparing (only) relevant fields, including the port number.
+  */
+-static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
++int nfs_sockaddr_cmp(const struct sockaddr *sa1,
+ 			    const struct sockaddr *sa2)
+ {
+ 	if (sa1->sa_family != sa2->sa_family)
+@@ -357,6 +363,7 @@ static int nfs_sockaddr_cmp(const struct
+ 	}
+ 	return 0;
+ }
++EXPORT_SYMBOL(nfs_sockaddr_cmp);
+ 
+ /*
+  * Find a client by IP address and protocol version
+@@ -548,6 +555,7 @@ int nfs4_check_client_ready(struct nfs_c
+ 		return -EPROTONOSUPPORT;
+ 	return 0;
+ }
++EXPORT_SYMBOL(nfs4_check_client_ready);
+ 
+ /*
+  * Initialise the timeout values for a connection
+@@ -865,9 +873,34 @@ error:
+ }
+ 
+ /*
++ * Initialize the pNFS layout driver and setup pNFS related parameters
++ */
++static void nfs4_init_pnfs(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fsinfo *fsinfo)
++{
++#if defined(CONFIG_NFS_V4_1)
++	struct nfs_client *clp = server->nfs_client;
++	/* No-op unless clp has a session and the USE_PNFS_MDS exchange flag. */
++	if (nfs4_has_session(clp) &&
++	    (clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS)) {
++		server->pnfs_blksize = fsinfo->blksize;
++		set_pnfs_layoutdriver(server, mntfh, fsinfo->layouttype);
++		pnfs_set_ds_iosize(server);
++	}
++#endif /* CONFIG_NFS_V4_1 */
++}
++/* Called from nfs_free_server(): unmount the layout driver of a session client. */
++static void nfs4_uninit_pnfs(struct nfs_server *server)
++{
++#if defined(CONFIG_NFS_V4_1)
++	if (server->nfs_client && nfs4_has_session(server->nfs_client))
++		unmount_pnfs_layoutdriver(server);
++#endif /* CONFIG_NFS_V4_1 */
++}
++
++/*
+  * Load up the server record from information gained in an fsinfo record
+  */
+-static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo)
++static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fsinfo *fsinfo)
+ {
+ 	unsigned long max_rpc_payload;
+ 
+@@ -897,6 +930,8 @@ static void nfs_server_set_fsinfo(struct
+ 	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
+ 		server->wsize = NFS_MAX_FILE_IO_SIZE;
+ 	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
++	nfs4_init_pnfs(server, mntfh, fsinfo);
++
+ 	server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
+ 
+ 	server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
+@@ -938,7 +973,7 @@ static int nfs_probe_fsinfo(struct nfs_s
+ 	if (error < 0)
+ 		goto out_error;
+ 
+-	nfs_server_set_fsinfo(server, &fsinfo);
++	nfs_server_set_fsinfo(server, mntfh, &fsinfo);
+ 
+ 	/* Get some general file system info */
+ 	if (server->namelen == 0) {
+@@ -1016,6 +1051,7 @@ void nfs_free_server(struct nfs_server *
+ {
+ 	dprintk("--> nfs_free_server()\n");
+ 
++	nfs4_uninit_pnfs(server);
+ 	spin_lock(&nfs_client_lock);
+ 	list_del(&server->client_link);
+ 	list_del(&server->master_link);
+@@ -1126,7 +1162,7 @@ static int nfs4_init_callback(struct nfs
+ 				return error;
+ 		}
+ 
+-		error = nfs_callback_up(clp->cl_minorversion,
++		error = nfs_callback_up(clp->cl_mvops->minor_version,
+ 					clp->cl_rpcclient->cl_xprt);
+ 		if (error < 0) {
+ 			dprintk("%s: failed to start callback. Error = %d\n",
+@@ -1143,10 +1179,8 @@ static int nfs4_init_callback(struct nfs
+  */
+ static int nfs4_init_client_minor_version(struct nfs_client *clp)
+ {
+-	clp->cl_call_sync = _nfs4_call_sync;
+-
+ #if defined(CONFIG_NFS_V4_1)
+-	if (clp->cl_minorversion) {
++	if (clp->cl_mvops->minor_version) {
+ 		struct nfs4_session *session = NULL;
+ 		/*
+ 		 * Create the session and mark it expired.
+@@ -1158,7 +1192,13 @@ static int nfs4_init_client_minor_versio
+ 			return -ENOMEM;
+ 
+ 		clp->cl_session = session;
+-		clp->cl_call_sync = _nfs4_call_sync_session;
++		/*
++		 * The create session reply races with the server back
++		 * channel probe. Mark the client NFS_CS_SESSION_INITING
++		 * so that the client back channel can find the
++		 * nfs_client struct
++		 */
++		clp->cl_cons_state = NFS_CS_SESSION_INITING;
+ 	}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+@@ -1216,7 +1256,7 @@ error:
+ /*
+  * Set up an NFS4 client
+  */
+-static int nfs4_set_client(struct nfs_server *server,
++int nfs4_set_client(struct nfs_server *server,
+ 		const char *hostname,
+ 		const struct sockaddr *addr,
+ 		const size_t addrlen,
+@@ -1259,6 +1299,7 @@ error:
+ 	dprintk("<-- nfs4_set_client() = xerror %d\n", error);
+ 	return error;
+ }
++EXPORT_SYMBOL(nfs4_set_client);
+ 
+ 
+ /*
+@@ -1448,7 +1489,7 @@ struct nfs_server *nfs4_create_referral_
+ 				data->authflavor,
+ 				parent_server->client->cl_xprt->prot,
+ 				parent_server->client->cl_timeout,
+-				parent_client->cl_minorversion);
++				parent_client->cl_mvops->minor_version);
+ 	if (error < 0)
+ 		goto error;
+ 
+diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/bl_com.c
+--- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-08-23 12:09:03.297501650 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-08-23 12:09:03.298501447 -0400
+@@ -0,0 +1,292 @@
++#if defined(CONFIG_SPNFS_BLOCK)
++
++#include <linux/module.h>
++#include <linux/mutex.h>
++#include <linux/init.h>
++#include <linux/types.h>
++#include <linux/slab.h>
++#include <linux/socket.h>
++#include <linux/in.h>
++#include <linux/sched.h>
++#include <linux/exportfs.h>
++#include <linux/namei.h>
++#include <linux/mount.h>
++#include <linux/path.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/workqueue.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++#include <linux/proc_fs.h>
++#include <linux/nfs_fs.h>
++
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd4_block.h>
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++static ssize_t bl_pipe_upcall(struct file *, struct rpc_pipe_msg *,
++    char __user *, size_t);
++static ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t);
++static void bl_pipe_destroy_msg(struct rpc_pipe_msg *);
++
++static struct rpc_pipe_ops bl_upcall_ops = {
++	.upcall		= bl_pipe_upcall,
++	.downcall	= bl_pipe_downcall,
++	.destroy_msg	= bl_pipe_destroy_msg,
++};
++
++bl_comm_t	*bl_comm_global;
++/* Create the "pnfs_block" upcall pipe in rpc_pipefs; 0 or a negative errno. */
++int
++nfsd_bl_start(void)
++{
++	bl_comm_t	*bl_comm = NULL;
++	struct path path;
++	struct nameidata nd;
++	int rc;
++
++	dprintk("%s: starting pipe\n", __func__);
++	if (bl_comm_global)
++		return -EEXIST;
++
++	path.mnt = rpc_get_mount();
++	if (IS_ERR(path.mnt))
++		return PTR_ERR(path.mnt);
++
++	bl_comm = kzalloc(sizeof (*bl_comm), GFP_KERNEL);
++	if (!bl_comm) {
++		rc = -ENOMEM;
++		goto err;
++	}
++	/* Initialise before rpc_mkpipe() makes the pipe reachable. */
++	mutex_init(&bl_comm->lock);
++	mutex_init(&bl_comm->pipe_lock);
++	init_waitqueue_head(&bl_comm->pipe_wq);
++
++	/* FIXME: do not abuse rpc_pipefs/nfs */
++	rc = vfs_path_lookup(path.mnt->mnt_root, path.mnt, "/nfs", 0, &nd);
++	if (rc)
++		goto err;
++	/* FIXME: rename to "spnfs_block" */
++	bl_comm->pipe_dentry = rpc_mkpipe(nd.path.dentry, "pnfs_block", bl_comm,
++					 &bl_upcall_ops, 0);
++	path_put(&nd.path);	/* drop the reference vfs_path_lookup() took */
++	if (IS_ERR(bl_comm->pipe_dentry)) {
++		rc = -EPIPE;
++		goto err;
++	}
++	bl_comm_global = bl_comm;
++	return 0;
++err:
++	rpc_put_mount();
++	kfree(bl_comm);
++	return rc;
++}
++/* Tear down the pipe set up by nfsd_bl_start(); no-op if none exists. */
++void
++nfsd_bl_stop(void)
++{
++	bl_comm_t	*c = bl_comm_global;
++
++	dprintk("%s: stopping pipe\n", __func__);
++	if (!c)
++		return;
++	rpc_unlink(c->pipe_dentry);
++	rpc_put_mount();
++	bl_comm_global = NULL;
++	kfree(c);
++}
++/* rpc_pipefs ->upcall: copy the unread part of msg to the reader's buffer. */
++static ssize_t
++bl_pipe_upcall(struct file *file, struct rpc_pipe_msg *msg, char __user *dst,
++    size_t buflen)
++{
++	char	*data	= (char *)msg->data + msg->copied;
++	ssize_t	mlen	= msg->len - msg->copied,
++		left;
++
++	if (mlen > buflen)
++		mlen = buflen;
++
++	left = copy_to_user(dst, data, mlen);
++	if (left > 0 && left == mlen) {	/* returns bytes NOT copied, never < 0 */
++		msg->errno = -EFAULT;
++		return -EFAULT;
++	}
++	mlen		-= left;
++	msg->copied	+= mlen;
++	msg->errno	= 0;
++
++	return mlen;
++}
++/* rpc_pipefs ->downcall: hand the daemon's reply to the waiter in bl_upcall(). */
++static ssize_t
++bl_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
++{
++	struct rpc_inode	*rpci	= RPC_I(filp->f_dentry->d_inode);
++	bl_comm_t		*bc	= (bl_comm_t *)rpci->private;
++	bl_comm_msg_t		*im	= &bc->msg;
++	bl_comm_res_t		*res;
++
++	if (mlen == 0) {
++		/* pipe_lock keeps this wake-up behind bl_upcall() going to sleep */
++		mutex_lock(&bc->pipe_lock);
++		im->msg_status = PNFS_BLOCK_FAILURE;
++		im->msg_res = NULL;
++		wake_up(&bc->pipe_wq);
++		mutex_unlock(&bc->pipe_lock);
++		return -EFAULT;
++	}
++
++	/* NOTE(review): mlen < sizeof (*res) is not rejected - confirm protocol */
++	if ((res = kmalloc(mlen, GFP_KERNEL)) == NULL)
++		return -ENOMEM;
++
++	if (copy_from_user(res, src, mlen)) {
++		kfree(res);
++		return -EFAULT;
++	}
++
++	mutex_lock(&bc->pipe_lock);
++	im->msg_status	= res->res_status;
++	im->msg_res	= res;
++
++	wake_up(&bc->pipe_wq);
++	mutex_unlock(&bc->pipe_lock);
++	return mlen;
++}
++/* rpc_pipefs ->destroy_msg: if msg->errno < 0, wake the waiter with FAILURE. */
++static void
++bl_pipe_destroy_msg(struct rpc_pipe_msg *msg)
++{
++	bl_comm_msg_t	*im = msg->data;
++	bl_comm_t	*bc = container_of(im, struct bl_comm, msg);
++	
++	if (msg->errno >= 0)
++		return;
++
++	mutex_lock(&bc->pipe_lock);
++	im->msg_status = PNFS_BLOCK_FAILURE;
++	wake_up(&bc->pipe_wq);
++	mutex_unlock(&bc->pipe_lock);
++}
++/* Queue upmsg for the daemon and sleep until answered; 0 and *res on success. */
++int
++bl_upcall(bl_comm_t *bc, bl_comm_msg_t *upmsg, bl_comm_res_t **res)
++{
++	struct rpc_pipe_msg	msg;
++	DECLARE_WAITQUEUE(wq, current);
++	int			rval	= 1;
++	bl_comm_msg_t		*m;
++
++	*res = NULL;	/* callers may kfree(*res) even when the upcall fails */
++	if (bc == NULL) {
++		dprintk("%s: No pNFS block daemon available\n", __func__);
++		return 1;
++	}
++	m = &bc->msg;	/* only formed once bc is known to be non-NULL */
++	mutex_lock(&bc->lock);
++	mutex_lock(&bc->pipe_lock);
++
++	memcpy(m, upmsg, sizeof (*m));
++	memset(&msg, 0, sizeof (msg));
++	msg.data = m;
++	msg.len = sizeof (*m);
++
++	add_wait_queue(&bc->pipe_wq, &wq);
++	rval = rpc_queue_upcall(bc->pipe_dentry->d_inode, &msg);
++	if (rval < 0) {
++		remove_wait_queue(&bc->pipe_wq, &wq);
++		goto out;
++	}
++	/* Set the sleep state before dropping pipe_lock so a wake-up is not lost. */
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	mutex_unlock(&bc->pipe_lock);
++	schedule();
++	__set_current_state(TASK_RUNNING);
++	remove_wait_queue(&bc->pipe_wq, &wq);
++	mutex_lock(&bc->pipe_lock);
++
++	if (m->msg_status == PNFS_BLOCK_SUCCESS) {
++		*res = m->msg_res;
++		rval = 0;
++	} else
++		rval = 1;
++
++out:
++	mutex_unlock(&bc->pipe_lock);
++	mutex_unlock(&bc->lock);
++	return rval;
++}
++/* Write handler for fs/pnfs_block/ctl: the first int of buf selects a command. */
++static ssize_t ctl_write(struct file *file, const char __user *buf, size_t len,
++    loff_t *offset)
++{
++	int		cmd,
++			rc;
++	bl_comm_t	*bc	= bl_comm_global;
++	bl_comm_msg_t	msg	= { 0 };	/* whole struct is sent to user space */
++	bl_comm_res_t	*res	= NULL;		/* kfree()d even if the upcall fails */
++
++	if (len < sizeof (cmd))
++		return -EINVAL;
++	if (copy_from_user(&cmd, buf, sizeof (cmd)))
++		return -EFAULT;
++
++	switch (cmd) {
++	case PNFS_BLOCK_CTL_STOP:
++		msg.msg_type = PNFS_UPCALL_MSG_STOP;
++		(void) bl_upcall(bc, &msg, &res);
++		kfree(res);
++		nfsd_bl_stop();
++		break;
++	case PNFS_BLOCK_CTL_START:
++		rc = nfsd_bl_start();
++		if (rc != 0)
++			return rc;
++		break;
++	case PNFS_BLOCK_CTL_VERS:
++		msg.msg_type = PNFS_UPCALL_MSG_VERS;
++		msg.u.msg_vers = PNFS_UPCALL_VERS;
++		if (bl_upcall(bc, &msg, &res)) {
++			dprintk("%s: Failed to contact pNFS block daemon\n",
++			    __func__);
++			return 0;
++		}
++		kfree(res);
++		break;
++	default:
++		dprintk("%s: unknown ctl command %d\n", __func__, cmd);
++		break;
++	}
++	return len;
++}
++
++static struct file_operations ctl_ops = {
++	.write	= ctl_write,
++};
++
++/*
++ * bl_init_proc -- create fs/pnfs_block and its "ctl" entry under /proc
++ *
++ * Returns 0 on success or -ENOMEM; the directory is removed again if the
++ * ctl entry cannot be created. A directory isn't really required while it
++ * holds a single node; if no more nodes appear this should revert to a
++ * single top level node. McNeal 11-Aug-2008.
++ */
++int
++bl_init_proc(void)
++{
++	struct proc_dir_entry *e;
++
++	e = proc_mkdir("fs/pnfs_block", NULL);
++	if (!e)
++		return -ENOMEM;
++	e = create_proc_entry("fs/pnfs_block/ctl", 0, NULL);
++	if (!e) {
++		remove_proc_entry("fs/pnfs_block", NULL);
++		return -ENOMEM;
++	}
++	e->proc_fops = &ctl_ops;
++	return 0;
++}
++#endif /* CONFIG_SPNFS_BLOCK */
+diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/bl_ops.c
+--- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-08-23 12:09:03.299501445 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-08-23 12:09:03.299501445 -0400
+@@ -0,0 +1,1672 @@
++/*
++ *  bl_ops.c
++ *  spNFS
++ *
++ *  Created by Rick McNeal on 4/1/08.
++ *  Copyright 2008 __MyCompanyName__. All rights reserved.
++ *
++ */
++
++/*
++ * Block layout operations.
++ *
++ * These functions, with the exception of pnfs_block_enabled, are assigned to
++ * the super block s_export_op structure.
++ */
++#if defined(CONFIG_SPNFS_BLOCK)
++
++#include <linux/module.h>
++#include <linux/genhd.h>
++#include <linux/fs.h>
++#include <linux/exportfs.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd/export.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/nfsd/debug.h>
++#include <linux/spinlock_types.h>
++#include <linux/dm-ioctl.h>
++#include <asm/uaccess.h>
++#include <linux/falloc.h>
++#include <linux/nfsd4_block.h>
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY	NFSDDBG_PNFS
++
++#define MIN(a, b) ((a) < (b) ? (a) : (b))
++
++#define BL_LAYOUT_HASH_BITS	4
++#define BL_LAYOUT_HASH_SIZE	(1 << BL_LAYOUT_HASH_BITS)
++#define BL_LAYOUT_HASH_MASK	(BL_LAYOUT_HASH_SIZE - 1)
++#define BL_LIST_REQ	(sizeof (struct dm_ioctl) + 256)
++
++#define bl_layout_hashval(id) \
++	((id) & BL_LAYOUT_HASH_MASK)
++
++#define BLL_F_END(p) ((p)->bll_foff + (p)->bll_len)
++#define BLL_S_END(p) ((p)->bll_soff + (p)->bll_len)
++#define _2SECTS(v) ((v) >> 9)
++
++#ifndef READ32
++#define READ32(x)	(x) = ntohl(*p++)
++#define READ64(x)	do {			\
++(x) = (u64)ntohl(*p++) << 32;	\
++(x) |= ntohl(*p++);		\
++} while (0)
++#endif
++
++
++typedef enum {True, False} boolean_t;
++/* ---- block layoutget and commit structure ---- */
++typedef struct bl_layout_rec {
++	struct list_head	blr_hash,
++				blr_layouts;
++	dev_t			blr_rdev;
++	struct inode		*blr_inode;
++	int			blr_recalled;	// debug
++	u64			blr_orig_size,
++				blr_commit_size,
++				blr_ext_size;
++	spinlock_t		blr_lock;	// Protects blr_layouts
++} bl_layout_rec_t;
++
++static struct list_head layout_hash;
++static struct list_head layout_hashtbl[BL_LAYOUT_HASH_SIZE];
++static spinlock_t layout_hashtbl_lock;
++
++/* ---- prototypes ---- */
++static boolean_t device_slice(dev_t devid);
++static boolean_t device_dm(dev_t devid);
++static boolean_t layout_inode_add(struct inode *i, bl_layout_rec_t **);
++static bl_layout_rec_t *layout_inode_find(struct inode *i);
++static void layout_inode_del(struct inode *i);
++static char *map_state2name(enum pnfs_block_extent_state4 s);
++static pnfs_blocklayout_devinfo_t *bld_alloc(struct list_head *volume, int type);
++static void bld_free(pnfs_blocklayout_devinfo_t *bld);
++static pnfs_blocklayout_devinfo_t *bld_simple(struct list_head *volumes,
++    dev_t devid, int local_index);
++static pnfs_blocklayout_devinfo_t *bld_slice(struct list_head *volumes,
++    dev_t devid, int my_loc, int idx);
++static int layout_cache_fill_from(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg);
++struct list_head *layout_cache_iter(bl_layout_rec_t *r,
++    struct list_head *bl_possible, struct nfsd4_layout_seg *seg);
++static void layout_cache_merge(bl_layout_rec_t *r, struct list_head *h);
++static int layout_cache_update(bl_layout_rec_t *r, struct list_head *h);
++static void layout_cache_del(bl_layout_rec_t *r, const struct nfsd4_layout_seg *seg);
++static void print_bll(pnfs_blocklayout_layout_t *b, char *);
++static inline boolean_t layout_cache_fill_from_list(bl_layout_rec_t *r,
++    struct list_head *h, struct nfsd4_layout_seg *seg);
++static inline void bll_collapse(bl_layout_rec_t *r,
++    pnfs_blocklayout_layout_t *c);
++static pnfs_blocklayout_layout_t *bll_alloc(u64 offset, u64 len,
++    enum bl_cache_state state, struct list_head *h);
++static pnfs_blocklayout_layout_t *bll_alloc_dup(pnfs_blocklayout_layout_t *b,
++    enum bl_cache_state c, struct list_head *h);
++static inline boolean_t layout_conflict(pnfs_blocklayout_layout_t *b, u32 iomode,
++    enum pnfs_block_extent_state4 *s);
++static void extents_setup(struct fiemap_extent_info *fei);
++static void extents_count(struct fiemap_extent_info *fei, struct inode *i,
++    u64 foff, u64 len);
++static boolean_t extents_get(struct fiemap_extent_info *fei, struct inode *i,
++    u64 foff, u64 len);
++static boolean_t extents_process(struct fiemap_extent_info *fei,
++    struct list_head *bl_candidates, struct nfsd4_layout_seg *, dev_t dev,
++    pnfs_blocklayout_layout_t *b);
++static void extents_cleanup(struct fiemap_extent_info *fei);
++/* Initialise the layout hash lock and lists, then create the proc interface. */
++void
++nfsd_bl_init(void)
++{
++	int	i;
++	dprintk("%s loaded\n", __func__);
++
++	spin_lock_init(&layout_hashtbl_lock);
++	INIT_LIST_HEAD(&layout_hash);
++	for (i = 0; i < BL_LAYOUT_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&layout_hashtbl[i]);
++	bl_init_proc();	/* return value is ignored */
++}
++
++/*
++ * pnfs_block_enabled -- check to see if this file system should be exported
++ * as block pnfs; returns 1 if so, 0 otherwise
++ */
++int
++pnfs_block_enabled(struct inode *inode, int ex_flags)
++{
++	bl_comm_msg_t	msg;
++	bl_comm_res_t	*res	= NULL;
++	static int bl_comm_once	= 0;
++	
++	dprintk("--> %s\n", __func__);
++	/*
++	 * FIXME: Figure out method to determine if this file system should
++	 * be exported. The following areas need to be checked.
++	 * (1) Validate that this file system was exported as a pNFS
++	 *     block-layout
++	 * (2) Has there been successful communication with the
++	 *     volume daemon?
++	 */
++	/* Check #1 */
++#ifdef notyet
++	if (!(ex_flags & NFSEXP_PNFS_BLOCK)) {
++		dprintk("%s: pnfs_block not set in export\n", __func__);
++		return 0;
++	}
++#endif
++	
++	/* Check #2: version handshake with the daemon, done only once */
++	if (!bl_comm_once) {
++		msg.msg_type = PNFS_UPCALL_MSG_VERS;
++		msg.u.msg_vers = PNFS_UPCALL_VERS;
++		if (bl_upcall(bl_comm_global, &msg, &res)) {
++			dprintk("%s: Failed to contact pNFS block daemon\n",
++				__func__);
++			return 0;
++		}
++		if (msg.u.msg_vers != res->u.vers) {
++			dprintk("%s: vers mismatch, kernel != daemon\n",
++				__func__);
++			kfree(res);
++			return 0;
++		}
++	}
++	bl_comm_once = 1;
++
++	kfree(res);
++	
++	dprintk("<-- %s okay\n", __func__);
++	return 1;
++}
++/* Always reports LAYOUT_BLOCK_VOLUME; sb is not examined. */
++int
++bl_layout_type(struct super_block *sb)
++{
++	return LAYOUT_BLOCK_VOLUME;
++}
++/* Report one device id, sb->s_dev; a nonzero gd_cookie yields -ENOENT. */
++int
++bl_getdeviceiter(struct super_block *sb,
++		 u32 layout_type,
++		 struct nfsd4_pnfs_dev_iter_res *res)
++{
++	res->gd_eof = 1;	
++	if (res->gd_cookie)
++		return -ENOENT;
++	res->gd_devid	= sb->s_dev;
++	res->gd_verf	= 1;
++	res->gd_cookie	= 1;
++	return 0;
++}
++/* Encode devinfo for devid as a simple volume plus a slice built on it. */
++static int
++bl_getdeviceinfo_slice(struct super_block *sb, struct exp_xdr_stream *xdr,
++		       const struct nfsd4_pnfs_deviceid *devid)
++{
++	pnfs_blocklayout_devinfo_t	*bld_slice_p,
++					*bld_simple_p,
++					*bld;
++	int				status		= -EIO,
++					location	= 0;
++	struct list_head		volumes;
++	
++	dprintk("--> %s\n", __func__);
++	INIT_LIST_HEAD(&volumes);
++
++	bld_simple_p = bld_simple(&volumes, devid->devid,
++				  location++);
++	if (!bld_simple_p)
++		goto out;
++	bld_slice_p = bld_slice(&volumes, devid->devid, location++,
++	    bld_simple_p->bld_index_loc);
++
++	if (!bld_slice_p)
++		goto out;
++	
++	status = blocklayout_encode_devinfo(xdr, &volumes);
++
++out:
++	while (!list_empty(&volumes)) {
++		bld = list_entry(volumes.next, pnfs_blocklayout_devinfo_t,
++		    bld_list);
++		if (bld->bld_type == PNFS_BLOCK_VOLUME_SIMPLE)
++			kfree(bld->u.simple.bld_sig);
++		bld_free(bld);	/* NOTE(review): presumably unlinks bld - confirm */
++	}
++	
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++static int
++bl_getdeviceinfo_dm(struct super_block *sb, struct exp_xdr_stream *xdr,
++		    const struct nfsd4_pnfs_deviceid *devid)
++{
++	pnfs_blocklayout_devinfo_t	*bld		= NULL;
++	int				status		= -EIO,	// default to error
++					i,
++					location	= 0;
++	struct list_head		volumes;
++	bl_comm_msg_t			msg;
++	bl_comm_res_t			*res		= NULL;	// kfree()d at out:
++
++	dprintk("--> %s\n", __func__);
++	INIT_LIST_HEAD(&volumes);
++
++	msg.msg_type = PNFS_UPCALL_MSG_DMGET;
++	msg.u.msg_dev = devid->devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("%s: upcall for DMGET failed\n", __func__);
++		goto out;
++	}
++
++	/*
++	 * Don't use bld_alloc() here. If used this will be the first volume
++	 * type added to the list whereas the protocol requires it to be the
++	 * last.
++	 */
++	bld = kzalloc(sizeof (*bld), GFP_KERNEL);
++	if (!bld)
++		goto out;
++	INIT_LIST_HEAD(&bld->bld_list);
++	bld->bld_type			= PNFS_BLOCK_VOLUME_STRIPE;
++	bld->u.stripe.bld_stripes	= res->u.stripe.num_stripes;
++	bld->u.stripe.bld_chunk_size	= res->u.stripe.stripe_size * 512LL;
++	dprintk("%s: stripes %d, chunk_size %Lu\n", __func__,
++	    bld->u.stripe.bld_stripes, bld->u.stripe.bld_chunk_size / 512LL);
++
++	bld->u.stripe.bld_stripe_indexs = kcalloc(bld->u.stripe.bld_stripes,
++						  sizeof (int), GFP_KERNEL);
++	if (!bld->u.stripe.bld_stripe_indexs)
++		goto out;
++
++	for (i = 0; i < bld->u.stripe.bld_stripes; i++) {
++		dev_t			dev;
++		pnfs_blocklayout_devinfo_t	*bldp;
++
++		dev = MKDEV(res->u.stripe.devs[i].major,
++			    res->u.stripe.devs[i].minor);
++		if (dev == 0)
++			goto out;
++		bldp = bld_simple(&volumes, dev, location++);
++		if (!bldp) {
++			dprintk("%s: bld_simple failed\n", __func__);
++			goto out;
++		}
++		bldp = bld_slice(&volumes, dev, location++, bldp->bld_index_loc);
++		if (!bldp) {
++			dprintk("%s: bld_slice failed\n", __func__);
++			goto out;
++		}
++		bld->u.stripe.bld_stripe_indexs[i] = bldp->bld_index_loc;
++	}
++	list_add_tail(&bld->bld_list, &volumes);
++	status = blocklayout_encode_devinfo(xdr, &volumes);
++
++out:
++	/* Queue a never-linked stripe volume so the loop below frees it. */
++	if (bld && list_empty(&bld->bld_list))
++		list_add_tail(&bld->bld_list, &volumes);
++	while (!list_empty(&volumes)) {
++		bld = list_entry(volumes.next, pnfs_blocklayout_devinfo_t,
++		    bld_list);
++		switch (bld->bld_type) {
++			case PNFS_BLOCK_VOLUME_SLICE:
++			case PNFS_BLOCK_VOLUME_CONCAT:
++				// No memory to release for these
++				break;
++			case PNFS_BLOCK_VOLUME_SIMPLE:
++				kfree(bld->u.simple.bld_sig);
++				break;
++			case PNFS_BLOCK_VOLUME_STRIPE:
++				kfree(bld->u.stripe.bld_stripe_indexs);
++				break;
++		}
++		bld_free(bld);
++	}
++	kfree(res);
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++/*
++ * bl_getdeviceinfo -- determine device tree for requested devid
++ */
++int
++bl_getdeviceinfo(struct super_block *sb, struct exp_xdr_stream *xdr,
++		 u32 layout_type,
++		 const struct nfsd4_pnfs_deviceid *devid)
++{
++	if (device_slice(devid->devid) == True)
++		return bl_getdeviceinfo_slice(sb, xdr, devid);
++	else if (device_dm(devid->devid) == True)
++		return bl_getdeviceinfo_dm(sb, xdr, devid);
++	return -EINVAL;
++}
++
++enum nfsstat4
++bl_layoutget(struct inode *i, struct exp_xdr_stream *xdr,
++	     const struct nfsd4_pnfs_layoutget_arg *arg,
++	     struct nfsd4_pnfs_layoutget_res *res)
++{
++	pnfs_blocklayout_layout_t	*b;
++	bl_layout_rec_t			*r;
++	struct list_head		bl_possible,
++					*bl_candidates	= NULL;
++	boolean_t			del_on_error	= False;
++	int				adj;
++	enum nfsstat4			nfserr		= NFS4_OK;
++
++	dprintk("--> %s (inode=[0x%x:%lu], offset=%Lu, len=%Lu, iomode=%d)\n",
++	    __func__, i->i_sb->s_dev, i->i_ino, _2SECTS(res->lg_seg.offset),
++	    _2SECTS(res->lg_seg.length), res->lg_seg.iomode);
++
++	if (res->lg_seg.length == 0) {
++		printk("%s: request length of 0, error condition\n", __func__);
++		return NFS4ERR_BADLAYOUT;
++	}
++
++	/*
++	 * Adjust the length as required per spec.
++	 * - First case is where the length is set to (u64)-1. Cheap means to
++	 *   define the end of the file.
++	 * - Second case is where the I/O mode is read-only, but the request is
++	 *   past the end of the file so the request needs to be trimmed.
++	 */
++	if ((res->lg_seg.length == NFS4_MAX_UINT64) ||
++	    (((res->lg_seg.offset + res->lg_seg.length) > i->i_size) &&
++	     (res->lg_seg.iomode == IOMODE_READ)))
++		res->lg_seg.length = i->i_size - res->lg_seg.offset;
++
++	adj = res->lg_seg.offset & 511;
++	res->lg_seg.offset -= adj;
++	res->lg_seg.length = (res->lg_seg.length + adj + 511) & ~511;
++
++	if (res->lg_seg.iomode != IOMODE_READ && (!i->i_op->fallocate ||
++	    i->i_op->fallocate(i, FALLOC_FL_KEEP_SIZE, res->lg_seg.offset,
++			       res->lg_seg.length)))
++		return NFS4ERR_IO;
++
++	INIT_LIST_HEAD(&bl_possible);
++	if ((r = layout_inode_find(i)) == NULL) {
++		if (layout_inode_add(i, &r) == False) {
++			printk("%s: layout_inode_add failed\n", __func__);
++			return NFS4ERR_IO;
++		}
++		del_on_error = True;
++	}
++	BUG_ON(!r);
++
++	spin_lock(&r->blr_lock);
++	if (layout_cache_fill_from(r, &bl_possible, &res->lg_seg)) {
++		/*
++		 * This will send LAYOUTTRYLATER error to the client.
++		 */
++		dprintk("%s: layout_cache_fill_from() failed\n", __func__);
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	res->lg_return_on_close	= 1;
++	res->lg_seg.length	= 0;
++
++	bl_candidates = layout_cache_iter(r, &bl_possible, &res->lg_seg);
++	if (!bl_candidates) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	layout_cache_merge(r, bl_candidates);
++	if (layout_cache_update(r, bl_candidates)) {
++		/* ---- Failed to allocate memory. ---- */
++		dprintk("%s: layout_cache_update() failed\n", __func__);
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	nfserr = blocklayout_encode_layout(xdr, bl_candidates);
++	if (nfserr)
++		dprintk("%s: layoutget xdr routine failed\n", __func__);
++
++layoutget_cleanup:
++	/* Free both request-private lists and the kmalloc'ed candidate head. */
++	if (bl_candidates) {
++		list_splice(bl_candidates, &bl_possible);
++		kfree(bl_candidates);
++	}
++	while (!list_empty(&bl_possible)) {
++		b = list_entry(bl_possible.next,
++		    struct pnfs_blocklayout_layout, bll_list);
++		list_del(&b->bll_list);
++		kfree(b);
++	}
++	spin_unlock(&r->blr_lock);
++	if (unlikely(nfserr)) {
++		if (del_on_error == True)
++			layout_inode_del(i);
++		res->lg_seg.length = 0;
++		res->lg_seg.offset = 0;
++	}
++
++	dprintk("<-- %s (rval %u)\n", __func__, nfserr);
++	return nfserr;
++}
++
++/*
++ * bl_layoutcommit -- commit changes, especially size, to file system
++ *
++ * Currently this routine isn't called and everything is handled within
++ * nfsd4_layoutcommit(). By not calling this routine the server doesn't
++ * handle a partial return, a set of extents, of the layout. The extents
++ * are decoded here, but nothing is done with them. If this routine is to
++ * be called the interface must change to pass the 'dentry' pointer such
++ * that notify_change() can be called.
++ */
++int
++bl_layoutcommit(struct inode *i,
++		const struct nfsd4_pnfs_layoutcommit_arg *args,
++		struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	bl_layout_rec_t			*r;
++	int				status	= 0;
++	u64				lw_plus;
++	
++	dprintk("--> %s (ino [0x%x:%lu])\n", __func__, i->i_sb->s_dev, i->i_ino);
++	r = layout_inode_find(i);
++	if (r) {
++		lw_plus = args->lc_last_wr + 1;
++		if (args->lc_newoffset) {
++			dprintk("  lc_last_wr %Lu\n", lw_plus);
++			if (r->blr_orig_size < lw_plus) {
++				r->blr_orig_size	= lw_plus;
++				res->lc_size_chg	= 1;
++				res->lc_newsize		= lw_plus;
++			}
++		}
++
++		if (args->lc_up_len) {
++			int	extents,
++				i;
++			struct pnfs_blocklayout_layout *b;
++			__be32 *p = args->lc_up_layout;
++			
++			/*
++			 * Client is returning a set of extents which
++			 * should/could be used to update the file system.
++			 * See section 2.3.2 in draft-ietf-nfsv4-pnfs-block-08
++			 */
++			READ32(extents);
++			dprintk("  Client returning %d extents: data size %d\n",
++			    extents, args->lc_up_len);
++			b = kmalloc(sizeof (struct pnfs_blocklayout_layout) *
++				    extents, GFP_KERNEL);
++			if (b) {
++				for (i = 0; i < extents; i++) {
++					READ64(b[i].bll_vol_id.sbid);
++					READ64(b[i].bll_vol_id.devid);
++					READ64(b[i].bll_foff);
++					READ64(b[i].bll_len);
++					READ64(b[i].bll_soff);
++					READ32(b[i].bll_es);
++					dprintk("  %d: foff %Lu, len %Lu, soff %Lu "
++					    "state %s\n",
++					    i, _2SECTS(b[i].bll_foff),
++					    _2SECTS(b[i].bll_len),
++					    _2SECTS(b[i].bll_soff),
++					    map_state2name(b[i].bll_es));
++				}
++				kfree(b);
++			} else {
++				status = -ENOMEM;
++			}
++		}
++	} else
++		dprintk("%s: Unexpected commit to inode %p\n", __func__, i);
++	
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++int
++bl_layoutreturn(struct inode *i,
++		const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	int				status	= 0;
++	bl_layout_rec_t			*r;
++
++	dprintk("--> %s (ino [0x%x:%lu])\n", __func__, i->i_sb->s_dev, i->i_ino);
++	
++	r = layout_inode_find(i);
++	if (r) {
++		spin_lock(&r->blr_lock);
++		layout_cache_del(r, &args->lr_seg);
++		spin_unlock(&r->blr_lock);
++		dprintk("    ext_size %Lu, i_size %Lu, orig_size %Lu\n",
++		    r->blr_ext_size, i->i_size, r->blr_orig_size);
++	}
++
++	layout_inode_del(i);
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++int
++bl_layoutrecall(struct inode *inode, int type, u64 offset, u64 len)
++{
++	struct super_block		*sb;
++	struct nfsd4_pnfs_cb_layout	lr;
++	bl_layout_rec_t			*r;
++	pnfs_blocklayout_layout_t	*b;
++	u64				adj;
++
++	dprintk("--> %s\n", __func__);
++	BUG_ON(!len);
++	switch (type) {
++		case RETURN_FILE:
++			sb = inode->i_sb;
++			dprintk("  recalling layout [0x%x:%lu], %Lu:%Lu\n",
++			    inode->i_sb->s_dev, inode->i_ino,
++				_2SECTS(offset), _2SECTS(len));
++			break;
++		case RETURN_FSID:
++			sb = inode->i_sb;
++			dprintk("%s: recalling layout for fsid x (unimplemented)\n",
++				__func__);
++			return 0;
++		case RETURN_ALL:
++			/*
++			 * XXX figure out how to get a sb since there's no
++			 * inode ptr
++			 */
++			dprintk("%s: recalling all layouts (unimplemented)\n",
++				__func__);
++			return 0;
++		default:
++			return -EINVAL;
++	}
++
++restart:
++	r = layout_inode_find(inode);
++	if (r && len && !r->blr_recalled) {
++		spin_lock(&r->blr_lock);
++		list_for_each_entry(b, &r->blr_layouts, bll_list) {
++			if (!r->blr_recalled && !b->bll_recalled &&
++			    (offset >= b->bll_foff) && (offset < BLL_F_END(b))) {
++				b->bll_recalled		= 1;
++				lr.cbl_recall_type	= type;
++				lr.cbl_seg.layout_type	= LAYOUT_BLOCK_VOLUME;
++				lr.cbl_seg.clientid	= 0;
++				lr.cbl_seg.offset	= 0;
++				lr.cbl_seg.length	= NFS4_MAX_UINT64;
++				r->blr_recalled		= 1;
++				dprintk("  FULL LAYOUTRECALL\n");
++				lr.cbl_seg.iomode = IOMODE_ANY;
++
++				/*
++				 * Currently there are only two cases where the
++				 * layout is being returned.
++				 *    (1) Someone is issuing a NFS_WRITE operation
++				 *        to this layout.
++				 *    (2) The file has been truncated which means
++				 *        the layout is immediately made invalid.
++				 * In both cases the client must write any
++				 * uncommitted modifications to the server via
++				 * NFS_WRITE.
++				 */
++				lr.cbl_layoutchanged = 1;
++
++				/* 'b' may be freed once unlocked. */
++				adj = MIN(b->bll_len - (offset - b->bll_foff),
++				    len);
++				offset += adj;
++				len -= adj;
++				/*
++				 * Need to drop the lock because we'll get a
++				 * layoutreturn which will block waiting for
++				 * the lock. The request will come in on the
++				 * same thread which will cause a deadlock.
++				 */
++				spin_unlock(&r->blr_lock);
++				nfsd_layout_recall_cb(sb, inode, &lr);
++				if (!len)
++					goto out;
++				/*
++				 * Since layoutreturn will have been called we
++				 * can't assume blr_layouts is still valid,
++				 * so restart.
++				 */
++				goto restart;
++			}
++		}
++		spin_unlock(&r->blr_lock);
++	}
++
++out:
++	dprintk("<-- %s\n", __func__);
++	return 0;
++}
++
++/*
++ * []------------------------------------------------------------------[]
++ * | Support functions from here on down.				|
++ * []------------------------------------------------------------------[]
++ */
++
++/*
++ * bld_simple -- given a dev_t build a simple volume structure
++ *
++ * Simple volume contains the device signature and offset to that data in
++ * the storage volume. Returns the new volume, or NULL on any failure.
++ */
++static pnfs_blocklayout_devinfo_t *
++bld_simple(struct list_head *volumes, dev_t devid, int local_index)
++{
++	pnfs_blocklayout_devinfo_t	*bld	= NULL;
++	bl_comm_msg_t			msg;
++	bl_comm_res_t			*res	= NULL;
++
++	msg.msg_type = PNFS_UPCALL_MSG_GETSIG;
++	msg.u.msg_dev = devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("%s: Failed to get signature information\n", __func__);
++		goto error;
++	}
++
++	bld = bld_alloc(volumes, PNFS_BLOCK_VOLUME_SIMPLE);
++	if (!bld)
++		goto error;	/* the upcall result must still be freed */
++
++	bld->u.simple.bld_offset = (res->u.sig.sector * 512LL) + res->u.sig.offset;
++	bld->u.simple.bld_sig_len = res->u.sig.len;
++	bld->u.simple.bld_sig = kmalloc(res->u.sig.len, GFP_KERNEL);
++	if (!bld->u.simple.bld_sig)
++		goto error;
++
++	memcpy(bld->u.simple.bld_sig, res->u.sig.sig, res->u.sig.len);
++	kfree(res);
++	return bld;
++
++error:
++	if (bld)
++		bld_free(bld);
++	if (res)
++		kfree(res);
++	dprintk("%s: error in bld_simple\n", __func__);
++	return NULL;
++}
++
++/*
++ * bld_slice -- given a dev_t build a slice volume structure
++ *
++ * A slice volume contains the length of the slice/partition and its offset
++ * from the beginning of the storage volume. There's also a reference to
++ * the "simple" volume which contains this slice.
++ */
++static pnfs_blocklayout_devinfo_t *
++bld_slice(struct list_head *volumes, dev_t devid, int my_loc, int simple_loc)
++{
++	pnfs_blocklayout_devinfo_t	*bld;
++	bl_comm_msg_t			msg;
++	bl_comm_res_t			*res;
++	
++	dprintk("--> %s\n", __func__);
++	bld = bld_alloc(volumes, PNFS_BLOCK_VOLUME_SLICE);
++	if (!bld)
++		return NULL;
++	
++	msg.msg_type	= PNFS_UPCALL_MSG_GETSLICE;
++	msg.u.msg_dev	= devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("Upcall to get slice info failed\n");
++		bld_free(bld);
++		return NULL;
++	}
++	
++	bld->bld_devid.devid = devid;
++	bld->bld_index_loc	= my_loc;
++	bld->u.slice.bld_start	= res->u.slice.start * 512LL;
++	bld->u.slice.bld_len	= res->u.slice.length * 512LL;
++	bld->u.slice.bld_index	= simple_loc;
++
++	dprintk("%s: start %Lu, len %Lu\n", __func__,
++		bld->u.slice.bld_start / 512LL, bld->u.slice.bld_len / 512LL);
++
++	kfree(res);
++	dprintk("<-- %s (rval %p)\n", __func__, bld);
++	return bld;
++}
++
++static int
++layout_cache_fill_from(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*n;
++	
++	dprintk("--> %s\n", __func__);
++	
++	if (!list_empty(&r->blr_layouts))
++		if (layout_cache_fill_from_list(r, h, seg) == False)
++			return -EIO;
++	
++	/*
++	 * This deals with two conditions.
++	 *    (1) When blr_layouts is empty we need to create the first entry
++	 *    (2) When the range requested falls past the end of any current
++	 *        layout the residual must be taken care of.
++	 */	
++	if (seg->length) {
++		n = bll_alloc(seg->offset, seg->length, BLOCK_LAYOUT_NEW, h);
++		if (!n)
++			return -ENOMEM;
++		dprintk("  remaining at %Lu, len %Lu\n", _2SECTS(n->bll_foff),
++			_2SECTS(n->bll_len));
++	}
++	
++	dprintk("<-- %s\n", __func__);
++	return 0;
++}
++
++struct list_head *
++layout_cache_iter(bl_layout_rec_t *r, struct list_head *bl_possible,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*n		= NULL;
++	struct list_head		*bl_candidates	= NULL;
++	struct fiemap_extent_info	fei;
++	struct inode			*i;
++	dev_t				dev;
++
++	dev	= r->blr_rdev;
++	i	= r->blr_inode;
++	dprintk("--> %s\n", __func__);
++	bl_candidates = kmalloc(sizeof (*bl_candidates), GFP_KERNEL);
++	if (!bl_candidates)
++		return NULL;
++	INIT_LIST_HEAD(bl_candidates);
++	extents_setup(&fei);
++
++	list_for_each_entry(b, bl_possible, bll_list) {
++		if (b->bll_cache_state == BLOCK_LAYOUT_NEW) {
++			extents_count(&fei, i, b->bll_foff, b->bll_len);
++			if (fei.fi_extents_mapped) {
++				/*
++				 * Common case here. Got a range which has
++				 * extents. Now get those extents and process
++				 * them into pNFS extents.
++				 */
++				if (extents_get(&fei, i, b->bll_foff,
++				    b->bll_len) == False)
++					goto cleanup;
++				if (extents_process(&fei, bl_candidates,
++				    seg, dev, b) == False)
++					goto cleanup;
++				extents_cleanup(&fei);
++			} else if (seg->iomode == IOMODE_READ) {
++				/*
++				 * Found a hole in a file while reading. No
++				 * problem, just create a pNFS extent for the
++				 * range and let the client know there's no
++				 * backing store.
++				 */
++				n = bll_alloc(b->bll_foff, b->bll_len,
++				    BLOCK_LAYOUT_NEW, bl_candidates);
++				if (!n)
++					goto cleanup;
++				n->bll_es = PNFS_BLOCK_NONE_DATA;
++				n->bll_vol_id.sbid = 0;
++				n->bll_vol_id.devid = dev;
++				seg->length += b->bll_len;
++			} else {
++				/*
++				 * There's a problem here. Since the iomode
++				 * is read/write fallocate should have allocated
++				 * any necessary storage for the given range.
++				 */
++				dprintk("    Extent count for RW is 0\n");
++				goto cleanup;
++			}
++		} else {
++			n = bll_alloc_dup(b, b->bll_cache_state, bl_candidates);
++			if (!n)
++				goto cleanup;
++			seg->length += n->bll_len;
++		}
++
++		if (r->blr_ext_size < (b->bll_foff + b->bll_len))
++			r->blr_ext_size = b->bll_foff + b->bll_len;
++	}
++
++	while (!list_empty(bl_possible)) {
++		b = list_entry(bl_possible->next,
++		    struct pnfs_blocklayout_layout, bll_list);
++		list_del(&b->bll_list);
++		kfree(b);
++	}
++	b = list_first_entry(bl_candidates, struct pnfs_blocklayout_layout,
++	    bll_list);
++	seg->offset = b->bll_foff;
++	dprintk("<-- %s okay\n", __func__);
++	return bl_candidates;
++
++cleanup:
++	extents_cleanup(&fei);
++	while (!list_empty(bl_candidates)) {
++		n = list_first_entry(bl_candidates, typeof(*n), bll_list);
++		list_del(&n->bll_list);
++		kfree(n);
++	}
++	kfree(bl_candidates);	/* entries are gone; now the head itself */
++	dprintk("<-- %s, error occurred\n", __func__);
++	return NULL;
++}
++
++/*
++ * layout_cache_merge -- collapse layouts which make up a contiguous range.
++ */
++static void
++layout_cache_merge(bl_layout_rec_t *r, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*p;
++	
++	dprintk("--> %s\n", __func__);
++restart:
++	p = NULL;
++	list_for_each_entry(b, h, bll_list) {
++		if (p && (BLL_S_END(p) == b->bll_soff) &&
++		    (p->bll_es == b->bll_es) &&
++		    (b->bll_es != PNFS_BLOCK_NONE_DATA)) {
++			/*
++			 * We've got a candidate.
++			 */
++#ifdef too_verbose
++			dprintk("  merge %Lu(f):%Lu(l):%Lu(s) into %Lu(f):%Lu(l):%Lu(s)\n",
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				_2SECTS(b->bll_soff),
++				_2SECTS(p->bll_foff), _2SECTS(p->bll_len),
++				_2SECTS(b->bll_soff));
++#endif
++			
++			if (p->bll_cache_state == BLOCK_LAYOUT_CACHE)
++				p->bll_cache_state = BLOCK_LAYOUT_UPDATE;
++			p->bll_len += b->bll_len;
++			list_del(&b->bll_list);
++			kfree(b);
++			goto restart;
++		} else if (p && (BLL_F_END(p) == b->bll_foff) &&
++			   (p->bll_es == b->bll_es) &&
++			   (b->bll_es == PNFS_BLOCK_NONE_DATA)) {
++			p->bll_len += b->bll_len;
++			list_del(&b->bll_list);
++			kfree(b);
++			goto restart;
++		} else
++			p = b;
++	}
++	dprintk("<-- %s\n", __func__);
++}
++
++static int
++layout_cache_update(bl_layout_rec_t *r, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*c,
++					*n;
++	boolean_t			status = 0;
++	
++	dprintk("--> %s\n", __func__);
++	if (list_empty(&r->blr_layouts)) {
++		/* ---- Just add entries and return ---- */
++		dprintk("  cache empty for inode 0x%x:%ld\n", r->blr_rdev,
++			r->blr_inode->i_ino);
++		list_for_each_entry(b, h, bll_list) {
++			c = bll_alloc_dup(b, BLOCK_LAYOUT_CACHE,
++					  &r->blr_layouts);
++			if (!c) {
++				status = -ENOMEM;
++				break;
++			}
++			dprintk("    adding %Lu(f):%Lu(l):%Lu(s):%d\n",
++				_2SECTS(c->bll_foff), _2SECTS(c->bll_len),
++				_2SECTS(c->bll_soff), c->bll_es);
++		}
++		return status;
++	}
++	
++	list_for_each_entry(b, h, bll_list) {
++		BUG_ON(!b->bll_vol_id.devid);
++		if (b->bll_cache_state == BLOCK_LAYOUT_UPDATE) {
++			boolean_t found = False;
++			list_for_each_entry(c, &r->blr_layouts, bll_list) {
++				if ((b->bll_soff >= c->bll_soff) &&
++				    (b->bll_soff < BLL_S_END(c)) &&
++				    (b->bll_es != PNFS_BLOCK_NONE_DATA)) {
++					u64	u;
++					
++					if ((b->bll_foff < c->bll_foff) ||
++					    (b->bll_foff > BLL_F_END(c)))
++						BUG();
++					
++					u = BLL_S_END(b) - BLL_S_END(c);
++					/*
++					 * The updated cache entry has to be
++					 * different than the current.
++					 * Otherwise the cache state for 'b'
++					 * should be BLOCK_LAYOUT_CACHE.
++					 */
++					BUG_ON(BLL_S_END(b) < BLL_S_END(c));
++					
++					dprintk("  "
++						"updating %Lu(f):%Lu(l):%Lu(s) to len %Lu\n",
++						_2SECTS(c->bll_foff),
++						_2SECTS(c->bll_len),
++						_2SECTS(c->bll_soff),
++						_2SECTS(c->bll_len + u));
++					c->bll_len += u;
++					bll_collapse(r, c);
++					found = True;
++					break;
++				}
++			}
++
++			if (found == False) {
++				dprintk("  ERROR Expected to find"
++				    " %Lu(f):%Lu(l):%Lu(s), but didn't\n",
++				    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				    _2SECTS(b->bll_soff));
++				list_for_each_entry(c, &r->blr_layouts, bll_list)
++					print_bll(c, "Cached");
++				BUG();
++			}
++		} else if (b->bll_cache_state == BLOCK_LAYOUT_NEW) {
++			
++			c = list_first_entry(&r->blr_layouts,
++			    struct pnfs_blocklayout_layout, bll_list);
++			if (b->bll_foff < c->bll_foff) {
++				/*
++				 * Special case where new entry is before
++				 * first cached entry.
++				 */
++				c = bll_alloc_dup(b, BLOCK_LAYOUT_CACHE, NULL);
++				list_add(&c->bll_list, &r->blr_layouts);
++				dprintk("  new entry at head of list at %Lu, "
++					"len %Lu\n",
++					_2SECTS(c->bll_foff), _2SECTS(c->bll_len));
++			} else {
++				list_for_each_entry(c, &r->blr_layouts,
++				    bll_list) {
++					n = list_entry(c->bll_list.next,
++					    struct pnfs_blocklayout_layout,
++					    bll_list);
++					/*
++					 * This is ugly, but can't think of
++					 * another way to examine this case.
++					 * Consider the following. Need to
++					 * add an entry which starts at 40
++					 * and the cache has the following
++					 * entries:
++					 * Start    Length
++					 * 10       5
++					 * 30       5
++					 * 50       5
++					 * So, need to look and see if the new
++					 * entry starts after the current
++					 * cache, but before the next one.
++					 * There's a catch in that the next
++					 * entry might not be valid as it's
++					 * really just a pointer to the list
++					 * head.
++					 */
++					if (((b->bll_foff >=
++					      BLL_F_END(c)) &&
++					     (c->bll_list.next == &r->blr_layouts)) ||
++					    ((b->bll_foff >=
++					      BLL_F_END(c)) &&
++					     (b->bll_foff < n->bll_foff))) {
++						
++						n = bll_alloc_dup(b,
++								  BLOCK_LAYOUT_CACHE, NULL);
++						dprintk("  adding new %Lu:%Lu"
++							" after %Lu:%Lu\n",
++							_2SECTS(n->bll_foff),
++							_2SECTS(n->bll_len),
++							_2SECTS(c->bll_foff),
++							_2SECTS(c->bll_len));
++						list_add(&n->bll_list,
++							 &c->bll_list);
++						break;
++					}
++				}
++			}
++		}
++	}
++	dprintk("<-- %s\n", __func__);
++	return status;
++}
++
++static void
++layout_cache_del(bl_layout_rec_t *r, const struct nfsd4_layout_seg *seg_in)
++{
++	struct pnfs_blocklayout_layout	*b,
++					*n;
++	u64				len;
++	struct nfsd4_layout_seg		seg = *seg_in;
++
++	dprintk("--> %s\n", __func__);
++	if (seg.length == NFS4_MAX_UINT64) {
++		r->blr_recalled = 0;
++		dprintk("  Fast return of all layouts\n");
++		while (!list_empty(&r->blr_layouts)) {
++			b = list_entry(r->blr_layouts.next,
++				       struct pnfs_blocklayout_layout, bll_list);
++			dprintk("    foff %Lu, len %Lu, soff %Lu\n",
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				_2SECTS(b->bll_soff));
++			list_del(&b->bll_list);
++			kfree(b);
++		}
++		dprintk("<-- %s\n", __func__);
++		return;
++	}
++
++restart:
++	list_for_each_entry(b, &r->blr_layouts, bll_list) {
++		if (seg.offset == b->bll_foff) {
++			/*
++			 * This handles the following three cases:
++			 * (1) return layout matches entire cache layout
++			 * (2) return layout matches beginning portion of cache
++			 * (3) return layout matches entire cache layout and
++			 *     into next entry. Varies from #1 in end case.
++			 */
++			dprintk("  match on offsets, %Lu:%Lu\n",
++				_2SECTS(seg.offset), _2SECTS(seg.length));
++			len = MIN(seg.length, b->bll_len);
++			b->bll_foff	+= len;
++			b->bll_soff	+= len;
++			b->bll_len	-= len;
++			seg.length	-= len;
++			seg.offset	+= len;
++			if (!b->bll_len) {
++				list_del(&b->bll_list);
++				kfree(b);
++				dprintk("    removing cache line\n");
++				if (!seg.length) {
++					dprintk("    also finished\n");
++					goto complete;
++				}
++				/*
++				 * 'b' was freed, so list_for_each_entry()
++				 * cannot step to b->bll_list.next; restart.
++				 * TODO: list_for_each_entry_safe() would
++				 *       avoid rescanning from the head.
++				 */
++				goto restart;
++			}
++			if (!seg.length) {
++				dprintk("    finished, but cache line not"
++					"empty\n");
++				goto complete;
++			}
++		} else if ((seg.offset >= b->bll_foff) &&
++		    (seg.offset < BLL_F_END(b))) {
++			/*
++			 * layout being returned is within this cache line.
++			 */
++			dprintk("  layout %Lu:%Lu within cache line %Lu:%Lu\n",
++				_2SECTS(seg.offset), _2SECTS(seg.length),
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len));
++			BUG_ON(!seg.length);
++			if ((seg.offset + seg.length) >= BLL_F_END(b)) {
++				/*
++				 * Layout returned starts in the middle of the
++				 * cache entry; just trim the entry's length.
++				 */
++				dprintk("    trim back cache line\n");
++				len = seg.offset - b->bll_foff;
++				seg.offset += b->bll_len - len;
++				seg.length -= b->bll_len - len;
++				b->bll_len = len;
++				if (!seg.length)
++					return;
++			} else {
++				/*
++				 * Chunk is removed from the middle: split the
++				 * entry, rebasing the tail's storage offset.
++				 */
++				dprintk("    split cache line\n");
++				len = seg.offset + seg.length;
++				n = bll_alloc(len, BLL_F_END(b) - len,
++					      BLOCK_LAYOUT_CACHE, NULL);
++				if (!n)
++					return;	/* leave the entry whole */
++				n->bll_soff = b->bll_soff + len - b->bll_foff;
++				n->bll_es = b->bll_es;
++				n->bll_vol_id = b->bll_vol_id;
++				list_add(&n->bll_list, &b->bll_list);
++				b->bll_len = seg.offset - b->bll_foff;
++				return;
++			}
++		}
++	}
++complete:
++	if (list_empty(&r->blr_layouts))
++		r->blr_recalled = 0;
++	dprintk("<-- %s\n", __func__);
++}
++
++/*
++ * layout_cache_fill_from_list -- fills from cache list
++ *
++ * NOTE: This routine was only separated out from layout_cache_fill_from()
++ * to reduce the indentation level which makes the code easier to read.
++ */
++static inline boolean_t
++layout_cache_fill_from_list(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*n;
++	enum pnfs_block_extent_state4	s;
++
++	list_for_each_entry(b, &r->blr_layouts, bll_list) {
++		if (seg->offset < b->bll_foff) {
++			n = bll_alloc(seg->offset,
++			    MIN(seg->length, b->bll_foff - seg->offset),
++			    BLOCK_LAYOUT_NEW, NULL);
++			if (!n)
++				return False;
++
++			list_add(&n->bll_list, h->prev);
++			dprintk("  new: %Lu:%Lu, added before %Lu:%Lu\n",
++			    _2SECTS(n->bll_foff), _2SECTS(n->bll_len),
++			    _2SECTS(b->bll_foff), _2SECTS(b->bll_len));
++			seg->offset += n->bll_len;
++			seg->length -= n->bll_len;
++			if (!seg->length)
++				break;
++		}
++
++		if ((seg->offset >= b->bll_foff) &&
++		    (seg->offset < BLL_F_END(b))) {
++			if (layout_conflict(b, seg->iomode, &s) == False) {
++				dprintk("  CONFLICT FOUND: "
++				    "%Lu(f):%Lu(l):%Lu(s) state %d, iomode %d\n",
++				    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				    _2SECTS(b->bll_soff), b->bll_es,
++				    seg->iomode);
++				return False;
++			}
++			n = bll_alloc(seg->offset,
++			    MIN(seg->length, BLL_F_END(b) - seg->offset),
++			    BLOCK_LAYOUT_CACHE, h);
++			if (!n)	/* must be checked before the dprintk uses it */
++				return False;
++
++			dprintk("  CACHE hit: Found %Lu(f):%Lu(l): "
++			    "in %Lu(f):%Lu(l):%Lu(s):%d\n",
++			    _2SECTS(n->bll_foff), _2SECTS(n->bll_len),
++			    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++			    _2SECTS(b->bll_soff), b->bll_es);
++			n->bll_soff = b->bll_soff + seg->offset - b->bll_foff;
++			n->bll_vol_id.sbid = 0;
++			n->bll_vol_id.devid = b->bll_vol_id.devid;
++			n->bll_es = s;
++			seg->offset += n->bll_len;
++			seg->length -= n->bll_len;
++			if (!seg->length)
++				break;
++		}
++	}
++	return True;
++}
++
++static u64
++bll_alloc_holey(struct list_head *bl_candidates, u64 offset, u64 length,
++    dev_t dev)
++{
++	pnfs_blocklayout_layout_t	*n;
++	
++	n = bll_alloc(offset, length, BLOCK_LAYOUT_NEW, bl_candidates);
++	if (!n)
++		return 0;
++	n->bll_es = PNFS_BLOCK_NONE_DATA;
++	n->bll_vol_id.sbid = 0;
++	n->bll_vol_id.devid = dev;
++	
++	return n->bll_len;
++}
++
++static void
++extents_setup(struct fiemap_extent_info *fei)
++{
++	fei->fi_extents_start	= NULL;
++}
++
++/*
++ * extents_count -- Determine the number of extents for a given range.
++ *
++ * No need to call set_fs() here because the function
++ * doesn't use copy_to_user() if it's only counting
++ * the number of extents needed.
++ */
++static void
++extents_count(struct fiemap_extent_info *fei, struct inode *i, u64 foff, u64 len)
++{
++	dprintk("    Need fiemap of %Ld:%Ld\n", _2SECTS(foff), _2SECTS(len));
++	fei->fi_flags		= FIEMAP_FLAG_SYNC;
++	fei->fi_extents_max	= 0;
++	fei->fi_extents_start	= NULL;
++	fei->fi_extents_mapped	= 0;
++	i->i_op->fiemap(i, fei, foff, len + (1 << i->i_sb->s_blocksize_bits) - 1);
++}
++
++/*
++ * extents_get -- Get list of extents for range
++ *
++ * extents_count() must have been called before this routine such that
++ * fi_extents_mapped is known.
++ */
++static boolean_t
++extents_get(struct fiemap_extent_info *fei, struct inode *i, u64 foff, u64 len)
++{
++	int			m_space,
++				rval;
++	struct fiemap_extent	*fe;
++	mm_segment_t		old_fs = get_fs();
++	
++	/*
++	 * Now malloc the correct amount of space
++	 * needed. It's possible for the file to have changed
++	 * between calls which would require more space for
++	 * the extents. If that occurs the last extent will
++	 * not have FIEMAP_EXTENT_LAST set and the error will
++	 * be caught in extents_process().
++	 */
++	m_space = fei->fi_extents_mapped * sizeof (struct fiemap_extent);
++	fe = kmalloc(m_space, GFP_KERNEL);
++	if (!fe)
++		return False;
++	memset(fe, 0, m_space);
++	
++	fei->fi_extents_max	= fei->fi_extents_mapped;
++	fei->fi_extents_mapped	= 0;
++	fei->fi_extents_start	= fe;
++	
++	set_fs(KERNEL_DS);
++	rval = i->i_op->fiemap(i, fei, foff, len +
++	    (1 << i->i_sb->s_blocksize_bits) - 1);
++	set_fs(old_fs);
++	
++	if (rval || !fei->fi_extents_mapped) {
++		dprintk("    No extents. Wanted %d, got %d\n",
++			fei->fi_extents_max, fei->fi_extents_mapped);
++		kfree(fe);
++		fei->fi_extents_start = NULL;
++		return False;
++	} else
++		return True;
++}
++
++/*
++ * extents_process -- runs through the extent returned from the file system and
++ *	 creates block layout entries.
++ */
++static boolean_t
++extents_process(struct fiemap_extent_info *fei, struct list_head *bl_candidates,
++    struct nfsd4_layout_seg *seg, dev_t dev, pnfs_blocklayout_layout_t *b)
++{
++	struct fiemap_extent		*fep,
++					*fep_last	= NULL;
++	int				i;
++	pnfs_blocklayout_layout_t	*n;
++	u64				last_end,
++					rval;
++	
++	dprintk("--> %s\n", __func__);
++	for (fep = fei->fi_extents_start, i = 0; i < fei->fi_extents_mapped;
++	    i++, fep++) {
++		
++		BUG_ON(!fep->fe_physical);
++		/*
++		 * Deal with corner cases of holey files.
++		 */
++		if (fep_last && ((fep_last->fe_logical + fep_last->fe_length) !=
++				 fep->fe_logical)) {
++			
++			/*
++			 * If the last extent doesn't end logically
++			 * at the beginning of the current we've got
++			 * hole and need to create a pNFS extent.
++			 */
++			dprintk("    Got a hole at %Ld:%Ld \n", 
++			    _2SECTS(fep_last->fe_logical),
++			    _2SECTS(fep_last->fe_length));
++			last_end = fep_last->fe_logical + fep_last->fe_length;
++			rval = bll_alloc_holey(bl_candidates, last_end,
++			    fep->fe_logical - last_end, dev);
++			if (!rval)
++				return False;
++			seg->length += rval;
++		}
++		
++		n = bll_alloc(fep->fe_logical, fep->fe_length,
++		    BLOCK_LAYOUT_NEW, bl_candidates);
++		if (unlikely(n == NULL)) {
++			dprintk("%s: bll_alloc failed\n", __func__);
++			return False;
++		}
++		
++		n->bll_soff = fep->fe_physical;
++		n->bll_es = seg->iomode == IOMODE_READ ?
++		    PNFS_BLOCK_READ_DATA : PNFS_BLOCK_READWRITE_DATA;
++		n->bll_vol_id.sbid = 0;
++		n->bll_vol_id.devid = dev;
++		seg->length += fep->fe_length;
++		print_bll(n, "New extent");
++		fep_last = fep;
++	}
++	dprintk("<-- %s (i=%d)\n", __func__, i);
++	
++	return True;
++}
++
++static void
++extents_cleanup(struct fiemap_extent_info *fei)
++{
++	/* kfree() ignores NULL, so an unset extent array needs no test; */
++	/* clear the pointer afterwards so it cannot be freed twice. */
++	kfree(fei->fi_extents_start);
++	fei->fi_extents_start = NULL;
++}
++
++/*
++ * device_slice -- True when devid opens and its disk has more than one minor.
++ */
++static boolean_t
++device_slice(dev_t devid)
++{
++	struct block_device	*bd	= open_by_devnum(devid, FMODE_READ);
++	boolean_t		rval	= False;
++	/* open_by_devnum() reports failure as ERR_PTR(); a NULL test misses it. */
++	if (bd && !IS_ERR(bd)) {
++		if (bd->bd_disk->minors > 1)
++			rval = True;
++		blkdev_put(bd, FMODE_READ);
++	}
++	return rval;
++}
++
++/*
++ * device_dm -- check to see if device is a Device Mapper volume.
++ *
++ * Returns True for DM, False if not or if the upcall fails.
++ */
++static boolean_t
++device_dm(dev_t devid)
++{
++	boolean_t		rval = False;
++	bl_comm_msg_t		msg;
++	bl_comm_res_t		*res = NULL;	/* kfree()d on every path below */
++
++	msg.msg_type	= PNFS_UPCALL_MSG_DMCHK;
++	msg.u.msg_dev	= devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("Failed upcall to check on DM status\n");
++	} else if (res->u.dm_vol) {
++		rval = True;
++		dprintk("Device is DM volume\n");
++	} else
++		dprintk("Device is not DM volume\n");
++	kfree(res);
++
++	return rval;
++}
++
++/*
++ * layout_inode_add -- allocate a layout record for inode i, link it onto
++ *	layout_hash and return it through *p.  Returns False, leaving *p
++ *	untouched, if the fs lacks fiemap/fallocate or allocation fails.
++ */
++static boolean_t
++layout_inode_add(struct inode *i, bl_layout_rec_t **p)
++{
++	bl_layout_rec_t		*r;
++
++	if (!i->i_op->fiemap || !i->i_op->fallocate) {
++		printk("pNFS: file system doesn't support required fiemap or "
++		    "fallocate methods\n");
++		return False;
++	}
++
++	r = kmalloc(sizeof (*r), GFP_KERNEL);
++	if (!r)
++		return False;
++
++	r->blr_rdev	= i->i_sb->s_dev;
++	r->blr_inode	= i;
++	r->blr_orig_size = i->i_size;
++	r->blr_ext_size	= 0;
++	r->blr_recalled	= 0;
++	INIT_LIST_HEAD(&r->blr_layouts);
++	spin_lock_init(&r->blr_lock);
++	spin_lock(&layout_hashtbl_lock);
++	list_add_tail(&r->blr_hash, &layout_hash);
++	spin_unlock(&layout_hashtbl_lock);
++	*p = r;
++	return True;
++}
++
++/* Walk layout_hash for the record matching i's inode number and device;
++ * the callers visible nearby take layout_hashtbl_lock around the call. */
++static bl_layout_rec_t *
++__layout_inode_find(struct inode *i)
++{
++	bl_layout_rec_t	*rec;
++
++	list_for_each_entry(rec, &layout_hash, blr_hash) {
++		if (rec->blr_inode->i_ino != i->i_ino)
++			continue;
++		if (rec->blr_rdev == i->i_sb->s_dev)
++			return rec;
++	}
++	return NULL;
++}
++
++static bl_layout_rec_t *
++layout_inode_find(struct inode *i)
++{
++	bl_layout_rec_t	*r;
++
++	spin_lock(&layout_hashtbl_lock);
++	r = __layout_inode_find(i);
++	spin_unlock(&layout_hashtbl_lock);
++	
++	return r;
++}
++
++static void
++layout_inode_del(struct inode *i)
++{
++	bl_layout_rec_t	*r;
++	
++	spin_lock(&layout_hashtbl_lock);
++	r = __layout_inode_find(i);
++	if (r) {
++		spin_lock(&r->blr_lock);
++		if (list_empty(&r->blr_layouts)) {
++			list_del(&r->blr_hash);
++			spin_unlock(&r->blr_lock);
++			kfree(r);
++		} else {
++			spin_unlock(&r->blr_lock);
++		}
++	} else {
++		dprintk("%s: failed to find inode [0x%x:%lu] in table for delete\n",
++			__func__, i->i_sb->s_dev, i->i_ino);
++	}
++	spin_unlock(&layout_hashtbl_lock);
++}
++
++/*
++ * map_state2name -- converts state to an ASCII string.
++ *
++ * Used for debug messages only.
++ */
++static char *
++map_state2name(enum pnfs_block_extent_state4 s)
++{
++	switch (s) {
++	case PNFS_BLOCK_READWRITE_DATA:	return "     RW";
++	case PNFS_BLOCK_READ_DATA:	return "     RO";
++	case PNFS_BLOCK_INVALID_DATA:	return "INVALID";
++	case PNFS_BLOCK_NONE_DATA:	return "   NONE";
++	default:
++		BUG();
++	}
++}
++
++/* bld_alloc -- zeroed devinfo of the given type, queued at the tail of volumes. */
++static pnfs_blocklayout_devinfo_t *
++bld_alloc(struct list_head *volumes, int type)
++{
++	pnfs_blocklayout_devinfo_t *bld;
++
++	bld = kzalloc(sizeof (*bld), GFP_KERNEL);
++	if (!bld)
++		return NULL;
++
++	bld->bld_type = type;
++	list_add_tail(&bld->bld_list, volumes);
++
++	return bld;
++}
++
++static void
++bld_free(pnfs_blocklayout_devinfo_t *bld)
++{
++	list_del(&bld->bld_list);
++	kfree(bld);
++}
++
++static void
++print_bll(pnfs_blocklayout_layout_t *b, char *text)
++{
++	dprintk("    BLL: %s\n", text);
++	dprintk("    foff %Lu, soff %Lu, len %Lu, state %s\n",
++	    _2SECTS(b->bll_foff), _2SECTS(b->bll_soff), _2SECTS(b->bll_len),
++	    map_state2name(b->bll_es));
++}
++
++static inline void
++bll_collapse(bl_layout_rec_t *r, pnfs_blocklayout_layout_t *c)
++{
++	pnfs_blocklayout_layout_t	*n;
++	int				dbg_count	= 0;
++	u64				endpoint;
++	
++	BUG_ON(c->bll_es == PNFS_BLOCK_NONE_DATA);
++	while (c->bll_list.next != &r->blr_layouts) {
++		n = list_entry(c->bll_list.next,
++			       struct pnfs_blocklayout_layout, bll_list);
++		endpoint = BLL_S_END(c);
++		if ((n->bll_soff >= c->bll_soff) &&
++		    (n->bll_soff < endpoint)) {
++			if (endpoint < BLL_S_END(n)) {
++				/*
++				 * The following is possible.
++				 *
++				 * 
++				 * Existing: +---+                 +---+
++				 *      New: +-----------------------+
++				 * The client request merges entries together
++				 * but doesn't require picking up all of the
++				 * last entry. So, we still need to delete
++				 * the last entry and add the remaining space
++				 * to the new entry.
++				 */
++				c->bll_len += BLL_S_END(n) - endpoint;
++			}
++			dbg_count++;
++			list_del(&n->bll_list);
++			kfree(n);
++		} else {
++			break;
++		}
++	}
++	/* ---- Debug only, remove before integration ---- */
++	if (dbg_count)
++		dprintk("  Collapsed %d cache entries between %Lu(s) and %Lu(s)\n",
++			dbg_count, _2SECTS(c->bll_soff), _2SECTS(BLL_S_END(c)));
++}
++
++/* bll_alloc -- zeroed entry recording offset, len and state; appended to h if set. */
++static pnfs_blocklayout_layout_t *
++bll_alloc(u64 offset, u64 len, enum bl_cache_state state, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*n;
++
++	n = kzalloc(sizeof (*n), GFP_KERNEL);
++	if (n) {
++		n->bll_foff		= offset;
++		n->bll_len		= len;
++		n->bll_cache_state	= state;
++		if (h)
++			list_add_tail(&n->bll_list, h);
++	}
++	return n;
++}
++
++static pnfs_blocklayout_layout_t *
++bll_alloc_dup(pnfs_blocklayout_layout_t *b, enum bl_cache_state c,
++	      struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*n	= NULL;
++	
++	n = bll_alloc(b->bll_foff, b->bll_len, c, h);
++	if (n) {
++		n->bll_es			= b->bll_es;
++		n->bll_soff			= b->bll_soff;
++		n->bll_vol_id.devid		= b->bll_vol_id.devid;
++	}
++	return n;
++}
++
++static inline boolean_t
++layout_conflict(pnfs_blocklayout_layout_t *b, u32 iomode,
++		enum pnfs_block_extent_state4 *s)
++{
++	/* ---- Normal case ---- */
++	*s = b->bll_es;
++	
++	switch (b->bll_es) {
++	case PNFS_BLOCK_READWRITE_DATA:
++		if (iomode == IOMODE_READ)
++			*s = PNFS_BLOCK_READ_DATA;
++		/* ---- Any use is permitted. ---- */
++		break;
++	case PNFS_BLOCK_READ_DATA:
++		/* ---- Committed as read only data. ---- */
++		if (iomode == IOMODE_RW)
++			return False;
++		break;
++	case PNFS_BLOCK_INVALID_DATA:
++		/* ---- Blocks have been allocated, but not initialized ---- */
++		if (iomode == IOMODE_READ)
++			*s = PNFS_BLOCK_NONE_DATA;
++		break;
++	case PNFS_BLOCK_NONE_DATA:
++		/* ---- Hole-y file. No backing store avail. ---- */
++		if (iomode != IOMODE_READ)
++			return False;
++		break;
++	default:
++		BUG();
++	}
++	return True;
++}
++
++#endif /* CONFIG_SPNFS_BLOCK */
+diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs/delegation.c
+--- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-08-23 12:08:29.037481540 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-23 12:09:03.300491952 -0400
+@@ -104,7 +104,8 @@ again:
+ 			continue;
+ 		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+ 			continue;
+-		if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
++		if (memcmp(state->stateid.u.data, stateid->u.data,
++			   sizeof(state->stateid.u.data)) != 0)
+ 			continue;
+ 		get_nfs_open_context(ctx);
+ 		spin_unlock(&inode->i_lock);
+@@ -133,8 +134,8 @@ void nfs_inode_reclaim_delegation(struct
+ 	if (delegation != NULL) {
+ 		spin_lock(&delegation->lock);
+ 		if (delegation->inode != NULL) {
+-			memcpy(delegation->stateid.data, res->delegation.data,
+-			       sizeof(delegation->stateid.data));
++			memcpy(delegation->stateid.u.data, res->delegation.u.data,
++			       sizeof(delegation->stateid.u.data));
+ 			delegation->type = res->delegation_type;
+ 			delegation->maxsize = res->maxsize;
+ 			oldcred = delegation->cred;
+@@ -187,8 +188,9 @@ static struct nfs_delegation *nfs_detach
+ 	if (delegation == NULL)
+ 		goto nomatch;
+ 	spin_lock(&delegation->lock);
+-	if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
+-				sizeof(delegation->stateid.data)) != 0)
++	if (stateid != NULL && memcmp(delegation->stateid.u.data,
++				      stateid->u.data,
++				      sizeof(delegation->stateid.u.data)) != 0)
+ 		goto nomatch_unlock;
+ 	list_del_rcu(&delegation->super_list);
+ 	delegation->inode = NULL;
+@@ -216,8 +218,8 @@ int nfs_inode_set_delegation(struct inod
+ 	delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
+ 	if (delegation == NULL)
+ 		return -ENOMEM;
+-	memcpy(delegation->stateid.data, res->delegation.data,
+-			sizeof(delegation->stateid.data));
++	memcpy(delegation->stateid.u.data, res->delegation.u.data,
++			sizeof(delegation->stateid.u.data));
+ 	delegation->type = res->delegation_type;
+ 	delegation->maxsize = res->maxsize;
+ 	delegation->change_attr = nfsi->change_attr;
+@@ -471,9 +473,7 @@ void nfs_expire_unreferenced_delegations
+ /*
+  * Asynchronous delegation recall!
+  */
+-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
+-				      int (*validate_stateid)(struct nfs_delegation *delegation,
+-							      const nfs4_stateid *stateid))
++int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
+ {
+ 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ 	struct nfs_delegation *delegation;
+@@ -481,7 +481,7 @@ int nfs_async_inode_return_delegation(st
+ 	rcu_read_lock();
+ 	delegation = rcu_dereference(NFS_I(inode)->delegation);
+ 
+-	if (!validate_stateid(delegation, stateid)) {
++	if (!clp->cl_mvops->validate_stateid(delegation, stateid)) {
+ 		rcu_read_unlock();
+ 		return -ENOENT;
+ 	}
+@@ -562,7 +562,8 @@ int nfs4_copy_delegation_stateid(nfs4_st
+ 	rcu_read_lock();
+ 	delegation = rcu_dereference(nfsi->delegation);
+ 	if (delegation != NULL) {
+-		memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
++		memcpy(dst->u.data, delegation->stateid.u.data,
++		       sizeof(dst->u.data));
+ 		ret = 1;
+ 	}
+ 	rcu_read_unlock();
+diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs/delegation.h
+--- linux-2.6.34.noarch/fs/nfs/delegation.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-08-23 12:09:03.301431797 -0400
+@@ -34,9 +34,7 @@ enum {
+ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+ int nfs_inode_return_delegation(struct inode *inode);
+-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
+-				      int (*validate_stateid)(struct nfs_delegation *delegation,
+-							      const nfs4_stateid *stateid));
++int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
+ void nfs_inode_return_delegation_noreclaim(struct inode *inode);
+ 
+ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
+diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
+--- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-08-23 12:08:29.089481525 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 12:09:03.302511603 -0400
+@@ -17,11 +17,19 @@
+ #include <linux/module.h>
+ #include <linux/exportfs.h>
+ 
++#include <linux/nfsd/nfsd4_pnfs.h>
++#if defined(CONFIG_SPNFS)
++#include <linux/nfsd4_spnfs.h>
++#if defined(CONFIG_SPNFS_BLOCK)
++#include <linux/nfsd4_block.h>
++#endif
++#endif
+ #include <linux/nfsd/syscall.h>
+ #include <net/ipv6.h>
+ 
+ #include "nfsd.h"
+ #include "nfsfh.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY	NFSDDBG_EXPORT
+ 
+@@ -352,6 +360,40 @@ static int svc_export_upcall(struct cach
+ 	return sunrpc_cache_pipe_upcall(cd, h, svc_export_request);
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static struct pnfsd_cb_operations pnfsd_cb_op = {
++	.cb_layout_recall = nfsd_layout_recall_cb,
++	.cb_device_notify = nfsd_device_notify_cb,
++
++	.cb_get_state = nfs4_pnfs_cb_get_state,
++	.cb_change_state = nfs4_pnfs_cb_change_state,
++};
++
++#if defined(CONFIG_SPNFS)
++static struct pnfs_export_operations spnfs_export_ops = {
++	.layout_type = spnfs_layout_type,
++	.get_device_info = spnfs_getdeviceinfo,
++	.get_device_iter = spnfs_getdeviceiter,
++	.layout_get = spnfs_layoutget,
++	.layout_return = spnfs_layoutreturn,
++};
++
++static struct pnfs_export_operations spnfs_ds_export_ops = {
++	.get_state = spnfs_get_state,
++};
++
++#if defined(CONFIG_SPNFS_BLOCK)
++static struct pnfs_export_operations bl_export_ops = {
++	.layout_type = bl_layout_type,
++	.get_device_info = bl_getdeviceinfo,
++	.get_device_iter = bl_getdeviceiter,
++	.layout_get = bl_layoutget,
++	.layout_return = bl_layoutreturn,
++};
++#endif /* CONFIG_SPNFS_BLOCK */
++#endif /* CONFIG_SPNFS */
++#endif /* CONFIG_PNFSD */
++
+ static struct svc_export *svc_export_update(struct svc_export *new,
+ 					    struct svc_export *old);
+ static struct svc_export *svc_export_lookup(struct svc_export *);
+@@ -395,6 +437,47 @@ static int check_export(struct inode *in
+ 		return -EINVAL;
+ 	}
+ 
++#if !defined(CONFIG_SPNFS)
++	if (inode->i_sb->s_pnfs_op &&
++	    (!inode->i_sb->s_pnfs_op->layout_type ||
++	     !inode->i_sb->s_pnfs_op->get_device_info ||
++	     !inode->i_sb->s_pnfs_op->layout_get)) {
++		dprintk("exp_export: export of invalid fs pnfs export ops.\n");
++		return -EINVAL;
++	}
++#endif /* CONFIG_SPNFS */
++
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++	if (!inode->i_sb->s_pnfs_op)
++		pnfsd_lexp_init(inode);
++	return 0;
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
++#if defined(CONFIG_SPNFS)
++#if defined(CONFIG_SPNFS_BLOCK)
++	if (pnfs_block_enabled(inode, *flags)) {
++		dprintk("set pnfs block export structure... \n");
++		inode->i_sb->s_pnfs_op = &bl_export_ops;
++	} else
++#endif /* CONFIG_SPNFS_BLOCK */
++	/*
++	 * spnfs_enabled() indicates we're an MDS.
++	 * XXX Better to check an export time option as well.
++	 */
++	if (spnfs_enabled()) {
++		dprintk("set spnfs export structure...\n");
++		inode->i_sb->s_pnfs_op = &spnfs_export_ops;
++	} else {
++		dprintk("%s spnfs not in use\n", __func__);
++
++		/*
++		 * get_state is needed if we're a DS using spnfs.
++		 * XXX Better to check an export time option instead.
++		 */
++		inode->i_sb->s_pnfs_op = &spnfs_ds_export_ops;
++	}
++#endif /* CONFIG_SPNFS */
++
+ 	return 0;
+ 
+ }
+@@ -586,6 +669,8 @@ static int svc_export_parse(struct cache
+ 					if (exp.ex_uuid == NULL)
+ 						err = -ENOMEM;
+ 				}
++			} else if (strcmp(buf, "pnfs") == 0) {
++				exp.ex_pnfs = 1;
+ 			} else if (strcmp(buf, "secinfo") == 0)
+ 				err = secinfo_parse(&mesg, buf, &exp);
+ 			else
+@@ -660,6 +745,8 @@ static int svc_export_show(struct seq_fi
+ 				seq_printf(m, "%02x", exp->ex_uuid[i]);
+ 			}
+ 		}
++		if (exp->ex_pnfs)
++			seq_puts(m, ",pnfs");
+ 		show_secinfo(m, exp);
+ 	}
+ 	seq_puts(m, ")\n");
+@@ -687,6 +774,7 @@ static void svc_export_init(struct cache
+ 	new->ex_fslocs.locations = NULL;
+ 	new->ex_fslocs.locations_count = 0;
+ 	new->ex_fslocs.migrated = 0;
++	new->ex_pnfs = 0;
+ }
+ 
+ static void export_update(struct cache_head *cnew, struct cache_head *citem)
+@@ -699,6 +787,7 @@ static void export_update(struct cache_h
+ 	new->ex_anon_uid = item->ex_anon_uid;
+ 	new->ex_anon_gid = item->ex_anon_gid;
+ 	new->ex_fsid = item->ex_fsid;
++	new->ex_pnfs = item->ex_pnfs;
+ 	new->ex_uuid = item->ex_uuid;
+ 	item->ex_uuid = NULL;
+ 	new->ex_pathname = item->ex_pathname;
+@@ -1635,8 +1724,17 @@ nfsd_export_init(void)
+ 	if (rv)
+ 		return rv;
+ 	rv = cache_register(&svc_expkey_cache);
+-	if (rv)
++	if (rv) {
+ 		cache_unregister(&svc_export_cache);
++		goto out;
++	}
++#if defined(CONFIG_PNFSD)
++	spin_lock(&pnfsd_cb_ctl.lock);
++	pnfsd_cb_ctl.module = THIS_MODULE;
++	pnfsd_cb_ctl.cb_op = &pnfsd_cb_op;
++	spin_unlock(&pnfsd_cb_ctl.lock);
++#endif /* CONFIG_PNFSD */
++out:
+ 	return rv;
+ 
+ }
+@@ -1664,6 +1762,12 @@ nfsd_export_shutdown(void)
+ 
+ 	exp_writelock();
+ 
++#if defined(CONFIG_PNFSD)
++	spin_lock(&pnfsd_cb_ctl.lock);
++	pnfsd_cb_ctl.module = NULL;
++	pnfsd_cb_ctl.cb_op = NULL;
++	spin_unlock(&pnfsd_cb_ctl.lock);
++#endif /* CONFIG_PNFSD */
+ 	cache_unregister(&svc_expkey_cache);
+ 	cache_unregister(&svc_export_cache);
+ 	svcauth_unix_purge();
+diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/direct.c
+--- linux-2.6.34.noarch/fs/nfs/direct.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-08-23 12:09:03.303491500 -0400
+@@ -267,6 +267,38 @@ static const struct rpc_call_ops nfs_rea
+ 	.rpc_release = nfs_direct_read_release,
+ };
+ 
++static long nfs_direct_read_execute(struct nfs_read_data *data,
++				    struct rpc_task_setup *task_setup_data,
++				    struct rpc_message *msg)
++{
++	struct inode *inode = data->inode;
++	struct rpc_task *task;
++
++	nfs_fattr_init(&data->fattr);
++	msg->rpc_argp = &data->args;
++	msg->rpc_resp = &data->res;
++
++	task_setup_data->task = &data->task;
++	task_setup_data->callback_data = data;
++	NFS_PROTO(inode)->read_setup(data, msg);
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++
++	rpc_put_task(task);
++
++	dprintk("NFS: %5u initiated direct read call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++
++	return 0;
++}
++
+ /*
+  * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
+  * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
+@@ -283,7 +315,6 @@ static ssize_t nfs_direct_read_schedule_
+ 	unsigned long user_addr = (unsigned long)iov->iov_base;
+ 	size_t count = iov->iov_len;
+ 	size_t rsize = NFS_SERVER(inode)->rsize;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = ctx->cred,
+ 	};
+@@ -343,26 +374,9 @@ static ssize_t nfs_direct_read_schedule_
+ 		data->res.fattr = &data->fattr;
+ 		data->res.eof = 0;
+ 		data->res.count = bytes;
+-		nfs_fattr_init(&data->fattr);
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+ 
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		NFS_PROTO(inode)->read_setup(data, &msg);
+-
+-		task = rpc_run_task(&task_setup_data);
+-		if (IS_ERR(task))
+-			break;
+-		rpc_put_task(task);
+-
+-		dprintk("NFS: %5u initiated direct read call "
+-			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				bytes,
+-				(unsigned long long)data->args.offset);
++		if (nfs_direct_read_execute(data, &task_setup_data, &msg))
++			break;
+ 
+ 		started += bytes;
+ 		user_addr += bytes;
+@@ -448,12 +462,15 @@ static void nfs_direct_free_writedata(st
+ }
+ 
+ #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
++static long nfs_direct_write_execute(struct nfs_write_data *data,
++				     struct rpc_task_setup *task_setup_data,
++				     struct rpc_message *msg);
++
+ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+ {
+ 	struct inode *inode = dreq->inode;
+ 	struct list_head *p;
+ 	struct nfs_write_data *data;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = dreq->ctx->cred,
+ 	};
+@@ -487,25 +504,7 @@ static void nfs_direct_write_reschedule(
+ 		 * Reuse data->task; data->args should not have changed
+ 		 * since the original request was sent.
+ 		 */
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+-		NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-		/*
+-		 * We're called via an RPC callback, so BKL is already held.
+-		 */
+-		task = rpc_run_task(&task_setup_data);
+-		if (!IS_ERR(task))
+-			rpc_put_task(task);
+-
+-		dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				data->args.count,
+-				(unsigned long long)data->args.offset);
++		nfs_direct_write_execute(data, &task_setup_data, &msg);
+ 	}
+ 
+ 	if (put_dreq(dreq))
+@@ -548,10 +547,31 @@ static const struct rpc_call_ops nfs_com
+ 	.rpc_release = nfs_direct_commit_release,
+ };
+ 
++static long nfs_direct_commit_execute(struct nfs_direct_req *dreq,
++				      struct nfs_write_data *data,
++				      struct rpc_task_setup *task_setup_data,
++				      struct rpc_message *msg)
++{
++	struct rpc_task *task;
++
++	NFS_PROTO(data->inode)->commit_setup(data, msg);
++
++	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
++	dreq->commit_data = NULL;
++
++	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++
++	rpc_put_task(task);
++	return 0;
++}
++
+ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
+ {
+ 	struct nfs_write_data *data = dreq->commit_data;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+@@ -579,16 +599,7 @@ static void nfs_direct_commit_schedule(s
+ 	data->res.verf = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	NFS_PROTO(data->inode)->commit_setup(data, &msg);
+-
+-	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
+-	dreq->commit_data = NULL;
+-
+-	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (!IS_ERR(task))
+-		rpc_put_task(task);
++	nfs_direct_commit_execute(dreq, data, &task_setup_data, &msg);
+ }
+ 
+ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+@@ -690,6 +701,36 @@ static const struct rpc_call_ops nfs_wri
+ 	.rpc_release = nfs_direct_write_release,
+ };
+ 
++static long nfs_direct_write_execute(struct nfs_write_data *data,
++				     struct rpc_task_setup *task_setup_data,
++				     struct rpc_message *msg)
++{
++	struct inode *inode = data->inode;
++	struct rpc_task *task;
++
++	task_setup_data->task = &data->task;
++	task_setup_data->callback_data = data;
++	msg->rpc_argp = &data->args;
++	msg->rpc_resp = &data->res;
++	NFS_PROTO(inode)->write_setup(data, msg);
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++
++	rpc_put_task(task);
++
++	dprintk("NFS: %5u initiated direct write call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++
++	return 0;
++}
++
+ /*
+  * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+  * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
+@@ -705,7 +746,6 @@ static ssize_t nfs_direct_write_schedule
+ 	struct inode *inode = ctx->path.dentry->d_inode;
+ 	unsigned long user_addr = (unsigned long)iov->iov_base;
+ 	size_t count = iov->iov_len;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = ctx->cred,
+ 	};
+@@ -771,24 +811,8 @@ static ssize_t nfs_direct_write_schedule
+ 		data->res.verf = &data->verf;
+ 		nfs_fattr_init(&data->fattr);
+ 
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+-		NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-		task = rpc_run_task(&task_setup_data);
+-		if (IS_ERR(task))
+-			break;
+-		rpc_put_task(task);
+-
+-		dprintk("NFS: %5u initiated direct write call "
+-			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				bytes,
+-				(unsigned long long)data->args.offset);
++		if (nfs_direct_write_execute(data, &task_setup_data, &msg))
++			break;
+ 
+ 		started += bytes;
+ 		user_addr += bytes;
+diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kconfig
+--- linux-2.6.34.noarch/fs/nfsd/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-08-23 12:09:03.304505472 -0400
+@@ -79,3 +79,52 @@ config NFSD_V4
+ 	  available from http://linux-nfs.org/.
+ 
+ 	  If unsure, say N.
++
++config PNFSD
++	bool "NFSv4.1 server support for Parallel NFS (pNFS) (DEVELOPER ONLY)"
++	depends on NFSD_V4 && EXPERIMENTAL
++	select EXPORTFS_FILE_LAYOUT
++	help
++	  This option enables support for the parallel NFS features of the
++	  minor version 1 of the NFSv4 protocol (draft-ietf-nfsv4-minorversion1)
++	  in the kernel's NFS server.
++
++	  Unless you're an NFS developer, say N.
++
++config PNFSD_LOCAL_EXPORT
++	bool "Enable pNFS support for exporting local filesystems for debugging purposes"
++	depends on PNFSD
++	help
++	  Say Y here if you want your pNFS server to export local file systems
++	  over the files layout type.  With this option the MDS (metadata
++	  server) functions also as a single DS (data server).  This is mostly
++	  useful for development and debugging purposes.
++
++	  If unsure, say N.
++
++config SPNFS
++	bool "Provide spNFS server support (EXPERIMENTAL)"
++	depends on PNFSD
++	select RPCSEC_GSS_KRB5
++	help
++	  Say Y here if you want spNFS server support.
++
++	  If unsure, say N.
++
++config SPNFS_LAYOUTSEGMENTS
++	bool "Allow spNFS to return partial file layouts (EXPERIMENTAL)"
++	depends on SPNFS
++	select RPCSEC_GSS_KRB5
++	help
++	  Say Y here if you want spNFS to be able to return layout segments.
++
++	  If unsure, say N.
++
++config SPNFS_BLOCK
++	bool "Provide Block Layout server support (EXPERIMENTAL)"
++	depends on SPNFS
++	select EXPORTFS_BLOCK_LAYOUT
++	help
++	  Say Y here if you want spNFS block layout support.
++
++	  If unsure, say N.
+diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/Makefile
+--- linux-2.6.34.noarch/fs/nfsd/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-08-23 12:09:03.304505472 -0400
+@@ -11,3 +11,7 @@ nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs
+ nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
+ nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
+ 			   nfs4acl.o nfs4callback.o nfs4recover.o
++nfsd-$(CONFIG_PNFSD)	+= nfs4pnfsd.o nfs4pnfsdlm.o nfs4pnfsds.o
++nfsd-$(CONFIG_PNFSD_LOCAL_EXPORT) += pnfsd_lexp.o
++nfsd-$(CONFIG_SPNFS)	+= spnfs_com.o spnfs_ops.o
++nfsd-$(CONFIG_SPNFS_BLOCK) += bl_com.o bl_ops.o
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-08-23 12:08:29.090501507 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 12:09:03.306491345 -0400
+@@ -40,7 +40,6 @@
+ 
+ #define NFSPROC4_CB_NULL 0
+ #define NFSPROC4_CB_COMPOUND 1
+-#define NFS4_STATEID_SIZE 16
+ 
+ /* Index of predefined Linux callback client operations */
+ 
+@@ -48,11 +47,17 @@ enum {
+ 	NFSPROC4_CLNT_CB_NULL = 0,
+ 	NFSPROC4_CLNT_CB_RECALL,
+ 	NFSPROC4_CLNT_CB_SEQUENCE,
++#if defined(CONFIG_PNFSD)
++	NFSPROC4_CLNT_CB_LAYOUT,
++	NFSPROC4_CLNT_CB_DEVICE,
++#endif
+ };
+ 
+ enum nfs_cb_opnum4 {
+ 	OP_CB_RECALL            = 4,
++	OP_CB_LAYOUT            = 5,
+ 	OP_CB_SEQUENCE          = 11,
++	OP_CB_DEVICE            = 14,
+ };
+ 
+ #define NFS4_MAXTAGLEN		20
+@@ -78,6 +83,19 @@ enum nfs_cb_opnum4 {
+ #define NFS4_dec_cb_recall_sz		(cb_compound_dec_hdr_sz  +      \
+ 					cb_sequence_dec_sz +            \
+ 					op_dec_sz)
++#define NFS4_enc_cb_layout_sz		(cb_compound_enc_hdr_sz +       \
++					cb_sequence_enc_sz +            \
++					1 + 3 +                         \
++					enc_nfs4_fh_sz + 4)
++#define NFS4_dec_cb_layout_sz		(cb_compound_dec_hdr_sz  +      \
++					cb_sequence_dec_sz +            \
++					op_dec_sz)
++#define NFS4_enc_cb_device_sz		(cb_compound_enc_hdr_sz +       \
++					cb_sequence_enc_sz +            \
++					1 + 6)
++#define NFS4_dec_cb_device_sz		(cb_compound_dec_hdr_sz  +      \
++					cb_sequence_dec_sz +            \
++					op_dec_sz)
+ 
+ /*
+ * Generic encode routines from fs/nfs/nfs4xdr.c
+@@ -94,6 +112,10 @@ xdr_writemem(__be32 *p, const void *ptr,
+ }
+ 
+ #define WRITE32(n)               *p++ = htonl(n)
++#define WRITE64(n)               do {				\
++	*p++ = htonl((u32)((n) >> 32));				\
++	*p++ = htonl((u32)(n));					\
++} while (0)
+ #define WRITEMEM(ptr,nbytes)     do {                           \
+ 	p = xdr_writemem(p, ptr, nbytes);                       \
+ } while (0)
+@@ -204,6 +226,16 @@ nfs_cb_stat_to_errno(int stat)
+  */
+ 
+ static void
++encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
++{
++	__be32 *p;
++
++	RESERVE_SPACE(sizeof(stateid_t));
++	WRITE32(sid->si_generation);
++	WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
++}
++
++static void
+ encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
+ {
+ 	__be32 * p;
+@@ -228,10 +260,10 @@ encode_cb_recall(struct xdr_stream *xdr,
+ 	__be32 *p;
+ 	int len = dp->dl_fh.fh_size;
+ 
+-	RESERVE_SPACE(12+sizeof(dp->dl_stateid) + len);
++	RESERVE_SPACE(4);
+ 	WRITE32(OP_CB_RECALL);
+-	WRITE32(dp->dl_stateid.si_generation);
+-	WRITEMEM(&dp->dl_stateid.si_opaque, sizeof(stateid_opaque_t));
++	encode_stateid(xdr, &dp->dl_stateid);
++	RESERVE_SPACE(8 + (XDR_QUADLEN(len) << 2));
+ 	WRITE32(0); /* truncate optimization not implemented */
+ 	WRITE32(len);
+ 	WRITEMEM(&dp->dl_fh.fh_base, len);
+@@ -259,6 +291,111 @@ encode_cb_sequence(struct xdr_stream *xd
+ 	hdr->nops++;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++#include "pnfsd.h"
++
++static void
++encode_cb_layout(struct xdr_stream *xdr, struct nfs4_layoutrecall *clr,
++		 struct nfs4_cb_compound_hdr *hdr)
++{
++	__be32 *p;
++	/* p is the XDR cursor: WRITE32()/WRITE64() store htonl() words via it */
++	BUG_ON(hdr->minorversion == 0);
++	/* opcode, layout type, iomode, changed flag, recall type: 5 words */
++	RESERVE_SPACE(20);
++	WRITE32(OP_CB_LAYOUT);
++	WRITE32(clr->cb.cbl_seg.layout_type);
++	WRITE32(clr->cb.cbl_seg.iomode);
++	WRITE32(clr->cb.cbl_layoutchanged);
++	WRITE32(clr->cb.cbl_recall_type);
++	if (unlikely(clr->cb.cbl_recall_type == RETURN_FSID)) {
++		struct nfs4_fsid fsid = clr->cb.cbl_fsid;
++
++		RESERVE_SPACE(16);
++		WRITE64(fsid.major);
++		WRITE64(fsid.minor);
++		dprintk("%s: type %x iomode %d changed %d recall_type %d "
++			"fsid 0x%llx-0x%llx\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type, fsid.major, fsid.minor);
++	} else if (clr->cb.cbl_recall_type == RETURN_FILE) {
++		int len = clr->clr_file->fi_fhlen;
++		stateid_t *cbl_sid = (stateid_t *)&clr->cb.cbl_sid;
++		/* 20 = fh length word + 64-bit offset + 64-bit length; fh padded */
++		RESERVE_SPACE(20 + (XDR_QUADLEN(len) << 2));
++		WRITE32(len);
++		WRITEMEM(clr->clr_file->fi_fhval, len);
++		WRITE64(clr->cb.cbl_seg.offset);
++		WRITE64(clr->cb.cbl_seg.length);
++		encode_stateid(xdr, cbl_sid);
++		dprintk("%s: type %x iomode %d changed %d recall_type %d "
++			"offset %lld length %lld stateid " STATEID_FMT "\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type,
++			clr->cb.cbl_seg.offset, clr->cb.cbl_seg.length,
++			STATEID_VAL(cbl_sid));
++	} else {
++		dprintk("%s: type %x iomode %d changed %d recall_type %d\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type);
++	}
++	hdr->nops++;
++}
++
++static void
++encode_cb_device(struct xdr_stream *xdr, struct nfs4_notify_device *nd,
++		 struct nfs4_cb_compound_hdr *hdr)
++{
++	u32 *p;
++	int i;
++	int len					= nd->nd_list->cbd_len;
++	struct nfsd4_pnfs_cb_dev_item *cbd	= nd->nd_list->cbd_list;
++
++	dprintk("NFSD %s: --> num %d\n", __func__, len);
++
++	BUG_ON(hdr->minorversion == 0);
++
++	RESERVE_SPACE(8);
++	WRITE32(OP_CB_DEVICE);
++
++	/* notify4 cnda_changes<>; */
++	WRITE32(len);
++	for (i = 0; i < len; i++) {
++		dprintk("%s: nt %d lt %d devid x%llx-x%llx im %d i %d\n",
++			__func__, cbd[i].cbd_notify_type,
++			cbd[i].cbd_layout_type,
++			cbd[i].cbd_devid.sbid,
++			cbd[i].cbd_devid.devid,
++			cbd[i].cbd_immediate, i);
++
++		BUG_ON(cbd[i].cbd_notify_type != NOTIFY_DEVICEID4_CHANGE &&
++		       cbd[i].cbd_notify_type != NOTIFY_DEVICEID4_DELETE);
++		RESERVE_SPACE(32);
++		/* bitmap4         notify_mask; */
++		WRITE32(1);
++		WRITE32(cbd[i].cbd_notify_type);
++		/* opaque     notify_vals<>; */
++		if (cbd[i].cbd_notify_type == NOTIFY_DEVICEID4_CHANGE)
++			WRITE32(24);
++		else
++			WRITE32(20);
++		WRITE32(cbd[i].cbd_layout_type);
++		WRITE64(cbd[i].cbd_devid.sbid);
++		WRITE64(cbd[i].cbd_devid.devid);
++
++		if (cbd[i].cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) {
++			RESERVE_SPACE(4);
++			WRITE32(cbd[i].cbd_immediate);
++		}
++	}
++	hdr->nops++;
++}
++#endif /* CONFIG_PNFSD */
++
+ static int
+ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
+ {
+@@ -288,6 +425,45 @@ nfs4_xdr_enc_cb_recall(struct rpc_rqst *
+ 	return 0;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static int
++nfs4_xdr_enc_cb_layout(struct rpc_rqst *req, u32 *p,
++		       struct nfs4_rpc_args *rpc_args)
++{
++	struct xdr_stream xdr;
++	struct nfs4_layoutrecall *args = rpc_args->args_op;
++	struct nfs4_cb_compound_hdr hdr = {
++		.ident = 0,
++		.minorversion = rpc_args->args_seq.cbs_minorversion,
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_cb_compound_hdr(&xdr, &hdr);
++	encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
++	encode_cb_layout(&xdr, args, &hdr);
++	encode_cb_nops(&hdr);
++	return 0;
++}
++
++static int
++nfs4_xdr_enc_cb_device(struct rpc_rqst *req, u32 *p,
++		       struct nfs4_rpc_args *rpc_args)
++{
++	struct xdr_stream xdr;
++	struct nfs4_notify_device *args = rpc_args->args_op;
++	struct nfs4_cb_compound_hdr hdr = {
++		.ident = 0,
++		.minorversion = rpc_args->args_seq.cbs_minorversion,
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_cb_compound_hdr(&xdr, &hdr);
++	encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
++	encode_cb_device(&xdr, args, &hdr);
++	encode_cb_nops(&hdr);
++	return 0;
++}
++#endif /* CONFIG_PNFSD */
+ 
+ static int
+ decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){
+@@ -403,6 +579,48 @@ out:
+ 	return status;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++/* Decode a CB_LAYOUT callback reply: compound header, CB_SEQUENCE result,
++ * then the CB_LAYOUT op header; stops at the first step that fails. */
++static int
++nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp, u32 *p,
++		       struct nfsd4_cb_sequence *seq)
++{
++	struct nfs4_cb_compound_hdr hdr;
++	struct xdr_stream xdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_cb_compound_hdr(&xdr, &hdr);
++	if (status)
++		return status;
++	status = decode_cb_sequence(&xdr, seq, rqstp);
++	if (status)
++		return status;
++	return decode_cb_op_hdr(&xdr, OP_CB_LAYOUT);
++}
++
++/* Decode a CB_DEVICE callback reply: compound header, CB_SEQUENCE result,
++ * then the CB_DEVICE op header; stops at the first step that fails. */
++static int
++nfs4_xdr_dec_cb_device(struct rpc_rqst *rqstp, u32 *p,
++		       struct nfsd4_cb_sequence *seq)
++{
++	struct nfs4_cb_compound_hdr hdr;
++	struct xdr_stream xdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_cb_compound_hdr(&xdr, &hdr);
++	if (status)
++		return status;
++	status = decode_cb_sequence(&xdr, seq, rqstp);
++	if (status)
++		return status;
++	return decode_cb_op_hdr(&xdr, OP_CB_DEVICE);
++}
++#endif /* CONFIG_PNFSD */
++
+ /*
+  * RPC procedure tables
+  */
+@@ -420,6 +638,10 @@ out:
+ static struct rpc_procinfo     nfs4_cb_procedures[] = {
+     PROC(CB_NULL,      NULL,     enc_cb_null,     dec_cb_null),
+     PROC(CB_RECALL,    COMPOUND,   enc_cb_recall,      dec_cb_recall),
++#if defined(CONFIG_PNFSD)
++    PROC(CB_LAYOUT,    COMPOUND,   enc_cb_layout,      dec_cb_layout),
++    PROC(CB_DEVICE,    COMPOUND,   enc_cb_device,      dec_cb_device),
++#endif
+ };
+ 
+ static struct rpc_version       nfs_cb_version4 = {
+@@ -606,10 +828,9 @@ out:
+  * TODO: cb_sequence should support referring call lists, cachethis, multiple
+  * slots, and mark callback channel down on communication errors.
+  */
+-static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
++static void nfsd4_cb_prepare_sequence(struct rpc_task *task,
++				      struct nfs4_client *clp)
+ {
+-	struct nfs4_delegation *dp = calldata;
+-	struct nfs4_client *clp = dp->dl_client;
+ 	struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
+ 	u32 minorversion = clp->cl_cb_conn.cb_minorversion;
+ 	int status = 0;
+@@ -629,11 +850,15 @@ static void nfsd4_cb_prepare(struct rpc_
+ 	rpc_call_start(task);
+ }
+ 
+-static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
++static void nfsd4_cb_recall_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs4_delegation *dp = calldata;
+-	struct nfs4_client *clp = dp->dl_client;
++	nfsd4_cb_prepare_sequence(task, dp->dl_client);
++}
+ 
++static void nfsd4_cb_done_sequence(struct rpc_task *task,
++				   struct nfs4_client *clp)
++{
+ 	dprintk("%s: minorversion=%d\n", __func__,
+ 		clp->cl_cb_conn.cb_minorversion);
+ 
+@@ -657,7 +882,7 @@ static void nfsd4_cb_recall_done(struct 
+ 	struct nfs4_client *clp = dp->dl_client;
+ 	struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
+ 
+-	nfsd4_cb_done(task, calldata);
++	nfsd4_cb_done_sequence(task, clp);
+ 
+ 	if (current_rpc_client == NULL) {
+ 		/* We're shutting down; give up. */
+@@ -688,7 +913,7 @@ static void nfsd4_cb_recall_done(struct 
+ 	if (dp->dl_retries--) {
+ 		rpc_delay(task, 2*HZ);
+ 		task->tk_status = 0;
+-		rpc_restart_call(task);
++		rpc_restart_call_prepare(task);
+ 		return;
+ 	} else {
+ 		atomic_set(&clp->cl_cb_set, 0);
+@@ -704,7 +929,7 @@ static void nfsd4_cb_recall_release(void
+ }
+ 
+ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
+-	.rpc_call_prepare = nfsd4_cb_prepare,
++	.rpc_call_prepare = nfsd4_cb_recall_prepare,
+ 	.rpc_call_done = nfsd4_cb_recall_done,
+ 	.rpc_release = nfsd4_cb_recall_release,
+ };
+@@ -781,3 +1006,173 @@ void nfsd4_cb_recall(struct nfs4_delegat
+ {
+ 	queue_work(callback_wq, &dp->dl_recall.cb_work);
+ }
++
++#if defined(CONFIG_PNFSD)
++static void nfsd4_cb_layout_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	nfsd4_cb_prepare_sequence(task, clr->clr_client);
++}
++
++static void nfsd4_cb_layout_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	struct nfs4_client *clp = clr->clr_client;
++
++	nfsd4_cb_done_sequence(task, clp);
++	/* Zero status: there is no error to report or handle below. */
++	if (!task->tk_status)
++		return;
++
++	printk("%s: clp %p cb_client %p fp %p failed with status %d\n",
++	       __func__,
++	       clp,
++	       clp->cl_cb_client,
++	       clr->clr_file,
++	       task->tk_status);
++
++	switch (task->tk_status) {
++	case -EIO:
++		/* Network partition? */
++		atomic_set(&clp->cl_cb_set, 0);
++		warn_no_callback_path(clp, task->tk_status);
++		/* FIXME:
++		 * The pNFS standard states that we may only expire the
++		 * client after at least "lease time" (e.g. lease-time * 2)
++		 * when failing to communicate a recall.
++		 */
++		break;
++	case -NFS4ERR_DELAY:
++		/* Poll the client until it's done with the layout */
++		rpc_delay(task, HZ/100); /* 10 milliseconds */
++		task->tk_status = 0;
++		rpc_restart_call_prepare(task);
++		break;
++	case -NFS4ERR_NOMATCHING_LAYOUT:
++		task->tk_status = 0;
++		nomatching_layout(clr);
++	}
++}
++
++static void nfsd4_cb_layout_release(void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	kfree(clr->clr_args);
++	clr->clr_args = NULL;
++	put_layoutrecall(clr);
++}
++
++static const struct rpc_call_ops nfsd4_cb_layout_ops = {
++	.rpc_call_prepare = nfsd4_cb_layout_prepare,
++	.rpc_call_done = nfsd4_cb_layout_done,
++	.rpc_release = nfsd4_cb_layout_release,
++};
++
++/*
++ * Called with state lock.
++ */
++int
++nfsd4_cb_layout(struct nfs4_layoutrecall *clr)
++{
++	struct nfs4_client *clp = clr->clr_client;
++	struct rpc_clnt *clnt = clp->cl_cb_client;
++	struct nfs4_rpc_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_LAYOUT],
++		.rpc_cred = callback_cred
++	};
++	int status;
++
++	args = kzalloc(sizeof(*args), GFP_KERNEL);
++	if (!args) {
++		status = -ENOMEM;
++		goto out;
++	}
++	clr->clr_args = args;
++	args->args_op = clr;
++	msg.rpc_argp = args;
++	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
++				&nfsd4_cb_layout_ops, clr);
++out:
++	if (status) {
++		kfree(args);
++		put_layoutrecall(clr);
++	}
++	dprintk("NFSD: nfsd4_cb_layout: status %d\n", status);
++	return status;
++}
++
++static void nfsd4_cb_device_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	nfsd4_cb_prepare_sequence(task, cbnd->nd_client);
++}
++
++static void nfsd4_cb_device_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	struct nfs4_client *clp = cbnd->nd_client;
++
++	nfsd4_cb_done_sequence(task, clp);
++
++	dprintk("%s: clp %p cb_client %p: status %d\n",
++	       __func__,
++	       clp,
++	       clp->cl_cb_client,
++	       task->tk_status);
++
++	if (task->tk_status == -EIO) {
++		/* Network partition? */
++		atomic_set(&clp->cl_cb_set, 0);
++		warn_no_callback_path(clp, task->tk_status);
++	}
++}
++
++static void nfsd4_cb_device_release(void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	kfree(cbnd->nd_args);
++	cbnd->nd_args = NULL;
++	kfree(cbnd);
++}
++
++static const struct rpc_call_ops nfsd4_cb_device_ops = {
++	.rpc_call_prepare = nfsd4_cb_device_prepare,
++	.rpc_call_done = nfsd4_cb_device_done,
++	.rpc_release = nfsd4_cb_device_release,
++};
++
++/*
++ * Queue an asynchronous CB_DEVICE call for cbnd.  Called with state lock.
++ * args is parked in cbnd->nd_args, which nfsd4_cb_device_release() frees.
++ */
++int
++nfsd4_cb_notify_device(struct nfs4_notify_device *cbnd)
++{
++	struct nfs4_client *clp = cbnd->nd_client;
++	struct rpc_clnt *clnt = clp->cl_cb_client;
++	struct nfs4_rpc_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_DEVICE],
++		.rpc_cred = callback_cred
++	};
++	int status;
++
++	dprintk("%s: clp %p\n", __func__, clp);
++	args = kzalloc(sizeof(*args), GFP_KERNEL);
++	if (!args) {
++		status = -ENOMEM;
++		goto out;
++	}
++	cbnd->nd_args = args;
++	args->args_op = cbnd;
++	msg.rpc_argp = args;
++	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
++				&nfsd4_cb_device_ops, cbnd);
++out:
++	if (status)
++		kfree(args);
++	dprintk("%s: status %d\n", __func__, status);
++	return status;
++}
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-08-23 12:09:03.307491492 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-08-23 12:09:03.308491262 -0400
+@@ -0,0 +1,1679 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *****************************************************************************/
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY                NFSDDBG_PROC
++
++/* Globals */
++static u32 current_layoutid = 1;
++
++/*
++ * Currently used for manipulating the layout state.
++ */
++static DEFINE_SPINLOCK(layout_lock);
++
++#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_SMP)
++#  define BUG_ON_UNLOCKED_LAYOUT() BUG_ON(!spin_is_locked(&layout_lock))
++#else
++#  define BUG_ON_UNLOCKED_LAYOUT()
++#endif
++
++/*
++ * Layout state - NFSv4.1 pNFS
++ */
++static struct kmem_cache *pnfs_layout_slab;
++static struct kmem_cache *pnfs_layoutrecall_slab;
++
++/* hash table for nfsd4_pnfs_deviceid.sbid */
++#define SBID_HASH_BITS	8
++#define SBID_HASH_SIZE	(1 << SBID_HASH_BITS)
++#define SBID_HASH_MASK	(SBID_HASH_SIZE - 1)
++
++struct sbid_tracker {
++	u64 id;
++	struct super_block *sb;
++	struct list_head hash;
++};
++
++static u64 current_sbid;
++static struct list_head sbid_hashtbl[SBID_HASH_SIZE];
++
++static inline unsigned long
++sbid_hashval(struct super_block *sb)
++{
++	return hash_ptr(sb, SBID_HASH_BITS);
++}
++
++static inline struct sbid_tracker *
++alloc_sbid(void)
++{
++	return kmalloc(sizeof(struct sbid_tracker), GFP_KERNEL);
++}
++
++static void
++destroy_sbid(struct sbid_tracker *sbid)
++{
++	spin_lock(&layout_lock);
++	list_del(&sbid->hash);
++	spin_unlock(&layout_lock);
++	kfree(sbid);
++}
++
++void
++nfsd4_free_pnfs_slabs(void)
++{
++	int i;
++	struct sbid_tracker *sbid;
++
++	nfsd4_free_slab(&pnfs_layout_slab);
++	nfsd4_free_slab(&pnfs_layoutrecall_slab);
++
++	for (i = 0; i < SBID_HASH_SIZE; i++) {
++		while (!list_empty(&sbid_hashtbl[i])) {
++			sbid = list_first_entry(&sbid_hashtbl[i],
++						struct sbid_tracker,
++						hash);
++			destroy_sbid(sbid);
++		}
++	}
++}
++
++/* Set up the sbid hash and the layout slabs.  The hash heads are initialized
++ * first so nfsd4_free_pnfs_slabs() can walk them after a failed slab create. */
++int
++nfsd4_init_pnfs_slabs(void)
++{
++	int i;
++
++	for (i = 0; i < SBID_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&sbid_hashtbl[i]);
++
++	pnfs_layout_slab = kmem_cache_create("pnfs_layouts",
++			sizeof(struct nfs4_layout), 0, 0, NULL);
++	if (pnfs_layout_slab == NULL)
++		return -ENOMEM;
++	pnfs_layoutrecall_slab = kmem_cache_create("pnfs_layoutrecalls",
++			sizeof(struct nfs4_layoutrecall), 0, 0, NULL);
++	if (pnfs_layoutrecall_slab == NULL)
++		return -ENOMEM;
++	return 0;
++}
++
++/* XXX: Need to implement the notify types and track which
++ * clients have which devices. */
++void pnfs_set_device_notify(clientid_t *clid, unsigned int types)
++{
++	struct nfs4_client *clp;
++	dprintk("%s: -->\n", __func__);
++
++	nfs4_lock_state();
++	/* Indicate that client has a device so we can only notify
++	 * the correct clients */
++	clp = find_confirmed_client(clid);
++	if (clp) {
++		atomic_inc(&clp->cl_deviceref);
++		dprintk("%s: Incr device count (clnt %p) to %d\n",
++			__func__, clp, atomic_read(&clp->cl_deviceref));
++	}
++	nfs4_unlock_state();
++}
++
++/* Clear notifications for this client
++ * XXX: Do we need to loop through and clean up all
++ *      krefs when nfsd cleans up the client? */
++void pnfs_clear_device_notify(struct nfs4_client *clp)
++{
++	atomic_dec(&clp->cl_deviceref);
++	dprintk("%s: Decr device count (clnt %p) to %d\n",
++		__func__, clp, atomic_read(&clp->cl_deviceref));
++}
++
++static struct nfs4_layout_state *
++alloc_init_layout_state(struct nfs4_client *clp, struct nfs4_file *fp,
++			stateid_t *stateid)
++{
++	struct nfs4_layout_state *new;
++
++	/* FIXME: use a kmem_cache */
++	new = kzalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return new;
++	get_nfs4_file(fp);
++	INIT_LIST_HEAD(&new->ls_perfile);
++	INIT_LIST_HEAD(&new->ls_layouts);
++	kref_init(&new->ls_ref);
++	new->ls_client = clp;
++	new->ls_file = fp;
++	new->ls_stateid.si_boot = stateid->si_boot;
++	new->ls_stateid.si_stateownerid = 0; /* identifies layout stateid */
++	new->ls_stateid.si_generation = 1;
++	spin_lock(&layout_lock);
++	new->ls_stateid.si_fileid = current_layoutid++;
++	list_add(&new->ls_perfile, &fp->fi_layout_states);
++	spin_unlock(&layout_lock);
++	return new;
++}
++
++static inline void
++get_layout_state(struct nfs4_layout_state *ls)
++{
++	kref_get(&ls->ls_ref);
++}
++
++static void
++destroy_layout_state_common(struct nfs4_layout_state *ls)
++{
++	struct nfs4_file *fp = ls->ls_file;
++
++	dprintk("pNFS %s: ls %p fp %p clp %p\n", __func__, ls, fp,
++		ls->ls_client);
++	BUG_ON(!list_empty(&ls->ls_layouts));
++	kfree(ls);
++	put_nfs4_file(fp);
++}
++
++static void
++destroy_layout_state(struct kref *kref)
++{
++	struct nfs4_layout_state *ls =
++			container_of(kref, struct nfs4_layout_state, ls_ref);
++
++	spin_lock(&layout_lock);
++	list_del(&ls->ls_perfile);
++	spin_unlock(&layout_lock);
++	destroy_layout_state_common(ls);
++}
++
++static void
++destroy_layout_state_locked(struct kref *kref)
++{
++	struct nfs4_layout_state *ls =
++			container_of(kref, struct nfs4_layout_state, ls_ref);
++
++	list_del(&ls->ls_perfile);
++	destroy_layout_state_common(ls);
++}
++
++static inline void
++put_layout_state(struct nfs4_layout_state *ls)
++{
++	dprintk("pNFS %s: ls %p ls_ref %d\n", __func__, ls,
++		atomic_read(&ls->ls_ref.refcount));
++	kref_put(&ls->ls_ref, destroy_layout_state);
++}
++
++static inline void
++put_layout_state_locked(struct nfs4_layout_state *ls)
++{
++	dprintk("pNFS %s: ls %p ls_ref %d\n", __func__, ls,
++		atomic_read(&ls->ls_ref.refcount));
++	kref_put(&ls->ls_ref, destroy_layout_state_locked);
++}
++
++/*
++ * Search the fp->fi_layout_states list for the layout state owned by clp.
++ * If not found, then this is a 'first open/delegation/lock stateid' from
++ * the client for this file.
++ * Called under the layout_lock.
++ */
++static struct nfs4_layout_state *
++find_get_layout_state(struct nfs4_client *clp, struct nfs4_file *fp)
++{
++	struct nfs4_layout_state *ls;
++
++	BUG_ON_UNLOCKED_LAYOUT();
++	list_for_each_entry(ls, &fp->fi_layout_states, ls_perfile) {
++		if (ls->ls_client == clp) {
++			dprintk("pNFS %s: before GET ls %p ls_ref %d\n",
++				__func__, ls,
++				atomic_read(&ls->ls_ref.refcount));
++			get_layout_state(ls);
++			return ls;
++		}
++	}
++	return NULL;
++}
++
++static __be32
++verify_stateid(struct nfs4_file *fp, stateid_t *stateid)
++{
++	struct nfs4_stateid *local = NULL;
++	struct nfs4_delegation *temp = NULL;
++
++	/* check if open or lock stateid */
++	local = find_stateid(stateid, RD_STATE);
++	if (local)
++		return 0;
++	temp = find_delegation_stateid(fp->fi_inode, stateid);
++	if (temp)
++		return 0;
++	return nfserr_bad_stateid;
++}
++
++/*
++ * nfs4_process_layout_stateid()
++ *
++ * We have looked up the nfs4_file corresponding to the current_fh, and
++ * confirmed the clientid. Pull the few tests from nfs4_preprocess_stateid_op()
++ * that make sense with a layout stateid.
++ *
++ * Called with the state_lock held
++ * Returns zero and stateid is updated, or error.
++ *
++ * Note: the struct nfs4_layout_state pointer is only set by layoutget.
++ */
++static __be32
++nfs4_process_layout_stateid(struct nfs4_client *clp, struct nfs4_file *fp,
++			    stateid_t *stateid, struct nfs4_layout_state **lsp)
++{
++	struct nfs4_layout_state *ls = NULL;
++	__be32 status = 0;
++
++	dprintk("--> %s clp %p fp %p \n", __func__, clp, fp);
++
++	dprintk("%s: operation stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(stateid));
++
++	status = nfs4_check_stateid(stateid);
++	if (status)
++		goto out;
++
++	/* Is this the first use of this layout ? */
++	spin_lock(&layout_lock);
++	ls = find_get_layout_state(clp, fp);
++	spin_unlock(&layout_lock);
++	if (!ls) {
++		/* Only alloc layout state on layoutget (which sets lsp). */
++		if (!lsp) {
++			dprintk("%s ERROR: Not layoutget & no layout stateid\n",
++				__func__);
++			status = nfserr_bad_stateid;
++			goto out;
++		}
++		dprintk("%s Initial stateid for layout: file %p client %p\n",
++			__func__, fp, clp);
++
++		/* verify input stateid */
++		status = verify_stateid(fp, stateid);
++		if (status) {
++			dprintk("%s ERROR: invalid open/deleg/lock stateid\n",
++				__func__);
++			goto out;
++		}
++		ls = alloc_init_layout_state(clp, fp, stateid);
++		if (!ls) {
++			dprintk("%s pNFS ERROR: no memory for layout state\n",
++				__func__);
++			status = nfserr_resource;
++			goto out;
++		}
++	} else {
++		dprintk("%s Not initial stateid. Layout state %p file %p\n",
++			__func__, ls, fp);
++
++		/* BAD STATEID */
++		status = nfserr_bad_stateid;
++		if (memcmp(&ls->ls_stateid.si_opaque, &stateid->si_opaque,
++			sizeof(stateid_opaque_t)) != 0) {
++
++			/* if a LAYOUTGET operation and stateid is a valid
++			 * open/deleg/lock stateid, accept it as a parallel
++			 * initial layout stateid
++			 */
++			if (lsp && ((verify_stateid(fp, stateid)) == 0)) {
++				dprintk("%s parallel initial layout state\n",
++					__func__);
++				goto update;
++			}
++
++			dprintk("%s ERROR bad opaque in stateid 1\n", __func__);
++			goto out_put;
++		}
++
++		/* stateid is a valid layout stateid for this file. */
++		if (stateid->si_generation > ls->ls_stateid.si_generation) {
++			dprintk("%s bad stateid 1\n", __func__);
++			goto out_put;
++		}
++update:
++		update_stateid(&ls->ls_stateid);
++		dprintk("%s Updated ls_stateid to %d on layoutstate %p\n",
++			__func__, ls->ls_stateid.si_generation, ls);
++	}
++	status = 0;
++	/* Set the stateid to be encoded */
++	memcpy(stateid, &ls->ls_stateid, sizeof(stateid_t));
++
++	/* Return the layout state if requested */
++	if (lsp) {
++		get_layout_state(ls);
++		*lsp = ls;
++	}
++	dprintk("%s: layout stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(&ls->ls_stateid));
++out_put:
++	dprintk("%s PUT LO STATE:\n", __func__);
++	put_layout_state(ls);
++out:
++	dprintk("<-- %s status %d\n", __func__, htonl(status));
++
++	return status;
++}
++
++static inline struct nfs4_layout *
++alloc_layout(void)
++{
++	return kmem_cache_alloc(pnfs_layout_slab, GFP_KERNEL);
++}
++
++static inline void
++free_layout(struct nfs4_layout *lp)
++{
++	kmem_cache_free(pnfs_layout_slab, lp);
++}
++
++static void
++init_layout(struct nfs4_layout_state *ls,
++	    struct nfs4_layout *lp,
++	    struct nfs4_file *fp,
++	    struct nfs4_client *clp,
++	    struct svc_fh *current_fh,
++	    struct nfsd4_layout_seg *seg)
++{
++	dprintk("pNFS %s: ls %p lp %p clp %p fp %p ino %p\n", __func__,
++		ls, lp, clp, fp, fp->fi_inode);
++
++	get_nfs4_file(fp);
++	lp->lo_client = clp;
++	lp->lo_file = fp;
++	get_layout_state(ls);
++	lp->lo_state = ls;
++	memcpy(&lp->lo_seg, seg, sizeof(lp->lo_seg));
++	spin_lock(&layout_lock);
++	list_add_tail(&lp->lo_perstate, &ls->ls_layouts);
++	list_add_tail(&lp->lo_perclnt, &clp->cl_layouts);
++	list_add_tail(&lp->lo_perfile, &fp->fi_layouts);
++	spin_unlock(&layout_lock);
++	dprintk("pNFS %s end\n", __func__);
++}
++
++static void
++dequeue_layout(struct nfs4_layout *lp)
++{
++	BUG_ON_UNLOCKED_LAYOUT();
++	list_del(&lp->lo_perclnt);
++	list_del(&lp->lo_perfile);
++	list_del(&lp->lo_perstate);
++}
++
++/* Free lp and drop its layout-state and file references; needs layout_lock. */
++static void
++destroy_layout(struct nfs4_layout *lp)
++{
++	struct nfs4_layout_state *ls;
++	struct nfs4_client *clp;
++	struct nfs4_file *fp;
++
++	BUG_ON_UNLOCKED_LAYOUT();
++	ls = lp->lo_state;
++	fp = lp->lo_file;
++	clp = lp->lo_client;
++	dprintk("pNFS %s: lp %p clp %p fp %p ino %p ls_layouts empty %d\n",
++		__func__, lp, clp, fp, fp->fi_inode,
++		list_empty(&ls->ls_layouts));
++	free_layout(lp);
++	/* undo the get_layout_state()/get_nfs4_file() done by init_layout() */
++	put_layout_state_locked(ls);
++	put_nfs4_file(fp);
++}
++
++void fs_layout_return(struct super_block *sb, struct inode *ino,
++		      struct nfsd4_pnfs_layoutreturn *lrp, int flags,
++		      void *recall_cookie)
++{
++	int ret;
++
++	if (unlikely(!sb->s_pnfs_op->layout_return))
++		return;
++
++	lrp->lr_flags = flags;
++	lrp->args.lr_cookie = recall_cookie;
++
++	if (!ino) /* FSID or ALL */
++		ino = sb->s_root->d_inode;
++
++	ret = sb->s_pnfs_op->layout_return(ino, &lrp->args);
++	dprintk("%s: inode %lu iomode=%d offset=0x%llx length=0x%llx "
++		"cookie = %p flags 0x%x status=%d\n",
++		__func__, ino->i_ino, lrp->args.lr_seg.iomode,
++		lrp->args.lr_seg.offset, lrp->args.lr_seg.length,
++		recall_cookie, flags, ret);
++}
++
++static u64
++alloc_init_sbid(struct super_block *sb)
++{
++	struct sbid_tracker *sbid;
++	struct sbid_tracker *new = alloc_sbid();
++	unsigned long hash_idx = sbid_hashval(sb);
++	u64 id = 0;
++
++	if (likely(new)) {
++		spin_lock(&layout_lock);
++		id = ++current_sbid;
++		new->id = (id << SBID_HASH_BITS) | (hash_idx & SBID_HASH_MASK);
++		id = new->id;
++		BUG_ON(id == 0);
++		new->sb = sb;
++
++		list_for_each_entry (sbid, &sbid_hashtbl[hash_idx], hash)
++			if (sbid->sb == sb) {
++				kfree(new);
++				id = sbid->id;
++				spin_unlock(&layout_lock);
++				return id;
++			}
++		list_add(&new->hash, &sbid_hashtbl[hash_idx]);
++		spin_unlock(&layout_lock);
++	}
++	return id;
++}
++
++/* Map an sbid back to its super_block (NULL if unknown).  A hit that is not
++ * already first in its hash chain is moved to the front of the chain. */
++struct super_block *
++find_sbid_id(u64 id)
++{
++	struct list_head *bucket = &sbid_hashtbl[id & SBID_HASH_MASK];
++	struct super_block *found = NULL;
++	struct sbid_tracker *cur;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (cur, bucket, hash) {
++		if (cur->id == id) {
++			if (bucket->next != &cur->hash)
++				list_move(&cur->hash, bucket);
++			found = cur->sb;
++			break;
++		}
++	}
++	spin_unlock(&layout_lock);
++	return found;
++}
++
++/* Return the sbid tracked for sb, moving a hit that is not first in its
++ * hash chain to the front.  On a miss the id comes from alloc_init_sbid(),
++ * which yields 0 when it cannot allocate a tracker. */
++u64
++find_create_sbid(struct super_block *sb)
++{
++	struct list_head *bucket = &sbid_hashtbl[sbid_hashval(sb)];
++	struct sbid_tracker *cur;
++	u64 id = 0;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (cur, bucket, hash) {
++		if (cur->sb == sb) {
++			if (bucket->next != &cur->hash)
++				list_move(&cur->hash, bucket);
++			id = cur->id;
++			break;
++		}
++	}
++	spin_unlock(&layout_lock);
++
++	if (id)
++		return id;
++	return alloc_init_sbid(sb);
++}
++
++/*
++ * Create a layoutrecall structure
++ * An optional layoutrecall can be cloned (except for the layoutrecall lists)
++ */
++static struct nfs4_layoutrecall *
++alloc_init_layoutrecall(struct nfsd4_pnfs_cb_layout *cbl,
++			struct nfs4_client *clp,
++			struct nfs4_file *lrfile)
++{
++	struct nfs4_layoutrecall *clr;
++
++	dprintk("NFSD %s\n", __func__);
++	clr = kmem_cache_alloc(pnfs_layoutrecall_slab, GFP_KERNEL);
++	if (clr == NULL)
++		return clr;
++
++	dprintk("NFSD %s -->\n", __func__);
++
++	memset(clr, 0, sizeof(*clr));
++	if (lrfile)
++		get_nfs4_file(lrfile);
++	clr->clr_client = clp;
++	clr->clr_file = lrfile;
++	clr->cb = *cbl;
++
++	kref_init(&clr->clr_ref);
++	INIT_LIST_HEAD(&clr->clr_perclnt);
++
++	dprintk("NFSD %s return %p\n", __func__, clr);
++	return clr;
++}
++
++static void
++get_layoutrecall(struct nfs4_layoutrecall *clr)
++{
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));
++	kref_get(&clr->clr_ref);
++}
++
++static void
++destroy_layoutrecall(struct kref *kref)
++{
++	struct nfs4_layoutrecall *clr =
++			container_of(kref, struct nfs4_layoutrecall, clr_ref);
++	dprintk("pNFS %s: clr %p fp %p clp %p\n", __func__, clr,
++		clr->clr_file, clr->clr_client);
++	BUG_ON(!list_empty(&clr->clr_perclnt));
++	if (clr->clr_file)
++		put_nfs4_file(clr->clr_file);
++	kmem_cache_free(pnfs_layoutrecall_slab, clr);
++}
++
++int
++put_layoutrecall(struct nfs4_layoutrecall *clr)
++{
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));
++	return kref_put(&clr->clr_ref, destroy_layoutrecall);
++}
++
++void *
++layoutrecall_done(struct nfs4_layoutrecall *clr)
++{
++	void *recall_cookie = clr->cb.cbl_cookie;
++	struct nfs4_layoutrecall *parent = clr->parent;
++
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));
++	BUG_ON_UNLOCKED_LAYOUT();
++	list_del_init(&clr->clr_perclnt);
++	put_layoutrecall(clr);
++
++	if (parent && !put_layoutrecall(parent))
++		recall_cookie = NULL;
++
++	return recall_cookie;
++}
++
++/*
++ * get_state() and cb_get_state() are
++ */
++/* Unlink and free every pnfs_ds_dev_entry queued on stp->st_pnfs_ds_id. */
++void
++release_pnfs_ds_dev_list(struct nfs4_stateid *stp)
++{
++	struct pnfs_ds_dev_entry *ddp, *next;
++
++	list_for_each_entry_safe(ddp, next, &stp->st_pnfs_ds_id,
++				 dd_dev_entry) {
++		list_del(&ddp->dd_dev_entry);
++		kfree(ddp);
++	}
++}
++
++static int
++nfs4_add_pnfs_ds_dev(struct nfs4_stateid *stp, u32 dsid)
++{
++	struct pnfs_ds_dev_entry *ddp;
++
++	ddp = kmalloc(sizeof(*ddp), GFP_KERNEL);
++	if (!ddp)
++		return -ENOMEM;
++
++	INIT_LIST_HEAD(&ddp->dd_dev_entry);
++	list_add(&ddp->dd_dev_entry, &stp->st_pnfs_ds_id);
++	ddp->dd_dsid = dsid;
++	return 0;
++}
++
++/*
++ * are two octet ranges overlapping?
++ * start1            last1
++ *   |-----------------|
++ *                start2            last2
++ *                  |----------------|
++ */
++static inline int
++lo_seg_overlapping(struct nfsd4_layout_seg *l1, struct nfsd4_layout_seg *l2)
++{
++	u64 l1_last = last_byte_offset(l1->offset, l1->length);
++	u64 l2_last = last_byte_offset(l2->offset, l2->length);
++	int overlap;
++
++	/* Each range runs from its offset to its last byte inclusive, so
++	 * the two intersect unless one ends before the other begins; a
++	 * last byte equal to the other's offset is a one-byte overlap. */
++	overlap = !(l2_last < l1->offset || l1_last < l2->offset);
++	dprintk("%s: l1 %llu:%lld l2 %llu:%lld ret=%d\n", __func__,
++		l1->offset, l1->length, l2->offset, l2->length, overlap);
++	return overlap;
++}
++
++static inline int
++same_fsid_major(struct nfs4_fsid *fsid, u64 major)
++{
++	return fsid->major == major;
++}
++
++static inline int
++same_fsid(struct nfs4_fsid *fsid, struct svc_fh *current_fh)
++{
++	return same_fsid_major(fsid, current_fh->fh_export->ex_fsid);
++}
++
++/*
++ * find a layout recall conflicting with the specified layoutget
++ */
++static int
++is_layout_recalled(struct nfs4_client *clp,
++		   struct svc_fh *current_fh,
++		   struct nfsd4_layout_seg *seg)
++{
++	struct nfs4_layoutrecall *clr;
++	int recalled = 0;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (clr, &clp->cl_layoutrecalls, clr_perclnt) {
++		if (clr->cb.cbl_seg.layout_type != seg->layout_type)
++			continue;
++		switch (clr->cb.cbl_recall_type) {
++		case RETURN_ALL:
++			recalled = 1;
++			break;
++		case RETURN_FSID:
++			recalled = same_fsid(&clr->cb.cbl_fsid, current_fh);
++			break;
++		default:
++			BUG_ON(clr->cb.cbl_recall_type != RETURN_FILE);
++			recalled = clr->cb.cbl_seg.clientid == seg->clientid &&
++				   lo_seg_overlapping(&clr->cb.cbl_seg, seg);
++		}
++		if (recalled)
++			break;
++	}
++	spin_unlock(&layout_lock);
++	return recalled;
++}
++
++/* Can two octet ranges be merged into one, i.e. are they overlapping
++ * or adjacent? */
++static inline int
++lo_seg_mergeable(struct nfsd4_layout_seg *l1, struct nfsd4_layout_seg *l2)
++{
++	u64 l1_end = end_offset(l1->offset, l1->length);
++	u64 l2_end = end_offset(l2->offset, l2->length);
++
++	/* if end1 == start2 the ranges are adjacent, which still counts;
++	 * only a real gap between the two rules a merge out */
++	if (l2_end < l1->offset || l1_end < l2->offset)
++		return 0;
++	return 1;
++}
++
++/* Grow segment lo in place so that it also spans segment lg. */
++static void
++extend_layout(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *lg)
++{
++	u64 start = lo->offset;
++	u64 end = end_offset(lo->offset, lo->length);
++	u64 new_start = lg->offset;
++	u64 new_end = end_offset(lg->offset, lg->length);
++
++	/* nothing to do when lg lies entirely inside lo */
++	if (new_start >= start && new_end <= end)
++		return;
++
++	/* the union starts at the lower offset and ends at the higher end */
++	start = start < new_start ? start : new_start;
++	end = end > new_end ? end : new_end;
++	lo->offset = start;
++
++	/* an end of NFS4_MAX_UINT64 is stored as the length itself */
++	if (end == NFS4_MAX_UINT64)
++		lo->length = end;
++	else
++		lo->length = end - start;
++}
++
++/* Extend the first mergeable layout on fp with seg; NULL when none matches. */
++static struct nfs4_layout *
++merge_layout(struct nfs4_file *fp, struct nfs4_client *clp,
++	     struct nfsd4_layout_seg *seg)
++{
++	struct nfs4_layout *lp, *merged = NULL;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (lp, &fp->fi_layouts, lo_perfile)
++		if (lp->lo_seg.layout_type == seg->layout_type &&
++		    lp->lo_seg.clientid == seg->clientid &&
++		    lp->lo_seg.iomode == seg->iomode &&
++		    lo_seg_mergeable(&lp->lo_seg, seg)) {
++			extend_layout(&lp->lo_seg, seg);
++			merged = lp; /* the loop cursor itself is never NULL */
++			break;
++		}
++	spin_unlock(&layout_lock);
++	return merged;
++}
++
++/* LAYOUTGET: get a layout from the exported fs and record or merge it. */
++__be32
++nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *lgp,
++		     struct exp_xdr_stream *xdr)
++{
++	u32 status;
++	__be32 nfserr;
++	struct inode *ino = lgp->lg_fhp->fh_dentry->d_inode;
++	struct super_block *sb = ino->i_sb;
++	int can_merge;
++	struct nfs4_file *fp;
++	struct nfs4_client *clp;
++	struct nfs4_layout *lp = NULL;
++	struct nfs4_layout_state *ls = NULL;
++	struct nfsd4_pnfs_layoutget_arg args = {
++		.lg_minlength = lgp->lg_minlength,
++		.lg_fh = &lgp->lg_fhp->fh_handle,
++	};
++	struct nfsd4_pnfs_layoutget_res res = {
++		.lg_seg = lgp->lg_seg,
++	};
++
++	dprintk("NFSD: %s Begin\n", __func__);
++
++	args.lg_sbid = find_create_sbid(sb);
++	if (!args.lg_sbid) {
++		nfserr = nfserr_layouttrylater;
++		goto out;
++	}
++
++	can_merge = sb->s_pnfs_op->can_merge_layouts != NULL &&
++		    sb->s_pnfs_op->can_merge_layouts(lgp->lg_seg.layout_type);
++
++	nfs4_lock_state();
++	fp = find_alloc_file(ino, lgp->lg_fhp);
++	clp = find_confirmed_client((clientid_t *)&lgp->lg_seg.clientid);
++	dprintk("pNFS %s: fp %p clp %p \n", __func__, fp, clp);
++	if (!fp || !clp) {
++		nfserr = nfserr_inval;
++		goto out_unlock;
++	}
++
++	/* Check decoded layout stateid */
++	nfserr = nfs4_process_layout_stateid(clp, fp, &lgp->lg_sid, &ls);
++	if (nfserr)
++		goto out_unlock;
++
++	if (is_layout_recalled(clp, lgp->lg_fhp, &lgp->lg_seg)) {
++		/* must unwind via out_unlock: the state lock is held here */
++		nfserr = nfserr_recallconflict;
++		goto out_unlock;
++	}
++
++	/* pre-alloc layout in case we can't merge after calling the fs */
++	lp = alloc_layout();
++	if (!lp) {
++		nfserr = nfserr_layouttrylater;
++		goto out_unlock;
++	}
++
++	dprintk("pNFS %s: pre-export type 0x%x maxcount %Zd "
++		"iomode %u offset %llu length %llu\n",
++		__func__, lgp->lg_seg.layout_type,
++		exp_xdr_qbytes(xdr->end - xdr->p),
++		lgp->lg_seg.iomode, lgp->lg_seg.offset, lgp->lg_seg.length);
++
++	/* FIXME: need to eliminate the use of the state lock */
++	nfs4_unlock_state();
++	status = sb->s_pnfs_op->layout_get(ino, xdr, &args, &res);
++	nfs4_lock_state();
++
++	dprintk("pNFS %s: post-export status %u "
++		"iomode %u offset %llu length %llu\n",
++		__func__, status, res.lg_seg.iomode,
++		res.lg_seg.offset, res.lg_seg.length);
++
++	/*
++	 * The allowable error codes for the layout_get pNFS export
++	 * operations vector function (from the file system) can be
++	 * expanded as needed to include other errors defined for
++	 * the RFC 5661 LAYOUTGET operation.
++	 */
++	switch (status) {
++	case 0:
++		nfserr = NFS4_OK;
++		break;
++	case NFS4ERR_ACCESS:
++	case NFS4ERR_BADIOMODE:
++		/* No support for LAYOUTIOMODE4_RW layouts */
++	case NFS4ERR_BADLAYOUT:
++		/* No layout matching loga_minlength rules */
++	case NFS4ERR_INVAL:
++	case NFS4ERR_IO:
++	case NFS4ERR_LAYOUTTRYLATER:
++	case NFS4ERR_LAYOUTUNAVAILABLE:
++	case NFS4ERR_LOCKED:
++	case NFS4ERR_NOSPC:
++	case NFS4ERR_RECALLCONFLICT:
++	case NFS4ERR_SERVERFAULT:
++	case NFS4ERR_TOOSMALL:
++		/* Requested layout too big for loga_maxcount */
++	case NFS4ERR_WRONG_TYPE:
++		/* Not a regular file */
++		nfserr = cpu_to_be32(status);
++		goto out_freelayout;
++	default:
++		BUG();
++		nfserr = nfserr_serverfault;
++	}
++
++	lgp->lg_seg = res.lg_seg;
++	lgp->lg_roc = res.lg_return_on_close;
++
++	/* SUCCESS!
++	 * Can the new layout be merged into an existing one?
++	 * If so, free unused layout struct
++	 */
++	if (can_merge && merge_layout(fp, clp, &res.lg_seg))
++		goto out_freelayout;
++
++	/* Can't merge, so let's initialize this new layout */
++	init_layout(ls, lp, fp, clp, lgp->lg_fhp, &res.lg_seg);
++out_unlock:
++	if (ls)
++		put_layout_state(ls);
++	if (fp)
++		put_nfs4_file(fp);
++	nfs4_unlock_state();
++out:
++	dprintk("pNFS %s: lp %p exit nfserr %u\n", __func__, lp,
++		be32_to_cpu(nfserr));
++	return nfserr;
++out_freelayout:
++	free_layout(lp);
++	goto out_unlock;
++}
++
++static void
++trim_layout(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *lr)
++{
++	u64 lo_start = lo->offset;
++	u64 lo_end = end_offset(lo_start, lo->length);
++	u64 lr_start = lr->offset;
++	u64 lr_end = end_offset(lr_start, lr->length);
++
++	dprintk("%s:Begin lo %llu:%lld lr %llu:%lld\n", __func__,
++		lo->offset, lo->length, lr->offset, lr->length);
++
++	/* lr fully covers lo? */
++	if (lr_start <= lo_start && lo_end <= lr_end) {
++		lo->length = 0;
++		goto out;
++	}
++
++	/*
++	 * split not supported yet. retain layout segment.
++	 * remains must be returned by the client
++	 * on the final layout return.
++	 */
++	if (lo_start < lr_start && lr_end < lo_end) {
++		dprintk("%s: split not supported\n", __func__);
++		goto out;
++	}
++
++	if (lo_start < lr_start)
++		lo_end = lr_start - 1;
++	else /* lr_end < lo_end */
++		lo_start = lr_end + 1;
++
++	lo->offset = lo_start;
++	lo->length = (lo_end == NFS4_MAX_UINT64) ? lo_end : lo_end - lo_start;
++out:
++	dprintk("%s:End lo %llu:%lld\n", __func__, lo->offset, lo->length);
++}
++
++static int
++pnfs_return_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp,
++			 struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	int layouts_found = 0;
++	struct nfs4_layout *lp, *nextlp;
++
++	dprintk("%s: clp %p fp %p\n", __func__, clp, fp);
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (lp, nextlp, &fp->fi_layouts, lo_perfile) {
++		dprintk("%s: lp %p client %p,%p lo_type %x,%x iomode %d,%d\n",
++			__func__, lp,
++			lp->lo_client, clp,
++			lp->lo_seg.layout_type, lrp->args.lr_seg.layout_type,
++			lp->lo_seg.iomode, lrp->args.lr_seg.iomode);
++		if (lp->lo_client != clp ||
++		    lp->lo_seg.layout_type != lrp->args.lr_seg.layout_type ||
++		    (lp->lo_seg.iomode != lrp->args.lr_seg.iomode &&
++		     lrp->args.lr_seg.iomode != IOMODE_ANY) ||
++		     !lo_seg_overlapping(&lp->lo_seg, &lrp->args.lr_seg))
++			continue;
++		layouts_found++;
++		trim_layout(&lp->lo_seg, &lrp->args.lr_seg);
++		if (!lp->lo_seg.length) {
++			lrp->lrs_present = 0;
++			dequeue_layout(lp);
++			destroy_layout(lp);
++		}
++	}
++	spin_unlock(&layout_lock);
++
++	return layouts_found;
++}
++
++static int
++pnfs_return_client_layouts(struct nfs4_client *clp,
++			   struct nfsd4_pnfs_layoutreturn *lrp, u64 ex_fsid)
++{
++	int layouts_found = 0;
++	struct nfs4_layout *lp, *nextlp;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (lp, nextlp, &clp->cl_layouts, lo_perclnt) {
++		if (lrp->args.lr_seg.layout_type != lp->lo_seg.layout_type ||
++		   (lrp->args.lr_seg.iomode != lp->lo_seg.iomode &&
++		    lrp->args.lr_seg.iomode != IOMODE_ANY))
++			continue;
++
++		if (lrp->args.lr_return_type == RETURN_FSID &&
++		    !same_fsid_major(&lp->lo_file->fi_fsid, ex_fsid))
++			continue;
++
++		layouts_found++;
++		dequeue_layout(lp);
++		destroy_layout(lp);
++	}
++	spin_unlock(&layout_lock);
++
++	return layouts_found;
++}
++
++static int
++recall_return_perfect_match(struct nfs4_layoutrecall *clr,
++			    struct nfsd4_pnfs_layoutreturn *lrp,
++			    struct nfs4_file *fp,
++			    struct svc_fh *current_fh)
++{
++	if (clr->cb.cbl_seg.iomode != lrp->args.lr_seg.iomode ||
++	    clr->cb.cbl_recall_type != lrp->args.lr_return_type)
++		return 0;
++
++	return (clr->cb.cbl_recall_type == RETURN_FILE &&
++		clr->clr_file == fp &&
++		clr->cb.cbl_seg.offset == lrp->args.lr_seg.offset &&
++		clr->cb.cbl_seg.length == lrp->args.lr_seg.length) ||
++
++		(clr->cb.cbl_recall_type == RETURN_FSID &&
++		 same_fsid(&clr->cb.cbl_fsid, current_fh)) ||
++
++		clr->cb.cbl_recall_type == RETURN_ALL;
++}
++
++static int
++recall_return_partial_match(struct nfs4_layoutrecall *clr,
++			    struct nfsd4_pnfs_layoutreturn *lrp,
++			    struct nfs4_file *fp,
++			    struct svc_fh *current_fh)
++{
++	/* iomode matching? */
++	if (clr->cb.cbl_seg.iomode != lrp->args.lr_seg.iomode &&
++	    clr->cb.cbl_seg.iomode != IOMODE_ANY &&
++	    lrp->args.lr_seg.iomode != IOMODE_ANY)
++		return 0;
++
++	if (clr->cb.cbl_recall_type == RETURN_ALL ||
++	    lrp->args.lr_return_type == RETURN_ALL)
++		return 1;
++
++	/* fsid matches? */
++	if (clr->cb.cbl_recall_type == RETURN_FSID ||
++	    lrp->args.lr_return_type == RETURN_FSID)
++		return same_fsid(&clr->cb.cbl_fsid, current_fh);
++
++	/* file matches, range overlapping? */
++	return clr->clr_file == fp &&
++	       lo_seg_overlapping(&clr->cb.cbl_seg, &lrp->args.lr_seg);
++}
++
++int nfs4_pnfs_return_layout(struct super_block *sb, struct svc_fh *current_fh,
++			    struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	int status = 0;
++	int layouts_found = 0;
++	struct inode *ino = current_fh->fh_dentry->d_inode;
++	struct nfs4_file *fp = NULL;
++	struct nfs4_client *clp;
++	struct nfs4_layoutrecall *clr, *nextclr;
++	u64 ex_fsid = current_fh->fh_export->ex_fsid;
++	void *recall_cookie = NULL;
++
++	dprintk("NFSD: %s\n", __func__);
++
++	nfs4_lock_state();
++	clp = find_confirmed_client((clientid_t *)&lrp->args.lr_seg.clientid);
++	if (!clp)
++		goto out;
++
++	if (lrp->args.lr_return_type == RETURN_FILE) {
++		fp = find_file(ino);
++		if (!fp) {
++			printk(KERN_ERR "%s: RETURN_FILE: no nfs4_file for "
++				"ino %p:%lu\n",
++				__func__, ino, ino ? ino->i_ino : 0L);
++			goto out;
++		}
++
++		/* Check the stateid */
++		dprintk("%s PROCESS LO_STATEID inode %p\n", __func__, ino);
++		status = nfs4_process_layout_stateid(clp, fp, &lrp->lr_sid,
++						     NULL);
++		if (status)
++			goto out_put_file;
++
++		/* update layouts */
++		layouts_found = pnfs_return_file_layouts(clp, fp, lrp);
++		/* optimize for the all-empty case */
++		if (list_empty(&fp->fi_layouts))
++			recall_cookie = PNFS_LAST_LAYOUT_NO_RECALLS;
++	} else {
++		layouts_found = pnfs_return_client_layouts(clp, lrp, ex_fsid);
++	}
++
++	dprintk("pNFS %s: clp %p fp %p layout_type 0x%x iomode %d "
++		"return_type %d fsid 0x%llx offset %llu length %llu: "
++		"layouts_found %d\n",
++		__func__, clp, fp, lrp->args.lr_seg.layout_type,
++		lrp->args.lr_seg.iomode, lrp->args.lr_return_type,
++		ex_fsid,
++		lrp->args.lr_seg.offset, lrp->args.lr_seg.length, layouts_found);
++
++	/* update layoutrecalls
++	 * note: for RETURN_{FSID,ALL}, fp may be NULL
++	 */
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (clr, nextclr, &clp->cl_layoutrecalls,
++				  clr_perclnt) {
++		if (clr->cb.cbl_seg.layout_type != lrp->args.lr_seg.layout_type)
++			continue;
++
++		if (recall_return_perfect_match(clr, lrp, fp, current_fh))
++			recall_cookie = layoutrecall_done(clr);
++		else if (layouts_found &&
++			 recall_return_partial_match(clr, lrp, fp, current_fh))
++			clr->clr_time = CURRENT_TIME;
++	}
++	spin_unlock(&layout_lock);
++
++out_put_file:
++	if (fp)
++		put_nfs4_file(fp);
++out:
++	nfs4_unlock_state();
++
++	/* call exported filesystem layout_return (ignore return-code) */
++	fs_layout_return(sb, ino, lrp, 0, recall_cookie);
++
++	dprintk("pNFS %s: exit status %d \n", __func__, status);
++	return status;
++}
++
++/*
++ * PNFS Metadata server export operations callback for get_state
++ *
++ * called by the cluster fs when it receives a get_state() from a data
++ * server.
++ * returns status, or pnfs_get_state* with pnfs_get_state->status set.
++ *
++ */
++int
++nfs4_pnfs_cb_get_state(struct super_block *sb, struct pnfs_get_state *arg)
++{
++	struct nfs4_stateid *stp;
++	int flags = LOCK_STATE | OPEN_STATE; /* search both hash tables */
++	int status = -EINVAL;
++	struct inode *ino;
++	struct nfs4_delegation *dl;
++	stateid_t *stid = (stateid_t *)&arg->stid;
++
++	dprintk("NFSD: %s sid=" STATEID_FMT " ino %llu\n", __func__,
++		STATEID_VAL(stid), arg->ino);
++
++	nfs4_lock_state();
++	stp = find_stateid(stid, flags);
++	if (!stp) {
++		ino = iget_locked(sb, arg->ino);
++		if (!ino)
++			goto out;
++
++		if (ino->i_state & I_NEW) {
++			iget_failed(ino);
++			goto out;
++		}
++
++		dl = find_delegation_stateid(ino, stid);
++		if (dl)
++			status = 0;
++
++		iput(ino);
++	} else {
++		/* XXX ANDROS: marc removed nfs4_check_fh - how come? */
++
++		/* arg->dsid is the Data server id, set by the cluster fs */
++		status = nfs4_add_pnfs_ds_dev(stp, arg->dsid);
++		if (status)
++			goto out;
++
++		arg->access = stp->st_access_bmap;
++		*(clientid_t *)&arg->clid =
++			stp->st_stateowner->so_client->cl_clientid;
++	}
++out:
++	nfs4_unlock_state();
++	return status;
++}
++
++static int
++cl_has_file_layout(struct nfs4_client *clp, struct nfs4_file *lrfile,
++		   stateid_t *lsid)
++{
++	int found = 0;
++	struct nfs4_layout *lp;
++	struct nfs4_layout_state *ls;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry(lp, &clp->cl_layouts, lo_perclnt) {
++		if (lp->lo_file != lrfile)
++			continue;
++
++		ls = find_get_layout_state(clp, lrfile);
++		if (!ls) {
++			/* This shouldn't happen as the file should have a
++			 * layout stateid if it has a layout.
++			 */
++			printk(KERN_ERR "%s: file %p has no layout stateid\n",
++				__func__, lrfile);
++			WARN_ON(1);
++			break;
++		}
++		update_stateid(&ls->ls_stateid);
++		memcpy(lsid, &ls->ls_stateid, sizeof(stateid_t));
++		put_layout_state_locked(ls);
++		found = 1;
++		break;
++	}
++	spin_unlock(&layout_lock);
++
++	return found;
++}
++
++static int
++cl_has_fsid_layout(struct nfs4_client *clp, struct nfs4_fsid *fsid)
++{
++	int found = 0;
++	struct nfs4_layout *lp;
++
++	/* note: minor version unused */
++	spin_lock(&layout_lock);
++	list_for_each_entry(lp, &clp->cl_layouts, lo_perclnt)
++		if (lp->lo_file->fi_fsid.major == fsid->major) {
++			found = 1;
++			break;
++		}
++	spin_unlock(&layout_lock);
++	return found;
++}
++
++static int
++cl_has_any_layout(struct nfs4_client *clp)
++{
++	return !list_empty(&clp->cl_layouts);
++}
++
++static int
++cl_has_layout(struct nfs4_client *clp, struct nfsd4_pnfs_cb_layout *cbl,
++	      struct nfs4_file *lrfile, stateid_t *lsid)
++{
++	switch (cbl->cbl_recall_type) {
++	case RETURN_FILE:
++		return cl_has_file_layout(clp, lrfile, lsid);
++	case RETURN_FSID:
++		return cl_has_fsid_layout(clp, &cbl->cbl_fsid);
++	default:
++		return cl_has_any_layout(clp);
++	}
++}
++
++/*
++ * Called without the layout_lock.
++ */
++void
++nomatching_layout(struct nfs4_layoutrecall *clr)
++{
++	struct nfsd4_pnfs_layoutreturn lr = {
++		.args.lr_return_type = clr->cb.cbl_recall_type,
++		.args.lr_seg = clr->cb.cbl_seg,
++	};
++	struct inode *inode;
++	void *recall_cookie;
++
++	if (clr->clr_file) {
++		inode = igrab(clr->clr_file->fi_inode);
++		if (WARN_ON(!inode))
++			return;
++	} else {
++		inode = NULL;
++	}
++
++	dprintk("%s: clp %p fp %p: simulating layout_return\n", __func__,
++		clr->clr_client, clr->clr_file);
++
++	if (clr->cb.cbl_recall_type == RETURN_FILE)
++		pnfs_return_file_layouts(clr->clr_client, clr->clr_file, &lr);
++	else
++		pnfs_return_client_layouts(clr->clr_client, &lr,
++					   clr->cb.cbl_fsid.major);
++
++	spin_lock(&layout_lock);
++	recall_cookie = layoutrecall_done(clr);
++	spin_unlock(&layout_lock);
++
++	fs_layout_return(clr->clr_sb, inode, &lr, LR_FLAG_INTERN,
++			 recall_cookie);
++	iput(inode);
++}
++
++/* Retire all of clp's pending layout recalls, then all of its layouts. */
++void pnfs_expire_client(struct nfs4_client *clp)
++{
++	for (;;) {
++		struct nfs4_layoutrecall *lrp = NULL;
++
++		spin_lock(&layout_lock);
++		if (!list_empty(&clp->cl_layoutrecalls)) {
++			lrp = list_entry(clp->cl_layoutrecalls.next,
++					 struct nfs4_layoutrecall, clr_perclnt);
++			get_layoutrecall(lrp);
++		}
++		spin_unlock(&layout_lock);
++		if (!lrp)
++			break;
++
++		dprintk("%s: lrp %p, fp %p\n", __func__, lrp, lrp->clr_file);
++		BUG_ON(lrp->clr_client != clp);
++		nomatching_layout(lrp);
++		put_layoutrecall(lrp);
++	}
++
++	for (;;) {
++		struct nfs4_layout *lp = NULL;
++		struct inode *inode = NULL;
++		struct nfsd4_pnfs_layoutreturn lr;
++		bool empty = false;
++
++		spin_lock(&layout_lock);
++		if (!list_empty(&clp->cl_layouts)) {
++			lp = list_entry(clp->cl_layouts.next,
++					struct nfs4_layout, lo_perclnt);
++			inode = igrab(lp->lo_file->fi_inode);
++			memset(&lr, 0, sizeof(lr));
++			lr.args.lr_return_type = RETURN_FILE;
++			lr.args.lr_seg = lp->lo_seg;
++			BUG_ON(lp->lo_client != clp);
++			dequeue_layout(lp);
++			/* only meaningful once lp is off the per-file list */
++			empty = list_empty(&lp->lo_file->fi_layouts);
++			destroy_layout(lp); /* do not access lp after this */
++		}
++		spin_unlock(&layout_lock);
++		if (!lp)
++			break;
++		if (WARN_ON(!inode))
++			break;
++
++		dprintk("%s: inode %lu lp %p clp %p\n", __func__, inode->i_ino,
++			lp, clp);
++		fs_layout_return(inode->i_sb, inode, &lr, LR_FLAG_EXPIRE,
++				 empty ? PNFS_LAST_LAYOUT_NO_RECALLS : NULL);
++		iput(inode);
++	}
++}
++
++struct create_recall_list_arg {
++	struct nfsd4_pnfs_cb_layout *cbl;
++	struct nfs4_file *lrfile;
++	struct list_head *todolist;
++	unsigned todo_count;
++};
++
++/*
++ * look for matching layout for the given client
++ * and add a pending layout recall to the todo list
++ * if found any.
++ * returns:
++ *   0 on success (even when nothing matched), -ENOMEM on allocation failure.
++ */
++static int
++lo_recall_per_client(struct nfs4_client *clp, void *p)
++{
++	stateid_t lsid;
++	struct nfs4_layoutrecall *pending;
++	struct create_recall_list_arg *arg = p;
++
++	memset(&lsid, 0, sizeof(lsid));
++	if (!cl_has_layout(clp, arg->cbl, arg->lrfile, &lsid))
++		return 0;
++
++	/* Matching put done by layoutreturn */
++	pending = alloc_init_layoutrecall(arg->cbl, clp, arg->lrfile);
++	/* out of memory, drain todo queue */
++	if (!pending)
++		return -ENOMEM;
++
++	*(stateid_t *)&pending->cb.cbl_sid = lsid;
++	list_add(&pending->clr_perclnt, arg->todolist);
++	arg->todo_count++;
++	return 0;
++}
++
++/* Create a layoutrecall structure for each client based on the
++ * original structure. */
++int
++create_layout_recall_list(struct list_head *todolist, unsigned *todo_len,
++			  struct nfsd4_pnfs_cb_layout *cbl,
++			  struct nfs4_file *lrfile)
++{
++	struct nfs4_client *clp;
++	struct create_recall_list_arg arg = {
++		.cbl = cbl,
++		.lrfile = lrfile,
++		.todolist = todolist,
++	};
++	int status = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	/* If client given by fs, just do single client */
++	if (cbl->cbl_seg.clientid) {
++		clp = find_confirmed_client(
++				(clientid_t *)&cbl->cbl_seg.clientid);
++		if (!clp) {
++			status = -ENOENT;
++			dprintk("%s: clientid %llx not found\n", __func__,
++				(unsigned long long)cbl->cbl_seg.clientid);
++			goto out;
++		}
++
++		status = lo_recall_per_client(clp, &arg);
++	} else {
++		/* Check all clients for layout matches */
++		status = filter_confirmed_clients(lo_recall_per_client, &arg);
++	}
++
++out:
++	*todo_len = arg.todo_count;
++	dprintk("%s: <-- list len %u status %d\n", __func__, *todo_len, status);
++	return status;
++}
++
++/*
++ * Recall layouts asynchronously
++ * Called with state lock.
++ */
++static int
++spawn_layout_recall(struct super_block *sb, struct list_head *todolist,
++		    unsigned todo_len)
++{
++	struct nfs4_layoutrecall *pending;
++	struct nfs4_layoutrecall *parent = NULL;
++	int status = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	if (todo_len > 1) {
++		pending = list_entry(todolist->next, struct nfs4_layoutrecall,
++				     clr_perclnt);
++
++		parent = alloc_init_layoutrecall(&pending->cb, NULL,
++						 pending->clr_file);
++		if (unlikely(!parent)) {
++			/* We want forward progress. If parent cannot be
++			 * allocated, take the first one as parent but don't
++			 * execute it.  The caller must check for -EAGAIN;
++			 * if it is returned, nfsd_layout_recall_cb should be
++			 * called again once the partial recalls return.
++			 */
++			list_del_init(&pending->clr_perclnt);
++			if (todo_len > 2) {
++				parent = pending;
++			} else {
++				parent = NULL;
++				put_layoutrecall(pending);
++			}
++			--todo_len;
++				status = -ENOMEM;
++		}
++	}
++
++	while (!list_empty(todolist)) {
++		pending = list_entry(todolist->next, struct nfs4_layoutrecall,
++				     clr_perclnt);
++		list_del_init(&pending->clr_perclnt);
++		dprintk("%s: clp %p cb_client %p fp %p\n", __func__,
++			pending->clr_client,
++			pending->clr_client->cl_cb_client,
++			pending->clr_file);
++		if (unlikely(!pending->clr_client->cl_cb_client)) {
++			printk(KERN_INFO
++				"%s: clientid %08x/%08x has no callback path\n",
++				__func__,
++				pending->clr_client->cl_clientid.cl_boot,
++				pending->clr_client->cl_clientid.cl_id);
++			put_layoutrecall(pending);
++			continue;
++		}
++
++		pending->clr_time = CURRENT_TIME;
++		pending->clr_sb = sb;
++		if (parent) {
++			/* If we created a parent its initial ref count is 1.
++			 * We will need to de-ref it eventually. So we just
++			 * don't increment on behalf of the last one.
++			 */
++			if (todo_len != 1)
++				get_layoutrecall(parent);
++		}
++		pending->parent = parent;
++		get_layoutrecall(pending);
++		/* Add to list so corresponding layoutreturn can find req */
++		list_add(&pending->clr_perclnt,
++			 &pending->clr_client->cl_layoutrecalls);
++
++		nfsd4_cb_layout(pending);
++		--todo_len;
++	}
++
++	return status;
++}
++
++/*
++ * Spawn a thread to perform a recall layout
++ *
++ */
++int nfsd_layout_recall_cb(struct super_block *sb, struct inode *inode,
++			  struct nfsd4_pnfs_cb_layout *cbl)
++{
++	int status;
++	struct nfs4_file *lrfile = NULL;
++	struct list_head todolist;
++	unsigned todo_len = 0;
++
++	dprintk("NFSD nfsd_layout_recall_cb: inode %p cbl %p\n", inode, cbl);
++	BUG_ON(!cbl);
++	BUG_ON(cbl->cbl_recall_type != RETURN_FILE &&
++	       cbl->cbl_recall_type != RETURN_FSID &&
++	       cbl->cbl_recall_type != RETURN_ALL);
++	BUG_ON(cbl->cbl_recall_type == RETURN_FILE && !inode);
++	BUG_ON(cbl->cbl_seg.iomode != IOMODE_READ &&
++	       cbl->cbl_seg.iomode != IOMODE_RW &&
++	       cbl->cbl_seg.iomode != IOMODE_ANY);
++
++	if (nfsd_serv == NULL) {
++		dprintk("NFSD nfsd_layout_recall_cb: nfsd_serv == NULL\n");
++		return -ENOENT;
++	}
++
++	nfs4_lock_state();
++	status = -ENOENT;
++	if (inode) {
++		lrfile = find_file(inode);
++		if (!lrfile) {
++			dprintk("NFSD nfsd_layout_recall_cb: "
++				"nfs4_file not found\n");
++			goto err;
++		}
++		if (cbl->cbl_recall_type == RETURN_FSID)
++			cbl->cbl_fsid = lrfile->fi_fsid;
++	}
++
++	INIT_LIST_HEAD(&todolist);
++
++	/* If no cookie provided by FS, return a default one */
++	if (!cbl->cbl_cookie)
++		cbl->cbl_cookie = PNFS_LAST_LAYOUT_NO_RECALLS;
++
++	status = create_layout_recall_list(&todolist, &todo_len, cbl, lrfile);
++	if (list_empty(&todolist)) {
++		status = -ENOENT;
++	} else {
++		/* process todolist even if create_layout_recall_list
++		 * returned an error */
++		int status2 = spawn_layout_recall(sb, &todolist, todo_len);
++		if (status2)
++			status = status2;
++	}
++
++err:
++	nfs4_unlock_state();
++	if (lrfile)
++		put_nfs4_file(lrfile);
++	return (todo_len && status) ? -EAGAIN : status;
++}
++
++struct create_device_notify_list_arg {
++	struct list_head *todolist;
++	struct nfsd4_pnfs_cb_dev_list *ndl;
++};
++
++static int
++create_device_notify_per_cl(struct nfs4_client *clp, void *p)
++{
++	struct nfs4_notify_device *cbnd;
++	struct create_device_notify_list_arg *arg = p;
++
++	if (atomic_read(&clp->cl_deviceref) <= 0)
++		return 0;
++
++	cbnd = kmalloc(sizeof(*cbnd), GFP_KERNEL);
++	if (!cbnd)
++		return -ENOMEM;
++
++	cbnd->nd_list = arg->ndl;
++	cbnd->nd_client = clp;
++	list_add(&cbnd->nd_perclnt, arg->todolist);
++	return 0;
++}
++
++/* Create a list of clients to send device notifications. */
++int
++create_device_notify_list(struct list_head *todolist,
++			  struct nfsd4_pnfs_cb_dev_list *ndl)
++{
++	int status;
++	struct create_device_notify_list_arg arg = {
++		.todolist = todolist,
++		.ndl = ndl,
++	};
++
++	nfs4_lock_state();
++	status = filter_confirmed_clients(create_device_notify_per_cl, &arg);
++	nfs4_unlock_state();
++
++	return status;
++}
++
++/*
++ * For each client that has a device, send a device notification.
++ * XXX: Need to track which clients have which devices.
++ */
++int nfsd_device_notify_cb(struct super_block *sb,
++			  struct nfsd4_pnfs_cb_dev_list *ndl)
++{
++	struct nfs4_notify_device *cbnd;
++	unsigned int notify_num = 0;
++	int status2, status = 0;
++	struct list_head todolist;
++
++	BUG_ON(!ndl || ndl->cbd_len == 0 || !ndl->cbd_list);
++
++	dprintk("NFSD %s: cbl %p len %u\n", __func__, ndl, ndl->cbd_len);
++
++	if (nfsd_serv == NULL)
++		return -ENOENT;
++
++	INIT_LIST_HEAD(&todolist);
++
++	status = create_device_notify_list(&todolist, ndl);
++
++	while (!list_empty(&todolist)) {
++		cbnd = list_entry(todolist.next, struct nfs4_notify_device,
++				  nd_perclnt);
++		list_del_init(&cbnd->nd_perclnt);
++		status2 = nfsd4_cb_notify_device(cbnd);
++		pnfs_clear_device_notify(cbnd->nd_client);
++		if (status2) {
++			kfree(cbnd);
++			status = status2;
++		}
++		notify_num++;
++	}
++
++	dprintk("NFSD %s: status %d clients %u\n",
++		__func__, status, notify_num);
++	return status;
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-08-23 12:09:03.309501439 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-08-23 12:09:03.309501439 -0400
+@@ -0,0 +1,461 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ ******************************************************************************/
++
++#include <linux/nfs4.h>
++#include <linux/nfsd/const.h>
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/sunrpc/clnt.h>
++
++#include "nfsfh.h"
++#include "nfsd.h"
++
++#define NFSDDBG_FACILITY                NFSDDBG_PROC
++
++/* Just use a linked list. Do not expect more than 32 dlm_device_entries
++ * the first implementation will just use one device per cluster file system
++ */
++
++static LIST_HEAD(dlm_device_list);
++static DEFINE_SPINLOCK(dlm_device_list_lock);
++
++struct dlm_device_entry {
++	struct list_head	dlm_dev_list;
++	char			disk_name[DISK_NAME_LEN];
++	int			num_ds;
++	char			ds_list[NFSD_DLM_DS_LIST_MAX];
++};
++
++/* Find the entry whose disk_name equals disk_name exactly; NULL if none. */
++static struct dlm_device_entry *
++_nfsd4_find_pnfs_dlm_device(char *disk_name)
++{
++	struct dlm_device_entry *dlm_pdev;
++
++	dprintk("--> %s  disk name %s\n", __func__, disk_name);
++	spin_lock(&dlm_device_list_lock);
++	list_for_each_entry(dlm_pdev, &dlm_device_list, dlm_dev_list) {
++		dprintk("%s Look for dlm_pdev %s\n", __func__, dlm_pdev->disk_name);
++		if (!strncmp(dlm_pdev->disk_name, disk_name, DISK_NAME_LEN)) {
++			spin_unlock(&dlm_device_list_lock);
++			return dlm_pdev;
++		}
++	}
++	spin_unlock(&dlm_device_list_lock);
++	return NULL;
++}
++
++static struct dlm_device_entry *
++nfsd4_find_pnfs_dlm_device(struct super_block *sb) {
++	char dname[BDEVNAME_SIZE];
++
++	bdevname(sb->s_bdev, dname);
++	return _nfsd4_find_pnfs_dlm_device(dname);
++}
++
++ssize_t
++nfsd4_get_pnfs_dlm_device_list(char *buf, ssize_t buflen)
++{
++	char *pos = buf;
++	ssize_t size = 0;
++	struct dlm_device_entry *dlm_pdev;
++	int ret = -EINVAL;
++
++	spin_lock(&dlm_device_list_lock);
++	list_for_each_entry(dlm_pdev, &dlm_device_list, dlm_dev_list)
++	{
++		int advanced;
++		advanced = snprintf(pos, buflen - size, "%s:%s\n", dlm_pdev->disk_name, dlm_pdev->ds_list);
++		if (advanced >= buflen - size)
++			goto out;
++		size += advanced;
++		pos += advanced;
++	}
++	ret = size;
++
++out:
++	spin_unlock(&dlm_device_list_lock);
++	return ret;
++}
++
++/* Validate a comma-separated address list and count its entries in *num_ds. */
++bool nfsd4_validate_pnfs_dlm_device(char *ds_list, int *num_ds)
++{
++	char *start = ds_list;
++
++	*num_ds = 0;
++	while (*start) {
++		struct sockaddr_storage tempAddr;
++		int ipLen = strcspn(start, ",");
++
++		if (!rpc_pton(start, ipLen, (struct sockaddr *)&tempAddr, sizeof(tempAddr)))
++			return false;
++		(*num_ds)++;
++		start += ipLen + (start[ipLen] == ','); /* stop at the NUL */
++	}
++	return true;
++}
++
++/*
++ * pnfs_dlm_device string format:
++ *     block-device-path:<ds1 ipv4 address>,<ds2 ipv4 address>
++ *
++ * Examples
++ *     '/dev/sda:192.168.1.96,192.168.1.97' creates a data server list with
++ *     two data servers for the dlm cluster file system mounted on /dev/sda.
++ *
++ *     '/dev/sda:192.168.1.96,192.168.1.100'
++ *     replaces the data server list for /dev/sda
++ *
++ *     Only the deviceid == 1 is supported. Can add device id to
++ *     pnfs_dlm_device string when needed.
++ *
++ *     Only the round robin each data server once stripe index is supported.
++ *
++ * Returns 0 on success, -ENOMEM or -EINVAL on failure.
++ */
++int
++nfsd4_set_pnfs_dlm_device(char *pnfs_dlm_device, int len)
++{
++	struct dlm_device_entry *new, *found;
++	char *bufp = pnfs_dlm_device;
++	char *endp = bufp + strlen(bufp);
++	int err = -ENOMEM;
++
++	dprintk("--> %s len %d\n", __func__, len);
++
++	new = kzalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return err;
++
++	err = -EINVAL;
++	/* disk_name */
++	/* FIXME: need to check for valid disk_name. search superblocks?
++	 * check for slash dev slash ?
++	 */
++	len = strcspn(bufp, ":");
++	if (len >= DISK_NAME_LEN) /* leave room for the NUL */
++		goto out_free;
++	memcpy(new->disk_name, bufp, len);
++
++	err = -EINVAL;
++	bufp += len + 1;
++	if (bufp >= endp)
++		goto out_free;
++
++	/* data server list */
++	/* FIXME: need to check for comma separated valid ip format */
++	len = strcspn(bufp, ":");
++	if (len >= NFSD_DLM_DS_LIST_MAX) /* leave room for the NUL */
++		goto out_free;
++	memcpy(new->ds_list, bufp, len);
++
++	/*  validate the ips */
++	if (!nfsd4_validate_pnfs_dlm_device(new->ds_list, &(new->num_ds)))
++		goto out_free;
++
++	dprintk("%s disk_name %s num_ds %d ds_list %s\n", __func__,
++		new->disk_name, new->num_ds, new->ds_list);
++
++	found = _nfsd4_find_pnfs_dlm_device(new->disk_name);
++	if (found) {
++		/* FIXME: should compare found->ds_list with new->ds_list
++		 * and if it is different, kick off a CB_NOTIFY change
++		 * deviceid.
++		 */
++		dprintk("%s pnfs_dlm_device %s:%s already in cache "
++			" replace ds_list with new ds_list %s\n", __func__,
++			found->disk_name, found->ds_list, new->ds_list);
++		memset(found->ds_list, 0, sizeof(found->ds_list));
++		memcpy(found->ds_list, new->ds_list, strlen(new->ds_list));
++		found->num_ds = new->num_ds;
++		kfree(new);
++	} else {
++		dprintk("%s Adding pnfs_dlm_device %s:%s\n", __func__,
++				new->disk_name, new->ds_list);
++		spin_lock(&dlm_device_list_lock);
++		list_add(&new->dlm_dev_list, &dlm_device_list);
++		spin_unlock(&dlm_device_list_lock);
++	}
++	dprintk("<-- %s Success\n", __func__);
++	return 0;
++
++out_free:
++	kfree(new);
++	dprintk("<-- %s returns %d\n", __func__, err);
++	return err;
++}
++
++void nfsd4_pnfs_dlm_shutdown(void)
++{
++	struct dlm_device_entry *entry, *tmp;
++
++	dprintk("--> %s\n", __func__);
++
++	/* Unlink and free every cached device entry under the list lock */
++	spin_lock(&dlm_device_list_lock);
++	list_for_each_entry_safe(entry, tmp, &dlm_device_list, dlm_dev_list) {
++		list_del(&entry->dlm_dev_list);
++		kfree(entry);
++	}
++	spin_unlock(&dlm_device_list_lock);
++}
++
++static int nfsd4_pnfs_dlm_getdeviter(struct super_block *sb,
++				     u32 layout_type,
++				     struct nfsd4_pnfs_dev_iter_res *res)
++{
++	if (layout_type != LAYOUT_NFSV4_1_FILES) {
++		printk(KERN_ERR "%s: ERROR: layout type isn't 'file' "
++			"(type: %x)\n", __func__, layout_type);
++		return -ENOTSUPP;
++	}
++	/* Only one device is ever reported, so every call signals end-of-list */
++	res->gd_eof = 1;
++	if (res->gd_cookie)	/* non-zero cookie: nothing further to return */
++		return -ENOENT;
++	/* First call (cookie 0): hand back device id 1 with cookie/verifier 1 */
++	res->gd_cookie = 1;
++	res->gd_verf = 1;
++	res->gd_devid = 1;
++	return 0;
++}
++
++static int nfsd4_pnfs_dlm_getdevinfo(struct super_block *sb,
++				     struct exp_xdr_stream *xdr,
++				     u32 layout_type,
++				     const struct nfsd4_pnfs_deviceid *devid)
++{
++	int err, len, i = 0;
++	struct pnfs_filelayout_device fdev;
++	struct pnfs_filelayout_devaddr *daddr;
++	struct dlm_device_entry *dlm_pdev;
++	char   *bufp;
++
++	err = -ENOTSUPP;
++	if (layout_type != LAYOUT_NFSV4_1_FILES) {
++		dprintk("%s: ERROR: layout type isn't 'file' "
++			"(type: %x)\n", __func__, layout_type);
++		return err;
++	}
++
++	/* We only hand out a deviceid of 1 in LAYOUTGET, so a GETDEVICEINFO
++	 * with a gdia_device_id != 1 is invalid.
++	 */
++	err = -EINVAL;
++	if (devid->devid != 1) {
++		dprintk("%s: WARNING: didn't receive a deviceid of "
++			"1 (got: 0x%llx)\n", __func__, devid->devid);
++		return err;
++	}
++
++	/* If the DS list has not been established, return -EINVAL */
++	dlm_pdev = nfsd4_find_pnfs_dlm_device(sb);
++	if (!dlm_pdev) {
++		dprintk("%s: DEBUG: disk %s Not Found\n", __func__,
++			sb->s_bdev->bd_disk->disk_name);
++		return err;
++	}
++
++	dprintk("%s: Found disk %s with DS list |%s|\n",
++		__func__, dlm_pdev->disk_name, dlm_pdev->ds_list);
++
++	memset(&fdev, '\0', sizeof(fdev));
++	fdev.fl_device_length = dlm_pdev->num_ds;
++
++	err = -ENOMEM;
++	len = sizeof(*fdev.fl_device_list) * fdev.fl_device_length;
++	fdev.fl_device_list = kzalloc(len, GFP_KERNEL);
++	if (!fdev.fl_device_list) {
++		printk(KERN_ERR "%s: ERROR: unable to kmalloc a device list "
++			"buffer for %d DSes.\n", __func__, dlm_pdev->num_ds);
++		fdev.fl_device_length = 0;
++		goto out;
++	}
++
++	/* Set a simple stripe index list: entry i selects data server i */
++	fdev.fl_stripeindices_length = fdev.fl_device_length;
++	fdev.fl_stripeindices_list = kzalloc(sizeof(u32) *
++				     fdev.fl_stripeindices_length, GFP_KERNEL);
++	if (!fdev.fl_stripeindices_list) {
++		printk(KERN_ERR "%s: ERROR: unable to kmalloc a stripeindices "
++			"list buffer for %d DSes.\n", __func__, dlm_pdev->num_ds);
++		goto out;
++	}
++	for (i = 0; i < fdev.fl_stripeindices_length; i++)
++		fdev.fl_stripeindices_list[i] = i;
++
++	/* Transfer the data server list with a single multipath entry */
++	bufp = dlm_pdev->ds_list;
++	for (i = 0; i < fdev.fl_device_length; i++) {
++		daddr = kmalloc(sizeof(*daddr), GFP_KERNEL);
++		if (!daddr) {
++			printk(KERN_ERR "%s: ERROR: unable to kmalloc a device "
++				"addr buffer.\n", __func__);
++			goto out;
++		}
++		daddr->r_netid.data = "tcp";
++		daddr->r_netid.len = 3;
++
++		len = strcspn(bufp, ",");
++		daddr->r_addr.data = kmalloc(len + 4, GFP_KERNEL);
++		if (!daddr->r_addr.data) {
++			kfree(daddr);	/* not yet linked into fl_device_list */
++			goto out;
++		}
++		memcpy(daddr->r_addr.data, bufp, len);
++		/* append the port number.  interpreted as two more bytes
++		 * beyond the quad: ".8.1" -> 0x08.0x01 -> 0x0801 = port 2049. */
++		memcpy(daddr->r_addr.data + len, ".8.1", 4);
++		daddr->r_addr.len = len + 4;
++
++		fdev.fl_device_list[i].fl_multipath_length = 1;
++		fdev.fl_device_list[i].fl_multipath_list = daddr;
++		dprintk("%s: encoding DS |%s|\n", __func__, bufp);
++		bufp += len + 1;
++	}
++
++	/* have nfsd encode the device info */
++	err = filelayout_encode_devinfo(xdr, &fdev);
++out:
++	for (i = 0; i < fdev.fl_device_length; i++) {
++		daddr = fdev.fl_device_list[i].fl_multipath_list;
++		if (daddr)	/* unfilled slots are NULL from kzalloc */
++			kfree(daddr->r_addr.data);
++		kfree(daddr);
++	}
++	kfree(fdev.fl_device_list);
++	kfree(fdev.fl_stripeindices_list);
++	dprintk("<-- %s returns %d\n", __func__, err);
++	return err;
++}
++
++static int get_stripe_unit(int blocksize)
++{
++	/* Largest multiple of blocksize that does not exceed NFSSVC_MAXBLKSIZE */
++	return blocksize >= NFSSVC_MAXBLKSIZE ? blocksize :
++		NFSSVC_MAXBLKSIZE - (NFSSVC_MAXBLKSIZE % blocksize);
++}
++
++/*
++ * Pick the first stripe index for an inode: look up the inode's block device
++ * in the pnfs_dlm_device list and mask i_ino with (num_ds - 1); -1 if absent.
++ */
++static int dlm_ino_hash(struct inode *ino)
++{
++	struct dlm_device_entry *de;
++	u32 hash_mask = 0;
++
++	/* If we can't find the inode's block device in the pnfs_dlm_device
++	 * list then don't hand out a layout
++	 */
++	de = nfsd4_find_pnfs_dlm_device(ino->i_sb);
++	if (!de)
++		return -1;
++	hash_mask = de->num_ds - 1;	/* NOTE(review): even spread only if num_ds is a power of two */
++	return ino->i_ino & hash_mask;
++}
++
++static enum nfsstat4 nfsd4_pnfs_dlm_layoutget(struct inode *inode,
++			   struct exp_xdr_stream *xdr,
++			   const struct nfsd4_pnfs_layoutget_arg *args,
++			   struct nfsd4_pnfs_layoutget_res *res)
++{
++	struct pnfs_filelayout_layout *layout = NULL;
++	struct knfsd_fh *fhp = NULL;
++	int index;
++	enum nfsstat4 rc = NFS4_OK;
++
++	dprintk("%s: LAYOUT_GET\n", __func__);
++
++	/* DLM exported file systems only support layouts for READ */
++	if (res->lg_seg.iomode == IOMODE_RW)
++		return NFS4ERR_BADIOMODE;
++
++	index = dlm_ino_hash(inode);
++	dprintk("%s first stripe index %d i_ino %lu\n", __func__, index,
++		inode->i_ino);
++	if (index < 0)	/* no DLM device entry for this inode's superblock */
++		return NFS4ERR_LAYOUTUNAVAILABLE;
++
++	res->lg_seg.layout_type = LAYOUT_NFSV4_1_FILES;
++	/* Always give out whole file layouts */
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;
++	/* Always give out READ ONLY layouts */
++	res->lg_seg.iomode = IOMODE_READ;
++
++	layout = kzalloc(sizeof(*layout), GFP_KERNEL);
++	if (layout == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;
++		goto error;
++	}
++
++	/* Set file layout response args */
++	layout->lg_layout_type = LAYOUT_NFSV4_1_FILES;
++	layout->lg_stripe_type = STRIPE_SPARSE;
++	layout->lg_commit_through_mds = false;
++	layout->lg_stripe_unit = get_stripe_unit(inode->i_sb->s_blocksize);
++	layout->lg_fh_length = 1;
++	layout->device_id.sbid = args->lg_sbid;
++	layout->device_id.devid = 1;                                /*FSFTEMP*/
++	layout->lg_first_stripe_index = index;                      /*FSFTEMP*/
++	layout->lg_pattern_offset = 0;
++
++	fhp = kmalloc(sizeof(*fhp), GFP_KERNEL);
++	if (fhp == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;
++		goto error;
++	}
++	/* One handle only: a private copy of the request's fh, run through pnfs_fh_mark_ds() */
++	memcpy(fhp, args->lg_fh, sizeof(*fhp));
++	pnfs_fh_mark_ds(fhp);
++	layout->lg_fh_list = fhp;
++
++	/* Call nfsd to encode layout */
++	rc = filelayout_encode_layout(xdr, layout);
++exit:
++	kfree(layout);	/* both may still be NULL here; kfree(NULL) is a no-op */
++	kfree(fhp);
++	return rc;
++
++error:
++	res->lg_seg.length = 0;	/* report an empty segment on failure */
++	goto exit;
++}
++
++static int
++nfsd4_pnfs_dlm_layouttype(struct super_block *sb)
++{
++	return LAYOUT_NFSV4_1_FILES;	/* the only layout type this file serves; sb is unused */
++}
++
++/* File-layout export callbacks for DLM cluster file systems exported by pNFSD */
++const struct pnfs_export_operations pnfs_dlm_export_ops = {
++	.layout_type = nfsd4_pnfs_dlm_layouttype,
++	.get_device_info = nfsd4_pnfs_dlm_getdevinfo,
++	.get_device_iter = nfsd4_pnfs_dlm_getdeviter,
++	.layout_get = nfsd4_pnfs_dlm_layoutget,
++};
++EXPORT_SYMBOL(pnfs_dlm_export_ops);
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-08-23 12:09:03.310501390 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-08-23 12:09:03.310501390 -0400
+@@ -0,0 +1,620 @@
++/*
++*  linux/fs/nfsd/nfs4pnfsds.c
++*
++*  Copyright (c) 2005 The Regents of the University of Michigan.
++*  All rights reserved.
++*
++*  Andy Adamson <andros@umich.edu>
++*
++*  Redistribution and use in source and binary forms, with or without
++*  modification, are permitted provided that the following conditions
++*  are met:
++*
++*  1. Redistributions of source code must retain the above copyright
++*     notice, this list of conditions and the following disclaimer.
++*  2. Redistributions in binary form must reproduce the above copyright
++*     notice, this list of conditions and the following disclaimer in the
++*     documentation and/or other materials provided with the distribution.
++*  3. Neither the name of the University nor the names of its
++*     contributors may be used to endorse or promote products derived
++*     from this software without specific prior written permission.
++*
++*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++*
++*/
++#if defined(CONFIG_PNFSD)
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++#include <linux/param.h>
++#include <linux/sunrpc/svc.h>
++#include <linux/sunrpc/debug.h>
++#include <linux/nfs4.h>
++#include <linux/exportfs.h>
++#include <linux/sched.h>
++
++#include "nfsd.h"
++#include "pnfsd.h"
++#include "state.h"
++
++/*
++ *******************
++ *   	 PNFS
++ *******************
++ */
++/*
++ * Hash tables for pNFS Data Server state
++ *
++ * mds_nodeid:	list of struct pnfs_mds_id one per Metadata server (MDS) using
++ *		this data server (DS).
++ *
++ * mds_clid_hashtbl[]: uses clientid_hashval(), hash of all clientids obtained
++ *			from any MDS.
++ *
++ * ds_stid_hashtbl[]: uses stateid_hashval(), hash of all stateids obtained
++ *			from any MDS.
++ *
++ */
++/* Hash tables for clientid state */
++#define CLIENT_HASH_BITS                 4
++#define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
++#define CLIENT_HASH_MASK                (CLIENT_HASH_SIZE - 1)
++
++#define clientid_hashval(id) \
++	((id) & CLIENT_HASH_MASK)
++
++/* hash table for pnfs_ds_stateid */
++#define STATEID_HASH_BITS              10
++#define STATEID_HASH_SIZE              (1 << STATEID_HASH_BITS)
++#define STATEID_HASH_MASK              (STATEID_HASH_SIZE - 1)
++
++#define stateid_hashval(owner_id, file_id)  \
++	(((owner_id) + (file_id)) & STATEID_HASH_MASK)
++
++static struct list_head mds_id_tbl;
++static struct list_head mds_clid_hashtbl[CLIENT_HASH_SIZE];
++static struct list_head ds_stid_hashtbl[STATEID_HASH_SIZE];
++
++static inline void put_ds_clientid(struct pnfs_ds_clientid *dcp);
++static inline void put_ds_mdsid(struct pnfs_mds_id *mdp);
++
++/* Mutex for data server state.  Needs to be separate from
++ * mds state mutex since a node can be both mds and ds */
++static DEFINE_MUTEX(ds_mutex);
++static struct thread_info *ds_mutex_owner;
++
++static void
++ds_lock_state(void)
++{
++	mutex_lock(&ds_mutex);
++	ds_mutex_owner = current_thread_info();	/* checked by ds_unlock_state() */
++}
++
++static void
++ds_unlock_state(void)
++{
++	BUG_ON(ds_mutex_owner != current_thread_info());	/* only the locking thread may unlock */
++	ds_mutex_owner = NULL;
++	mutex_unlock(&ds_mutex);
++}
++
++static int
++cmp_clid(const clientid_t *cl1, const clientid_t *cl2)
++{
++	/* Non-zero only when both the id and the boot time agree */
++	return cl1->cl_id == cl2->cl_id && cl1->cl_boot == cl2->cl_boot;
++}
++
++void
++nfs4_pnfs_state_init(void)
++{
++	int bucket;
++
++	/* Start with an empty MDS list and empty stateid/clientid hash chains */
++	INIT_LIST_HEAD(&mds_id_tbl);
++
++	for (bucket = 0; bucket < STATEID_HASH_SIZE; bucket++)
++		INIT_LIST_HEAD(&ds_stid_hashtbl[bucket]);
++	for (bucket = 0; bucket < CLIENT_HASH_SIZE; bucket++)
++		INIT_LIST_HEAD(&mds_clid_hashtbl[bucket]);
++}
++
++static struct pnfs_mds_id *
++find_pnfs_mds_id(u32 mdsid)
++{
++	struct pnfs_mds_id *mdp;
++
++	dprintk("pNFSD: %s\n", __func__);
++	/* Linear scan of the MDS list; NULL when no entry carries this id */
++	list_for_each_entry(mdp, &mds_id_tbl, di_hash)
++		if (mdp->di_mdsid == mdsid)
++			return mdp;
++	return NULL;
++}
++
++static struct pnfs_ds_clientid *
++find_pnfs_ds_clientid(const clientid_t *clid)
++{
++	struct list_head *chain = &mds_clid_hashtbl[clientid_hashval(clid->cl_id)];
++	struct pnfs_ds_clientid *dcp;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	/* Walk the chain selected by cl_id; cmp_clid() matches boot time and id */
++	list_for_each_entry(dcp, chain, dc_hash) {
++		if (cmp_clid(&dcp->dc_mdsclid, clid))
++			return dcp;
++	}
++	return NULL;
++}
++
++static struct pnfs_ds_stateid *
++find_pnfs_ds_stateid(stateid_t *stid)
++{
++	struct pnfs_ds_stateid *local = NULL;
++	u32 st_id = stid->si_stateownerid;
++	u32 f_id = stid->si_fileid;
++	unsigned int hashval;
++
++	dprintk("pNFSD: %s\n", __func__);
++	/* Match on owner id, file id and boot time; si_generation is not compared */
++	hashval = stateid_hashval(st_id, f_id);
++	list_for_each_entry(local, &ds_stid_hashtbl[hashval], ds_hash)
++		if ((local->ds_stid.si_stateownerid == st_id) &&
++				(local->ds_stid.si_fileid == f_id) &&
++				(local->ds_stid.si_boot == stid->si_boot)) {
++			stateid_t *sid = &local->ds_stid;
++			dprintk("NFSD: %s <-- %p ds_flags %lx " STATEID_FMT "\n",
++				__func__, local, local->ds_flags,
++				STATEID_VAL(sid));
++			return local;	/* no reference is taken for the caller */
++		}
++	return NULL;
++}
++
++static void
++release_ds_mdsid(struct kref *kref)
++{
++	struct pnfs_mds_id *mdp =
++		container_of(kref, struct pnfs_mds_id, di_ref);
++	dprintk("pNFSD: %s\n", __func__);
++	/* kref release callback: unlink from mds_id_tbl and its clientid list, then free */
++	list_del(&mdp->di_hash);
++	list_del(&mdp->di_mdsclid);
++	kfree(mdp);
++}
++
++static void
++release_ds_clientid(struct kref *kref)
++{
++	struct pnfs_ds_clientid *dcp =
++		container_of(kref, struct pnfs_ds_clientid, dc_ref);
++	struct pnfs_mds_id *mdp;
++	dprintk("pNFSD: %s\n", __func__);
++	/* kref release callback: give back this clientid's reference on its MDS entry */
++	mdp = find_pnfs_mds_id(dcp->dc_mdsid);
++	if (mdp)
++		put_ds_mdsid(mdp);
++	/* then unlink from the hash chain, stateid list and per-MDS list, and free */
++	list_del(&dcp->dc_hash);
++	list_del(&dcp->dc_stateid);
++	list_del(&dcp->dc_permdsid);
++	kfree(dcp);
++}
++
++static void
++release_ds_stateid(struct kref *kref)
++{
++	struct pnfs_ds_stateid *dsp =
++		container_of(kref, struct pnfs_ds_stateid, ds_ref);
++	struct pnfs_ds_clientid *dcp;
++	dprintk("pNFS %s: dsp %p\n", __func__, dsp);
++	/* kref release callback: give back this stateid's reference on its clientid */
++	dcp = find_pnfs_ds_clientid(&dsp->ds_mdsclid);
++	if (dcp)
++		put_ds_clientid(dcp);
++	/* then unlink from the stateid hash chain and per-clientid list, and free */
++	list_del(&dsp->ds_hash);
++	list_del(&dsp->ds_perclid);
++	kfree(dsp);
++}
++
++static inline void
++put_ds_clientid(struct pnfs_ds_clientid *dcp)
++{
++	dprintk("pNFS %s: dcp %p ref %d\n", __func__, dcp,
++		atomic_read(&dcp->dc_ref.refcount));
++	kref_put(&dcp->dc_ref, release_ds_clientid);	/* last put runs release_ds_clientid() */
++}
++
++static inline void
++get_ds_clientid(struct pnfs_ds_clientid *dcp)
++{
++	dprintk("pNFS %s: dcp %p ref %d\n", __func__, dcp,
++		atomic_read(&dcp->dc_ref.refcount));	/* count before the increment */
++	kref_get(&dcp->dc_ref);
++}
++
++static inline void
++put_ds_mdsid(struct pnfs_mds_id *mdp)
++{
++	dprintk("pNFS %s: mdp %p ref %d\n", __func__, mdp,
++		atomic_read(&mdp->di_ref.refcount));
++	kref_put(&mdp->di_ref, release_ds_mdsid);	/* last put runs release_ds_mdsid() */
++}
++
++static inline void
++get_ds_mdsid(struct pnfs_mds_id *mdp)
++{
++	dprintk("pNFS %s: mdp %p ref %d\n", __func__, mdp,
++		atomic_read(&mdp->di_ref.refcount));	/* count before the increment */
++	kref_get(&mdp->di_ref);
++}
++
++static inline void
++put_ds_stateid(struct pnfs_ds_stateid *dsp)
++{
++	dprintk("pNFS %s: dsp %p ref %d\n", __func__, dsp,
++		atomic_read(&dsp->ds_ref.refcount));
++	kref_put(&dsp->ds_ref, release_ds_stateid);	/* last put frees dsp: do not touch it afterwards */
++}
++
++static inline void
++get_ds_stateid(struct pnfs_ds_stateid *dsp)
++{
++	dprintk("pNFS %s: dsp %p ref %d\n", __func__, dsp,
++		atomic_read(&dsp->ds_ref.refcount));	/* count before the increment */
++	kref_get(&dsp->ds_ref);
++}
++
++void
++nfs4_pnfs_state_shutdown(void)
++{
++	struct pnfs_ds_stateid *dsp;
++	int i;
++
++	dprintk("pNFSD %s: -->\n", __func__);
++
++	ds_lock_state();
++	for (i = 0; i < STATEID_HASH_SIZE; i++) {
++		/* An entry leaves its chain only in release_ds_stateid(), i.e. on the last put */
++		while (!list_empty(&ds_stid_hashtbl[i])) {
++			dsp = list_entry(ds_stid_hashtbl[i].next,
++					 struct pnfs_ds_stateid, ds_hash);
++			put_ds_stateid(dsp);	/* repeated on the head entry until it is released */
++		}
++	}
++	ds_unlock_state();
++}
++
++static struct pnfs_mds_id *
++alloc_init_mds_id(struct pnfs_get_state *gsp)
++{
++	struct pnfs_mds_id *mdp;
++
++	dprintk("pNFSD: %s\n", __func__);
++	/* Create the entry for MDS gsp->dsid, link it on mds_id_tbl; NULL on OOM */
++	mdp = kmalloc(sizeof(*mdp), GFP_KERNEL);
++	if (!mdp)
++		return NULL;
++	INIT_LIST_HEAD(&mdp->di_hash);
++	INIT_LIST_HEAD(&mdp->di_mdsclid);
++	list_add(&mdp->di_hash, &mds_id_tbl);
++	mdp->di_mdsid = gsp->dsid;
++	mdp->di_mdsboot = 0;
++	kref_init(&mdp->di_ref);	/* starts at one reference, owned by the caller */
++	return mdp;
++}
++
++static struct pnfs_ds_clientid *
++alloc_init_ds_clientid(struct pnfs_get_state *gsp)
++{
++	struct pnfs_mds_id *mdp;
++	struct pnfs_ds_clientid *dcp;
++	clientid_t *clid = (clientid_t *)&gsp->clid;
++	unsigned int hashval = clientid_hashval(clid->cl_id);
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	mdp = find_pnfs_mds_id(gsp->dsid);
++	if (mdp)
++		get_ds_mdsid(mdp);
++	else
++		mdp = alloc_init_mds_id(gsp);
++	if (!mdp)
++		return NULL;
++	dcp = kmalloc(sizeof(*dcp), GFP_KERNEL);
++	if (!dcp) {
++		put_ds_mdsid(mdp);	/* drop the MDS reference taken above */
++		return NULL;
++	}
++
++	INIT_LIST_HEAD(&dcp->dc_hash);
++	INIT_LIST_HEAD(&dcp->dc_stateid);
++	INIT_LIST_HEAD(&dcp->dc_permdsid);
++	list_add(&dcp->dc_hash, &mds_clid_hashtbl[hashval]);
++	list_add(&dcp->dc_permdsid, &mdp->di_mdsclid);
++	dcp->dc_mdsclid = *clid;
++	kref_init(&dcp->dc_ref);
++	dcp->dc_mdsid = gsp->dsid;
++	return dcp;
++}
++
++static struct pnfs_ds_stateid *
++alloc_init_ds_stateid(struct svc_fh *cfh, stateid_t *stidp)
++{
++	struct pnfs_ds_stateid *dsp;
++	u32 st_id = stidp->si_stateownerid;
++	u32 f_id  = stidp->si_fileid;
++	unsigned int hashval;
++
++	dprintk("pNFSD: %s\n", __func__);
++	/* Build a not-yet-valid marker stateid for stidp and hash it; NULL on OOM */
++	dsp = kmalloc(sizeof(*dsp), GFP_KERNEL);
++	if (!dsp)
++		return dsp;
++
++	INIT_LIST_HEAD(&dsp->ds_hash);
++	INIT_LIST_HEAD(&dsp->ds_perclid);
++	memcpy(&dsp->ds_stid, stidp, sizeof(stateid_t));
++	fh_copy_shallow(&dsp->ds_fh, &cfh->fh_handle);
++	dsp->ds_access = 0;
++	dsp->ds_status = 0;
++	dsp->ds_flags = 0L;
++	kref_init(&dsp->ds_ref);	/* initial reference, held on behalf of the hash chain */
++	set_bit(DS_STATEID_NEW, &dsp->ds_flags);
++	clear_bit(DS_STATEID_VALID, &dsp->ds_flags);
++	clear_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++	init_waitqueue_head(&dsp->ds_waitq);
++
++	hashval = stateid_hashval(st_id, f_id);
++	list_add(&dsp->ds_hash, &ds_stid_hashtbl[hashval]);
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, dsp);
++	return dsp;
++}
++
++static int
++update_ds_stateid(struct pnfs_ds_stateid *dsp, struct svc_fh *cfh,
++		  struct pnfs_get_state *gsp)
++{
++	struct pnfs_ds_clientid *dcp;
++	int new = 0;
++
++	dprintk("pNFSD: %s dsp %p\n", __func__, dsp);
++	/* Fill dsp from the MDS reply in gsp and mark it valid; returns 1 on OOM, else 0 */
++	dcp = find_pnfs_ds_clientid((clientid_t *)&gsp->clid);
++	if (!dcp) {
++		dcp = alloc_init_ds_clientid(gsp);
++		if (!dcp)
++			return 1;
++		new = 1;
++	}
++	if (test_bit(DS_STATEID_NEW, &dsp->ds_flags)) {
++		list_add(&dsp->ds_perclid, &dcp->dc_stateid);
++		if (!new)	/* a fresh dcp already carries its initial reference */
++			get_ds_clientid(dcp);
++	}
++
++	memcpy(&dsp->ds_stid, &gsp->stid, sizeof(stateid_t));
++	dsp->ds_access = gsp->access;
++	dsp->ds_status = 0;
++	dsp->ds_verifier[0] = gsp->verifier[0];
++	dsp->ds_verifier[1] = gsp->verifier[1];
++	memcpy(&dsp->ds_mdsclid, &gsp->clid, sizeof(clientid_t));
++	set_bit(DS_STATEID_VALID, &dsp->ds_flags);
++	clear_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++	clear_bit(DS_STATEID_NEW, &dsp->ds_flags);
++	return 0;
++}
++
++int
++nfs4_pnfs_cb_change_state(struct pnfs_get_state *gs)
++{
++	stateid_t *stid = (stateid_t *)&gs->stid;
++	struct pnfs_ds_stateid *dsp;
++
++	dprintk("pNFSD: %s stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(stid));
++	/* Drop one reference on the cached stateid, if any; 0 if found, else -ENOENT */
++	ds_lock_state();
++	dsp = find_pnfs_ds_stateid(stid);
++	if (dsp)
++		put_ds_stateid(dsp);
++	ds_unlock_state();
++	/* From here on dsp is used only as a found/not-found flag, never dereferenced */
++	dprintk("pNFSD: %s dsp %p\n", __func__, dsp);
++
++	if (dsp)
++		return 0;
++	return -ENOENT;
++}
++
++/* Retrieves and validates stateid.
++ * If stateid exists and its fields match, return it.
++ * If stateid exists but either the generation or
++ * ownerids don't match, check with mds to see if it is valid.
++ * If the stateid doesn't exist, the first thread creates an
++ * invalid *marker* stateid, then checks to see if the
++ * stateid exists on the mds.  If so, it validates the *marker*
++ * stateid and updates its fields.  Subsequent threads that
++ * find the *marker* stateid wait until it is valid or an error
++ * occurs.  Returns NULL when validation fails.
++ * Called with the ds state lock held; it is dropped around waits and MDS calls.
++ */
++static struct pnfs_ds_stateid *
++nfsv4_ds_get_state(struct svc_fh *cfh, stateid_t *stidp)
++{
++	struct inode *ino = cfh->fh_dentry->d_inode;
++	struct super_block *sb;
++	struct pnfs_ds_stateid *dsp = NULL;
++	struct pnfs_get_state gs = {
++		.access = 0,
++	};
++	int status = 0, waiter = 0;
++
++	dprintk("pNFSD: %s -->\n", __func__);
++
++	dsp = find_pnfs_ds_stateid(stidp);
++	if (dsp && test_bit(DS_STATEID_VALID, &dsp->ds_flags) &&
++	    (stidp->si_generation == dsp->ds_stid.si_generation))
++		goto out_noput;
++
++	sb = ino->i_sb;
++	if (!sb || !sb->s_pnfs_op || !sb->s_pnfs_op->get_state)
++		goto out_noput;
++
++	/* Uninitialize current state if it exists yet it doesn't match.
++	 * If it is already invalid, another thread is checking state */
++	if (dsp) {
++		if (!test_and_clear_bit(DS_STATEID_VALID, &dsp->ds_flags))
++			waiter = 1;
++	} else {
++		dsp = alloc_init_ds_stateid(cfh, stidp);
++		if (!dsp)
++			goto out_noput;
++	}
++
++	dprintk("pNFSD: %s Starting loop\n", __func__);
++	get_ds_stateid(dsp);
++	while (!test_bit(DS_STATEID_VALID, &dsp->ds_flags)) {
++		ds_unlock_state();
++		/* Another thread is checking the state */
++		if (waiter) {
++			dprintk("pNFSD: %s waiting\n", __func__);
++			wait_event_interruptible_timeout(dsp->ds_waitq,
++				(test_bit(DS_STATEID_VALID, &dsp->ds_flags) ||
++				 test_bit(DS_STATEID_ERROR, &dsp->ds_flags)),
++				 msecs_to_jiffies(1024));
++			dprintk("pNFSD: %s awake\n", __func__);
++			ds_lock_state();
++			if (test_bit(DS_STATEID_ERROR, &dsp->ds_flags))
++				goto out;
++
++			continue;
++		}
++
++		/* Validate stateid on mds */
++		dprintk("pNFSD: %s Checking state on MDS\n", __func__);
++		memcpy(&gs.stid, stidp, sizeof(stateid_t));
++		status = sb->s_pnfs_op->get_state(ino, &cfh->fh_handle, &gs);
++		dprintk("pNFSD: %s from MDS status %d\n", __func__, status);
++		ds_lock_state();
++		/* if !status and stateid is valid, update id and mark valid */
++		if (status || update_ds_stateid(dsp, cfh, &gs)) {
++			set_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++			/* remove invalid stateid from list */
++			put_ds_stateid(dsp);
++			wake_up(&dsp->ds_waitq);
++			goto out;
++		}
++		wake_up(&dsp->ds_waitq);
++	}
++out:
++	/* Latch the error bit first: dropping our reference may free dsp */
++	status = test_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++	put_ds_stateid(dsp);
++	if (status)
++		dsp = NULL;
++out_noput:
++	if (dsp)
++		dprintk("pNFSD: %s <-- dsp %p ds_flags %lx " STATEID_FMT "\n",
++			__func__, dsp, dsp->ds_flags, STATEID_VAL(&dsp->ds_stid));
++	if (dsp && test_bit(DS_STATEID_ERROR, &dsp->ds_flags))
++		dsp = NULL;	/* If error, return null */
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, dsp);
++	return dsp;
++}
++
++int
++nfs4_preprocess_pnfs_ds_stateid(struct svc_fh *cfh, stateid_t *stateid)
++{
++	struct pnfs_ds_stateid *dsp;
++	int status = 0;
++
++	dprintk("pNFSD: %s --> " STATEID_FMT "\n", __func__,
++		STATEID_VAL(stateid));
++
++	/* Must release state lock while verifying stateid on mds */
++	nfs4_unlock_state();
++	ds_lock_state();
++	dsp = nfsv4_ds_get_state(cfh, stateid);
++	if (dsp) {
++		get_ds_stateid(dsp);	/* hold dsp across the checks below */
++		dprintk("pNFSD: %s Found " STATEID_FMT "\n", __func__,
++			STATEID_VAL(&dsp->ds_stid));
++
++		dprintk("NFSD: %s: dsp %p fh_size %u:%u "
++			"fh [%08x:%08x:%08x:%08x]:[%08x:%08x:%08x:%08x] "
++			"gen %x:%x\n",
++			__func__, dsp,
++			cfh->fh_handle.fh_size, dsp->ds_fh.fh_size,
++			((unsigned *)&cfh->fh_handle.fh_base)[0],
++			((unsigned *)&cfh->fh_handle.fh_base)[1],
++			((unsigned *)&cfh->fh_handle.fh_base)[2],
++			((unsigned *)&cfh->fh_handle.fh_base)[3],
++			((unsigned *)&dsp->ds_fh.fh_base)[0],
++			((unsigned *)&dsp->ds_fh.fh_base)[1],
++			((unsigned *)&dsp->ds_fh.fh_base)[2],
++			((unsigned *)&dsp->ds_fh.fh_base)[3],
++			stateid->si_generation, dsp->ds_stid.si_generation);
++	}
++	/* Bad: no state, different file handle, or a generation newer than ours; old: an older one */
++	if (!dsp ||
++	    (cfh->fh_handle.fh_size != dsp->ds_fh.fh_size) ||
++	    (memcmp(&cfh->fh_handle.fh_base, &dsp->ds_fh.fh_base,
++		    dsp->ds_fh.fh_size) != 0) ||
++	    (stateid->si_generation > dsp->ds_stid.si_generation))
++		status = nfserr_bad_stateid;
++	else if (stateid->si_generation < dsp->ds_stid.si_generation)
++		status = nfserr_old_stateid;
++
++	if (dsp)
++		put_ds_stateid(dsp);
++	ds_unlock_state();
++	nfs4_lock_state();	/* retake the nfs4 state lock released on entry */
++	dprintk("pNFSD: %s <-- status %d\n", __func__, be32_to_cpu(status));
++	return status;
++}
++
++void
++nfs4_ds_get_verifier(stateid_t *stateid, struct super_block *sb, u32 *p)
++{
++	struct pnfs_ds_stateid *dsp = NULL;
++
++	dprintk("pNFSD: %s --> stid %p\n", __func__, stateid);
++
++	ds_lock_state();
++	if (stateid != NULL) {
++		dsp = find_pnfs_ds_stateid(stateid);
++		if (dsp)
++			get_ds_stateid(dsp);
++	}
++
++	/* XXX: Should we fetch the stateid or wait if some other
++	 * thread is currently retrieving the stateid ? */
++	if (dsp && test_bit(DS_STATEID_VALID, &dsp->ds_flags)) {
++		*p++ = dsp->ds_verifier[0];
++		*p++ = dsp->ds_verifier[1];
++		put_ds_stateid(dsp);
++	} else {
++		/* must be on MDS; first drop the ref on a not-yet-valid stateid */
++		if (dsp)
++			put_ds_stateid(dsp);
++		ds_unlock_state();
++		sb->s_pnfs_op->get_verifier(sb, p);
++		ds_lock_state();
++	}
++	ds_unlock_state();
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, dsp);
++}
++
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-08-23 12:08:29.091491685 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 12:09:03.311501496 -0400
+@@ -34,10 +34,14 @@
+  */
+ #include <linux/file.h>
+ #include <linux/slab.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd4_block.h>
+ 
+ #include "cache.h"
+ #include "xdr4.h"
+ #include "vfs.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY		NFSDDBG_PROC
+ 
+@@ -372,6 +376,24 @@ nfsd4_open(struct svc_rqst *rqstp, struc
+ 	 * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
+ 	 */
+ 	status = nfsd4_process_open2(rqstp, &cstate->current_fh, open);
++#if defined(CONFIG_SPNFS)
++	if (!status && spnfs_enabled()) {
++		struct inode *inode = cstate->current_fh.fh_dentry->d_inode;
++
++		status = spnfs_open(inode, open);
++		if (status) {
++			dprintk(
++			     "nfsd: pNFS could not be enabled for inode: %lu\n",
++			     inode->i_ino);
++			/*
++			 * XXX When there's a failure then need to indicate to
++			 * future ops that no pNFS is available.  Should I save
++			 * the status in the inode?  It's kind of a big hammer.
++			 * But there may be no stripes available?
++			 */
++		}
++	}
++#endif /* CONFIG_SPNFS */
+ out:
+ 	if (open->op_stateowner) {
+ 		nfs4_get_stateowner(open->op_stateowner);
+@@ -454,16 +476,30 @@ nfsd4_access(struct svc_rqst *rqstp, str
+ 			   &access->ac_supported);
+ }
+ 
++static void
++nfsd4_get_verifier(struct super_block *sb, nfs4_verifier *verf)
++{
++	u32 *p = (u32 *)verf->data;
++	/* Prefer the filesystem's verifier when it provides a pNFS get_verifier op */
++#if defined(CONFIG_PNFSD)
++	if (sb->s_pnfs_op && sb->s_pnfs_op->get_verifier) {
++		nfs4_ds_get_verifier(NULL, sb, p);
++		return;
++	}
++#endif /* CONFIG_PNFSD */
++	/* Otherwise use the server boot time as the two-word verifier */
++	*p++ = nfssvc_boot.tv_sec;
++	*p++ = nfssvc_boot.tv_usec;
++}
++
+ static __be32
+ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ 	     struct nfsd4_commit *commit)
+ {
+ 	__be32 status;
+ 
+-	u32 *p = (u32 *)commit->co_verf.data;
+-	*p++ = nfssvc_boot.tv_sec;
+-	*p++ = nfssvc_boot.tv_usec;
+-
++	nfsd4_get_verifier(cstate->current_fh.fh_dentry->d_inode->i_sb,
++			   &commit->co_verf);
+ 	status = nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
+ 			     commit->co_count);
+ 	if (status == nfserr_symlink)
+@@ -816,7 +852,6 @@ nfsd4_write(struct svc_rqst *rqstp, stru
+ {
+ 	stateid_t *stateid = &write->wr_stateid;
+ 	struct file *filp = NULL;
+-	u32 *p;
+ 	__be32 status = nfs_ok;
+ 	unsigned long cnt;
+ 
+@@ -838,13 +873,49 @@ nfsd4_write(struct svc_rqst *rqstp, stru
+ 
+ 	cnt = write->wr_buflen;
+ 	write->wr_how_written = write->wr_stable_how;
+-	p = (u32 *)write->wr_verifier.data;
+-	*p++ = nfssvc_boot.tv_sec;
+-	*p++ = nfssvc_boot.tv_usec;
+ 
++	nfsd4_get_verifier(cstate->current_fh.fh_dentry->d_inode->i_sb,
++			   &write->wr_verifier);
++#if defined(CONFIG_SPNFS)
++#if defined(CONFIG_SPNFS_BLOCK)
++	if (pnfs_block_enabled(cstate->current_fh.fh_dentry->d_inode, 0)) {
++                status = bl_layoutrecall(cstate->current_fh.fh_dentry->d_inode,
++		    RETURN_FILE, write->wr_offset, write->wr_buflen);
++                if (!status) {
++                        status =  nfsd_write(rqstp, &cstate->current_fh, filp,
++			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
++			     &cnt, &write->wr_how_written);
++                }
++        } else
++#endif
++		
++	if (spnfs_enabled()) {
++		status = spnfs_write(cstate->current_fh.fh_dentry->d_inode,
++			write->wr_offset, write->wr_buflen, write->wr_vlen,
++			rqstp);
++		if (status == nfs_ok) {
++			/* DMXXX: HACK to get filesize set */
++			/* write one byte at offset+length-1 */
++			struct kvec k[1];
++			char zero = 0;
++			unsigned long cnt = 1;
++
++			k[0].iov_base = (void *)&zero;
++			k[0].iov_len = 1;
++			nfsd_write(rqstp, &cstate->current_fh, filp,
++				   write->wr_offset+write->wr_buflen-1, k, 1,
++				   &cnt, &write->wr_how_written);
++		}
++	} else /* we're not an MDS */
++		status =  nfsd_write(rqstp, &cstate->current_fh, filp,
++			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
++			     &cnt, &write->wr_how_written);
++#else
+ 	status =  nfsd_write(rqstp, &cstate->current_fh, filp,
+ 			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
+ 			     &cnt, &write->wr_how_written);
++#endif /* CONFIG_SPNFS */
++
+ 	if (filp)
+ 		fput(filp);
+ 
+@@ -935,6 +1006,306 @@ nfsd4_verify(struct svc_rqst *rqstp, str
+ 	return status == nfserr_same ? nfs_ok : status;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++static __be32
++nfsd4_layout_verify(struct super_block *sb, struct svc_export *exp,
++		    unsigned int layout_type)
++{
++	int status, type;
++
++	/* check to see if pNFS is supported. */
++	status = nfserr_layoutunavailable;
++	if (exp && exp->ex_pnfs == 0) {	/* exp may be NULL: export check is then skipped */
++		dprintk("%s: Underlying file system "
++			"is not exported over pNFS\n", __func__);
++		goto out;
++	}
++	if (!sb->s_pnfs_op || !sb->s_pnfs_op->layout_type) {
++		dprintk("%s: Underlying file system "
++			"does not support pNFS\n", __func__);
++		goto out;
++	}
++
++	type = sb->s_pnfs_op->layout_type(sb);
++
++	/* check to see if requested layout type is supported. */
++	status = nfserr_unknown_layouttype;
++	if (!type)
++		dprintk("BUG: %s: layout_type 0 is reserved and must not be "
++			"used by filesystem\n", __func__);
++	else if (type != layout_type)
++		dprintk("%s: requested layout type %d "
++		       "does not match supported type %d\n",
++			__func__, layout_type, type);
++	else
++		status = nfs_ok;	/* the only success path */
++out:
++	return status;
++}
++
++static __be32
++nfsd4_getdevlist(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_getdevlist *gdlp)
++{
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++	int status;
++
++	dprintk("%s: type %u maxdevices %u cookie %llu verf %llu\n",
++		__func__, gdlp->gd_layout_type, gdlp->gd_maxdevices,
++		gdlp->gd_cookie, gdlp->gd_verf);
++
++	/* Validates the request only; devices are fetched at encode time via gd_fhp */
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* We must be able to encode at least one device */
++	if (!gdlp->gd_maxdevices)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     gdlp->gd_layout_type);
++	if (status)
++		goto out;
++
++	/* Do nothing if underlying file system does not support
++	 * getdevicelist */
++	if (!sb->s_pnfs_op->get_device_iter) {
++		status = nfserr_notsupp;
++		goto out;
++	}
++
++	/* Set up arguments so device can be retrieved at encode time */
++	gdlp->gd_fhp = &cstate->current_fh;
++out:
++	return status;
++}
++
++static __be32
++nfsd4_getdevinfo(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_getdevinfo *gdp)
++{
++	struct super_block *sb;
++	int status;
++	clientid_t clid;
++
++	dprintk("%s: layout_type %u dev_id %llx:%llx maxcnt %u\n",
++	       __func__, gdp->gd_layout_type, gdp->gd_devid.sbid,
++	       gdp->gd_devid.devid, gdp->gd_maxcount);
++
++	status = nfserr_inval;	/* NOTE: overwritten on every path below */
++	sb = find_sbid_id(gdp->gd_devid.sbid);
++	dprintk("%s: sb %p\n", __func__, sb);
++	if (!sb) {
++		status = nfserr_noent;
++		goto out;
++	}
++
++	/* Ensure the file system supports pNFS and the requested layout
++	 * type; no export is passed, so the per-export check is skipped
++	 */
++	status = nfsd4_layout_verify(sb, NULL, gdp->gd_layout_type);
++	if (status)
++		goto out;
++
++	/* Set up arguments so device can be retrieved at encode time */
++	gdp->gd_sb = sb;
++
++	/* Hand the requested notify types over for this session's clientid */
++	copy_clientid(&clid, cstate->session);
++	pnfs_set_device_notify(&clid, gdp->gd_notify_types);
++out:
++	return status;
++}
++
++static __be32
++nfsd4_layoutget(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutget *lgp)
++{
++	int status;
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lgp->lg_seg.layout_type);
++	if (status)
++		goto out;
++
++	status = nfserr_badiomode;	/* only READ and RW are accepted here */
++	if (lgp->lg_seg.iomode != IOMODE_READ &&
++	    lgp->lg_seg.iomode != IOMODE_RW) {
++		dprintk("pNFS %s: invalid iomode %d\n", __func__,
++			lgp->lg_seg.iomode);
++		goto out;
++	}
++
++	/* Only validate here: save the fh and the session's clientid so
++	 * the layout can be retrieved at encode time */
++	lgp->lg_fhp = current_fh;
++	copy_clientid((clientid_t *)&lgp->lg_seg.clientid, cstate->session);
++	status = nfs_ok;
++out:
++	return status;
++}
++
++static __be32
++nfsd4_layoutcommit(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	int status;
++	struct inode *ino = NULL;
++	struct iattr ia;
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	dprintk("NFSD: nfsd4_layoutcommit \n");
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	ino = current_fh->fh_dentry->d_inode;
++	if (!ino)
++		goto out;
++
++	sb = ino->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lcp->args.lc_seg.layout_type);
++	if (status)
++		goto out;
++
++	/* This will only extend the file length.  Do a quick
++	 * check to see if there is any point in waiting for the update
++	 * locks.
++	 * TODO: Is this correct for all back ends?
++	 */
++	dprintk("%s:new offset: %d new size: %llu old size: %lld\n",
++		__func__, lcp->args.lc_newoffset, lcp->args.lc_last_wr + 1,
++		ino->i_size);
++
++	/* Set clientid from sessionid */
++	copy_clientid((clientid_t *)&lcp->args.lc_seg.clientid, cstate->session);
++	lcp->res.lc_size_chg = 0;
++	if (sb->s_pnfs_op->layout_commit) {
++		status = sb->s_pnfs_op->layout_commit(ino, &lcp->args, &lcp->res);
++		dprintk("%s:layout_commit result %d\n", __func__, status);
++	} else {
++		fh_lock(current_fh);
++		if ((lcp->args.lc_newoffset == 0) ||
++		    ((lcp->args.lc_last_wr + 1) <= ino->i_size)) {
++			status = 0;
++			lcp->res.lc_size_chg = 0;
++			fh_unlock(current_fh);
++			goto out;
++		}
++
++		/* No fs hook: grow the file to last written byte + 1 ourselves */
++		dprintk("%s: Modifying file size\n", __func__);
++		ia.ia_valid = ATTR_SIZE;
++		ia.ia_size = lcp->args.lc_last_wr + 1;
++		status = notify_change(current_fh->fh_dentry, &ia);
++		fh_unlock(current_fh);
++		dprintk("%s:notify_change result %d\n", __func__, status);
++		status = nfserrno(status);	/* -errno -> NFS status */
++	}
++
++	if (!status && lcp->res.lc_size_chg &&
++	    EX_ISSYNC(current_fh->fh_export)) {
++		dprintk("%s: Synchronously writing inode size %llu\n",
++			__func__, ino->i_size);
++		write_inode_now(ino, 1);
++		lcp->res.lc_newsize = i_size_read(ino);
++	}
++out:
++	return status;
++}
++
++static __be32
++nfsd4_layoutreturn(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	int status;
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	lrp->lrs_present = 0;	/* logged at out: on every path, even errors */
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lrp->args.lr_seg.layout_type);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	if (lrp->args.lr_return_type != RETURN_FILE &&
++	    lrp->args.lr_return_type != RETURN_FSID &&
++	    lrp->args.lr_return_type != RETURN_ALL) {
++		dprintk("pNFS %s: invalid return_type %d\n", __func__,
++			lrp->args.lr_return_type);
++		goto out;
++	}
++
++	if (lrp->args.lr_seg.iomode != IOMODE_READ &&
++	    lrp->args.lr_seg.iomode != IOMODE_RW &&
++	    lrp->args.lr_seg.iomode != IOMODE_ANY) {
++		dprintk("pNFS %s: invalid iomode %d\n", __func__,
++			lrp->args.lr_seg.iomode);
++		goto out;
++	}
++
++	/* Set clientid from sessionid */
++	copy_clientid((clientid_t *)&lrp->args.lr_seg.clientid, cstate->session);
++	lrp->lrs_present = (lrp->args.lr_return_type == RETURN_FILE);
++	status = nfs4_pnfs_return_layout(sb, current_fh, lrp);
++out:
++	dprintk("pNFS %s: status %d return_type 0x%x lrs_present %d\n",
++		__func__, status, lrp->args.lr_return_type, lrp->lrs_present);
++	return status;
++}
++#endif /* CONFIG_PNFSD */
++
+ /*
+  * NULL call.
+  */
+@@ -1317,6 +1688,29 @@ static struct nfsd4_operation nfsd4_ops[
+ 		.op_flags = ALLOWED_WITHOUT_FH,
+ 		.op_name = "OP_RECLAIM_COMPLETE",
+ 	},
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICELIST] = {
++		.op_func = (nfsd4op_func)nfsd4_getdevlist,
++		.op_name = "OP_GETDEVICELIST",
++	},
++	[OP_GETDEVICEINFO] = {
++		.op_func = (nfsd4op_func)nfsd4_getdevinfo,
++		.op_flags = ALLOWED_WITHOUT_FH,
++		.op_name = "OP_GETDEVICEINFO",
++	},
++	[OP_LAYOUTGET] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutget,
++		.op_name = "OP_LAYOUTGET",
++	},
++	[OP_LAYOUTCOMMIT] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutcommit,
++		.op_name = "OP_LAYOUTCOMMIT",
++	},
++	[OP_LAYOUTRETURN] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutreturn,
++		.op_name = "OP_LAYOUTRETURN",
++	},
++#endif /* CONFIG_PNFSD */
+ };
+ 
+ static const char *nfsd4_op_name(unsigned opnum)
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-08-23 12:08:29.093491375 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 12:09:03.313491310 -0400
+@@ -42,6 +42,8 @@
+ #include "xdr4.h"
+ #include "vfs.h"
+ 
++#include "pnfsd.h"
++
+ #define NFSDDBG_FACILITY                NFSDDBG_PROC
+ 
+ /* Globals */
+@@ -60,8 +62,6 @@ static u64 current_sessionid = 1;
+ #define ONE_STATEID(stateid)  (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
+ 
+ /* forward declarations */
+-static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
+-static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
+ static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
+ static void nfs4_set_recdir(char *recdir);
+ 
+@@ -69,6 +69,7 @@ static void nfs4_set_recdir(char *recdir
+ 
+ /* Currently used for almost all code touching nfsv4 state: */
+ static DEFINE_MUTEX(client_mutex);
++struct task_struct *client_mutex_owner;
+ 
+ /*
+  * Currently used for the del_recall_lru and file hash table.  In an
+@@ -86,11 +87,21 @@ void
+ nfs4_lock_state(void)
+ {
+ 	mutex_lock(&client_mutex);
++	client_mutex_owner = current;
++}
++
++#define BUG_ON_UNLOCKED_STATE() BUG_ON(client_mutex_owner != current)
++
++void
++nfs4_bug_on_unlocked_state(void)
++{
++	BUG_ON_UNLOCKED_STATE();	/* current must own client_mutex */
+ }
+ 
+ void
+ nfs4_unlock_state(void)
+ {
++	client_mutex_owner = NULL;
+ 	mutex_unlock(&client_mutex);
+ }
+ 
+@@ -109,7 +120,7 @@ opaque_hashval(const void *ptr, int nbyt
+ 
+ static struct list_head del_recall_lru;
+ 
+-static inline void
++inline void
+ put_nfs4_file(struct nfs4_file *fi)
+ {
+ 	if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) {
+@@ -120,7 +131,7 @@ put_nfs4_file(struct nfs4_file *fi)
+ 	}
+ }
+ 
+-static inline void
++inline void
+ get_nfs4_file(struct nfs4_file *fi)
+ {
+ 	atomic_inc(&fi->fi_ref);
+@@ -230,7 +241,10 @@ nfs4_close_delegation(struct nfs4_delega
+ 	 * but we want to remove the lease in any case. */
+ 	if (dp->dl_flock)
+ 		vfs_setlease(filp, F_UNLCK, &dp->dl_flock);
++	BUG_ON_UNLOCKED_STATE();
++	nfs4_unlock_state();	/* allow nested layout recall/return */
+ 	nfsd_close(filp);
++	nfs4_lock_state();
+ }
+ 
+ /* Called under the state lock. */
+@@ -266,8 +280,8 @@ static DEFINE_SPINLOCK(client_lock);
+  * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
+  * used in reboot/reset lease grace period processing
+  *
+- * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed
+- * setclientid_confirmed info. 
++ * conf_id_hashtbl[], and conf_str_hashtbl[] hold
++ * confirmed setclientid_confirmed info.
+  *
+  * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed 
+  * setclientid info.
+@@ -292,6 +306,7 @@ static void unhash_generic_stateid(struc
+ 	list_del(&stp->st_hash);
+ 	list_del(&stp->st_perfile);
+ 	list_del(&stp->st_perstateowner);
++	release_pnfs_ds_dev_list(stp);
+ }
+ 
+ static void free_generic_stateid(struct nfs4_stateid *stp)
+@@ -345,7 +360,10 @@ static void release_open_stateid(struct 
+ {
+ 	unhash_generic_stateid(stp);
+ 	release_stateid_lockowners(stp);
++	BUG_ON_UNLOCKED_STATE();
++	nfs4_unlock_state();	/* allow nested layout recall/return */
+ 	nfsd_close(stp->st_vfs_file);
++	nfs4_lock_state();
+ 	free_generic_stateid(stp);
+ }
+ 
+@@ -739,6 +757,8 @@ expire_client(struct nfs4_client *clp)
+ 	struct nfs4_delegation *dp;
+ 	struct list_head reaplist;
+ 
++	BUG_ON_UNLOCKED_STATE();
++
+ 	INIT_LIST_HEAD(&reaplist);
+ 	spin_lock(&recall_lock);
+ 	while (!list_empty(&clp->cl_delegations)) {
+@@ -758,6 +778,7 @@ expire_client(struct nfs4_client *clp)
+ 		sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
+ 		release_openowner(sop);
+ 	}
++	pnfs_expire_client(clp);
+ 	nfsd4_set_callback_client(clp, NULL);
+ 	if (clp->cl_cb_conn.cb_xprt)
+ 		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
+@@ -770,6 +791,13 @@ expire_client(struct nfs4_client *clp)
+ 	spin_unlock(&client_lock);
+ }
+ 
++void expire_client_lock(struct nfs4_client *clp)
++{
++	nfs4_lock_state();	/* expire_client() asserts lock ownership */
++	expire_client(clp);
++	nfs4_unlock_state();
++}
++
+ static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
+ {
+ 	memcpy(target->cl_verifier.data, source->data,
+@@ -859,6 +887,11 @@ static struct nfs4_client *create_client
+ 	INIT_LIST_HEAD(&clp->cl_strhash);
+ 	INIT_LIST_HEAD(&clp->cl_openowners);
+ 	INIT_LIST_HEAD(&clp->cl_delegations);
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&clp->cl_layouts);
++	INIT_LIST_HEAD(&clp->cl_layoutrecalls);
++	atomic_set(&clp->cl_deviceref, 0);
++#endif /* CONFIG_PNFSD */
+ 	INIT_LIST_HEAD(&clp->cl_sessions);
+ 	INIT_LIST_HEAD(&clp->cl_lru);
+ 	clp->cl_time = get_seconds();
+@@ -908,7 +941,7 @@ move_to_confirmed(struct nfs4_client *cl
+ 	renew_client(clp);
+ }
+ 
+-static struct nfs4_client *
++struct nfs4_client *
+ find_confirmed_client(clientid_t *clid)
+ {
+ 	struct nfs4_client *clp;
+@@ -978,6 +1011,24 @@ find_unconfirmed_client_by_str(const cha
+ 	return NULL;
+ }
+ 
++int
++filter_confirmed_clients(int (* func)(struct nfs4_client *, void *),
++			 void *arg)
++{
++	struct nfs4_client *clp, *next;
++	int i, status;
++
++	/* Call func per confirmed client; stop on first non-zero result. */
++	for (i = 0; i < CLIENT_HASH_SIZE; i++)
++		list_for_each_entry_safe (clp, next, &conf_str_hashtbl[i],
++					  cl_strhash) {
++			status = func(clp, arg);
++			if (status)
++				return status;
++		}
++	return 0;
++}
++
+ static void
+ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
+ {
+@@ -1110,8 +1161,12 @@ nfsd4_replay_cache_entry(struct nfsd4_co
+ static void
+ nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
+ {
+-	/* pNFS is not supported */
++#if defined(CONFIG_PNFSD)
++	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS |
++				  EXCHGID4_FLAG_USE_PNFS_DS;
++#else  /* CONFIG_PNFSD */
+ 	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
++#endif /* CONFIG_PNFSD */
+ 
+ 	/* Referrals are supported, Migration is not. */
+ 	new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
+@@ -1301,6 +1356,13 @@ nfsd4_create_session(struct svc_rqst *rq
+ 	struct nfsd4_clid_slot *cs_slot = NULL;
+ 	int status = 0;
+ 
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++	/* XXX hack to get local ip address */
++	memcpy(&pnfsd_lexp_addr, &rqstp->rq_xprt->xpt_local,
++		sizeof(pnfsd_lexp_addr));
++	pnfs_lexp_addr_len = rqstp->rq_xprt->xpt_locallen;
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
+ 	nfs4_lock_state();
+ 	unconf = find_unconfirmed_client(&cr_ses->clientid);
+ 	conf = find_confirmed_client(&cr_ses->clientid);
+@@ -1340,25 +1402,26 @@ nfsd4_create_session(struct svc_rqst *rq
+ 		cs_slot->sl_seqid++; /* from 0 to 1 */
+ 		move_to_confirmed(unconf);
+ 
+-		if (cr_ses->flags & SESSION4_BACK_CHAN) {
+-			unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
+-			svc_xprt_get(rqstp->rq_xprt);
+-			rpc_copy_addr(
+-				(struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
+-				sa);
+-			unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
+-			unconf->cl_cb_conn.cb_minorversion =
+-				cstate->minorversion;
+-			unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
+-			unconf->cl_cb_seq_nr = 1;
+-			nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
+-		}
++		if (is_ds_only_session(unconf->cl_exchange_flags))
++			cr_ses->flags &= ~SESSION4_BACK_CHAN;
++
+ 		conf = unconf;
+ 	} else {
+ 		status = nfserr_stale_clientid;
+ 		goto out;
+ 	}
+ 
++	if (cr_ses->flags & SESSION4_BACK_CHAN) {
++		conf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
++		svc_xprt_get(rqstp->rq_xprt);
++		rpc_copy_addr((struct sockaddr *)&conf->cl_cb_conn.cb_addr, sa);
++		conf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
++		conf->cl_cb_conn.cb_minorversion = cstate->minorversion;
++		conf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
++		conf->cl_cb_seq_nr = 1;
++		nfsd4_probe_callback(conf, &conf->cl_cb_conn);
++	}
++
+ 	/*
+ 	 * We do not support RDMA or persistent sessions
+ 	 */
+@@ -1746,7 +1809,7 @@ out:
+ 
+ /* OPEN Share state helper functions */
+ static inline struct nfs4_file *
+-alloc_init_file(struct inode *ino)
++alloc_init_file(struct inode *ino, struct svc_fh *current_fh)
+ {
+ 	struct nfs4_file *fp;
+ 	unsigned int hashval = file_hashval(ino);
+@@ -1760,6 +1823,16 @@ alloc_init_file(struct inode *ino)
+ 		fp->fi_inode = igrab(ino);
+ 		fp->fi_id = current_fileid++;
+ 		fp->fi_had_conflict = false;
++#if defined(CONFIG_PNFSD)
++		INIT_LIST_HEAD(&fp->fi_layouts);
++		INIT_LIST_HEAD(&fp->fi_layout_states);
++		fp->fi_fsid.major = current_fh->fh_export->ex_fsid;
++		fp->fi_fsid.minor = 0;
++		fp->fi_fhlen = current_fh->fh_handle.fh_size;
++		BUG_ON(fp->fi_fhlen > sizeof(fp->fi_fhval));
++		memcpy(fp->fi_fhval, &current_fh->fh_handle.fh_base,
++		       fp->fi_fhlen);
++#endif /* CONFIG_PNFSD */
+ 		spin_lock(&recall_lock);
+ 		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
+ 		spin_unlock(&recall_lock);
+@@ -1768,7 +1841,7 @@ alloc_init_file(struct inode *ino)
+ 	return NULL;
+ }
+ 
+-static void
++void
+ nfsd4_free_slab(struct kmem_cache **slab)
+ {
+ 	if (*slab == NULL)
+@@ -1784,6 +1857,7 @@ nfsd4_free_slabs(void)
+ 	nfsd4_free_slab(&file_slab);
+ 	nfsd4_free_slab(&stateid_slab);
+ 	nfsd4_free_slab(&deleg_slab);
++	nfsd4_free_pnfs_slabs();
+ }
+ 
+ static int
+@@ -1805,6 +1879,8 @@ nfsd4_init_slabs(void)
+ 			sizeof(struct nfs4_delegation), 0, 0, NULL);
+ 	if (deleg_slab == NULL)
+ 		goto out_nomem;
++	if (nfsd4_init_pnfs_slabs())
++		goto out_nomem;
+ 	return 0;
+ out_nomem:
+ 	nfsd4_free_slabs();
+@@ -1878,6 +1954,9 @@ init_stateid(struct nfs4_stateid *stp, s
+ 	INIT_LIST_HEAD(&stp->st_perstateowner);
+ 	INIT_LIST_HEAD(&stp->st_lockowners);
+ 	INIT_LIST_HEAD(&stp->st_perfile);
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&stp->st_pnfs_ds_id);
++#endif /* CONFIG_PNFSD */
+ 	list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
+ 	list_add(&stp->st_perstateowner, &sop->so_stateids);
+ 	list_add(&stp->st_perfile, &fp->fi_stateids);
+@@ -1919,6 +1998,7 @@ find_openstateowner_str(unsigned int has
+ {
+ 	struct nfs4_stateowner *so = NULL;
+ 
++	BUG_ON_UNLOCKED_STATE();
+ 	list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
+ 		if (same_owner_str(so, &open->op_owner, &open->op_clientid))
+ 			return so;
+@@ -1927,7 +2007,7 @@ find_openstateowner_str(unsigned int has
+ }
+ 
+ /* search file_hashtbl[] for file */
+-static struct nfs4_file *
++struct nfs4_file *
+ find_file(struct inode *ino)
+ {
+ 	unsigned int hashval = file_hashval(ino);
+@@ -1945,6 +2025,18 @@ find_file(struct inode *ino)
+ 	return NULL;
+ }
+ 
++struct nfs4_file *
++find_alloc_file(struct inode *ino, struct svc_fh *current_fh)
++{
++	struct nfs4_file *file = find_file(ino);
++
++	/* Nothing hashed for this inode yet: create and hash a new entry. */
++	if (!file)
++		file = alloc_init_file(ino, current_fh);
++
++	return file;
++}
++
+ static inline int access_valid(u32 x, u32 minorversion)
+ {
+ 	if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ)
+@@ -2503,7 +2595,7 @@ nfsd4_process_open2(struct svc_rqst *rqs
+ 		if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
+ 			goto out;
+ 		status = nfserr_resource;
+-		fp = alloc_init_file(ino);
++		fp = alloc_init_file(ino, current_fh);
+ 		if (fp == NULL)
+ 			goto out;
+ 	}
+@@ -2730,7 +2822,7 @@ nfs4_check_fh(struct svc_fh *fhp, struct
+ 	return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_path.dentry->d_inode;
+ }
+ 
+-static int
++int
+ STALE_STATEID(stateid_t *stateid)
+ {
+ 	if (stateid->si_boot == boot_time)
+@@ -2740,6 +2832,16 @@ STALE_STATEID(stateid_t *stateid)
+ 	return 1;
+ }
+ 
++__be32
++nfs4_check_stateid(stateid_t *stateid)
++{
++	/* The magic zero and one stateids are never acceptable here. */
++	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
++		return nfserr_bad_stateid;
++
++	return STALE_STATEID(stateid) ? nfserr_stale_stateid : 0;
++}
++
+ static inline int
+ access_permit_read(unsigned long access_bmap)
+ {
+@@ -2848,6 +2950,24 @@ nfs4_preprocess_stateid_op(struct nfsd4_
+ 	if (grace_disallows_io(ino))
+ 		return nfserr_grace;
+ 
++#if defined(CONFIG_PNFSD)
++	if (pnfs_fh_is_ds(&current_fh->fh_handle)) {
++		if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
++			status = nfserr_bad_stateid;
++		else
++#ifdef CONFIG_GFS2_FS_LOCKING_DLM
++		{
++			dprintk("%s Don't check DS stateid\n", __func__);
++			return 0;
++		}
++#else /* CONFIG_GFS2_FS_LOCKING_DLM */
++			status = nfs4_preprocess_pnfs_ds_stateid(current_fh,
++								 stateid);
++#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
++		goto out;
++	}
++#endif /* CONFIG_PNFSD */
++
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+ 
+@@ -2924,13 +3044,9 @@ nfs4_preprocess_seqid_op(struct nfsd4_co
+ 	*stpp = NULL;
+ 	*sopp = NULL;
+ 
+-	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
+-		dprintk("NFSD: preprocess_seqid_op: magic stateid!\n");
+-		return nfserr_bad_stateid;
+-	}
+-
+-	if (STALE_STATEID(stateid))
+-		return nfserr_stale_stateid;
++	status = nfs4_check_stateid(stateid);
++	if (status)
++		return status;
+ 
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+@@ -3205,11 +3321,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+ 	nfs4_lock_state();
+-	status = nfserr_bad_stateid;
+-	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
+-		goto out;
+-	status = nfserr_stale_stateid;
+-	if (STALE_STATEID(stateid))
++	status = nfs4_check_stateid(stateid);
++	if (status)
+ 		goto out;
+ 	status = nfserr_bad_stateid;
+ 	if (!is_delegation_stateid(stateid))
+@@ -3238,26 +3351,6 @@ out:
+ #define LOCK_HASH_SIZE             (1 << LOCK_HASH_BITS)
+ #define LOCK_HASH_MASK             (LOCK_HASH_SIZE - 1)
+ 
+-static inline u64
+-end_offset(u64 start, u64 len)
+-{
+-	u64 end;
+-
+-	end = start + len;
+-	return end >= start ? end: NFS4_MAX_UINT64;
+-}
+-
+-/* last octet in a range */
+-static inline u64
+-last_byte_offset(u64 start, u64 len)
+-{
+-	u64 end;
+-
+-	BUG_ON(!len);
+-	end = start + len;
+-	return end > start ? end - 1: NFS4_MAX_UINT64;
+-}
+-
+ #define lockownerid_hashval(id) \
+         ((id) & LOCK_HASH_MASK)
+ 
+@@ -3274,7 +3367,7 @@ static struct list_head lock_ownerid_has
+ static struct list_head	lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
+ static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
+ 
+-static struct nfs4_stateid *
++struct nfs4_stateid *
+ find_stateid(stateid_t *stid, int flags)
+ {
+ 	struct nfs4_stateid *local;
+@@ -3303,7 +3396,7 @@ find_stateid(stateid_t *stid, int flags)
+ 	return NULL;
+ }
+ 
+-static struct nfs4_delegation *
++struct nfs4_delegation *
+ find_delegation_stateid(struct inode *ino, stateid_t *stid)
+ {
+ 	struct nfs4_file *fp;
+@@ -3436,6 +3529,9 @@ alloc_init_lock_stateid(struct nfs4_stat
+ 	INIT_LIST_HEAD(&stp->st_perfile);
+ 	INIT_LIST_HEAD(&stp->st_perstateowner);
+ 	INIT_LIST_HEAD(&stp->st_lockowners); /* not used */
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&stp->st_pnfs_ds_id);
++#endif /* CONFIG_PNFSD */
+ 	list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
+ 	list_add(&stp->st_perfile, &fp->fi_stateids);
+ 	list_add(&stp->st_perstateowner, &sop->so_stateids);
+@@ -3998,6 +4094,9 @@ nfs4_state_init(void)
+ 	INIT_LIST_HEAD(&client_lru);
+ 	INIT_LIST_HEAD(&del_recall_lru);
+ 	reclaim_str_hashtbl_size = 0;
++#if defined(CONFIG_PNFSD)
++	nfs4_pnfs_state_init();
++#endif /* CONFIG_PNFSD */
+ 	return 0;
+ }
+ 
+@@ -4110,6 +4209,7 @@ __nfs4_state_shutdown(void)
+ 	}
+ 
+ 	nfsd4_shutdown_recdir();
++	nfs4_pnfs_state_shutdown();
+ 	nfs4_init = 0;
+ }
+ 
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-23 12:09:03.315491356 -0400
+@@ -47,9 +47,14 @@
+ #include <linux/nfsd_idmap.h>
+ #include <linux/nfs4_acl.h>
+ #include <linux/sunrpc/svcauth_gss.h>
++#include <linux/exportfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd4_block.h>
+ 
+ #include "xdr4.h"
+ #include "vfs.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY		NFSDDBG_XDR
+ 
+@@ -1234,6 +1239,138 @@ nfsd4_decode_sequence(struct nfsd4_compo
+ 	DECODE_TAIL;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static __be32
++nfsd4_decode_getdevlist(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_getdevlist *gdevl)
++{
++	DECODE_HEAD;
++
++	READ_BUF(16 + sizeof(nfs4_verifier));	/* 2 x u32 + u64 + verifier */
++	READ32(gdevl->gd_layout_type);
++	READ32(gdevl->gd_maxdevices);
++	READ64(gdevl->gd_cookie);
++	COPYMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
++
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_getdevinfo(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_getdevinfo *gdev)
++{
++	u32 num;
++	DECODE_HEAD;
++
++	READ_BUF(12 + sizeof(struct nfsd4_pnfs_deviceid));
++	READ64(gdev->gd_devid.sbid);
++	READ64(gdev->gd_devid.devid);
++	READ32(gdev->gd_layout_type);
++	READ32(gdev->gd_maxcount);
++	READ32(num);	/* word count of the notify-types bitmap */
++	gdev->gd_notify_types = 0;
++	if (num) {
++		if (num > 1000)	/* sanity cap; keeps 4 * num from wrapping */
++			goto xdr_error;
++		READ_BUF(4 * num);	/* consume the whole bitmap ... */
++		READ32(gdev->gd_notify_types);	/* ... but keep only word 0 */
++	}
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_layoutget *lgp)
++{
++	DECODE_HEAD;
++
++	READ_BUF(36);	/* 3 x u32 + 3 x u64 ahead of the stateid */
++	READ32(lgp->lg_signal);
++	READ32(lgp->lg_seg.layout_type);
++	READ32(lgp->lg_seg.iomode);
++	READ64(lgp->lg_seg.offset);
++	READ64(lgp->lg_seg.length);
++	READ64(lgp->lg_minlength);
++	if (nfsd4_decode_stateid(argp, &lgp->lg_sid))
++		goto xdr_error;	/* do not go on with an undecoded stateid */
++	READ_BUF(4);
++	READ32(lgp->lg_maxcount);
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
++			  struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	DECODE_HEAD;
++	u32 timechange;
++
++	READ_BUF(20);	/* offset + length + reclaim */
++	READ64(lcp->args.lc_seg.offset);
++	READ64(lcp->args.lc_seg.length);
++	READ32(lcp->args.lc_reclaim);
++	if (nfsd4_decode_stateid(argp, &lcp->lc_sid))
++		goto xdr_error;	/* do not go on with an undecoded stateid */
++	READ_BUF(4);
++	READ32(lcp->args.lc_newoffset);
++	if (lcp->args.lc_newoffset) {
++		READ_BUF(8);
++		READ64(lcp->args.lc_last_wr);
++	} else
++		lcp->args.lc_last_wr = 0;
++	READ_BUF(4);
++	READ32(timechange);
++	if (timechange) {
++		READ_BUF(12);
++		READ64(lcp->args.lc_mtime.seconds);
++		READ32(lcp->args.lc_mtime.nseconds);
++	} else {
++		lcp->args.lc_mtime.seconds = 0;
++		lcp->args.lc_mtime.nseconds = 0;
++	}
++	READ_BUF(8);
++	READ32(lcp->args.lc_seg.layout_type);
++	/* XXX: saving XDR'ed layout update. Since we don't have the
++	 * current_fh yet, and therefore no export_ops, we can't call
++	 * the layout specific decode routines. File and pVFS2
++	 * do not use the layout update....
++	 */
++	READ32(lcp->args.lc_up_len);
++	if (lcp->args.lc_up_len > 0) {
++		READ_BUF(lcp->args.lc_up_len);
++		READMEM(lcp->args.lc_up_layout, lcp->args.lc_up_len);
++	}
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
++			  struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	DECODE_HEAD;
++
++	READ_BUF(16);	/* four u32 header words */
++	READ32(lrp->args.lr_reclaim);
++	READ32(lrp->args.lr_seg.layout_type);
++	READ32(lrp->args.lr_seg.iomode);
++	READ32(lrp->args.lr_return_type);
++	if (lrp->args.lr_return_type == RETURN_FILE) {
++		READ_BUF(16);
++		READ64(lrp->args.lr_seg.offset);
++		READ64(lrp->args.lr_seg.length);
++		if (nfsd4_decode_stateid(argp, &lrp->lr_sid))
++			goto xdr_error;	/* stateid was not decoded */
++		READ_BUF(4);
++		READ32(lrp->args.lrf_body_len);
++		if (lrp->args.lrf_body_len > 0) {
++			READ_BUF(lrp->args.lrf_body_len);
++			READMEM(lrp->args.lrf_body, lrp->args.lrf_body_len);
++		}
++	}
++	DECODE_TAIL;
++}
++#endif /* CONFIG_PNFSD */
++
+ static __be32
+ nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
+ {
+@@ -1335,11 +1472,19 @@ static nfsd4_dec nfsd41_dec_ops[] = {
+ 	[OP_DESTROY_SESSION]	= (nfsd4_dec)nfsd4_decode_destroy_session,
+ 	[OP_FREE_STATEID]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_GET_DIR_DELEGATION]	= (nfsd4_dec)nfsd4_decode_notsupp,
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICEINFO]	= (nfsd4_dec)nfsd4_decode_getdevinfo,
++	[OP_GETDEVICELIST]	= (nfsd4_dec)nfsd4_decode_getdevlist,
++	[OP_LAYOUTCOMMIT]	= (nfsd4_dec)nfsd4_decode_layoutcommit,
++	[OP_LAYOUTGET]		= (nfsd4_dec)nfsd4_decode_layoutget,
++	[OP_LAYOUTRETURN]	= (nfsd4_dec)nfsd4_decode_layoutreturn,
++#else  /* CONFIG_PNFSD */
+ 	[OP_GETDEVICEINFO]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_GETDEVICELIST]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTCOMMIT]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTGET]		= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTRETURN]	= (nfsd4_dec)nfsd4_decode_notsupp,
++#endif /* CONFIG_PNFSD */
+ 	[OP_SECINFO_NO_NAME]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_SEQUENCE]		= (nfsd4_dec)nfsd4_decode_sequence,
+ 	[OP_SET_SSV]		= (nfsd4_dec)nfsd4_decode_notsupp,
+@@ -2136,6 +2281,36 @@ out_acl:
+ 		}
+ 		WRITE64(stat.ino);
+ 	}
++#if defined(CONFIG_PNFSD)
++	if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) {
++		struct super_block *sb = dentry->d_inode->i_sb;
++		int type = 0;
++
++		/* Query the filesystem for supported pNFS layout types.
++		 * Currently, we only support one layout type per file system.
++		 * The export_ops->layout_type() returns the pnfs_layouttype4.
++		 */
++		buflen -= 4;
++		if (buflen < 0)		/* length */
++			goto out_resource;
++
++		if (sb && sb->s_pnfs_op && sb->s_pnfs_op->layout_type)
++			type = sb->s_pnfs_op->layout_type(sb);
++		if (type) {
++			if ((buflen -= 4) < 0)	/* type */
++				goto out_resource;
++			WRITE32(1); 	/* length */
++			WRITE32(type);  /* type */
++		} else
++			WRITE32(0);  /* length */
++	}
++
++	if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) {
++		if ((buflen -= 4) < 0)
++			goto out_resource;
++		WRITE32(stat.blksize);
++	}
++#endif /* CONFIG_PNFSD */
+ 	if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
+ 		WRITE32(3);
+ 		WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
+@@ -2366,6 +2541,10 @@ nfsd4_encode_commit(struct nfsd4_compoun
+ 	if (!nfserr) {
+ 		RESERVE_SPACE(8);
+ 		WRITEMEM(commit->co_verf.data, 8);
++		dprintk("NFSD: nfsd4_encode_commit: verifier %x:%x\n",
++			((u32 *)(&commit->co_verf.data))[0],
++			((u32 *)(&commit->co_verf.data))[1]);
++
+ 		ADJUST_ARGS();
+ 	}
+ 	return nfserr;
+@@ -2620,9 +2799,20 @@ nfsd4_encode_read(struct nfsd4_compoundr
+ 	}
+ 	read->rd_vlen = v;
+ 
++#if defined(CONFIG_SPNFS)
++	if (spnfs_enabled())
++		nfserr = spnfs_read(read->rd_fhp->fh_dentry->d_inode,
++				    read->rd_offset, &maxcount, read->rd_vlen,
++				    resp->rqstp);
++	else /* we're not an MDS */
++		nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
++			read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
++			&maxcount);
++#else
+ 	nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
+ 			read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
+ 			&maxcount);
++#endif /* CONFIG_SPNFS */
+ 
+ 	if (nfserr == nfserr_symlink)
+ 		nfserr = nfserr_inval;
+@@ -2926,6 +3116,9 @@ nfsd4_encode_write(struct nfsd4_compound
+ 		WRITE32(write->wr_bytes_written);
+ 		WRITE32(write->wr_how_written);
+ 		WRITEMEM(write->wr_verifier.data, 8);
++		dprintk("NFSD: nfsd4_encode_write: verifier %x:%x\n",
++			((u32 *)(&write->wr_verifier.data))[0],
++			((u32 *)(&write->wr_verifier.data))[1]);
+ 		ADJUST_ARGS();
+ 	}
+ 	return nfserr;
+@@ -3069,6 +3262,343 @@ nfsd4_encode_sequence(struct nfsd4_compo
+ 	return 0;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++/* Ask the file system's get_device_iter() for up to gd_maxdevices devices
++ * and encode each device id (sbid, devid) on resp; *dev_count gets the tally.
++ */
++static __be32
++nfsd4_encode_devlist_iterator(struct nfsd4_compoundres *resp,
++			      struct nfsd4_pnfs_getdevlist *gdevl,
++			      unsigned int *dev_count)
++{
++	struct super_block *sb = gdevl->gd_fhp->fh_dentry->d_inode->i_sb;
++	__be32 nfserr;
++	int status;
++	__be32 *p;
++	struct nfsd4_pnfs_dev_iter_res res = {
++		.gd_cookie = gdevl->gd_cookie,
++		.gd_verf = gdevl->gd_verf,
++		.gd_eof = 0
++	};
++	u64 sbid;
++
++	dprintk("%s: Begin\n", __func__);
++
++	sbid = find_create_sbid(sb);
++	*dev_count = 0;
++	do {
++		status = sb->s_pnfs_op->get_device_iter(sb,
++							gdevl->gd_layout_type,
++							&res);
++		if (status) {
++			if (status == -ENOENT) {
++				res.gd_eof = 1;
++				/* -ENOENT ends the list; return success */
++				break;
++			}
++			nfserr = nfserrno(status);
++			goto out_err;
++		}
++
++		/* Encode the device id: sbid first, then the fs devid */
++		RESERVE_SPACE(sizeof(struct nfsd4_pnfs_deviceid));
++		WRITE64(sbid);		/* CPU order; WRITE64 byte-swaps */
++		WRITE64(res.gd_devid);	/* devid minor */
++		ADJUST_ARGS();
++		(*dev_count)++;
++	} while (*dev_count < gdevl->gd_maxdevices && !res.gd_eof);
++	gdevl->gd_cookie = res.gd_cookie;
++	gdevl->gd_verf = res.gd_verf;
++	gdevl->gd_eof = res.gd_eof;
++	nfserr = nfs_ok;
++out_err:
++	dprintk("%s: Encoded %u devices\n", __func__, *dev_count);
++	return nfserr;
++}
++
++/* Encode the GETDEVICELIST reply: cookie, verifier, device-id array, eof.
++ */
++static __be32
++nfsd4_encode_getdevlist(struct nfsd4_compoundres *resp, __be32 nfserr,
++			struct nfsd4_pnfs_getdevlist *gdevl)
++{
++	unsigned int dev_count = 0, lead_count;
++	__be32 *p_in = resp->p;	/* start of reply; used to backfill/rewind */
++	__be32 *p;
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	/* Ensure we have room for cookie, verifier, and devlist len,
++	 * which we will backfill in after we encode as many devices as possible
++	 */
++	lead_count = 8 + sizeof(nfs4_verifier) + 4;
++	RESERVE_SPACE(lead_count);
++	/* skip past these values */
++	p += XDR_QUADLEN(lead_count);
++	ADJUST_ARGS();
++
++	/* Iterate over as many device ids as possible on the xdr stream */
++	nfserr = nfsd4_encode_devlist_iterator(resp, gdevl, &dev_count);
++	if (nfserr)
++		goto out_err;
++
++	/* Backfill in cookie, verf and number of devices encoded */
++	p = p_in;
++	WRITE64(gdevl->gd_cookie);
++	WRITEMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
++	WRITE32(dev_count);
++
++	/* Skip over devices */
++	p += XDR_QUADLEN(dev_count * sizeof(struct nfsd4_pnfs_deviceid));
++	ADJUST_ARGS();
++
++	/* are we at the end of devices? */
++	RESERVE_SPACE(4);
++	WRITE32(gdevl->gd_eof);
++	ADJUST_ARGS();
++
++	dprintk("%s: done.\n", __func__);
++
++	nfserr = nfs_ok;
++out:
++	return nfserr;
++out_err:
++	/* Drop anything written so far: rewind the stream to where we began */
++	p = p_in;
++	ADJUST_ARGS();
++	goto out;
++}
++
++/* For a given device id, have the file system retrieve and encode the
++ * associated device.  For file layout, the encoding function is
++ * passed down to the file system.  The file system then has the option
++ * of using this encoding function or one of its own.
++ *
++ * Note: when the file system fails with -ETOOSMALL, the distance xdr.p has
++ * advanced (in 4-byte words) plus the type/notify overhead is encoded as the
++ * minimum count, so xdr.p must then reflect the da_addr_body XDR size.
++ */
++static __be32
++nfsd4_encode_getdevinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
++			struct nfsd4_pnfs_getdevinfo *gdev)
++{
++	struct super_block *sb;
++	int maxcount = 0, type_notify_len = 12;
++	__be32 *p, *p_save = NULL, *p_in = resp->p;
++	struct exp_xdr_stream xdr;
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	sb = gdev->gd_sb;
++
++	if (gdev->gd_maxcount != 0) {
++		/* FIXME: this will be bound by the session max response */
++		maxcount = svc_max_payload(resp->rqstp);
++		if (maxcount > gdev->gd_maxcount)
++			maxcount = gdev->gd_maxcount;
++
++		/* Ensure have room for type and notify field */
++		maxcount -= type_notify_len;
++		if (maxcount < 0) {
++			nfserr = -ETOOSMALL;	/* errno; converted by nfserrno() at out */
++			goto toosmall;
++		}
++	}
++
++	RESERVE_SPACE(4);
++	WRITE32(gdev->gd_layout_type);
++	ADJUST_ARGS();
++
++	/* If maxcount is 0 then just update notifications */
++	if (gdev->gd_maxcount == 0)
++		goto handle_notifications;
++
++	xdr.p = p_save = resp->p;
++	xdr.end = resp->end;
++	/* Clamp the file system's window to maxcount rounded down to 4 bytes */
++	if (xdr.end - xdr.p > exp_xdr_qwords(maxcount & ~3))
++		xdr.end = xdr.p + exp_xdr_qwords(maxcount & ~3);
++
++	nfserr = sb->s_pnfs_op->get_device_info(sb, &xdr, gdev->gd_layout_type,
++						&gdev->gd_devid);
++	if (nfserr)
++		goto err;
++
++	/* The file system should never write 0 bytes without
++	 * returning an error
++	 */
++	BUG_ON(xdr.p == p_save);
++	BUG_ON(xdr.p > xdr.end);
++
++	/* Update the xdr stream with the number of bytes encoded
++	 * by the file system.
++	 */
++	p = xdr.p;
++	ADJUST_ARGS();
++
++handle_notifications:
++	/* Encode supported device notifications */
++	RESERVE_SPACE(4);
++	if (sb->s_pnfs_op->set_device_notify) {
++		struct pnfs_devnotify_arg dn_args;
++
++		dn_args.dn_layout_type = gdev->gd_layout_type;
++		dn_args.dn_devid = gdev->gd_devid;
++		dn_args.dn_notify_types = gdev->gd_notify_types;
++		nfserr = sb->s_pnfs_op->set_device_notify(sb, &dn_args);
++		if (nfserr)
++			goto err;
++		WRITE32(dn_args.dn_notify_types);	/* value after the callback ran */
++	} else {
++		WRITE32(0);	/* no set_device_notify callback: encode zero */
++	}
++	ADJUST_ARGS();
++
++out:
++	return nfserrno(nfserr);	/* nfserr holds 0 or a negative errno here */
++toosmall:
++	dprintk("%s: maxcount too small\n", __func__);
++	RESERVE_SPACE(4);
++	WRITE32((p_save ? (xdr.p - p_save) * 4 : 0) + type_notify_len);
++	ADJUST_ARGS();
++	goto out;
++err:
++	/* Rewind to the beginning */
++	p = p_in;
++	ADJUST_ARGS();
++	if (nfserr == -ETOOSMALL)
++		goto toosmall;
++	printk(KERN_ERR "%s: export ERROR %d\n", __func__, nfserr);
++	goto out;
++}
++
++static __be32
++nfsd4_encode_layoutget(struct nfsd4_compoundres *resp,
++		       __be32 nfserr,
++		       struct nfsd4_pnfs_layoutget *lgp)
++{
++	int maxcount, leadcount;
++	struct super_block *sb;
++	struct exp_xdr_stream xdr;
++	__be32 *p, *p_save, *p_start = resp->p;	/* p_start: rewind/backfill point */
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	sb = lgp->lg_fhp->fh_dentry->d_inode->i_sb;
++	maxcount = PAGE_SIZE;	/* capped at one page and at the request's lg_maxcount */
++	if (maxcount > lgp->lg_maxcount)
++		maxcount = lgp->lg_maxcount;
++
++	/* Check for space on xdr stream */
++	leadcount = 36 + sizeof(stateid_opaque_t);
++	RESERVE_SPACE(leadcount);
++	/* encode layout metadata after file system encodes layout */
++	p += XDR_QUADLEN(leadcount);
++	ADJUST_ARGS();
++
++	/* Ensure have room for ret_on_close, off, len, iomode, type */
++	maxcount -= leadcount;
++	if (maxcount < 0) {
++		printk(KERN_ERR "%s: buffer too small\n", __func__);
++		nfserr = nfserr_toosmall;
++		goto err;
++	}
++
++	/* Set xdr info so file system can encode layout */
++	xdr.p = p_save = resp->p;
++	xdr.end = resp->end;
++	/* Clamp the file system's window to maxcount rounded down to 4 bytes */
++	if (xdr.end - xdr.p > exp_xdr_qwords(maxcount & ~3))
++		xdr.end = xdr.p + exp_xdr_qwords(maxcount & ~3);
++
++	/* Retrieve, encode, and merge layout; process stateid */
++	nfserr = nfs4_pnfs_get_layout(lgp, &xdr);
++	if (nfserr)
++		goto err;
++
++	/* Ensure file system returned enough bytes for the client
++	 * to access.
++	 */
++	if (lgp->lg_seg.length < lgp->lg_minlength) {
++		nfserr = nfserr_badlayout;
++		goto err;
++	}
++
++	/* The file system should never write 0 bytes without
++	 * returning an error
++	 */
++	BUG_ON(xdr.p == p_save);
++
++	/* Rewind to beginning and encode attrs */
++	resp->p = p_start;
++	RESERVE_SPACE(4);
++	WRITE32(lgp->lg_roc);	/* return on close */
++	ADJUST_ARGS();
++	nfsd4_encode_stateid(resp, &lgp->lg_sid);
++	RESERVE_SPACE(28);
++	/* Note: response logr_layout array count, always one for now */
++	WRITE32(1);
++	WRITE64(lgp->lg_seg.offset);
++	WRITE64(lgp->lg_seg.length);
++	WRITE32(lgp->lg_seg.iomode);
++	WRITE32(lgp->lg_seg.layout_type);
++
++	/* Update the xdr stream with the number of bytes written
++	 * by the file system
++	 */
++	p = xdr.p;
++	ADJUST_ARGS();
++
++	return nfs_ok;
++err:
++	resp->p = p_start;	/* discard everything encoded for this operation */
++	return nfserr;
++}
++
++static __be32
++nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
++			  struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	__be32 *p;
++
++	if (nfserr)
++		return nfserr;
++
++	RESERVE_SPACE(4);
++	WRITE32(lcp->res.lc_size_chg);
++	ADJUST_ARGS();
++	if (!lcp->res.lc_size_chg)
++		return nfserr;	/* size unchanged: only the flag is encoded */
++	/* The new size follows the flag only when the flag is set */
++	RESERVE_SPACE(8);
++	WRITE64(lcp->res.lc_newsize);
++	ADJUST_ARGS();
++	return nfserr;
++}
++
++static __be32
++nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
++			  struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	__be32 *p;
++
++	if (nfserr)
++		return nfserr;
++
++	/* A "stateid present" flag, then the stateid itself when it is set */
++	RESERVE_SPACE(4);
++	WRITE32(lrp->lrs_present ? 1 : 0);
++	ADJUST_ARGS();
++	if (lrp->lrs_present != 0)
++		nfsd4_encode_stateid(resp, &lrp->lr_sid);
++	return nfserr;
++}
++#endif /* CONFIG_PNFSD */
++
+ static __be32
+ nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
+ {
+@@ -3129,11 +3659,19 @@ static nfsd4_enc nfsd4_enc_ops[] = {
+ 	[OP_DESTROY_SESSION]	= (nfsd4_enc)nfsd4_encode_destroy_session,
+ 	[OP_FREE_STATEID]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_GET_DIR_DELEGATION]	= (nfsd4_enc)nfsd4_encode_noop,
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_getdevinfo,
++	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_getdevlist,
++	[OP_LAYOUTCOMMIT]	= (nfsd4_enc)nfsd4_encode_layoutcommit,
++	[OP_LAYOUTGET]		= (nfsd4_enc)nfsd4_encode_layoutget,
++	[OP_LAYOUTRETURN]	= (nfsd4_enc)nfsd4_encode_layoutreturn,
++#else  /* CONFIG_PNFSD */
+ 	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTCOMMIT]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTGET]		= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTRETURN]	= (nfsd4_enc)nfsd4_encode_noop,
++#endif /* CONFIG_PNFSD */
+ 	[OP_SECINFO_NO_NAME]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_SEQUENCE]		= (nfsd4_enc)nfsd4_encode_sequence,
+ 	[OP_SET_SSV]		= (nfsd4_enc)nfsd4_encode_noop,
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
+--- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-08-23 12:08:29.094491943 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 12:09:03.317501495 -0400
+@@ -13,10 +13,15 @@
+ #include <linux/nfsd/syscall.h>
+ #include <linux/lockd/lockd.h>
+ #include <linux/sunrpc/clnt.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
+ 
+ #include "nfsd.h"
+ #include "cache.h"
+ 
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++#include <linux/nfsd4_spnfs.h>
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
+ /*
+  *	We have a single directory with 9 nodes in it.
+  */
+@@ -49,6 +54,9 @@ enum {
+ 	NFSD_Gracetime,
+ 	NFSD_RecoveryDir,
+ #endif
++#ifdef CONFIG_PNFSD
++	NFSD_pnfs_dlm_device,
++#endif
+ };
+ 
+ /*
+@@ -74,6 +82,9 @@ static ssize_t write_leasetime(struct fi
+ static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
+ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
+ #endif
++#ifdef CONFIG_PNFSD
++static ssize_t write_pnfs_dlm_device(struct file *file, char *buf, size_t size);
++#endif
+ 
+ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
+ 	[NFSD_Svc] = write_svc,
+@@ -96,6 +107,9 @@ static ssize_t (*write_op[])(struct file
+ 	[NFSD_Gracetime] = write_gracetime,
+ 	[NFSD_RecoveryDir] = write_recoverydir,
+ #endif
++#ifdef CONFIG_PNFSD
++	[NFSD_pnfs_dlm_device] = write_pnfs_dlm_device,
++#endif
+ };
+ 
+ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
+@@ -1349,6 +1363,68 @@ static ssize_t write_recoverydir(struct 
+ 
+ #endif
+ 
++#ifdef CONFIG_PNFSD
++
++static ssize_t __write_pnfs_dlm_device(struct file *file, char *buf,
++				       size_t size)
++{
++	int max_size = NFSD_PNFS_DLM_DEVICE_MAX;
++	char *cursor = buf;
++	char *device = buf;
++	int len, ret;
++
++	/* A zero-length write asks for the current device list instead */
++	if (size == 0)
++		return nfsd4_get_pnfs_dlm_device_list(buf, SIMPLE_TRANSACTION_LIMIT);
++
++	/* Input must fit the limit and end in a newline, which is cut off */
++	if (size > max_size || buf[size-1] != '\n')
++		return -EINVAL;
++	buf[size-1] = 0;
++
++	/* Parse the first word; source and destination both start at buf */
++	len = qword_get(&cursor, device, size);
++	if (len <= 0)
++		return -EINVAL;
++
++	ret = nfsd4_set_pnfs_dlm_device(device, len);
++	return ret <= 0 ? ret : strlen(buf);
++}
++
++/**
++ * write_pnfs_dlm_device - Set or report the current pNFS data server list
++ *
++ * Input:
++ *			buf:		ignored
++ *			size:		zero
++ *
++ * OR
++ *
++ * Input:
++ *			buf:		C string containing a block device name,
++ *					a colon, and then a comma separated
++ *					list of pNFS data server IPv4 addresses
++ *			size:		non-zero length of C string in @buf
++ * Output:
++ *	On success:	passed-in buffer filled with '\n'-terminated C
++ *			string containing a block device name, a colon, and
++ *			then a comma separated list of pNFS
++ *			data server IPv4 addresses.
++ *			return code is the size in bytes of the string
++ *	On error:	return code is a negative errno value
++ */
++static ssize_t write_pnfs_dlm_device(struct file *file, char *buf, size_t size)
++{
++	ssize_t result;
++
++	mutex_lock(&nfsd_mutex);	/* held across the whole set/report call */
++	result = __write_pnfs_dlm_device(file, buf, size);
++	mutex_unlock(&nfsd_mutex);
++	return result;
++}
++
++#endif /* CONFIG_PNFSD */
++
+ /*----------------------------------------------------------------------------*/
+ /*
+  *	populating the filesystem.
+@@ -1383,6 +1459,10 @@ static int nfsd_fill_super(struct super_
+ 		[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
+ 		[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
+ #endif
++#ifdef CONFIG_PNFSD
++		[NFSD_pnfs_dlm_device] = {"pnfs_dlm_device", &transaction_ops,
++					   S_IWUSR|S_IRUSR},
++#endif
+ 		/* last one */ {""}
+ 	};
+ 	return simple_fill_super(sb, 0x6e667364, nfsd_files);
+@@ -1421,6 +1501,9 @@ static int create_proc_exports_entry(voi
+ }
+ #endif
+ 
++#if defined(CONFIG_SPNFS_BLOCK)
++int nfsd_bl_init(void);
++#endif
+ static int __init init_nfsd(void)
+ {
+ 	int retval;
+@@ -1443,6 +1526,15 @@ static int __init init_nfsd(void)
+ 	retval = create_proc_exports_entry();
+ 	if (retval)
+ 		goto out_free_idmap;
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++	retval = spnfs_init_proc();
++	if (retval != 0)
++		goto out_free_all;	/* also unwind create_proc_exports_entry() */
++#if defined(CONFIG_SPNFS_BLOCK)
++	nfsd_bl_init();	/* NOTE(review): int return value is ignored - confirm */
++#endif /* CONFIG_SPNFS_BLOCK */
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
+ 	retval = register_filesystem(&nfsd_fs_type);
+ 	if (retval)
+ 		goto out_free_all;
+@@ -1465,7 +1557,22 @@ out_free_stat:
+ 
+ static void __exit exit_nfsd(void)
+ {
++	/* Remove the layout-segment entries before their parent directory */
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS_LAYOUTSEGMENTS)
++	remove_proc_entry("fs/nfs/spnfs/layoutseg", NULL);
++	remove_proc_entry("fs/nfs/spnfs/layoutsegsize", NULL);
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS_LAYOUTSEGMENTS */
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++	remove_proc_entry("fs/nfs/spnfs/recall", NULL);
++	remove_proc_entry("fs/nfs/spnfs/layoutseg", NULL);
++	remove_proc_entry("fs/nfs/spnfs/getfh", NULL);
++	remove_proc_entry("fs/nfs/spnfs/config", NULL);
++	remove_proc_entry("fs/nfs/spnfs/ctl", NULL);
++	remove_proc_entry("fs/nfs/spnfs", NULL);
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
+ 	nfsd_export_shutdown();
++	nfsd4_pnfs_dlm_shutdown();
+ 	nfsd_reply_cache_shutdown();
+ 	remove_proc_entry("fs/nfs/exports", NULL);
+ 	remove_proc_entry("fs/nfs", NULL);
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
+--- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-08-23 12:08:29.095491390 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 12:09:03.318355741 -0400
+@@ -285,11 +285,17 @@ extern time_t nfsd4_grace;
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
+ 	NFSD4_SUPPORTED_ATTRS_WORD0
+ 
++#if defined(CONFIG_PNFSD)
++#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
++	(NFSD4_SUPPORTED_ATTRS_WORD1 | FATTR4_WORD1_FS_LAYOUT_TYPES)
++#else /* CONFIG_PNFSD */
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
+ 	NFSD4_SUPPORTED_ATTRS_WORD1
++#endif /* CONFIG_PNFSD */
+ 
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
+-	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
++	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT | \
++	 FATTR4_WORD2_LAYOUT_BLKSIZE)
+ 
+ static inline u32 nfsd_suppattrs0(u32 minorversion)
+ {
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.c
+--- linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-08-23 12:09:03.319511586 -0400
+@@ -10,6 +10,7 @@
+ #include <linux/exportfs.h>
+ 
+ #include <linux/sunrpc/svcauth_gss.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
+ #include "nfsd.h"
+ #include "vfs.h"
+ #include "auth.h"
+@@ -139,6 +140,7 @@ static inline __be32 check_pseudo_root(s
+ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
+ {
+ 	struct knfsd_fh	*fh = &fhp->fh_handle;
++	int fsid_type;
+ 	struct fid *fid = NULL, sfid;
+ 	struct svc_export *exp;
+ 	struct dentry *dentry;
+@@ -159,7 +161,8 @@ static __be32 nfsd_set_fh_dentry(struct 
+ 			return error;
+ 		if (fh->fh_auth_type != 0)
+ 			return error;
+-		len = key_len(fh->fh_fsid_type) / 4;
++		fsid_type = pnfs_fh_fsid_type(fh);
++		len = key_len(fsid_type) / 4;
+ 		if (len == 0)
+ 			return error;
+ 		if  (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
+@@ -172,7 +175,7 @@ static __be32 nfsd_set_fh_dentry(struct 
+ 		data_left -= len;
+ 		if (data_left < 0)
+ 			return error;
+-		exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth);
++		exp = rqst_exp_find(rqstp, fsid_type, fh->fh_auth);
+ 		fid = (struct fid *)(fh->fh_auth + len);
+ 	} else {
+ 		__u32 tfh[2];
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.h
+--- linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-08-23 12:09:03.319511586 -0400
+@@ -14,6 +14,7 @@ enum nfsd_fsid {
+ 	FSID_UUID8,
+ 	FSID_UUID16,
+ 	FSID_UUID16_INUM,
++	FSID_MAX
+ };
+ 
+ enum fsid_source {
+@@ -205,4 +206,42 @@ fh_unlock(struct svc_fh *fhp)
+ 	}
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++/*
++ * fh_fsid_type is overloaded to indicate whether a filehandle was one supplied
++ * to a DS by LAYOUTGET: such handles carry their fsid type plus FSID_MAX.
++ * nfs4_preprocess_stateid_op() uses this to decide how to handle a stateid.
++ */
++static inline int pnfs_fh_is_ds(struct knfsd_fh *fh)
++{
++	return fh->fh_fsid_type >= FSID_MAX;	/* marked types are offset by FSID_MAX */
++}
++
++static inline void pnfs_fh_mark_ds(struct knfsd_fh *fh)
++{
++	BUG_ON(fh->fh_version != 1);	/* only version 1 handles may be marked */
++	BUG_ON(pnfs_fh_is_ds(fh));	/* marking a handle twice is a bug */
++	fh->fh_fsid_type += FSID_MAX;	/* pnfs_fh_fsid_type() subtracts it again */
++}
++
++#else  /* CONFIG_PNFSD */
++
++static inline int pnfs_fh_is_ds(struct knfsd_fh *fh)
++{
++	return 0;	/* without CONFIG_PNFSD no handle is treated as a DS handle */
++}
++
++#endif /* CONFIG_PNFSD */
++
++/* Returns the fsid type with any DS marking (the FSID_MAX offset) removed. */
++static inline int pnfs_fh_fsid_type(struct knfsd_fh *fh)
++{
++	int raw_type = fh->fh_fsid_type;
++
++	if (!pnfs_fh_is_ds(fh))
++		return raw_type;
++	return raw_type - FSID_MAX;
++}
++
+ #endif /* _LINUX_NFSD_FH_INT_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/nfssvc.c
+--- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-08-23 12:08:27.631563969 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-08-23 12:09:03.320416974 -0400
+@@ -115,7 +115,7 @@ struct svc_program		nfsd_program = {
+ 
+ };
+ 
+-u32 nfsd_supported_minorversion;
++u32 nfsd_supported_minorversion = NFSD_SUPPORTED_MINOR_VERSION;
+ 
+ int nfsd_vers(int vers, enum vers_op change)
+ {
+diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pnfsd.h
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-08-23 12:09:03.321376171 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-08-23 12:09:03.321376171 -0400
+@@ -0,0 +1,143 @@
++/*
++ *  Copyright (c) 2005 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef LINUX_NFSD_PNFSD_H
++#define LINUX_NFSD_PNFSD_H
++
++#include <linux/list.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++#include "state.h"
++#include "xdr4.h"
++
++/* outstanding layout stateid */
++struct nfs4_layout_state {
++	struct list_head	ls_perfile;	/* presumably per-file list entry - confirm */
++	struct list_head	ls_layouts; /* list of nfs4_layouts */
++	struct kref		ls_ref;		/* reference count */
++	struct nfs4_client	*ls_client;	/* presumably the owning client - confirm */
++	struct nfs4_file	*ls_file;	/* presumably the file it covers - confirm */
++	stateid_t		ls_stateid;	/* the layout stateid itself */
++};
++
++/* outstanding layout */
++struct nfs4_layout {
++	struct list_head		lo_perfile;	/* hash by f_id */
++	struct list_head		lo_perclnt;	/* hash by clientid */
++	struct list_head		lo_perstate;	/* presumably on lo_state->ls_layouts - confirm */
++	struct nfs4_file		*lo_file;	/* backpointer */
++	struct nfs4_client		*lo_client;	/* presumably a backpointer too - confirm */
++	struct nfs4_layout_state	*lo_state;	/* layout stateid this layout belongs to */
++	struct nfsd4_layout_seg 	lo_seg;		/* segment description of this layout */
++};
++
++struct pnfs_inval_state {
++	struct knfsd_fh		mdsfh; /* needed only by invalidate all */
++	stateid_t		stid;
++	clientid_t		clid;
++	u32			status;
++};
++
++/* pNFS Data Server state */
++#define DS_STATEID_VALID   0
++#define DS_STATEID_ERROR   1
++#define DS_STATEID_NEW     2
++
++struct pnfs_ds_stateid {
++	struct list_head	ds_hash;        /* ds_stateid hash entry */
++	struct list_head	ds_perclid;     /* per client hash entry */
++	stateid_t		ds_stid;
++	struct knfsd_fh		ds_fh;
++	unsigned long		ds_access;
++	u32			ds_status;      /* from MDS */
++	u32			ds_verifier[2]; /* from MDS */
++	wait_queue_head_t	ds_waitq;
++	unsigned long		ds_flags;
++	struct kref		ds_ref;
++	clientid_t		ds_mdsclid;
++};
++
++struct pnfs_ds_clientid {
++	struct list_head	dc_hash;        /* mds_clid_hashtbl entry */
++	struct list_head	dc_stateid;     /* ds_stateid head */
++	struct list_head	dc_permdsid;    /* per mdsid hash entry */
++	clientid_t		dc_mdsclid;
++	struct kref		dc_ref;
++	uint32_t		dc_mdsid;
++};
++
++struct pnfs_mds_id {
++	struct list_head	di_hash;        /* mds_nodeid list entry */
++	struct list_head	di_mdsclid;     /* mds_clientid head */
++	uint32_t		di_mdsid;
++	time_t			di_mdsboot;	/* mds boot time */
++	struct kref		di_ref;
++};
++
++/* notify device request (from exported filesystem) */
++struct nfs4_notify_device {
++	struct nfsd4_pnfs_cb_dev_list  *nd_list;
++	struct nfs4_client	       *nd_client;
++	struct list_head	        nd_perclnt;
++
++	void				*nd_args;	/* nfsd internal */
++};
++
++u64 find_create_sbid(struct super_block *);
++struct super_block *find_sbid_id(u64);
++__be32 nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *, struct exp_xdr_stream *);
++int nfs4_pnfs_return_layout(struct super_block *, struct svc_fh *,
++					struct nfsd4_pnfs_layoutreturn *);
++int nfs4_pnfs_cb_get_state(struct super_block *, struct pnfs_get_state *);
++int nfs4_pnfs_cb_change_state(struct pnfs_get_state *);
++void nfs4_ds_get_verifier(stateid_t *, struct super_block *, u32 *);
++int put_layoutrecall(struct nfs4_layoutrecall *);
++void nomatching_layout(struct nfs4_layoutrecall *);
++void *layoutrecall_done(struct nfs4_layoutrecall *);
++int nfsd4_cb_layout(struct nfs4_layoutrecall *);
++int nfsd_layout_recall_cb(struct super_block *, struct inode *,
++			  struct nfsd4_pnfs_cb_layout *);
++int nfsd_device_notify_cb(struct super_block *,
++			  struct nfsd4_pnfs_cb_dev_list *);
++int nfsd4_cb_notify_device(struct nfs4_notify_device *);
++void pnfs_set_device_notify(clientid_t *, unsigned int types);
++void pnfs_clear_device_notify(struct nfs4_client *);
++
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++extern struct sockaddr pnfsd_lexp_addr;
++extern size_t pnfs_lexp_addr_len;
++
++extern void pnfsd_lexp_init(struct inode *);
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
++#endif /* LINUX_NFSD_PNFSD_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-08-23 12:09:03.321376171 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-08-23 12:09:03.322501672 -0400
+@@ -0,0 +1,225 @@
++/*
++ * linux/fs/nfsd/pnfsd_lexp.c
++ *
++ * pNFS export of local filesystems.
++ *
++ * Export local file systems over the files layout type.
++ * The MDS (metadata server) functions also as a single DS (data server).
++ * This is mostly useful for development and debugging purposes.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * Copyright (C) 2008 Benny Halevy, <bhalevy@panasas.com>
++ *
++ * Initial implementation was based on the pnfs-gfs2 patches done
++ * by David M. Richter <richterd@citi.umich.edu>
++ */
++
++#include <linux/sunrpc/svc_xprt.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++struct sockaddr pnfsd_lexp_addr;
++size_t pnfs_lexp_addr_len;
++
++static int
++pnfsd_lexp_layout_type(struct super_block *sb)
++{
++	/* The local export always reports the NFSv4.1 files layout type */
++	dprintk("<-- %s: return %d\n", __func__, LAYOUT_NFSV4_1_FILES);
++	return LAYOUT_NFSV4_1_FILES;
++}
++
++static int
++pnfsd_lexp_get_device_iter(struct super_block *sb,
++			   u32 layout_type,
++			   struct nfsd4_pnfs_dev_iter_res *res)
++{
++	dprintk("--> %s: sb=%p\n", __func__, sb);
++
++	BUG_ON(layout_type != LAYOUT_NFSV4_1_FILES);
++
++	/* Only device id 1 is ever reported, so every call sets eof */
++	res->gd_eof = 1;
++	if (res->gd_cookie != 0)
++		return -ENOENT;	/* non-zero cookie: nothing further to report */
++	res->gd_devid = 1;
++	res->gd_verf = 1;
++	res->gd_cookie = 1;
++	dprintk("<-- %s: return 0\n", __func__);
++	return 0;
++}
++
++static int
++pnfsd_lexp_get_device_info(struct super_block *sb,
++			   struct exp_xdr_stream *xdr,
++			   u32 layout_type,
++			   const struct nfsd4_pnfs_deviceid *devid)
++{
++	int err;
++	struct pnfs_filelayout_device fdev;
++	struct pnfs_filelayout_multipath fl_devices[1];
++	u32 fl_stripe_indices[1] = { 0 };
++	struct pnfs_filelayout_devaddr daddr;
++	/* %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x.%03u.%03u */
++	char daddr_buf[8*4 + 2*3 + 10];
++
++	dprintk("--> %s: sb=%p\n", __func__, sb);
++
++	BUG_ON(layout_type != LAYOUT_NFSV4_1_FILES);
++
++	memset(&fdev, '\0', sizeof(fdev));
++
++	if (devid->devid != 1) {	/* 1 is the only id get_device_iter reports */
++		printk(KERN_ERR "%s: WARNING: didn't receive a deviceid of 1 "
++			"(got: 0x%llx)\n", __func__, devid->devid);
++		err = -EINVAL;
++		goto out;
++	}
++
++	/* the local export has one device and one stripe index (index 0) */
++	fdev.fl_device_length = 1;
++	fdev.fl_device_list = fl_devices;
++
++	fdev.fl_stripeindices_length = fdev.fl_device_length;
++	fdev.fl_stripeindices_list = fl_stripe_indices;
++
++	daddr.r_addr.data = daddr_buf;
++	daddr.r_addr.len = sizeof(daddr_buf);
++	err = __svc_print_netaddr(&pnfsd_lexp_addr, &daddr.r_addr);
++	if (err < 0)
++		goto out;
++	daddr.r_addr.len = err;	/* non-negative result is used as the length */
++	switch (pnfsd_lexp_addr.sa_family) {
++	case AF_INET:
++		daddr.r_netid.data = "tcp";
++		daddr.r_netid.len = 3;
++		break;
++	case AF_INET6:
++		daddr.r_netid.data = "tcp6";
++		daddr.r_netid.len = 4;
++		break;
++	default:
++		BUG();	/* only AF_INET and AF_INET6 addresses are handled */
++	}
++	fdev.fl_device_list[0].fl_multipath_length = 1;
++	fdev.fl_device_list[0].fl_multipath_list = &daddr;
++
++	/* have nfsd encode the device info */
++	err = filelayout_encode_devinfo(xdr, &fdev);
++out:
++	dprintk("<-- %s: return %d\n", __func__, err);
++	return err;
++}
++
++static int get_stripe_unit(int blocksize)
++{
++	if (blocksize < NFSSVC_MAXBLKSIZE)	/* else blocksize is returned as is */
++		blocksize = NFSSVC_MAXBLKSIZE - (NFSSVC_MAXBLKSIZE % blocksize);
++	dprintk("%s: return %d\n", __func__, blocksize);
++	return blocksize;	/* largest multiple of the input within the maximum */
++}
++
++static enum nfsstat4
++pnfsd_lexp_layout_get(struct inode *inode,
++		      struct exp_xdr_stream *xdr,
++		      const struct nfsd4_pnfs_layoutget_arg *arg,
++		      struct nfsd4_pnfs_layoutget_res *res)
++{
++	enum nfsstat4 rc = NFS4_OK;	/* protocol status, never a negative errno */
++	struct pnfs_filelayout_layout *layout = NULL;
++	struct knfsd_fh *fhp = NULL;
++
++	dprintk("--> %s: inode=%p\n", __func__, inode);
++
++	res->lg_seg.layout_type = LAYOUT_NFSV4_1_FILES;
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;	/* one segment from offset 0 */
++
++	layout = kzalloc(sizeof(*layout), GFP_KERNEL);
++	if (layout == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;	/* out of memory: let the client retry */
++		goto error;
++	}
++
++	/* Set file layout response args */
++	layout->lg_layout_type = LAYOUT_NFSV4_1_FILES;
++	layout->lg_stripe_type = STRIPE_SPARSE;
++	layout->lg_commit_through_mds = true;
++	layout->lg_stripe_unit = get_stripe_unit(inode->i_sb->s_blocksize);
++	layout->lg_fh_length = 1;
++	layout->device_id.sbid = arg->lg_sbid;
++	layout->device_id.devid = 1;				/*FSFTEMP*/
++	layout->lg_first_stripe_index = 0;			/*FSFTEMP*/
++	layout->lg_pattern_offset = 0;
++
++	fhp = kmalloc(sizeof(*fhp), GFP_KERNEL);
++	if (fhp == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;	/* out of memory: let the client retry */
++		goto error;
++	}
++
++	memcpy(fhp, arg->lg_fh, sizeof(*fhp));
++	pnfs_fh_mark_ds(fhp);	/* mark the copy only; arg->lg_fh stays unmarked */
++	layout->lg_fh_list = fhp;
++
++	/* Call nfsd to encode layout */
++	rc = filelayout_encode_layout(xdr, layout);
++exit:
++	kfree(layout);
++	kfree(fhp);	/* may still be NULL here; kfree(NULL) is a no-op */
++	dprintk("<-- %s: return %d\n", __func__, rc);
++	return rc;
++
++error:
++	res->lg_seg.length = 0;	/* no layout segment is returned on failure */
++	goto exit;
++}
++
++static int
++pnfsd_lexp_layout_commit(struct inode *inode,
++			 const struct nfsd4_pnfs_layoutcommit_arg *args,
++			 struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	dprintk("%s: (unimplemented)\n", __func__);
++
++	return 0;	/* stub: args are ignored and *res is left untouched */
++}
++
++static int
++pnfsd_lexp_layout_return(struct inode *inode,
++			 const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	dprintk("%s: (unimplemented)\n", __func__);
++
++	return 0;	/* stub: always returns 0 without looking at args */
++}
++
++static int pnfsd_lexp_get_state(struct inode *inode, struct knfsd_fh *fh,
++				struct pnfs_get_state *p)
++{
++	return 0;	/* just use the current stateid */
++}
++
++static struct pnfs_export_operations pnfsd_lexp_ops = {
++	.layout_type = pnfsd_lexp_layout_type,
++	.get_device_info = pnfsd_lexp_get_device_info,
++	.get_device_iter = pnfsd_lexp_get_device_iter,
++	.layout_get = pnfsd_lexp_layout_get,
++	.layout_commit = pnfsd_lexp_layout_commit,
++	.layout_return = pnfsd_lexp_layout_return,
++	.get_state = pnfsd_lexp_get_state,
++};
++
++void
++pnfsd_lexp_init(struct inode *inode)
++{
++	dprintk("%s: &pnfsd_lexp_ops=%p\n", __func__, &pnfsd_lexp_ops);
++	inode->i_sb->s_pnfs_op = &pnfsd_lexp_ops;	/* set on the inode's superblock */
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_com.c
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-08-23 12:09:03.322501672 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-08-23 12:09:03.323511608 -0400
+@@ -0,0 +1,535 @@
++/*
++ * fs/nfsd/spnfs_com.c
++ *
++ * Communication layer between spNFS kernel and userspace
++ * Based heavily on idmap.c
++ *
++ */
++
++/*
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Marius Aamodt Eriksen <marius@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#include <linux/namei.h>
++#include <linux/mount.h>
++#include <linux/path.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++#include <linux/nfsd/debug.h>
++
++#include <linux/nfsd4_spnfs.h>
++
++#define	NFSDDBG_FACILITY		NFSDDBG_PROC
++
++static ssize_t   spnfs_pipe_upcall(struct file *, struct rpc_pipe_msg *,
++		     char __user *, size_t);
++static ssize_t   spnfs_pipe_downcall(struct file *, const char __user *,
++		     size_t);
++static void      spnfs_pipe_destroy_msg(struct rpc_pipe_msg *);
++/* Pipe callbacks handed to rpc_mkpipe() when nfsd_spnfs_new() creates "spnfs". */
++static struct rpc_pipe_ops spnfs_upcall_ops = {
++	.upcall		= spnfs_pipe_upcall,
++	.downcall	= spnfs_pipe_downcall,
++	.destroy_msg	= spnfs_pipe_destroy_msg,
++};
++
++/* evil global variable: the one spnfs instance, NULL while none is set up */
++struct spnfs *global_spnfs;
++struct spnfs_config *spnfs_config;	/* published by the proc "config" write */
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++int spnfs_use_layoutsegments;		/* set by the proc "layoutseg" write */
++uint64_t layoutsegment_size;		/* set by the proc "layoutsegsize" write */
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++/*
++ * Used by spnfs_enabled()
++ * Tracks if the subsystem has been initialized at some point.  It doesn't
++ * matter if it's not currently initialized.
++ */
++static int spnfs_enabled_at_some_point;
++
++/*
++ * Call this to start the ball rolling: create the "spnfs" pipe in the
++ * rpc_pipefs "/nfs" directory and publish the instance in global_spnfs.
++ * Returns 0, -EEXIST if an instance already exists, or a negative errno.
++ */
++int
++nfsd_spnfs_new(void)
++{
++	struct spnfs *spnfs = NULL;	/* coded as if the global will go away */
++	struct vfsmount *mnt;
++	struct nameidata nd;
++	int rc;
++
++	if (global_spnfs != NULL)
++		return -EEXIST;
++
++	mnt = rpc_get_mount();
++	if (IS_ERR(mnt))
++		return PTR_ERR(mnt);
++
++	spnfs = kzalloc(sizeof(*spnfs), GFP_KERNEL);
++	if (spnfs == NULL) {
++		rc = -ENOMEM;
++		goto err;
++	}
++	mutex_init(&spnfs->spnfs_lock);	/* ready before the pipe is visible */
++	mutex_init(&spnfs->spnfs_plock);
++	init_waitqueue_head(&spnfs->spnfs_wq);
++	/* FIXME: do not abuse rpc_pipefs/nfs */
++	rc = vfs_path_lookup(mnt->mnt_root, mnt, "/nfs", 0, &nd);
++	if (rc)
++		goto err;
++	spnfs->spnfs_dentry = rpc_mkpipe(nd.path.dentry, "spnfs", spnfs,
++					 &spnfs_upcall_ops, 0);
++	path_put(&nd.path);	/* release the reference the lookup took */
++	if (IS_ERR(spnfs->spnfs_dentry)) {
++		rc = -EPIPE;
++		goto err;
++	}
++
++	global_spnfs = spnfs;
++	spnfs_enabled_at_some_point = 1;
++	return 0;
++err:
++	rpc_put_mount();
++	kfree(spnfs);
++	return rc;
++}
++
++/* again, code it like we're going to remove the global variable */
++/* Unlinks the pipe, drops the rpc_pipefs mount and frees the instance. */
++void nfsd_spnfs_delete(void)
++{
++	struct spnfs *victim = global_spnfs;
++
++	if (victim != NULL) {
++		rpc_unlink(victim->spnfs_dentry);
++		rpc_put_mount();
++		global_spnfs = NULL;
++		kfree(victim);
++	}
++}
++
++/*
++ * RPC pipefs upcall/downcall routines.
++ * Invoked by the rpc_pipe code to deliver a message queued elsewhere (see
++ * nfs_idmap_id for an example of enqueueing).  Copies up to @buflen of the
++ * bytes not yet delivered to @dst; returns the count copied or -EFAULT.
++ */
++static ssize_t
++spnfs_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
++    char __user *dst, size_t buflen)
++{
++	char *data = (char *)msg->data + msg->copied;
++	size_t mlen = min(msg->len - msg->copied, buflen);
++	unsigned long left;
++
++	/* copy_to_user() returns the bytes NOT copied and is never negative */
++	left = copy_to_user(dst, data, mlen);
++	if (left != 0 && left == mlen) {
++		msg->errno = -EFAULT;
++		return -EFAULT;
++	}
++	mlen -= left;
++	msg->copied += mlen;
++	msg->errno = 0;
++	return mlen;
++}
++
++/*
++ * Downcall: the daemon writes back exactly one struct spnfs_msg.  Record its
++ * status (and, on a type match, its result) in the pending message, then wake
++ * the waiter in spnfs_upcall().  Returns @mlen or a negative errno.
++ */
++static ssize_t
++spnfs_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
++{
++	struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
++	struct spnfs *spnfs = (struct spnfs *)rpci->private;
++	struct spnfs_msg *im_in = NULL, *im = &spnfs->spnfs_im;
++	int ret;
++
++	if (mlen != sizeof(struct spnfs_msg))
++		return -ENOSPC;
++
++	im_in = kmalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im_in == NULL)
++		return -ENOMEM;
++	if (copy_from_user(im_in, src, mlen) != 0) {
++		kfree(im_in);	/* do not leak the bounce buffer */
++		return -EFAULT;
++	}
++
++	mutex_lock(&spnfs->spnfs_plock);
++	ret = mlen;
++	im->im_status = im_in->im_status;
++	/* If we got an error, terminate now, and wake up pending upcalls */
++	if (!(im_in->im_status & SPNFS_STATUS_SUCCESS)) {
++		wake_up(&spnfs->spnfs_wq);
++		goto out;
++	}
++
++	ret = -EINVAL;
++	/* DMXXX: simplistic check that this reply is for the current upcall */
++	if (im_in->im_type == im->im_type) {
++		memcpy(&im->im_res, &im_in->im_res, sizeof(im->im_res));
++		ret = mlen;
++	} else
++		dprintk("spnfs: downcall type != upcall type\n");
++
++	wake_up(&spnfs->spnfs_wq);
++/* DMXXX handle rval processing */
++out:
++	mutex_unlock(&spnfs->spnfs_plock);
++	kfree(im_in);
++	return ret;
++}
++
++/* destroy_msg hook: if the message failed, mark it so and wake the waiter. */
++static void spnfs_pipe_destroy_msg(struct rpc_pipe_msg *msg)
++{
++	struct spnfs_msg *pending = msg->data;
++	struct spnfs *owner = container_of(pending, struct spnfs, spnfs_im);
++
++	if (msg->errno < 0) {
++		mutex_lock(&owner->spnfs_plock);
++		pending->im_status = SPNFS_STATUS_FAIL;  /* DMXXX */
++		wake_up(&owner->spnfs_wq);
++		mutex_unlock(&owner->spnfs_plock);
++	}
++}
++
++/*
++ * Generic upcall, called by functions in spnfs_ops.c.  Returns 0 with the
++ * reply copied to @res, or -EIO (also when @spnfs is NULL: nothing set up).
++ */
++int
++spnfs_upcall(struct spnfs *spnfs, struct spnfs_msg *upmsg,
++		union spnfs_msg_res *res)
++{
++	struct rpc_pipe_msg msg;
++	struct spnfs_msg *im;
++	DECLARE_WAITQUEUE(wq, current);
++	int rval, ret = -EIO;
++
++	if (spnfs == NULL)	/* global_spnfs is NULL while no pipe exists */
++		return ret;
++	im = &spnfs->spnfs_im;
++	mutex_lock(&spnfs->spnfs_lock);
++	mutex_lock(&spnfs->spnfs_plock);
++
++	memset(im, 0, sizeof(*im));
++	memcpy(im, upmsg, sizeof(*upmsg));
++	memset(&msg, 0, sizeof(msg));
++	msg.data = im;
++	msg.len = sizeof(*im);
++
++	add_wait_queue(&spnfs->spnfs_wq, &wq);
++	rval = rpc_queue_upcall(spnfs->spnfs_dentry->d_inode, &msg);
++	if (rval < 0) {
++		remove_wait_queue(&spnfs->spnfs_wq, &wq);
++		goto out;
++	}
++
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	mutex_unlock(&spnfs->spnfs_plock);
++	schedule();
++	current->state = TASK_RUNNING;
++	remove_wait_queue(&spnfs->spnfs_wq, &wq);
++	mutex_lock(&spnfs->spnfs_plock);
++
++	if (im->im_status & SPNFS_STATUS_SUCCESS) {
++		memcpy(res, &im->im_res, sizeof(*res));
++		ret = 0;
++	}
++out:
++	memset(im, 0, sizeof(*im));
++	mutex_unlock(&spnfs->spnfs_plock);
++	mutex_unlock(&spnfs->spnfs_lock);
++	return(ret);
++}
++
++/*
++ * This is used to determine if the spnfsd daemon has been started at
++ * least once since the system came up.  It is used by the export
++ * mechanism to decide if spnfs is in use.
++ *
++ * Returns non-zero if the spnfsd has initialized the communication pipe
++ * at least once.
++ */
++int spnfs_enabled(void)
++{
++	return spnfs_enabled_at_some_point;
++}
++
++#ifdef CONFIG_PROC_FS
++
++/*
++ * procfs virtual files for user/kernel space communication:
++ *
++ * ctl - currently just an on/off switch...can be expanded
++ * getfh - fd to fh conversion
++ * recall - recall a layout from the command line, for example:
++ *		echo <path> > /proc/fs/spnfs/recall
++ * config - configuration info, e.g., stripe size, num ds, etc.
++ */
++
++/*-------------- start ctl -------------------------*/
++/* proc "ctl" write: a non-zero int sets spnfs up, zero tears it down. */
++static ssize_t ctl_write(struct file *file, const char __user *buf,
++			 size_t count, loff_t *offset)
++{
++	int cmd, rc;
++
++	if (copy_from_user((int *)&cmd, (int *)buf, sizeof(int)))
++		return -EFAULT;
++
++	if (!cmd) {
++		nfsd_spnfs_delete();
++		return count;
++	}
++	rc = nfsd_spnfs_new();
++	return rc != 0 ? rc : (ssize_t)count;
++}
++
++static const struct file_operations ctl_ops = {
++	.write		= ctl_write,
++};
++/*-------------- end ctl ---------------------------*/
++
++/*-------------- start config -------------------------*/
++/* proc "config" write: copy in at most one struct spnfs_config, publish it. */
++static ssize_t config_write(struct file *file, const char __user *buf,
++			    size_t count, loff_t *offset)
++{
++	static struct spnfs_config cfg;
++
++	if (count > sizeof(cfg) || copy_from_user(&cfg, buf, count))
++		return count > sizeof(cfg) ? -EINVAL : -EFAULT;
++	spnfs_config = &cfg;
++	return 0;	/* NOTE(review): success is reported as 0, not @count */
++}
++
++static const struct file_operations config_ops = {
++	.write		= config_write,
++};
++/*-------------- end config ---------------------------*/
++
++/*-------------- start getfh -----------------------*/
++/* Per-open handle buffer; zeroed so a read before any write leaks no heap. */
++static int getfh_open(struct inode *inode, struct file *file)
++{
++	file->private_data = kzalloc(sizeof(struct nfs_fh), GFP_KERNEL);
++	if (file->private_data == NULL)
++		return -ENOMEM;
++	return 0;
++}
++
++static ssize_t getfh_read(struct file *file, char __user *buf, size_t count,
++			  loff_t *offset)
++{
++	if (count < sizeof(struct nfs_fh))	/* a whole handle is copied out */
++		return -EINVAL;
++	return copy_to_user(buf, file->private_data, sizeof(struct nfs_fh)) ?
++		-EFAULT : (ssize_t)count;
++}
++
++/* proc "getfh" write: pass the fd (a native int) written to spnfs_getfh(). */
++static ssize_t getfh_write(struct file *file, const char __user *buf,
++			   size_t count, loff_t *offset)
++{
++	int fd;
++
++	if (copy_from_user(&fd, buf, sizeof(fd)) != 0)
++		return -EFAULT;
++	if (spnfs_getfh(fd, file->private_data))
++		return -EIO;
++	return count;
++}
++
++static int getfh_release(struct inode *inode, struct file *file)
++{
++	kfree(file->private_data);	/* buffer allocated by getfh_open() */
++	return 0;
++}
++
++static const struct file_operations getfh_ops = {
++	.open		= getfh_open,
++	.read		= getfh_read,
++	.write		= getfh_write,
++	.release	= getfh_release,
++};
++/*-------------- end getfh ------------------------*/
++
++
++/*-------------- start recall layout --------------*/
++/*
++ * proc "recall" write: parse one newline-terminated line of at most 128
++ * bytes, "<path>[ <offset>[ <length>]]", and pass it on to
++ * spnfs_test_layoutrecall().  Returns @count on success or a negative
++ * errno.
++ */
++static ssize_t recall_write(struct file *file, const char __user *buf,
++			    size_t count, loff_t *offset)
++{
++	char input[128];
++	char *cursor, *path, *field, *eol;
++	u64 off = 0, len = 0;
++	int rc;
++
++	if (count > sizeof(input))
++		return -EINVAL;
++	if (copy_from_user(input, buf, count))
++		return -EFAULT;
++
++	/* assumes newline-terminated path */
++	eol = memchr(input, '\n', count);
++	if (!eol)
++		return -EINVAL;
++	*eol = '\0';
++
++	/*
++	 * Scan for path and, optionally, an offset and length
++	 * of a layout segment to be recalled; if there are two
++	 * fields, they're assumed to be path and offset.
++	 */
++	cursor = input;
++	path = strsep(&cursor, " ");
++	if (!path)
++		return -EINVAL;
++
++	/* both numbers are optional and must be plain decimal */
++	field = strsep(&cursor, " ");
++	if (field && strict_strtoull(field, 10, &off) != 0)
++		return -EINVAL;
++	if (field)
++		field = strsep(&cursor, " ");
++	if (field && strict_strtoull(field, 10, &len) != 0)
++		return -EINVAL;
++
++	rc = spnfs_test_layoutrecall(path, off, len);
++	if (rc != 0)
++		return rc;
++
++	return count;
++}
++
++static const struct file_operations recall_ops = {
++	.write		= recall_write,
++};
++/*-------------- end recall layout --------------*/
++
++
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++/*-------------- start layoutseg -------------------------*/
++/* proc "layoutseg" write: a first byte of '0' turns layout segments off, */
++/* any other byte turns them on.  Only that one byte is looked at.        */
++static ssize_t layoutseg_write(struct file *file, const char __user *buf,
++			       size_t count, loff_t *offset)
++{
++	char cmd[3];
++
++	if (copy_from_user(cmd, buf, 1))
++		return -EFAULT;
++
++	spnfs_use_layoutsegments = (cmd[0] != '0');
++
++	return count;
++}
++
++static const struct file_operations layoutseg_ops = {
++	.write		= layoutseg_write,
++};
++/*-------------- end layoutseg ---------------------------*/
++
++/*-------------- start layoutsegsize -------------------------*/
++static ssize_t layoutsegsize_write(struct file *file, const char __user *buf,
++				   size_t count, loff_t *offset)
++{
++	char cmd[50] = "";	/* zero-filled, so the copy is always terminated */
++
++	if (copy_from_user(cmd, buf, min(count, sizeof(cmd) - 1)))
++		return -EFAULT;
++	layoutsegment_size = simple_strtoull(cmd, NULL, 10);
++
++	return count;
++}
++
++static const struct file_operations layoutsegsize_ops = {
++	.write		= layoutsegsize_write,
++};
++/*-------------- end layoutsegsize ---------------------------*/
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++/*
++ * Create /proc/fs/spnfs and the control files inside it.  Every file is
++ * registered together with its file_operations, so it is never visible
++ * without them; if one cannot be created, those already made are removed
++ * again.  Returns 0 or -ENOMEM.
++ */
++int
++spnfs_init_proc(void)
++{
++	/* (name, ops) of each file, in creation order */
++	static const struct {
++		const char *name;
++		const struct file_operations *fops;
++	} files[] = {
++		{ "fs/spnfs/ctl",		&ctl_ops },
++		{ "fs/spnfs/config",		&config_ops },
++		{ "fs/spnfs/getfh",		&getfh_ops },
++		{ "fs/spnfs/recall",		&recall_ops },
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++		{ "fs/spnfs/layoutseg",		&layoutseg_ops },
++		{ "fs/spnfs/layoutsegsize",	&layoutsegsize_ops },
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++	};
++	int i;
++
++	if (!proc_mkdir("fs/spnfs", NULL))
++		return -ENOMEM;
++
++	for (i = 0; i < ARRAY_SIZE(files); i++) {
++		/* mode 0, exactly as the entries were created before */
++		if (!proc_create(files[i].name, 0, NULL, files[i].fops))
++			goto unwind;
++	}
++
++	return 0;
++
++unwind:
++	/* files[i] is the one that failed; remove the ones before it */
++	while (--i >= 0)
++		remove_proc_entry(files[i].name, NULL);
++	remove_proc_entry("fs/spnfs", NULL);
++	return -ENOMEM;
++}
++#endif /* CONFIG_PROC_FS */
+diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-08-23 12:09:03.324501390 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-08-23 12:09:03.324501390 -0400
+@@ -0,0 +1,878 @@
++/*
++ * fs/nfsd/spnfs_ops.c
++ *
++ * Communication layer between spNFS kernel and userspace
++ *
++ */
++/******************************************************************************
++
++(c) 2007 Network Appliance, Inc.  All Rights Reserved.
++
++Network Appliance provides this source code under the GPL v2 License.
++The GPL v2 license is available at
++http://opensource.org/licenses/gpl-license.php.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++******************************************************************************/
++
++#include <linux/sched.h>
++#include <linux/file.h>
++#include <linux/namei.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++#include "pnfsd.h"
++
++/* comment out CONFIG_SPNFS_TEST for non-test behaviour */
++/* #define CONFIG_SPNFS_TEST 1 */
++
++#define	NFSDDBG_FACILITY		NFSDDBG_PNFS
++
++/*
++ * The functions that are called from elsewhere in the kernel
++ * to perform tasks in userspace
++ *
++ */
++
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++extern int spnfs_use_layoutsegments;
++extern uint64_t layoutsegment_size;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++extern struct spnfs *global_spnfs;
++
++int
++spnfs_layout_type(struct super_block *sb)
++{
++	return LAYOUT_NFSV4_1_FILES;	/* the same for every @sb, which is unused */
++}
++
++/*
++ * Upcall for the file layout of @inode and encode it into @xdr; returns an
++ * nfsstat4 (the encoder's status, or an NFS4ERR_* code on any failure).
++ */
++enum nfsstat4
++spnfs_layoutget(struct inode *inode, struct exp_xdr_stream *xdr,
++		const struct nfsd4_pnfs_layoutget_arg *lg_arg,
++		struct nfsd4_pnfs_layoutget_res *lg_res)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	struct pnfs_filelayout_layout *flp = NULL;
++	int status, i;
++	enum nfsstat4 nfserr;
++
++	/* The request is zeroed: all of it is copied out to userspace. */
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (im == NULL || res == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	im->im_type = SPNFS_TYPE_LAYOUTGET;
++	im->im_args.layoutget_args.inode = inode->i_ino;
++	im->im_args.layoutget_args.generation = inode->i_generation;
++
++	/* call function to queue the msg for upcall */
++	if (spnfs_upcall(spnfs, im, res) != 0) {
++		dprintk("failed spnfs upcall: layoutget\n");
++		nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		goto layoutget_cleanup;
++	}
++	status = res->layoutget_res.status;
++	if (status != 0) {
++		/* FIXME? until user mode is fixed, translate system error */
++		switch (status) {
++		case -E2BIG:
++		case -ETOOSMALL:
++			nfserr = NFS4ERR_TOOSMALL;
++			break;
++		case -ENOMEM:
++		case -EAGAIN:
++		case -EINTR:
++			nfserr = NFS4ERR_LAYOUTTRYLATER;
++			break;
++		case -ENOENT:
++			nfserr = NFS4ERR_BADLAYOUT;
++			break;
++		default:
++			nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		}
++		dprintk("spnfs layout_get upcall: status=%d nfserr=%u\n",
++			status, nfserr);
++		goto layoutget_cleanup;
++	}
++
++	lg_res->lg_return_on_close = 0;
++#if defined(CONFIG_SPNFS_LAYOUTSEGMENTS)
++	/* if spnfs_use_layoutsegments & layoutsegment_size == 0, use */
++	/* the amount requested by the client.			      */
++	if (spnfs_use_layoutsegments) {
++		if (layoutsegment_size != 0)
++			lg_res->lg_seg.length = layoutsegment_size;
++	} else
++		lg_res->lg_seg.length = NFS4_MAX_UINT64;
++#else
++	lg_res->lg_seg.length = NFS4_MAX_UINT64;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++	/* The reply's count and lengths index fixed-size arrays: check them. */
++	nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++	if (res->layoutget_res.stripe_count >
++	    ARRAY_SIZE(res->layoutget_res.flist))
++		goto layoutget_cleanup;
++
++	flp = kmalloc(sizeof(struct pnfs_filelayout_layout), GFP_KERNEL);
++	if (flp == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	flp->device_id.sbid = lg_arg->lg_sbid;
++	flp->device_id.devid = res->layoutget_res.devid;
++	flp->lg_layout_type = 1; /* XXX */
++	flp->lg_stripe_type = res->layoutget_res.stripe_type;
++	flp->lg_commit_through_mds = 0;
++	flp->lg_stripe_unit =  res->layoutget_res.stripe_size;
++	flp->lg_first_stripe_index = 0;
++	flp->lg_pattern_offset = 0;
++	flp->lg_fh_length = res->layoutget_res.stripe_count;
++	flp->lg_fh_list = kmalloc(flp->lg_fh_length * sizeof(struct knfsd_fh),
++				  GFP_KERNEL);
++	if (flp->lg_fh_list == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	/* FIX: extra copy; res.flist entries should simply be knfsd_fh's. */
++	for (i = 0; i < flp->lg_fh_length; i++) {
++		if (res->layoutget_res.flist[i].fh_len >
++		    sizeof(flp->lg_fh_list[i].fh_base))
++			goto layoutget_cleanup;
++		flp->lg_fh_list[i].fh_size = res->layoutget_res.flist[i].fh_len;
++		memcpy(&flp->lg_fh_list[i].fh_base,
++		       res->layoutget_res.flist[i].fh_val,
++		       res->layoutget_res.flist[i].fh_len);
++	}
++
++	/* encode the layoutget body */
++	nfserr = filelayout_encode_layout(xdr, flp);
++layoutget_cleanup:
++	if (flp)
++		kfree(flp->lg_fh_list);
++	kfree(flp);
++	kfree(im);
++	kfree(res);
++	return nfserr;
++}
++
++int
++spnfs_layoutcommit(void)
++{
++	return 0;	/* stub: does nothing and always reports success */
++}
++
++int
++spnfs_layoutreturn(struct inode *inode,
++		   const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	return 0;	/* stub: both arguments are ignored, always success */
++}
++
++/*
++ * Send a layout recall for @inode.  Only RETURN_FILE is implemented; FSID
++ * and ALL recalls are logged and return 0.  -EINVAL for any other @type.
++ */
++int
++spnfs_layoutrecall(struct inode *inode, int type, u64 offset, u64 len)
++{
++	struct super_block *sb;
++	struct nfsd4_pnfs_cb_layout lr;
++
++	switch (type) {
++	case RETURN_FILE:
++		sb = inode->i_sb;
++		dprintk("%s: recalling layout for ino = %lu\n",
++			__func__, inode->i_ino);
++		break;
++	case RETURN_FSID:
++		dprintk("%s: recalling layout for fsid x (unimplemented)\n",
++			__func__);
++		return 0;
++	case RETURN_ALL:
++		/* XXX figure out how to get a sb since there's no inode ptr */
++		dprintk("%s: recalling all layouts (unimplemented)\n",
++			__func__);
++		return 0;
++	default:
++		return -EINVAL;
++	}
++
++	memset(&lr, 0, sizeof(lr));	/* no stack garbage in unset fields */
++	lr.cbl_recall_type = type;
++	lr.cbl_seg.layout_type = LAYOUT_NFSV4_1_FILES;
++	lr.cbl_seg.offset = offset;
++	lr.cbl_seg.length = len;
++	lr.cbl_seg.iomode = IOMODE_ANY;
++	nfsd_layout_recall_cb(sb, inode, &lr);
++	return 0;
++}
++
++
++/*
++ * Test hook behind the proc "recall" file: recall all layouts when @path
++ * is "all", otherwise the layout of the file @path names.  A @len of 0 is
++ * replaced by NFS4_MAX_UINT64.  Returns -ENOENT if the lookup fails, else
++ * the result of spnfs_layoutrecall().
++ */
++int
++spnfs_test_layoutrecall(char *path, u64 offset, u64 len)
++{
++	struct nameidata nd;
++	struct inode *inode = NULL;
++	int type = RETURN_ALL;
++	int rc;
++
++	dprintk("%s: path=%s, offset=%llu, len=%llu\n",
++		__func__, path, offset, len);
++
++	if (strcmp(path, "all") != 0) {
++		if (path_lookup(path, 0, &nd) != 0)
++			return -ENOENT;
++
++		/*
++		 * XXX todo: add a RETURN_FSID scenario here...maybe if
++		 * inode is a dir...
++		 */
++		inode = nd.path.dentry->d_inode;
++		type = RETURN_FILE;
++	}
++
++	rc = spnfs_layoutrecall(inode, type, offset,
++				len == 0 ? NFS4_MAX_UINT64 : len);
++
++	if (type == RETURN_FILE)
++		path_put(&nd.path);
++	return rc;
++}
++
++/*
++ * Upcall for the device after @gd_res->gd_cookie; @gd_res gets gd_eof or
++ * the next devid/cookie/verf.  Returns daemon status, -ENOMEM or -EIO.
++ */
++int
++spnfs_getdeviceiter(struct super_block *sb,
++		    u32 layout_type,
++		    struct nfsd4_pnfs_dev_iter_res *gd_res)
++{
++	struct spnfs *spnfs = global_spnfs;   /* XXX keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++
++	/* The request is zeroed: all of it is copied out to userspace. */
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (im == NULL || res == NULL) {
++		status = -ENOMEM;
++		goto getdeviceiter_out;
++	}
++
++	im->im_type = SPNFS_TYPE_GETDEVICEITER;
++	im->im_args.getdeviceiter_args.cookie = gd_res->gd_cookie;
++	im->im_args.getdeviceiter_args.verf = gd_res->gd_verf;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto getdeviceiter_out;
++	}
++	status = res->getdeviceiter_res.status;
++
++	if (res->getdeviceiter_res.eof)
++		gd_res->gd_eof = 1;
++	else {
++		gd_res->gd_devid = res->getdeviceiter_res.devid;
++		gd_res->gd_cookie = res->getdeviceiter_res.cookie;
++		gd_res->gd_verf = res->getdeviceiter_res.verf;
++		gd_res->gd_eof = 0;
++	}
++
++getdeviceiter_out:
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++#ifdef CONFIG_SPNFS_TEST
++/*
++ * Setup the rq_res xdr_buf.  The svc_rqst rq_respages[1] page contains the
++ * 1024 encoded stripe indices.
++ *
++ * Skip the devaddr4 length and encode the indices count (1024) in the
++ * rq_res.head and set the rq_res.head length.
++ *
++ * Set the rq_res page_len to 4096 (for the 1024 stripe indices).
++ * Set the rq_res xdr_buf tail base to rq_respages[0] just after the
++ * rq_res head to hold the rest of the getdeviceinfo return.
++ *
++ * So rq_respages[rq_resused - 1] contains the rq_res.head and rq_res.tail and
++ * rq_respages[rq_resused] contains the rq_res.pages.
++ */
++static int spnfs_test_indices_xdr(struct pnfs_xdr_info *info,
++				  const struct pnfs_filelayout_device *fdev)
++{
++	struct nfsd4_compoundres *resp = info->resp;
++	struct svc_rqst *rqstp = resp->rqstp;
++	struct xdr_buf *xb = &resp->rqstp->rq_res;
++	__be32 *p;
++
++	p = nfsd4_xdr_reserve_space(resp, 8);
++	p++; /* Fill in length later */
++	*p++ = cpu_to_be32(fdev->fl_stripeindices_length); /* 1024 */
++	resp->p = p;
++
++	xb->head[0].iov_len = (char *)resp->p - (char *)xb->head[0].iov_base;
++	xb->pages = &rqstp->rq_respages[rqstp->rq_resused];
++	xb->page_base = 0;
++	xb->page_len = PAGE_SIZE; /* page of 1024 encoded indices */
++	xb->tail[0].iov_base = resp->p;
++	resp->end = xb->head[0].iov_base + PAGE_SIZE;
++	xb->tail[0].iov_len = (char *)resp->end - (char *)resp->p;
++	return 0;
++}
++/*
++ * Return a stripeindices of length 1024 to test
++ * the pNFS client multipage getdeviceinfo implementation.
++ *
++ * Encode a page of stripe indices.
++ */
++static void spnfs_set_test_indices(struct pnfs_filelayout_device *fldev,
++				  struct spnfs_device *dev,
++				  struct pnfs_devinfo_arg *info)
++{
++	struct svc_rqst *rqstp = info->xdr.resp->rqstp;
++	__be32 *p;
++	int i, j = 0;
++
++	p = (__be32 *)page_address(rqstp->rq_respages[rqstp->rq_resused]);
++	fldev->fl_stripeindices_length = 1024;
++	/* round-robin the data servers' device index into the stripe indices */
++	for (i = 0; i < 1024; i++) {
++		*p++ = cpu_to_be32(j);
++		if (j < dev->dscount - 1)
++			j++;
++		else
++			j = 0;
++	}
++	fldev->fl_stripeindices_list = NULL;
++}
++#endif /* CONFIG_SPNFS_TEST */
++
++/*
++ * Upcall for the data-server list of device @devid and encode it into
++ * @xdr.  Returns the daemon's or encoder's status (0 on success),
++ * -ENOMEM, or -EIO if the upcall failed.
++ */
++int
++spnfs_getdeviceinfo(struct super_block *sb, struct exp_xdr_stream *xdr,
++		    u32 layout_type,
++		    const struct nfsd4_pnfs_deviceid *devid)
++{
++	struct spnfs *spnfs = global_spnfs;
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	struct spnfs_device *dev;
++	struct pnfs_filelayout_device *fldev = NULL;
++	struct pnfs_filelayout_multipath *mp = NULL;
++	struct pnfs_filelayout_devaddr *fldap = NULL;
++	int status = 0, i, len;
++
++	/* The request is zeroed: all of it is copied out to userspace. */
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (im == NULL || res == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++
++	im->im_type = SPNFS_TYPE_GETDEVICEINFO;
++	/* XXX FIX: figure out what to do about fsid */
++	im->im_args.getdeviceinfo_args.devid = devid->devid;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto getdeviceinfo_out;
++	}
++	status = res->getdeviceinfo_res.status;
++	if (status != 0)
++		goto getdeviceinfo_out;
++
++	dev = &res->getdeviceinfo_res.devinfo;
++
++	/* Fill in the device data, i.e., nfs4_1_file_layout_ds_addr4 */
++	fldev = kzalloc(sizeof(struct pnfs_filelayout_device), GFP_KERNEL);
++	if (fldev == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++
++	/* Stripe count is the same as data server count for our purposes */
++	fldev->fl_stripeindices_length = dev->dscount;
++	fldev->fl_device_length = dev->dscount;
++
++	/* Set stripe indices */
++#ifdef CONFIG_SPNFS_TEST
++	spnfs_set_test_indices(fldev, dev, info);
++	fldev->fl_enc_stripe_indices = spnfs_test_indices_xdr;
++#else /* CONFIG_SPNFS_TEST */
++	fldev->fl_stripeindices_list =
++		kmalloc(fldev->fl_stripeindices_length * sizeof(u32),
++			GFP_KERNEL);
++	if (fldev->fl_stripeindices_list == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++	for (i = 0; i < fldev->fl_stripeindices_length; i++)
++		fldev->fl_stripeindices_list[i] = i;
++#endif /* CONFIG_SPNFS_TEST */
++
++	/*
++	 * Set the device's data server addresses.  No multipath for spnfs, so
++	 * mp length is always 1.  Zeroed so cleanup sees NULL for unbuilt ones.
++	 */
++	fldev->fl_device_list =
++		kcalloc(fldev->fl_device_length,
++			sizeof(struct pnfs_filelayout_multipath),
++			GFP_KERNEL);
++	if (fldev->fl_device_list == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++	for (i = 0; i < fldev->fl_device_length; i++) {
++		mp = &fldev->fl_device_list[i];
++		mp->fl_multipath_length = 1;
++		mp->fl_multipath_list =
++			kzalloc(sizeof(struct pnfs_filelayout_devaddr),
++				GFP_KERNEL);
++		if (mp->fl_multipath_list == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		fldap = mp->fl_multipath_list;
++
++		/*
++		 * Copy the netid into the device address, for example: "tcp"
++		 */
++		len = strlen(dev->dslist[i].netid);
++		fldap->r_netid.data = kmalloc(len, GFP_KERNEL);
++		if (fldap->r_netid.data == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		memcpy(fldap->r_netid.data, dev->dslist[i].netid, len);
++		fldap->r_netid.len = len;
++
++		/*
++		 * Copy the network address into the device address,
++		 * for example: "10.35.9.16.08.01"
++		 */
++		len = strlen(dev->dslist[i].addr);
++		fldap->r_addr.data = kmalloc(len, GFP_KERNEL);
++		if (fldap->r_addr.data == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		memcpy(fldap->r_addr.data, dev->dslist[i].addr, len);
++		fldap->r_addr.len = len;
++	}
++
++	/* encode the device data */
++	status = filelayout_encode_devinfo(xdr, fldev);
++
++getdeviceinfo_out:
++	if (fldev) {
++		kfree(fldev->fl_stripeindices_list);
++		if (fldev->fl_device_list) {
++			for (i = 0; i < fldev->fl_device_length; i++) {
++				fldap =
++				    fldev->fl_device_list[i].fl_multipath_list;
++				if (fldap == NULL)
++					break;	/* entries are built in order */
++				kfree(fldap->r_netid.data);
++				kfree(fldap->r_addr.data);
++				kfree(fldap);
++			}
++			kfree(fldev->fl_device_list);
++		}
++		kfree(fldev);
++	}
++
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++int
++spnfs_setattr(void)
++{
++	return 0;	/* stub: does nothing and always reports success */
++}
++
++/*
++ * Upcall reporting an open of @inode (create/createmode/truncate taken from
++ * @open).  Returns the daemon's status, -ENOMEM, or -EIO on upcall failure.
++ */
++int
++spnfs_open(struct inode *inode, struct nfsd4_open *open)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++
++	/* The request is zeroed: all of it is copied out to userspace. */
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (im == NULL || res == NULL) {
++		status = -ENOMEM;
++		goto open_out;
++	}
++
++	im->im_type = SPNFS_TYPE_OPEN;
++	im->im_args.open_args.inode = inode->i_ino;
++	im->im_args.open_args.generation = inode->i_generation;
++	im->im_args.open_args.create = open->op_create;
++	im->im_args.open_args.createmode = open->op_createmode;
++	im->im_args.open_args.truncate = open->op_truncate;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto open_out;
++	}
++	status = res->open_res.status;
++
++open_out:
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++int
++spnfs_create(void)
++{
++	return 0;	/* stub: does nothing and always reports success */
++}
++
++/*
++ * Invokes the spnfsd with the inode number of the object to remove.
++ * The file has already been removed on the MDS, so all the spnfsd
++ * daemon does is remove the stripes.
++ * Returns 0 on success otherwise error code
++ */
++int
++spnfs_remove(unsigned long ino, unsigned long generation)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++
++	/* The request is zeroed: all of it is copied out to userspace. */
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto remove_out;
++	}
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto remove_out;
++	}
++
++	im->im_type = SPNFS_TYPE_REMOVE;
++	im->im_args.remove_args.inode = ino;
++	im->im_args.remove_args.generation = generation;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto remove_out;
++	}
++	status = res->remove_res.status;
++
++remove_out:
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++static int
++read_one(struct inode *inode, loff_t offset, size_t len, char *buf,
++	 struct file **filp)
++{
++	loff_t bufoffset = 0, soffset, pos, snum, soff, tmp;
++	size_t iolen;
++	int completed = 0, ds, err;
++	/* Read up to @len bytes at logical @offset into @buf, a stripe at a time. */
++	while (len > 0) {
++		tmp = offset;
++		soff = do_div(tmp, spnfs_config->stripe_size);	/* offset in stripe */
++		snum = tmp;					/* stripe number */
++		ds = do_div(tmp, spnfs_config->num_ds);		/* index into @filp */
++		if (spnfs_config->dense_striping == 0)
++			soffset = offset;
++		else {
++			tmp = snum;
++			do_div(tmp, spnfs_config->num_ds);
++			soffset = tmp * spnfs_config->stripe_size + soff;
++		}
++		if (len < spnfs_config->stripe_size - soff)
++			iolen = len;
++		else
++			iolen = spnfs_config->stripe_size - soff;
++		/* never read past the end of the current stripe unit */
++		pos = soffset;
++		err = vfs_read(filp[ds], buf + bufoffset, iolen, &pos);
++		if (err < 0)
++			return -EIO;
++		if (err == 0)
++			break;
++		filp[ds]->f_pos = pos;
++		iolen = err;
++		completed += iolen;
++		len -= iolen;
++		offset += iolen;
++		bufoffset += iolen;
++	}
++	/* bytes actually read; less than @len if a file returned 0 bytes */
++	return completed;
++}
++
++/* Read the first @vlen iovecs of @rqstp at @offset from the per-data-server */
++/* files of @inode; *@lenp gets the bytes read.  nfs_ok or nfserr_io.       */
++static __be32
++read(struct inode *inode, loff_t offset, unsigned long *lenp, int vlen,
++     struct svc_rqst *rqstp)
++{
++	int i, vnum, err, bytecount = 0;
++	char path[128];
++	struct file *filp[SPNFS_MAX_DATA_SERVERS];
++	size_t iolen;
++	__be32 status = nfs_ok;
++
++	/*
++	 * XXX We should just be doing this at open time, but it gets kind of
++	 * messy storing this info in nfsd's state structures and piggybacking
++	 * its path through the various state handling functions.  Revisit this.
++	 */
++	memset(filp, 0, sizeof(filp));
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		snprintf(path, sizeof(path), "%s/%ld.%u", spnfs_config->ds_dir[i],
++			 inode->i_ino, inode->i_generation);
++		filp[i] = filp_open(path, O_RDONLY | O_LARGEFILE, 0);
++		if (IS_ERR(filp[i])) {	/* filp_open() never returns NULL */
++			filp[i] = NULL;	/* keep cleanup away from the ERR_PTR */
++			status = nfserr_io;
++			goto read_out;
++		}
++		get_file(filp[i]);
++	}
++
++	for (vnum = 0 ; vnum < vlen ; vnum++) {
++		iolen = rqstp->rq_vec[vnum].iov_len;
++		err = read_one(inode, offset + bytecount, iolen,
++			       (char *)rqstp->rq_vec[vnum].iov_base, filp);
++		if (err < 0) {
++			status = nfserr_io;
++			goto read_out;
++		}
++		bytecount += err;
++		if (err < iolen)	/* short read: stop here */
++			break;
++	}
++
++read_out:
++	*lenp = bytecount;
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		if (filp[i]) {
++			filp_close(filp[i], current->files);
++			fput(filp[i]);
++		}
++	}
++	return status;
++}
++
++/* Striped READ entry point: refuses to run until spnfs_config is set. */
++__be32
++spnfs_read(struct inode *inode, loff_t offset, unsigned long *lenp, int vlen,
++	   struct svc_rqst *rqstp)
++{
++	if (!spnfs_config) {
++		printk(KERN_ERR "Please upgrade to latest spnfsd\n");
++		return nfserr_notsupp;
++	}
++	return read(inode, offset, lenp, vlen, rqstp);
++}
++
++/*
++ * Write @len bytes from @buf at file @offset, one stripe unit at a time,
++ * to the data-server files in @filp.  Returns the number of bytes written,
++ * or -EIO when a vfs_write() fails or makes no progress.
++ */
++static int
++write_one(struct inode *inode, loff_t offset, size_t len, char *buf,
++	  struct file **filp)
++{
++	loff_t copied = 0, pos, row, soff, room;
++	size_t chunk;
++	int total = 0, ds, nwritten;
++
++	while (len > 0) {
++		/* do_div() leaves the quotient in its first argument */
++		row = offset;
++		soff = do_div(row, spnfs_config->stripe_size);
++		ds = do_div(row, spnfs_config->num_ds);
++		/* dense striping packs each server's stripe units together */
++		pos = offset;
++		if (spnfs_config->dense_striping != 0)
++			pos = row * spnfs_config->stripe_size + soff;
++		/* never cross a stripe-unit boundary in one vfs_write() */
++		room = spnfs_config->stripe_size - soff;
++		chunk = len < room ? len : room;
++
++		nwritten = vfs_write(filp[ds], buf + copied, chunk, &pos);
++		/* a 0 return would never shrink len and spin forever */
++		if (nwritten <= 0)
++			return -EIO;
++		filp[ds]->f_pos = pos;
++		total += nwritten;
++		len -= nwritten;
++		offset += nwritten;
++		copied += nwritten;
++	}
++
++	return total;
++}
++
++/*
++ * Write rqstp->rq_vec[0..vlen) to the per-data-server stripe files.
++ * Returns nfs_ok, or nfserr_io on an open failure or incomplete write.
++ * XXX The files should be opened at open time, not per request.  Revisit.
++ */
++static __be32
++write(struct inode *inode, loff_t offset, size_t len, int vlen,
++      struct svc_rqst *rqstp)
++{
++	int i, vnum, err, bytecount = 0;
++	char path[128];
++	struct file *filp[SPNFS_MAX_DATA_SERVERS];
++	size_t iolen;
++	__be32 status = nfs_ok;
++
++	memset(filp, 0, SPNFS_MAX_DATA_SERVERS * sizeof(struct file *));
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		snprintf(path, sizeof(path), "%s/%ld.%u",
++			 spnfs_config->ds_dir[i], inode->i_ino,
++			 inode->i_generation);
++		filp[i] = filp_open(path, O_RDWR | O_LARGEFILE, 0);
++		if (IS_ERR(filp[i])) {	/* ERR_PTR on failure, never NULL */
++			filp[i] = NULL;	/* keep write_out off the ERR_PTR */
++			status = nfserr_io;
++			goto write_out;
++		}
++		get_file(filp[i]);
++	}
++
++	for (vnum = 0; vnum < vlen; vnum++) {
++		iolen = rqstp->rq_vec[vnum].iov_len;
++		err = write_one(inode, offset + bytecount, iolen,
++				(char *)rqstp->rq_vec[vnum].iov_base, filp);
++		if (err != iolen) {
++			dprintk("spnfs_write: err=%d expected %Zd\n", err, iolen);
++			status = nfserr_io;
++			goto write_out;
++		}
++		bytecount += rqstp->rq_vec[vnum].iov_len;
++	}
++
++write_out:
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		if (filp[i]) {
++			filp_close(filp[i], current->files);
++			fput(filp[i]);
++		}
++	}
++	return status;
++}
++
++/* Striped WRITE entry point: refuses to run until spnfs_config is set. */
++__be32
++spnfs_write(struct inode *inode, loff_t offset, size_t len, int vlen,
++	    struct svc_rqst *rqstp)
++{
++	if (!spnfs_config) {
++		printk(KERN_ERR "Please upgrade to latest spnfsd\n");
++		return nfserr_notsupp;
++	}
++	return write(inode, offset, len, vlen, rqstp);
++}
++
++int
++spnfs_commit(void)
++{
++	return 0;	/* no work is done here; success is always reported */
++}
++
++/*
++ * Return the state for this object.  Placeholder for now: @inode, @fh and
++ * @arg are ignored and 0 (success) is returned, so the existing state is used.
++ */
++int
++spnfs_get_state(struct inode *inode, struct knfsd_fh *fh, struct pnfs_get_state *arg)
++{
++	return 0;
++}
++
++/*
++ * Copy the NFS filehandle of the file open on @fd into @fh.
++ * Returns 0, or -EIO when fget() finds no file for @fd.
++ */
++int
++spnfs_getfh(int fd, struct nfs_fh *fh)
++{
++	struct file *filp = fget(fd);
++
++	if (!filp)
++		return -EIO;
++	/* NOTE(review): NFS_FH() presumes an NFS inode -- confirm callers */
++	memcpy(fh, NFS_FH(filp->f_dentry->d_inode), sizeof(*fh));
++	fput(filp);
++	return 0;
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
+--- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-08-23 12:08:29.096512142 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 12:09:03.325501424 -0400
+@@ -242,6 +242,12 @@ struct nfs4_client {
+ 	u32			cl_cb_seq_nr;
+ 	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
+ 						/* wait here for slots */
++#if defined(CONFIG_PNFSD)
++	struct list_head	cl_layouts;	/* outstanding layouts */
++	struct list_head	cl_layoutrecalls; /* outstanding layoutrecall
++						     callbacks */
++	atomic_t		cl_deviceref;	/* Num outstanding devs */
++#endif /* CONFIG_PNFSD */
+ };
+ 
+ static inline void
+@@ -342,12 +348,31 @@ struct nfs4_file {
+ 	struct list_head        fi_hash;    /* hash by "struct inode *" */
+ 	struct list_head        fi_stateids;
+ 	struct list_head	fi_delegations;
++#if defined(CONFIG_PNFSD)
++	struct list_head	fi_layouts;
++	struct list_head	fi_layout_states;
++#endif /* CONFIG_PNFSD */
+ 	struct inode		*fi_inode;
+ 	u32                     fi_id;      /* used with stateowner->so_id 
+ 					     * for stateid_hashtbl hash */
+ 	bool			fi_had_conflict;
++#if defined(CONFIG_PNFSD)
++	/* used by layoutget / layoutrecall */
++	struct nfs4_fsid	fi_fsid;
++	u32			fi_fhlen;
++	u8			fi_fhval[NFS4_FHSIZE];
++#endif /* CONFIG_PNFSD */
+ };
+ 
++#if defined(CONFIG_PNFSD)
++/* pNFS Metadata server state */
++
++struct pnfs_ds_dev_entry {
++	struct list_head	dd_dev_entry; /* st_pnfs_ds_id entry */
++	u32			dd_dsid;
++};
++#endif /* CONFIG_PNFSD */
++
+ /*
+ * nfs4_stateid can either be an open stateid or (eventually) a lock stateid
+ *
+@@ -370,6 +395,9 @@ struct nfs4_stateid {
+ 	struct list_head              st_perfile;
+ 	struct list_head              st_perstateowner;
+ 	struct list_head              st_lockowners;
++#if defined(CONFIG_PNFSD)
++	struct list_head              st_pnfs_ds_id;
++#endif /* CONFIG_PNFSD */
+ 	struct nfs4_stateowner      * st_stateowner;
+ 	struct nfs4_file            * st_file;
+ 	stateid_t                     st_stateid;
+@@ -421,6 +449,34 @@ extern void nfsd4_recdir_purge_old(void)
+ extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
+ extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
+ extern void release_session_client(struct nfsd4_session *);
++extern void nfsd4_free_slab(struct kmem_cache **);
++extern struct nfs4_file *find_file(struct inode *);
++extern struct nfs4_file *find_alloc_file(struct inode *, struct svc_fh *);
++extern void put_nfs4_file(struct nfs4_file *);
++extern void get_nfs4_file(struct nfs4_file *);
++extern struct nfs4_client *find_confirmed_client(clientid_t *);
++extern struct nfs4_stateid *find_stateid(stateid_t *, int flags);
++extern struct nfs4_delegation *find_delegation_stateid(struct inode *, stateid_t *);
++extern __be32 nfs4_check_stateid(stateid_t *);
++extern void expire_client_lock(struct nfs4_client *);
++extern int filter_confirmed_clients(int (* func)(struct nfs4_client *, void *), void *);
++
++#if defined(CONFIG_PNFSD)
++extern int nfsd4_init_pnfs_slabs(void);
++extern void nfsd4_free_pnfs_slabs(void);
++extern void pnfs_expire_client(struct nfs4_client *);
++extern void release_pnfs_ds_dev_list(struct nfs4_stateid *);
++extern void nfs4_pnfs_state_init(void);
++extern void nfs4_pnfs_state_shutdown(void);
++extern void nfs4_ds_get_verifier(stateid_t *, struct super_block *, u32 *);
++extern int nfs4_preprocess_pnfs_ds_stateid(struct svc_fh *, stateid_t *);
++#else /* CONFIG_PNFSD */
++static inline void nfsd4_free_pnfs_slabs(void) {}
++static inline int nfsd4_init_pnfs_slabs(void) { return 0; }
++static inline void pnfs_expire_client(struct nfs4_client *clp) {}
++static inline void release_pnfs_ds_dev_list(struct nfs4_stateid *stp) {}
++static inline void nfs4_pnfs_state_shutdown(void) {}
++#endif /* CONFIG_PNFSD */
+ 
+ static inline void
+ nfs4_put_stateowner(struct nfs4_stateowner *so)
+@@ -434,4 +490,24 @@ nfs4_get_stateowner(struct nfs4_stateown
+ 	kref_get(&so->so_ref);
+ }
+ 
++/* First offset past [start, start + len), clamped to NFS4_MAX_UINT64 on wrap */
++static inline u64
++end_offset(u64 start, u64 len)
++{
++	u64 sum = start + len;
++
++	return sum < start ? NFS4_MAX_UINT64 : sum;
++}
++
++/* Last octet in the range [start, start + len); @len must be non-zero */
++static inline u64
++last_byte_offset(u64 start, u64 len)
++{
++	u64 sum = start + len;
++
++	BUG_ON(!len);
++	/* a wrapped sum means the range reaches the top of the u64 space */
++	return sum <= start ? NFS4_MAX_UINT64 : sum - 1;
++}
++
+ #endif   /* NFSD4_STATE_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.c
+--- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-08-23 12:08:27.632564132 -0400
++++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-08-23 12:09:03.326501490 -0400
+@@ -37,7 +37,12 @@
+ #ifdef CONFIG_NFSD_V4
+ #include <linux/nfs4_acl.h>
+ #include <linux/nfsd_idmap.h>
++#include <linux/security.h>
++#include <linux/nfsd4_spnfs.h>
+ #endif /* CONFIG_NFSD_V4 */
++#if defined(CONFIG_SPNFS_BLOCK)
++#include <linux/nfsd4_block.h>
++#endif
+ 
+ #include "nfsd.h"
+ #include "vfs.h"
+@@ -383,6 +388,12 @@ nfsd_setattr(struct svc_rqst *rqstp, str
+ 					NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
+ 			if (err)
+ 				goto out;
++#if defined(CONFIG_SPNFS_BLOCK)
++			if (pnfs_block_enabled(inode, 0)) {
++				err = bl_layoutrecall(inode, RETURN_FILE,
++				    iap->ia_size, inode->i_size - iap->ia_size);
++			}
++#endif /* CONFIG_SPNFS_BLOCK */
+ 		}
+ 
+ 		/*
+@@ -1703,6 +1714,11 @@ nfsd_rename(struct svc_rqst *rqstp, stru
+ 	struct inode	*fdir, *tdir;
+ 	__be32		err;
+ 	int		host_err;
++#ifdef CONFIG_SPNFS
++	unsigned long ino = 0;
++	unsigned long generation = 0;
++	unsigned int nlink = 0;
++#endif /* CONFIG_SPNFS */
+ 
+ 	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
+ 	if (err)
+@@ -1766,7 +1782,26 @@ nfsd_rename(struct svc_rqst *rqstp, stru
+ 	if (host_err)
+ 		goto out_dput_new;
+ 
++#ifdef CONFIG_SPNFS
++	/*
++	 * If the target is a preexisting regular file, remember the
++	 * inode number and generation so we can delete the stripes;
++	 * save the link count as well so that the stripes only get
++	 * deleted when the last link is deleted.
++	 */
++	if (ndentry && ndentry->d_inode && S_ISREG(ndentry->d_inode->i_mode)) {
++		ino = ndentry->d_inode->i_ino;
++		generation = ndentry->d_inode->i_generation;
++		nlink = ndentry->d_inode->i_nlink;
++	}
++#endif /* CONFIG_SPNFS */
++
+ 	host_err = vfs_rename(fdir, odentry, tdir, ndentry);
++#ifdef CONFIG_SPNFS
++	if (spnfs_enabled() && (!host_err && ino && nlink == 1))
++		spnfs_remove(ino, generation);
++#endif /* CONFIG_SPNFS */
++
+ 	if (!host_err) {
+ 		host_err = commit_metadata(tfhp);
+ 		if (!host_err)
+@@ -1807,6 +1842,11 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 	struct inode	*dirp;
+ 	__be32		err;
+ 	int		host_err;
++#if defined(CONFIG_SPNFS)
++	unsigned long	ino;
++	unsigned long	generation;
++	unsigned int	nlink;
++#endif /* defined(CONFIG_SPNFS) */
+ 
+ 	err = nfserr_acces;
+ 	if (!flen || isdotent(fname, flen))
+@@ -1830,6 +1870,17 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 		goto out;
+ 	}
+ 
++#if defined(CONFIG_SPNFS)
++	/*
++	 * Remember the inode number to communicate to the spnfsd
++	 * for removal of stripes; save the link count as well so that
++	 * the stripes only get deleted when the last link is deleted.
++	 */
++	ino = rdentry->d_inode->i_ino;
++	generation = rdentry->d_inode->i_generation;
++	nlink = rdentry->d_inode->i_nlink;
++#endif /* defined(CONFIG_SPNFS) */
++
+ 	if (!type)
+ 		type = rdentry->d_inode->i_mode & S_IFMT;
+ 
+@@ -1854,6 +1905,29 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 	if (!host_err)
+ 		host_err = commit_metadata(fhp);
+ 
++#if defined(CONFIG_SPNFS)
++	/*
++	 * spnfs: notify spnfsd of removal to destroy stripes
++	 */
++/*
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (sb->s_export_op->spnfs_remove) {
++*/
++	dprintk("%s check if spnfs_enabled\n", __FUNCTION__);
++	if (spnfs_enabled() && nlink == 1) {
++		BUG_ON(ino == 0);
++		dprintk("%s calling spnfs_remove inumber=%ld\n",
++			__FUNCTION__, ino);
++		if (spnfs_remove(ino, generation) == 0) {
++			dprintk("%s spnfs_remove success\n", __FUNCTION__);
++		} else {
++			/* XXX How do we make this atomic? */
++			printk(KERN_WARNING "nfsd: pNFS could not "
++				"remove stripes for inode: %ld\n", ino);
++		}
++	}
++#endif /* defined(CONFIG_SPNFS) */
++
+ 	mnt_drop_write(fhp->fh_export->ex_path.mnt);
+ out_nfserr:
+ 	err = nfserrno(host_err);
+diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
+--- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-08-23 12:08:29.097425997 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 12:09:03.327451643 -0400
+@@ -37,6 +37,8 @@
+ #ifndef _LINUX_NFSD_XDR4_H
+ #define _LINUX_NFSD_XDR4_H
+ 
++#include <linux/nfsd/nfsd4_pnfs.h>
++
+ #include "state.h"
+ #include "nfsd.h"
+ 
+@@ -385,6 +387,51 @@ struct nfsd4_reclaim_complete {
+ 	u32 rca_one_fs;
+ };
+ 
++struct nfsd4_pnfs_getdevinfo {
++	struct nfsd4_pnfs_deviceid gd_devid;	/* request */
++	u32			gd_layout_type;	/* request */
++	u32			gd_maxcount;	/* request */
++	u32			gd_notify_types;/* request */
++	struct super_block	*gd_sb;
++};
++
++struct nfsd4_pnfs_getdevlist {
++	u32             gd_layout_type;	/* request */
++	u32		gd_maxdevices;	/* request */
++	u64		gd_cookie;	/* request - response */
++	u64		gd_verf;	/* request - response */
++	struct svc_fh 	*gd_fhp;	/* response */
++	u32		gd_eof;		/* response */
++};
++
++struct nfsd4_pnfs_layoutget {
++	u64			lg_minlength;	/* request */
++	u32			lg_signal;	/* request */
++	u32			lg_maxcount;	/* request */
++	struct svc_fh		*lg_fhp;	/* request */
++	stateid_t		lg_sid;		/* request/response */
++	struct nfsd4_layout_seg	lg_seg;		/* request/response */
++	u32			lg_roc;		/* response */
++};
++
++struct nfsd4_pnfs_layoutcommit {
++	struct nfsd4_pnfs_layoutcommit_arg args;
++	stateid_t		lc_sid;		/* request */
++	struct nfsd4_pnfs_layoutcommit_res res;
++};
++
++enum layoutreturn_flags {
++	LR_FLAG_INTERN = 1 << 0,	/* internal return */
++	LR_FLAG_EXPIRE = 1 << 1,	/* return on client expiration */
++};
++
++struct nfsd4_pnfs_layoutreturn {
++	struct nfsd4_pnfs_layoutreturn_arg args;
++	u32			lr_flags;	/* presumably LR_FLAG_* bits -- TODO confirm */
++	stateid_t		lr_sid;		/* request/response */
++	u32			lrs_present;	/* response */
++};
++
+ struct nfsd4_op {
+ 	int					opnum;
+ 	__be32					status;
+@@ -426,6 +473,13 @@ struct nfsd4_op {
+ 		struct nfsd4_destroy_session	destroy_session;
+ 		struct nfsd4_sequence		sequence;
+ 		struct nfsd4_reclaim_complete	reclaim_complete;
++#if defined(CONFIG_PNFSD)
++		struct nfsd4_pnfs_getdevlist	pnfs_getdevlist;
++		struct nfsd4_pnfs_getdevinfo	pnfs_getdevinfo;
++		struct nfsd4_pnfs_layoutget	pnfs_layoutget;
++		struct nfsd4_pnfs_layoutcommit	pnfs_layoutcommit;
++		struct nfsd4_pnfs_layoutreturn	pnfs_layoutreturn;
++#endif /* CONFIG_PNFSD */
+ 	} u;
+ 	struct nfs4_replay *			replay;
+ };
+diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.c
+--- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-23 12:08:29.039491912 -0400
++++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-23 12:09:03.328501680 -0400
+@@ -28,6 +28,7 @@
+ #include <linux/aio.h>
+ #include <linux/gfp.h>
+ #include <linux/swap.h>
++#include <linux/pnfs_xdr.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/system.h>
+@@ -36,6 +37,7 @@
+ #include "internal.h"
+ #include "iostat.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_FILE
+ 
+@@ -388,12 +390,17 @@ static int nfs_write_begin(struct file *
+ 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ 	struct page *page;
+ 	int once_thru = 0;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
+ 		file->f_path.dentry->d_parent->d_name.name,
+ 		file->f_path.dentry->d_name.name,
+ 		mapping->host->i_ino, len, (long long) pos);
+ 
++	pnfs_update_layout(mapping->host,
++			   nfs_file_open_context(file),
++			   0, NFS4_MAX_UINT64, IOMODE_RW,
++			   &lseg);
+ start:
+ 	/*
+ 	 * Prevent starvation issues if someone is doing a consistency
+@@ -402,17 +409,22 @@ start:
+ 	ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
+ 			nfs_wait_bit_killable, TASK_KILLABLE);
+ 	if (ret)
+-		return ret;
++		goto out;
+ 
+ 	page = grab_cache_page_write_begin(mapping, index, flags);
+-	if (!page)
+-		return -ENOMEM;
++	if (!page) {
++		ret = -ENOMEM;
++		goto out;
++	}
+ 	*pagep = page;
+ 
+-	ret = nfs_flush_incompatible(file, page);
++	ret = nfs_flush_incompatible(file, page, lseg);
+ 	if (ret) {
+ 		unlock_page(page);
+ 		page_cache_release(page);
++		*pagep = NULL;
++		*fsdata = NULL;
++		goto out;
+ 	} else if (!once_thru &&
+ 		   nfs_want_read_modify_write(file, page, pos, len)) {
+ 		once_thru = 1;
+@@ -421,6 +433,12 @@ start:
+ 		if (!ret)
+ 			goto start;
+ 	}
++	ret = pnfs_write_begin(file, page, pos, len, lseg, fsdata);
++ out:
++	if (ret) {
++		put_lseg(lseg);
++		*fsdata = NULL;
++	}
+ 	return ret;
+ }
+ 
+@@ -430,6 +448,7 @@ static int nfs_write_end(struct file *fi
+ {
+ 	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+ 	int status;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n",
+ 		file->f_path.dentry->d_parent->d_name.name,
+@@ -456,10 +475,17 @@ static int nfs_write_end(struct file *fi
+ 			zero_user_segment(page, pglen, PAGE_CACHE_SIZE);
+ 	}
+ 
+-	status = nfs_updatepage(file, page, offset, copied);
++	lseg = nfs4_pull_lseg_from_fsdata(file, fsdata);
++	status = pnfs_write_end(file, page, pos, len, copied, lseg);
++	if (status)
++		goto out;
++	status = nfs_updatepage(file, page, offset, copied, lseg, fsdata);
+ 
++ out:
+ 	unlock_page(page);
+ 	page_cache_release(page);
++	pnfs_write_end_cleanup(file, fsdata);
++	put_lseg(lseg);
+ 
+ 	if (status < 0)
+ 		return status;
+@@ -570,6 +596,8 @@ static int nfs_vm_page_mkwrite(struct vm
+ 	/* make sure the cache has finished storing the page */
+ 	nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page);
+ 
++	/* XXX Do we want to call pnfs_update_layout here? */
++
+ 	lock_page(page);
+ 	mapping = page->mapping;
+ 	if (mapping != dentry->d_inode->i_mapping)
+@@ -580,11 +608,11 @@ static int nfs_vm_page_mkwrite(struct vm
+ 	if (pagelen == 0)
+ 		goto out_unlock;
+ 
+-	ret = nfs_flush_incompatible(filp, page);
++	ret = nfs_flush_incompatible(filp, page, NULL);
+ 	if (ret != 0)
+ 		goto out_unlock;
+ 
+-	ret = nfs_updatepage(filp, page, 0, pagelen);
++	ret = nfs_updatepage(filp, page, 0, pagelen, NULL, NULL);
+ out_unlock:
+ 	if (!ret)
+ 		return VM_FAULT_LOCKED;
+diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inode.c
+--- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-08-23 12:08:29.042511552 -0400
++++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-23 12:09:03.329501644 -0400
+@@ -48,6 +48,7 @@
+ #include "internal.h"
+ #include "fscache.h"
+ #include "dns_resolve.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_VFS
+ 
+@@ -278,7 +279,7 @@ nfs_fhget(struct super_block *sb, struct
+ 		 */
+ 		inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
+ 		if (S_ISREG(inode->i_mode)) {
+-			inode->i_fop = &nfs_file_operations;
++			inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
+ 			inode->i_data.a_ops = &nfs_file_aops;
+ 			inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
+ 		} else if (S_ISDIR(inode->i_mode)) {
+@@ -530,6 +531,68 @@ out:
+ 	return err;
+ }
+ 
++static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
++{
++	atomic_set(&l_ctx->count, 1);	/* starts out with one reference */
++	l_ctx->lockowner = current->files;	/* identity of the calling task */
++	l_ctx->pid = current->tgid;
++	INIT_LIST_HEAD(&l_ctx->list);	/* not linked anywhere yet */
++}
++
++/* Look up the calling task's lock context on @ctx and take a reference */
++static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
++{
++	struct nfs_lock_context *l_ctx;
++
++	list_for_each_entry(l_ctx, &ctx->lock_context.list, list) {
++		if (l_ctx->lockowner == current->files &&
++		    l_ctx->pid == current->tgid) {
++			atomic_inc(&l_ctx->count);
++			return l_ctx;
++		}
++	}
++	return NULL;
++}
++
++struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
++{
++	struct nfs_lock_context *res, *new = NULL;
++	struct inode *inode = ctx->path.dentry->d_inode;
++
++	spin_lock(&inode->i_lock);
++	res = __nfs_find_lock_context(ctx);	/* takes a reference on a hit */
++	if (res == NULL) {
++		spin_unlock(&inode->i_lock);	/* drop it for the GFP_KERNEL alloc */
++		new = kmalloc(sizeof(*new), GFP_KERNEL);
++		if (new == NULL)
++			return NULL;
++		nfs_init_lock_context(new);
++		spin_lock(&inode->i_lock);
++		res = __nfs_find_lock_context(ctx);	/* recheck: lock was dropped */
++		if (res == NULL) {
++			list_add_tail(&new->list, &ctx->lock_context.list);
++			new->open_context = ctx;
++			res = new;
++			new = NULL;	/* now on the list; must not be freed below */
++		}
++	}
++	spin_unlock(&inode->i_lock);
++	kfree(new);	/* non-NULL only when the recheck found an existing entry */
++	return res;
++}
++
++void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
++{
++	struct nfs_open_context *ctx = l_ctx->open_context;
++	struct inode *inode = ctx->path.dentry->d_inode;
++
++	if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock))
++		return;	/* other references remain; i_lock was not taken */
++	list_del(&l_ctx->list);	/* last reference gone: unlink under i_lock */
++	spin_unlock(&inode->i_lock);
++	kfree(l_ctx);
++}
++
+ /**
+  * nfs_close_context - Common close_context() routine NFSv2/v3
+  * @ctx: pointer to context
+@@ -566,11 +629,11 @@ static struct nfs_open_context *alloc_nf
+ 		path_get(&ctx->path);
+ 		ctx->cred = get_rpccred(cred);
+ 		ctx->state = NULL;
+-		ctx->lockowner = current->files;
+ 		ctx->flags = 0;
+ 		ctx->error = 0;
+ 		ctx->dir_cookie = 0;
+-		atomic_set(&ctx->count, 1);
++		nfs_init_lock_context(&ctx->lock_context);
++		ctx->lock_context.open_context = ctx;
+ 	}
+ 	return ctx;
+ }
+@@ -578,15 +641,16 @@ static struct nfs_open_context *alloc_nf
+ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
+ {
+ 	if (ctx != NULL)
+-		atomic_inc(&ctx->count);
++		atomic_inc(&ctx->lock_context.count);
+ 	return ctx;
+ }
++EXPORT_SYMBOL(get_nfs_open_context);
+ 
+ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
+ {
+ 	struct inode *inode = ctx->path.dentry->d_inode;
+ 
+-	if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
++	if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
+ 		return;
+ 	list_del(&ctx->list);
+ 	spin_unlock(&inode->i_lock);
+@@ -933,6 +997,7 @@ void nfs_fattr_init(struct nfs_fattr *fa
+ 	fattr->time_start = jiffies;
+ 	fattr->gencount = nfs_inc_attr_generation_counter();
+ }
++EXPORT_SYMBOL(nfs_fattr_init);
+ 
+ struct nfs_fattr *nfs_alloc_fattr(void)
+ {
+@@ -1142,6 +1207,14 @@ static int nfs_update_inode(struct inode
+ 		server->fsid = fattr->fsid;
+ 
+ 	/*
++	 * file needs layout commit, server attributes may be stale
++	 */
++	if (layoutcommit_needed(nfsi) && nfsi->change_attr >= fattr->change_attr) {
++		dprintk("NFS: %s: layoutcommit is needed for file %s/%ld\n",
++			__func__, inode->i_sb->s_id, inode->i_ino);
++		return 0;
++	}
++	/*
+ 	 * Update the read time so we don't revalidate too often.
+ 	 */
+ 	nfsi->read_cache_jiffies = fattr->time_start;
+@@ -1340,9 +1413,10 @@ static int nfs_update_inode(struct inode
+  */
+ void nfs4_clear_inode(struct inode *inode)
+ {
++	pnfs_return_layout(inode, NULL, NULL, RETURN_FILE, true);
++
+ 	/* If we are holding a delegation, return it! */
+ 	nfs_inode_return_delegation_noreclaim(inode);
+-	/* First call standard NFS clear_inode() code */
+ 	nfs_clear_inode(inode);
+ }
+ #endif
+@@ -1367,7 +1441,10 @@ struct inode *nfs_alloc_inode(struct sup
+ 
+ void nfs_destroy_inode(struct inode *inode)
+ {
+-	kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
++	struct nfs_inode *nfsi = NFS_I(inode);
++
++	pnfs_destroy_layout(nfsi);
++	kmem_cache_free(nfs_inode_cachep, nfsi);
+ }
+ 
+ static inline void nfs4_init_once(struct nfs_inode *nfsi)
+@@ -1377,6 +1454,11 @@ static inline void nfs4_init_once(struct
+ 	nfsi->delegation = NULL;
+ 	nfsi->delegation_state = 0;
+ 	init_rwsem(&nfsi->rwsem);
++#ifdef CONFIG_NFS_V4_1
++	init_waitqueue_head(&nfsi->lo_waitq);
++	nfsi->pnfs_layout_suspend = 0;
++	nfsi->layout = NULL;
++#endif /* CONFIG_NFS_V4_1 */
+ #endif
+ }
+ 
+@@ -1488,6 +1570,12 @@ static int __init init_nfs_fs(void)
+ 	if (err)
+ 		goto out0;
+ 
++#ifdef CONFIG_NFS_V4_1
++	err = pnfs_initialize();
++	if (err)
++		goto out00;
++#endif /* CONFIG_NFS_V4_1 */
++
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_register(&nfs_rpcstat);
+ #endif
+@@ -1498,6 +1586,10 @@ out:
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_unregister("nfs");
+ #endif
++#ifdef CONFIG_NFS_V4_1
++out00:
++	pnfs_uninitialize();
++#endif /* CONFIG_NFS_V4_1 */
+ 	nfs_destroy_directcache();
+ out0:
+ 	nfs_destroy_writepagecache();
+@@ -1531,6 +1623,9 @@ static void __exit exit_nfs_fs(void)
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_unregister("nfs");
+ #endif
++#ifdef CONFIG_NFS_V4_1
++	pnfs_uninitialize();
++#endif
+ 	unregister_nfs_fs();
+ 	nfs_fs_proc_exit();
+ 	nfsiod_stop();
+diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/internal.h
+--- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-08-23 12:08:29.042511552 -0400
++++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-23 12:09:03.330502148 -0400
+@@ -139,6 +139,16 @@ extern struct nfs_server *nfs_clone_serv
+ 					   struct nfs_fattr *);
+ extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
+ extern int nfs4_check_client_ready(struct nfs_client *clp);
++extern int nfs_sockaddr_cmp(const struct sockaddr *sa1,
++		const struct sockaddr *sa2);
++extern int nfs4_set_client(struct nfs_server *server,
++		const char *hostname,
++		const struct sockaddr *addr,
++		const size_t addrlen,
++		const char *ip_addr,
++		rpc_authflavor_t authflavour,
++		int proto, const struct rpc_timeout *timeparms,
++		u32 minorversion);
+ #ifdef CONFIG_PROC_FS
+ extern int __init nfs_fs_proc_init(void);
+ extern void nfs_fs_proc_exit(void);
+@@ -201,6 +211,8 @@ extern const u32 nfs41_maxwrite_overhead
+ extern struct rpc_procinfo nfs4_procedures[];
+ #endif
+ 
++extern int nfs4_recover_expired_lease(struct nfs_client *clp);
++
+ /* proc.c */
+ void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
+ 
+@@ -248,10 +260,31 @@ extern int nfs4_get_rootfh(struct nfs_se
+ #endif
+ 
+ /* read.c */
++extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++			     const struct rpc_call_ops *call_ops);
++extern int pnfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++			     const struct rpc_call_ops *call_ops);
+ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
+ 
+ /* write.c */
++extern int nfs_initiate_write(struct nfs_write_data *data,
++			      struct rpc_clnt *clnt,
++			      const struct rpc_call_ops *call_ops,
++			      int how);
++extern int pnfs_initiate_write(struct nfs_write_data *data,
++			      struct rpc_clnt *clnt,
++			      const struct rpc_call_ops *call_ops,
++			      int how);
++extern int nfs_initiate_commit(struct nfs_write_data *data,
++			       struct rpc_clnt *clnt,
++			       const struct rpc_call_ops *call_ops,
++			       int how);
++extern int pnfs_initiate_commit(struct nfs_write_data *data,
++			       struct rpc_clnt *clnt,
++			       const struct rpc_call_ops *call_ops,
++				int how, int pnfs);
+ extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
++extern void nfs_mark_list_commit(struct list_head *head);
+ #ifdef CONFIG_MIGRATION
+ extern int nfs_migrate_page(struct address_space *,
+ 		struct page *, struct page *);
+diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kconfig
+--- linux-2.6.34.noarch/fs/nfs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-08-23 12:09:03.331395814 -0400
+@@ -79,10 +79,48 @@ config NFS_V4_1
+ 	depends on NFS_V4 && EXPERIMENTAL
+ 	help
+ 	  This option enables support for minor version 1 of the NFSv4 protocol
+-	  (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client.
++	  (RFC5661) including support for the parallel NFS (pNFS) features
++	  in the kernel's NFS client.
+ 
+ 	  Unless you're an NFS developer, say N.
+ 
++config PNFS_FILE_LAYOUT
++	tristate "NFS client support for the pNFS nfs-files layout (DEVELOPER ONLY)"
++	depends on NFS_FS && NFS_V4_1
++	default y
++	help
++	  This option enables support for the pNFS nfs-files layout.
++
++	  Unless you're an NFS developer, say N.
++
++config PNFS_OBJLAYOUT
++	tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
++	depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD
++	help
++	  Say M here if you want your pNFS client to support the Objects Layout Driver.
++	  Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and
++	  upper level driver (SCSI_OSD_ULD).
++
++	  If unsure, say N.
++
++config PNFS_PANLAYOUT
++	tristate "Provide support for the Panasas OSD Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
++	depends on PNFS_OBJLAYOUT
++	help
++	  Say M or y here if you want your pNFS client to support the Panasas OSD Layout Driver.
++
++	  If unsure, say N.
++
++config PNFS_BLOCK
++	tristate "Provide a pNFS block client (EXPERIMENTAL)"
++	depends on NFS_FS && NFS_V4_1
++	select MD
++	select BLK_DEV_DM
++	help
++	  Say M or y here if you want your pNFS client to support the block protocol.
++
++	  If unsure, say N.
++
+ config ROOT_NFS
+ 	bool "Root file system on NFS"
+ 	depends on NFS_FS=y && IP_PNP
+diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Makefile
+--- linux-2.6.34.noarch/fs/nfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-08-23 12:09:03.331395814 -0400
+@@ -15,5 +15,12 @@ nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4x
+ 			   delegation.o idmap.o \
+ 			   callback.o callback_xdr.o callback_proc.o \
+ 			   nfs4namespace.o
++nfs-$(CONFIG_NFS_V4_1)	+= pnfs.o
+ nfs-$(CONFIG_SYSCTL) += sysctl.o
+ nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
++
++obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
++nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
++
++obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
++obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
+diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs3proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-08-23 12:08:29.045525837 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-23 12:09:03.332511640 -0400
+@@ -833,6 +833,7 @@ const struct nfs_rpc_ops nfs_v3_clientop
+ 	.dentry_ops	= &nfs_dentry_operations,
+ 	.dir_inode_ops	= &nfs3_dir_inode_operations,
+ 	.file_inode_ops	= &nfs3_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs3_proc_get_root,
+ 	.getattr	= nfs3_proc_getattr,
+ 	.setattr	= nfs3_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-08-23 12:09:03.333512111 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-08-23 12:09:03.334491472 -0400
+@@ -0,0 +1,765 @@
++/*
++ *  linux/fs/nfs/nfs4filelayout.c
++ *
++ *  Module for the pnfs nfs4 file layout driver.
++ *  Defines all I/O and Policy interface operations, plus code
++ *  to register itself with the pNFS client.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/time.h>
++#include <linux/kernel.h>
++#include <linux/mm.h>
++#include <linux/string.h>
++#include <linux/vmalloc.h>
++#include <linux/stat.h>
++#include <linux/errno.h>
++#include <linux/unistd.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_page.h>
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs4_pnfs.h>
++
++#include "nfs4filelayout.h"
++#include "nfs4_fs.h"
++#include "internal.h"
++#include "pnfs.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Dean Hildebrand <dhildebz@eecs.umich.edu>");
++MODULE_DESCRIPTION("The NFSv4 file layout driver");
++
++/* Callback operations to the pNFS client */
++struct pnfs_client_operations *pnfs_callback_ops;
++
++/* Forward declaration */
++struct layoutdriver_io_operations filelayout_io_operations;
++
++int
++filelayout_initialize_mountpoint(struct nfs_server *nfss,
++				 const struct nfs_fh *mntfh)
++{
++	int status = nfs4_alloc_init_deviceid_cache(nfss->nfs_client,
++						nfs4_fl_free_deviceid_callback);
++	if (status) {
++		printk(KERN_WARNING "%s: deviceid cache could not be "
++			"initialized\n", __func__);
++		return status;
++	}
++	dprintk("%s: deviceid cache has been initialized successfully\n",
++		__func__);
++	return 0;
++}
++
++/* Uninitialize a mountpoint by destroying its device list */
++int
++filelayout_uninitialize_mountpoint(struct nfs_server *nfss)
++{
++	dprintk("--> %s\n", __func__);
++
++	if (nfss->pnfs_curr_ld && nfss->nfs_client->cl_devid_cache)
++		nfs4_put_deviceid_cache(nfss->nfs_client);
++	return 0;
++}
++
++/* This function is used by the layout driver to calculate the
++ * offset of the file on the dserver based on whether the
++ * layout type is STRIPE_DENSE or STRIPE_SPARSE
++ */
++static loff_t
++filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++
++	switch (flseg->stripe_type) {
++	case STRIPE_SPARSE:
++		return offset;
++
++	case STRIPE_DENSE:
++	{
++		u32 stripe_width;
++		u64 tmp, off;
++		u32 unit = flseg->stripe_unit;
++
++		stripe_width = unit * FILE_DSADDR(lseg)->stripe_count;
++		tmp = off = offset - flseg->pattern_offset;
++		do_div(tmp, stripe_width);
++		return tmp * unit + do_div(off, unit);
++	}
++	default:
++		BUG();
++	}
++
++	/* We should never get here... just to stop the gcc warning */
++	return 0;
++}
++
++/*
++ * Call ops for the async read/write cases
++ * In the case of dense layouts, the offset needs to be reset to its
++ * original value.
++ */
++static void filelayout_read_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_read_data *rdata = (struct nfs_read_data *)data;
++
++	if (rdata->fldata.orig_offset) {
++		dprintk("%s new off %llu orig offset %llu\n", __func__,
++			rdata->args.offset, rdata->fldata.orig_offset);
++		rdata->args.offset = rdata->fldata.orig_offset;
++	}
++
++	/* Note this may cause RPC to be resent */
++	rdata->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static void filelayout_read_release(void *data)
++{
++	struct nfs_read_data *rdata = (struct nfs_read_data *)data;
++
++	put_lseg(rdata->pdata.lseg);
++	rdata->pdata.lseg = NULL;
++	rdata->pdata.call_ops->rpc_release(data);
++}
++
++static void filelayout_write_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
++
++	if (wdata->fldata.orig_offset) {
++		dprintk("%s new off %llu orig offset %llu\n", __func__,
++			wdata->args.offset, wdata->fldata.orig_offset);
++		wdata->args.offset = wdata->fldata.orig_offset;
++	}
++
++	/* Note this may cause RPC to be resent */
++	wdata->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static void filelayout_write_release(void *data)
++{
++	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
++
++	put_lseg(wdata->pdata.lseg);
++	wdata->pdata.lseg = NULL;
++	wdata->pdata.call_ops->rpc_release(data);
++}
++
++struct rpc_call_ops filelayout_read_call_ops = {
++	.rpc_call_prepare = nfs_read_prepare,
++	.rpc_call_done = filelayout_read_call_done,
++	.rpc_release = filelayout_read_release,
++};
++
++struct rpc_call_ops filelayout_write_call_ops = {
++	.rpc_call_prepare = nfs_write_prepare,
++	.rpc_call_done = filelayout_write_call_done,
++	.rpc_release = filelayout_write_release,
++};
++
++/* Perform sync or async reads.
++ *
++ * An optimization for the NFS file layout driver
++ * allows the original read/write data structs to be passed in the
++ * last argument.
++ *
++ * TODO: join with write_pagelist?
++ */
++static enum pnfs_try_status
++filelayout_read_pagelist(struct nfs_read_data *data, unsigned nr_pages)
++{
++	struct pnfs_layout_segment *lseg = data->pdata.lseg;
++	struct nfs4_pnfs_ds *ds;
++	loff_t offset = data->args.offset;
++	u32 idx;
++	struct nfs_fh *fh;
++
++	dprintk("--> %s ino %lu nr_pages %d pgbase %u req %Zu@%llu\n",
++		__func__, data->inode->i_ino, nr_pages,
++		data->args.pgbase, (size_t)data->args.count, offset);
++
++	/* Retrieve the correct rpc_client for the byte range */
++	idx = nfs4_fl_calc_ds_index(lseg, offset);
++	ds = nfs4_fl_prepare_ds(lseg, idx);
++	if (!ds) {
++		printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	dprintk("%s USE DS:ip %x %s\n", __func__,
++		htonl(ds->ds_ip_addr), ds->r_addr);
++
++	/* just try the first data server for the index..*/
++	data->fldata.ds_nfs_client = ds->ds_clp;
++	fh = nfs4_fl_select_ds_fh(lseg, offset);
++	if (fh)
++		data->args.fh = fh;
++
++	/*
++	 * Now get the file offset on the dserver
++	 * Set the read offset to this offset, and
++	 * save the original offset in orig_offset
++	 * In the case of async reads, the offset will be reset in the
++	 * call_ops->rpc_call_done() routine.
++	 */
++	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
++	data->fldata.orig_offset = offset;
++
++	/* Perform an asynchronous read */
++	nfs_initiate_read(data, ds->ds_clp->cl_rpcclient,
++			  &filelayout_read_call_ops);
++
++	data->pdata.pnfs_error = 0;
++
++	return PNFS_ATTEMPTED;
++}
++
++/* Perform async writes. */
++static enum pnfs_try_status
++filelayout_write_pagelist(struct nfs_write_data *data, unsigned nr_pages, int sync)
++{
++	struct pnfs_layout_segment *lseg = data->pdata.lseg;
++	struct nfs4_pnfs_ds *ds;
++	loff_t offset = data->args.offset;
++	u32 idx;
++	struct nfs_fh *fh;
++
++	/* Retrieve the correct rpc_client for the byte range */
++	idx = nfs4_fl_calc_ds_index(lseg, offset);
++	ds = nfs4_fl_prepare_ds(lseg, idx);
++	if (!ds) {
++		printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu %s\n", __func__,
++		data->inode->i_ino, sync, (size_t) data->args.count, offset,
++		htonl(ds->ds_ip_addr), ntohs(ds->ds_port), ds->r_addr);
++
++	data->fldata.ds_nfs_client = ds->ds_clp;
++	fh = nfs4_fl_select_ds_fh(lseg, offset);
++	if (fh)
++		data->args.fh = fh;
++	/*
++	 * Get the file offset on the dserver. Set the write offset to
++	 * this offset and save the original offset.
++	 */
++	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
++	data->fldata.orig_offset = offset;
++
++	/*
++	 * Perform an asynchronous write The offset will be reset in the
++	 * call_ops->rpc_call_done() routine
++	 */
++	nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
++			   &filelayout_write_call_ops, sync);
++
++	data->pdata.pnfs_error = 0;
++	return PNFS_ATTEMPTED;
++}
++
++/*
++ * Create a filelayout layout structure and return it.  The pNFS client
++ * will use the pnfs_layout_type type to refer to the layout for this
++ * inode from now on.
++ */
++static struct pnfs_layout_type *
++filelayout_alloc_layout(struct inode *inode)
++{
++	struct nfs4_filelayout *flp;
++
++	dprintk("NFS_FILELAYOUT: allocating layout\n");
++	flp =  kzalloc(sizeof(struct nfs4_filelayout), GFP_KERNEL);
++	return flp ? &flp->fl_layout : NULL;
++}
++
++/* Free a filelayout layout structure */
++static void
++filelayout_free_layout(struct pnfs_layout_type *lo)
++{
++	dprintk("NFS_FILELAYOUT: freeing layout\n");
++	kfree(FILE_LO(lo));
++}
++
++/*
++ * filelayout_check_layout()
++ *
++ * Make sure layout segment parameters are sane WRT the device.
++ * Returns 0 (and references the device) if so, -EINVAL otherwise.
++ * Notes:
++ * 1) current code insists that # stripe index = # data servers in ds_list
++ *    which is wrong.
++ * 2) pattern_offset is ignored and must == 0 which is wrong;
++ * 3) the pattern_offset needs to be a multiple of the stripe unit.
++ * 4) stripe unit is multiple of page size
++ */
++
++static int
++filelayout_check_layout(struct pnfs_layout_type *lo,
++			struct pnfs_layout_segment *lseg)
++{
++	struct nfs4_filelayout_segment *fl = LSEG_LD_DATA(lseg);
++	struct nfs4_file_layout_dsaddr *dsaddr;
++	int status = -EINVAL;
++	struct nfs_server *nfss = NFS_SERVER(PNFS_INODE(lo));
++
++	dprintk("--> %s\n", __func__);
++	dsaddr = nfs4_pnfs_device_item_find(nfss->nfs_client, &fl->dev_id);
++	if (dsaddr == NULL) {
++		dsaddr = get_device_info(PNFS_INODE(lo), &fl->dev_id);
++		if (dsaddr == NULL) {
++			dprintk("%s NO device for dev_id %s\n",
++				__func__, deviceid_fmt(&fl->dev_id));
++			goto out;
++		}
++	}
++	if (fl->first_stripe_index < 0 ||	/* must be < stripe_count */
++	    fl->first_stripe_index >= dsaddr->stripe_count) {
++		dprintk("%s Bad first_stripe_index %d\n",
++				__func__, fl->first_stripe_index);
++		goto out;
++	}
++
++	if (fl->pattern_offset != 0) {
++		dprintk("%s Unsupported no-zero pattern_offset %Ld\n",
++				__func__, fl->pattern_offset);
++		goto out;
++	}
++
++	if (fl->stripe_unit % PAGE_SIZE) {
++		dprintk("%s Stripe unit (%u) not page aligned\n",
++			__func__, fl->stripe_unit);
++		goto out;
++	}
++
++	/* XXX only support SPARSE packing. Don't support use MDS open fh */
++	if (!(fl->num_fh == 1 || fl->num_fh == dsaddr->ds_num)) {
++		dprintk("%s num_fh %u not equal to 1 or ds_num %u\n",
++			__func__, fl->num_fh, dsaddr->ds_num);
++		goto out;
++	}
++
++	if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
++		dprintk("%s Stripe unit (%u) not aligned with rsize %u "
++			"wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
++			nfss->wsize);
++	}	/* no goto out: the mismatch is only logged */
++
++	/* reference the device */
++	nfs4_set_layout_deviceid(lseg, &dsaddr->deviceid);
++
++	status = 0;
++out:
++	dprintk("--> %s returns %d\n", __func__, status);
++	return status;
++}
++
++static void _filelayout_free_lseg(struct pnfs_layout_segment *lseg);
++static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl);
++
++/* Decode layout and store in layoutid.  Overwrite any existing layout
++ * information for this file.
++ */
++static int
++filelayout_set_layout(struct nfs4_filelayout *flo,
++		      struct nfs4_filelayout_segment *fl,
++		      struct nfs4_pnfs_layoutget_res *lgr)
++{
++	uint32_t *p = (uint32_t *)lgr->layout.buf;
++	uint32_t nfl_util;
++	int i;
++
++	dprintk("%s: set_layout_map Begin\n", __func__);
++
++	memcpy(&fl->dev_id, p, NFS4_PNFS_DEVICEID4_SIZE);
++	p += XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE);
++	nfl_util = be32_to_cpup(p++);
++	if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
++		fl->commit_through_mds = 1;
++	if (nfl_util & NFL4_UFLG_DENSE)
++		fl->stripe_type = STRIPE_DENSE;
++	else
++		fl->stripe_type = STRIPE_SPARSE;
++	fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;
++
++	if (!flo->stripe_unit)
++		flo->stripe_unit = fl->stripe_unit;
++	else if (flo->stripe_unit != fl->stripe_unit) {
++		printk(KERN_NOTICE "%s: updating strip_unit from %u to %u\n",
++			__func__, flo->stripe_unit, fl->stripe_unit);
++		flo->stripe_unit = fl->stripe_unit;
++	}
++
++	fl->first_stripe_index = be32_to_cpup(p++);
++	p = xdr_decode_hyper(p, &fl->pattern_offset);
++	fl->num_fh = be32_to_cpup(p++);
++
++	dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu dev_id %s\n",
++		__func__, nfl_util, fl->num_fh, fl->first_stripe_index,
++		fl->pattern_offset, deviceid_fmt(&fl->dev_id));
++
++	if (fl->num_fh * sizeof(struct nfs_fh) > 2*PAGE_SIZE) {
++		fl->fh_array = vmalloc(fl->num_fh * sizeof(struct nfs_fh));
++		if (fl->fh_array)
++			memset(fl->fh_array, 0,
++				fl->num_fh * sizeof(struct nfs_fh));
++	} else {
++		fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh),
++					GFP_KERNEL);
++       }
++	if (!fl->fh_array)
++		return -ENOMEM;
++
++	for (i = 0; i < fl->num_fh; i++) {
++		/* fh */
++		fl->fh_array[i].size = be32_to_cpup(p++);
++		if (sizeof(struct nfs_fh) < fl->fh_array[i].size) {
++			printk(KERN_ERR "Too big fh %d received %d\n",
++				i, fl->fh_array[i].size);
++			/* Layout is now invalid, pretend it doesn't exist */
++			filelayout_free_fh_array(fl);
++			fl->num_fh = 0;
++			break;
++		}
++		memcpy(fl->fh_array[i].data, p, fl->fh_array[i].size);
++		p += XDR_QUADLEN(fl->fh_array[i].size);
++		dprintk("DEBUG: %s: fh len %d\n", __func__,
++					fl->fh_array[i].size);
++	}
++
++	return 0;
++}
++
++static struct pnfs_layout_segment *
++filelayout_alloc_lseg(struct pnfs_layout_type *layoutid,
++		      struct nfs4_pnfs_layoutget_res *lgr)
++{
++	struct nfs4_filelayout *flo = FILE_LO(layoutid);
++	struct pnfs_layout_segment *lseg;
++	int rc;
++
++	dprintk("--> %s\n", __func__);
++	lseg = kzalloc(sizeof(struct pnfs_layout_segment) +
++		       sizeof(struct nfs4_filelayout_segment), GFP_KERNEL);
++	if (!lseg)
++		return NULL;
++
++	rc = filelayout_set_layout(flo, LSEG_LD_DATA(lseg), lgr);
++
++	if (rc != 0 || filelayout_check_layout(layoutid, lseg)) {
++		_filelayout_free_lseg(lseg);
++		lseg = NULL;
++	}
++	return lseg;
++}
++
++static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl)
++{
++	if (fl->num_fh * sizeof(struct nfs_fh) > 2*PAGE_SIZE)
++		vfree(fl->fh_array);
++	else
++		kfree(fl->fh_array);
++
++	fl->fh_array = NULL;
++}
++
++static void
++_filelayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	filelayout_free_fh_array(LSEG_LD_DATA(lseg));
++	kfree(lseg);
++}
++
++static void
++filelayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	dprintk("--> %s\n", __func__);
++	nfs4_unset_layout_deviceid(lseg, lseg->deviceid,
++				   nfs4_fl_free_deviceid_callback);
++	_filelayout_free_lseg(lseg);
++}
++
++/* Allocate a new nfs_write_data and initialize it as a clone of 'old', which
++ * is referenced and recorded as the parent.  Returns NULL if allocation fails.
++ */
++static struct nfs_write_data *
++filelayout_clone_write_data(struct nfs_write_data *old)
++{
++	struct nfs_write_data *new = nfs_commitdata_alloc();
++
++	if (!new)
++		return NULL;
++	kref_init(&new->refcount);
++	new->parent      = old;
++	kref_get(&old->refcount);
++	new->inode       = old->inode;
++	new->cred        = old->cred;
++	new->args.offset = 0;
++	new->args.count  = 0;
++	new->res.count   = 0;
++	new->res.fattr   = &new->fattr;
++	nfs_fattr_init(&new->fattr);
++	new->res.verf    = &new->verf;
++	new->args.context = get_nfs_open_context(old->args.context);
++	new->pdata.lseg = NULL;
++	new->pdata.call_ops = old->pdata.call_ops;
++	new->pdata.how = old->pdata.how;
++	return new;
++}
++
++static void filelayout_commit_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
++
++	wdata->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static struct rpc_call_ops filelayout_commit_call_ops = {
++	.rpc_call_prepare = nfs_write_prepare,
++	.rpc_call_done = filelayout_commit_call_done,
++	.rpc_release = filelayout_write_release,
++};
++
++/*
++ * Execute a COMMIT op to the MDS or to each data server on which a page
++ * in 'pages' exists.
++ * Invoke the pnfs_commit_complete callback.
++ */
++enum pnfs_try_status
++filelayout_commit(struct nfs_write_data *data, int sync)
++{
++	LIST_HEAD(head);
++	struct nfs_page *req;
++	loff_t file_offset = 0;
++	u16 idx, i;
++	struct list_head **ds_page_list = NULL;
++	u16 *indices_used;
++	int num_indices_seen = 0;
++	const struct rpc_call_ops *call_ops;
++	struct rpc_clnt *clnt;
++	struct nfs_write_data **clone_list = NULL;
++	struct nfs_write_data *dsdata;
++	struct nfs4_pnfs_ds *ds;
++
++	dprintk("%s data %p sync %d\n", __func__, data, sync);
++
++	/* Alloc room for both in one go */
++	ds_page_list = kzalloc((NFS4_PNFS_MAX_MULTI_CNT + 1) *
++			       (sizeof(u16) + sizeof(struct list_head *)),
++			       GFP_KERNEL);
++	if (!ds_page_list)
++		goto mem_error;
++	indices_used = (u16 *) (ds_page_list + NFS4_PNFS_MAX_MULTI_CNT + 1);
++	/*
++	 * Sort pages based on which ds to send to.
++	 * MDS is given index equal to NFS4_PNFS_MAX_MULTI_CNT.
++	 * Note we are assuming there is only a single lseg in play.
++	 * When that is not true, we could first sort on lseg, then
++	 * sort within each as we do here.
++	 */
++	while (!list_empty(&data->pages)) {
++		req = nfs_list_entry(data->pages.next);
++		nfs_list_remove_request(req);
++		if (!req->wb_lseg ||
++		    ((struct nfs4_filelayout_segment *)
++		     LSEG_LD_DATA(req->wb_lseg))->commit_through_mds)
++			idx = NFS4_PNFS_MAX_MULTI_CNT;
++		else {
++			file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
++			idx = nfs4_fl_calc_ds_index(req->wb_lseg, file_offset);
++		}
++		if (ds_page_list[idx]) {
++			/* Already seen this idx */
++			list_add(&req->wb_list, ds_page_list[idx]);
++		} else {
++			/* New idx not seen so far */
++			list_add_tail(&req->wb_list, &head);
++			indices_used[num_indices_seen++] = idx;
++		}
++		ds_page_list[idx] = &req->wb_list;
++	}
++	/* Once created, clone must be released via call_op */
++	clone_list = kzalloc(num_indices_seen *
++			     sizeof(struct nfs_write_data *), GFP_KERNEL);
++	if (!clone_list)
++		goto mem_error;
++	for (i = 0; i < num_indices_seen - 1; i++) {
++		clone_list[i] = filelayout_clone_write_data(data);
++		if (!clone_list[i])
++			goto mem_error;
++	}
++	clone_list[i] = data;
++	/*
++	 * Now send off the RPCs to each ds.  Note that it is important
++	 * that any RPC to the MDS be sent last (or at least after all
++	 * clones have been made.)
++	 */
++	for (i = 0; i < num_indices_seen; i++) {
++		dsdata = clone_list[i];
++		idx = indices_used[i];
++		list_cut_position(&dsdata->pages, &head, ds_page_list[idx]);
++		if (idx == NFS4_PNFS_MAX_MULTI_CNT) {
++			call_ops = data->pdata.call_ops;;
++			clnt = NFS_CLIENT(dsdata->inode);
++			ds = NULL;
++		} else {
++			struct nfs_fh *fh;
++
++			call_ops = &filelayout_commit_call_ops;
++			req = nfs_list_entry(dsdata->pages.next);
++			ds = nfs4_fl_prepare_ds(req->wb_lseg, idx);
++			if (!ds) {
++				/* Trigger retry of this chunk through MDS */
++				dsdata->task.tk_status = -EIO;
++				data->pdata.call_ops->rpc_release(dsdata);
++				continue;
++			}
++			clnt = ds->ds_clp->cl_rpcclient;
++			dsdata->fldata.ds_nfs_client = ds->ds_clp;
++			file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
++			fh = nfs4_fl_select_ds_fh(req->wb_lseg, file_offset);
++			if (fh)
++				dsdata->args.fh = fh;
++		}
++		dprintk("%s: Initiating commit: %llu USE DS:\n",
++			__func__, file_offset);
++		print_ds(ds);
++
++		/* Send COMMIT to data server */
++		nfs_initiate_commit(dsdata, clnt, call_ops, sync);
++	}
++	kfree(clone_list);
++	kfree(ds_page_list);
++	data->pdata.pnfs_error = 0;
++	return PNFS_ATTEMPTED;
++
++ mem_error:
++	if (clone_list) {
++		for (i = 0; i < num_indices_seen - 1; i++) {
++			if (!clone_list[i])
++				break;
++			data->pdata.call_ops->rpc_release(clone_list[i]);
++		}
++		kfree(clone_list);
++	}
++	kfree(ds_page_list);
++	/* One of these will be empty, but doesn't hurt to do both */
++	nfs_mark_list_commit(&head);
++	nfs_mark_list_commit(&data->pages);
++	data->pdata.call_ops->rpc_release(data);
++	return PNFS_ATTEMPTED;
++}
++
++/* Return the stripesize for the specified file */
++ssize_t
++filelayout_get_stripesize(struct pnfs_layout_type *layoutid)
++{
++	struct nfs4_filelayout *flo = FILE_LO(layoutid);
++
++	return flo->stripe_unit;
++}
++
++/*
++ * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
++ *
++ * return 1 :  coalesce page
++ * return 0 :  don't coalesce page
++ */
++int
++filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
++		   struct nfs_page *req)
++{
++	u64 p_stripe, r_stripe;
++
++	if (pgio->pg_boundary == 0)
++		return 1;
++	p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
++	r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
++
++	do_div(p_stripe, pgio->pg_boundary);
++	do_div(r_stripe, pgio->pg_boundary);
++
++	return (p_stripe == r_stripe);
++}
++
++struct layoutdriver_io_operations filelayout_io_operations = {
++	.commit                  = filelayout_commit,
++	.read_pagelist           = filelayout_read_pagelist,
++	.write_pagelist          = filelayout_write_pagelist,
++	.alloc_layout            = filelayout_alloc_layout,
++	.free_layout             = filelayout_free_layout,
++	.alloc_lseg              = filelayout_alloc_lseg,
++	.free_lseg               = filelayout_free_lseg,
++	.initialize_mountpoint   = filelayout_initialize_mountpoint,
++	.uninitialize_mountpoint = filelayout_uninitialize_mountpoint,
++};
++
++struct layoutdriver_policy_operations filelayout_policy_operations = {
++	.flags                 = PNFS_USE_RPC_CODE,
++	.get_stripesize        = filelayout_get_stripesize,
++	.pg_test               = filelayout_pg_test,
++};
++
++struct pnfs_layoutdriver_type filelayout_type = {
++	.id = LAYOUT_NFSV4_1_FILES,
++	.name = "LAYOUT_NFSV4_1_FILES",
++	.ld_io_ops = &filelayout_io_operations,
++	.ld_policy_ops = &filelayout_policy_operations,
++};
++
++static int __init nfs4filelayout_init(void)
++{
++	printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n",
++	       __func__);
++
++	/*
++	 * Need to register file_operations struct with global list to indicate
++	 * that NFS4 file layout is a possible pNFS I/O module
++	 */
++	pnfs_callback_ops = pnfs_register_layoutdriver(&filelayout_type);
++
++	return 0;
++}
++
++static void __exit nfs4filelayout_exit(void)
++{
++	printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n",
++	       __func__);
++
++	/* Unregister NFS4 file layout driver with pNFS client*/
++	pnfs_unregister_layoutdriver(&filelayout_type);
++}
++
++module_init(nfs4filelayout_init);
++module_exit(nfs4filelayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-08-23 12:09:03.334491472 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-08-23 12:09:03.335501543 -0400
+@@ -0,0 +1,636 @@
++/*
++ *  linux/fs/nfs/nfs4filelayoutdev.c
++ *
++ *  Device operations for the pnfs nfs4 file layout driver.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *  Garth Goodson   <Garth.Goodson@netapp.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/hash.h>
++
++#include <linux/nfs4.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_xdr.h>
++
++#include <asm/div64.h>
++
++#include <linux/utsname.h>
++#include <linux/vmalloc.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/pnfs_xdr.h>
++#include "nfs4filelayout.h"
++#include "internal.h"
++#include "nfs4_fs.h"
++
++#define NFSDBG_FACILITY		NFSDBG_PNFS_LD
++
++DEFINE_SPINLOCK(nfs4_ds_cache_lock);
++static LIST_HEAD(nfs4_data_server_cache);
++
++void
++print_ds(struct nfs4_pnfs_ds *ds)
++{
++	if (ds == NULL) {
++		dprintk("%s NULL device \n", __func__);
++		return;
++	}
++	dprintk("        ip_addr %x\n", ntohl(ds->ds_ip_addr));
++	dprintk("        port %hu\n", ntohs(ds->ds_port));
++	dprintk("        client %p\n", ds->ds_clp);
++	dprintk("        ref count %d\n", atomic_read(&ds->ds_count));
++	if (ds->ds_clp)
++		dprintk("        cl_exchange_flags %x\n",
++					    ds->ds_clp->cl_exchange_flags);
++	dprintk("        ip:port %s\n", ds->r_addr);
++}
++
++void
++print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
++{
++	int i;
++
++	dprintk("%s dsaddr->ds_num %d\n", __func__,
++		dsaddr->ds_num);
++	for (i = 0; i < dsaddr->ds_num; i++)
++		print_ds(dsaddr->ds_list[i]);
++}
++
++/* Debug helper: assumes a 64bit major/minor deviceid; returns a static buf */
++char *
++deviceid_fmt(const struct pnfs_deviceid *dev_id)
++{
++	static char buf[2 * 20 + 2];	/* two u64 in decimal, ' ' and NUL */
++	uint32_t *p = (uint32_t *)dev_id->data;
++	uint64_t major, minor;
++
++	p = xdr_decode_hyper(p, &major);
++	p = xdr_decode_hyper(p, &minor);
++
++	snprintf(buf, sizeof(buf), "%08llu %08llu", major, minor);
++	return buf;
++}
++
++/* nfs4_ds_cache_lock is held */
++static inline struct nfs4_pnfs_ds *
++_data_server_lookup(u32 ip_addr, u32 port)
++{
++	struct nfs4_pnfs_ds *ds;
++
++	dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
++			ntohl(ip_addr), ntohs(port));
++
++	list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
++		if (ds->ds_ip_addr == ip_addr &&
++		    ds->ds_port == port) {
++			return ds;
++		}
++	}
++	return NULL;
++}
++
++/* Set ds->ds_clp to an nfs_client for data server 'ds'; returns 0 or -errno */
++static int
++nfs4_pnfs_ds_create(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
++{
++	struct nfs_server	*tmp;
++	struct sockaddr_in	sin;
++	struct rpc_clnt 	*mds_clnt = mds_srv->client;
++	struct nfs_client	*clp = mds_srv->nfs_client;
++	struct sockaddr		*mds_addr;
++	int err = 0;
++
++	dprintk("--> %s ip:port %s au_flavor %d\n", __func__,
++		ds->r_addr, mds_clnt->cl_auth->au_flavor);
++
++	sin.sin_family = AF_INET;
++	sin.sin_addr.s_addr = ds->ds_ip_addr;
++	sin.sin_port = ds->ds_port;
++
++	/*
++	 * If this DS is also the MDS, use the MDS session only if the
++	 * MDS exchangeid flags show the EXCHGID4_FLAG_USE_PNFS_DS pNFS role.
++	 */
++	mds_addr = (struct sockaddr *)&clp->cl_addr;
++	if (nfs_sockaddr_cmp((struct sockaddr *)&sin, mds_addr)) {
++		if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS)) {
++			printk(KERN_INFO "ip:port %s is not a pNFS Data "
++				"Server\n", ds->r_addr);
++			err = -ENODEV;
++		} else {
++			atomic_inc(&clp->cl_count);
++			ds->ds_clp = clp;
++			dprintk("%s Using MDS Session for DS\n", __func__);
++		}
++		goto out;
++	}
++
++	/* Temporary server for nfs4_set_client */
++	err = -ENOMEM;
++	tmp = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
++	if (!tmp)
++		goto out;
++
++	/*
++	 * Set a retrans, timeout interval, and authflavor equal to the MDS
++	 * values. Use the MDS nfs_client cl_ipaddr field so as to use the
++	 * same co_ownerid as the MDS.
++	 */
++	err = nfs4_set_client(tmp,
++			      mds_srv->nfs_client->cl_hostname,
++			      (struct sockaddr *)&sin,
++			      sizeof(struct sockaddr),
++			      mds_srv->nfs_client->cl_ipaddr,
++			      mds_clnt->cl_auth->au_flavor,
++			      IPPROTO_TCP,
++			      mds_clnt->cl_xprt->timeout,
++			      1 /* minorversion */);
++	if (err < 0)
++		goto out_free;
++
++	clp = tmp->nfs_client;
++
++	/* Ask for only the EXCHGID4_FLAG_USE_PNFS_DS pNFS role */
++	dprintk("%s EXCHANGE_ID for clp %p\n", __func__, clp);
++	clp->cl_exchange_flags = EXCHGID4_FLAG_USE_PNFS_DS;
++
++	err = nfs4_recover_expired_lease(clp);
++	if (!err)
++		err = nfs4_check_client_ready(clp);
++	if (err)
++		goto out_put;
++
++	if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS)) {
++		printk(KERN_INFO "ip:port %s is not a pNFS Data Server\n",
++			ds->r_addr);
++		err = -ENODEV;
++		goto out_put;
++	}
++	/*
++	 * Mask the (possibly) returned EXCHGID4_FLAG_USE_PNFS_MDS pNFS role
++	 * The is_ds_only_session depends on this.
++	 */
++	clp->cl_exchange_flags &= ~EXCHGID4_FLAG_USE_PNFS_MDS;
++	/*
++	 * Set DS lease equal to the MDS lease, renewal is scheduled in
++	 * create_session
++	 */
++	spin_lock(&mds_srv->nfs_client->cl_lock);
++	clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
++	spin_unlock(&mds_srv->nfs_client->cl_lock);
++	clp->cl_last_renewal = jiffies;
++
++	clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
++	ds->ds_clp = clp;
++
++	dprintk("%s: ip=%x, port=%hu, rpcclient %p\n", __func__,
++		ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), clp->cl_rpcclient);
++out_free:
++	kfree(tmp);
++out:
++	dprintk("%s Returns %d\n", __func__, err);
++	return err;
++out_put:
++	nfs_put_client(clp);
++	goto out_free;
++}
++
++static void
++destroy_ds(struct nfs4_pnfs_ds *ds)
++{
++	dprintk("--> %s\n", __func__);
++	print_ds(ds);
++
++	if (ds->ds_clp)
++		nfs_put_client(ds->ds_clp);
++	kfree(ds);
++}
++
++static void
++nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
++{
++	struct nfs4_pnfs_ds *ds;
++	int i;
++
++	dprintk("%s: device id=%s\n", __func__,
++		deviceid_fmt(&dsaddr->deviceid.de_id));
++
++	for (i = 0; i < dsaddr->ds_num; i++) {
++		ds = dsaddr->ds_list[i];
++		if (ds != NULL) {
++			if (atomic_dec_and_lock(&ds->ds_count,
++						&nfs4_ds_cache_lock)) {
++				list_del_init(&ds->ds_node);
++				spin_unlock(&nfs4_ds_cache_lock);
++				destroy_ds(ds);
++			}
++		}
++	}
++	kfree(dsaddr->stripe_indices);
++	kfree(dsaddr);
++}
++
++void
++nfs4_fl_free_deviceid_callback(struct kref *kref)
++{
++	struct nfs4_deviceid *device =
++		container_of(kref, struct nfs4_deviceid, de_kref);
++	struct nfs4_file_layout_dsaddr *dsaddr =
++		container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
++	/* kref -> nfs4_deviceid -> enclosing dsaddr, which is then freed */
++	nfs4_fl_free_deviceid(dsaddr);
++}
++
++static void
++nfs4_pnfs_ds_add(struct inode *inode, struct nfs4_pnfs_ds **dsp,
++		 u32 ip_addr, u32 port, char *r_addr, int len)
++{
++	struct nfs4_pnfs_ds *tmp_ds, *ds;
++
++	*dsp = NULL;
++	/* Allocate up front: the cache lookup below runs under a spinlock */
++	ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL);
++	if (!ds)
++		return; /* *dsp stays NULL */
++	/* Share an existing entry for this ip/port, else insert the new one */
++	spin_lock(&nfs4_ds_cache_lock);
++	tmp_ds = _data_server_lookup(ip_addr, port);
++	if (tmp_ds == NULL) {
++		ds->ds_ip_addr = ip_addr;
++		ds->ds_port = port;
++		strncpy(ds->r_addr, r_addr, len);
++		atomic_set(&ds->ds_count, 1);
++		INIT_LIST_HEAD(&ds->ds_node);
++		ds->ds_clp = NULL;
++		list_add(&ds->ds_node, &nfs4_data_server_cache);
++		*dsp = ds;
++		dprintk("%s add new data server ip 0x%x\n", __func__,
++				ds->ds_ip_addr);
++		spin_unlock(&nfs4_ds_cache_lock);
++	} else {
++		atomic_inc(&tmp_ds->ds_count);
++		*dsp = tmp_ds;
++		dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
++				__func__, tmp_ds->ds_ip_addr,
++				atomic_read(&tmp_ds->ds_count));
++		spin_unlock(&nfs4_ds_cache_lock);
++		kfree(ds);
++	}
++}
++
++/*
++ * Decode one (r_netid, r_addr) pair at *pp and return the data server cache
++ * entry for it, or NULL on malformed input or allocation failure.
++ */
++static struct nfs4_pnfs_ds *
++decode_and_add_ds(uint32_t **pp, struct inode *inode)
++{
++	struct nfs4_pnfs_ds *ds = NULL;
++	char r_addr[29]; /* max size of ip/port string */
++	int len;
++	u32 ip_addr, port;
++	int tmp[6];
++	uint32_t *p = *pp;
++
++	dprintk("%s enter\n", __func__);
++	/* check and skip r_netid: only a 3 byte netid ("tcp") is accepted */
++	len = be32_to_cpup(p++);
++	if (len != 3) {
++		printk("%s: ERROR: non TCP r_netid len %d\n",
++			__func__, len);
++		goto out_err;
++	}
++	p++; /* XXX: the netid bytes themselves are not sanity checked */
++
++	len = be32_to_cpup(p++);
++	if (len >= sizeof(r_addr)) {
++		printk("%s: ERROR: Device ip/port too long (%d)\n",
++			__func__, len);
++		goto out_err;
++	}
++	memcpy(r_addr, p, len);
++	p += XDR_QUADLEN(len);
++	*pp = p;
++	r_addr[len] = '\0';
++	/* All six dotted fields must parse, else tmp[] holds stack garbage */
++	if (sscanf(r_addr, "%d.%d.%d.%d.%d.%d", &tmp[0], &tmp[1],
++		   &tmp[2], &tmp[3], &tmp[4], &tmp[5]) != 6)
++		goto out_err;
++	ip_addr = htonl((tmp[0]<<24) | (tmp[1]<<16) | (tmp[2]<<8) | (tmp[3]));
++	port = htons((tmp[4] << 8) | (tmp[5]));
++
++	nfs4_pnfs_ds_add(inode, &ds, ip_addr, port, r_addr, len);
++	dprintk("%s: addr:port string = %s\n", __func__, r_addr);
++	return ds;
++out_err:
++	dprintk("%s returned NULL\n", __func__);
++	return NULL;
++}
++
++/* Decode opaque GETDEVICEINFO data into a new dsaddr; NULL on any error */
++static struct nfs4_file_layout_dsaddr*
++decode_device(struct inode *ino, struct pnfs_device *pdev)
++{
++	int i, dummy;
++	u32 cnt, num;
++	uint32_t *p = (u32 *)pdev->area, *indicesp;
++	struct nfs4_file_layout_dsaddr *dsaddr;
++
++	/* Get the stripe count (number of stripe index) */
++	cnt = be32_to_cpup(p++);
++	dprintk("%s stripe count  %d\n", __func__, cnt);
++	if (cnt == 0 || cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
++		printk(KERN_WARNING "%s: stripe count %d outside "
++		       "supported range 1..%d\n", __func__,
++			cnt, NFS4_PNFS_MAX_STRIPE_CNT);
++		goto out_err;
++	}
++
++	/* Check the multipath list count */
++	indicesp = p;
++	p += XDR_QUADLEN(cnt << 2);
++	num = be32_to_cpup(p++);
++	dprintk("%s ds_num %u\n", __func__, num);
++	if (num == 0 || num > NFS4_PNFS_MAX_MULTI_CNT) {
++		printk(KERN_WARNING "%s: multipath count %d outside "
++			"supported range 1..%d\n", __func__,
++			num, NFS4_PNFS_MAX_MULTI_CNT);
++		goto out_err;
++	}
++	dsaddr = kzalloc(sizeof(*dsaddr) +
++			(sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
++			GFP_KERNEL);
++	if (!dsaddr)
++		goto out_err;
++
++	dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
++	if (!dsaddr->stripe_indices)
++		goto out_err_free;
++
++	dsaddr->stripe_count = cnt;
++	dsaddr->ds_num = num;
++
++	memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id,
++	       NFS4_PNFS_DEVICEID4_SIZE);
++
++	/* Go back and read the stripe indices */
++	p = indicesp;
++	for (i = 0; i < dsaddr->stripe_count; i++) {
++		dummy = be32_to_cpup(p++);
++		/* A stripe index selects a ds_list[] slot: must be < ds_num */
++		if ((u32)dummy >= num)
++			goto out_err_free;
++		dsaddr->stripe_indices[i] = dummy;
++	}
++	/* Skip already read multipath list count */
++	p++;
++
++	for (i = 0; i < dsaddr->ds_num; i++) {
++		int j;
++
++		dummy = be32_to_cpup(p++); /* multipath count */
++		if (dummy > 1) {
++			printk(KERN_WARNING
++			       "%s: Multipath count %d not supported, "
++			       "skipping all greater than 1\n", __func__,
++				dummy);
++		}
++		for (j = 0; j < dummy; j++) {
++			if (j == 0) {
++				dsaddr->ds_list[i] = decode_and_add_ds(&p, ino);
++				if (dsaddr->ds_list[i] == NULL)
++					goto out_err_free;
++			} else {
++				u32 len;
++				/* skip extra multipath */
++				len = be32_to_cpup(p++);
++				p += XDR_QUADLEN(len);
++				len = be32_to_cpup(p++);
++				p += XDR_QUADLEN(len);
++				continue;
++			}
++		}
++	}
++	nfs4_init_deviceid_node(&dsaddr->deviceid);
++
++	return dsaddr;
++
++out_err_free:
++	nfs4_fl_free_deviceid(dsaddr);
++out_err:
++	dprintk("%s ERROR: returning NULL\n", __func__);
++	return NULL;
++}
++
++/*
++ * Decode the opaque device specified in 'dev'
++ * and add it to the list of available devices.
++ * If the deviceid is already cached, nfs4_add_deviceid will return
++ * a pointer to the cached struct and throw away the new.
++ */
++static struct nfs4_file_layout_dsaddr*
++decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
++{
++	struct nfs4_file_layout_dsaddr *dsaddr;
++	struct nfs4_deviceid *d;
++
++	dsaddr = decode_device(inode, dev);
++	if (!dsaddr) {
++		printk(KERN_WARNING "%s: Could not decode or add device\n",
++			__func__);
++		return NULL;
++	}
++
++	d = nfs4_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
++			      &dsaddr->deviceid);
++	/* Use whatever entry the cache hands back in place of our own copy */
++	return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
++}
++
++/*
++ * Fetch the device info for dev_id with GETDEVICEINFO, decode it, add it
++ * to the device id cache and return it.  Returns NULL on any failure.
++ */
++struct nfs4_file_layout_dsaddr *
++get_device_info(struct inode *inode, struct pnfs_deviceid *dev_id)
++{
++	struct pnfs_device *pdev = NULL;
++	u32 max_resp_sz;
++	int max_pages, rc, i;
++	struct page **pages = NULL;
++	struct nfs4_file_layout_dsaddr *dsaddr = NULL;
++	struct nfs_server *server = NFS_SERVER(inode);
++
++	/*
++	 * Use the session max response size as the basis for setting
++	 * GETDEVICEINFO's maxcount
++	 */
++	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
++	max_pages = max_resp_sz >> PAGE_SHIFT;
++	dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
++		__func__, inode, max_resp_sz, max_pages);
++
++	pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
++	if (pdev == NULL)
++		return NULL;
++
++	pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
++	if (pages == NULL) {
++		kfree(pdev);
++		return NULL;
++	}
++	for (i = 0; i < max_pages; i++) {
++		pages[i] = alloc_page(GFP_KERNEL);
++		if (!pages[i])
++			goto out_free;
++	}
++
++	/* set pdev->area */
++	pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
++	if (!pdev->area)
++		goto out_free;
++
++	memcpy(&pdev->dev_id, dev_id, NFS4_PNFS_DEVICEID4_SIZE);
++	pdev->layout_type = LAYOUT_NFSV4_1_FILES;
++	pdev->pages = pages;
++	pdev->pgbase = 0;
++	pdev->pglen = PAGE_SIZE * max_pages;
++	pdev->mincount = 0;
++	/* TODO: Update types when CB_NOTIFY_DEVICEID is available */
++	pdev->dev_notify_types = 0;
++
++	rc = pnfs_callback_ops->nfs_getdeviceinfo(server, pdev);
++	dprintk("%s getdevice info returns %d\n", __func__, rc);
++	if (rc)
++		goto out_free;
++
++	/*
++	 * Found new device, need to decode it and then add it to the
++	 * list of known devices for this mountpoint.
++	 */
++	dsaddr = decode_and_add_device(inode, pdev);
++out_free:
++	if (pdev->area != NULL)
++		vunmap(pdev->area);
++	/* pages[] is filled in order, so the first NULL slot ends the list */
++	for (i = 0; i < max_pages && pages[i]; i++)
++		__free_page(pages[i]);
++	kfree(pages);
++	kfree(pdev);
++	dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
++	return dsaddr;
++}
++
++struct nfs4_file_layout_dsaddr *
++nfs4_pnfs_device_item_find(struct nfs_client *clp, struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *d; /* result of the cache lookup, may be NULL */
++
++	d = nfs4_find_deviceid(clp->cl_devid_cache, id);
++	dprintk("%s device id (%s) nfs4_deviceid %p\n", __func__,
++		deviceid_fmt(id), d);
++	return (d == NULL) ? NULL :
++		container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
++}
++
++/*
++ * Returns ((offset - pattern_offset) / stripe_unit + first_stripe_index)
++ * % stripe_count; do_div() divides in place and returns the remainder.
++ */
++static inline u32
++_nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	u64 tmp;
++
++	tmp = offset - flseg->pattern_offset;
++	do_div(tmp, flseg->stripe_unit);
++	tmp += flseg->first_stripe_index;
++	return do_div(tmp, FILE_DSADDR(lseg)->stripe_count);
++}
++
++u32
++nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	u32 j; /* stripe slot for offset; stripe_indices[j] is returned */
++
++	j = _nfs4_fl_calc_j_index(lseg, offset);
++	return FILE_DSADDR(lseg)->stripe_indices[j];
++}
++
++struct nfs_fh *
++nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	u32 i; /* index into fh_array */
++
++	if (flseg->stripe_type == STRIPE_SPARSE) {
++		if (flseg->num_fh == 1)
++			i = 0;
++		else if (flseg->num_fh == 0)
++			return NULL; /* no filehandle to select from */
++		else
++			i = nfs4_fl_calc_ds_index(lseg, offset);
++	} else
++		i = _nfs4_fl_calc_j_index(lseg, offset); /* by stripe slot */
++	return &flseg->fh_array[i];
++}
++
++struct nfs4_pnfs_ds *
++nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	struct nfs4_file_layout_dsaddr *dsaddr;
++
++	dsaddr = FILE_DSADDR(lseg);
++	if (dsaddr->ds_list[ds_idx] == NULL) {
++		printk(KERN_ERR "%s: No data server for device id (%s)!!\n",
++			__func__, deviceid_fmt(&flseg->dev_id));
++		return NULL;
++	}
++	/* No nfs_client attached to this data server yet: create one now */
++	if (!dsaddr->ds_list[ds_idx]->ds_clp) {
++		int err;
++
++		err = nfs4_pnfs_ds_create(PNFS_NFS_SERVER(lseg->layout),
++					  dsaddr->ds_list[ds_idx]);
++		if (err) {
++			printk(KERN_ERR "%s nfs4_pnfs_ds_create error %d\n",
++			       __func__, err);
++			return NULL;
++		}
++	}
++	dprintk("%s: dev_id=%s, ds_idx=%u\n",
++		__func__, deviceid_fmt(&flseg->dev_id), ds_idx);
++
++	return dsaddr->ds_list[ds_idx];
++}
++
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-08-23 12:09:03.335501543 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-08-23 12:09:03.335501543 -0400
+@@ -0,0 +1,97 @@
++/*
++ *  pnfs_nfs4filelayout.h
++ *
++ *  NFSv4 file layout driver data structures.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef FS_NFS_NFS4FILELAYOUT_H
++#define FS_NFS_NFS4FILELAYOUT_H
++
++#include <linux/kref.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/pnfs_xdr.h>
++
++#define NFS4_PNFS_DEV_HASH_BITS 5
++#define NFS4_PNFS_DEV_HASH_SIZE (1 << NFS4_PNFS_DEV_HASH_BITS)
++#define NFS4_PNFS_DEV_HASH_MASK (NFS4_PNFS_DEV_HASH_SIZE - 1)
++
++#define NFS4_PNFS_MAX_STRIPE_CNT 4096
++#define NFS4_PNFS_MAX_MULTI_CNT  64 /* 256 fit into a u8 stripe_index */
++#define NFS4_PNFS_MAX_MULTI_DS   2
++
++#define FILE_DSADDR(lseg) (container_of(lseg->deviceid, \
++					struct nfs4_file_layout_dsaddr, \
++					deviceid))
++
++enum stripetype4 {
++	STRIPE_SPARSE = 1,
++	STRIPE_DENSE = 2
++};
++
++/* One data server address (ip/port), shared through the data server cache */
++struct nfs4_pnfs_ds {
++	struct list_head	ds_node;  /* entry in nfs4_data_server_cache */
++	u32 			ds_ip_addr;	/* network byte order */
++	u32 			ds_port;	/* network byte order */
++	struct nfs_client	*ds_clp;	/* NULL until a client is attached */
++	atomic_t		ds_count;	/* references held via ds_list[] */
++	char r_addr[29];			/* ip/port string as decoded */
++};
++
++struct nfs4_file_layout_dsaddr {
++	struct nfs4_deviceid	deviceid;	/* FILE_DSADDR() maps back from this */
++	u32 			stripe_count;	/* entries in stripe_indices */
++	u8			*stripe_indices; /* data server index per stripe */
++	u32			ds_num;		/* entries in ds_list */
++	struct nfs4_pnfs_ds	*ds_list[1];	/* allocated ds_num entries long */
++};
++
++struct nfs4_pnfs_dev_hlist {
++	rwlock_t		dev_lock;
++	struct hlist_head	dev_list[NFS4_PNFS_DEV_HASH_SIZE];
++};
++
++struct nfs4_filelayout_segment {
++	u32 stripe_type;	/* compared against STRIPE_SPARSE */
++	u32 commit_through_mds;
++	u32 stripe_unit;	/* divides the offset to get a stripe number */
++	u32 first_stripe_index;	/* added to the stripe number before the modulo */
++	u64 pattern_offset;	/* subtracted from the file offset first */
++	struct pnfs_deviceid dev_id;
++	unsigned int num_fh;	/* presumably the length of fh_array - confirm */
++	struct nfs_fh *fh_array;
++};
++
++struct nfs4_filelayout {
++	struct pnfs_layout_type fl_layout;
++	u32 stripe_unit;
++};
++
++extern struct nfs_fh *
++nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, loff_t offset);
++
++static inline struct nfs4_filelayout *
++FILE_LO(struct pnfs_layout_type *lo)
++{
++	return container_of(lo, struct nfs4_filelayout, fl_layout);
++}
++
++extern struct pnfs_client_operations *pnfs_callback_ops;
++
++extern void nfs4_fl_free_deviceid_callback(struct kref *);
++extern void print_ds(struct nfs4_pnfs_ds *ds);
++char *deviceid_fmt(const struct pnfs_deviceid *dev_id);
++u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, loff_t offset);
++struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
++					u32 ds_idx);
++extern struct nfs4_file_layout_dsaddr *
++nfs4_pnfs_device_item_find(struct nfs_client *, struct pnfs_deviceid *dev_id);
++struct nfs4_file_layout_dsaddr *
++get_device_info(struct inode *inode, struct pnfs_deviceid *dev_id);
++
++#endif /* FS_NFS_NFS4FILELAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nfs4_fs.h
+--- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-08-23 12:08:29.047512264 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-23 12:09:03.336490079 -0400
+@@ -45,8 +45,28 @@ enum nfs4_client_state {
+ 	NFS4CLNT_RECLAIM_NOGRACE,
+ 	NFS4CLNT_DELEGRETURN,
+ 	NFS4CLNT_SESSION_RESET,
+-	NFS4CLNT_SESSION_DRAINING,
+ 	NFS4CLNT_RECALL_SLOT,
++	NFS4CLNT_LAYOUT_RECALL,
++};
++
++enum nfs4_session_state {
++	NFS4_SESSION_INITING,
++	NFS4_SESSION_DRAINING,
++};
++
++struct nfs4_minor_version_ops {
++	u32	minor_version;
++
++	int	(*call_sync)(struct nfs_server *server,
++			struct rpc_message *msg,
++			struct nfs4_sequence_args *args,
++			struct nfs4_sequence_res *res,
++			int cache_reply);
++	int	(*validate_stateid)(struct nfs_delegation *,
++			const nfs4_stateid *);
++	const struct nfs4_state_recovery_ops *reboot_recovery_ops;
++	const struct nfs4_state_recovery_ops *nograce_recovery_ops;
++	const struct nfs4_state_maintenance_ops *state_renewal_ops;
+ };
+ 
+ /*
+@@ -89,7 +109,6 @@ struct nfs_unique_id {
+  */
+ struct nfs4_state_owner {
+ 	struct nfs_unique_id so_owner_id;
+-	struct nfs_client    *so_client;
+ 	struct nfs_server    *so_server;
+ 	struct rb_node	     so_client_node;
+ 
+@@ -99,7 +118,6 @@ struct nfs4_state_owner {
+ 	atomic_t	     so_count;
+ 	unsigned long	     so_flags;
+ 	struct list_head     so_states;
+-	struct list_head     so_delegations;
+ 	struct nfs_seqid_counter so_seqid;
+ 	struct rpc_sequence  so_sequence;
+ };
+@@ -125,10 +143,20 @@ enum {
+  * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
+  */
+ 
++struct nfs4_lock_owner {
++	unsigned int lo_type;
++#define NFS4_ANY_LOCK_TYPE	(0U)
++#define NFS4_FLOCK_LOCK_TYPE	(1U << 0)
++#define NFS4_POSIX_LOCK_TYPE	(1U << 1)
++	union {
++		fl_owner_t posix_owner;
++		pid_t flock_owner;
++	} lo_u;
++};
++
+ struct nfs4_lock_state {
+ 	struct list_head	ls_locks;	/* Other lock stateids */
+ 	struct nfs4_state *	ls_state;	/* Pointer to open state */
+-	fl_owner_t		ls_owner;	/* POSIX lock owner */
+ #define NFS_LOCK_INITIALIZED 1
+ 	int			ls_flags;
+ 	struct nfs_seqid_counter	ls_seqid;
+@@ -136,6 +164,7 @@ struct nfs4_lock_state {
+ 	struct nfs_unique_id	ls_id;
+ 	nfs4_stateid		ls_stateid;
+ 	atomic_t		ls_count;
++	struct nfs4_lock_owner	ls_owner;
+ };
+ 
+ /* bits for nfs4_state->flags */
+@@ -219,22 +248,34 @@ extern int nfs4_open_revalidate(struct i
+ extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
+ extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
+ 		struct nfs4_fs_locations *fs_locations, struct page *page);
++extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
+ 
+-extern struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[];
+-extern struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[];
+ #if defined(CONFIG_NFS_V4_1)
+-extern int nfs4_setup_sequence(struct nfs_client *clp,
++static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
++{
++	return server->nfs_client->cl_session;
++}
++
++extern int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
+ 		int cache_reply, struct rpc_task *task);
+ extern void nfs4_destroy_session(struct nfs4_session *session);
+ extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
++extern int nfs4_proc_exchange_id(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_proc_create_session(struct nfs_client *);
+ extern int nfs4_proc_destroy_session(struct nfs4_session *);
+ extern int nfs4_init_session(struct nfs_server *server);
+ extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
+ 		struct nfs_fsinfo *fsinfo);
+ #else /* CONFIG_NFS_v4_1 */
+-static inline int nfs4_setup_sequence(struct nfs_client *clp,
++static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
++{
++	return NULL;
++}
++
++static inline int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
+ 		int cache_reply, struct rpc_task *task)
+ {
+@@ -247,12 +288,12 @@ static inline int nfs4_init_session(stru
+ }
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+-extern struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[];
++extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
+ 
+ extern const u32 nfs4_fattr_bitmap[2];
+ extern const u32 nfs4_statfs_bitmap[2];
+ extern const u32 nfs4_pathconf_bitmap[2];
+-extern const u32 nfs4_fsinfo_bitmap[2];
++extern const u32 nfs4_fsinfo_bitmap[3];
+ extern const u32 nfs4_fs_locations_bitmap[2];
+ 
+ /* nfs4renewd.c */
+@@ -284,7 +325,7 @@ extern void nfs41_handle_sequence_flag_e
+ extern void nfs41_handle_recall_slot(struct nfs_client *clp);
+ extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
+ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
+-extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
++extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
+ 
+ extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
+ extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
+@@ -293,6 +334,7 @@ extern void nfs_increment_lock_seqid(int
+ extern void nfs_release_seqid(struct nfs_seqid *seqid);
+ extern void nfs_free_seqid(struct nfs_seqid *seqid);
+ 
++/* write.c */
+ extern const nfs4_stateid zero_stateid;
+ 
+ /* nfs4xdr.c */
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-08-23 12:08:29.050481368 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-23 12:09:03.339481253 -0400
+@@ -49,12 +49,15 @@
+ #include <linux/mount.h>
+ #include <linux/module.h>
+ #include <linux/sunrpc/bc_xprt.h>
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs4_pnfs.h>
+ 
+ #include "nfs4_fs.h"
+ #include "delegation.h"
+ #include "internal.h"
+ #include "iostat.h"
+ #include "callback.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_PROC
+ 
+@@ -67,7 +70,7 @@ struct nfs4_opendata;
+ static int _nfs4_proc_open(struct nfs4_opendata *data);
+ static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
+ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
+-static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
++static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, struct nfs_client *);
+ static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
+ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
+ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
+@@ -125,11 +128,16 @@ const u32 nfs4_pathconf_bitmap[2] = {
+ 	0
+ };
+ 
+-const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
++const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE
+ 			| FATTR4_WORD0_MAXREAD
+ 			| FATTR4_WORD0_MAXWRITE
+ 			| FATTR4_WORD0_LEASE_TIME,
++#ifdef CONFIG_NFS_V4_1
++			FATTR4_WORD1_FS_LAYOUT_TYPES,
++			FATTR4_WORD2_LAYOUT_BLKSIZE
++#else /* CONFIG_NFS_V4_1 */
+ 			0
++#endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+ const u32 nfs4_fs_locations_bitmap[2] = {
+@@ -356,7 +364,7 @@ static void nfs41_check_drain_session_co
+ {
+ 	struct rpc_task *task;
+ 
+-	if (!test_bit(NFS4CLNT_SESSION_DRAINING, &ses->clp->cl_state)) {
++	if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
+ 		task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq);
+ 		if (task)
+ 			rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+@@ -370,12 +378,11 @@ static void nfs41_check_drain_session_co
+ 	complete(&ses->complete);
+ }
+ 
+-static void nfs41_sequence_free_slot(const struct nfs_client *clp,
+-			      struct nfs4_sequence_res *res)
++static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
+ {
+ 	struct nfs4_slot_table *tbl;
+ 
+-	tbl = &clp->cl_session->fc_slot_table;
++	tbl = &res->sr_session->fc_slot_table;
+ 	if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) {
+ 		/* just wake up the next guy waiting since
+ 		 * we may have not consumed a slot after all */
+@@ -385,18 +392,17 @@ static void nfs41_sequence_free_slot(con
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+ 	nfs4_free_slot(tbl, res->sr_slotid);
+-	nfs41_check_drain_session_complete(clp->cl_session);
++	nfs41_check_drain_session_complete(res->sr_session);
+ 	spin_unlock(&tbl->slot_tbl_lock);
+ 	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+ }
+ 
+-static void nfs41_sequence_done(struct nfs_client *clp,
+-				struct nfs4_sequence_res *res,
+-				int rpc_status)
++static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
+ {
+ 	unsigned long timestamp;
+ 	struct nfs4_slot_table *tbl;
+ 	struct nfs4_slot *slot;
++	struct nfs_client *clp;
+ 
+ 	/*
+ 	 * sr_status remains 1 if an RPC level error occurred. The server
+@@ -411,13 +417,16 @@ static void nfs41_sequence_done(struct n
+ 	if (res->sr_slotid == NFS4_MAX_SLOT_TABLE)
+ 		goto out;
+ 
++	tbl = &res->sr_session->fc_slot_table;
++	slot = tbl->slots + res->sr_slotid;
++
+ 	/* Check the SEQUENCE operation status */
+-	if (res->sr_status == 0) {
+-		tbl = &clp->cl_session->fc_slot_table;
+-		slot = tbl->slots + res->sr_slotid;
++	switch (res->sr_status) {
++	case 0:
+ 		/* Update the slot's sequence and clientid lease timer */
+ 		++slot->seq_nr;
+ 		timestamp = res->sr_renewal_time;
++		clp = res->sr_session->clp;
+ 		spin_lock(&clp->cl_lock);
+ 		if (time_before(clp->cl_last_renewal, timestamp))
+ 			clp->cl_last_renewal = timestamp;
+@@ -425,11 +434,39 @@ static void nfs41_sequence_done(struct n
+ 		/* Check sequence flags */
+ 		if (atomic_read(&clp->cl_count) > 1)
+ 			nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
++		break;
++	case -NFS4ERR_DELAY:
++		/* The server detected a resend of the RPC call and
++		 * returned NFS4ERR_DELAY as per Section 2.10.6.2
++		 * of RFC5661.
++		 */
++		dprintk("%s: slot=%d seq=%d: Operation in progress\n",
++				__func__, res->sr_slotid, slot->seq_nr);
++		goto out_retry;
++	default:
++		/* Just update the slot sequence no. */
++		++slot->seq_nr;
+ 	}
+ out:
+ 	/* The session may be reset by one of the error handlers. */
+ 	dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
+-	nfs41_sequence_free_slot(clp, res);
++	nfs41_sequence_free_slot(res);
++	return 1;
++out_retry:
++	rpc_delay(task, NFS4_POLL_RETRY_MAX);
++	rpc_restart_call(task);
++	/* FIXME: rpc_restart_call() should be made to return success/fail */
++	if (RPC_ASSASSINATED(task))
++		goto out;
++	return 0;
++}
++
++static int nfs4_sequence_done(struct rpc_task *task,
++			       struct nfs4_sequence_res *res)
++{
++	if (res->sr_session == NULL)
++		return 1;
++	return nfs41_sequence_done(task, res);
+ }
+ 
+ /*
+@@ -480,12 +517,11 @@ static int nfs41_setup_sequence(struct n
+ 	if (res->sr_slotid != NFS4_MAX_SLOT_TABLE)
+ 		return 0;
+ 
+-	memset(res, 0, sizeof(*res));
+ 	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+ 	tbl = &session->fc_slot_table;
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+-	if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state) &&
++	if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
+ 	    !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
+ 		/*
+ 		 * The state manager will wait until the slot table is empty.
+@@ -525,6 +561,7 @@ static int nfs41_setup_sequence(struct n
+ 	res->sr_session = session;
+ 	res->sr_slotid = slotid;
+ 	res->sr_renewal_time = jiffies;
++	res->sr_status_flags = 0;
+ 	/*
+ 	 * sr_status is only set in decode_sequence, and so will remain
+ 	 * set to 1 if an rpc level failure occurs.
+@@ -533,33 +570,36 @@ static int nfs41_setup_sequence(struct n
+ 	return 0;
+ }
+ 
+-int nfs4_setup_sequence(struct nfs_client *clp,
++int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 			struct nfs4_sequence_args *args,
+ 			struct nfs4_sequence_res *res,
+ 			int cache_reply,
+ 			struct rpc_task *task)
+ {
++	struct nfs4_session *session = nfs4_get_session(server);
+ 	int ret = 0;
+ 
++	if (ds_session)
++		session = ds_session;
++	if (session == NULL) {
++		args->sa_session = NULL;
++		res->sr_session = NULL;
++		goto out;
++	}
++
+ 	dprintk("--> %s clp %p session %p sr_slotid %d\n",
+-		__func__, clp, clp->cl_session, res->sr_slotid);
++		__func__, session->clp, session, res->sr_slotid);
+ 
+-	if (!nfs4_has_session(clp))
+-		goto out;
+-	ret = nfs41_setup_sequence(clp->cl_session, args, res, cache_reply,
++	ret = nfs41_setup_sequence(session, args, res, cache_reply,
+ 				   task);
+-	if (ret && ret != -EAGAIN) {
+-		/* terminate rpc task */
+-		task->tk_status = ret;
+-		task->tk_action = NULL;
+-	}
+ out:
+ 	dprintk("<-- %s status=%d\n", __func__, ret);
+ 	return ret;
+ }
+ 
+ struct nfs41_call_sync_data {
+-	struct nfs_client *clp;
++	const struct nfs_server *seq_server;
+ 	struct nfs4_sequence_args *seq_args;
+ 	struct nfs4_sequence_res *seq_res;
+ 	int cache_reply;
+@@ -569,9 +609,9 @@ static void nfs41_call_sync_prepare(stru
+ {
+ 	struct nfs41_call_sync_data *data = calldata;
+ 
+-	dprintk("--> %s data->clp->cl_session %p\n", __func__,
+-		data->clp->cl_session);
+-	if (nfs4_setup_sequence(data->clp, data->seq_args,
++	dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server);
++
++	if (nfs4_setup_sequence(data->seq_server, NULL, data->seq_args,
+ 				data->seq_res, data->cache_reply, task))
+ 		return;
+ 	rpc_call_start(task);
+@@ -587,7 +627,7 @@ static void nfs41_call_sync_done(struct 
+ {
+ 	struct nfs41_call_sync_data *data = calldata;
+ 
+-	nfs41_sequence_done(data->clp, data->seq_res, task->tk_status);
++	nfs41_sequence_done(task, data->seq_res);
+ }
+ 
+ struct rpc_call_ops nfs41_call_sync_ops = {
+@@ -600,8 +640,7 @@ struct rpc_call_ops nfs41_call_priv_sync
+ 	.rpc_call_done = nfs41_call_sync_done,
+ };
+ 
+-static int nfs4_call_sync_sequence(struct nfs_client *clp,
+-				   struct rpc_clnt *clnt,
++static int nfs4_call_sync_sequence(struct nfs_server *server,
+ 				   struct rpc_message *msg,
+ 				   struct nfs4_sequence_args *args,
+ 				   struct nfs4_sequence_res *res,
+@@ -611,13 +650,13 @@ static int nfs4_call_sync_sequence(struc
+ 	int ret;
+ 	struct rpc_task *task;
+ 	struct nfs41_call_sync_data data = {
+-		.clp = clp,
++		.seq_server = server,
+ 		.seq_args = args,
+ 		.seq_res = res,
+ 		.cache_reply = cache_reply,
+ 	};
+ 	struct rpc_task_setup task_setup = {
+-		.rpc_client = clnt,
++		.rpc_client = server->client,
+ 		.rpc_message = msg,
+ 		.callback_ops = &nfs41_call_sync_ops,
+ 		.callback_data = &data
+@@ -642,10 +681,15 @@ int _nfs4_call_sync_session(struct nfs_s
+ 			    struct nfs4_sequence_res *res,
+ 			    int cache_reply)
+ {
+-	return nfs4_call_sync_sequence(server->nfs_client, server->client,
+-				       msg, args, res, cache_reply, 0);
++	return nfs4_call_sync_sequence(server, msg, args, res, cache_reply, 0);
+ }
+ 
++#else
++static int nfs4_sequence_done(struct rpc_task *task,
++			       struct nfs4_sequence_res *res)
++{
++	return 1;
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ int _nfs4_call_sync(struct nfs_server *server,
+@@ -659,18 +703,9 @@ int _nfs4_call_sync(struct nfs_server *s
+ }
+ 
+ #define nfs4_call_sync(server, msg, args, res, cache_reply) \
+-	(server)->nfs_client->cl_call_sync((server), (msg), &(args)->seq_args, \
++	(server)->nfs_client->cl_mvops->call_sync((server), (msg), &(args)->seq_args, \
+ 			&(res)->seq_res, (cache_reply))
+ 
+-static void nfs4_sequence_done(const struct nfs_server *server,
+-			       struct nfs4_sequence_res *res, int rpc_status)
+-{
+-#ifdef CONFIG_NFS_V4_1
+-	if (nfs4_has_session(server->nfs_client))
+-		nfs41_sequence_done(server->nfs_client, res, rpc_status);
+-#endif /* CONFIG_NFS_V4_1 */
+-}
+-
+ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
+ {
+ 	struct nfs_inode *nfsi = NFS_I(dir);
+@@ -745,19 +780,14 @@ static struct nfs4_opendata *nfs4_openda
+ 	p->o_arg.server = server;
+ 	p->o_arg.bitmask = server->attr_bitmask;
+ 	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
+-	if (flags & O_EXCL) {
+-		if (nfs4_has_persistent_session(server->nfs_client)) {
+-			/* GUARDED */
+-			p->o_arg.u.attrs = &p->attrs;
+-			memcpy(&p->attrs, attrs, sizeof(p->attrs));
+-		} else { /* EXCLUSIVE4_1 */
+-			u32 *s = (u32 *) p->o_arg.u.verifier.data;
+-			s[0] = jiffies;
+-			s[1] = current->pid;
+-		}
+-	} else if (flags & O_CREAT) {
++	if (flags & O_CREAT) {
++		u32 *s;
++
+ 		p->o_arg.u.attrs = &p->attrs;
+ 		memcpy(&p->attrs, attrs, sizeof(p->attrs));
++		s = (u32 *) p->o_arg.u.verifier.data;
++		s[0] = jiffies;
++		s[1] = current->pid;
+ 	}
+ 	p->c_arg.fh = &p->o_res.fh;
+ 	p->c_arg.stateid = &p->o_res.stateid;
+@@ -851,8 +881,10 @@ static void update_open_stateflags(struc
+ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
+ {
+ 	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+-		memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
+-	memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
++		memcpy(state->stateid.u.data, stateid->u.data,
++		       sizeof(state->stateid.u.data));
++	memcpy(state->open_stateid.u.data, stateid->u.data,
++	       sizeof(state->open_stateid.u.data));
+ 	switch (fmode) {
+ 		case FMODE_READ:
+ 			set_bit(NFS_O_RDONLY_STATE, &state->flags);
+@@ -880,7 +912,8 @@ static void __update_open_stateid(struct
+ 	 */
+ 	write_seqlock(&state->seqlock);
+ 	if (deleg_stateid != NULL) {
+-		memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
++		memcpy(state->stateid.u.data, deleg_stateid->u.data,
++		       sizeof(state->stateid.u.data));
+ 		set_bit(NFS_DELEGATED_STATE, &state->flags);
+ 	}
+ 	if (open_stateid != NULL)
+@@ -911,7 +944,8 @@ static int update_open_stateid(struct nf
+ 
+ 	if (delegation == NULL)
+ 		delegation = &deleg_cur->stateid;
+-	else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0)
++	else if (memcmp(deleg_cur->stateid.u.data, delegation->u.data,
++			NFS4_STATEID_SIZE) != 0)
+ 		goto no_delegation_unlock;
+ 
+ 	nfs_mark_delegation_referenced(deleg_cur);
+@@ -973,7 +1007,8 @@ static struct nfs4_state *nfs4_try_open_
+ 			break;
+ 		}
+ 		/* Save the delegation */
+-		memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
++		memcpy(stateid.u.data, delegation->stateid.u.data,
++		       sizeof(stateid.u.data));
+ 		rcu_read_unlock();
+ 		ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
+ 		if (ret != 0)
+@@ -1127,10 +1162,13 @@ static int nfs4_open_recover(struct nfs4
+ 	 * Check if we need to update the current stateid.
+ 	 */
+ 	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
+-	    memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
++	    memcmp(state->stateid.u.data, state->open_stateid.u.data,
++		   sizeof(state->stateid.u.data)) != 0) {
+ 		write_seqlock(&state->seqlock);
+ 		if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+-			memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
++			memcpy(state->stateid.u.data,
++			       state->open_stateid.u.data,
++			       sizeof(state->stateid.u.data));
+ 		write_sequnlock(&state->seqlock);
+ 	}
+ 	return 0;
+@@ -1199,8 +1237,8 @@ static int _nfs4_open_delegation_recall(
+ 	if (IS_ERR(opendata))
+ 		return PTR_ERR(opendata);
+ 	opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
+-	memcpy(opendata->o_arg.u.delegation.data, stateid->data,
+-			sizeof(opendata->o_arg.u.delegation.data));
++	memcpy(opendata->o_arg.u.delegation.u.data, stateid->u.data,
++			sizeof(opendata->o_arg.u.delegation.u.data));
+ 	ret = nfs4_open_recover(opendata, state);
+ 	nfs4_opendata_put(opendata);
+ 	return ret;
+@@ -1258,8 +1296,8 @@ static void nfs4_open_confirm_done(struc
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+ 	if (data->rpc_status == 0) {
+-		memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
+-				sizeof(data->o_res.stateid.data));
++		memcpy(data->o_res.stateid.u.data, data->c_res.stateid.u.data,
++				sizeof(data->o_res.stateid.u.data));
+ 		nfs_confirm_seqid(&data->owner->so_seqid, 0);
+ 		renew_lease(data->o_res.server, data->timestamp);
+ 		data->rpc_done = 1;
+@@ -1356,13 +1394,13 @@ static void nfs4_open_prepare(struct rpc
+ 	}
+ 	/* Update sequence id. */
+ 	data->o_arg.id = sp->so_owner_id.id;
+-	data->o_arg.clientid = sp->so_client->cl_clientid;
++	data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid;
+ 	if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
+ 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
+ 		nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
+ 	}
+ 	data->timestamp = jiffies;
+-	if (nfs4_setup_sequence(data->o_arg.server->nfs_client,
++	if (nfs4_setup_sequence(data->o_arg.server, NULL,
+ 				&data->o_arg.seq_args,
+ 				&data->o_res.seq_res, 1, task))
+ 		return;
+@@ -1385,8 +1423,8 @@ static void nfs4_open_done(struct rpc_ta
+ 
+ 	data->rpc_status = task->tk_status;
+ 
+-	nfs4_sequence_done(data->o_arg.server, &data->o_res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->o_res.seq_res))
++		return;
+ 
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+@@ -1539,9 +1577,8 @@ static int _nfs4_proc_open(struct nfs4_o
+ 	return 0;
+ }
+ 
+-static int nfs4_recover_expired_lease(struct nfs_server *server)
++int nfs4_recover_expired_lease(struct nfs_client *clp)
+ {
+-	struct nfs_client *clp = server->nfs_client;
+ 	unsigned int loop;
+ 	int ret;
+ 
+@@ -1557,6 +1594,7 @@ static int nfs4_recover_expired_lease(st
+ 	}
+ 	return ret;
+ }
++EXPORT_SYMBOL(nfs4_recover_expired_lease);
+ 
+ /*
+  * OPEN_EXPIRED:
+@@ -1646,7 +1684,7 @@ static int _nfs4_do_open(struct inode *d
+ 		dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
+ 		goto out_err;
+ 	}
+-	status = nfs4_recover_expired_lease(server);
++	status = nfs4_recover_expired_lease(server->nfs_client);
+ 	if (status != 0)
+ 		goto err_put_state_owner;
+ 	if (path->dentry->d_inode != NULL)
+@@ -1773,7 +1811,7 @@ static int _nfs4_do_setattr(struct inode
+ 	if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
+ 		/* Use that stateid */
+ 	} else if (state != NULL) {
+-		nfs4_copy_stateid(&arg.stateid, state, current->files);
++		nfs4_copy_stateid(&arg.stateid, state, current->files, current->tgid);
+ 	} else
+ 		memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
+ 
+@@ -1838,7 +1876,8 @@ static void nfs4_close_done(struct rpc_t
+ 	struct nfs4_state *state = calldata->state;
+ 	struct nfs_server *server = NFS_SERVER(calldata->inode);
+ 
+-	nfs4_sequence_done(server, &calldata->res.seq_res, task->tk_status);
++	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
++		return;
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+         /* hmm. we are done with the inode, and in the process of freeing
+@@ -1858,7 +1897,7 @@ static void nfs4_close_done(struct rpc_t
+ 			if (calldata->arg.fmode == 0)
+ 				break;
+ 		default:
+-			if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
++			if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
+ 				rpc_restart_call_prepare(task);
+ 	}
+ 	nfs_release_seqid(calldata->arg.seqid);
+@@ -1903,7 +1942,7 @@ static void nfs4_close_prepare(struct rp
+ 
+ 	nfs_fattr_init(calldata->res.fattr);
+ 	calldata->timestamp = jiffies;
+-	if (nfs4_setup_sequence((NFS_SERVER(calldata->inode))->nfs_client,
++	if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), NULL,
+ 				&calldata->arg.seq_args, &calldata->res.seq_res,
+ 				1, task))
+ 		return;
+@@ -2323,6 +2362,9 @@ nfs4_proc_setattr(struct dentry *dentry,
+ 	struct nfs4_state *state = NULL;
+ 	int status;
+ 
++	if (pnfs_ld_layoutret_on_setattr(inode))
++		pnfs_return_layout(inode, NULL, NULL, RETURN_FILE, true);
++
+ 	nfs_fattr_init(fattr);
+ 	
+ 	/* Search for an existing open(O_WRITE) file */
+@@ -2648,8 +2690,9 @@ static int nfs4_proc_unlink_done(struct 
+ {
+ 	struct nfs_removeres *res = task->tk_msg.rpc_resp;
+ 
+-	nfs4_sequence_done(res->server, &res->seq_res, task->tk_status);
+-	if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
++	if (!nfs4_sequence_done(task, &res->seq_res))
++		return 0;
++	if (nfs4_async_handle_error(task, res->server, NULL, NULL) == -EAGAIN)
+ 		return 0;
+ 	update_changeattr(dir, &res->cinfo);
+ 	nfs_post_op_update_inode(dir, res->dir_attr);
+@@ -3090,18 +3133,31 @@ static int nfs4_proc_pathconf(struct nfs
+ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
+ {
+ 	struct nfs_server *server = NFS_SERVER(data->inode);
++	struct nfs_client *client = server->nfs_client;
+ 
+ 	dprintk("--> %s\n", __func__);
+ 
+-	nfs4_sequence_done(server, &data->res.seq_res, task->tk_status);
++#ifdef CONFIG_NFS_V4_1
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
+ 
+-	if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
+-		nfs_restart_rpc(task, server->nfs_client);
++	if (nfs4_async_handle_error(task, server, data->args.context->state, client) == -EAGAIN) {
++		nfs_restart_rpc(task, client);
+ 		return -EAGAIN;
+ 	}
+ 
+ 	nfs_invalidate_atime(data->inode);
+-	if (task->tk_status > 0)
++	if (task->tk_status > 0 && client == server->nfs_client)
+ 		renew_lease(server, data->timestamp);
+ 	return 0;
+ }
+@@ -3112,20 +3168,56 @@ static void nfs4_proc_read_setup(struct 
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+ }
+ 
++static void pnfs4_update_write_done(struct nfs_inode *nfsi, struct nfs_write_data *data)
++{
++#ifdef CONFIG_NFS_V4_1
++	pnfs_update_last_write(nfsi, data->args.offset, data->res.count);
++	pnfs_need_layoutcommit(nfsi, data->args.context);
++#endif /* CONFIG_NFS_V4_1 */
++}
++
+ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+ {
+ 	struct inode *inode = data->inode;
+-	
+-	nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
+-			   task->tk_status);
++	struct nfs_server *server = NFS_SERVER(inode);
++	struct nfs_client *client = server->nfs_client;
+ 
+-	if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
+-		nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
++
++#ifdef CONFIG_NFS_V4_1
++	/* restore original count after retry? */
++	if (data->pdata.orig_count) {
++		dprintk("%s: restoring original count %u\n", __func__,
++			data->pdata.orig_count);
++		data->args.count = data->pdata.orig_count;
++	}
++
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (nfs4_async_handle_error(task, server, data->args.context->state, client) == -EAGAIN) {
++		nfs_restart_rpc(task, client);
+ 		return -EAGAIN;
+ 	}
++
++	/*
++	 * MDS write: renew lease
++	 * DS write: update lastbyte written, mark for layout commit
++	 */
+ 	if (task->tk_status >= 0) {
+-		renew_lease(NFS_SERVER(inode), data->timestamp);
+-		nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
++		if (client == server->nfs_client) {
++			renew_lease(server, data->timestamp);
++			nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
++		} else
++			pnfs4_update_write_done(NFS_I(inode), data);
+ 	}
+ 	return 0;
+ }
+@@ -3138,20 +3230,42 @@ static void nfs4_proc_write_setup(struct
+ 	data->res.server = server;
+ 	data->timestamp   = jiffies;
+ 
++#ifdef CONFIG_NFS_V4_1
++	/* writes to DS use pnfs vector */
++	if (data->fldata.ds_nfs_client) {
++		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_WRITE];
++		return;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
+ }
+ 
+ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+ {
+ 	struct inode *inode = data->inode;
+-	
+-	nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
+-			   task->tk_status);
+-	if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
++	struct nfs_server *server = NFS_SERVER(data->inode);
++	struct nfs_client *client = server->nfs_client;
++
++#ifdef CONFIG_NFS_V4_1
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS commit\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
++
++	if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL, NULL) == -EAGAIN) {
+ 		nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
+ 		return -EAGAIN;
+ 	}
+-	nfs_refresh_inode(inode, data->res.fattr);
++	if (client == server->nfs_client)
++		nfs_refresh_inode(inode, data->res.fattr);
+ 	return 0;
+ }
+ 
+@@ -3161,6 +3275,12 @@ static void nfs4_proc_commit_setup(struc
+ 	
+ 	data->args.bitmask = server->cache_consistency_bitmask;
+ 	data->res.server = server;
++#if defined(CONFIG_NFS_V4_1)
++	if (data->fldata.ds_nfs_client) {
++		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_COMMIT];
++		return;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
+ }
+ 
+@@ -3464,9 +3584,12 @@ static int nfs4_proc_set_acl(struct inod
+ }
+ 
+ static int
+-_nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs_client *clp, struct nfs4_state *state)
++nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state, struct nfs_client *clp)
+ {
+-	if (!clp || task->tk_status >= 0)
++	if (!clp)
++		clp = server->nfs_client;
++
++	if (task->tk_status >= 0)
+ 		return 0;
+ 	switch(task->tk_status) {
+ 		case -NFS4ERR_ADMIN_REVOKED:
+@@ -3491,8 +3614,9 @@ _nfs4_async_handle_error(struct rpc_task
+ 		case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ 		case -NFS4ERR_SEQ_FALSE_RETRY:
+ 		case -NFS4ERR_SEQ_MISORDERED:
+-			dprintk("%s ERROR %d, Reset session\n", __func__,
+-				task->tk_status);
++			dprintk("%s ERROR %d, Reset session. Exchangeid "
++				"flags 0x%x\n", __func__, task->tk_status,
++				clp->cl_exchange_flags);
+ 			nfs4_schedule_state_recovery(clp);
+ 			task->tk_status = 0;
+ 			return -EAGAIN;
+@@ -3512,6 +3636,8 @@ _nfs4_async_handle_error(struct rpc_task
+ 	task->tk_status = nfs4_map_errors(task->tk_status);
+ 	return 0;
+ do_state_recovery:
++	if (is_ds_only_client(clp))
++		return 0;
+ 	rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
+ 	nfs4_schedule_state_recovery(clp);
+ 	if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
+@@ -3520,12 +3646,6 @@ do_state_recovery:
+ 	return -EAGAIN;
+ }
+ 
+-static int
+-nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
+-{
+-	return _nfs4_async_handle_error(task, server, server->nfs_client, state);
+-}
+-
+ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
+ 		unsigned short port, struct rpc_cred *cred,
+ 		struct nfs4_setclientid_res *res)
+@@ -3641,8 +3761,8 @@ static void nfs4_delegreturn_done(struct
+ {
+ 	struct nfs4_delegreturndata *data = calldata;
+ 
+-	nfs4_sequence_done(data->res.server, &data->res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return;
+ 
+ 	switch (task->tk_status) {
+ 	case -NFS4ERR_STALE_STATEID:
+@@ -3651,8 +3771,8 @@ static void nfs4_delegreturn_done(struct
+ 		renew_lease(data->res.server, data->timestamp);
+ 		break;
+ 	default:
+-		if (nfs4_async_handle_error(task, data->res.server, NULL) ==
+-				-EAGAIN) {
++		if (nfs4_async_handle_error(task, data->res.server, NULL, NULL)
++				== -EAGAIN) {
+ 			nfs_restart_rpc(task, data->res.server->nfs_client);
+ 			return;
+ 		}
+@@ -3672,7 +3792,7 @@ static void nfs4_delegreturn_prepare(str
+ 
+ 	d_data = (struct nfs4_delegreturndata *)data;
+ 
+-	if (nfs4_setup_sequence(d_data->res.server->nfs_client,
++	if (nfs4_setup_sequence(d_data->res.server, NULL,
+ 				&d_data->args.seq_args,
+ 				&d_data->res.seq_res, 1, task))
+ 		return;
+@@ -3892,15 +4012,16 @@ static void nfs4_locku_done(struct rpc_t
+ {
+ 	struct nfs4_unlockdata *calldata = data;
+ 
+-	nfs4_sequence_done(calldata->server, &calldata->res.seq_res,
+-			   task->tk_status);
++	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
++		return;
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+ 	switch (task->tk_status) {
+ 		case 0:
+-			memcpy(calldata->lsp->ls_stateid.data,
+-					calldata->res.stateid.data,
+-					sizeof(calldata->lsp->ls_stateid.data));
++			memcpy(calldata->lsp->ls_stateid.u.data,
++					calldata->res.stateid.u.data,
++					sizeof(calldata->lsp->ls_stateid.u.
++					       data));
+ 			renew_lease(calldata->server, calldata->timestamp);
+ 			break;
+ 		case -NFS4ERR_BAD_STATEID:
+@@ -3909,7 +4030,7 @@ static void nfs4_locku_done(struct rpc_t
+ 		case -NFS4ERR_EXPIRED:
+ 			break;
+ 		default:
+-			if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
++			if (nfs4_async_handle_error(task, calldata->server, NULL, NULL) == -EAGAIN)
+ 				nfs_restart_rpc(task,
+ 						 calldata->server->nfs_client);
+ 	}
+@@ -3927,7 +4048,7 @@ static void nfs4_locku_prepare(struct rp
+ 		return;
+ 	}
+ 	calldata->timestamp = jiffies;
+-	if (nfs4_setup_sequence(calldata->server->nfs_client,
++	if (nfs4_setup_sequence(calldata->server, NULL,
+ 				&calldata->arg.seq_args,
+ 				&calldata->res.seq_res, 1, task))
+ 		return;
+@@ -4082,7 +4203,8 @@ static void nfs4_lock_prepare(struct rpc
+ 	} else
+ 		data->arg.new_lock_owner = 0;
+ 	data->timestamp = jiffies;
+-	if (nfs4_setup_sequence(data->server->nfs_client, &data->arg.seq_args,
++	if (nfs4_setup_sequence(data->server, NULL,
++				&data->arg.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+@@ -4101,8 +4223,8 @@ static void nfs4_lock_done(struct rpc_ta
+ 
+ 	dprintk("%s: begin!\n", __func__);
+ 
+-	nfs4_sequence_done(data->server, &data->res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return;
+ 
+ 	data->rpc_status = task->tk_status;
+ 	if (RPC_ASSASSINATED(task))
+@@ -4114,8 +4236,8 @@ static void nfs4_lock_done(struct rpc_ta
+ 			goto out;
+ 	}
+ 	if (data->rpc_status == 0) {
+-		memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
+-					sizeof(data->lsp->ls_stateid.data));
++		memcpy(data->lsp->ls_stateid.u.data, data->res.stateid.u.data,
++					sizeof(data->lsp->ls_stateid.u.data));
+ 		data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+ 		renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
+ 	}
+@@ -4424,6 +4546,34 @@ out:
+ 	return err;
+ }
+ 
++static void nfs4_release_lockowner_release(void *calldata)
++{
++	kfree(calldata);
++}
++
++const struct rpc_call_ops nfs4_release_lockowner_ops = {
++	.rpc_release = nfs4_release_lockowner_release,
++};
++
++void nfs4_release_lockowner(const struct nfs4_lock_state *lsp)
++{
++	struct nfs_server *server = lsp->ls_state->owner->so_server;
++	struct nfs_release_lockowner_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER],
++	};
++
++	if (server->nfs_client->cl_mvops->minor_version != 0)
++		return;
++	args = kmalloc(sizeof(*args), GFP_NOFS);
++	if (!args)
++		return;
++	args->lock_owner.clientid = server->nfs_client->cl_clientid;
++	args->lock_owner.id = lsp->ls_id.id;
++	msg.rpc_argp = args;
++	rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
++}
++
+ #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
+ 
+ int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
+@@ -4526,7 +4676,7 @@ int nfs4_proc_exchange_id(struct nfs_cli
+ 	nfs4_verifier verifier;
+ 	struct nfs41_exchange_id_args args = {
+ 		.client = clp,
+-		.flags = clp->cl_exchange_flags,
++		.flags = clp->cl_exchange_flags & ~EXCHGID4_FLAG_CONFIRMED_R,
+ 	};
+ 	struct nfs41_exchange_id_res res = {
+ 		.client = clp,
+@@ -4574,6 +4724,7 @@ int nfs4_proc_exchange_id(struct nfs_cli
+ 	dprintk("<-- %s status= %d\n", __func__, status);
+ 	return status;
+ }
++EXPORT_SYMBOL(nfs4_proc_exchange_id);
+ 
+ struct nfs4_get_lease_time_data {
+ 	struct nfs4_get_lease_time_args *args;
+@@ -4611,7 +4762,8 @@ static void nfs4_get_lease_time_done(str
+ 			(struct nfs4_get_lease_time_data *)calldata;
+ 
+ 	dprintk("--> %s\n", __func__);
+-	nfs41_sequence_done(data->clp, &data->res->lr_seq_res, task->tk_status);
++	if (!nfs41_sequence_done(task, &data->res->lr_seq_res))
++		return;
+ 	switch (task->tk_status) {
+ 	case -NFS4ERR_DELAY:
+ 	case -NFS4ERR_GRACE:
+@@ -4805,13 +4957,6 @@ struct nfs4_session *nfs4_alloc_session(
+ 	if (!session)
+ 		return NULL;
+ 
+-	/*
+-	 * The create session reply races with the server back
+-	 * channel probe. Mark the client NFS_CS_SESSION_INITING
+-	 * so that the client back channel can find the
+-	 * nfs_client struct
+-	 */
+-	clp->cl_cons_state = NFS_CS_SESSION_INITING;
+ 	init_completion(&session->complete);
+ 
+ 	tbl = &session->fc_slot_table;
+@@ -4824,6 +4969,8 @@ struct nfs4_session *nfs4_alloc_session(
+ 	spin_lock_init(&tbl->slot_tbl_lock);
+ 	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
+ 
++	session->session_state = 1<<NFS4_SESSION_INITING;
++
+ 	session->clp = clp;
+ 	return session;
+ }
+@@ -5040,6 +5187,10 @@ int nfs4_init_session(struct nfs_server 
+ 	if (!nfs4_has_session(clp))
+ 		return 0;
+ 
++	session = clp->cl_session;
++	if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
++		return 0;
++
+ 	rsize = server->rsize;
+ 	if (rsize == 0)
+ 		rsize = NFS_MAX_FILE_IO_SIZE;
+@@ -5047,11 +5198,10 @@ int nfs4_init_session(struct nfs_server 
+ 	if (wsize == 0)
+ 		wsize = NFS_MAX_FILE_IO_SIZE;
+ 
+-	session = clp->cl_session;
+ 	session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
+ 	session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
+ 
+-	ret = nfs4_recover_expired_lease(server);
++	ret = nfs4_recover_expired_lease(server->nfs_client);
+ 	if (!ret)
+ 		ret = nfs4_check_client_ready(clp);
+ 	return ret;
+@@ -5060,69 +5210,70 @@ int nfs4_init_session(struct nfs_server 
+ /*
+  * Renew the cl_session lease.
+  */
+-static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+-{
++struct nfs4_sequence_data {
++	struct nfs_client *clp;
+ 	struct nfs4_sequence_args args;
+ 	struct nfs4_sequence_res res;
+-
+-	struct rpc_message msg = {
+-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
+-		.rpc_argp = &args,
+-		.rpc_resp = &res,
+-		.rpc_cred = cred,
+-	};
+-
+-	args.sa_cache_this = 0;
+-
+-	return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args,
+-				       &res, args.sa_cache_this, 1);
+-}
++};
+ 
+ static void nfs41_sequence_release(void *data)
+ {
+-	struct nfs_client *clp = (struct nfs_client *)data;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 
+ 	if (atomic_read(&clp->cl_count) > 1)
+ 		nfs4_schedule_state_renewal(clp);
+ 	nfs_put_client(clp);
++	kfree(calldata);
++}
++
++static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client *clp)
++{
++	switch(task->tk_status) {
++	case -NFS4ERR_DELAY:
++	case -EKEYEXPIRED:
++		rpc_delay(task, NFS4_POLL_RETRY_MAX);
++		return -EAGAIN;
++	default:
++		nfs4_schedule_state_recovery(clp);
++	}
++	return 0;
+ }
+ 
+ static void nfs41_sequence_call_done(struct rpc_task *task, void *data)
+ {
+-	struct nfs_client *clp = (struct nfs_client *)data;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 
+-	nfs41_sequence_done(clp, task->tk_msg.rpc_resp, task->tk_status);
++	if (!nfs41_sequence_done(task, task->tk_msg.rpc_resp))
++		return;
+ 
+ 	if (task->tk_status < 0) {
+ 		dprintk("%s ERROR %d\n", __func__, task->tk_status);
+ 		if (atomic_read(&clp->cl_count) == 1)
+ 			goto out;
+ 
+-		if (_nfs4_async_handle_error(task, NULL, clp, NULL)
+-								== -EAGAIN) {
+-			nfs_restart_rpc(task, clp);
++		if (nfs41_sequence_handle_errors(task, clp) == -EAGAIN) {
++			rpc_restart_call_prepare(task);
+ 			return;
+ 		}
+ 	}
+ 	dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred);
+ out:
+-	kfree(task->tk_msg.rpc_argp);
+-	kfree(task->tk_msg.rpc_resp);
+-
+ 	dprintk("<-- %s\n", __func__);
+ }
+ 
+ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
+ {
+-	struct nfs_client *clp;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 	struct nfs4_sequence_args *args;
+ 	struct nfs4_sequence_res *res;
+ 
+-	clp = (struct nfs_client *)data;
+ 	args = task->tk_msg.rpc_argp;
+ 	res = task->tk_msg.rpc_resp;
+ 
+-	if (nfs4_setup_sequence(clp, args, res, 0, task))
++	if (nfs41_setup_sequence(clp->cl_session, args, res, 0, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
+@@ -5133,32 +5284,67 @@ static const struct rpc_call_ops nfs41_s
+ 	.rpc_release = nfs41_sequence_release,
+ };
+ 
+-static int nfs41_proc_async_sequence(struct nfs_client *clp,
+-				     struct rpc_cred *cred)
++static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+ {
+-	struct nfs4_sequence_args *args;
+-	struct nfs4_sequence_res *res;
++	struct nfs4_sequence_data *calldata;
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
+ 		.rpc_cred = cred,
+ 	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = clp->cl_rpcclient,
++		.rpc_message = &msg,
++		.callback_ops = &nfs41_sequence_ops,
++		.flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
++	};
+ 
+ 	if (!atomic_inc_not_zero(&clp->cl_count))
+-		return -EIO;
+-	args = kzalloc(sizeof(*args), GFP_NOFS);
+-	res = kzalloc(sizeof(*res), GFP_NOFS);
+-	if (!args || !res) {
+-		kfree(args);
+-		kfree(res);
++		return ERR_PTR(-EIO);
++	calldata = kmalloc(sizeof(*calldata), GFP_NOFS);
++	if (calldata == NULL) {
+ 		nfs_put_client(clp);
+-		return -ENOMEM;
++		return ERR_PTR(-ENOMEM);
+ 	}
+-	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+-	msg.rpc_argp = args;
+-	msg.rpc_resp = res;
++	calldata->res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	msg.rpc_argp = &calldata->args;
++	msg.rpc_resp = &calldata->res;
++	calldata->clp = clp;
++	task_setup_data.callback_data = calldata;
+ 
+-	return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
+-			      &nfs41_sequence_ops, (void *)clp);
++	return rpc_run_task(&task_setup_data);
++}
++
++static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred)
++{
++	struct rpc_task *task;
++	int ret = 0;
++
++	task = _nfs41_proc_sequence(clp, cred);
++	if (IS_ERR(task))
++		ret = PTR_ERR(task);
++	else
++		rpc_put_task(task);
++	dprintk("<-- %s status=%d\n", __func__, ret);
++	return ret;
++}
++
++static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
++{
++	struct rpc_task *task;
++	int ret;
++
++	task = _nfs41_proc_sequence(clp, cred);
++	if (IS_ERR(task)) {
++		ret = PTR_ERR(task);
++		goto out;
++	}
++	ret = rpc_wait_for_completion_task(task);
++	if (!ret)
++		ret = task->tk_status;
++	rpc_put_task(task);
++out:
++	dprintk("<-- %s status=%d\n", __func__, ret);
++	return ret;
+ }
+ 
+ struct nfs4_reclaim_complete_data {
+@@ -5172,13 +5358,31 @@ static void nfs4_reclaim_complete_prepar
+ 	struct nfs4_reclaim_complete_data *calldata = data;
+ 
+ 	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+-	if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args,
++	if (nfs41_setup_sequence(calldata->clp->cl_session,
++				&calldata->arg.seq_args,
+ 				&calldata->res.seq_res, 0, task))
+ 		return;
+ 
+ 	rpc_call_start(task);
+ }
+ 
++static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nfs_client *clp)
++{
++	switch(task->tk_status) {
++	case 0:
++	case -NFS4ERR_COMPLETE_ALREADY:
++	case -NFS4ERR_WRONG_CRED: /* What to do here? */
++		break;
++	case -NFS4ERR_DELAY:
++	case -EKEYEXPIRED:
++		rpc_delay(task, NFS4_POLL_RETRY_MAX);
++		return -EAGAIN;
++	default:
++		nfs4_schedule_state_recovery(clp);
++	}
++	return 0;
++}
++
+ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data)
+ {
+ 	struct nfs4_reclaim_complete_data *calldata = data;
+@@ -5186,32 +5390,13 @@ static void nfs4_reclaim_complete_done(s
+ 	struct nfs4_sequence_res *res = &calldata->res.seq_res;
+ 
+ 	dprintk("--> %s\n", __func__);
+-	nfs41_sequence_done(clp, res, task->tk_status);
+-	switch (task->tk_status) {
+-	case 0:
+-	case -NFS4ERR_COMPLETE_ALREADY:
+-		break;
+-	case -NFS4ERR_BADSESSION:
+-	case -NFS4ERR_DEADSESSION:
+-		/*
+-		 * Handle the session error, but do not retry the operation, as
+-		 * we have no way of telling whether the clientid had to be
+-		 * reset before we got our reply.  If reset, a new wave of
+-		 * reclaim operations will follow, containing their own reclaim
+-		 * complete.  We don't want our retry to get on the way of
+-		 * recovery by incorrectly indicating to the server that we're
+-		 * done reclaiming state since the process had to be restarted.
+-		 */
+-		_nfs4_async_handle_error(task, NULL, clp, NULL);
+-		break;
+-	default:
+-		if (_nfs4_async_handle_error(
+-				task, NULL, clp, NULL) == -EAGAIN) {
+-			rpc_restart_call_prepare(task);
+-			return;
+-		}
+-	}
++	if (!nfs41_sequence_done(task, res))
++		return;
+ 
++	if (nfs41_reclaim_complete_handle_errors(task, clp) == -EAGAIN) {
++		rpc_restart_call_prepare(task);
++		return;
++	}
+ 	dprintk("<-- %s\n", __func__);
+ }
+ 
+@@ -5268,6 +5453,404 @@ out:
+ 	dprintk("<-- %s status=%d\n", __func__, status);
+ 	return status;
+ }
++
++static void
++nfs4_pnfs_layoutget_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_pnfs_layoutget *lgp = calldata;
++	struct inode *ino = lgp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++	if (nfs4_setup_sequence(server, NULL, &lgp->args.seq_args,
++				&lgp->res.seq_res, 0, task))
++		return;
++	rpc_call_start(task);
++}
++
++static void nfs4_pnfs_layoutget_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_pnfs_layoutget *lgp = calldata;
++	struct inode *ino = lgp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++
++	if (!nfs4_sequence_done(task, &lgp->res.seq_res))
++		return;
++
++	if (RPC_ASSASSINATED(task))
++		return;
++
++	pnfs_get_layout_done(lgp, task->tk_status);
++
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client);
++
++	lgp->status = task->tk_status;
++	dprintk("<-- %s\n", __func__);
++}
++
++static void nfs4_pnfs_layoutget_release(void *calldata)
++{
++	struct nfs4_pnfs_layoutget *lgp = calldata;
++
++	dprintk("--> %s\n", __func__);
++	pnfs_layout_release(NFS_I(lgp->args.inode)->layout, NULL);
++	if (lgp->res.layout.buf != NULL)
++		free_page((unsigned long) lgp->res.layout.buf);
++	kfree(calldata);
++	dprintk("<-- %s\n", __func__);
++}
++
++static const struct rpc_call_ops nfs4_pnfs_layoutget_call_ops = {
++	.rpc_call_prepare = nfs4_pnfs_layoutget_prepare,
++	.rpc_call_done = nfs4_pnfs_layoutget_done,
++	.rpc_release = nfs4_pnfs_layoutget_release,
++};
++
++/* FIXME: We need to call nfs4_handle_exception
++ * and deal with retries.
++ * Currently we can't since we release lgp and its contents.
++ */
++static int _pnfs4_proc_layoutget(struct nfs4_pnfs_layoutget *lgp)
++{
++	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
++	struct rpc_task *task;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_LAYOUTGET],
++		.rpc_argp = &lgp->args,
++		.rpc_resp = &lgp->res,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = server->client,
++		.rpc_message = &msg,
++		.callback_ops = &nfs4_pnfs_layoutget_call_ops,
++		.callback_data = lgp,
++		.flags = RPC_TASK_ASYNC,
++	};
++	int status = 0;
++
++	dprintk("--> %s\n", __func__);
++
++	lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS);
++	if (lgp->res.layout.buf == NULL) {
++		nfs4_pnfs_layoutget_release(lgp);
++		return -ENOMEM;
++	}
++
++	lgp->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	status = nfs4_wait_for_completion_rpc_task(task);
++	if (status != 0)
++		goto out;
++	status = lgp->status;
++	if (status != 0)
++		goto out;
++	status = pnfs_layout_process(lgp);
++out:
++	rpc_put_task(task);
++	dprintk("<-- %s status=%d\n", __func__, status);
++	return status;
++}
++
++int pnfs4_proc_layoutget(struct nfs4_pnfs_layoutget *lgp)
++{
++	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
++	struct nfs4_exception exception = { };
++	int err;
++	do {
++		err = nfs4_handle_exception(server, _pnfs4_proc_layoutget(lgp),
++					    &exception);
++	} while (exception.retry);
++	return err;
++}
++
++static void pnfs_layoutcommit_prepare(struct rpc_task *task, void *data)
++{
++	struct pnfs_layoutcommit_data *ldata =
++		(struct pnfs_layoutcommit_data *)data;
++	struct nfs_server *server = NFS_SERVER(ldata->args.inode);
++
++	if (nfs4_setup_sequence(server, NULL, &ldata->args.seq_args,
++				&ldata->res.seq_res, 1, task))
++		return;
++	rpc_call_start(task);
++}
++
++static void
++pnfs_layoutcommit_done(struct rpc_task *task, void *calldata)
++{
++	struct pnfs_layoutcommit_data *data =
++		(struct pnfs_layoutcommit_data *)calldata;
++	struct nfs_server *server = NFS_SERVER(data->args.inode);
++
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return;
++
++	if (RPC_ASSASSINATED(task))
++		return;
++
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client);
++
++	data->status = task->tk_status;
++}
++
++static void pnfs_layoutcommit_release(void *lcdata)
++{
++	struct pnfs_layoutcommit_data *data = lcdata;
++
++	put_rpccred(data->cred);
++	pnfs_cleanup_layoutcommit(lcdata);
++	/* Matched by get_layout in pnfs_layoutcommit_inode */
++	put_layout(data->args.inode);
++	/* Free last: 'data' aliases lcdata and is dereferenced just above. */
++	pnfs_layoutcommit_free(lcdata);
++}
++
++static const struct rpc_call_ops pnfs_layoutcommit_ops = {
++	.rpc_call_prepare = pnfs_layoutcommit_prepare,	/* sequence setup */
++	.rpc_call_done = pnfs_layoutcommit_done,	/* records data->status */
++	.rpc_release = pnfs_layoutcommit_release,	/* put cred/layout, free */
++};
++
++/* Execute a layoutcommit to the server; issync != 0 waits for the result */
++static int
++_pnfs4_proc_layoutcommit(struct pnfs_layoutcommit_data *data, int issync)
++{
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_LAYOUTCOMMIT],
++		.rpc_argp = &data->args,
++		.rpc_resp = &data->res,
++		.rpc_cred = data->cred,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.task = &data->task,	/* task storage is embedded in data */
++		.rpc_client = NFS_CLIENT(data->args.inode),
++		.rpc_message = &msg,
++		.callback_ops = &pnfs_layoutcommit_ops,
++		.callback_data = data,
++		.flags = RPC_TASK_ASYNC,
++	};
++	struct rpc_task *task;
++	int status = 0;
++
++	dprintk("NFS: %4d initiating layoutcommit call. %llu@%llu lbw: %llu "
++		"type: %d issync %d\n",
++		data->task.tk_pid,
++		data->args.lseg.length,
++		data->args.lseg.offset,
++		data->args.lastbytewritten,
++		data->args.layout_type, issync);
++
++	data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	if (!issync)
++		goto out;	/* async: status stays 0, no waiting */
++	status = nfs4_wait_for_completion_rpc_task(task);
++	if (status != 0)
++		goto out;
++	status = data->status;	/* set by pnfs_layoutcommit_done() */
++out:
++	dprintk("%s: status %d\n", __func__, status);
++	rpc_put_task(task);
++	return status;	/* propagate wait/RPC failures to the caller */
++}
++
++int pnfs4_proc_layoutcommit(struct pnfs_layoutcommit_data *data, int issync)
++{
++	struct nfs4_exception exception = { };
++	struct nfs_server *server = NFS_SERVER(data->args.inode);
++	int err;
++
++	do {	/* reissue the call for as long as exception.retry is set */
++		err = nfs4_handle_exception(server,
++					_pnfs4_proc_layoutcommit(data, issync),
++					&exception);
++	} while (exception.retry);
++	return err;	/* result of the last nfs4_handle_exception() call */
++}
++
++static void
++nfs4_pnfs_layoutreturn_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_pnfs_layoutreturn *lrp = calldata;
++	struct inode *ino = lrp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++	if (nfs4_setup_sequence(server, NULL, &lrp->args.seq_args,
++				&lrp->res.seq_res, 0, task))
++		return;	/* non-zero from nfs4_setup_sequence(): do not start */
++	rpc_call_start(task);
++}
++
++static void nfs4_pnfs_layoutreturn_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_pnfs_layoutreturn *lrp = calldata;
++	struct inode *ino = lrp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++
++	if (!nfs4_sequence_done(task, &lrp->res.seq_res))
++		return;	/* early exit: the closing trace below is skipped */
++
++	if (RPC_ASSASSINATED(task))
++		return;	/* early exit: the closing trace below is skipped */
++
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client);
++
++	dprintk("<-- %s\n", __func__);
++}
++
++static void nfs4_pnfs_layoutreturn_release(void *calldata)
++{
++	struct nfs4_pnfs_layoutreturn *lrp = calldata;
++	struct pnfs_layout_type *lo = NFS_I(lrp->args.inode)->layout;
++
++	dprintk("--> %s return_type %d lo %p\n", __func__,
++		lrp->args.return_type, lo);
++
++	if (lrp->args.return_type == RETURN_FILE) {	/* other types: free only */
++		if (!lrp->res.lrs_present)	/* fall back to zero_stateid */
++			pnfs_set_layout_stateid(lo, &zero_stateid);
++		pnfs_layout_release(lo, &lrp->args.lseg);
++	}
++	kfree(calldata);	/* lrp is freed for every return_type */
++	dprintk("<-- %s\n", __func__);
++}
++
++static const struct rpc_call_ops nfs4_pnfs_layoutreturn_call_ops = {
++	.rpc_call_prepare = nfs4_pnfs_layoutreturn_prepare,	/* sequence setup */
++	.rpc_call_done = nfs4_pnfs_layoutreturn_done,	/* error handling */
++	.rpc_release = nfs4_pnfs_layoutreturn_release,	/* kfree()s call data */
++};
++
++int _pnfs4_proc_layoutreturn(struct nfs4_pnfs_layoutreturn *lrp, bool issync)
++{
++	struct inode *ino = lrp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++	struct rpc_task *task;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_LAYOUTRETURN],
++		.rpc_argp = &lrp->args,
++		.rpc_resp = &lrp->res,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = server->client,
++		.rpc_message = &msg,
++		.callback_ops = &nfs4_pnfs_layoutreturn_call_ops,
++		.callback_data = lrp,	/* kfree()d by the rpc_release callback */
++		.flags = RPC_TASK_ASYNC,
++	};
++	int status = 0;
++
++	dprintk("--> %s\n", __func__);
++	lrp->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	if (!issync)
++		goto out;	/* async: return 0 without waiting */
++	status = nfs4_wait_for_completion_rpc_task(task);
++	if (status != 0)
++		goto out;
++	status = task->tk_status;	/* sync: report the task's final status */
++out:
++	dprintk("<-- %s\n", __func__);
++	rpc_put_task(task);
++	return status;
++}
++
++int pnfs4_proc_layoutreturn(struct nfs4_pnfs_layoutreturn *lrp, bool issync)
++{
++	struct nfs_server *server = NFS_SERVER(lrp->args.inode);
++	struct nfs4_exception exception = { };
++	int err;
++	do {	/* reissue the call for as long as exception.retry is set */
++		err = nfs4_handle_exception(server,
++				_pnfs4_proc_layoutreturn(lrp, issync),
++				&exception);
++	} while (exception.retry);
++
++	return err;	/* result of the last nfs4_handle_exception() call */
++}
++
++/*
++ * Retrieve the list of Data Server devices from the MDS (one sync call).
++ */
++static int _nfs4_pnfs_getdevicelist(struct nfs_server *server,
++				    const struct nfs_fh *fh,
++				    struct pnfs_devicelist *devlist)
++{
++	struct nfs4_pnfs_getdevicelist_arg arg = {
++		.fh = fh,
++		.layoutclass = server->pnfs_curr_ld->id,
++	};
++	struct nfs4_pnfs_getdevicelist_res res = {
++		.devlist = devlist,	/* result struct points at caller's list */
++	};
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_GETDEVICELIST],
++		.rpc_argp = &arg,
++		.rpc_resp = &res,
++	};
++	int status;
++
++	dprintk("--> %s\n", __func__);
++	status = nfs4_call_sync(server, &msg, &arg, &res, 0);
++	dprintk("<-- %s status=%d\n", __func__, status);
++	return status;	/* nfs4_call_sync() result, unmodified */
++}
++
++int nfs4_pnfs_getdevicelist(struct nfs_server *server,
++			    const struct nfs_fh *fh,
++			    struct pnfs_devicelist *devlist)
++{
++	struct nfs4_exception exception = { };
++	int err;
++
++	do {	/* reissue the call for as long as exception.retry is set */
++		err = nfs4_handle_exception(server,
++				_nfs4_pnfs_getdevicelist(server, fh, devlist),
++				&exception);
++	} while (exception.retry);
++
++	dprintk("%s: err=%d, num_devs=%u\n", __func__,
++		err, devlist->num_devs);
++
++	return err;	/* result of the last nfs4_handle_exception() call */
++}
++
++int nfs4_pnfs_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
++{
++	struct nfs4_pnfs_getdeviceinfo_arg args = {
++		.pdev = pdev,	/* args and res share the caller's pdev */
++	};
++	struct nfs4_pnfs_getdeviceinfo_res res = {
++		.pdev = pdev,
++	};
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_GETDEVICEINFO],
++		.rpc_argp = &args,
++		.rpc_resp = &res,
++	};
++	int status;
++
++	dprintk("--> %s\n", __func__);
++	status = nfs4_call_sync(server, &msg, &args, &res, 0);
++	dprintk("<-- %s status=%d\n", __func__, status);
++
++	return status;	/* no nfs4_handle_exception() retry loop here */
++}
++
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
+@@ -5325,28 +5908,30 @@ struct nfs4_state_maintenance_ops nfs41_
+ };
+ #endif
+ 
+-/*
+- * Per minor version reboot and network partition recovery ops
+- */
+-
+-struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[] = {
+-	&nfs40_reboot_recovery_ops,
+-#if defined(CONFIG_NFS_V4_1)
+-	&nfs41_reboot_recovery_ops,
+-#endif
++static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
++	.minor_version = 0,
++	.call_sync = _nfs4_call_sync,
++	.validate_stateid = nfs4_validate_delegation_stateid,
++	.reboot_recovery_ops = &nfs40_reboot_recovery_ops,
++	.nograce_recovery_ops = &nfs40_nograce_recovery_ops,
++	.state_renewal_ops = &nfs40_state_renewal_ops,
+ };
+ 
+-struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[] = {
+-	&nfs40_nograce_recovery_ops,
+ #if defined(CONFIG_NFS_V4_1)
+-	&nfs41_nograce_recovery_ops,
+-#endif
++static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
++	.minor_version = 1,
++	.call_sync = _nfs4_call_sync_session,
++	.validate_stateid = nfs41_validate_delegation_stateid,
++	.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
++	.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
++	.state_renewal_ops = &nfs41_state_renewal_ops,
+ };
++#endif
+ 
+-struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[] = {
+-	&nfs40_state_renewal_ops,
++const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
++	[0] = &nfs_v4_0_minor_ops,
+ #if defined(CONFIG_NFS_V4_1)
+-	&nfs41_state_renewal_ops,
++	[1] = &nfs_v4_1_minor_ops,
+ #endif
+ };
+ 
+@@ -5364,6 +5949,7 @@ const struct nfs_rpc_ops nfs_v4_clientop
+ 	.dentry_ops	= &nfs4_dentry_operations,
+ 	.dir_inode_ops	= &nfs4_dir_inode_operations,
+ 	.file_inode_ops	= &nfs4_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs4_proc_get_root,
+ 	.getattr	= nfs4_proc_getattr,
+ 	.setattr	= nfs4_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs/nfs4renewd.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-08-23 12:09:03.341491726 -0400
+@@ -54,17 +54,17 @@
+ void
+ nfs4_renew_state(struct work_struct *work)
+ {
+-	struct nfs4_state_maintenance_ops *ops;
++	const struct nfs4_state_maintenance_ops *ops;
+ 	struct nfs_client *clp =
+ 		container_of(work, struct nfs_client, cl_renewd.work);
+ 	struct rpc_cred *cred;
+ 	long lease;
+ 	unsigned long last, now;
+ 
+-	ops = nfs4_state_renewal_ops[clp->cl_minorversion];
++	ops = clp->cl_mvops->state_renewal_ops;
+ 	dprintk("%s: start\n", __func__);
+ 	/* Are there any active superblocks? */
+-	if (list_empty(&clp->cl_superblocks))
++	if (list_empty(&clp->cl_superblocks) && !is_ds_only_client(clp))
+ 		goto out;
+ 	spin_lock(&clp->cl_lock);
+ 	lease = clp->cl_lease_time;
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-08-23 12:08:29.052491341 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-23 12:09:03.342373443 -0400
+@@ -53,6 +53,9 @@
+ #include "callback.h"
+ #include "delegation.h"
+ #include "internal.h"
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs4_pnfs.h>
++#include "pnfs.h"
+ 
+ #define OPENOWNER_POOL_SIZE	8
+ 
+@@ -126,6 +129,11 @@ static int nfs41_setup_state_renewal(str
+ 	int status;
+ 	struct nfs_fsinfo fsinfo;
+ 
++	if (is_ds_only_client(clp)) {
++		nfs4_schedule_state_renewal(clp);
++		return 0;
++	}
++
+ 	status = nfs4_proc_get_lease_time(clp, &fsinfo);
+ 	if (status == 0) {
+ 		/* Update lease time and schedule renewal */
+@@ -145,7 +153,9 @@ static void nfs4_end_drain_session(struc
+ 	struct nfs4_session *ses = clp->cl_session;
+ 	int max_slots;
+ 
+-	if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) {
++	if (ses == NULL)
++		return;
++	if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
+ 		spin_lock(&ses->fc_slot_table.slot_tbl_lock);
+ 		max_slots = ses->fc_slot_table.max_slots;
+ 		while (max_slots--) {
+@@ -167,7 +177,7 @@ static int nfs4_begin_drain_session(stru
+ 	struct nfs4_slot_table *tbl = &ses->fc_slot_table;
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+-	set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state);
++	set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
+ 	if (tbl->highest_used_slotid != -1) {
+ 		INIT_COMPLETION(ses->complete);
+ 		spin_unlock(&tbl->slot_tbl_lock);
+@@ -371,7 +381,6 @@ nfs4_alloc_state_owner(void)
+ 		return NULL;
+ 	spin_lock_init(&sp->so_lock);
+ 	INIT_LIST_HEAD(&sp->so_states);
+-	INIT_LIST_HEAD(&sp->so_delegations);
+ 	rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue");
+ 	sp->so_seqid.sequence = &sp->so_sequence;
+ 	spin_lock_init(&sp->so_sequence.lock);
+@@ -384,7 +393,7 @@ static void
+ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
+ {
+ 	if (!RB_EMPTY_NODE(&sp->so_client_node)) {
+-		struct nfs_client *clp = sp->so_client;
++		struct nfs_client *clp = sp->so_server->nfs_client;
+ 
+ 		spin_lock(&clp->cl_lock);
+ 		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
+@@ -406,7 +415,6 @@ struct nfs4_state_owner *nfs4_get_state_
+ 	new = nfs4_alloc_state_owner();
+ 	if (new == NULL)
+ 		return NULL;
+-	new->so_client = clp;
+ 	new->so_server = server;
+ 	new->so_cred = cred;
+ 	spin_lock(&clp->cl_lock);
+@@ -423,7 +431,7 @@ struct nfs4_state_owner *nfs4_get_state_
+ 
+ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
+ {
+-	struct nfs_client *clp = sp->so_client;
++	struct nfs_client *clp = sp->so_server->nfs_client;
+ 	struct rpc_cred *cred = sp->so_cred;
+ 
+ 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
+@@ -583,8 +591,24 @@ static void __nfs4_close(struct path *pa
+ 	if (!call_close) {
+ 		nfs4_put_open_state(state);
+ 		nfs4_put_state_owner(owner);
+-	} else
++	} else {
++		u32 roc_iomode;
++		struct nfs_inode *nfsi = NFS_I(state->inode);
++
++		if (has_layout(nfsi) &&
++		    (roc_iomode = pnfs_layout_roc_iomode(nfsi)) != 0) {
++			struct nfs4_pnfs_layout_segment range = {
++				.iomode = roc_iomode,
++				.offset = 0,
++				.length = NFS4_MAX_UINT64,
++			};
++
++			pnfs_return_layout(state->inode, &range, NULL,
++					   RETURN_FILE, wait);
++		}
++
+ 		nfs4_do_close(path, state, gfp_mask, wait);
++	}
+ }
+ 
+ void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
+@@ -602,12 +626,21 @@ void nfs4_close_sync(struct path *path, 
+  * that is compatible with current->files
+  */
+ static struct nfs4_lock_state *
+-__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
++__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *pos;
+ 	list_for_each_entry(pos, &state->lock_states, ls_locks) {
+-		if (pos->ls_owner != fl_owner)
++		if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type)
+ 			continue;
++		switch (pos->ls_owner.lo_type) {
++		case NFS4_POSIX_LOCK_TYPE:
++			if (pos->ls_owner.lo_u.posix_owner != fl_owner)
++				continue;
++			break;
++		case NFS4_FLOCK_LOCK_TYPE:
++			if (pos->ls_owner.lo_u.flock_owner != fl_pid)
++				continue;
++		}
+ 		atomic_inc(&pos->ls_count);
+ 		return pos;
+ 	}
+@@ -619,10 +652,10 @@ __nfs4_find_lock_state(struct nfs4_state
+  * exists, return an uninitialized one.
+  *
+  */
+-static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
++static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *lsp;
+-	struct nfs_client *clp = state->owner->so_client;
++	struct nfs_client *clp = state->owner->so_server->nfs_client;
+ 
+ 	lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
+ 	if (lsp == NULL)
+@@ -633,7 +666,18 @@ static struct nfs4_lock_state *nfs4_allo
+ 	lsp->ls_seqid.sequence = &lsp->ls_sequence;
+ 	atomic_set(&lsp->ls_count, 1);
+ 	lsp->ls_state = state;
+-	lsp->ls_owner = fl_owner;
++	lsp->ls_owner.lo_type = type;
++	switch (lsp->ls_owner.lo_type) {
++	case NFS4_FLOCK_LOCK_TYPE:
++		lsp->ls_owner.lo_u.flock_owner = fl_pid;
++		break;
++	case NFS4_POSIX_LOCK_TYPE:
++		lsp->ls_owner.lo_u.posix_owner = fl_owner;
++		break;
++	default:
++		kfree(lsp);
++		return NULL;
++	}
+ 	spin_lock(&clp->cl_lock);
+ 	nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
+ 	spin_unlock(&clp->cl_lock);
+@@ -643,7 +687,7 @@ static struct nfs4_lock_state *nfs4_allo
+ 
+ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
+ {
+-	struct nfs_client *clp = lsp->ls_state->owner->so_client;
++	struct nfs_client *clp = lsp->ls_state->owner->so_server->nfs_client;
+ 
+ 	spin_lock(&clp->cl_lock);
+ 	nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
+@@ -657,13 +701,13 @@ static void nfs4_free_lock_state(struct 
+  * exists, return an uninitialized one.
+  *
+  */
+-static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
++static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *lsp, *new = NULL;
+ 	
+ 	for(;;) {
+ 		spin_lock(&state->state_lock);
+-		lsp = __nfs4_find_lock_state(state, owner);
++		lsp = __nfs4_find_lock_state(state, owner, pid, type);
+ 		if (lsp != NULL)
+ 			break;
+ 		if (new != NULL) {
+@@ -674,7 +718,7 @@ static struct nfs4_lock_state *nfs4_get_
+ 			break;
+ 		}
+ 		spin_unlock(&state->state_lock);
+-		new = nfs4_alloc_lock_state(state, owner);
++		new = nfs4_alloc_lock_state(state, owner, pid, type);
+ 		if (new == NULL)
+ 			return NULL;
+ 	}
+@@ -701,6 +745,8 @@ void nfs4_put_lock_state(struct nfs4_loc
+ 	if (list_empty(&state->lock_states))
+ 		clear_bit(LK_STATE_IN_USE, &state->flags);
+ 	spin_unlock(&state->state_lock);
++	if (lsp->ls_flags & NFS_LOCK_INITIALIZED)
++		nfs4_release_lockowner(lsp);
+ 	nfs4_free_lock_state(lsp);
+ }
+ 
+@@ -728,7 +774,12 @@ int nfs4_set_lock_state(struct nfs4_stat
+ 
+ 	if (fl->fl_ops != NULL)
+ 		return 0;
+-	lsp = nfs4_get_lock_state(state, fl->fl_owner);
++	if (fl->fl_flags & FL_POSIX)
++		lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
++	else if (fl->fl_flags & FL_FLOCK)
++		lsp = nfs4_get_lock_state(state, 0, fl->fl_pid, NFS4_FLOCK_LOCK_TYPE);
++	else
++		return -EINVAL;
+ 	if (lsp == NULL)
+ 		return -ENOMEM;
+ 	fl->fl_u.nfs4_fl.owner = lsp;
+@@ -740,7 +791,7 @@ int nfs4_set_lock_state(struct nfs4_stat
+  * Byte-range lock aware utility to initialize the stateid of read/write
+  * requests.
+  */
+-void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
++void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid)
+ {
+ 	struct nfs4_lock_state *lsp;
+ 	int seq;
+@@ -753,7 +804,7 @@ void nfs4_copy_stateid(nfs4_stateid *dst
+ 		return;
+ 
+ 	spin_lock(&state->state_lock);
+-	lsp = __nfs4_find_lock_state(state, fl_owner);
++	lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
+ 	if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
+ 		memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
+ 	spin_unlock(&state->state_lock);
+@@ -1031,8 +1082,8 @@ restart:
+ 				 * Open state on this file cannot be recovered
+ 				 * All we can do is revert to using the zero stateid.
+ 				 */
+-				memset(state->stateid.data, 0,
+-					sizeof(state->stateid.data));
++				memset(state->stateid.u.data, 0,
++					sizeof(state->stateid.u.data));
+ 				/* Mark the file as being 'closed' */
+ 				state->state = 0;
+ 				break;
+@@ -1041,11 +1092,11 @@ restart:
+ 			case -NFS4ERR_BAD_STATEID:
+ 			case -NFS4ERR_RECLAIM_BAD:
+ 			case -NFS4ERR_RECLAIM_CONFLICT:
+-				nfs4_state_mark_reclaim_nograce(sp->so_client, state);
++				nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ 				break;
+ 			case -NFS4ERR_EXPIRED:
+ 			case -NFS4ERR_NO_GRACE:
+-				nfs4_state_mark_reclaim_nograce(sp->so_client, state);
++				nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ 			case -NFS4ERR_STALE_CLIENTID:
+ 			case -NFS4ERR_BADSESSION:
+ 			case -NFS4ERR_BADSLOT:
+@@ -1120,8 +1171,7 @@ static void nfs4_state_end_reclaim_reboo
+ 	if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+ 		return;
+ 
+-	nfs4_reclaim_complete(clp,
+-		nfs4_reboot_recovery_ops[clp->cl_minorversion]);
++	nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
+ 
+ 	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+ 		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+@@ -1211,8 +1261,8 @@ restart:
+ static int nfs4_check_lease(struct nfs_client *clp)
+ {
+ 	struct rpc_cred *cred;
+-	struct nfs4_state_maintenance_ops *ops =
+-		nfs4_state_renewal_ops[clp->cl_minorversion];
++	const struct nfs4_state_maintenance_ops *ops =
++		clp->cl_mvops->state_renewal_ops;
+ 	int status = -NFS4ERR_EXPIRED;
+ 
+ 	/* Is the client already known to have an expired lease? */
+@@ -1235,8 +1285,8 @@ out:
+ static int nfs4_reclaim_lease(struct nfs_client *clp)
+ {
+ 	struct rpc_cred *cred;
+-	struct nfs4_state_recovery_ops *ops =
+-		nfs4_reboot_recovery_ops[clp->cl_minorversion];
++	const struct nfs4_state_recovery_ops *ops =
++		clp->cl_mvops->reboot_recovery_ops;
+ 	int status = -ENOENT;
+ 
+ 	cred = ops->get_clid_cred(clp);
+@@ -1421,6 +1471,7 @@ static void nfs4_state_manager(struct nf
+ 			}
+ 			clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
+ 			set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
++			pnfs_destroy_all_layouts(clp);
+ 		}
+ 
+ 		if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
+@@ -1444,7 +1495,7 @@ static void nfs4_state_manager(struct nf
+ 		/* First recover reboot state... */
+ 		if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
+ 			status = nfs4_do_reclaim(clp,
+-				nfs4_reboot_recovery_ops[clp->cl_minorversion]);
++				clp->cl_mvops->reboot_recovery_ops);
+ 			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
+ 				continue;
+@@ -1458,7 +1509,7 @@ static void nfs4_state_manager(struct nf
+ 		/* Now recover expired state... */
+ 		if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
+ 			status = nfs4_do_reclaim(clp,
+-				nfs4_nograce_recovery_ops[clp->cl_minorversion]);
++				clp->cl_mvops->nograce_recovery_ops);
+ 			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-23 12:08:29.054481400 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-23 12:09:03.346481283 -0400
+@@ -50,8 +50,11 @@
+ #include <linux/nfs4.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/nfs_idmap.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/pnfs_xdr.h>
+ #include "nfs4_fs.h"
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_XDR
+ 
+@@ -89,7 +92,7 @@ static int nfs4_stat_to_errno(int);
+ #define encode_getfh_maxsz      (op_encode_hdr_maxsz)
+ #define decode_getfh_maxsz      (op_decode_hdr_maxsz + 1 + \
+ 				((3+NFS4_FHSIZE) >> 2))
+-#define nfs4_fattr_bitmap_maxsz 3
++#define nfs4_fattr_bitmap_maxsz 4
+ #define encode_getattr_maxsz    (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
+ #define nfs4_name_maxsz		(1 + ((3 + NFS4_MAXNAMLEN) >> 2))
+ #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
+@@ -111,7 +114,11 @@ static int nfs4_stat_to_errno(int);
+ #define encode_restorefh_maxsz  (op_encode_hdr_maxsz)
+ #define decode_restorefh_maxsz  (op_decode_hdr_maxsz)
+ #define encode_fsinfo_maxsz	(encode_getattr_maxsz)
+-#define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + 11)
++/* The 5 accounts for the PNFS attributes, and assumes that at most three
++ * layout types will be returned.
++ */
++#define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + \
++				 nfs4_fattr_bitmap_maxsz + 8 + 5)
+ #define encode_renew_maxsz	(op_encode_hdr_maxsz + 3)
+ #define decode_renew_maxsz	(op_decode_hdr_maxsz)
+ #define encode_setclientid_maxsz \
+@@ -202,14 +209,17 @@ static int nfs4_stat_to_errno(int);
+ #define encode_link_maxsz	(op_encode_hdr_maxsz + \
+ 				nfs4_name_maxsz)
+ #define decode_link_maxsz	(op_decode_hdr_maxsz + decode_change_info_maxsz)
++#define encode_lockowner_maxsz	(7)
+ #define encode_lock_maxsz	(op_encode_hdr_maxsz + \
+ 				 7 + \
+-				 1 + encode_stateid_maxsz + 8)
++				 1 + encode_stateid_maxsz + 1 + \
++				 encode_lockowner_maxsz)
+ #define decode_lock_denied_maxsz \
+ 				(8 + decode_lockowner_maxsz)
+ #define decode_lock_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_lock_denied_maxsz)
+-#define encode_lockt_maxsz	(op_encode_hdr_maxsz + 12)
++#define encode_lockt_maxsz	(op_encode_hdr_maxsz + 5 + \
++				encode_lockowner_maxsz)
+ #define decode_lockt_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_lock_denied_maxsz)
+ #define encode_locku_maxsz	(op_encode_hdr_maxsz + 3 + \
+@@ -217,6 +227,11 @@ static int nfs4_stat_to_errno(int);
+ 				 4)
+ #define decode_locku_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_stateid_maxsz)
++#define encode_release_lockowner_maxsz \
++				(op_encode_hdr_maxsz + \
++				 encode_lockowner_maxsz)
++#define decode_release_lockowner_maxsz \
++				(op_decode_hdr_maxsz)
+ #define encode_access_maxsz	(op_encode_hdr_maxsz + 1)
+ #define decode_access_maxsz	(op_decode_hdr_maxsz + 2)
+ #define encode_symlink_maxsz	(op_encode_hdr_maxsz + \
+@@ -302,6 +317,35 @@ static int nfs4_stat_to_errno(int);
+ 				XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
+ #define encode_reclaim_complete_maxsz	(op_encode_hdr_maxsz + 4)
+ #define decode_reclaim_complete_maxsz	(op_decode_hdr_maxsz + 4)
++#define encode_getdevicelist_maxsz (op_encode_hdr_maxsz + 4 + \
++				encode_verifier_maxsz)
++#define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 +  \
++				decode_verifier_maxsz +             \
++				XDR_QUADLEN(NFS4_PNFS_GETDEVLIST_MAXNUM *  \
++				NFS4_PNFS_DEVICEID4_SIZE))
++#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \
++				XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE))
++#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \
++				4 /*layout type */ + \
++				4 /* opaque devaddr4 length */ +\
++				4 /* notification bitmap length */ + \
++				4 /* notification bitmap */)
++#define encode_layoutget_sz	(op_encode_hdr_maxsz + 10 + \
++				encode_stateid_maxsz)
++#define decode_layoutget_maxsz	(op_decode_hdr_maxsz + 8 + \
++				decode_stateid_maxsz + \
++				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
++#define encode_layoutcommit_sz	(18 +                           \
++				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + \
++				op_encode_hdr_maxsz +          \
++				encode_stateid_maxsz)
++#define decode_layoutcommit_maxsz (3 + op_decode_hdr_maxsz)
++#define encode_layoutreturn_sz	(8 + op_encode_hdr_maxsz + \
++				encode_stateid_maxsz + \
++				1 /* FIXME: opaque lrf_body always empty at
++				   * the moment */)
++#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
++				1 + decode_stateid_maxsz)
+ #else /* CONFIG_NFS_V4_1 */
+ #define encode_sequence_maxsz	0
+ #define decode_sequence_maxsz	0
+@@ -471,6 +515,12 @@ static int nfs4_stat_to_errno(int);
+ 				decode_sequence_maxsz + \
+ 				decode_putfh_maxsz + \
+ 				decode_locku_maxsz)
++#define NFS4_enc_release_lockowner_sz \
++				(compound_encode_hdr_maxsz + \
++				 encode_lockowner_maxsz)
++#define NFS4_dec_release_lockowner_sz \
++				(compound_decode_hdr_maxsz + \
++				 decode_lockowner_maxsz)
+ #define NFS4_enc_access_sz	(compound_encode_hdr_maxsz + \
+ 				encode_sequence_maxsz + \
+ 				encode_putfh_maxsz + \
+@@ -685,6 +735,60 @@ static int nfs4_stat_to_errno(int);
+ #define NFS4_dec_reclaim_complete_sz	(compound_decode_hdr_maxsz + \
+ 					 decode_sequence_maxsz + \
+ 					 decode_reclaim_complete_maxsz)
++#define NFS4_enc_getdevicelist_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz + \
++				encode_getdevicelist_maxsz)
++#define NFS4_dec_getdevicelist_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_getdevicelist_maxsz)
++#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz +    \
++				encode_sequence_maxsz +\
++				encode_getdeviceinfo_maxsz)
++#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz +    \
++				decode_sequence_maxsz + \
++				decode_getdeviceinfo_maxsz)
++#define NFS4_enc_layoutget_sz	(compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz +        \
++				encode_layoutget_sz)
++#define NFS4_dec_layoutget_sz	(compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz +        \
++				decode_layoutget_maxsz)
++#define NFS4_enc_layoutcommit_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz +\
++				encode_putfh_maxsz + \
++				encode_layoutcommit_sz + \
++				encode_getattr_maxsz)
++#define NFS4_dec_layoutcommit_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_layoutcommit_maxsz + \
++				decode_getattr_maxsz)
++#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz + \
++				encode_layoutreturn_sz)
++#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_layoutreturn_maxsz)
++#define NFS4_enc_dswrite_sz	(compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz +\
++				encode_putfh_maxsz + \
++				encode_write_maxsz)
++#define NFS4_dec_dswrite_sz	(compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_write_maxsz)
++#define NFS4_enc_dscommit_sz	(compound_encode_hdr_maxsz + \
++				encode_putfh_maxsz + \
++				encode_commit_maxsz)
++#define NFS4_dec_dscommit_sz	(compound_decode_hdr_maxsz + \
++				decode_putfh_maxsz + \
++				decode_commit_maxsz)
+ 
+ const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
+ 				      compound_encode_hdr_maxsz +
+@@ -915,7 +1019,7 @@ static void encode_close(struct xdr_stre
+ 	p = reserve_space(xdr, 8+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_CLOSE);
+ 	*p++ = cpu_to_be32(arg->seqid->sequence->counter);
+-	xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_close_maxsz;
+ }
+@@ -989,6 +1093,35 @@ static void encode_getattr_two(struct xd
+ 	hdr->replen += decode_getattr_maxsz;
+ }
+ 
++static void
++encode_getattr_three(struct xdr_stream *xdr,
++		     uint32_t bm0, uint32_t bm1, uint32_t bm2,
++		     struct compound_hdr *hdr)
++{
++	__be32 *p;
++
++	p = reserve_space(xdr, 4);
++	*p = cpu_to_be32(OP_GETATTR);
++	if (bm2) {	/* highest non-zero word decides the bitmap length */
++		p = reserve_space(xdr, 16);	/* count + three words */
++		*p++ = cpu_to_be32(3);
++		*p++ = cpu_to_be32(bm0);
++		*p++ = cpu_to_be32(bm1);
++		*p = cpu_to_be32(bm2);
++	} else if (bm1) {
++		p = reserve_space(xdr, 12);	/* count + two words */
++		*p++ = cpu_to_be32(2);
++		*p++ = cpu_to_be32(bm0);
++		*p = cpu_to_be32(bm1);
++	} else {
++		p = reserve_space(xdr, 8);	/* count + bm0, even when bm0 is 0 */
++		*p++ = cpu_to_be32(1);
++		*p = cpu_to_be32(bm0);
++	}
++	hdr->nops++;
++	hdr->replen += decode_getattr_maxsz;
++}
++
+ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+ {
+ 	encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0],
+@@ -997,8 +1130,11 @@ static void encode_getfattr(struct xdr_s
+ 
+ static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+ {
+-	encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0],
+-			   bitmask[1] & nfs4_fsinfo_bitmap[1], hdr);
++	encode_getattr_three(xdr,
++			     bitmask[0] & nfs4_fsinfo_bitmap[0],
++			     bitmask[1] & nfs4_fsinfo_bitmap[1],
++			     bitmask[2] & nfs4_fsinfo_bitmap[2],
++			     hdr);
+ }
+ 
+ static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+@@ -1042,6 +1178,17 @@ static inline uint64_t nfs4_lock_length(
+ 	return fl->fl_end - fl->fl_start + 1;
+ }
+ 
++static void encode_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner)
++{
++	__be32 *p;
++
++	p = reserve_space(xdr, 28);	/* 8 clientid + 4 length + 16 owner */
++	p = xdr_encode_hyper(p, lowner->clientid);
++	*p++ = cpu_to_be32(16);	/* owner length: 8-byte tag + 8-byte id */
++	p = xdr_encode_opaque_fixed(p, "lock id:", 8);
++	xdr_encode_hyper(p, lowner->id);
++}
++
+ /*
+  * opcode,type,reclaim,offset,length,new_lock_owner = 32
+  * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40
+@@ -1058,18 +1205,16 @@ static void encode_lock(struct xdr_strea
+ 	p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+ 	*p = cpu_to_be32(args->new_lock_owner);
+ 	if (args->new_lock_owner){
+-		p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32);
++		p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 		*p++ = cpu_to_be32(args->open_seqid->sequence->counter);
+-		p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
++		p = xdr_encode_opaque_fixed(p, args->open_stateid->u.data,
++					    NFS4_STATEID_SIZE);
+ 		*p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
+-		p = xdr_encode_hyper(p, args->lock_owner.clientid);
+-		*p++ = cpu_to_be32(16);
+-		p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+-		xdr_encode_hyper(p, args->lock_owner.id);
++		encode_lockowner(xdr, &args->lock_owner);
+ 	}
+ 	else {
+ 		p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
+-		p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE);
++		p = xdr_encode_opaque_fixed(p, args->lock_stateid->u.data, NFS4_STATEID_SIZE);
+ 		*p = cpu_to_be32(args->lock_seqid->sequence->counter);
+ 	}
+ 	hdr->nops++;
+@@ -1080,15 +1225,12 @@ static void encode_lockt(struct xdr_stre
+ {
+ 	__be32 *p;
+ 
+-	p = reserve_space(xdr, 52);
++	p = reserve_space(xdr, 24);
+ 	*p++ = cpu_to_be32(OP_LOCKT);
+ 	*p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
+ 	p = xdr_encode_hyper(p, args->fl->fl_start);
+ 	p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+-	p = xdr_encode_hyper(p, args->lock_owner.clientid);
+-	*p++ = cpu_to_be32(16);
+-	p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+-	xdr_encode_hyper(p, args->lock_owner.id);
++	encode_lockowner(xdr, &args->lock_owner);
+ 	hdr->nops++;
+ 	hdr->replen += decode_lockt_maxsz;
+ }
+@@ -1101,13 +1243,25 @@ static void encode_locku(struct xdr_stre
+ 	*p++ = cpu_to_be32(OP_LOCKU);
+ 	*p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
+ 	*p++ = cpu_to_be32(args->seqid->sequence->counter);
+-	p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, args->stateid->u.data,
++				    NFS4_STATEID_SIZE);
+ 	p = xdr_encode_hyper(p, args->fl->fl_start);
+ 	xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+ 	hdr->nops++;
+ 	hdr->replen += decode_locku_maxsz;
+ }
+ 
++static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr)
++{
++	__be32 *p;
++
++	p = reserve_space(xdr, 4);	/* opcode word only */
++	*p = cpu_to_be32(OP_RELEASE_LOCKOWNER);
++	encode_lockowner(xdr, lowner);	/* reserves its own 28 bytes */
++	hdr->nops++;
++	hdr->replen += decode_release_lockowner_maxsz;
++}
++
+ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
+ {
+ 	int len = name->len;
+@@ -1172,7 +1326,7 @@ static inline void encode_createmode(str
+ 		break;
+ 	default:
+ 		clp = arg->server->nfs_client;
+-		if (clp->cl_minorversion > 0) {
++		if (clp->cl_mvops->minor_version > 0) {
+ 			if (nfs4_has_persistent_session(clp)) {
+ 				*p = cpu_to_be32(NFS4_CREATE_GUARDED);
+ 				encode_attrs(xdr, arg->u.attrs, arg->server);
+@@ -1251,7 +1405,7 @@ static inline void encode_claim_delegate
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
+-	xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, stateid->u.data, NFS4_STATEID_SIZE);
+ 	encode_string(xdr, name->len, name->name);
+ }
+ 
+@@ -1282,7 +1436,7 @@ static void encode_open_confirm(struct x
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 	*p++ = cpu_to_be32(OP_OPEN_CONFIRM);
+-	p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	*p = cpu_to_be32(arg->seqid->sequence->counter);
+ 	hdr->nops++;
+ 	hdr->replen += decode_open_confirm_maxsz;
+@@ -1294,7 +1448,7 @@ static void encode_open_downgrade(struct
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 	*p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
+-	p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	*p = cpu_to_be32(arg->seqid->sequence->counter);
+ 	encode_share_access(xdr, arg->fmode);
+ 	hdr->nops++;
+@@ -1324,17 +1478,17 @@ static void encode_putrootfh(struct xdr_
+ 	hdr->replen += decode_putrootfh_maxsz;
+ }
+ 
+-static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
++static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx)
+ {
+ 	nfs4_stateid stateid;
+ 	__be32 *p;
+ 
+ 	p = reserve_space(xdr, NFS4_STATEID_SIZE);
+ 	if (ctx->state != NULL) {
+-		nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
+-		xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
++		nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid);
++		xdr_encode_opaque_fixed(p, stateid.u.data, NFS4_STATEID_SIZE);
+ 	} else
+-		xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
++		xdr_encode_opaque_fixed(p, zero_stateid.u.data, NFS4_STATEID_SIZE);
+ }
+ 
+ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
+@@ -1344,7 +1498,7 @@ static void encode_read(struct xdr_strea
+ 	p = reserve_space(xdr, 4);
+ 	*p = cpu_to_be32(OP_READ);
+ 
+-	encode_stateid(xdr, args->context);
++	encode_stateid(xdr, args->context, args->lock_context);
+ 
+ 	p = reserve_space(xdr, 12);
+ 	p = xdr_encode_hyper(p, args->offset);
+@@ -1448,7 +1602,7 @@ encode_setacl(struct xdr_stream *xdr, st
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_SETATTR);
+-	xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, zero_stateid.u.data, NFS4_STATEID_SIZE);
+ 	p = reserve_space(xdr, 2*4);
+ 	*p++ = cpu_to_be32(1);
+ 	*p = cpu_to_be32(FATTR4_WORD0_ACL);
+@@ -1479,7 +1633,7 @@ static void encode_setattr(struct xdr_st
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_SETATTR);
+-	xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, arg->stateid.u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_setattr_maxsz;
+ 	encode_attrs(xdr, arg->iap, server);
+@@ -1523,7 +1677,7 @@ static void encode_write(struct xdr_stre
+ 	p = reserve_space(xdr, 4);
+ 	*p = cpu_to_be32(OP_WRITE);
+ 
+-	encode_stateid(xdr, args->context);
++	encode_stateid(xdr, args->context, args->lock_context);
+ 
+ 	p = reserve_space(xdr, 16);
+ 	p = xdr_encode_hyper(p, args->offset);
+@@ -1542,7 +1696,7 @@ static void encode_delegreturn(struct xd
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 
+ 	*p++ = cpu_to_be32(OP_DELEGRETURN);
+-	xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, stateid->u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_delegreturn_maxsz;
+ }
+@@ -1696,6 +1850,162 @@ static void encode_sequence(struct xdr_s
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
++#ifdef CONFIG_NFS_V4_1
++static void
++encode_getdevicelist(struct xdr_stream *xdr,
++		     const struct nfs4_pnfs_getdevicelist_arg *args,
++		     struct compound_hdr *hdr)
++{
++	__be32 *p;
++	nfs4_verifier dummy = {
++		.data = "dummmmmy",	/* fixed placeholder verifier */
++	};
++
++	p = reserve_space(xdr, 20);	/* 3 words + 8-byte cookie */
++	*p++ = cpu_to_be32(OP_GETDEVICELIST);
++	*p++ = cpu_to_be32(args->layoutclass);
++	*p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM);
++	xdr_encode_hyper(p, 0ULL);                          /* cookie */
++	encode_nfs4_verifier(xdr, &dummy);
++	hdr->nops++;	/* NOTE(review): hdr->replen is not bumped here - confirm */
++}
++
++static void
++encode_getdeviceinfo(struct xdr_stream *xdr,
++		     const struct nfs4_pnfs_getdeviceinfo_arg *args,
++		     struct compound_hdr *hdr)
++{
++	int has_bitmap = (args->pdev->dev_notify_types != 0);	/* 0 or 1 */
++	int len = 16 + NFS4_PNFS_DEVICEID4_SIZE + (has_bitmap * 4); /* 16 = 4 words */
++	__be32 *p;
++
++	p = reserve_space(xdr, len);
++	*p++ = cpu_to_be32(OP_GETDEVICEINFO);
++	p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
++				    NFS4_PNFS_DEVICEID4_SIZE);
++	*p++ = cpu_to_be32(args->pdev->layout_type);
++	*p++ = cpu_to_be32(args->pdev->pglen + len);	/* gdia_maxcount */
++	*p++ = cpu_to_be32(has_bitmap);			/* bitmap length [01] */
++	if (has_bitmap)
++		*p = cpu_to_be32(args->pdev->dev_notify_types);
++	hdr->nops++;	/* NOTE(review): hdr->replen is not bumped here - confirm */
++}
++
++/* Encode a LAYOUTGET op for the byte range and iomode in args->lseg. */
++static void
++encode_layoutget(struct xdr_stream *xdr,
++		      const struct nfs4_pnfs_layoutget_arg *args,
++		      struct compound_hdr *hdr)
++{
++	nfs4_stateid stateid;
++	__be32 *p;
++
++	/* 44 = four words + three hypers + the trailing maxcount word */
++	p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
++	*p++ = cpu_to_be32(OP_LAYOUTGET);
++	*p++ = cpu_to_be32(0);     /* Signal layout available */
++	*p++ = cpu_to_be32(args->type);
++	*p++ = cpu_to_be32(args->lseg.iomode);
++	p = xdr_encode_hyper(p, args->lseg.offset);
++	p = xdr_encode_hyper(p, args->lseg.length);
++	p = xdr_encode_hyper(p, args->minlength);
++	pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout);
++	p = xdr_encode_opaque_fixed(p, stateid.u.data, NFS4_STATEID_SIZE);
++	*p = cpu_to_be32(args->maxcount);
++
++	dprintk("%s: 1st type:0x%x iomode:%d off:%llu len:%llu mc:%d\n",
++		__func__, args->type, args->lseg.iomode,
++		(unsigned long long)args->lseg.offset,	/* 64-bit: no truncation */
++		(unsigned long long)args->lseg.length,
++		args->maxcount);
++	hdr->nops++;
++	hdr->replen += decode_layoutget_maxsz;
++}
++
++/* Encode LAYOUTCOMMIT plus the layout driver's update body; returns 0. */
++static int
++encode_layoutcommit(struct xdr_stream *xdr,
++		    const struct pnfs_layoutcommit_arg *args,
++		    struct compound_hdr *hdr)
++{
++	struct layoutdriver_io_operations *ld_io_ops =
++			NFS_SERVER(args->inode)->pnfs_curr_ld->ld_io_ops;
++	__be32 *p;
++
++	dprintk("%s: %llu@%llu lbw: %llu type: %d\n", __func__,
++		args->lseg.length, args->lseg.offset, args->lastbytewritten,
++		args->layout_type);
++
++	p = reserve_space(xdr, 40 + NFS4_STATEID_SIZE);	/* 4 words + 3 hypers */
++	*p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
++	p = xdr_encode_hyper(p, args->lseg.offset);
++	p = xdr_encode_hyper(p, args->lseg.length);
++	*p++ = cpu_to_be32(0);     /* reclaim */
++	p = xdr_encode_opaque_fixed(p, args->stateid.u.data, NFS4_STATEID_SIZE);
++	*p++ = cpu_to_be32(1);     /* newoffset = TRUE */
++	p = xdr_encode_hyper(p, args->lastbytewritten);
++	*p = cpu_to_be32(args->time_modify_changed != 0);
++	if (args->time_modify_changed) {
++		p = reserve_space(xdr, 12);	/* 64-bit seconds + 32-bit nsec */
++		/* seconds is a signed 64-bit field: keep sign and high bits */
++		p = xdr_encode_hyper(p, (s64)args->time_modify.tv_sec);
++		*p = cpu_to_be32(args->time_modify.tv_nsec);
++	}
++	/* layout type word, then the driver-specific opaque update body */
++	p = reserve_space(xdr, 4);
++	*p = cpu_to_be32(args->layout_type);
++	if (ld_io_ops->encode_layoutcommit) {
++		ld_io_ops->encode_layoutcommit(NFS_I(args->inode)->layout,
++					       xdr, args);
++	} else {
++		p = reserve_space(xdr, 4);
++		xdr_encode_opaque(p, NULL, 0);	/* no driver hook: empty body */
++	}
++
++	hdr->nops++;
++	hdr->replen += decode_layoutcommit_maxsz;
++	return 0;
++}
++
++static void
++encode_layoutreturn(struct xdr_stream *xdr,
++		    const struct nfs4_pnfs_layoutreturn_arg *args,
++		    struct compound_hdr *hdr)
++{
++	nfs4_stateid stateid;
++	__be32 *p;
++
++	p = reserve_space(xdr, 20);	/* opcode + four argument words */
++	*p++ = cpu_to_be32(OP_LAYOUTRETURN);
++	*p++ = cpu_to_be32(args->reclaim);
++	*p++ = cpu_to_be32(args->layout_type);
++	*p++ = cpu_to_be32(args->lseg.iomode);
++	*p = cpu_to_be32(args->return_type);
++	if (args->return_type == RETURN_FILE) {	/* other types send no body */
++		struct layoutdriver_io_operations *ld_io_ops =
++			NFS_SERVER(args->inode)->pnfs_curr_ld->ld_io_ops;
++
++		p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE); /* two hypers + stateid */
++		p = xdr_encode_hyper(p, args->lseg.offset);
++		p = xdr_encode_hyper(p, args->lseg.length);
++		pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout);
++		p = xdr_encode_opaque_fixed(p, &stateid.u.data,
++					    NFS4_STATEID_SIZE);
++		dprintk("%s: call %pF\n", __func__,
++		ld_io_ops->encode_layoutreturn);
++		if (ld_io_ops->encode_layoutreturn) {
++			ld_io_ops->encode_layoutreturn(
++				NFS_I(args->inode)->layout, xdr, args);
++		} else {
++			p = reserve_space(xdr, 4);
++			*p = cpu_to_be32(0);	/* no driver hook: single zero word */
++		}
++	}
++	hdr->nops++;
++	hdr->replen += decode_layoutreturn_maxsz;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
+ /*
+  * END OF "GENERIC" ENCODE ROUTINES.
+  */
+@@ -1704,7 +2014,7 @@ static u32 nfs4_xdr_minorversion(const s
+ {
+ #if defined(CONFIG_NFS_V4_1)
+ 	if (args->sa_session)
+-		return args->sa_session->clp->cl_minorversion;
++		return args->sa_session->clp->cl_mvops->minor_version;
+ #endif /* CONFIG_NFS_V4_1 */
+ 	return 0;
+ }
+@@ -2048,6 +2358,20 @@ static int nfs4_xdr_enc_locku(struct rpc
+ 	return 0;
+ }
+ 
++static int nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, __be32 *p, struct nfs_release_lockowner_args *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = 0,	/* no SEQUENCE op is encoded for this compound */
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_release_lockowner(&xdr, &args->lock_owner, &hdr);
++	encode_nops(&hdr);
++	return 0;	/* unconditional */
++}
++
+ /*
+  * Encode a READLINK request
+  */
+@@ -2330,7 +2654,7 @@ static int nfs4_xdr_enc_setclientid_conf
+ 	struct compound_hdr hdr = {
+ 		.nops	= 0,
+ 	};
+-	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
++	const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME, 0, 0 };
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ 	encode_compound_hdr(&xdr, req, &hdr);
+@@ -2395,7 +2719,7 @@ static int nfs4_xdr_enc_exchange_id(stru
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = args->client->cl_minorversion,
++		.minorversion = args->client->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2413,7 +2737,7 @@ static int nfs4_xdr_enc_create_session(s
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = args->client->cl_minorversion,
++		.minorversion = args->client->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2431,7 +2755,7 @@ static int nfs4_xdr_enc_destroy_session(
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = session->clp->cl_minorversion,
++		.minorversion = session->clp->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2469,7 +2793,7 @@ static int nfs4_xdr_enc_get_lease_time(s
+ 	struct compound_hdr hdr = {
+ 		.minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
+ 	};
+-	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
++	const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME, 0, 0 };
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ 	encode_compound_hdr(&xdr, req, &hdr);
+@@ -2499,6 +2823,159 @@ static int nfs4_xdr_enc_reclaim_complete
+ 	return 0;
+ }
+ 
++/*
++ * Encode a GETDEVICELIST compound: SEQUENCE, PUTFH(args->fh), GETDEVICELIST
++ */
++static int
++nfs4_xdr_enc_getdevicelist(struct rpc_rqst *req, uint32_t *p,
++			   struct nfs4_pnfs_getdevicelist_arg *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_getdevicelist(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a GETDEVICEINFO compound: SEQUENCE, GETDEVICEINFO (no PUTFH)
++ */
++static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
++				      struct nfs4_pnfs_getdeviceinfo_arg *args)
++{
++	struct xdr_stream xdr;
++	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++	int replen;
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_getdeviceinfo(&xdr, args, &hdr);
++
++	/* set up reply kvec. Subtract notification bitmap max size (8)
++	 * so that notification bitmap is put in xdr_buf tail.
++	 * NOTE(review): the 8 is subtracted before the << 2 below, i.e. as
++	 * 8 words rather than 8 bytes - confirm which was intended. */
++	replen = (RPC_REPHDRSIZE + auth->au_rslack +
++		  NFS4_dec_getdeviceinfo_sz - 8) << 2;
++	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pdev->pages,
++			 args->pdev->pgbase, args->pdev->pglen);
++	dprintk("%s: inlined page args = (%u, %p, %u, %u)\n", __func__,
++		replen, args->pdev->pages, args->pdev->pgbase, args->pdev->pglen);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a LAYOUTGET compound: SEQUENCE, PUTFH(NFS_FH(args->inode)), LAYOUTGET
++ */
++static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
++				  struct nfs4_pnfs_layoutget_arg *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
++	encode_layoutget(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a LAYOUTCOMMIT compound: SEQUENCE, PUTFH, LAYOUTCOMMIT, GETATTR
++ */
++static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, uint32_t *p,
++				     struct pnfs_layoutcommit_arg *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_layoutcommit(&xdr, args, &hdr);	/* return value (always 0) unused */
++	encode_getfattr(&xdr, args->bitmask, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a LAYOUTRETURN compound: SEQUENCE, PUTFH(inode fh), LAYOUTRETURN
++ */
++static int nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, uint32_t *p,
++				     struct nfs4_pnfs_layoutreturn_arg *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
++	encode_layoutreturn(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a pNFS File Layout Data Server WRITE: SEQUENCE, PUTFH, WRITE
++ */
++static int nfs4_xdr_enc_dswrite(struct rpc_rqst *req, uint32_t *p,
++				struct nfs_writeargs *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_write(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a pNFS File Layout Data Server COMMIT: SEQUENCE, PUTFH, COMMIT
++ */
++static int nfs4_xdr_enc_dscommit(struct rpc_rqst *req, uint32_t *p,
++				 struct nfs_writeargs *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_commit(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+@@ -2599,14 +3076,17 @@ static int decode_attr_bitmap(struct xdr
+ 		goto out_overflow;
+ 	bmlen = be32_to_cpup(p);
+ 
+-	bitmap[0] = bitmap[1] = 0;
++	bitmap[0] = bitmap[1] = bitmap[2] = 0;
+ 	p = xdr_inline_decode(xdr, (bmlen << 2));
+ 	if (unlikely(!p))
+ 		goto out_overflow;
+ 	if (bmlen > 0) {
+ 		bitmap[0] = be32_to_cpup(p++);
+-		if (bmlen > 1)
+-			bitmap[1] = be32_to_cpup(p);
++		if (bmlen > 1) {
++			bitmap[1] = be32_to_cpup(p++);
++			if (bmlen > 2)
++				bitmap[2] = be32_to_cpup(p);
++		}
+ 	}
+ 	return 0;
+ out_overflow:
+@@ -2635,8 +3115,9 @@ static int decode_attr_supported(struct 
+ 		decode_attr_bitmap(xdr, bitmask);
+ 		bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS;
+ 	} else
+-		bitmask[0] = bitmask[1] = 0;
+-	dprintk("%s: bitmask=%08x:%08x\n", __func__, bitmask[0], bitmask[1]);
++		bitmask[0] = bitmask[1] = bitmask[2] = 0;
++	dprintk("%s: bitmask=%08x:%08x:%08x\n", __func__,
++		bitmask[0], bitmask[1], bitmask[2]);
+ 	return 0;
+ }
+ 
+@@ -3565,7 +4046,7 @@ static int decode_opaque_fixed(struct xd
+ 
+ static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+ {
+-	return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
++	return decode_opaque_fixed(xdr, stateid->u.data, NFS4_STATEID_SIZE);
+ }
+ 
+ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
+@@ -3621,7 +4102,7 @@ out_overflow:
+ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3647,7 +4128,7 @@ xdr_error:
+ static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3679,7 +4160,7 @@ xdr_error:
+ static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3705,7 +4186,7 @@ static int decode_getfattr(struct xdr_st
+ {
+ 	__be32 *savep;
+ 	uint32_t attrlen,
+-		 bitmap[2] = {0},
++		 bitmap[3] = {0},
+ 		 type;
+ 	int status;
+ 	umode_t fmode = 0;
+@@ -3824,24 +4305,101 @@ xdr_error:
+ 	return status;
+ }
+ 
+-
+-static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
++#if defined(CONFIG_NFS_V4_1)
++/*
++ * Decode potentially multiple layout types. Currently we only support
++ * one layout driver per file system.
++ */
++static int decode_pnfs_list(struct xdr_stream *xdr, uint32_t *layoutclass)
+ {
+-	__be32 *savep;
+-	uint32_t attrlen, bitmap[2];
+-	int status;
++	uint32_t *p;
++	int num;
+ 
+-	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+-		goto xdr_error;
+-	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+-		goto xdr_error;
+-	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+-		goto xdr_error;
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	num = be32_to_cpup(p);
+ 
+-	fsinfo->rtmult = fsinfo->wtmult = 512;	/* ??? */
++	/* pNFS is not supported by the underlying file system */
++	if (num == 0) {
++		*layoutclass = 0;
++		return 0;
++	}
+ 
+-	if ((status = decode_attr_lease_time(xdr, bitmap, &fsinfo->lease_time)) != 0)
+-		goto xdr_error;
++	/* TODO: We will eventually support multiple layout drivers ? */
++	if (num > 1)
++		printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers "
++			"per filesystem not supported\n", __func__);
++
++	/* Decode and set first layout type */
++	p = xdr_inline_decode(xdr, num * 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	*layoutclass = be32_to_cpup(p);
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++/*
++ * Decode the FATTR4_WORD1_FS_LAYOUT_TYPES attribute, if present, via decode_pnfs_list()
++ */
++static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
++				uint32_t *layoutclass)
++{
++	int status = 0;
++
++	dprintk("%s: bitmap is %x\n", __func__, bitmap[1]);
++	if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
++		return -EIO;	/* a lower-numbered word-1 bit is still set */
++	if (likely(bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES)) {
++		status = decode_pnfs_list(xdr, layoutclass);
++		bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;	/* mark consumed */
++	}
++	return status;	/* 0 when the attribute is absent; *layoutclass untouched */
++}
++
++/*
++ * The preferred block size for layout directed io (0 if not returned)
++ */
++static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
++				      uint32_t *res)
++{
++	__be32 *p;
++
++	dprintk("%s: bitmap is %x\n", __func__, bitmap[2]);
++	*res = 0;	/* default when the attribute is absent */
++	if (bitmap[2] & FATTR4_WORD2_LAYOUT_BLKSIZE) {
++		p = xdr_inline_decode(xdr, 4);
++		if (unlikely(!p)) {
++			print_overflow_msg(__func__, xdr);
++			return -EIO;
++		}
++		*res = be32_to_cpup(p);
++		bitmap[2] &= ~FATTR4_WORD2_LAYOUT_BLKSIZE;	/* mark consumed */
++	}
++	return 0;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
++static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
++{
++	__be32 *savep;
++	uint32_t attrlen, bitmap[3];
++	int status;
++
++	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
++		goto xdr_error;
++	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
++		goto xdr_error;
++	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
++		goto xdr_error;
++
++	fsinfo->rtmult = fsinfo->wtmult = 512;	/* ??? */
++
++	if ((status = decode_attr_lease_time(xdr, bitmap, &fsinfo->lease_time)) != 0)
++		goto xdr_error;
+ 	if ((status = decode_attr_maxfilesize(xdr, bitmap, &fsinfo->maxfilesize)) != 0)
+ 		goto xdr_error;
+ 	if ((status = decode_attr_maxread(xdr, bitmap, &fsinfo->rtmax)) != 0)
+@@ -3850,6 +4408,14 @@ static int decode_fsinfo(struct xdr_stre
+ 	if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0)
+ 		goto xdr_error;
+ 	fsinfo->wtpref = fsinfo->wtmax;
++#if defined(CONFIG_NFS_V4_1)
++	status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
++	if (status)
++		goto xdr_error;
++	status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize);
++	if (status)
++		goto xdr_error;
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ 	status = verify_attr_len(xdr, savep, attrlen);
+ xdr_error:
+@@ -3973,6 +4539,11 @@ static int decode_locku(struct xdr_strea
+ 	return status;
+ }
+ 
++static int decode_release_lockowner(struct xdr_stream *xdr)
++{
++	return decode_op_hdr(xdr, OP_RELEASE_LOCKOWNER);	/* op header only */
++}
++
+ static int decode_lookup(struct xdr_stream *xdr)
+ {
+ 	return decode_op_hdr(xdr, OP_LOOKUP);
+@@ -4333,7 +4904,7 @@ static int decode_getacl(struct xdr_stre
+ {
+ 	__be32 *savep;
+ 	uint32_t attrlen,
+-		 bitmap[2] = {0};
++		 bitmap[3] = {0};
+ 	struct kvec *iov = req->rq_rcv_buf.head;
+ 	int status;
+ 
+@@ -4682,6 +5253,226 @@ out_overflow:
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
++#if defined(CONFIG_NFS_V4_1)
++/*
++ * Decode GETDEVICELIST. TODO: Need to handle case when EOF != true;
++ */
++static int decode_getdevicelist(struct xdr_stream *xdr,
++				struct pnfs_devicelist *res)
++{
++	__be32 *p;
++	int status, i;
++	struct nfs_writeverf verftemp;	/* scratch: verifier is read, not used */
++
++	status = decode_op_hdr(xdr, OP_GETDEVICELIST);
++	if (status)
++		return status;
++
++	p = xdr_inline_decode(xdr, 8 + 8 + 4);	/* cookie, verifier, count */
++	if (unlikely(!p))
++		goto out_overflow;
++
++	/* TODO: Skip cookie for now */
++	p += 2;
++
++	/* Read verifier */
++	p = xdr_decode_opaque_fixed(p, verftemp.verifier, 8);
++
++	res->num_devs = be32_to_cpup(p);
++
++	dprintk("%s: num_dev %d\n", __func__, res->num_devs);
++
++	if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM)
++		return -NFS4ERR_REP_TOO_BIG;	/* more ids than requested */
++
++	p = xdr_inline_decode(xdr,
++			      res->num_devs * NFS4_PNFS_DEVICEID4_SIZE + 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	for (i = 0; i < res->num_devs; i++)
++		p = xdr_decode_opaque_fixed(p, res->dev_id[i].data,
++					    NFS4_PNFS_DEVICEID4_SIZE);
++	res->eof = be32_to_cpup(p);	/* the extra 4 bytes requested above */
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_getdeviceinfo(struct xdr_stream *xdr,
++				struct pnfs_device *pdev)
++{
++	__be32 *p;
++	uint32_t len, type;
++	int status;
++
++	status = decode_op_hdr(xdr, OP_GETDEVICEINFO);
++	if (status) {
++		if (status == -ETOOSMALL) {
++			/* this error reply carries one word: keep it in mincount */
++			p = xdr_inline_decode(xdr, 4);
++			if (unlikely(!p))
++				goto out_overflow;
++			pdev->mincount = be32_to_cpup(p);
++			dprintk("%s: Min count too small. mincnt = %u\n",
++				__func__, pdev->mincount);
++		}
++		return status;
++	}
++	/* layout type word followed by the device address length word */
++	p = xdr_inline_decode(xdr, 8);
++	if (unlikely(!p))
++		goto out_overflow;
++	type = be32_to_cpup(p++);
++	if (type != pdev->layout_type) {
++		dprintk("%s: layout mismatch req: %u pdev: %u\n",
++			__func__, pdev->layout_type, type);
++		return -EINVAL;
++	}
++	/*
++	 * Get the length of the opaque device_addr4. xdr_read_pages places
++	 * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages)
++	 * and places the remaining xdr data in xdr_buf->tail
++	 */
++	pdev->mincount = be32_to_cpup(p);
++	xdr_read_pages(xdr, pdev->mincount); /* include space for the length */
++
++	/* At most one bitmap word */
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	len = be32_to_cpup(p);	/* bitmap word count; only tested for non-zero */
++	if (len) {
++		p = xdr_inline_decode(xdr, 4);
++		if (unlikely(!p))
++			goto out_overflow;
++		pdev->dev_notify_types = be32_to_cpup(p);
++	} else
++		pdev->dev_notify_types = 0;
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++/* Decode LAYOUTGET, keeping only the first returned layout in res.
++ * Errors include -EINVAL (empty array), -ENOMEM (body > PAGE_SIZE), -EIO. */
++static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
++			    struct nfs4_pnfs_layoutget_res *res)
++{
++	__be32 *p;
++	int status;
++	u32 layout_count, dummy;
++
++	status = decode_op_hdr(xdr, OP_LAYOUTGET);
++	if (status)
++		return status;
++	p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE);
++	if (unlikely(!p))
++		goto out_overflow;
++	res->return_on_close = be32_to_cpup(p++);
++	p = xdr_decode_opaque_fixed(p, res->stateid.u.data, NFS4_STATEID_SIZE);
++	layout_count = be32_to_cpup(p);
++	if (!layout_count) {
++		dprintk("%s: server responded with empty layout array\n",
++			__func__);
++		return -EINVAL;
++	}
++
++	p = xdr_inline_decode(xdr, 24);	/* offset, length, iomode, type */
++	if (unlikely(!p))
++		goto out_overflow;
++	p = xdr_decode_hyper(p, &res->lseg.offset);
++	p = xdr_decode_hyper(p, &res->lseg.length);
++	res->lseg.iomode = be32_to_cpup(p++);
++	res->type = be32_to_cpup(p++);
++
++	status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
++	if (unlikely(status))
++		return status;
++
++	dprintk("%s roff:%llu rlen:%llu riomode:%d, lo_type:0x%x, lo.len:%d\n",
++		__func__,
++		(unsigned long long)res->lseg.offset,	/* 64-bit: no truncation */
++		(unsigned long long)res->lseg.length,
++		res->lseg.iomode, res->type, res->layout.len);
++
++	/* presumably, pnfs4_proc_layoutget allocated a single page */
++	if (res->layout.len > PAGE_SIZE)
++		return -ENOMEM;
++	memcpy(res->layout.buf, p, res->layout.len);
++
++	/* FIXME: the whole layout array should be passed up to the pnfs
++	 * client */
++	if (layout_count > 1) {
++		dprintk("%s: server responded with %d layouts, dropping tail\n",
++			__func__, layout_count);
++
++		while (--layout_count) {
++			p = xdr_inline_decode(xdr, 24);
++			if (unlikely(!p))
++				goto out_overflow;
++			status = decode_opaque_inline(xdr, &dummy, (char **)&p);
++			if (unlikely(status))
++				return status;
++		}
++	}
++
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_layoutreturn(struct xdr_stream *xdr,
++			       struct nfs4_pnfs_layoutreturn_res *res)
++{
++	__be32 *p;
++	int status;
++
++	status = decode_op_hdr(xdr, OP_LAYOUTRETURN);
++	if (status)
++		return status;
++	p = xdr_inline_decode(xdr, 4);	/* "stateid present" flag word */
++	if (unlikely(!p))
++		goto out_overflow;
++	res->lrs_present = be32_to_cpup(p);
++	if (res->lrs_present)	/* res->stateid is only written when flagged */
++		status = decode_stateid(xdr, &res->stateid);
++	return status;	/* still 0 here unless decode_stateid() failed */
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_layoutcommit(struct xdr_stream *xdr,
++				    struct rpc_rqst *req,
++				    struct pnfs_layoutcommit_res *res)
++{
++	__be32 *p;
++	int status;
++
++	status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT);
++	if (status)
++		return status;
++
++	p = xdr_inline_decode(xdr, 4);	/* "size changed" flag word */
++	if (unlikely(!p))
++		goto out_overflow;
++	res->sizechanged = be32_to_cpup(p);
++
++	if (res->sizechanged) {	/* res->newsize is only written when flagged */
++		p = xdr_inline_decode(xdr, 8);
++		if (unlikely(!p))
++			goto out_overflow;
++		xdr_decode_hyper(p, &res->newsize);
++	}
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
+ /*
+  * END OF "GENERIC" DECODE ROUTINES.
+  */
+@@ -5259,6 +6050,19 @@ out:
+ 	return status;
+ }
+ 
++static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (!status)	/* "dummy" is never touched: nothing is returned but status */
++		status = decode_release_lockowner(&xdr);
++	return status;
++}
++
+ /*
+  * Decode READLINK response
+  */
+@@ -5696,6 +6500,186 @@ static int nfs4_xdr_dec_reclaim_complete
+ 		status = decode_reclaim_complete(&xdr, (void *)NULL);
+ 	return status;
+ }
++
++/*
++ * Decode GETDEVICELIST response (SEQUENCE, PUTFH, GETDEVICELIST ops)
++ */
++static int nfs4_xdr_dec_getdevicelist(struct rpc_rqst *rqstp, uint32_t *p,
++				      struct nfs4_pnfs_getdevicelist_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	dprintk("decoding getdevicelist!\n");
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status != 0)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status != 0)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status != 0)
++		goto out;
++	status = decode_getdevicelist(&xdr, res->devlist);
++out:
++	return status;	/* 0, or the status of the first step that failed */
++}
++
++/*
++ * Decode GETDEVICEINFO response
++ */
++static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
++				      struct nfs4_pnfs_getdeviceinfo_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status != 0)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status != 0)
++		goto out;
++	status = decode_getdeviceinfo(&xdr, res->pdev);
++out:
++	return status;
++}
++
++/*
++ * Decode LAYOUTGET response
++ */
++static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
++				  struct nfs4_pnfs_layoutget_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutget(&xdr, rqstp, res);
++out:
++	return status;
++}
++
++/*
++ * Decode LAYOUTRETURN response
++ */
++static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp, uint32_t *p,
++				     struct nfs4_pnfs_layoutreturn_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutreturn(&xdr, res);
++out:
++	return status;
++}
++
++/*
++ * Decode LAYOUTCOMMIT response
++ */
++static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, uint32_t *p,
++				     struct pnfs_layoutcommit_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutcommit(&xdr, rqstp, res);
++	if (status)
++		goto out;
++	decode_getfattr(&xdr, res->fattr, res->server,
++			!RPC_IS_ASYNC(rqstp->rq_task));
++out:
++	return status;
++}
++
++/*
++ * Decode pNFS File Layout Data Server WRITE response
++ */
++static int nfs4_xdr_dec_dswrite(struct rpc_rqst *rqstp, uint32_t *p,
++				struct nfs_writeres *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_write(&xdr, res);
++	if (!status)
++		return res->count;
++out:
++	return status;
++}
++
++/*
++ * Decode pNFS File Layout Data Server COMMIT response
++ */
++static int nfs4_xdr_dec_dscommit(struct rpc_rqst *rqstp, uint32_t *p,
++				 struct nfs_writeres *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_commit(&xdr, res);
++out:
++	return status;
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
+@@ -5866,6 +6850,7 @@ struct rpc_procinfo	nfs4_procedures[] = 
+   PROC(GETACL,		enc_getacl,	dec_getacl),
+   PROC(SETACL,		enc_setacl,	dec_setacl),
+   PROC(FS_LOCATIONS,	enc_fs_locations, dec_fs_locations),
++  PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
+ #if defined(CONFIG_NFS_V4_1)
+   PROC(EXCHANGE_ID,	enc_exchange_id,	dec_exchange_id),
+   PROC(CREATE_SESSION,	enc_create_session,	dec_create_session),
+@@ -5873,6 +6858,13 @@ struct rpc_procinfo	nfs4_procedures[] = 
+   PROC(SEQUENCE,	enc_sequence,	dec_sequence),
+   PROC(GET_LEASE_TIME,	enc_get_lease_time,	dec_get_lease_time),
+   PROC(RECLAIM_COMPLETE, enc_reclaim_complete,  dec_reclaim_complete),
++  PROC(PNFS_GETDEVICELIST, enc_getdevicelist, dec_getdevicelist),
++  PROC(PNFS_GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
++  PROC(PNFS_LAYOUTGET,  enc_layoutget,     dec_layoutget),
++  PROC(PNFS_LAYOUTCOMMIT, enc_layoutcommit,  dec_layoutcommit),
++  PROC(PNFS_LAYOUTRETURN, enc_layoutreturn,  dec_layoutreturn),
++  PROC(PNFS_WRITE, enc_dswrite,  dec_dswrite),
++  PROC(PNFS_COMMIT, enc_dscommit,  dec_dscommit),
+ #endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild
+--- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-08-23 12:09:03.348511665 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-08-23 12:09:03.348511665 -0400
+@@ -0,0 +1,11 @@
++#
++# Makefile for the pNFS Objects Layout Driver kernel module
++#
++objlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o objio_osd.o
++obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o
++
++#
++# Panasas pNFS Layout Driver kernel module
++#
++panlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o panfs_shim.o
++obj-$(CONFIG_PNFS_PANLAYOUT) += panlayoutdriver.o
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-08-23 12:09:03.349501459 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-08-23 12:09:03.349501459 -0400
+@@ -0,0 +1,1087 @@
++/*
++ *  objio_osd.c
++ *
++ *  pNFS Objects layout implementation over open-osd initiator library
++ *
++ *  Copyright (C) 2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/module.h>
++#include <scsi/scsi_device.h>
++#include <scsi/osd_attributes.h>
++#include <scsi/osd_initiator.h>
++#include <scsi/osd_sec.h>
++#include <scsi/osd_sense.h>
++
++#include "objlayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++#define _LLU(x) ((unsigned long long)(x))	/* cast for %llx printing */
++
++enum { BIO_MAX_PAGES_KMALLOC =
++		(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
++};
++
++/* A per mountpoint struct currently for device cache */
++struct objio_mount_type {
++	struct list_head dev_list;
++	spinlock_t dev_list_lock;
++};
++
++struct _dev_ent {
++	struct list_head list;
++	struct pnfs_deviceid d_id;
++	struct osd_dev *od;
++};
++
++static void _dev_list_remove_all(struct objio_mount_type *omt)
++{
++	spin_lock(&omt->dev_list_lock);
++
++	while (!list_empty(&omt->dev_list)) {
++		struct _dev_ent *de = list_entry(omt->dev_list.next,
++				 struct _dev_ent, list);
++
++		list_del_init(&de->list);
++		osduld_put_device(de->od);
++		kfree(de);
++	}
++
++	spin_unlock(&omt->dev_list_lock);
++}
++
++static struct osd_dev *___dev_list_find(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id)
++{
++	struct list_head *le;
++
++	list_for_each(le, &omt->dev_list) {
++		struct _dev_ent *de = list_entry(le, struct _dev_ent, list);
++
++		if (0 == memcmp(&de->d_id, d_id, sizeof(*d_id)))
++			return de->od;
++	}
++
++	return NULL;
++}
++
++static struct osd_dev *_dev_list_find(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id)
++{
++	struct osd_dev *od;
++
++	spin_lock(&omt->dev_list_lock);
++	od = ___dev_list_find(omt, d_id);
++	spin_unlock(&omt->dev_list_lock);
++	return od;
++}
++
++static int _dev_list_add(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id, struct osd_dev *od)
++{
++	struct _dev_ent *de = kzalloc(sizeof(*de), GFP_KERNEL);
++
++	if (!de)
++		return -ENOMEM;
++
++	spin_lock(&omt->dev_list_lock);
++	/* Look again under the lock: an existing entry for @d_id is kept */
++	if (___dev_list_find(omt, d_id)) {
++		kfree(de);
++		goto out;	/* NOTE(review): returns 0 but @od is not cached */
++	}
++
++	de->d_id = *d_id;
++	de->od = od;	/* put by _dev_list_remove_all() */
++	list_add(&de->list, &omt->dev_list);
++
++out:
++	spin_unlock(&omt->dev_list_lock);
++	return 0;
++}
++
++struct objio_segment {
++	struct pnfs_osd_layout *layout;
++
++	unsigned mirrors_p1;
++	unsigned stripe_unit;
++	unsigned group_width;	/* Data stripe_units without integrity comps */
++	u64 group_depth;
++	unsigned group_count;
++
++	unsigned num_comps;
++	/* variable length */
++	struct osd_dev	*ods[1];
++};
++
++struct objio_state;
++typedef ssize_t (*objio_done_fn)(struct objio_state *ios);
++
++struct objio_state {
++	/* Generic layer */
++	struct objlayout_io_state ol_state;
++
++	struct objio_segment *objio_seg;
++
++	struct kref kref;
++	objio_done_fn done;
++	void *private;
++
++	unsigned long length;
++	unsigned numdevs; /* Actually used devs in this IO */
++	/* A per-device variable array of size numdevs */
++	struct _objio_per_comp {
++		struct bio *bio;
++		struct osd_request *or;
++		unsigned long length;
++		u64 offset;
++		unsigned dev;
++	} per_dev[];
++};
++
++/* Return the osd_dev of layout component @comp: from the per-mount cache, or
++   by a get_device_info + osd_initiator lookup (then cached); else ERR_PTR */
++static struct osd_dev *_device_lookup(struct pnfs_layout_type *pnfslay,
++			       struct objio_segment *objio_seg, unsigned comp)
++{
++	struct pnfs_osd_layout *layout = objio_seg->layout;
++	struct pnfs_osd_deviceaddr *deviceaddr;
++	struct pnfs_deviceid *d_id;
++	struct osd_dev *od;
++	struct osd_dev_info odi;
++	struct objio_mount_type *omt = PNFS_NFS_SERVER(pnfslay)->pnfs_ld_data;
++	int err;
++
++	d_id = &layout->olo_comps[comp].oc_object_id.oid_device_id;
++
++	od = _dev_list_find(omt, d_id);
++	if (od)
++		return od;
++
++	err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr);
++	if (unlikely(err)) {
++		dprintk("%s: objlayout_get_deviceinfo=>%d\n", __func__, err);
++		return ERR_PTR(err);
++	}
++
++	odi.systemid_len = deviceaddr->oda_systemid.len;
++	if (odi.systemid_len > sizeof(odi.systemid)) {
++		err = -EINVAL;
++		goto out;
++	} else if (odi.systemid_len)
++		memcpy(odi.systemid, deviceaddr->oda_systemid.data,
++		       odi.systemid_len);
++	odi.osdname_len	 = deviceaddr->oda_osdname.len;
++	odi.osdname	 = (u8 *)deviceaddr->oda_osdname.data;
++
++	if (!odi.osdname_len && !odi.systemid_len) {
++		dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
++			__func__);
++		err = -ENODEV;
++		goto out;
++	}
++
++	od = osduld_info_lookup(&odi);
++	if (unlikely(IS_ERR(od))) {
++		err = PTR_ERR(od);
++		dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
++		goto out;
++	}
++	err = _dev_list_add(omt, d_id, od);
++	if (unlikely(err))	/* not cached => nobody would ever put it */
++		osduld_put_device(od);
++out:
++	dprintk("%s: return=%d\n", __func__, err);
++	objlayout_put_deviceinfo(deviceaddr);
++	return err ? ERR_PTR(err) : od;
++}
++
++static int objio_devices_lookup(struct pnfs_layout_type *pnfslay,
++	struct objio_segment *objio_seg)
++{
++	struct pnfs_osd_layout *layout = objio_seg->layout;
++	unsigned i, num_comps = layout->olo_num_comps;
++	int err;
++
++	/* lookup all devices */
++	for (i = 0; i < num_comps; i++) {
++		struct osd_dev *od;
++
++		od = _device_lookup(pnfslay, objio_seg, i);
++		if (unlikely(IS_ERR(od))) {
++			err = PTR_ERR(od);
++			goto out;
++		}
++		objio_seg->ods[i] = od;
++	}
++	objio_seg->num_comps = num_comps;
++	err = 0;
++
++out:
++	dprintk("%s: return=%d\n", __func__, err);
++	return err;
++}
++
++static int _verify_data_map(struct pnfs_osd_layout *layout)
++{
++	struct pnfs_osd_data_map *data_map = &layout->olo_map;
++	u64 stripe_length;
++	u32 group_width;
++
++/* FIXME: Only raid0 for now. if not go through MDS */
++	if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) {
++		printk(KERN_ERR "Only RAID_0 for now\n");
++		return -ENOTSUPP;
++	}
++	if (0 != (data_map->odm_num_comps % (data_map->odm_mirror_cnt + 1))) {
++		printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n",
++			  data_map->odm_num_comps, data_map->odm_mirror_cnt);
++		return -EINVAL;
++	}
++	/* odm_group_width == 0: one group made of all non-mirror components */
++	if (data_map->odm_group_width)
++		group_width = data_map->odm_group_width;
++	else
++		group_width = data_map->odm_num_comps /
++						(data_map->odm_mirror_cnt + 1);
++	/* _calc_stripe_info() divides by this product and keeps it in a u32 */
++	stripe_length = (u64)data_map->odm_stripe_unit * group_width;
++	if (!stripe_length || stripe_length >= (1ULL << 32)) {
++		printk(KERN_ERR "Total Stripe length(0x%llx) is zero"
++			  " or >= 32bit, not supported\n", _LLU(stripe_length));
++		return -ENOTSUPP;
++	}
++
++	if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) {
++		printk(KERN_ERR "Stripe Unit(0x%llx)"
++			  " must be Multples of PAGE_SIZE(0x%lx)\n",
++			  _LLU(data_map->odm_stripe_unit), PAGE_SIZE);
++		return -ENOTSUPP;
++	}
++
++	return 0;
++}
++
++int objio_alloc_lseg(void **outp,
++	struct pnfs_layout_type *pnfslay,
++	struct pnfs_layout_segment *lseg,
++	struct pnfs_osd_layout *layout)
++{
++	struct objio_segment *objio_seg;
++	int err;
++
++	err = _verify_data_map(layout);
++	if (unlikely(err))
++		return err;
++
++	objio_seg = kzalloc(sizeof(*objio_seg) +
++			(layout->olo_num_comps - 1) * sizeof(objio_seg->ods[0]),
++			GFP_KERNEL);
++	if (!objio_seg)
++		return -ENOMEM;
++
++	objio_seg->layout = layout;
++	err = objio_devices_lookup(pnfslay, objio_seg);
++	if (err)
++		goto free_seg;
++
++	objio_seg->mirrors_p1 = layout->olo_map.odm_mirror_cnt + 1;
++	objio_seg->stripe_unit = layout->olo_map.odm_stripe_unit;
++	if (layout->olo_map.odm_group_width) {
++		objio_seg->group_width = layout->olo_map.odm_group_width;
++		objio_seg->group_depth = layout->olo_map.odm_group_depth;
++		objio_seg->group_count = layout->olo_map.odm_num_comps /
++						objio_seg->mirrors_p1 /
++						objio_seg->group_width;
++	} else {
++		objio_seg->group_width = layout->olo_map.odm_num_comps /
++						objio_seg->mirrors_p1;
++		objio_seg->group_depth = -1;
++		objio_seg->group_count = 1;
++	}
++
++	*outp = objio_seg;
++	return 0;
++
++free_seg:
++	dprintk("%s: Error: return %d\n", __func__, err);
++	kfree(objio_seg);
++	*outp = NULL;
++	return err;
++}
++
++void objio_free_lseg(void *p)
++{
++	struct objio_segment *objio_seg = p;
++
++	kfree(objio_seg);
++}
++
++int objio_alloc_io_state(void *seg, struct objlayout_io_state **outp)
++{
++	struct objio_segment *objio_seg = seg;
++	struct objio_state *ios;
++	const unsigned first_size = sizeof(*ios) +
++				objio_seg->num_comps * sizeof(ios->per_dev[0]);
++	const unsigned sec_size = objio_seg->num_comps *
++						sizeof(ios->ol_state.ioerrs[0]);
++
++	dprintk("%s: num_comps=%d\n", __func__, objio_seg->num_comps);
++	ios = kzalloc(first_size + sec_size, GFP_KERNEL);
++	if (unlikely(!ios))
++		return -ENOMEM;
++	/* ioerrs[] shares the allocation: it starts right after per_dev[] */
++	ios->objio_seg = objio_seg;
++	ios->ol_state.ioerrs = ((void *)ios) + first_size;
++	ios->ol_state.num_comps = objio_seg->num_comps;
++
++	*outp = &ios->ol_state;	/* released by objio_free_io_state() */
++	return 0;
++}
++
++void objio_free_io_state(struct objlayout_io_state *ol_state)
++{
++	struct objio_state *ios = container_of(ol_state, struct objio_state,
++					       ol_state);
++
++	kfree(ios);
++}
++
++enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
++{
++	switch (oep) {
++	case OSD_ERR_PRI_NO_ERROR:
++		return (enum pnfs_osd_errno)0;
++
++	case OSD_ERR_PRI_CLEAR_PAGES:
++		BUG_ON(1);
++		return 0;
++
++	case OSD_ERR_PRI_RESOURCE:
++		return PNFS_OSD_ERR_RESOURCE;
++	case OSD_ERR_PRI_BAD_CRED:
++		return PNFS_OSD_ERR_BAD_CRED;
++	case OSD_ERR_PRI_NO_ACCESS:
++		return PNFS_OSD_ERR_NO_ACCESS;
++	case OSD_ERR_PRI_UNREACHABLE:
++		return PNFS_OSD_ERR_UNREACHABLE;
++	case OSD_ERR_PRI_NOT_FOUND:
++		return PNFS_OSD_ERR_NOT_FOUND;
++	case OSD_ERR_PRI_NO_SPACE:
++		return PNFS_OSD_ERR_NO_SPACE;
++	default:
++		WARN_ON(1);
++		/* fallthrough */
++	case OSD_ERR_PRI_EIO:
++		return PNFS_OSD_ERR_EIO;
++	}
++}
++
++static void _clear_bio(struct bio *bio)
++{
++	struct bio_vec *bv;
++	unsigned i;
++
++	__bio_for_each_segment(bv, bio, i, 0) {
++		unsigned this_count = bv->bv_len;
++
++		if (likely(PAGE_SIZE == this_count))
++			clear_highpage(bv->bv_page);
++		else
++			zero_user(bv->bv_page, bv->bv_offset, this_count);
++	}
++}
++
++static int _io_check(struct objio_state *ios, bool is_write)
++{
++	enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR;
++	int lin_ret = 0;	/* decode_sense result of the worst error so far */
++	unsigned i;
++
++	for (i = 0; i < ios->numdevs; i++) {
++		struct osd_sense_info osi;
++		struct osd_request *or = ios->per_dev[i].or;
++		int ret;
++
++		if (!or)
++			continue;	/* no request on this slot */
++
++		ret = osd_req_decode_sense(or, &osi);
++		if (likely(!ret))
++			continue;
++
++		if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
++			/* read started past end of file: zero the pages */
++			BUG_ON(is_write);
++			_clear_bio(ios->per_dev[i].bio);
++			dprintk("%s: start read offset passed end of file "
++				"offset=0x%llx, length=0x%lx\n", __func__,
++				_LLU(ios->per_dev[i].offset),
++				ios->per_dev[i].length);
++
++			continue; /* we recovered */
++		}
++		objlayout_io_set_result(&ios->ol_state, ios->per_dev[i].dev,
++					osd_pri_2_pnfs_err(osi.osd_err_pri),
++					ios->per_dev[i].offset,
++					ios->per_dev[i].length,
++					is_write);
++		/* Keep the highest-priority failure; on a tie the later one wins */
++		if (osi.osd_err_pri >= oep) {
++			oep = osi.osd_err_pri;
++			lin_ret = ret;
++		}
++	}
++
++	return lin_ret;	/* 0 when every request succeeded or was recovered */
++}
++
++/*
++ * Common IO state helpers.
++ */
++static void _io_free(struct objio_state *ios)
++{
++	unsigned i;
++
++	for (i = 0; i < ios->numdevs; i++) {
++		struct _objio_per_comp *per_dev = &ios->per_dev[i];
++
++		if (per_dev->or) {
++			osd_end_request(per_dev->or);
++			per_dev->or = NULL;
++		}
++
++		if (per_dev->bio) {
++			bio_put(per_dev->bio);
++			per_dev->bio = NULL;
++		}
++	}
++}
++
++struct osd_dev * _io_od(struct objio_state *ios, unsigned dev)
++{
++	unsigned min_dev = ios->objio_seg->layout->olo_comps_index;
++	unsigned max_dev = min_dev + ios->ol_state.num_comps;
++
++	BUG_ON(dev < min_dev || max_dev <= dev);
++	return ios->objio_seg->ods[dev - min_dev];
++}
++
++struct _striping_info {
++	u64 obj_offset;
++	u64 group_length;
++	u64 total_group_length;
++	u64 Major;
++	unsigned dev;
++	unsigned unit_off;
++};
++
++static void _calc_stripe_info(struct objio_state *ios, u64 file_offset,
++			      struct _striping_info *si)
++{
++	u32	stripe_unit = ios->objio_seg->stripe_unit;
++	u32	group_width = ios->objio_seg->group_width;
++	u64	group_depth = ios->objio_seg->group_depth;
++	u32	U = stripe_unit * group_width;
++
++	u64	T = U * group_depth;
++	u64	S = T * ios->objio_seg->group_count;
++	u64	M = div64_u64(file_offset, S);
++
++	/*
++	G = (L - (M * S)) / T
++	H = (L - (M * S)) % T
++	*/
++	u64	LmodU = file_offset - M * S;
++	u32	G = div64_u64(LmodU, T);
++	u64	H = LmodU - G * T;
++
++	u32	N = div_u64(H, U);
++
++	div_u64_rem(file_offset, stripe_unit, &si->unit_off);
++	si->obj_offset = si->unit_off + (N * stripe_unit) +
++				  (M * group_depth * stripe_unit);
++
++	/* "H - (N * U)" is just "H % U" so it's bound to u32 */
++	si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
++	si->dev *= ios->objio_seg->mirrors_p1;
++
++	si->group_length = T - H;
++	si->total_group_length = T;
++	si->Major = M;
++}
++
++static int _add_stripe_unit(struct objio_state *ios,  unsigned *cur_pg,
++		unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len)
++{
++	unsigned pg = *cur_pg;
++	struct request_queue *q =
++			osd_request_queue(_io_od(ios, per_dev->dev));
++
++	per_dev->length += cur_len;
++
++	if (per_dev->bio == NULL) {
++		unsigned stripes = ios->ol_state.num_comps /
++						     ios->objio_seg->mirrors_p1;
++		unsigned pages_in_stripe = stripes *
++				      (ios->objio_seg->stripe_unit / PAGE_SIZE);
++		unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) /
++				    stripes;
++
++		per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
++		if (unlikely(!per_dev->bio)) {
++			dprintk("Faild to allocate BIO size=%u\n", bio_size);
++			return -ENOMEM;
++		}
++	}
++
++	while (cur_len > 0) {
++		unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
++		unsigned added_len;
++
++		BUG_ON(ios->ol_state.nr_pages <= pg);
++		cur_len -= pglen;
++
++		added_len = bio_add_pc_page(q, per_dev->bio,
++					ios->ol_state.pages[pg], pglen, pgbase);
++		if (unlikely(pglen != added_len))
++			return -ENOMEM;
++		pgbase = 0;
++		++pg;
++	}
++	BUG_ON(cur_len);
++
++	*cur_pg = pg;
++	return 0;
++}
++
++static int _prepare_one_group(struct objio_state *ios, u64 length,
++			      struct _striping_info *si, unsigned first_comp,
++			      unsigned *last_pg)
++{
++	unsigned stripe_unit = ios->objio_seg->stripe_unit;
++	unsigned mirrors_p1 = ios->objio_seg->mirrors_p1;
++	unsigned devs_in_group = ios->objio_seg->group_width * mirrors_p1;
++	unsigned dev = si->dev;
++	unsigned first_dev = dev - (dev % devs_in_group);
++	unsigned comp = first_comp + (dev - first_dev);
++	unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
++	unsigned cur_pg = *last_pg;
++	int ret = 0;
++
++	while (length) {
++		struct _objio_per_comp *per_dev = &ios->per_dev[comp];
++		unsigned cur_len, page_off = 0;
++
++		if (!per_dev->length) {
++			per_dev->dev = dev;
++			if (dev < si->dev) {
++				per_dev->offset = si->obj_offset + stripe_unit -
++								   si->unit_off;
++				cur_len = stripe_unit;
++			} else if (dev == si->dev) {
++				per_dev->offset = si->obj_offset;
++				cur_len = stripe_unit - si->unit_off;
++				page_off = si->unit_off & ~PAGE_MASK;
++				BUG_ON(page_off &&
++				      (page_off != ios->ol_state.pgbase));
++			} else { /* dev > si->dev */
++				per_dev->offset = si->obj_offset - si->unit_off;
++				cur_len = stripe_unit;
++			}
++
++			if (max_comp < comp)
++				max_comp = comp;
++
++			dev += mirrors_p1;
++			dev = (dev % devs_in_group) + first_dev;
++		} else {
++			cur_len = stripe_unit;
++		}
++		if (cur_len >= length)
++			cur_len = length;
++
++		ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
++				       cur_len);
++		if (unlikely(ret))
++			goto out;
++
++		comp += mirrors_p1;
++		comp = (comp % devs_in_group) + first_comp;
++
++		length -= cur_len;
++		ios->length += cur_len;
++	}
++out:
++	ios->numdevs = max_comp + mirrors_p1;
++	*last_pg = cur_pg;
++	return ret;
++}
++
++static int _io_rw_pagelist(struct objio_state *ios)
++{
++	u64 length = ios->ol_state.count;
++	struct _striping_info si;
++	unsigned devs_in_group = ios->objio_seg->group_width *
++				 ios->objio_seg->mirrors_p1;
++	unsigned first_comp = 0;
++	unsigned num_comps = ios->objio_seg->layout->olo_map.odm_num_comps;
++	unsigned last_pg = 0;
++	int ret = 0;
++
++	_calc_stripe_info(ios, ios->ol_state.offset, &si);
++	while (length) {
++		if (length < si.group_length)
++			si.group_length = length;
++
++		ret = _prepare_one_group(ios, si.group_length, &si, first_comp,
++					 &last_pg);
++		if (unlikely(ret))
++			goto out;
++
++		length -= si.group_length;
++
++		si.group_length = si.total_group_length;
++		si.unit_off = 0;
++		++si.Major;
++		si.obj_offset = si.Major * ios->objio_seg->stripe_unit *
++						ios->objio_seg->group_depth;
++
++		si.dev = (si.dev - (si.dev % devs_in_group)) + devs_in_group;
++		si.dev %= num_comps;
++
++		first_comp += devs_in_group;
++		first_comp %= num_comps;
++	}
++
++out:
++	if (!ios->length)
++		return ret;
++
++	return 0;
++}
++
++static ssize_t _sync_done(struct objio_state *ios)
++{
++	struct completion *waiting = ios->private;
++
++	complete(waiting);
++	return 0;
++}
++
++static void _last_io(struct kref *kref)
++{
++	struct objio_state *ios = container_of(kref, struct objio_state, kref);
++
++	ios->done(ios);
++}
++
++static void _done_io(struct osd_request *or, void *p)
++{
++	struct objio_state *ios = p;
++
++	kref_put(&ios->kref, _last_io);
++}
++
++static ssize_t _io_exec(struct objio_state *ios)
++{
++	DECLARE_COMPLETION_ONSTACK(wait);
++	ssize_t status = 0; /* stays 0 unless ios->ol_state.sync is set */
++	unsigned i;
++	objio_done_fn saved_done_fn = ios->done;
++	bool sync = ios->ol_state.sync;
++
++	if (sync) {
++		ios->done = _sync_done;	/* only completes &wait */
++		ios->private = &wait;
++	}
++
++	kref_init(&ios->kref);	/* our own ref, dropped after the loop */
++
++	for (i = 0; i < ios->numdevs; i++) {
++		struct osd_request *or = ios->per_dev[i].or;
++
++		if (!or)
++			continue;
++
++		kref_get(&ios->kref);	/* one ref per request, put in _done_io */
++		osd_execute_request_async(or, _done_io, ios);
++	}
++
++	kref_put(&ios->kref, _last_io);	/* last put calls ios->done */
++
++	if (sync) {
++		wait_for_completion(&wait);
++		status = saved_done_fn(ios);	/* real done fn, run by the caller */
++	}
++
++	return status;
++}
++
++/*
++ * read
++ */
++static ssize_t _read_done(struct objio_state *ios)
++{
++	ssize_t status;
++	int ret = _io_check(ios, false);
++
++	_io_free(ios);
++
++	if (likely(!ret))
++		status = ios->length;
++	else
++		status = ret;
++
++	objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync);
++	return status;
++}
++
++static int _read_mirrors(struct objio_state *ios, unsigned cur_comp)
++{
++	struct osd_request *or = NULL;
++	struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
++	unsigned dev = per_dev->dev;
++	struct pnfs_osd_object_cred *cred =
++			&ios->objio_seg->layout->olo_comps[dev];
++	struct osd_obj_id obj = {
++		.partition = cred->oc_object_id.oid_partition_id,
++		.id = cred->oc_object_id.oid_object_id,
++	};
++	int ret;
++
++	or = osd_start_request(_io_od(ios, dev), GFP_KERNEL);
++	if (unlikely(!or)) {
++		ret = -ENOMEM;
++		goto err;
++	}
++	per_dev->or = or;
++
++	osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length);
++
++	ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
++	if (ret) {
++		dprintk("%s: Faild to osd_finalize_request() => %d\n",
++			__func__, ret);
++		goto err;
++	}
++
++	dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
++		__func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
++		per_dev->length);
++
++err:
++	return ret;
++}
++
++static ssize_t _read_exec(struct objio_state *ios)
++{
++	unsigned i;
++	int ret;
++
++	for (i = 0; i < ios->numdevs; i += ios->objio_seg->mirrors_p1) {
++		if (!ios->per_dev[i].length)
++			continue;
++		ret = _read_mirrors(ios, i);
++		if (unlikely(ret))
++			goto err;
++	}
++
++	ios->done = _read_done;
++	return _io_exec(ios); /* In sync mode exec returns the io status */
++
++err:
++	_io_free(ios);
++	return ret;
++}
++
++ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state)
++{
++	struct objio_state *ios = container_of(ol_state, struct objio_state,
++					       ol_state);
++	int ret;
++
++	ret = _io_rw_pagelist(ios);
++	if (unlikely(ret))
++		return ret;
++
++	return _read_exec(ios);
++}
++
++/*
++ * write
++ */
++static ssize_t _write_done(struct objio_state *ios)
++{
++	ssize_t status;
++	int ret = _io_check(ios, true);
++
++	_io_free(ios);
++
++	if (likely(!ret)) {
++		/* FIXME: should be based on the OSD's persistence model
++		 * See OSD2r05 Section 4.13 Data persistence model */
++		ios->ol_state.committed = NFS_UNSTABLE; //NFS_FILE_SYNC;
++		status = ios->length;
++	} else {
++		status = ret;
++	}
++
++	objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync);
++	return status;
++}
++
++static int _write_mirrors(struct objio_state *ios, unsigned cur_comp)
++{
++	struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp];
++	unsigned dev = ios->per_dev[cur_comp].dev;
++	unsigned last_comp = cur_comp + ios->objio_seg->mirrors_p1;
++	int ret;
++
++	for (; cur_comp < last_comp; ++cur_comp, ++dev) {
++		struct osd_request *or = NULL;
++		struct pnfs_osd_object_cred *cred =
++					&ios->objio_seg->layout->olo_comps[dev];
++		struct osd_obj_id obj = {
++			.partition = cred->oc_object_id.oid_partition_id,
++			.id = cred->oc_object_id.oid_object_id,
++		};
++		struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
++		struct bio *bio;
++
++		or = osd_start_request(_io_od(ios, dev), GFP_KERNEL);
++		if (unlikely(!or)) {
++			ret = -ENOMEM;
++			goto err;
++		}
++		per_dev->or = or;
++
++		if (per_dev != master_dev) {
++			bio = bio_kmalloc(GFP_KERNEL,
++					  master_dev->bio->bi_max_vecs);
++			if (unlikely(!bio)) {
++				dprintk("Faild to allocate BIO size=%u\n",
++					master_dev->bio->bi_max_vecs);
++				ret = -ENOMEM;
++				goto err;
++			}
++
++			__bio_clone(bio, master_dev->bio);
++			bio->bi_bdev = NULL;
++			bio->bi_next = NULL;
++			per_dev->bio = bio;
++			per_dev->dev = dev;
++			per_dev->length = master_dev->length;
++			per_dev->offset =  master_dev->offset;
++		} else {
++			bio = master_dev->bio;
++			/* FIXME: bio_set_dir() */
++			bio->bi_rw |= (1 << BIO_RW);
++		}
++
++		osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length);
++
++		ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
++		if (ret) {
++			dprintk("%s: Faild to osd_finalize_request() => %d\n",
++				__func__, ret);
++			goto err;
++		}
++
++		dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
++			__func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
++			per_dev->length);
++	}
++
++err:
++	return ret;
++}
++
++static ssize_t _write_exec(struct objio_state *ios)
++{
++	unsigned i;
++	int ret;
++
++	for (i = 0; i < ios->numdevs; i += ios->objio_seg->mirrors_p1) {
++		if (!ios->per_dev[i].length)
++			continue;
++		ret = _write_mirrors(ios, i);
++		if (unlikely(ret))
++			goto err;
++	}
++
++	ios->done = _write_done;
++	return _io_exec(ios); /* In sync mode exec returns the io->status */
++
++err:
++	_io_free(ios);
++	return ret;
++}
++
++ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable)
++{
++	struct objio_state *ios = container_of(ol_state, struct objio_state,
++					       ol_state);
++	int ret;
++
++	/* TODO: ios->stable = stable; */
++	ret = _io_rw_pagelist(ios);
++	if (unlikely(ret))
++		return ret;
++
++	return _write_exec(ios);
++}
++
++/*
++ * Policy Operations
++ */
++
++/*
++ * Return the largest stripe size among the layout's segments, -1 if none
++ */
++ssize_t
++objlayout_get_stripesize(struct pnfs_layout_type *pnfslay)
++{
++	ssize_t sz, maxsz = -1;
++	struct pnfs_layout_segment *lseg;
++
++	list_for_each_entry(lseg, &pnfslay->segs, fi_list) {
++		int n;
++		struct objlayout_segment *objlseg = LSEG_LD_DATA(lseg);
++		struct pnfs_osd_layout *lo =
++			(struct pnfs_osd_layout *)objlseg->pnfs_osd_layout;
++		struct pnfs_osd_data_map *map = &lo->olo_map;
++
++		n = map->odm_group_width;
++		if (n == 0)
++			n = map->odm_num_comps / (map->odm_mirror_cnt + 1);
++
++		switch (map->odm_raid_algorithm) {
++		case PNFS_OSD_RAID_0:
++			break;
++
++		case PNFS_OSD_RAID_4:
++		case PNFS_OSD_RAID_5:
++			n -= 1;
++			break;
++
++		case PNFS_OSD_RAID_PQ:
++			n -= 2;
++			break;
++
++		default:
++			BUG_ON(1);
++		}
++		sz = map->odm_stripe_unit * n;
++		if (sz > maxsz)
++			maxsz = sz;
++	}
++	dprintk("%s: Return %Zx\n", __func__, maxsz);
++	return maxsz;
++}
++
++/*
++ * Get the max [rw]size
++ */
++static ssize_t
++objlayout_get_blocksize(void)
++{
++	ssize_t sz = BIO_MAX_PAGES_KMALLOC * PAGE_SIZE;
++
++	return sz;
++}
++
++static struct layoutdriver_policy_operations objlayout_policy_operations = {
++/*
++ * Don't gather across stripes, but rather gather (coalesce) up to
++ * the stripe size.
++ *
++ * FIXME: change interface to use merge_align, merge_count
++ */
++	.flags                 = PNFS_LAYOUTRET_ON_SETATTR,
++	.get_stripesize        = objlayout_get_stripesize,
++	.get_blocksize         = objlayout_get_blocksize,
++};
++
++static struct pnfs_layoutdriver_type objlayout_type = {
++	.id = LAYOUT_OSD2_OBJECTS,
++	.name = "LAYOUT_OSD2_OBJECTS",
++	.ld_io_ops = &objlayout_io_operations,
++	.ld_policy_ops = &objlayout_policy_operations,
++};
++
++void *objio_init_mt(void)
++{
++	struct objio_mount_type *omt = kzalloc(sizeof(*omt), GFP_KERNEL);
++
++	if (!omt)
++		return ERR_PTR(-ENOMEM);
++
++	INIT_LIST_HEAD(&omt->dev_list);
++	spin_lock_init(&omt->dev_list_lock);
++	return omt;
++}
++
++void objio_fini_mt(void *mountid)
++{
++	_dev_list_remove_all(mountid);
++	kfree(mountid);
++}
++
++MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
++MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
++MODULE_LICENSE("GPL");
++
++static int __init
++objlayout_init(void)
++{
++	pnfs_client_ops = pnfs_register_layoutdriver(&objlayout_type);
++	printk(KERN_INFO "%s: %s OSD pNFS Layout Driver\n", __func__,
++	       pnfs_client_ops ? "Registered" : "Failed to register");
++	return pnfs_client_ops ? 0 : -ENODEV; /* assumes NULL == failure */
++}
++
++static void __exit
++objlayout_exit(void)
++{
++	pnfs_unregister_layoutdriver(&objlayout_type);
++	printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n",
++	       __func__);
++}
++
++module_init(objlayout_init);
++module_exit(objlayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-08-23 12:09:03.350491564 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-08-23 12:09:03.350491564 -0400
+@@ -0,0 +1,790 @@
++/*
++ *  objlayout.c
++ *
++ *  pNFS layout driver for Panasas OSDs
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <scsi/osd_initiator.h>
++#include "objlayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++struct pnfs_client_operations *pnfs_client_ops;
++
++/*
++ * Create an objlayout for @inode; returns its pnfs_layout, or NULL on ENOMEM.
++ */
++static struct pnfs_layout_type *
++objlayout_alloc_layout(struct inode *inode)
++{
++	struct objlayout *objlay;
++
++	objlay = kzalloc(sizeof(struct objlayout), GFP_KERNEL);
++	if (objlay) {
++		spin_lock_init(&objlay->lock);
++		INIT_LIST_HEAD(&objlay->err_list);
++	}
++	dprintk("%s: Return %p\n", __func__, objlay);
++	return objlay ? &objlay->pnfs_layout : NULL; /* no &NULL->member */
++}
++
++/*
++ * Free an objlayout layout structure
++ */
++static void
++objlayout_free_layout(struct pnfs_layout_type *lo)
++{
++	struct objlayout *objlay = OBJLAYOUT(lo);
++
++	dprintk("%s: objlay %p\n", __func__, objlay);
++
++	WARN_ON(!list_empty(&objlay->err_list));
++	kfree(objlay);
++}
++
++/*
++ * Unmarshall layout and store it in pnfslay.
++ */
++static struct pnfs_layout_segment *
++objlayout_alloc_lseg(struct pnfs_layout_type *pnfslay,
++		     struct nfs4_pnfs_layoutget_res *lgr)
++{
++	int status;
++	void *layout = lgr->layout.buf;
++	struct pnfs_layout_segment *lseg;
++	struct objlayout_segment *objlseg;
++	struct pnfs_osd_layout *pnfs_osd_layout;
++
++	dprintk("%s: Begin pnfslay %p layout %p\n", __func__, pnfslay, layout);
++
++	BUG_ON(!layout);
++
++	status = -ENOMEM;
++	lseg = kzalloc(sizeof(*lseg) + sizeof(*objlseg) +
++		       pnfs_osd_layout_incore_sz(layout), GFP_KERNEL);
++	if (!lseg)
++		goto err;
++
++	objlseg = LSEG_LD_DATA(lseg);
++	pnfs_osd_layout = (struct pnfs_osd_layout *)objlseg->pnfs_osd_layout;
++	pnfs_osd_xdr_decode_layout(pnfs_osd_layout, layout);
++
++	status = objio_alloc_lseg(&objlseg->internal, pnfslay, lseg,
++				  pnfs_osd_layout);
++	if (status)
++		goto err;
++
++	dprintk("%s: Return %p\n", __func__, lseg);
++	return lseg;
++
++ err:
++	kfree(lseg);
++	return ERR_PTR(status);
++}
++
++/*
++ * Free a layout segment
++ */
++static void
++objlayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	struct objlayout_segment *objlseg;
++
++	dprintk("%s: freeing layout segment %p\n", __func__, lseg);
++
++	if (unlikely(!lseg))
++		return;
++
++	objlseg = LSEG_LD_DATA(lseg);
++	objio_free_lseg(objlseg->internal);
++	kfree(lseg);
++}
++
++/*
++ * I/O Operations
++ */
++static inline u64
++end_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	end = start + len;
++	return end >= start ? end : NFS4_MAX_UINT64;
++}
++
++/* last octet in a range */
++static inline u64
++last_byte_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	BUG_ON(!len);
++	end = start + len;
++	return end > start ? end - 1 : NFS4_MAX_UINT64;
++}
++
++/*
++ * Allocate an io_state for @count bytes at @offset within @lseg (count is
++ * clipped to the segment end). Returns NULL if objio_alloc_io_state() fails.
++ */
++static struct objlayout_io_state *
++objlayout_alloc_io_state(struct pnfs_layout_type *pnfs_layout_type,
++			struct page **pages, unsigned pgbase, unsigned nr_pages,
++			loff_t offset, size_t count,
++			struct pnfs_layout_segment *lseg, void *rpcdata)
++{
++	struct objlayout_segment *objlseg = LSEG_LD_DATA(lseg);
++	struct objlayout_io_state *state;
++	u64 lseg_end_offset;
++	size_t size_nr_pages;
++
++	dprintk("%s: allocating io_state\n", __func__);
++	if (objio_alloc_io_state(objlseg->internal, &state))
++		return NULL;
++
++	BUG_ON(offset < lseg->range.offset);
++	lseg_end_offset = end_offset(lseg->range.offset, lseg->range.length);
++	BUG_ON(offset >= lseg_end_offset);
++	if (offset + count > lseg_end_offset) {
++		count = lseg->range.length - (offset - lseg->range.offset);
++		dprintk("%s: truncated count %Zd\n", __func__, count);
++	}
++
++	if (pgbase >= PAGE_SIZE) {	/* == PAGE_SIZE also skips a page */
++		unsigned n = pgbase >> PAGE_SHIFT;
++
++		pgbase &= ~PAGE_MASK;
++		pages += n;
++		nr_pages -= n;
++	}
++
++	size_nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
++	BUG_ON(nr_pages < size_nr_pages);
++	if (nr_pages > size_nr_pages)
++		nr_pages = size_nr_pages;
++
++	INIT_LIST_HEAD(&state->err_list);
++	state->lseg = lseg;
++	state->rpcdata = rpcdata;
++	state->pages = pages;
++	state->pgbase = pgbase;
++	state->nr_pages = nr_pages;
++	state->offset = offset;
++	state->count = count;
++	state->sync = 0;
++
++	return state;
++}
++
++static void
++objlayout_free_io_state(struct objlayout_io_state *state)
++{
++	dprintk("%s: freeing io_state\n", __func__);
++	if (unlikely(!state))
++		return;
++
++	objio_free_io_state(state);
++}
++
++/*
++ * I/O done common code: free @state on success, else queue it on err_list
++ */
++static void
++objlayout_iodone(struct objlayout_io_state *state)
++{
++	dprintk("%s: state %p status %d\n", __func__, state, state->status);
++
++	if (likely(state->status >= 0)) {
++		objlayout_free_io_state(state);
++	} else {
++		struct objlayout *objlay = OBJLAYOUT(state->lseg->layout);
++
++		spin_lock(&objlay->lock);
++		objlay->delta_space_valid = OBJ_DSU_INVALID;
++		list_add(&state->err_list, &objlay->err_list); /* (new, head) */
++		spin_unlock(&objlay->lock);
++	}
++}
++
++/*
++ * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
++ *
++ * The @index component IO failed (error returned from target). Register
++ * the error for later reporting at layout-return.
++ */
++void
++objlayout_io_set_result(struct objlayout_io_state *state, unsigned index,
++			int osd_error, u64 offset, u64 length, bool is_write)
++{
++	struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index];
++
++	BUG_ON(index >= state->num_comps);
++	if (osd_error) {
++		struct objlayout_segment *objlseg = LSEG_LD_DATA(state->lseg);
++		struct pnfs_osd_layout *layout =
++				(typeof(layout))objlseg->pnfs_osd_layout;
++
++		ioerr->oer_component = layout->olo_comps[index].oc_object_id;
++		ioerr->oer_comp_offset = offset;
++		ioerr->oer_comp_length = length;
++		ioerr->oer_iswrite = is_write;
++		ioerr->oer_errno = osd_error;
++
++		dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
++			"par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
++			__func__, index, ioerr->oer_errno,
++			ioerr->oer_iswrite,
++			_DEVID_LO(&ioerr->oer_component.oid_device_id),
++			_DEVID_HI(&ioerr->oer_component.oid_device_id),
++			ioerr->oer_component.oid_partition_id,
++			ioerr->oer_component.oid_object_id,
++			ioerr->oer_comp_offset,
++			ioerr->oer_comp_length);
++	} else {
++		/* User need not call if no error is reported */
++		ioerr->oer_errno = 0;
++	}
++}
++
++static void _rpc_commit_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++
++	pnfs_client_ops->nfs_commit_complete(wdata);
++}
++
++/*
++ * Commit: no OSD I/O is issued here, only the completion is scheduled
++ */
++enum pnfs_try_status
++objlayout_commit(struct nfs_write_data *wdata, int how)
++{
++	int status = PNFS_ATTEMPTED;
++
++	INIT_WORK(&wdata->task.u.tk_work, _rpc_commit_complete);
++	schedule_work(&wdata->task.u.tk_work);
++	dprintk("%s: Return %d\n", __func__, status);
++	return status;
++}
++
++/* Work function queued with schedule_work() to call
++ * ->nfs_readlist_complete(). This is because the osd completion is
++ * called with ints-off from the block layer
++ */
++static void _rpc_read_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_read_data *rdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	rdata = container_of(task, struct nfs_read_data, task);
++
++	pnfs_client_ops->nfs_readlist_complete(rdata);
++}
++
++void
++objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
++{
++	int eof = state->eof;	/* read now: state may be freed below */
++	struct nfs_read_data *rdata;
++
++	state->status = status;
++	dprintk("%s: Begin status=%Zd eof=%d\n", __func__, status, eof);
++	rdata = state->rpcdata;
++	rdata->task.tk_status = status;
++	if (status >= 0) {
++		rdata->res.count = status;
++		rdata->res.eof = eof;
++	}
++	objlayout_iodone(state);
++	/* must not use state after this point */
++
++	if (sync)
++		pnfs_client_ops->nfs_readlist_complete(rdata);
++	else {	/* defer the completion call to a work item */
++		INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
++		schedule_work(&rdata->task.u.tk_work);
++	}
++}
++
++/*
++ * Perform sync or async reads.
++ */
++enum pnfs_try_status
++objlayout_read_pagelist(struct nfs_read_data *rdata, unsigned nr_pages)
++{
++	loff_t offset = rdata->args.offset;
++	size_t count = rdata->args.count;
++	struct objlayout_io_state *state;
++	ssize_t status = 0;
++	loff_t eof;
++
++	dprintk("%s: Begin inode %p offset %llu count %d\n",
++		__func__, rdata->inode, offset, (int)count);
++
++	eof = i_size_read(rdata->inode);
++	if (unlikely(offset + count > eof)) {
++		if (offset >= eof) {
++			status = 0;
++			rdata->res.count = 0;
++			rdata->res.eof = 1;
++			goto out;
++		}
++		count = eof - offset;
++	}
++
++	state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout,
++					 rdata->args.pages, rdata->args.pgbase,
++					 nr_pages, offset, count,
++					 rdata->pdata.lseg, rdata);
++	if (unlikely(!state)) {
++		status = -ENOMEM;
++		goto out;
++	}
++
++	state->eof = state->offset + state->count >= eof;
++
++	status = objio_read_pagelist(state);
++ out:
++	dprintk("%s: Return status %Zd\n", __func__, status);
++	rdata->pdata.pnfs_error = status;
++	return PNFS_ATTEMPTED;
++}
++
++/* Work function queued with schedule_work() to call
++ * ->nfs_writelist_complete(). This is because the osd completion is
++ * called with ints-off from the block layer
++ */
++static void _rpc_write_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++
++	pnfs_client_ops->nfs_writelist_complete(wdata);
++}
++
++void
++objlayout_write_done(struct objlayout_io_state *state, ssize_t status,
++		     bool sync)
++{
++	struct nfs_write_data *wdata;
++
++	dprintk("%s: Begin\n", __func__);
++	wdata = state->rpcdata;
++	state->status = status;
++	wdata->task.tk_status = status;
++	if (status >= 0) {
++		wdata->res.count = status;
++		wdata->verf.committed = state->committed;
++		dprintk("%s: Return status %d committed %d\n",
++			__func__, wdata->task.tk_status,
++			wdata->verf.committed);
++	} else
++		dprintk("%s: Return status %d\n",
++			__func__, wdata->task.tk_status);
++	objlayout_iodone(state);
++	/* must not use state after this point */
++
++	if (sync)
++		pnfs_client_ops->nfs_writelist_complete(wdata);
++	else {
++		INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
++		schedule_work(&wdata->task.u.tk_work);
++	}
++}
++
++/*
++ * Perform sync or async writes.
++ */
++enum pnfs_try_status
++objlayout_write_pagelist(struct nfs_write_data *wdata,
++			 unsigned nr_pages,
++			 int how)
++{
++	struct objlayout_io_state *state;
++	ssize_t status;
++
++	dprintk("%s: Begin inode %p offset %llu count %u\n",
++		__func__, wdata->inode, wdata->args.offset, wdata->args.count);
++
++	state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout,
++					 wdata->args.pages,
++					 wdata->args.pgbase,
++					 nr_pages,
++					 wdata->args.offset,
++					 wdata->args.count,
++					 wdata->pdata.lseg, wdata);
++	if (unlikely(!state)) {
++		status = -ENOMEM;
++		goto out;
++	}
++
++	state->sync = how & FLUSH_SYNC;
++
++	status = objio_write_pagelist(state, how & FLUSH_STABLE);
++ out:
++	dprintk("%s: Return status %Zd\n", __func__, status);
++	wdata->pdata.pnfs_error = status;
++	return PNFS_ATTEMPTED;
++}
++
++void
++objlayout_encode_layoutcommit(struct pnfs_layout_type *pnfslay,
++			      struct xdr_stream *xdr,
++			      const struct pnfs_layoutcommit_arg *args)
++{
++	struct objlayout *objlay = OBJLAYOUT(pnfslay);
++	struct pnfs_osd_layoutupdate lou;
++	__be32 *start;
++
++	dprintk("%s: Begin\n", __func__);
++	/* Snapshot the delta-space accounting and reset it, under the lock */
++	spin_lock(&objlay->lock);
++	lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
++	lou.dsu_delta = objlay->delta_space_used;
++	objlay->delta_space_used = 0;
++	objlay->delta_space_valid = OBJ_DSU_INIT;
++	lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
++	spin_unlock(&objlay->lock);
++
++	start = xdr_reserve_space(xdr, 4);
++	BUG_ON(!start);	/* as in objlayout_encode_layoutreturn() */
++	BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));
++	/* Fill the reserved word with the number of bytes encoded after it */
++	*start = cpu_to_be32((xdr->p - start - 1) * 4);
++
++	dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
++		lou.dsu_delta, lou.olu_ioerr_flag);
++}
++
++static int
++err_prio(u32 oer_errno)
++{
++	switch (oer_errno) {
++	case 0:
++		return 0;
++
++	case PNFS_OSD_ERR_RESOURCE:
++		return OSD_ERR_PRI_RESOURCE;
++	case PNFS_OSD_ERR_BAD_CRED:
++		return OSD_ERR_PRI_BAD_CRED;
++	case PNFS_OSD_ERR_NO_ACCESS:
++		return OSD_ERR_PRI_NO_ACCESS;
++	case PNFS_OSD_ERR_UNREACHABLE:
++		return OSD_ERR_PRI_UNREACHABLE;
++	case PNFS_OSD_ERR_NOT_FOUND:
++		return OSD_ERR_PRI_NOT_FOUND;
++	case PNFS_OSD_ERR_NO_SPACE:
++		return OSD_ERR_PRI_NO_SPACE;
++	default:
++		WARN_ON(1);
++		/* fallthrough */
++	case PNFS_OSD_ERR_EIO:
++		return OSD_ERR_PRI_EIO;
++	}
++}
++
++/* Fold @src_err into @dest_err: union of ranges, higher-priority errno */
++static void
++merge_ioerr(struct pnfs_osd_ioerr *dest_err,
++	    const struct pnfs_osd_ioerr *src_err)
++{
++	u64 dest_end, src_end;
++
++	if (!dest_err->oer_errno) {
++		*dest_err = *src_err;
++		/* accumulated device must be blank */
++		memset(&dest_err->oer_component.oid_device_id, 0,
++			sizeof(dest_err->oer_component.oid_device_id));
++		return;
++	}
++
++	if (dest_err->oer_component.oid_partition_id !=
++				src_err->oer_component.oid_partition_id)
++		dest_err->oer_component.oid_partition_id = 0;
++
++	if (dest_err->oer_component.oid_object_id !=
++				src_err->oer_component.oid_object_id)
++		dest_err->oer_component.oid_object_id = 0;
++
++	/* Take both ends before moving the start, or the union shrinks */
++	dest_end = end_offset(dest_err->oer_comp_offset,
++			      dest_err->oer_comp_length);
++	src_end =  end_offset(src_err->oer_comp_offset,
++			      src_err->oer_comp_length);
++	if (dest_end < src_end)
++		dest_end = src_end;
++
++	if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
++		dest_err->oer_comp_offset = src_err->oer_comp_offset;
++	dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;
++	/* A write error replaces a read error; else higher priority wins */
++	if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
++	    (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
++			dest_err->oer_errno = src_err->oer_errno;
++	} else if (src_err->oer_iswrite && !dest_err->oer_iswrite) {
++		dest_err->oer_iswrite = true;
++		dest_err->oer_errno = src_err->oer_errno;
++	}
++}
++
++static void
++encode_accumulated_error(struct objlayout *objlay, struct xdr_stream *xdr)
++{
++	struct objlayout_io_state *state, *tmp;
++	struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
++
++	list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
++		unsigned i;
++
++		for (i = 0; i < state->num_comps; i++) {
++			struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
++
++			if (!ioerr->oer_errno)
++				continue;
++
++			printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d "
++				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
++				"offset=0x%llx length=0x%llx\n",
++				__func__, i, ioerr->oer_errno,
++				ioerr->oer_iswrite,
++				_DEVID_LO(&ioerr->oer_component.oid_device_id),
++				_DEVID_HI(&ioerr->oer_component.oid_device_id),
++				ioerr->oer_component.oid_partition_id,
++				ioerr->oer_component.oid_object_id,
++				ioerr->oer_comp_offset,
++				ioerr->oer_comp_length);
++
++			merge_ioerr(&accumulated_err, ioerr);
++		}
++		list_del(&state->err_list);
++		objlayout_free_io_state(state);
++	}
++
++	BUG_ON(pnfs_osd_xdr_encode_ioerr(xdr, &accumulated_err));
++}
++
++void
++objlayout_encode_layoutreturn(struct pnfs_layout_type *pnfslay,
++			      struct xdr_stream *xdr,
++			      const struct nfs4_pnfs_layoutreturn_arg *args)
++{
++	struct objlayout *objlay = OBJLAYOUT(pnfslay);
++	struct objlayout_io_state *state, *tmp;
++	__be32 *start, *uninitialized_var(last_xdr);
++
++	dprintk("%s: Begin\n", __func__);
++	start = xdr_reserve_space(xdr, 4);
++	BUG_ON(!start);
++
++	spin_lock(&objlay->lock);
++
++	list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
++		unsigned i;
++		int res = 0;
++
++		for (i = 0; i < state->num_comps && !res; i++) {
++			struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
++
++			if (!ioerr->oer_errno)
++				continue;
++
++			dprintk("%s: err[%d]: errno=%d is_write=%d "
++				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
++				"offset=0x%llx length=0x%llx\n",
++				__func__, i, ioerr->oer_errno,
++				ioerr->oer_iswrite,
++				_DEVID_LO(&ioerr->oer_component.oid_device_id),
++				_DEVID_HI(&ioerr->oer_component.oid_device_id),
++				ioerr->oer_component.oid_partition_id,
++				ioerr->oer_component.oid_object_id,
++				ioerr->oer_comp_offset,
++				ioerr->oer_comp_length);
++
++			last_xdr = xdr->p;
++			res = pnfs_osd_xdr_encode_ioerr(xdr, &state->ioerrs[i]);
++		}
++		if (unlikely(res)) {
++			/* no space for even one error descriptor */
++			BUG_ON(last_xdr == start + 1);
++
++			/* we've encountered a situation with lots and lots of
++			 * errors and no space to encode them all. Use the last
++			 * available slot to report the union of all the
++			 * remaining errors.
++			 */
++			xdr_rewind_stream(xdr, last_xdr -
++					       pnfs_osd_ioerr_xdr_sz() / 4);
++			encode_accumulated_error(objlay, xdr);
++			goto loop_done;
++		}
++		list_del(&state->err_list);
++		objlayout_free_io_state(state);
++	}
++loop_done:
++	spin_unlock(&objlay->lock);
++
++	*start = cpu_to_be32((xdr->p - start - 1) * 4);
++	dprintk("%s: Return\n", __func__);
++}
++
++struct objlayout_deviceinfo {
++	struct page *page;
++	struct pnfs_osd_deviceaddr da; /* This must be last */
++};
++
++/* Initialize and call nfs_getdeviceinfo, then decode and return a
++ * "struct pnfs_osd_deviceaddr *" through @deviceaddr. Returns 0, or
++ * -ENOMEM, or the error from nfs_getdeviceinfo. After success
++ * objlayout_put_deviceinfo() should eventually be called.
++ */
++int objlayout_get_deviceinfo(struct pnfs_layout_type *pnfslay,
++	struct pnfs_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr)
++{
++	struct objlayout_deviceinfo *odi;
++	struct pnfs_device pd;
++	struct page *page;
++	size_t sz;
++	u32 *p;
++	int err;
++
++	page = alloc_page(GFP_KERNEL);
++	if (!page)
++		return -ENOMEM;
++
++	pd.area = page_address(page);
++
++	memcpy(&pd.dev_id, d_id, sizeof(*d_id));
++	pd.layout_type = LAYOUT_OSD2_OBJECTS;
++	pd.dev_notify_types = 0;
++	pd.pages = &page;
++	pd.pgbase = 0;
++	pd.pglen = PAGE_SIZE;
++	pd.mincount = 0;
++
++	err = pnfs_client_ops->nfs_getdeviceinfo(PNFS_NFS_SERVER(pnfslay), &pd);
++	dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
++	if (err)
++		goto err_out;
++
++	p = pd.area;
++	sz = pnfs_osd_xdr_deviceaddr_incore_sz(p);
++	odi = kzalloc(sz + (sizeof(*odi) - sizeof(odi->da)), GFP_KERNEL);
++	if (!odi) {
++		err = -ENOMEM;
++		goto err_out;
++	}
++	pnfs_osd_xdr_decode_deviceaddr(&odi->da, p);
++	/* the page is kept with odi; objlayout_put_deviceinfo() frees both */
++	odi->page = page;
++	*deviceaddr = &odi->da;
++	return 0;
++
++err_out:
++	__free_page(page);
++	return err;
++}
++
++void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
++{
++	struct objlayout_deviceinfo *odi = container_of(deviceaddr,
++						struct objlayout_deviceinfo,
++						da);
++
++	__free_page(odi->page);
++	kfree(odi);
++}
++
++/*
++ * Initialize a mountpoint: allocate the I/O engine's per-mount
++ * data with objio_init_mt() and store it in server->pnfs_ld_data
++ * so it can be used later on.
++ * Returns 0, or the error encoded in objio_init_mt()'s result.
++ */
++static int
++objlayout_initialize_mountpoint(struct nfs_server *server,
++				const struct nfs_fh *mntfh)
++{
++	void *data;
++
++	data = objio_init_mt();
++	if (IS_ERR(data)) {
++		printk(KERN_INFO "%s: objlayout lib not ready err=%ld\n",
++		       __func__, PTR_ERR(data));
++		return PTR_ERR(data);
++	}
++	server->pnfs_ld_data = data;
++
++	dprintk("%s: Return data=%p\n", __func__, data);
++	return 0;
++}
++
++/*
++ * Uninitialize a mountpoint
++ */
++static int
++objlayout_uninitialize_mountpoint(struct nfs_server *server)
++{
++	dprintk("%s: Begin %p\n", __func__, server->pnfs_ld_data);
++	objio_fini_mt(server->pnfs_ld_data);
++	return 0;
++}
++
++struct layoutdriver_io_operations objlayout_io_operations = {
++	.commit                  = objlayout_commit,
++	.read_pagelist           = objlayout_read_pagelist,
++	.write_pagelist          = objlayout_write_pagelist,
++	.alloc_layout            = objlayout_alloc_layout,
++	.free_layout             = objlayout_free_layout,
++	.alloc_lseg              = objlayout_alloc_lseg,
++	.free_lseg               = objlayout_free_lseg,
++	.encode_layoutcommit	 = objlayout_encode_layoutcommit,
++	.encode_layoutreturn     = objlayout_encode_layoutreturn,
++	.initialize_mountpoint   = objlayout_initialize_mountpoint,
++	.uninitialize_mountpoint = objlayout_uninitialize_mountpoint,
++};
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-08-23 12:09:03.351434439 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-08-23 12:09:03.351434439 -0400
+@@ -0,0 +1,171 @@
++/*
++ *  objlayout.h
++ *
++ *  Data types and function declarations for interfacing with the
++ *  pNFS standard object layout driver.
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#ifndef _OBJLAYOUT_H
++#define _OBJLAYOUT_H
++
++#include <linux/nfs_fs.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/pnfs_osd_xdr.h>
++
++/*
++ * in-core layout segment
++ */
++struct objlayout_segment {
++	void *internal;    /* for provider internal use */
++	u8 pnfs_osd_layout[];
++};
++
++/*
++ * per-inode layout
++ */
++struct objlayout {
++	struct pnfs_layout_type pnfs_layout;
++
++	 /* for layout_commit */
++	enum osd_delta_space_valid_enum {
++		OBJ_DSU_INIT = 0,
++		OBJ_DSU_VALID,
++		OBJ_DSU_INVALID,
++	} delta_space_valid;
++	s64 delta_space_used;  /* consumed by write ops */
++
++	 /* for layout_return */
++	spinlock_t lock;
++	struct list_head err_list;
++};
++
++static inline struct objlayout *
++OBJLAYOUT(struct pnfs_layout_type *lo)
++{
++	return container_of(lo, struct objlayout, pnfs_layout);
++}
++
++/*
++ * per-I/O operation state
++ * embedded in objects provider io_state data structure
++ */
++struct objlayout_io_state {
++	struct pnfs_layout_segment *lseg;
++
++	struct page **pages;
++	unsigned pgbase;
++	unsigned nr_pages;
++	unsigned long count;
++	loff_t offset;
++	bool sync;
++
++	void *rpcdata;
++	int status;             /* res */
++	int eof;                /* res */
++	int committed;          /* res */
++
++	/* Error reporting (layout_return) */
++	struct list_head err_list;
++	unsigned num_comps;
++	/* Pointer to array of error descriptors of size num_comps.
++	 * It should contain as many entries as devices in the osd_layout
++	 * that participate in the I/O. It is up to the io_engine to allocate
++	 * needed space and set num_comps.
++	 */
++	struct pnfs_osd_ioerr *ioerrs;
++};
++
++/*
++ * Raid engine I/O API
++ */
++extern void *objio_init_mt(void);
++extern void objio_fini_mt(void *mt);
++
++extern int objio_alloc_lseg(void **outp,
++	struct pnfs_layout_type *pnfslay,
++	struct pnfs_layout_segment *lseg,
++	struct pnfs_osd_layout *layout);
++extern void objio_free_lseg(void *p);
++
++extern int objio_alloc_io_state(void *seg, struct objlayout_io_state **outp);
++extern void objio_free_io_state(struct objlayout_io_state *state);
++
++extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state);
++extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state,
++				    bool stable);
++
++/*
++ * callback API
++ */
++extern void objlayout_io_set_result(struct objlayout_io_state *state,
++				    unsigned index, int osd_error,
++				    u64 offset, u64 length, bool is_write);
++
++static inline void
++objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used)
++{
++	struct objlayout *objlay = OBJLAYOUT(state->lseg->layout);
++
++	/* If one of the I/Os errored out and the delta_space_used was
++	 * invalid we render the complete report as invalid. Protocol mandate
++	 * the DSU be accurate or not reported.
++	 */
++	spin_lock(&objlay->lock);
++	if (objlay->delta_space_valid != OBJ_DSU_INVALID) {
++		objlay->delta_space_valid = OBJ_DSU_VALID;
++		objlay->delta_space_used += space_used;
++	}
++	spin_unlock(&objlay->lock);
++}
++
++extern void objlayout_read_done(struct objlayout_io_state *state,
++				ssize_t status, bool sync);
++extern void objlayout_write_done(struct objlayout_io_state *state,
++				 ssize_t status, bool sync);
++
++extern int objlayout_get_deviceinfo(struct pnfs_layout_type *pnfslay,
++	struct pnfs_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr);
++extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr);
++
++/*
++ * exported generic objects function vectors
++ */
++extern struct layoutdriver_io_operations objlayout_io_operations;
++extern struct pnfs_client_operations *pnfs_client_ops;
++
++#endif /* _OBJLAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-08-23 12:09:03.352501716 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-08-23 12:09:03.352501716 -0400
+@@ -0,0 +1,734 @@
++/*
++ *  panfs_shim.c
++ *
++ *  Shim layer for interfacing with the Panasas DirectFlow module I/O stack
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ */
++
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <asm/byteorder.h>
++
++#include "objlayout.h"
++#include "panfs_shim.h"
++
++#include <linux/panfs_shim_api.h>
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++struct panfs_export_operations *panfs_export_ops;
++
++void *
++objio_init_mt(void)
++{
++	return panfs_export_ops == NULL ? ERR_PTR(-EAGAIN) : NULL;
++}
++
++void objio_fini_mt(void *mountid)
++{
++}
++
++static int
++panfs_shim_conv_raid01(struct pnfs_osd_layout *layout,
++		       struct pnfs_osd_data_map *lo_map,
++		       pan_agg_layout_hdr_t *hdr)
++{
++	if (lo_map->odm_mirror_cnt) {
++		hdr->type = PAN_AGG_RAID1;
++		hdr->hdr.raid1.num_comps = lo_map->odm_mirror_cnt + 1;
++	} else if (layout->olo_num_comps > 1) {
++		hdr->type = PAN_AGG_RAID0;
++		hdr->hdr.raid0.num_comps = layout->olo_num_comps;
++		hdr->hdr.raid0.stripe_unit = lo_map->odm_stripe_unit;
++	} else
++		hdr->type = PAN_AGG_SIMPLE;
++	return 0;
++}
++
++static int
++panfs_shim_conv_raid5(struct pnfs_osd_layout *layout,
++		      struct pnfs_osd_data_map *lo_map,
++		      pan_agg_layout_hdr_t *hdr)
++{
++	if (lo_map->odm_mirror_cnt)
++		goto err;	/* mirrored RAID5 maps are rejected */
++
++	if (lo_map->odm_group_width || lo_map->odm_group_depth) {
++		if (!lo_map->odm_group_width || !lo_map->odm_group_depth)
++			goto err;	/* width and depth must both be set */
++
++		hdr->type = PAN_AGG_GRP_RAID5_LEFT;
++		hdr->hdr.grp_raid5_left.num_comps = lo_map->odm_num_comps;
++		if (hdr->hdr.grp_raid5_left.num_comps != lo_map->odm_num_comps)
++			goto err;	/* did not fit the 16-bit hdr field */
++		hdr->hdr.grp_raid5_left.stripe_unit = lo_map->odm_stripe_unit;
++		hdr->hdr.grp_raid5_left.rg_width = lo_map->odm_group_width;
++		hdr->hdr.grp_raid5_left.rg_depth = lo_map->odm_group_depth;
++		/* this is a guess, panasas server is not supposed to
++		   hand out layout otherwise */
++		hdr->hdr.grp_raid5_left.group_layout_policy =
++			PAN_AGG_GRP_RAID5_LEFT_POLICY_ROUND_ROBIN;
++	} else {
++		hdr->type = PAN_AGG_RAID5_LEFT;
++		hdr->hdr.raid5_left.num_comps = lo_map->odm_num_comps;
++		if (hdr->hdr.raid5_left.num_comps != lo_map->odm_num_comps)
++			goto err;	/* did not fit the 16-bit hdr field */
++		hdr->hdr.raid5_left.stripe_unit2 =
++		hdr->hdr.raid5_left.stripe_unit1 =
++		hdr->hdr.raid5_left.stripe_unit0 = lo_map->odm_stripe_unit;
++	}
++
++	return 0;
++err:
++	return -EINVAL;
++}
++
++/*
++ * Fill @hdr from @layout's data map; returns 0, or -EINVAL if unsupported.
++ */
++static int
++panfs_shim_conv_pnfs_osd_data_map(
++	struct pnfs_osd_layout *layout,
++	pan_agg_layout_hdr_t *hdr)
++{
++	int status = -EINVAL;
++	struct pnfs_osd_data_map *lo_map = &layout->olo_map;
++
++	if (!layout->olo_num_comps) {
++		dprintk("%s: !!layout.n_comps(%u)\n", __func__,
++			layout->olo_num_comps);
++		goto err;
++	}
++
++	switch (lo_map->odm_raid_algorithm) {
++	case PNFS_OSD_RAID_0:
++		if (layout->olo_num_comps != lo_map->odm_num_comps ||
++		    layout->olo_comps_index) {
++			dprintk("%s: !!PNFS_OSD_RAID_0 "
++				"layout.n_comps(%u) map.n_comps(%u) "
++				"comps_index(%u)\n", __func__,
++				layout->olo_num_comps,
++				lo_map->odm_num_comps,
++				layout->olo_comps_index);
++			goto err;
++		}
++		status = panfs_shim_conv_raid01(layout, lo_map, hdr);
++		break;
++
++	case PNFS_OSD_RAID_5:
++		if (!lo_map->odm_group_width) {
++			if (layout->olo_num_comps != lo_map->odm_num_comps ||
++			    layout->olo_comps_index) {
++				dprintk("%s: !!PNFS_OSD_RAID_5 !group_width "
++					"layout.n_comps(%u)!=map.n_comps(%u) "
++					"|| comps_index(%u)\n", __func__,
++					layout->olo_num_comps,
++					lo_map->odm_num_comps,
++					layout->olo_comps_index);
++				goto err;
++			}
++		} else if ((layout->olo_num_comps != lo_map->odm_num_comps &&
++			    layout->olo_num_comps > lo_map->odm_group_width) ||
++			   (layout->olo_comps_index % lo_map->odm_group_width)){
++				dprintk("%s: !!PNFS_OSD_RAID_5 group_width(%u) "
++					"layout.n_comps(%u) map.n_comps(%u) "
++					"comps_index(%u)\n", __func__,
++					lo_map->odm_group_width,
++					layout->olo_num_comps,
++					lo_map->odm_num_comps,
++					layout->olo_comps_index);
++				goto err;
++			}
++		status = panfs_shim_conv_raid5(layout, lo_map, hdr);
++		break;
++
++	case PNFS_OSD_RAID_4:
++	case PNFS_OSD_RAID_PQ:
++	default:
++		dprintk("%s: !!PNFS_OSD_RAID_(%d)\n", __func__,
++			lo_map->odm_raid_algorithm);
++		goto err;
++	}
++
++	return status;	/* result of the raid01/raid5 converter */
++
++err:
++	return status;
++}
++
++/*
++ * Build a pan_sm_map_cap_t from @layout in *@outp; 0 or negative errno.
++ */
++int
++objio_alloc_lseg(void **outp,
++	struct pnfs_layout_type *pnfslay,
++	struct pnfs_layout_segment *lseg,
++	struct pnfs_osd_layout *layout)
++{
++	int i, total_comps;
++	int status;
++	struct pnfs_osd_object_cred *lo_comp;
++	pan_size_t alloc_sz, local_sz;
++	pan_sm_map_cap_t *mcs = NULL;
++	u8 *buf;
++	pan_agg_comp_obj_t *pan_comp;
++	pan_sm_sec_t *pan_sec;
++
++	status = -EINVAL;
++	if (layout->olo_num_comps < layout->olo_map.odm_group_width) {
++		total_comps = layout->olo_comps_index + layout->olo_num_comps;
++	} else {
++		/* allocate full map, otherwise SAM gets confused */
++		total_comps = layout->olo_map.odm_num_comps;
++	}
++	alloc_sz = total_comps *
++		   (sizeof(pan_agg_comp_obj_t) + sizeof(pan_sm_sec_t));
++	for (i = 0; i < layout->olo_num_comps; i++) {
++		void *p = layout->olo_comps[i].oc_cap.cred;
++		if (panfs_export_ops->sm_sec_t_get_size_otw(
++			(pan_sm_sec_otw_t *)&p, &local_sz, NULL, NULL))
++			goto err;
++		alloc_sz += local_sz;
++	}
++
++	status = -ENOMEM;
++	mcs = kzalloc(sizeof(*mcs) + alloc_sz, GFP_KERNEL);
++	if (!mcs)
++		goto err;
++	buf = (u8 *)&mcs[1];
++
++	mcs->offset = lseg->range.offset;
++	mcs->length = lseg->range.length;
++#if 0
++	/* FIXME: for now */
++	mcs->expiration_time.ts_sec  = 0;
++	mcs->expiration_time.ts_nsec = 0;
++#endif
++	mcs->full_map.map_hdr.avail_state = PAN_AGG_OBJ_STATE_NORMAL;
++	status = panfs_shim_conv_pnfs_osd_data_map(layout,
++						   &mcs->full_map.layout_hdr);
++	if (status)
++		goto err;
++
++	mcs->full_map.components.size = total_comps;
++	mcs->full_map.components.data = (pan_agg_comp_obj_t *)buf;
++	buf += total_comps * sizeof(pan_agg_comp_obj_t);
++
++	mcs->secs.size = total_comps;
++	mcs->secs.data = (pan_sm_sec_t *)buf;
++	buf += total_comps * sizeof(pan_sm_sec_t);
++
++	lo_comp = layout->olo_comps;
++	pan_comp = mcs->full_map.components.data + layout->olo_comps_index;
++	pan_sec = mcs->secs.data + layout->olo_comps_index;
++	for (i = 0; i < layout->olo_num_comps; i++) {
++		void *p;
++		pan_stor_obj_id_t *obj_id = &mcs->full_map.map_hdr.obj_id;
++		struct pnfs_osd_objid *oc_obj_id = &lo_comp->oc_object_id;
++		u64 dev_id = __be64_to_cpup(
++			(__be64 *)oc_obj_id->oid_device_id.data + 1);
++
++		dprintk("%s: i=%d deviceid=%Lx:%Lx partition=%Lx object=%Lx\n",
++			__func__, i,
++			__be64_to_cpup((__be64 *)oc_obj_id->oid_device_id.data),
++			__be64_to_cpup((__be64 *)oc_obj_id->oid_device_id.data + 1),
++			oc_obj_id->oid_partition_id, oc_obj_id->oid_object_id);
++
++		if (i == 0) {
++			/* make up mgr_id to calm sam down */
++			pan_mgr_id_construct_artificial(PAN_MGR_SM, 0,
++							&obj_id->dev_id);
++			obj_id->grp_id = oc_obj_id->oid_partition_id;
++			obj_id->obj_id = oc_obj_id->oid_object_id;
++		}
++
++		if (obj_id->grp_id != lo_comp->oc_object_id.oid_partition_id) {
++			dprintk("%s: i=%d grp_id=0x%Lx oid_partition_id=0x%Lx\n",
++				__func__, i, (u64)obj_id->grp_id,
++				lo_comp->oc_object_id.oid_partition_id);
++			status = -EINVAL;
++			goto err;
++		}
++
++		if (obj_id->obj_id != lo_comp->oc_object_id.oid_object_id) {
++			dprintk("%s: i=%d obj_id=0x%Lx oid_object_id=0x%Lx\n",
++				__func__, i, obj_id->obj_id,
++				lo_comp->oc_object_id.oid_object_id);
++			status = -EINVAL;
++			goto err;
++		}
++
++		pan_comp->dev_id = dev_id;
++		if (!pan_stor_is_device_id_an_obsd_id(pan_comp->dev_id)) {
++			dprintk("%s: i=%d dev_id=0x%Lx not an obsd_id\n",
++				__func__, i, pan_comp->dev_id);
++			status = -EINVAL;
++			goto err;
++		}
++		if (lo_comp->oc_osd_version == PNFS_OSD_MISSING) {
++			dprintk("%s: degraded maps not supported yet\n",
++				__func__);
++			status = -ENOTSUPP;
++			goto err;
++		}
++		pan_comp->avail_state = PAN_AGG_COMP_STATE_NORMAL;
++		if (lo_comp->oc_cap_key_sec != PNFS_OSD_CAP_KEY_SEC_NONE) {
++			dprintk("%s: cap key security not supported yet\n",
++				__func__);
++			status = -ENOTSUPP;
++			goto err;
++		}
++
++		p = lo_comp->oc_cap.cred;
++		panfs_export_ops->sm_sec_t_unmarshall(
++			(pan_sm_sec_otw_t *)&p,
++			pan_sec,
++			buf,
++			alloc_sz,
++			NULL,
++			&local_sz);
++		buf += local_sz;
++		alloc_sz -= local_sz;
++
++		lo_comp++;
++		pan_comp++;
++		pan_sec++;
++	}
++
++	*outp = mcs;
++	dprintk("%s:Return mcs=%p\n", __func__, mcs);
++	return 0;
++
++err:
++	objio_free_lseg(mcs);
++	dprintk("%s:Error %d\n", __func__, status);
++	return status;
++}
++
++/*
++ * Free a Panasas map and caps type
++ */
++void
++objio_free_lseg(void *p)
++{
++	kfree(p);
++}
++
++/*
++ * I/O routines
++ */
++int
++objio_alloc_io_state(void *seg, struct objlayout_io_state **outp)
++{
++	struct panfs_shim_io_state *p;
++
++	dprintk("%s: allocating io_state\n", __func__);
++	p = kzalloc(sizeof(*p), GFP_KERNEL);
++	if (!p)
++		return -ENOMEM;
++
++	*outp = &p->ol_state;
++	return 0;
++}
++
++/*
++ * Free an I/O state
++ */
++void
++objio_free_io_state(struct objlayout_io_state *ol_state)
++{
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	int i;
++
++	dprintk("%s: freeing io_state\n", __func__);
++	for (i = 0; i < state->ol_state.nr_pages; i++)
++		kunmap(state->ol_state.pages[i]);
++
++	if (state->ucreds)
++		panfs_export_ops->ucreds_put(state->ucreds);
++	kfree(state->sg_list);
++	kfree(state);
++}
++
++static int
++panfs_shim_pages_to_sg(
++	struct panfs_shim_io_state *state,
++	struct page **pages,
++	unsigned int pgbase,
++	unsigned nr_pages,
++	size_t count)
++{
++	unsigned i, n;
++	pan_sg_entry_t *sg;
++
++	dprintk("%s pgbase %u nr_pages %u count %d "
++		"pg0 %p flags 0x%x index %llu\n",
++		__func__, pgbase, nr_pages, (int)count, pages[0],
++		(unsigned)pages[0]->flags, (unsigned long long)pages[0]->index);
++
++	sg = kmalloc(nr_pages * sizeof(*sg), GFP_KERNEL);
++	if (sg == NULL)
++		return -ENOMEM;
++
++	dprintk("%s sg_list %p pages %p pgbase %u nr_pages %u\n",
++		__func__, sg, pages, pgbase, nr_pages);
++
++	for (i = 0; i < nr_pages; i++) {
++		sg[i].buffer = (char *)kmap(pages[i]) + pgbase;
++		n = PAGE_SIZE - pgbase;
++		pgbase = 0;
++		if (n > count)
++			n = count;
++		sg[i].chunk_size = n;
++		count -= n;
++		if (likely(count)) {
++			sg[i].next = &sg[i+1];
++		} else {
++			/* we're done */
++			sg[i].next = NULL;
++			break;
++		}
++	}
++	BUG_ON(count);
++
++	state->sg_list = sg;
++	return 0;
++}
++
++/*
++ * Callback function for async reads
++ */
++static void
++panfs_shim_read_done(
++	void *arg1,
++	void *arg2,
++	pan_sam_read_res_t *res_p,
++	pan_status_t rc)
++{
++	struct panfs_shim_io_state *state = arg1;
++	ssize_t status;
++
++	dprintk("%s: Begin\n", __func__);
++	if (!res_p)
++		res_p = &state->u.read.res;
++	if (rc == PAN_SUCCESS)
++		rc = res_p->result;
++	if (rc == PAN_SUCCESS) {
++		status = res_p->length;
++		WARN_ON(status < 0);
++	} else {
++		status = -panfs_export_ops->convert_rc(rc);
++		dprintk("%s: pan_sam_read rc %d: status %Zd\n",
++			__func__, rc, status);
++	}
++	dprintk("%s: Return status %Zd rc %d\n", __func__, status, rc);
++	objlayout_read_done(&state->ol_state, status, true);
++}
++
++ssize_t
++objio_read_pagelist(struct objlayout_io_state *ol_state)
++{
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	struct objlayout_segment *lseg = LSEG_LD_DATA(ol_state->lseg);
++	pan_sm_map_cap_t *mcs = (pan_sm_map_cap_t *)lseg->internal;
++	ssize_t status = 0;
++	pan_status_t rc = PAN_SUCCESS;
++
++	dprintk("%s: Begin\n", __func__);
++
++	status = panfs_shim_pages_to_sg(state, ol_state->pages,
++					ol_state->pgbase, ol_state->nr_pages,
++					ol_state->count);
++	if (unlikely(status))
++		goto err;
++
++	state->obj_sec.min_security = 0;
++	state->obj_sec.map_ccaps = mcs;
++
++	rc = panfs_export_ops->ucreds_get(&state->ucreds);
++	if (unlikely(rc)) {
++		status = -EACCES;
++		goto err;
++	}
++
++	state->u.read.args.obj_id = mcs->full_map.map_hdr.obj_id;
++	state->u.read.args.offset = ol_state->offset;
++	rc = panfs_export_ops->sam_read(PAN_SAM_ACCESS_BYPASS_TIMESTAMP,
++					&state->u.read.args,
++					&state->obj_sec,
++					state->sg_list,
++					state->ucreds,
++					ol_state->sync ?
++						NULL : panfs_shim_read_done,
++					state, NULL,
++					&state->u.read.res);
++	if (rc != PAN_ERR_IN_PROGRESS)
++		panfs_shim_read_done(state, NULL, &state->u.read.res, rc);
++ err:
++	dprintk("%s: Return %Zd\n", __func__, status);
++	return status;
++}
++
++/*
++ * Callback function for async writes
++ */
++static void
++panfs_shim_write_done(
++	void *arg1,
++	void *arg2,
++	pan_sam_write_res_t *res_p,
++	pan_status_t rc)
++{
++	struct panfs_shim_io_state *state = arg1;
++	ssize_t status;
++
++	dprintk("%s: Begin\n", __func__);
++	if (!res_p)
++		res_p = &state->u.write.res;
++	if (rc == PAN_SUCCESS)
++		rc = res_p->result;
++	if (rc == PAN_SUCCESS) {
++/*		state->ol_state.committed = NFS_FILE_SYNC;*/
++		state->ol_state.committed = NFS_UNSTABLE;
++		status = res_p->length;
++		WARN_ON(status < 0);
++
++		objlayout_add_delta_space_used(&state->ol_state,
++					       res_p->delta_capacity_used);
++	} else {
++		status = -panfs_export_ops->convert_rc(rc);
++		dprintk("%s: pan_sam_write rc %u: status %Zd\n",
++			__func__, rc, status);
++	}
++	dprintk("%s: Return status %Zd rc %d\n", __func__, status, rc);
++	objlayout_write_done(&state->ol_state, status, true);
++}
++
++ssize_t
++objio_write_pagelist(struct objlayout_io_state *ol_state,
++		     bool stable /* unused, PanOSD writes are stable */)
++{
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	struct objlayout_segment *lseg = LSEG_LD_DATA(ol_state->lseg);
++	pan_sm_map_cap_t *mcs = (pan_sm_map_cap_t *)lseg->internal;
++	ssize_t status = 0;
++	pan_status_t rc = PAN_SUCCESS;
++
++	dprintk("%s: Begin\n", __func__);
++
++	status = panfs_shim_pages_to_sg(state, ol_state->pages,
++					ol_state->pgbase, ol_state->nr_pages,
++					ol_state->count);
++	if (unlikely(status))
++		goto err;
++
++	state->obj_sec.min_security = 0;
++	state->obj_sec.map_ccaps = mcs;
++
++	rc = panfs_export_ops->ucreds_get(&state->ucreds);
++	if (unlikely(rc)) {
++		status = -EACCES;
++		goto err;
++	}
++
++	state->u.write.args.obj_id = mcs->full_map.map_hdr.obj_id;
++	state->u.write.args.offset = ol_state->offset;
++	rc = panfs_export_ops->sam_write(PAN_SAM_ACCESS_NONE,
++					 &state->u.write.args,
++					 &state->obj_sec,
++					 state->sg_list,
++					 state->ucreds,
++					 ol_state->sync ?
++						NULL : panfs_shim_write_done,
++					 state,
++					 NULL,
++					 &state->u.write.res);
++	if (rc != PAN_ERR_IN_PROGRESS)
++		panfs_shim_write_done(state, NULL, &state->u.write.res, rc);
++ err:
++	dprintk("%s: Return %Zd\n", __func__, status);
++	return status;
++}
++
++int
++panfs_shim_register(struct panfs_export_operations *ops)
++{
++	if (panfs_export_ops) {
++		printk(KERN_INFO
++		       "%s: panfs already registered (panfs ops %p)\n",
++		       __func__, panfs_export_ops);
++		return -EINVAL;
++	}
++
++	printk(KERN_INFO "%s: registering panfs ops %p\n",
++	       __func__, ops);
++
++	panfs_export_ops = ops;
++	return 0;
++}
++EXPORT_SYMBOL(panfs_shim_register);
++
++int
++panfs_shim_unregister(void)
++{
++	if (!panfs_export_ops) {
++		printk(KERN_INFO "%s: panfs is not registered\n", __func__);
++		return -EINVAL;
++	}
++
++	printk(KERN_INFO "%s: unregistering panfs ops %p\n",
++	       __func__, panfs_export_ops);
++
++	panfs_export_ops = NULL;
++	return 0;
++}
++EXPORT_SYMBOL(panfs_shim_unregister);
++
++/*
++ * Policy Operations
++ */
++
++/*
++ * Return the stripe size for the specified file
++ */
++ssize_t
++panlayout_get_stripesize(struct pnfs_layout_type *pnfslay)
++{
++	ssize_t sz, maxsz = -1;
++	struct pnfs_layout_segment *lseg;
++
++	dprintk("%s: Begin\n", __func__);
++
++	list_for_each_entry(lseg, &pnfslay->segs, fi_list) {
++		int n;
++		struct objlayout_segment *panlseg = LSEG_LD_DATA(lseg);
++		struct pnfs_osd_layout *lo =
++			(struct pnfs_osd_layout *)panlseg->pnfs_osd_layout;
++		struct pnfs_osd_data_map *map = &lo->olo_map;
++
++		n = map->odm_group_width;
++		if (n == 0)
++			n = map->odm_num_comps / (map->odm_mirror_cnt + 1);
++
++		switch (map->odm_raid_algorithm) {
++		case PNFS_OSD_RAID_0:
++			break;
++
++		case PNFS_OSD_RAID_4:
++		case PNFS_OSD_RAID_5:
++			n -= 1;
++			n *= 8;	/* FIXME: until we have 2-D coalescing */
++			break;
++
++		case PNFS_OSD_RAID_PQ:
++			n -= 2;
++			break;
++
++		default:
++			BUG_ON(1);
++		}
++		sz = map->odm_stripe_unit * n;
++		if (sz > maxsz)
++			maxsz = sz;
++	}
++	dprintk("%s: Return %Zd\n", __func__, maxsz);
++	return maxsz;
++}
++
++#define PANLAYOUT_DEF_STRIPE_UNIT    (64*1024)
++#define PANLAYOUT_DEF_STRIPE_WIDTH   9
++#define PANLAYOUT_MAX_STRIPE_WIDTH   11
++#define PANLAYOUT_MAX_GATHER_STRIPES 8
++
++/*
++ * Get the max [rw]size
++ */
++static ssize_t
++panlayout_get_blocksize(void)
++{
++	ssize_t sz = (PANLAYOUT_MAX_STRIPE_WIDTH-1) *
++		      PANLAYOUT_DEF_STRIPE_UNIT *
++		      PANLAYOUT_MAX_GATHER_STRIPES;
++	dprintk("%s: Return %Zd\n", __func__, sz);
++	return sz;
++}
++
++static struct layoutdriver_policy_operations panlayout_policy_operations = {
++/*
++ * Don't gather across stripes, but rather gather (coalesce) up to
++ * the stripe size.
++ *
++ * FIXME: change interface to use merge_align, merge_count
++ */
++	.flags                 = PNFS_LAYOUTRET_ON_SETATTR,
++	.get_stripesize        = panlayout_get_stripesize,
++	.get_blocksize         = panlayout_get_blocksize,
++};
++
++#define PNFS_LAYOUT_PANOSD (NFS4_PNFS_PRIVATE_LAYOUT | LAYOUT_OSD2_OBJECTS)
++
++static struct pnfs_layoutdriver_type panlayout_type = {
++	.id = PNFS_LAYOUT_PANOSD,
++	.name = "PNFS_LAYOUT_PANOSD",
++	.ld_io_ops = &objlayout_io_operations,
++	.ld_policy_ops = &panlayout_policy_operations,
++};
++
++MODULE_DESCRIPTION("pNFS Layout Driver for Panasas OSDs");
++MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
++MODULE_LICENSE("GPL");
++
++static int __init
++panlayout_init(void)
++{
++	pnfs_client_ops = pnfs_register_layoutdriver(&panlayout_type);
++	printk(KERN_INFO "%s: Registered Panasas OSD pNFS Layout Driver\n",
++	       __func__);
++	return 0;
++}
++
++static void __exit
++panlayout_exit(void)
++{
++	pnfs_unregister_layoutdriver(&panlayout_type);
++	printk(KERN_INFO "%s: Unregistered Panasas OSD pNFS Layout Driver\n",
++	       __func__);
++}
++
++module_init(panlayout_init);
++module_exit(panlayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-08-23 12:09:03.353501685 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-08-23 12:09:03.353501685 -0400
+@@ -0,0 +1,482 @@
++/*
++ *  panfs_shim.h
++ *
++ *  Data types and external function declarations for interfacing with
++ *  panfs (Panasas DirectFlow) I/O stack
++ *
++ *  Copyright (C) 2007 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ */
++
++#ifndef _PANLAYOUT_PANFS_SHIM_H
++#define _PANLAYOUT_PANFS_SHIM_H
++
++typedef s8 pan_int8_t;
++typedef u8 pan_uint8_t;
++typedef s16 pan_int16_t;
++typedef u16 pan_uint16_t;
++typedef s32 pan_int32_t;
++typedef u32 pan_uint32_t;
++typedef s64 pan_int64_t;
++typedef u64 pan_uint64_t;
++
++/*
++ * from pan_base_types.h
++ */
++typedef  pan_uint64_t pan_rpc_none_t;
++typedef pan_uint32_t  pan_rpc_arrdim_t;
++typedef pan_uint32_t  pan_status_t;
++typedef pan_uint8_t   pan_otw_t;
++typedef pan_uint8_t   pan_pad_t;
++
++typedef pan_uint32_t  pan_timespec_sec_t;
++typedef pan_uint32_t  pan_timespec_nsec_t;
++
++typedef  struct pan_timespec_s  pan_timespec_t;
++struct pan_timespec_s {
++  pan_timespec_sec_t   ts_sec;
++  pan_timespec_nsec_t  ts_nsec;
++};
++
++/*
++ * from pan_std_types.h
++ */
++typedef pan_uint32_t pan_size_t;
++typedef  int  pan_bool_t;
++
++/*
++ * from pan_common_error.h
++ */
++#define PAN_SUCCESS                                         ((pan_status_t)0)
++#define PAN_ERR_IN_PROGRESS                                 ((pan_status_t)55)
++
++/*
++ * from pan_sg.h
++ */
++typedef struct pan_sg_entry_s pan_sg_entry_t;
++struct pan_sg_entry_s {
++  void                  *buffer;       /* pointer to memory */
++  pan_uint32_t           chunk_size;   /* size of each chunk (bytes) */
++  pan_sg_entry_t        *next;
++};
++
++/*
++ * from pan_storage.h
++ */
++typedef pan_uint64_t pan_stor_dev_id_t;
++typedef pan_uint32_t pan_stor_obj_grp_id_t;
++typedef pan_uint64_t pan_stor_obj_uniq_t;
++typedef pan_uint32_t pan_stor_action_t;
++typedef pan_uint8_t pan_stor_cap_key_t[20];
++
++typedef pan_uint8_t pan_stor_key_type_t;
++typedef pan_uint64_t pan_stor_len_t;
++typedef pan_int64_t pan_stor_delta_len_t;
++typedef pan_uint64_t pan_stor_offset_t;
++typedef pan_uint16_t pan_stor_op_t;
++
++typedef pan_uint16_t pan_stor_sec_level_t;
++
++struct pan_stor_obj_id_s {
++  pan_stor_dev_id_t      dev_id;
++  pan_stor_obj_uniq_t    obj_id;
++  pan_stor_obj_grp_id_t  grp_id;
++};
++
++typedef struct pan_stor_obj_id_s pan_stor_obj_id_t;
++
++#define PAN_STOR_OP_NONE ((pan_stor_op_t) 0U)
++#define PAN_STOR_OP_READ ((pan_stor_op_t) 8U)
++#define PAN_STOR_OP_WRITE ((pan_stor_op_t) 9U)
++#define PAN_STOR_OP_APPEND ((pan_stor_op_t) 10U)
++#define PAN_STOR_OP_GETATTR ((pan_stor_op_t) 11U)
++#define PAN_STOR_OP_SETATTR ((pan_stor_op_t) 12U)
++#define PAN_STOR_OP_FLUSH ((pan_stor_op_t) 13U)
++#define PAN_STOR_OP_CLEAR ((pan_stor_op_t) 14U)
++
++/*
++ * from pan_aggregation_map.h
++ */
++typedef pan_uint8_t pan_agg_type_t;
++typedef pan_uint64_t pan_agg_map_version_t;
++typedef pan_uint8_t pan_agg_obj_state_t;
++typedef pan_uint8_t pan_agg_comp_state_t;
++typedef pan_uint8_t pan_agg_comp_flag_t;
++
++#define PAN_AGG_OBJ_STATE_INVALID ((pan_agg_obj_state_t) 0x00)
++#define PAN_AGG_OBJ_STATE_NORMAL ((pan_agg_obj_state_t) 0x01)
++#define PAN_AGG_OBJ_STATE_DEGRADED ((pan_agg_obj_state_t) 0x02)
++#define PAN_AGG_OBJ_STATE_RECONSTRUCT ((pan_agg_obj_state_t) 0x03)
++#define PAN_AGG_OBJ_STATE_COPYBACK ((pan_agg_obj_state_t) 0x04)
++#define PAN_AGG_OBJ_STATE_UNAVAILABLE ((pan_agg_obj_state_t) 0x05)
++#define PAN_AGG_OBJ_STATE_CREATING ((pan_agg_obj_state_t) 0x06)
++#define PAN_AGG_OBJ_STATE_DELETED ((pan_agg_obj_state_t) 0x07)
++#define PAN_AGG_COMP_STATE_INVALID ((pan_agg_comp_state_t) 0x00)
++#define PAN_AGG_COMP_STATE_NORMAL ((pan_agg_comp_state_t) 0x01)
++#define PAN_AGG_COMP_STATE_UNAVAILABLE ((pan_agg_comp_state_t) 0x02)
++#define PAN_AGG_COMP_STATE_COPYBACK ((pan_agg_comp_state_t) 0x03)
++#define PAN_AGG_COMP_F_NONE ((pan_agg_comp_flag_t) 0x00)
++#define PAN_AGG_COMP_F_ATTR_STORING ((pan_agg_comp_flag_t) 0x01)
++#define PAN_AGG_COMP_F_OBJ_CORRUPT_OBS ((pan_agg_comp_flag_t) 0x02)
++#define PAN_AGG_COMP_F_TEMP ((pan_agg_comp_flag_t) 0x04)
++
++struct pan_aggregation_map_s {
++  pan_agg_map_version_t  version;
++  pan_agg_obj_state_t    avail_state;
++  pan_stor_obj_id_t      obj_id;
++};
++
++typedef struct pan_aggregation_map_s pan_aggregation_map_t;
++
++struct pan_agg_comp_obj_s {
++  pan_stor_dev_id_t     dev_id;
++  pan_agg_comp_state_t  avail_state;
++  pan_agg_comp_flag_t   comp_flags;
++};
++
++typedef struct pan_agg_comp_obj_s pan_agg_comp_obj_t;
++
++struct pan_agg_simple_header_s {
++  pan_uint8_t  unused;
++};
++
++typedef struct pan_agg_simple_header_s pan_agg_simple_header_t;
++
++struct pan_agg_raid1_header_s {
++  pan_uint16_t  num_comps;
++};
++
++typedef struct pan_agg_raid1_header_s pan_agg_raid1_header_t;
++
++struct pan_agg_raid0_header_s {
++  pan_uint16_t  num_comps;
++  pan_uint32_t  stripe_unit;
++};
++
++typedef struct pan_agg_raid0_header_s pan_agg_raid0_header_t;
++
++struct pan_agg_raid5_left_header_s {
++  pan_uint16_t  num_comps;
++  pan_uint32_t  stripe_unit0;
++  pan_uint32_t  stripe_unit1;
++  pan_uint32_t  stripe_unit2;
++};
++
++typedef struct pan_agg_raid5_left_header_s pan_agg_raid5_left_header_t;
++
++typedef struct pan_agg_grp_raid5_left_header_s pan_agg_grp_raid5_left_header_t;
++
++struct pan_agg_grp_raid5_left_header_s {
++  pan_uint16_t  num_comps;
++  pan_uint32_t  stripe_unit;
++  pan_uint16_t  rg_width;
++  pan_uint16_t  rg_depth;
++  pan_uint8_t   group_layout_policy;
++};
++
++#define PAN_AGG_GRP_RAID5_LEFT_POLICY_INVALID ((pan_uint8_t) 0x00)
++#define PAN_AGG_GRP_RAID5_LEFT_POLICY_ROUND_ROBIN ((pan_uint8_t) 0x01)
++
++#define PAN_AGG_NULL_MAP ((pan_agg_type_t) 0x00)
++#define PAN_AGG_SIMPLE ((pan_agg_type_t) 0x01)
++#define PAN_AGG_RAID1 ((pan_agg_type_t) 0x02)
++#define PAN_AGG_RAID0 ((pan_agg_type_t) 0x03)
++#define PAN_AGG_RAID5_LEFT ((pan_agg_type_t) 0x04)
++#define PAN_AGG_GRP_RAID5_LEFT ((pan_agg_type_t) 0x06)
++#define PAN_AGG_MINTYPE ((pan_agg_type_t) 0x01)
++#define PAN_AGG_MAXTYPE ((pan_agg_type_t) 0x06)
++
++struct pan_agg_layout_hdr_s {
++  pan_agg_type_t type;
++  pan_pad_t pad[3];
++  union {
++    pan_uint64_t                        null;
++    pan_agg_simple_header_t             simple;
++    pan_agg_raid1_header_t              raid1;
++    pan_agg_raid0_header_t              raid0;
++    pan_agg_raid5_left_header_t         raid5_left;
++    pan_agg_grp_raid5_left_header_t     grp_raid5_left;
++  } hdr;
++};
++
++typedef struct pan_agg_layout_hdr_s pan_agg_layout_hdr_t;
++
++struct pan_agg_comp_obj_a_s {
++  pan_rpc_arrdim_t size;
++  pan_agg_comp_obj_t *data;
++};
++typedef struct pan_agg_comp_obj_a_s pan_agg_comp_obj_a;
++
++struct pan_agg_full_map_s {
++  pan_aggregation_map_t  map_hdr;
++  pan_agg_layout_hdr_t   layout_hdr;
++  pan_agg_comp_obj_a     components;
++};
++
++typedef struct pan_agg_full_map_s pan_agg_full_map_t;
++
++/*
++ * from pan_obsd_rpc_types.h
++ */
++typedef pan_uint8_t pan_obsd_security_key_a[16];
++
++typedef pan_uint8_t pan_obsd_capability_key_a[20];
++
++typedef pan_uint8_t pan_obsd_key_holder_id_t;
++
++#define PAN_OBSD_KEY_HOLDER_BASIS_KEY ((pan_obsd_key_holder_id_t) 0x01)
++#define PAN_OBSD_KEY_HOLDER_CAP_KEY ((pan_obsd_key_holder_id_t) 0x02)
++
++struct pan_obsd_key_holder_s {
++  pan_obsd_key_holder_id_t select;
++  pan_pad_t pad[3];
++  union {
++    pan_obsd_security_key_a    basis_key;
++    pan_obsd_capability_key_a  cap_key;
++  } key;
++};
++
++typedef struct pan_obsd_key_holder_s pan_obsd_key_holder_t;
++
++/*
++ * from pan_sm_sec.h
++ */
++typedef pan_uint8_t pan_sm_sec_type_t;
++typedef pan_uint8_t pan_sm_sec_otw_allo_mode_t;
++
++struct pan_obsd_capability_generic_otw_t_s {
++  pan_rpc_arrdim_t size;
++  pan_uint8_t *data;
++};
++typedef struct pan_obsd_capability_generic_otw_t_s
++				pan_obsd_capability_generic_otw_t;
++
++struct pan_sm_sec_obsd_s {
++  pan_obsd_key_holder_t              key;
++  pan_obsd_capability_generic_otw_t  cap_otw;
++  pan_sm_sec_otw_allo_mode_t         allo_mode;
++};
++
++typedef struct pan_sm_sec_obsd_s pan_sm_sec_obsd_t;
++
++struct pan_sm_sec_s {
++  pan_sm_sec_type_t type;
++  pan_pad_t pad[3];
++  union {
++    pan_rpc_none_t     none;
++    pan_sm_sec_obsd_t  obsd;
++  } variant;
++};
++
++typedef struct pan_sm_sec_s pan_sm_sec_t;
++
++struct pan_sm_sec_a_s {
++  pan_rpc_arrdim_t size;
++  pan_sm_sec_t *data;
++};
++typedef struct pan_sm_sec_a_s pan_sm_sec_a;
++typedef pan_otw_t *pan_sm_sec_otw_t;
++
++/*
++ * from pan_sm_types.h
++ */
++typedef pan_uint64_t pan_sm_cap_handle_t;
++
++struct pan_sm_map_cap_s {
++  pan_agg_full_map_t   full_map;
++  pan_stor_offset_t    offset;
++  pan_stor_len_t       length;
++  pan_sm_sec_a         secs;
++  pan_sm_cap_handle_t  handle;
++  pan_timespec_t       expiration_time;
++  pan_stor_action_t    action_mask;
++  pan_uint32_t         flags;
++};
++
++typedef struct pan_sm_map_cap_s pan_sm_map_cap_t;
++
++/*
++ * from pan_sm_ops.h
++ */
++typedef pan_rpc_none_t pan_sm_cache_ptr_t;
++
++/*
++ * from pan_sam_api.h
++ */
++typedef pan_uint32_t    pan_sam_access_flags_t;
++
++typedef struct pan_sam_dev_error_s  pan_sam_dev_error_t;
++struct pan_sam_dev_error_s {
++    pan_stor_dev_id_t       dev_id;
++    pan_stor_op_t           stor_op;
++    pan_status_t            error;
++};
++
++typedef struct pan_sam_ext_status_s pan_sam_ext_status_t;
++struct pan_sam_ext_status_s {
++    pan_uint32_t        available;
++    pan_uint32_t        size;
++    pan_sam_dev_error_t *errors;
++};
++
++enum pan_sam_rpc_sec_sel_e {
++    PAN_SAM_RPC_SEC_DEFAULT,
++    PAN_SAM_RPC_SEC_ATLEAST,
++    PAN_SAM_RPC_SEC_EXACTLY
++};
++typedef enum pan_sam_rpc_sec_sel_e pan_sam_rpc_sec_sel_t;
++
++typedef struct pan_sam_obj_sec_s pan_sam_obj_sec_t;
++struct pan_sam_obj_sec_s {
++    pan_stor_sec_level_t    min_security;
++    pan_sm_map_cap_t        *map_ccaps;
++};
++
++typedef struct  pan_sam_rpc_sec_s   pan_sam_rpc_sec_t;
++struct pan_sam_rpc_sec_s {
++    pan_sam_rpc_sec_sel_t   selector;
++};
++
++typedef struct pan_sam_read_args_s pan_sam_read_args_t;
++struct pan_sam_read_args_s {
++    pan_stor_obj_id_t                obj_id;
++    pan_sm_cache_ptr_t               obj_ent;
++    void                            *return_attr;
++    void                            *checksum;
++    pan_stor_offset_t                offset;
++    pan_uint16_t                     sm_options;
++    void                            *callout;
++    void                            *callout_arg;
++};
++
++typedef struct pan_sam_read_res_s pan_sam_read_res_t;
++struct pan_sam_read_res_s {
++    pan_status_t             result;
++    pan_sam_ext_status_t     ext_status;
++    pan_stor_len_t           length;
++    void                    *attr;
++    void                    *checksum;
++};
++
++typedef void (*pan_sam_read_cb_t)(
++    void                *user_arg1,
++    void                *user_arg2,
++    pan_sam_read_res_t  *res_p,
++    pan_status_t        status);
++
++#define PAN_SAM_ACCESS_NONE                             0x0000
++#define PAN_SAM_ACCESS_BYPASS_TIMESTAMP                 0x0020
++
++typedef struct pan_sam_write_args_s pan_sam_write_args_t;
++struct pan_sam_write_args_s {
++    pan_stor_obj_id_t   obj_id;
++    pan_sm_cache_ptr_t  obj_ent;
++    pan_stor_offset_t   offset;
++    void                *attr;
++    void                *return_attr;
++};
++
++typedef struct pan_sam_write_res_s pan_sam_write_res_t;
++struct pan_sam_write_res_s {
++    pan_status_t            result;
++    pan_sam_ext_status_t    ext_status;
++    pan_stor_len_t          length;
++    pan_stor_delta_len_t    delta_capacity_used;
++    pan_bool_t              parity_dirty;
++    void                   *attr;
++};
++
++typedef void (*pan_sam_write_cb_t)(
++    void                *user_arg1,
++    void                *user_arg2,
++    pan_sam_write_res_t *res_p,
++    pan_status_t        status);
++
++/*
++ * from pan_mgr_types.h
++ */
++#define PAN_MGR_ID_TYPE_SHIFT 56
++#define PAN_MGR_ID_TYPE_MASK ((pan_mgr_id_t)18374686479671623680ULL)
++#define PAN_MGR_ID_UNIQ_MASK ((pan_mgr_id_t)72057594037927935ULL)
++
++typedef pan_uint16_t pan_mgr_type_t;
++typedef pan_uint64_t pan_mgr_id_t;
++
++#define PAN_MGR_SM ((pan_mgr_type_t) 2U)
++#define PAN_MGR_OBSD ((pan_mgr_type_t) 6U)
++
++/*
++ * from pan_mgr_types_c.h
++ */
++#define pan_mgr_id_construct_artificial(_mgr_type_, _mgr_uniq_, _mgr_id_p_) { \
++  pan_mgr_id_t  _id1, _id2; \
++\
++  _id1 = (_mgr_type_); \
++  _id1 <<= PAN_MGR_ID_TYPE_SHIFT; \
++  _id1 &= PAN_MGR_ID_TYPE_MASK; \
++  _id2 = (_mgr_uniq_); \
++  _id2 &= PAN_MGR_ID_UNIQ_MASK; \
++  _id1 |= _id2; \
++  *(_mgr_id_p_) = _id1; \
++}
++
++/*
++ * from pan_storage_c.h
++ */
++#define pan_stor_is_device_id_an_obsd_id(_device_id_) \
++    ((((_device_id_) & PAN_MGR_ID_TYPE_MASK) >> PAN_MGR_ID_TYPE_SHIFT) \
++	== PAN_MGR_OBSD)
++
++/*
++ * pnfs_shim internal definitions
++ */
++
++struct panfs_shim_io_state {
++	struct objlayout_io_state ol_state;
++
++	pan_sg_entry_t *sg_list;
++	pan_sam_obj_sec_t obj_sec;
++	void *ucreds;
++	union {
++		struct {
++			pan_sam_read_args_t args;
++			pan_sam_read_res_t res;
++		} read;
++		struct {
++			pan_sam_write_args_t args;
++			pan_sam_write_res_t res;
++		} write;
++	} u;
++};
++
++#endif /* _PANLAYOUT_PANFS_SHIM_H */
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-08-23 12:09:03.354501721 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-08-23 12:09:03.354501721 -0400
+@@ -0,0 +1,435 @@
++/*
++ *  pnfs_osd_xdr.c
++ *
++ *  Object-Based pNFS Layout XDR layer
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/pnfs_osd_xdr.h>
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/*
++ * The following implementation is based on these Internet Drafts:
++ *
++ * draft-ietf-nfsv4-minorversion-21
++ * draft-ietf-nfsv4-pnfs-obj-12
++ */
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_objid(u32 *p, struct pnfs_osd_objid *objid)
++{
++	COPYMEM(objid->oid_device_id.data, sizeof(objid->oid_device_id.data));
++	READ64(objid->oid_partition_id);
++	READ64(objid->oid_object_id);
++	return p;
++}
++
++static inline u32 *
++pnfs_osd_xdr_decode_opaque_cred(u32 *p,
++				struct pnfs_osd_opaque_cred *opaque_cred)
++{
++	READ32(opaque_cred->cred_len);
++	COPYMEM(opaque_cred->cred, opaque_cred->cred_len);
++	return p;
++}
++
++/*
++ * struct pnfs_osd_object_cred {
++ * 	struct pnfs_osd_objid		oc_object_id;
++ * 	u32				oc_osd_version;
++ * 	u32				oc_cap_key_sec;
++ * 	struct pnfs_osd_opaque_cred	oc_cap_key
++ * 	struct pnfs_osd_opaque_cred	oc_cap;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_object_cred(u32 *p, struct pnfs_osd_object_cred *comp,
++				u8 **credp)
++{
++	u8 *cred;	/* write cursor into the caller-supplied storage */
++
++	p = pnfs_osd_xdr_decode_objid(p, &comp->oc_object_id);
++	READ32(comp->oc_osd_version);
++	READ32(comp->oc_cap_key_sec);
++
++	cred = *credp;
++	comp->oc_cap_key.cred = cred;	/* key bytes are copied to the cursor */
++	p = pnfs_osd_xdr_decode_opaque_cred(p, &comp->oc_cap_key);
++	cred = (u8 *)((u32 *)cred + XDR_QUADLEN(comp->oc_cap_key.cred_len));
++	comp->oc_cap.cred = cred;	/* cap follows the u32-padded key */
++	p = pnfs_osd_xdr_decode_opaque_cred(p, &comp->oc_cap);
++	cred = (u8 *)((u32 *)cred + XDR_QUADLEN(comp->oc_cap.cred_len));
++	*credp = cred;	/* hand the advanced cursor back to the caller */
++
++	return p;	/* xdr position after this object_cred */
++}
++
++/*
++ * struct pnfs_osd_data_map {
++ * 	u32	odm_num_comps;
++ * 	u64	odm_stripe_unit;
++ * 	u32	odm_group_width;
++ * 	u32	odm_group_depth;
++ * 	u32	odm_mirror_cnt;
++ * 	u32	odm_raid_algorithm;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_data_map(u32 *p, struct pnfs_osd_data_map *data_map)
++{
++	READ32(data_map->odm_num_comps);
++	READ64(data_map->odm_stripe_unit);
++	READ32(data_map->odm_group_width);
++	READ32(data_map->odm_group_depth);
++	READ32(data_map->odm_mirror_cnt);
++	READ32(data_map->odm_raid_algorithm);
++	dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u "
++		"odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n",
++		__func__,
++		data_map->odm_num_comps,
++		(unsigned long long)data_map->odm_stripe_unit,
++		data_map->odm_group_width,
++		data_map->odm_group_depth,
++		data_map->odm_mirror_cnt,
++		data_map->odm_raid_algorithm);
++	return p;
++}
++
++struct pnfs_osd_layout *
++pnfs_osd_xdr_decode_layout(struct pnfs_osd_layout *layout, u32 *p)
++{
++	int i;
++	u32 *start = p;
++	struct pnfs_osd_object_cred *comp;
++	u8 *cred;
++
++	p = pnfs_osd_xdr_decode_data_map(p, &layout->olo_map);
++	READ32(layout->olo_comps_index);
++	READ32(layout->olo_num_comps);
++	layout->olo_comps = (struct pnfs_osd_object_cred *)(layout + 1);
++	comp = layout->olo_comps;
++	cred = (u8 *)(comp + layout->olo_num_comps);
++	dprintk("%s: comps_index=%u num_comps=%u\n",
++		__func__, layout->olo_comps_index, layout->olo_num_comps);
++	for (i = 0; i < layout->olo_num_comps; i++) {
++		p = pnfs_osd_xdr_decode_object_cred(p, comp, &cred);
++		dprintk("%s: comp[%d]=dev(%llx:%llx) par=0x%llx obj=0x%llx "
++			"key_len=%u cap_len=%u\n",
++			__func__, i,
++			_DEVID_LO(&comp->oc_object_id.oid_device_id),
++			_DEVID_HI(&comp->oc_object_id.oid_device_id),
++			comp->oc_object_id.oid_partition_id,
++			comp->oc_object_id.oid_object_id,
++			comp->oc_cap_key.cred_len, comp->oc_cap.cred_len);
++		comp++;
++	}
++	dprintk("%s: xdr_size=%Zd end=%p in_core_size=%Zd\n", __func__,
++	       (char *)p - (char *)start, cred, (char *)cred - (char *)layout);
++	return layout;
++}
++
++/*
++ * Get Device Information Decoding
++ *
++ * Note: since Device Information is currently done synchronously, most
++ *       of the actual fields are left inside the rpc buffer and are only
++ *       pointed to by the pnfs_osd_deviceaddr members. So the read buffer
++ *       should not be freed while the returned information is in use.
++ */
++
++u32 *__xdr_read_calc_nfs4_string(
++	u32 *p, struct nfs4_string *str, u8 **freespace)
++{
++	u32 len;
++	char *data;
++	bool need_copy;
++
++	READ32(len);
++	data = (char *)p; /* presumably the bytes after the length word */
++
++	if (data[len]) { /* not NUL-terminated in place: use *freespace */
++		data = *freespace;
++		*freespace += len + 1;	/* len bytes plus a terminating NUL */
++		need_copy = true;
++	} else {
++		need_copy = false;
++	}
++
++	if (str) {	/* NULL str: only p and *freespace are advanced */
++		str->len = len;
++		str->data = data;
++		if (need_copy) {
++			memcpy(data, p, len);
++			data[len] = 0;
++		}
++	}
++
++	p += XDR_QUADLEN(len);	/* skip the string, rounded up to u32s */
++	return p;
++}
++
++u32 *__xdr_read_calc_u8_opaque(
++	u32 *p, struct nfs4_string *str)
++{
++	u32 len;
++
++	READ32(len);
++
++	if (str) {
++		str->len = len;
++		str->data = (char *)p;
++	}
++
++	p += XDR_QUADLEN(len);
++	return p;
++}
++
++/*
++ * struct pnfs_osd_targetid {
++ * 	u32			oti_type;
++ * 	struct nfs4_string	oti_scsi_device_id;
++ * };
++ */
++u32 *__xdr_read_calc_targetid(
++	u32 *p, struct pnfs_osd_targetid* targetid, u8 **freespace)
++{
++	u32 oti_type;
++
++	READ32(oti_type);
++	if (targetid)
++		targetid->oti_type = oti_type;
++
++	switch (oti_type) {
++	case OBJ_TARGET_SCSI_NAME:
++	case OBJ_TARGET_SCSI_DEVICE_ID:
++		p = __xdr_read_calc_u8_opaque(p,
++			targetid ? &targetid->oti_scsi_device_id : NULL);
++	}
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_net_addr {
++ * 	struct nfs4_string	r_netid;
++ * 	struct nfs4_string	r_addr;
++ * };
++ */
++u32 *__xdr_read_calc_net_addr(
++	u32 *p, struct pnfs_osd_net_addr* netaddr, u8 **freespace)
++{
++
++	p = __xdr_read_calc_nfs4_string(p,
++			netaddr ? &netaddr->r_netid : NULL,
++			freespace);
++
++	p = __xdr_read_calc_nfs4_string(p,
++			netaddr ? &netaddr->r_addr : NULL,
++			freespace);
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_targetaddr {
++ * 	u32				ota_available;
++ * 	struct pnfs_osd_net_addr	ota_netaddr;
++ * };
++ */
++u32 *__xdr_read_calc_targetaddr(
++	u32 *p, struct pnfs_osd_targetaddr *targetaddr, u8 **freespace)
++{
++	u32 ota_available;
++
++	READ32(ota_available);
++	if (targetaddr)
++		targetaddr->ota_available = ota_available;
++
++	if (ota_available) {
++		p = __xdr_read_calc_net_addr(p,
++				targetaddr ? &targetaddr->ota_netaddr : NULL,
++				freespace);
++	}
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_deviceaddr {
++ * 	struct pnfs_osd_targetid	oda_targetid;
++ * 	struct pnfs_osd_targetaddr	oda_targetaddr;
++ * 	u8				oda_lun[8];
++ * 	struct nfs4_string		oda_systemid;
++ * 	struct pnfs_osd_object_cred	oda_root_obj_cred;
++ * 	struct nfs4_string		oda_osdname;
++ * };
++ */
++u32 *__xdr_read_calc_deviceaddr(
++	u32 *p, struct pnfs_osd_deviceaddr *deviceaddr, u8 **freespace)
++{
++	p = __xdr_read_calc_targetid(p,
++			deviceaddr ? &deviceaddr->oda_targetid : NULL,
++			freespace);
++
++	p = __xdr_read_calc_targetaddr(p,
++			deviceaddr ? &deviceaddr->oda_targetaddr : NULL,
++			freespace);
++
++	if (deviceaddr)
++		COPYMEM(deviceaddr->oda_lun, sizeof(deviceaddr->oda_lun));
++	else
++		p += XDR_QUADLEN(sizeof(deviceaddr->oda_lun));
++
++	p = __xdr_read_calc_u8_opaque(p,
++			deviceaddr ? &deviceaddr->oda_systemid : NULL);
++
++	if (deviceaddr) {
++		p = pnfs_osd_xdr_decode_object_cred(p,
++				&deviceaddr->oda_root_obj_cred, freespace);
++	} else {
++		*freespace += pnfs_osd_object_cred_incore_sz(p);
++		p += pnfs_osd_object_cred_xdr_sz(p);
++	}
++
++	p = __xdr_read_calc_u8_opaque(p,
++			deviceaddr ? &deviceaddr->oda_osdname : NULL);
++
++	return p;
++}
++
++size_t pnfs_osd_xdr_deviceaddr_incore_sz(u32 *p)
++{
++	u8 *null_freespace = NULL;	/* cursor starting at 0 == byte counter */
++	size_t sz;
++
++	__xdr_read_calc_deviceaddr(p, NULL, &null_freespace); /* sizing pass */
++	sz = sizeof(struct pnfs_osd_deviceaddr) + (size_t)null_freespace;
++
++	return sz;	/* struct itself plus the trailing space it needs */
++}
++
++void pnfs_osd_xdr_decode_deviceaddr(
++	struct pnfs_osd_deviceaddr *deviceaddr, u32 *p)
++{
++	u8 *freespace = (u8 *)(deviceaddr + 1);
++
++	__xdr_read_calc_deviceaddr(p, deviceaddr, &freespace);
++}
++
++/*
++ * struct pnfs_osd_layoutupdate {
++ * 	u32	dsu_valid;
++ * 	s64	dsu_delta;
++ * 	u32	olu_ioerr_flag;
++ * };
++ */
++int
++pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
++				 struct pnfs_osd_layoutupdate *lou)
++{
++	/* dsu_valid + olu_ioerr_flag; dsu_delta is only sent when valid */
++	__be32 *p = xdr_reserve_space(xdr, lou->dsu_valid ? 16 : 8);
++
++	if (!p)
++		return -E2BIG;	/* reservation failed */
++	*p++ = cpu_to_be32(lou->dsu_valid);
++	if (lou->dsu_valid)
++		p = xdr_encode_hyper(p, lou->dsu_delta);
++	*p++ = cpu_to_be32(lou->olu_ioerr_flag);
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ */
++static inline int pnfs_osd_xdr_encode_objid(struct xdr_stream *xdr,
++					    struct pnfs_osd_objid *object_id)
++{
++	__be32 *p;
++
++	p = xdr_reserve_space(xdr, 32);
++	if (!p)
++		return -E2BIG;
++
++	p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data,
++				    sizeof(object_id->oid_device_id.data));
++	p = xdr_encode_hyper(p, object_id->oid_partition_id);
++	p = xdr_encode_hyper(p, object_id->oid_object_id);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_ioerr {
++ * 	struct pnfs_osd_objid	oer_component;
++ * 	u64			oer_comp_offset;
++ * 	u64			oer_comp_length;
++ * 	u32			oer_iswrite;
++ * 	u32			oer_errno;
++ * };
++ */
++int pnfs_osd_xdr_encode_ioerr(struct xdr_stream *xdr,
++			      struct pnfs_osd_ioerr *ioerr)
++{
++	__be32 *p;
++	int ret;
++
++	ret = pnfs_osd_xdr_encode_objid(xdr, &ioerr->oer_component);
++	if (ret)
++		return ret;
++
++	p = xdr_reserve_space(xdr, 24);
++	if (!p)
++		return -E2BIG;
++
++	p = xdr_encode_hyper(p, ioerr->oer_comp_offset);
++	p = xdr_encode_hyper(p, ioerr->oer_comp_length);
++	*p++ = cpu_to_be32(ioerr->oer_iswrite);
++	*p   = cpu_to_be32(ioerr->oer_errno);
++
++	return 0;
++}
+diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/pagelist.c
+--- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-08-23 12:08:29.056411363 -0400
++++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-23 12:09:03.355511659 -0400
+@@ -20,6 +20,7 @@
+ #include <linux/nfs_mount.h>
+ 
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ static struct kmem_cache *nfs_page_cachep;
+ 
+@@ -56,7 +57,8 @@ nfs_page_free(struct nfs_page *p)
+ struct nfs_page *
+ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
+ 		   struct page *page,
+-		   unsigned int offset, unsigned int count)
++		   unsigned int offset, unsigned int count,
++		   struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_page		*req;
+ 
+@@ -79,7 +81,11 @@ nfs_create_request(struct nfs_open_conte
+ 	req->wb_pgbase	= offset;
+ 	req->wb_bytes   = count;
+ 	req->wb_context = get_nfs_open_context(ctx);
++	req->wb_lock_context = nfs_get_lock_context(ctx);
+ 	kref_init(&req->wb_kref);
++	req->wb_lseg    = lseg;
++	if (lseg)
++		get_lseg(lseg);
+ 	return req;
+ }
+ 
+@@ -141,18 +147,26 @@ void nfs_clear_request(struct nfs_page *
+ {
+ 	struct page *page = req->wb_page;
+ 	struct nfs_open_context *ctx = req->wb_context;
++	struct nfs_lock_context *l_ctx = req->wb_lock_context;
+ 
+ 	if (page != NULL) {
+ 		page_cache_release(page);
+ 		req->wb_page = NULL;
+ 	}
++	if (l_ctx != NULL) {
++		nfs_put_lock_context(l_ctx);
++		req->wb_lock_context = NULL;
++	}
+ 	if (ctx != NULL) {
+ 		put_nfs_open_context(ctx);
+ 		req->wb_context = NULL;
+ 	}
++	if (req->wb_lseg != NULL) {
++		put_lseg(req->wb_lseg);
++		req->wb_lseg = NULL;
++	}
+ }
+ 
+-
+ /**
+  * nfs_release_request - Release the count on an NFS read/write request
+  * @req: request to release
+@@ -231,11 +245,12 @@ void nfs_pageio_init(struct nfs_pageio_d
+  * Return 'true' if this is the case, else return 'false'.
+  */
+ static int nfs_can_coalesce_requests(struct nfs_page *prev,
+-				     struct nfs_page *req)
++				     struct nfs_page *req,
++				     struct nfs_pageio_descriptor *pgio)
+ {
+ 	if (req->wb_context->cred != prev->wb_context->cred)
+ 		return 0;
+-	if (req->wb_context->lockowner != prev->wb_context->lockowner)
++	if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
+ 		return 0;
+ 	if (req->wb_context->state != prev->wb_context->state)
+ 		return 0;
+@@ -245,6 +260,12 @@ static int nfs_can_coalesce_requests(str
+ 		return 0;
+ 	if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
+ 		return 0;
++	if (req->wb_lseg != prev->wb_lseg)
++		return 0;
++#ifdef CONFIG_NFS_V4_1
++	if (pgio->pg_test && !pgio->pg_test(pgio, prev, req))
++		return 0;
++#endif /* CONFIG_NFS_V4_1 */
+ 	return 1;
+ }
+ 
+@@ -277,7 +298,7 @@ static int nfs_pageio_do_add_request(str
+ 		if (newlen > desc->pg_bsize)
+ 			return 0;
+ 		prev = nfs_list_entry(desc->pg_list.prev);
+-		if (!nfs_can_coalesce_requests(prev, req))
++		if (!nfs_can_coalesce_requests(prev, req, desc))
+ 			return 0;
+ 	} else
+ 		desc->pg_base = req->wb_pgbase;
+@@ -366,6 +387,7 @@ void nfs_pageio_cond_complete(struct nfs
+  * @idx_start: lower bound of page->index to scan
+  * @npages: idx_start + npages sets the upper bound to scan.
+  * @tag: tag to scan for
++ * @use_pnfs: will be set TRUE if commit needs to be handled by layout driver
+  *
+  * Moves elements from one of the inode request lists.
+  * If the number of requests is set to 0, the entire address_space
+@@ -375,7 +397,7 @@ void nfs_pageio_cond_complete(struct nfs
+  */
+ int nfs_scan_list(struct nfs_inode *nfsi,
+ 		struct list_head *dst, pgoff_t idx_start,
+-		unsigned int npages, int tag)
++		  unsigned int npages, int tag, int *use_pnfs)
+ {
+ 	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
+ 	struct nfs_page *req;
+@@ -406,6 +428,8 @@ int nfs_scan_list(struct nfs_inode *nfsi
+ 				radix_tree_tag_clear(&nfsi->nfs_page_tree,
+ 						req->wb_index, tag);
+ 				nfs_list_add_request(req, dst);
++				if (req->wb_lseg)
++					*use_pnfs = 1;
+ 				res++;
+ 				if (res == INT_MAX)
+ 					goto out;
+diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.c
+--- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-08-23 12:09:03.356501413 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-08-23 12:09:03.357481204 -0400
+@@ -0,0 +1,2027 @@
++/*
++ *  linux/fs/nfs/pnfs.c
++ *
++ *  pNFS functions to call and manage layout drivers.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/kernel.h>
++#include <linux/errno.h>
++#include <linux/fs.h>
++#include <linux/module.h>
++#include <linux/smp_lock.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_mount.h>
++#include <linux/nfs_page.h>
++#include <linux/nfs4.h>
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/rculist.h>
++
++#include "internal.h"
++#include "nfs4_fs.h"
++#include "pnfs.h"
++
++#define NFSDBG_FACILITY		NFSDBG_PNFS
++
++#define MIN_POOL_LC		(4)
++
++static int pnfs_initialized;
++
++static void pnfs_free_layout(struct pnfs_layout_type *lo,
++			     struct nfs4_pnfs_layout_segment *range);
++static inline void get_layout(struct pnfs_layout_type *lo);
++
++/* Locking:
++ *
++ * pnfs_spinlock:
++ * 	protects pnfs_modules_tbl.
++ */
++static spinlock_t pnfs_spinlock = __SPIN_LOCK_UNLOCKED(pnfs_spinlock);
++
++/*
++ * pnfs_modules_tbl holds all pnfs modules
++ */
++static struct list_head	pnfs_modules_tbl;
++static struct kmem_cache *pnfs_cachep;
++static mempool_t *pnfs_layoutcommit_mempool;
++
++static inline struct pnfs_layoutcommit_data *pnfs_layoutcommit_alloc(void)
++{
++	struct pnfs_layoutcommit_data *p =
++			mempool_alloc(pnfs_layoutcommit_mempool, GFP_NOFS);
++	if (p)
++		memset(p, 0, sizeof(*p));
++
++	return p;
++}
++
++void pnfs_layoutcommit_free(struct pnfs_layoutcommit_data *p)
++{
++	mempool_free(p, pnfs_layoutcommit_mempool);
++}
++
++/*
++ * struct pnfs_module - One per pNFS device module.
++ */
++struct pnfs_module {
++	struct pnfs_layoutdriver_type *pnfs_ld_type;
++	struct list_head        pnfs_tblid;
++};
++
++int
++pnfs_initialize(void)
++{
++	INIT_LIST_HEAD(&pnfs_modules_tbl);
++
++	pnfs_cachep = kmem_cache_create("pnfs_layoutcommit_data",
++					sizeof(struct pnfs_layoutcommit_data),
++					0, SLAB_HWCACHE_ALIGN, NULL);
++	if (pnfs_cachep == NULL)
++		return -ENOMEM;
++
++	pnfs_layoutcommit_mempool = mempool_create(MIN_POOL_LC,
++						   mempool_alloc_slab,
++						   mempool_free_slab,
++						   pnfs_cachep);
++	if (pnfs_layoutcommit_mempool == NULL) {
++		kmem_cache_destroy(pnfs_cachep);
++		return -ENOMEM;
++	}
++
++	pnfs_initialized = 1;
++	return 0;
++}
++
++void pnfs_uninitialize(void)
++{
++	mempool_destroy(pnfs_layoutcommit_mempool);
++	kmem_cache_destroy(pnfs_cachep);
++}
++
++/* Look up the module with layout type @id in pnfs_modules_tbl; 1 if found */
++static int
++find_pnfs(u32 id, struct pnfs_module **module) {
++	struct  pnfs_module *local = NULL; /* NOTE(review): no pnfs_spinlock */
++
++	dprintk("PNFS: %s: Searching for %u\n", __func__, id);
++	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid) {
++		if (local->pnfs_ld_type->id == id) {
++			*module = local;
++			return(1);
++		}
++	}
++	return 0;	/* not found; *module is left untouched */
++}
++
++/* Set lo_cred to indicate we require a layoutcommit
++ * If we don't even have a layout, we don't need to commit it.
++ */
++void
++pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx)
++{
++	dprintk("%s: has_layout=%d ctx=%p\n", __func__, has_layout(nfsi), ctx);
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	if (has_layout(nfsi) &&
++	    !test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->pnfs_layout_state)) {
++		nfsi->layout->lo_cred = get_rpccred(ctx->state->owner->so_cred);
++		__set_bit(NFS_INO_LAYOUTCOMMIT,
++			  &nfsi->layout->pnfs_layout_state);
++		nfsi->change_attr++;
++		spin_unlock(&nfsi->vfs_inode.i_lock);
++		dprintk("%s: Set layoutcommit\n", __func__);
++		return;
++	}
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++}
++
++/* Update last_write_offset for layoutcommit.
++ * TODO: We should only use committed extents, but the current nfs
++ * implementation does not calculate the written range in nfs_commit_done.
++ * We therefore update this field in writeback_done.
++ */
++void
++pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
++{
++	loff_t end_pos;
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	if (offset < nfsi->layout->pnfs_write_begin_pos)
++		nfsi->layout->pnfs_write_begin_pos = offset;
++	end_pos = offset + extent - 1; /* I'm being inclusive */
++	if (end_pos > nfsi->layout->pnfs_write_end_pos)
++		nfsi->layout->pnfs_write_end_pos = end_pos;
++	dprintk("%s: Wrote %lu@%lu bpos %lu, epos: %lu\n",
++		__func__,
++		(unsigned long) extent,
++		(unsigned long) offset ,
++		(unsigned long) nfsi->layout->pnfs_write_begin_pos,
++		(unsigned long) nfsi->layout->pnfs_write_end_pos);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++}
++
++/* Uninitialize a mountpoint in a layout driver */
++void
++unmount_pnfs_layoutdriver(struct nfs_server *nfss)
++{
++	if (PNFS_EXISTS_LDIO_OP(nfss, uninitialize_mountpoint))
++		nfss->pnfs_curr_ld->ld_io_ops->uninitialize_mountpoint(nfss);
++}
++
++/*
++ * Set the server pnfs module to the registered pnfs_type matching @id.
++ * Only one pNFS layout driver is supported.
++ */
++void
++set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
++		      u32 id)
++{
++	struct pnfs_module *mod = NULL;
++
++	if (server->pnfs_curr_ld)
++		return;
++
++	if (!find_pnfs(id, &mod)) {
++		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
++		find_pnfs(id, &mod);
++	}
++
++	if (!mod) {
++		dprintk("%s: No pNFS module found for %u. ", __func__, id);
++		goto out_err;
++	}
++
++	server->pnfs_curr_ld = mod->pnfs_ld_type;
++	if (mod->pnfs_ld_type->ld_io_ops->initialize_mountpoint(
++							server, mntfh)) {
++		printk(KERN_ERR "%s: Error initializing mount point "
++		       "for layout driver %u. ", __func__, id);
++		goto out_err;
++	}
++
++	dprintk("%s: pNFS module for %u set\n", __func__, id);
++	return;
++
++out_err:
++	dprintk("Using NFSv4 I/O\n");
++	server->pnfs_curr_ld = NULL;
++}
++
++/*
++ * Register a layout driver: check its mandatory io ops, then add it to
++ * pnfs_modules_tbl. Returns &pnfs_ops, or NULL if registration failed.
++ */
++struct pnfs_client_operations*
++pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
++{
++	struct pnfs_module *pnfs_mod;
++	struct layoutdriver_io_operations *io_ops = ld_type->ld_io_ops;
++
++	if (!pnfs_initialized) {
++		printk(KERN_ERR "%s Registration failure. "
++		       "pNFS not initialized.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops || !io_ops->alloc_layout || !io_ops->free_layout) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "alloc_layout and free_layout.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops->alloc_lseg || !io_ops->free_lseg) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "alloc_lseg and free_lseg.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops->read_pagelist || !io_ops->write_pagelist ||
++	    !io_ops->commit) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "read_pagelist, write_pagelist, and commit.\n",
++		       __func__);
++		return NULL;
++	}
++
++	pnfs_mod = kmalloc(sizeof(*pnfs_mod), GFP_KERNEL);
++	if (!pnfs_mod)
++		return NULL;	/* not in the table, so do not report success */
++
++	dprintk("%s Registering id:%u name:%s\n",
++		__func__, ld_type->id, ld_type->name);
++	pnfs_mod->pnfs_ld_type = ld_type;
++	INIT_LIST_HEAD(&pnfs_mod->pnfs_tblid);
++	spin_lock(&pnfs_spinlock);
++	list_add(&pnfs_mod->pnfs_tblid, &pnfs_modules_tbl);
++	spin_unlock(&pnfs_spinlock);
++	return &pnfs_ops;
++}
++
++/* Remove a layout driver's entry from pnfs_modules_tbl and free it */
++void
++pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
++{
++	struct pnfs_module *pnfs_mod;
++
++	if (find_pnfs(ld_type->id, &pnfs_mod)) {
++		dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
++		spin_lock(&pnfs_spinlock);
++		list_del(&pnfs_mod->pnfs_tblid);
++		spin_unlock(&pnfs_spinlock);
++		kfree(pnfs_mod);
++	}
++}
++
++/*
++ * pNFS client layout cache
++ */
++#if defined(CONFIG_SMP)
++#define BUG_ON_UNLOCKED_INO(ino) \
++	BUG_ON(!spin_is_locked(&ino->i_lock))
++#define BUG_ON_UNLOCKED_LO(lo) \
++	BUG_ON_UNLOCKED_INO(PNFS_INODE(lo))
++#else /* CONFIG_SMP */
++#define BUG_ON_UNLOCKED_INO(lo) do {} while (0)
++#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
++#endif /* CONFIG_SMP */
++
++static inline void
++get_layout(struct pnfs_layout_type *lo)
++{
++	BUG_ON_UNLOCKED_LO(lo);
++	lo->refcount++;
++}
++
++static inline void
++put_layout_locked(struct pnfs_layout_type *lo)
++{
++	BUG_ON_UNLOCKED_LO(lo);	/* on SMP: the inode's i_lock must be held */
++	BUG_ON(lo->refcount <= 0);
++
++	lo->refcount--;
++	if (!lo->refcount) {	/* last reference: tear the layout down */
++		struct layoutdriver_io_operations *io_ops = PNFS_LD_IO_OPS(lo);
++		struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
++
++		dprintk("%s: freeing layout cache %p\n", __func__, lo);
++		WARN_ON(!list_empty(&lo->lo_layouts));
++		io_ops->free_layout(lo);	/* lo must not be used after this */
++		nfsi->layout = NULL;	/* the inode no longer has a layout */
++	}
++}
++
++void
++put_layout(struct inode *inode)
++{
++	spin_lock(&inode->i_lock);
++	put_layout_locked(NFS_I(inode)->layout);
++	spin_unlock(&inode->i_lock);
++
++}
++
++void
++pnfs_layout_release(struct pnfs_layout_type *lo,
++		    struct nfs4_pnfs_layout_segment *range)
++{
++	struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	if (range)
++		pnfs_free_layout(lo, range);
++	/*
++	 * Matched in _pnfs_update_layout for layoutget
++	 * and by get_layout in _pnfs_return_layout for layoutreturn
++	 */
++	put_layout_locked(lo);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	wake_up_all(&nfsi->lo_waitq);
++}
++
++void
++pnfs_destroy_layout(struct nfs_inode *nfsi)
++{
++	struct pnfs_layout_type *lo;
++	struct nfs4_pnfs_layout_segment range = {
++		.iomode = IOMODE_ANY,
++		.offset = 0,
++		.length = NFS4_MAX_UINT64,
++	};
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	lo = nfsi->layout;
++	if (lo) {
++		pnfs_free_layout(lo, &range);
++		WARN_ON(!list_empty(&nfsi->layout->segs));
++		WARN_ON(!list_empty(&nfsi->layout->lo_layouts));
++
++		if (nfsi->layout->refcount != 1)
++			printk(KERN_WARNING "%s: layout refcount not=1 %d\n",
++				__func__, nfsi->layout->refcount);
++		WARN_ON(nfsi->layout->refcount != 1);
++
++		/* Matched by refcount set to 1 in alloc_init_layout */
++		put_layout_locked(lo);
++	}
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++}
++
++/*
++ * Called by the state manager to remove all layouts established under an
++ * expired lease; loops until pnfs_destroy_layout() has emptied cl_layouts.
++ */
++void
++pnfs_destroy_all_layouts(struct nfs_client *clp)
++{
++	struct pnfs_layout_type *lo;
++
++	while (!list_empty(&clp->cl_layouts)) {
++		lo = list_entry(clp->cl_layouts.next, struct pnfs_layout_type,
++				lo_layouts);
++		dprintk("%s freeing layout for inode %lu\n", __func__,
++			lo->lo_inode->i_ino);
++		pnfs_destroy_layout(NFS_I(lo->lo_inode));
++	}
++}
++
++static inline void
++init_lseg(struct pnfs_layout_type *lo, struct pnfs_layout_segment *lseg)
++{
++	INIT_LIST_HEAD(&lseg->fi_list);
++	kref_init(&lseg->kref);
++	lseg->valid = true;
++	lseg->layout = lo;
++}
++
++static void
++destroy_lseg(struct kref *kref)
++{
++	struct pnfs_layout_segment *lseg =
++		container_of(kref, struct pnfs_layout_segment, kref);
++	struct pnfs_layout_type *lo = lseg->layout;
++
++	dprintk("--> %s\n", __func__);
++	PNFS_LD_IO_OPS(lo)->free_lseg(lseg);	/* before lo can be freed */
++	put_layout_locked(lo);	/* matches get_layout in pnfs_insert_layout */
++}
++
++static void
++put_lseg_locked(struct pnfs_layout_segment *lseg)
++{
++	bool do_wake_up;
++	struct nfs_inode *nfsi;
++
++	if (!lseg)
++		return;
++
++	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
++		atomic_read(&lseg->kref.refcount), lseg->valid);
++	do_wake_up = !lseg->valid;
++	nfsi = PNFS_NFS_INODE(lseg->layout);
++	kref_put(&lseg->kref, destroy_lseg);
++	if (do_wake_up)
++		wake_up(&nfsi->lo_waitq);
++}
++
++void
++put_lseg(struct pnfs_layout_segment *lseg)
++{
++	bool do_wake_up;
++	struct nfs_inode *nfsi;
++
++	if (!lseg)
++		return;
++
++	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
++		atomic_read(&lseg->kref.refcount), lseg->valid);
++	do_wake_up = !lseg->valid;
++	nfsi = PNFS_NFS_INODE(lseg->layout);
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	kref_put(&lseg->kref, destroy_lseg);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	if (do_wake_up)
++		wake_up(&nfsi->lo_waitq);
++}
++EXPORT_SYMBOL(put_lseg);
++
++void get_lseg(struct pnfs_layout_segment *lseg)
++{
++	kref_get(&lseg->kref);
++}
++EXPORT_SYMBOL(get_lseg);
++
++static inline u64
++end_offset(u64 start, u64 len)
++{
++	u64 sum = start + len;
++
++	/* a wrapped sum is smaller than start: saturate instead */
++	return sum < start ? NFS4_MAX_UINT64 : sum;
++}
++
++/* offset of the final octet of a range; len must be non-zero */
++static inline u64
++last_byte_offset(u64 start, u64 len)
++{
++	u64 past_end;
++
++	BUG_ON(len == 0);
++	past_end = start + len;
++	return past_end <= start ? NFS4_MAX_UINT64 : past_end - 1;
++}
++
++/*
++ * Is l2 fully contained in l1?
++ *   start1                             end1
++ *   [----------------------------------)
++ *           start2           end2
++ *           [----------------)
++ */
++static inline int
++lo_seg_contained(struct nfs4_pnfs_layout_segment *l1,
++		 struct nfs4_pnfs_layout_segment *l2)
++{
++	u64 outer_end = end_offset(l1->offset, l1->length);
++	u64 inner_end = end_offset(l2->offset, l2->length);
++
++	if (l2->offset < l1->offset)
++		return 0;
++	return inner_end <= outer_end;
++}
++
++/*
++ * Do l1 and l2 intersect?
++ *   start1                             end1
++ *   [----------------------------------)
++ *                              start2           end2
++ *                              [----------------)
++ */
++static inline int
++lo_seg_intersecting(struct nfs4_pnfs_layout_segment *l1,
++		    struct nfs4_pnfs_layout_segment *l2)
++{
++	u64 end1 = end_offset(l1->offset, l1->length);
++	u64 end2 = end_offset(l2->offset, l2->length);
++
++	/* an end of NFS4_MAX_UINT64 is treated as unbounded */
++	if (end1 != NFS4_MAX_UINT64 && end1 <= l2->offset)
++		return 0;
++	return end2 == NFS4_MAX_UINT64 || end2 > l1->offset;
++}
++
++void
++pnfs_set_layout_stateid(struct pnfs_layout_type *lo,
++			const nfs4_stateid *stateid)
++{
++	write_seqlock(&lo->seqlock);
++	memcpy(lo->stateid.u.data, stateid->u.data, sizeof(lo->stateid.u.data));
++	write_sequnlock(&lo->seqlock);
++}
++
++void
++pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_type *lo)
++{
++	int seq;
++
++	dprintk("--> %s\n", __func__);
++
++	do {
++		seq = read_seqbegin(&lo->seqlock);
++		memcpy(dst->u.data, lo->stateid.u.data,
++		       sizeof(lo->stateid.u.data));
++	} while (read_seqretry(&lo->seqlock, seq));
++
++	dprintk("<-- %s\n", __func__);
++}
++
++static void
++pnfs_layout_from_open_stateid(struct pnfs_layout_type *lo,
++			      struct nfs4_state *state)
++{
++	int seq;
++
++	dprintk("--> %s\n", __func__);
++
++	write_seqlock(&lo->seqlock);
++	if (!memcmp(lo->stateid.u.data, &zero_stateid, NFS4_STATEID_SIZE))
++		do {
++			seq = read_seqbegin(&state->seqlock);
++			memcpy(lo->stateid.u.data, state->stateid.u.data,
++					sizeof(state->stateid.u.data));
++		} while (read_seqretry(&state->seqlock, seq));
++	write_sequnlock(&lo->seqlock);
++	dprintk("<-- %s\n", __func__);
++}
++
++/*
++ * Send a LAYOUTGET for the whole file (offset 0, length NFS4_MAX_UINT64);
++ * only range->iomode is taken from the caller's range.  A zero layout
++ * stateid is first seeded from an open stateid.  If the request cannot
++ * be allocated, pnfs_layout_release(lo, NULL) is called and -ENOMEM returned.
++ */
++static int
++send_layoutget(struct inode *ino,
++	   struct nfs_open_context *ctx,
++	   struct nfs4_pnfs_layout_segment *range,
++	   struct pnfs_layout_segment **lsegpp,
++	   struct pnfs_layout_type *lo)
++{
++	int status;
++	struct nfs_server *server = NFS_SERVER(ino);
++	struct nfs4_pnfs_layoutget *lgp;
++
++	dprintk("--> %s\n", __func__);
++
++	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
++	if (lgp == NULL) {
++		pnfs_layout_release(lo, NULL);
++		return -ENOMEM;
++	}
++	lgp->args.minlength = NFS4_MAX_UINT64;
++	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
++	lgp->args.lseg.iomode = range->iomode;
++	lgp->args.lseg.offset = 0;
++	lgp->args.lseg.length = NFS4_MAX_UINT64;
++	lgp->args.type = server->pnfs_curr_ld->id;
++	lgp->args.inode = ino;
++	lgp->lsegpp = lsegpp;
++
++	if (!memcmp(lo->stateid.u.data, &zero_stateid, NFS4_STATEID_SIZE)) {
++		struct nfs_open_context *oldctx = ctx;
++
++		if (!oldctx) {
++			ctx = nfs_find_open_context(ino, NULL,
++					(range->iomode == IOMODE_READ) ?
++					FMODE_READ: FMODE_WRITE);
++			BUG_ON(!ctx);
++		}
++		/* Set the layout stateid from the open stateid */
++		pnfs_layout_from_open_stateid(NFS_I(ino)->layout, ctx->state);
++		if (!oldctx)
++			put_nfs_open_context(ctx);
++	}
++
++	/* Retrieve layout information from server */
++	status = pnfs4_proc_layoutget(lgp);
++
++	dprintk("<-- %s status %d\n", __func__, status);
++	return status;
++}
++
++/*
++ * iomode matching rules:
++ * range	lseg	match
++ * -----	-----	-----
++ * ANY		READ	true
++ * ANY		RW	true
++ * RW		READ	false
++ * RW		RW	true
++ * READ		READ	true
++ * READ		RW	false
++ */
++static inline int
++should_free_lseg(struct pnfs_layout_segment *lseg,
++		   struct nfs4_pnfs_layout_segment *range)
++{
++	if (range->iomode != IOMODE_ANY && range->iomode != lseg->range.iomode)
++		return 0;
++	return lo_seg_intersecting(&lseg->range, range);
++}
++
++static struct pnfs_layout_segment *
++has_layout_to_return(struct pnfs_layout_type *lo,
++		     struct nfs4_pnfs_layout_segment *range)
++{
++	struct pnfs_layout_segment *out = NULL, *lseg;
++	dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
++		__func__, lo, range->offset, range->length, range->iomode);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry (lseg, &lo->segs, fi_list)
++		if (should_free_lseg(lseg, range)) {
++			out = lseg;
++			break;
++		}
++
++	dprintk("%s:Return lseg=%p\n", __func__, out);
++	return out;
++}
++
++static inline bool
++_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
++{
++	return atomic_read(&lseg->kref.refcount) == 1;
++}
++
++
++static void
++pnfs_free_layout(struct pnfs_layout_type *lo,
++		 struct nfs4_pnfs_layout_segment *range)
++{
++	struct pnfs_layout_segment *lseg, *next;
++	dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
++		__func__, lo, range->offset, range->length, range->iomode);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry_safe (lseg, next, &lo->segs, fi_list) {
++		if (!should_free_lseg(lseg, range) ||
++		    !_pnfs_can_return_lseg(lseg))
++			continue;
++		dprintk("%s: freeing lseg %p iomode %d "
++			"offset %llu length %llu\n", __func__,
++			lseg, lseg->range.iomode, lseg->range.offset,
++			lseg->range.length);
++		list_del(&lseg->fi_list);
++		put_lseg_locked(lseg);
++	}
++	if (list_empty(&lo->segs)) {
++		struct nfs_client *clp;
++
++		clp = PNFS_NFS_SERVER(lo)->nfs_client;
++		spin_lock(&clp->cl_lock);
++		list_del_init(&lo->lo_layouts);
++		spin_unlock(&clp->cl_lock);
++		pnfs_set_layout_stateid(lo, &zero_stateid);
++	}
++
++	dprintk("%s:Return\n", __func__);
++}
++
++static bool
++pnfs_return_layout_barrier(struct nfs_inode *nfsi,
++			   struct nfs4_pnfs_layout_segment *range)
++{
++	struct pnfs_layout_segment *lseg;
++	bool ret = false;
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	list_for_each_entry(lseg, &nfsi->layout->segs, fi_list) {
++		if (!should_free_lseg(lseg, range))
++			continue;
++		lseg->valid = false;
++		if (!_pnfs_can_return_lseg(lseg)) {
++			dprintk("%s: wait on lseg %p refcount %d\n",
++				__func__, lseg,
++				atomic_read(&lseg->kref.refcount));
++			ret = true;
++		}
++	}
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	dprintk("%s:Return %d\n", __func__, ret);
++	return ret;
++}
++
++static int
++return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
++	      enum pnfs_layoutreturn_type type, struct pnfs_layout_type *lo,
++	      bool wait)
++{
++	struct nfs4_pnfs_layoutreturn *lrp;
++	struct nfs_server *server = NFS_SERVER(ino);
++	int status = -ENOMEM;
++
++	dprintk("--> %s\n", __func__);
++
++	BUG_ON(type != RETURN_FILE);
++
++	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
++	if (lrp == NULL) {
++		if (lo && (type == RETURN_FILE))
++			pnfs_layout_release(lo, NULL);
++		goto out;
++	}
++	lrp->args.reclaim = 0;
++	lrp->args.layout_type = server->pnfs_curr_ld->id;
++	lrp->args.return_type = type;
++	lrp->args.lseg = *range;
++	lrp->args.inode = ino;
++
++	status = pnfs4_proc_layoutreturn(lrp, wait);
++out:
++	dprintk("<-- %s status: %d\n", __func__, status);
++	return status;
++}
++
++int
++_pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
++		    const nfs4_stateid *stateid, /* optional */
++		    enum pnfs_layoutreturn_type type,
++		    bool wait)
++{
++	struct pnfs_layout_type *lo = NULL;
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct nfs4_pnfs_layout_segment arg;
++	int status = 0;
++
++	dprintk("--> %s type %d\n", __func__, type);
++
++
++	arg.iomode = range ? range->iomode : IOMODE_ANY;
++	arg.offset = 0;
++	arg.length = NFS4_MAX_UINT64;
++
++	if (type == RETURN_FILE) {
++		spin_lock(&ino->i_lock);
++		lo = nfsi->layout;
++		if (lo && !has_layout_to_return(lo, &arg)) {
++			lo = NULL;
++		}
++		if (!lo) {
++			spin_unlock(&ino->i_lock);
++			dprintk("%s: no layout segments to return\n", __func__);
++			goto out;
++		}
++
++		/* Reference for layoutreturn matched in pnfs_layout_release */
++		get_layout(lo);
++
++		spin_unlock(&ino->i_lock);
++
++		if (pnfs_return_layout_barrier(nfsi, &arg)) {
++			if (stateid) { /* callback */
++				status = -EAGAIN;
++				goto out_put;
++			}
++			dprintk("%s: waiting\n", __func__);
++			wait_event(nfsi->lo_waitq,
++				   !pnfs_return_layout_barrier(nfsi, &arg));
++		}
++
++		if (layoutcommit_needed(nfsi)) {
++			if (stateid && !wait) { /* callback */
++				dprintk("%s: layoutcommit pending\n", __func__);
++				status = -EAGAIN;
++				goto out_put;
++			}
++			status = pnfs_layoutcommit_inode(ino, wait);
++			if (status) {
++				/* Return layout even if layoutcommit fails */
++				dprintk("%s: layoutcommit failed, status=%d. "
++					"Returning layout anyway\n",
++					__func__, status);
++			}
++		}
++
++		if (!stateid)
++			status = return_layout(ino, &arg, type, lo, wait);
++		else
++			pnfs_layout_release(lo, &arg);
++	}
++out:
++	dprintk("<-- %s status: %d\n", __func__, status);
++	return status;
++out_put:
++	put_layout(ino);
++	goto out;
++}
++
++/*
++ * Order two layout segments for the layout cache; returns <0, 0 or >0.
++ */
++static inline s64
++cmp_layout(struct nfs4_pnfs_layout_segment *l1,
++	   struct nfs4_pnfs_layout_segment *l2)
++{
++	/*
++	 * Compare the u64 fields directly: an s64 difference has the wrong
++	 * sign once they differ by 2^63 or more (e.g. NFS4_MAX_UINT64 vs 1).
++	 */
++	/* higher offset > lower offset */
++	if (l1->offset != l2->offset)
++		return l1->offset > l2->offset ? 1 : -1;
++
++	/* longer length > shorter length */
++	if (l1->length != l2->length)
++		return l1->length > l2->length ? 1 : -1;
++
++	/* read > read/write */
++	return (int)(l1->iomode == IOMODE_READ) -
++	       (int)(l2->iomode == IOMODE_READ);
++}
++
++static void
++pnfs_insert_layout(struct pnfs_layout_type *lo,
++		   struct pnfs_layout_segment *lseg)
++{
++	struct pnfs_layout_segment *lp;
++	int found = 0;
++
++	dprintk("%s:Begin\n", __func__);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	if (list_empty(&lo->segs)) {
++		struct nfs_client *clp = PNFS_NFS_SERVER(lo)->nfs_client;
++
++		spin_lock(&clp->cl_lock);
++		BUG_ON(!list_empty(&lo->lo_layouts));
++		list_add_tail(&lo->lo_layouts, &clp->cl_layouts);
++		spin_unlock(&clp->cl_lock);
++	}
++	list_for_each_entry (lp, &lo->segs, fi_list) {
++		if (cmp_layout(&lp->range, &lseg->range) > 0)
++			continue;
++		list_add_tail(&lseg->fi_list, &lp->fi_list);
++		dprintk("%s: inserted lseg %p "
++			"iomode %d offset %llu length %llu before "
++			"lp %p iomode %d offset %llu length %llu\n",
++			__func__, lseg, lseg->range.iomode,
++			lseg->range.offset, lseg->range.length,
++			lp, lp->range.iomode, lp->range.offset,
++			lp->range.length);
++		found = 1;
++		break;
++	}
++	if (!found) {
++		list_add_tail(&lseg->fi_list, &lo->segs);
++		dprintk("%s: inserted lseg %p "
++			"iomode %d offset %llu length %llu at tail\n",
++			__func__, lseg, lseg->range.iomode,
++			lseg->range.offset, lseg->range.length);
++	}
++	get_layout(lo);
++
++	dprintk("%s:Return\n", __func__);
++}
++
++/*
++ * Each layoutdriver embeds pnfs_layout_type as the first field in its
++ * per-layout type layout cache structure and returns it ZEROed
++ * from layoutdriver_io_ops->alloc_layout
++ */
++static struct pnfs_layout_type *
++alloc_init_layout(struct inode *ino)
++{
++	struct pnfs_layout_type *lo;
++	struct layoutdriver_io_operations *io_ops;
++
++	io_ops = NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops;
++	lo = io_ops->alloc_layout(ino);
++	if (!lo) {
++		printk(KERN_ERR
++			"%s: out of memory: io_ops->alloc_layout failed\n",
++			__func__);
++		return NULL;
++	}
++	lo->refcount = 1;
++	INIT_LIST_HEAD(&lo->lo_layouts);
++	INIT_LIST_HEAD(&lo->segs);
++	seqlock_init(&lo->seqlock);
++	lo->lo_inode = ino;
++	return lo;
++}
++
++/*
++ * Return the inode's layout, allocating one if none exists (NULL on failure).
++ * ino->i_lock must be held by the caller; it is dropped and re-taken
++ * around the allocation and around freeing an allocation that lost a race.
++ */
++static struct pnfs_layout_type *
++pnfs_alloc_layout(struct inode *ino)
++{
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct pnfs_layout_type *new = NULL;
++
++	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
++
++	BUG_ON_UNLOCKED_INO(ino);
++	if (likely(nfsi->layout))
++		return nfsi->layout;
++
++	spin_unlock(&ino->i_lock);
++	new = alloc_init_layout(ino);
++	spin_lock(&ino->i_lock);
++
++	if (likely(nfsi->layout == NULL)) {	/* Won the race? */
++		nfsi->layout = new;
++	} else if (new) {
++		/* Reference the layout across i_lock release and grab */
++		get_layout(nfsi->layout);
++		spin_unlock(&ino->i_lock);
++		NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops->free_layout(new);
++		spin_lock(&ino->i_lock);
++		put_layout_locked(nfsi->layout);
++	}
++	return nfsi->layout;
++}
++
++/*
++ * iomode matching rules:
++ * range	lseg	match
++ * -----	-----	-----
++ * ANY		READ	true
++ * ANY		RW	true
++ * RW		READ	false
++ * RW		RW	true
++ * READ		READ	true
++ * READ		RW	true
++ */
++static inline int
++has_matching_lseg(struct pnfs_layout_segment *lseg,
++		  struct nfs4_pnfs_layout_segment *range)
++{
++	struct nfs4_pnfs_layout_segment range1;
++
++	if ((range->iomode == IOMODE_RW && lseg->range.iomode != IOMODE_RW) ||
++	    !lo_seg_intersecting(&lseg->range, range))
++		return 0;
++
++	/* range1 covers only the first byte in the range */
++	range1 = *range;
++	range1.length = 1;
++	return lo_seg_contained(&lseg->range, &range1);
++}
++
++/*
++ * lookup range in layout
++ */
++static struct pnfs_layout_segment *
++pnfs_has_layout(struct pnfs_layout_type *lo,
++		struct nfs4_pnfs_layout_segment *range,
++		bool take_ref,
++		bool only_valid)
++{
++	struct pnfs_layout_segment *lseg, *ret = NULL;
++
++	dprintk("%s:Begin\n", __func__);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry (lseg, &lo->segs, fi_list) {
++		if (has_matching_lseg(lseg, range) &&
++		    (lseg->valid || !only_valid)) {
++			ret = lseg;
++			if (take_ref)
++				get_lseg(ret);
++			break;
++		}
++		if (cmp_layout(range, &lseg->range) > 0)
++			break;
++	}
++
++	dprintk("%s:Return lseg %p take_ref %d ref %d valid %d\n",
++		__func__, ret, take_ref,
++		ret ? atomic_read(&ret->kref.refcount) : 0,
++		ret ? ret->valid : 0);
++	return ret;
++}
++
++/* Update the file's layout for the given iomode (a whole-file range is
++ * always used; pos and count are currently ignored).
++ * Layout is retrieved from the server if needed.  If lsegpp is given, the
++ * appropriate layout segment is referenced and returned to the caller.
++ */
++void
++_pnfs_update_layout(struct inode *ino,
++		   struct nfs_open_context *ctx,
++		   loff_t pos,
++		   u64 count,
++		   enum pnfs_iomode iomode,
++		   struct pnfs_layout_segment **lsegpp)
++{
++	struct nfs4_pnfs_layout_segment arg = {
++		.iomode = iomode,
++		.offset = 0,
++		.length = NFS4_MAX_UINT64,
++	};
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct pnfs_layout_type *lo;
++	struct pnfs_layout_segment *lseg = NULL;
++	bool take_ref = (lsegpp != NULL);
++
++	if (take_ref)
++		*lsegpp = NULL;
++	spin_lock(&ino->i_lock);
++	lo = pnfs_alloc_layout(ino);
++	if (lo == NULL) {
++		dprintk("%s ERROR: can't get pnfs_layout_type\n", __func__);
++		goto out_unlock;
++	}
++
++	/* Check to see if the layout for the given range already exists */
++	lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
++	if (lseg && !lseg->valid) {
++		if (take_ref)
++			put_lseg_locked(lseg);
++		/* someone is cleaning the layout */
++		lseg = NULL;
++		goto out_unlock;
++	}
++
++	if (lseg) {
++		dprintk("%s: Using cached lseg %p for %llu@%llu iomode %d)\n",
++			__func__,
++			lseg,
++			arg.length,
++			arg.offset,
++			arg.iomode);
++
++		goto out_unlock;
++	}
++
++	/* if get layout already failed once goto out */
++	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->pnfs_layout_state)) {
++		if (unlikely(nfsi->pnfs_layout_suspend &&
++		    get_seconds() >= nfsi->pnfs_layout_suspend)) {
++			dprintk("%s: layout_get resumed\n", __func__);
++			clear_bit(lo_fail_bit(iomode),
++				  &nfsi->layout->pnfs_layout_state);
++			nfsi->pnfs_layout_suspend = 0;
++		} else
++			goto out_unlock;
++	}
++
++	/* Reference the layout for layoutget matched in pnfs_layout_release */
++	get_layout(lo);
++	spin_unlock(&ino->i_lock);
++
++	send_layoutget(ino, ctx, &arg, lsegpp, lo);
++out:	/* nfsi->layout is NULL here if pnfs_alloc_layout() failed */
++	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
++		nfsi->layout ? nfsi->layout->pnfs_layout_state : 0UL, lseg);
++	return;
++out_unlock:
++	if (lsegpp)
++		*lsegpp = lseg;
++	spin_unlock(&ino->i_lock);
++	goto out;
++}
++
++void
++pnfs_get_layout_done(struct nfs4_pnfs_layoutget *lgp, int rpc_status)
++{
++	struct pnfs_layout_segment *lseg = NULL;
++	struct nfs_inode *nfsi = NFS_I(lgp->args.inode);
++	time_t suspend = 0;
++
++	dprintk("-->%s\n", __func__);
++
++	lgp->status = rpc_status;
++	if (likely(!rpc_status)) {
++		if (unlikely(lgp->res.layout.len < 0)) {
++			printk(KERN_ERR
++			       "%s: ERROR Returned layout size is ZERO\n", __func__);
++			lgp->status = -EIO;
++		}
++		goto out;
++	}
++
++	dprintk("%s: ERROR retrieving layout %d\n", __func__, rpc_status);
++	switch (rpc_status) {
++	case -NFS4ERR_BADLAYOUT:
++		lgp->status = -ENOENT;
++		/* FALLTHROUGH */
++	case -EACCES:	/* NFS4ERR_ACCESS */
++		/* transient error, don't mark with NFS_INO_LAYOUT_FAILED */
++		goto out;
++
++	case -NFS4ERR_LAYOUTTRYLATER:
++	case -NFS4ERR_RECALLCONFLICT:
++	case -NFS4ERR_OLD_STATEID:
++	case -EAGAIN:	/* NFS4ERR_LOCKED */
++		lgp->status = -NFS4ERR_DELAY;	/* for nfs4_handle_exception */
++		/* FALLTHROUGH */
++	case -NFS4ERR_GRACE:
++	case -NFS4ERR_DELAY:
++		goto out;
++
++	case -NFS4ERR_ADMIN_REVOKED:
++	case -NFS4ERR_DELEG_REVOKED:
++		/* The layout is expected to be returned at this point.
++		 * This should clear the layout stateid as well */
++		suspend = get_seconds() + 1;
++		break;
++
++	case -NFS4ERR_LAYOUTUNAVAILABLE:
++		lgp->status = -ENOTSUPP;
++		break;
++
++	case -NFS4ERR_REP_TOO_BIG:
++	case -NFS4ERR_REP_TOO_BIG_TO_CACHE:
++		lgp->status = -E2BIG;
++		break;
++
++	/* Leave the following errors untranslated */
++	case -NFS4ERR_DEADSESSION:
++	case -NFS4ERR_DQUOT:
++	case -EINVAL:		/* NFS4ERR_INVAL */
++	case -EIO:		/* NFS4ERR_IO */
++	case -NFS4ERR_FHEXPIRED:
++	case -NFS4ERR_MOVED:
++	case -NFS4ERR_NOSPC:
++	case -ESERVERFAULT:	/* NFS4ERR_SERVERFAULT */
++	case -ESTALE:		/* NFS4ERR_STALE */
++	case -ETOOSMALL:	/* NFS4ERR_TOOSMALL */
++		break;
++
++	/* The following errors are our fault and should never happen */
++	case -NFS4ERR_BADIOMODE:
++	case -NFS4ERR_BADXDR:
++	case -NFS4ERR_REQ_TOO_BIG:
++	case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
++	case -NFS4ERR_WRONG_TYPE:
++		lgp->status = -EINVAL;
++		/* FALLTHROUGH */
++	case -NFS4ERR_BAD_STATEID:
++	case -NFS4ERR_NOFILEHANDLE:
++	case -ENOTSUPP:	/* NFS4ERR_NOTSUPP */
++	case -NFS4ERR_OPENMODE:
++	case -NFS4ERR_OP_NOT_IN_SESSION:
++	case -NFS4ERR_TOO_MANY_OPS:
++		dprintk("%s: error %d: should never happen\n", __func__,
++			rpc_status);
++		break;
++
++	/* The following errors are the server's fault */
++	default:
++		dprintk("%s: illegal error %d\n", __func__, rpc_status);
++		lgp->status = -EIO;
++		break;
++	}
++
++	/* remember that get layout failed and suspend trying */
++	nfsi->pnfs_layout_suspend = suspend;
++	set_bit(lo_fail_bit(lgp->args.lseg.iomode),
++		&nfsi->layout->pnfs_layout_state);
++	dprintk("%s: layout_get suspended until %ld\n",
++		__func__, suspend);
++out:
++	dprintk("%s end (err:%d) state 0x%lx lseg %p\n",
++		__func__, lgp->status, nfsi->layout->pnfs_layout_state, lseg);
++	return;
++}
++
++int
++pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp)
++{
++	struct pnfs_layout_type *lo = NFS_I(lgp->args.inode)->layout;
++	struct nfs4_pnfs_layoutget_res *res = &lgp->res;
++	struct pnfs_layout_segment *lseg;
++	struct inode *ino = PNFS_INODE(lo);
++	int status = 0;
++
++	/* Inject layout blob into I/O device driver */
++	lseg = PNFS_LD_IO_OPS(lo)->alloc_lseg(lo, res);
++	if (!lseg || IS_ERR(lseg)) {
++		if (!lseg)
++			status = -ENOMEM;
++		else
++			status = PTR_ERR(lseg);
++		dprintk("%s: Could not allocate layout: error %d\n",
++		       __func__, status);
++		goto out;
++	}
++
++	spin_lock(&ino->i_lock);
++	init_lseg(lo, lseg);
++	lseg->range = res->lseg;
++	if (lgp->lsegpp) {
++		get_lseg(lseg);
++		*lgp->lsegpp = lseg;
++	}
++	pnfs_insert_layout(lo, lseg);
++
++	if (res->return_on_close) {
++		lo->roc_iomode |= res->lseg.iomode;
++		if (!lo->roc_iomode)
++			lo->roc_iomode = IOMODE_ANY;
++	}
++
++	/* Done processing layoutget. Set the layout stateid */
++	pnfs_set_layout_stateid(lo, &res->stateid);
++	spin_unlock(&ino->i_lock);
++out:
++	return status;
++}
++
++void
++readahead_range(struct inode *inode, struct list_head *pages, loff_t *offset,
++		size_t *count)
++{
++	struct page *first, *last;
++	loff_t foff, i_size = i_size_read(inode);
++	pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
++	size_t range;
++
++
++	first = list_entry((pages)->prev, struct page, lru);
++	last = list_entry((pages)->next, struct page, lru);
++
++	foff = (loff_t)first->index << PAGE_CACHE_SHIFT;
++
++	range = (last->index - first->index) * PAGE_CACHE_SIZE;
++	if (last->index == end_index)
++		range += ((i_size - 1) & ~PAGE_CACHE_MASK) + 1;
++	else
++		range += PAGE_CACHE_SIZE;
++	dprintk("%s foff %lu, range %Zu\n", __func__, (unsigned long)foff,
++		range);
++	*offset = foff;
++	*count = range;
++}
++
++void
++pnfs_set_pg_test(struct inode *inode, struct nfs_pageio_descriptor *pgio)
++{
++	struct pnfs_layout_type *laytype;
++	struct pnfs_layoutdriver_type *ld;
++
++	pgio->pg_test = NULL;
++
++	laytype = NFS_I(inode)->layout;
++	ld = NFS_SERVER(inode)->pnfs_curr_ld;
++	if (!pnfs_enabled_sb(NFS_SERVER(inode)) || !laytype)
++		return;
++
++	if (ld->ld_policy_ops)
++		pgio->pg_test = ld->ld_policy_ops->pg_test;
++}
++
++static u32
++pnfs_getboundary(struct inode *inode)
++{
++	u32 stripe_size = 0;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct layoutdriver_policy_operations *policy_ops;
++
++	if (!nfss->pnfs_curr_ld)
++		goto out;
++
++	policy_ops = nfss->pnfs_curr_ld->ld_policy_ops;
++	if (!policy_ops || !policy_ops->get_stripesize)
++		goto out;
++
++	/* The default is to not gather across stripes */
++	if (pnfs_ld_gather_across_stripes(nfss->pnfs_curr_ld))
++		goto out;
++
++	spin_lock(&inode->i_lock);
++	if (NFS_I(inode)->layout)
++		stripe_size = policy_ops->get_stripesize(NFS_I(inode)->layout);
++	spin_unlock(&inode->i_lock);
++out:
++	return stripe_size;
++}
++
++/*
++ * rsize is already set by caller to MDS rsize.
++ */
++void
++pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
++		  struct inode *inode,
++		  struct nfs_open_context *ctx,
++		  struct list_head *pages,
++		  size_t *rsize)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	size_t count = 0;
++	loff_t loff;
++
++	pgio->pg_iswrite = 0;
++	pgio->pg_boundary = 0;
++	pgio->pg_test = NULL;
++	pgio->pg_lseg = NULL;
++
++	if (!pnfs_enabled_sb(nfss))
++		return;
++
++	/* Calculate the total read-ahead count */
++	readahead_range(inode, pages, &loff, &count);
++
++	if (count > 0) {
++		_pnfs_update_layout(inode, ctx, loff, count, IOMODE_READ,
++				    &pgio->pg_lseg);
++		if (!pgio->pg_lseg)
++			return;
++
++		*rsize = NFS_SERVER(inode)->ds_rsize;
++		pgio->pg_boundary = pnfs_getboundary(inode);
++		if (pgio->pg_boundary)
++			pnfs_set_pg_test(inode, pgio);
++	}
++}
++
++void
++pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
++		       size_t *wsize)
++{
++	struct nfs_server *server = NFS_SERVER(inode);
++
++	pgio->pg_iswrite = 1;
++	if (!pnfs_enabled_sb(server)) {
++		pgio->pg_boundary = 0;
++		pgio->pg_test = NULL;
++		return;
++	}
++	pgio->pg_boundary = pnfs_getboundary(inode);
++	pnfs_set_pg_test(inode, pgio);
++	*wsize = server->ds_wsize;
++}
++
++/* Return I/O buffer size for a layout driver
++ * This value will determine what size reads and writes
++ * will be gathered into and sent to the data servers.
++ * blocksize must be a multiple of the page cache size.
++ */
++unsigned int
++pnfs_getiosize(struct nfs_server *server)
++{
++	if (!PNFS_EXISTS_LDPOLICY_OP(server, get_blocksize))
++		return 0;
++	return server->pnfs_curr_ld->ld_policy_ops->get_blocksize();
++}
++
++void
++pnfs_set_ds_iosize(struct nfs_server *server)
++{
++	unsigned driver_size = pnfs_getiosize(server);
++
++	/* No driver-supplied size: reuse server->wsize and server->rsize */
++	if (driver_size == 0) {
++		server->ds_wsize = server->wsize;
++		server->ds_rsize = server->rsize;
++		return;
++	}
++	server->ds_wsize = nfs_block_size(driver_size, NULL);
++	server->ds_rsize = server->ds_wsize;
++}
++
++static int
++pnfs_call_done(struct pnfs_call_data *pdata, struct rpc_task *task, void *data)
++{
++	put_lseg(pdata->lseg);
++	pdata->lseg = NULL;
++	pdata->call_ops->rpc_call_done(task, data);
++	if (pdata->pnfs_error == -EAGAIN || task->tk_status == -EAGAIN)
++		return -EAGAIN;
++	if (pdata->pnfsflags & PNFS_NO_RPC) {
++		pdata->call_ops->rpc_release(data);
++	} else {
++		/*
++		 * just restore original rpc call ops
++		 * rpc_release will be called later by the rpc scheduling layer.
++		 */
++		task->tk_ops = pdata->call_ops;
++	}
++	return 0;
++}
++
++/* Post-write completion function
++ * Invoked by all layout drivers when write_pagelist is done.
++ *
++ * NOTE: callers set data->pnfsflags PNFS_NO_RPC
++ * so that the NFS cleanup routines perform only the page cache
++ * cleanup.
++ */
++static void
++pnfs_write_retry(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++	struct nfs4_pnfs_layout_segment range;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++	range.iomode = IOMODE_RW;
++	range.offset = wdata->args.offset;
++	range.length = wdata->args.count;
++	_pnfs_return_layout(wdata->inode, &range, NULL, RETURN_FILE, true);
++	pnfs_initiate_write(wdata, NFS_CLIENT(wdata->inode),
++			    wdata->pdata.call_ops, wdata->pdata.how);
++}
++
++static void
++pnfs_writeback_done(struct nfs_write_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	/* update last write offset and need layout commit
++	 * for non-files layout types (files layout calls
++	 * pnfs4_write_done for this)
++	 */
++	if ((pdata->pnfsflags & PNFS_NO_RPC) &&
++	    data->task.tk_status >= 0 && data->res.count > 0) {
++		struct nfs_inode *nfsi = NFS_I(data->inode);
++
++		pnfs_update_last_write(nfsi, data->args.offset, data->res.count);
++		pnfs_need_layoutcommit(nfsi, data->args.context);
++	}
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		INIT_WORK(&data->task.u.tk_work, pnfs_write_retry);
++		queue_work(nfsiod_workqueue, &data->task.u.tk_work);
++	}
++}
++
++static void _pnfs_clear_lseg_from_pages(struct list_head *head)
++{
++	struct nfs_page *req;
++
++	list_for_each_entry(req, head, wb_list) {
++		put_lseg(req->wb_lseg);
++		req->wb_lseg = NULL;
++	}
++}
++
++/*
++ * Call the appropriate parallel I/O subsystem write function.
++ * If no I/O device driver exists, or one does match the returned
++ * fstype, then return a positive status for regular NFS processing.
++ *
++ * TODO: Is wdata->how and wdata->args.stable always the same value?
++ * TODO: It seems in NFS, the server may not do a stable write even
++ * though it was requested (and vice-versa?).  To check, it looks
++ * in data->res.verf->committed.  Do we need this ability
++ * for non-file layout drivers?
++ */
++enum pnfs_try_status
++pnfs_try_to_write_data(struct nfs_write_data *wdata,
++			const struct rpc_call_ops *call_ops, int how)
++{
++	struct inode *inode = wdata->inode;
++	enum pnfs_try_status trypnfs;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct pnfs_layout_segment *lseg = wdata->req->wb_lseg;
++
++	wdata->pdata.call_ops = call_ops;
++	wdata->pdata.pnfs_error = 0;
++	wdata->pdata.how = how;
++
++	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
++		inode->i_ino, wdata->args.count, wdata->args.offset, how);
++
++	get_lseg(lseg);
++
++	if (!pnfs_use_rpc(nfss))
++		wdata->pdata.pnfsflags |= PNFS_NO_RPC;
++	wdata->pdata.lseg = lseg;
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->write_pagelist(wdata,
++		nfs_page_array_len(wdata->args.pgbase, wdata->args.count),
++								how);
++
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {
++		wdata->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		wdata->pdata.lseg = NULL;
++		put_lseg(lseg);
++		_pnfs_clear_lseg_from_pages(&wdata->pages);
++	} else {
++		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
++	}
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++
++/* Post-read completion function.  Invoked by all layout drivers when
++ * read_pagelist is done
++ */
++static void
++pnfs_read_retry(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_read_data *rdata;
++	struct nfs4_pnfs_layout_segment range;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	rdata = container_of(task, struct nfs_read_data, task);
++	range.iomode = IOMODE_RW;	/* NOTE(review): skips READ lsegs? */
++	range.offset = rdata->args.offset;
++	range.length = rdata->args.count;
++	_pnfs_return_layout(rdata->inode, &range, NULL, RETURN_FILE, true);
++	pnfs_initiate_read(rdata, NFS_CLIENT(rdata->inode),
++			   rdata->pdata.call_ops);
++}
++
++static void
++pnfs_read_done(struct nfs_read_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		INIT_WORK(&data->task.u.tk_work, pnfs_read_retry);
++		queue_work(nfsiod_workqueue, &data->task.u.tk_work);
++	}
++}
++
++/*
++ * Call the appropriate parallel I/O subsystem read function.
++ * If no I/O device driver exists, or none matches the returned
++ * fstype, then return a positive status for regular NFS processing.
++ */
++enum pnfs_try_status
++pnfs_try_to_read_data(struct nfs_read_data *rdata,
++		       const struct rpc_call_ops *call_ops)
++{
++	struct inode *inode = rdata->inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct pnfs_layout_segment *lseg = rdata->req->wb_lseg;
++	enum pnfs_try_status trypnfs;
++
++	rdata->pdata.call_ops = call_ops;
++	rdata->pdata.pnfs_error = 0;
++
++	dprintk("%s: Reading ino:%lu %u@%llu\n",
++		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
++
++	get_lseg(lseg);
++
++	if (!pnfs_use_rpc(nfss))
++		rdata->pdata.pnfsflags |= PNFS_NO_RPC;
++	rdata->pdata.lseg = lseg;
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->read_pagelist(rdata,
++		nfs_page_array_len(rdata->args.pgbase, rdata->args.count));
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {
++		rdata->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		rdata->pdata.lseg = NULL;
++		put_lseg(lseg);
++		_pnfs_clear_lseg_from_pages(&rdata->pages);
++	} else {
++		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
++	}
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++
++/*
++ * This gives the layout driver an opportunity to read in pages "around"
++ * the data to be written.  It returns 0 on success, otherwise an error code
++ * which will either be passed up to user, or ignored if
++ * some previous part of write succeeded.
++ * Note the range [pos, pos+len-1] is entirely within the page.
++ */
++int _pnfs_write_begin(struct inode *inode, struct page *page,
++		      loff_t pos, unsigned len,
++		      struct pnfs_layout_segment *lseg,
++		      struct pnfs_fsdata **fsdata)
++{
++	struct pnfs_fsdata *data;
++	int status = 0;
++
++	dprintk("--> %s: pos=%llu len=%u\n",
++		__func__, (unsigned long long)pos, len);
++	data = kzalloc(sizeof(struct pnfs_fsdata), GFP_KERNEL);
++	if (!data) {
++		status = -ENOMEM;
++		goto out;
++	}
++	data->lseg = lseg; /* refcount passed into data to be managed there */
++	status = NFS_SERVER(inode)->pnfs_curr_ld->ld_io_ops->write_begin(
++						lseg, page, pos, len, data);
++	if (status) {
++		kfree(data);
++		data = NULL;
++	}
++out:
++	*fsdata = data;
++	dprintk("<-- %s: status=%d\n", __func__, status);
++	return status;
++}
++
++/* Return 0 on success, negative on failure */
++/* CAREFUL - what happens if copied < len??? */
++int _pnfs_write_end(struct inode *inode, struct page *page,
++		    loff_t pos, unsigned len, unsigned copied,
++		    struct pnfs_layout_segment *lseg)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int status;
++
++	status = nfss->pnfs_curr_ld->ld_io_ops->write_end(inode, page,
++						pos, len, copied, lseg);
++	return status;
++}
++
++/* pNFS Commit callback function for all layout drivers */
++static void
++pnfs_commit_done(struct nfs_write_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		struct nfs4_pnfs_layout_segment range = {
++			.iomode = IOMODE_RW,
++			.offset = data->args.offset,
++			.length = data->args.count,
++		};
++		dprintk("%s: retrying\n", __func__);
++		_pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE,
++				    true);
++		pnfs_initiate_commit(data, NFS_CLIENT(data->inode),
++				     pdata->call_ops, pdata->how, 1);
++	}
++}
++
++enum pnfs_try_status
++pnfs_try_to_commit(struct nfs_write_data *data,
++		    const struct rpc_call_ops *call_ops, int sync)
++{
++	struct inode *inode = data->inode;
++	struct nfs_server *nfss = NFS_SERVER(data->inode);
++	enum pnfs_try_status trypnfs;
++
++	dprintk("%s: Begin\n", __func__);
++
++	/* We need to account for possibility that
++	 * each nfs_page can point to a different lseg (or be NULL).
++	 * For the immediate case of whole-file-only layouts, we at
++	 * least know there can be only a single lseg.
++	 * We still have to account for the possibility of some being NULL.
++	 * This will be done by passing the buck to the layout driver.
++	 */
++	data->pdata.call_ops = call_ops;
++	data->pdata.pnfs_error = 0;
++	data->pdata.how = sync;
++	data->pdata.lseg = NULL;
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->commit(data, sync);
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {
++		data->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		_pnfs_clear_lseg_from_pages(&data->pages);
++	} else
++		nfs_inc_stats(inode, NFSIOS_PNFS_COMMIT);
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++
++void pnfs_cleanup_layoutcommit(struct pnfs_layoutcommit_data *data)
++{
++	struct nfs_server *nfss = NFS_SERVER(data->args.inode);
++
++	/* TODO: Maybe we should avoid this by allowing the layout driver
++	* to directly xdr its layout on the wire.
++	*/
++	if (nfss->pnfs_curr_ld->ld_io_ops->cleanup_layoutcommit)
++		nfss->pnfs_curr_ld->ld_io_ops->cleanup_layoutcommit(
++					NFS_I(data->args.inode)->layout,
++					&data->args, data->status);
++}
++
++/*
++ * Set up the argument/result storage required for the RPC call.
++ */
++static int
++pnfs_layoutcommit_setup(struct inode *inode,
++			struct pnfs_layoutcommit_data *data,
++			loff_t write_begin_pos, loff_t write_end_pos)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int result = 0;
++
++	dprintk("--> %s\n", __func__);
++
++	data->args.inode = inode;
++	data->args.fh = NFS_FH(inode);
++	data->args.layout_type = nfss->pnfs_curr_ld->id;
++	data->res.fattr = &data->fattr;
++	nfs_fattr_init(&data->fattr);
++
++	/* TODO: Need to determine the correct values */
++	data->args.time_modify_changed = 0;
++
++	/* Set values from inode so it can be reset
++	 */
++	data->args.lseg.iomode = IOMODE_RW;
++	data->args.lseg.offset = write_begin_pos;
++	data->args.lseg.length = write_end_pos - write_begin_pos + 1;
++	data->args.lastbytewritten =  min(write_end_pos,
++					  i_size_read(inode) - 1);
++	data->args.bitmask = nfss->attr_bitmask;
++	data->res.server = nfss;
++
++	/* Call layout driver to set the arguments */
++	if (nfss->pnfs_curr_ld->ld_io_ops->setup_layoutcommit)
++		result = nfss->pnfs_curr_ld->ld_io_ops->setup_layoutcommit(
++				NFS_I(inode)->layout, &data->args);
++
++	dprintk("<-- %s Status %d\n", __func__, result);
++	return result;
++}
++
++/* Issue an async layoutcommit for an inode.
++ */
++int
++pnfs_layoutcommit_inode(struct inode *inode, int sync)
++{
++	struct pnfs_layoutcommit_data *data;
++	struct nfs_inode *nfsi = NFS_I(inode);
++	loff_t write_begin_pos;
++	loff_t write_end_pos;
++
++	int status = 0;
++
++	dprintk("%s Begin (sync:%d)\n", __func__, sync);
++
++	BUG_ON(!has_layout(nfsi));
++
++	data = pnfs_layoutcommit_alloc();
++	if (!data)
++		return -ENOMEM;
++
++	spin_lock(&inode->i_lock);
++	if (!layoutcommit_needed(nfsi)) {
++		spin_unlock(&inode->i_lock);
++		goto out_free;
++	}
++
++	/* Clear layoutcommit properties in the inode so
++	 * new lc info can be generated
++	 */
++	write_begin_pos = nfsi->layout->pnfs_write_begin_pos;
++	write_end_pos = nfsi->layout->pnfs_write_end_pos;
++	data->cred = nfsi->layout->lo_cred;
++	nfsi->layout->pnfs_write_begin_pos = 0;
++	nfsi->layout->pnfs_write_end_pos = 0;
++	nfsi->layout->lo_cred = NULL;
++	__clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->pnfs_layout_state);
++	pnfs_get_layout_stateid(&data->args.stateid, nfsi->layout);
++
++	/* Reference for layoutcommit matched in pnfs_layoutcommit_release */
++	get_layout(NFS_I(inode)->layout);
++
++	spin_unlock(&inode->i_lock);
++
++	/* Set up layout commit args */
++	status = pnfs_layoutcommit_setup(inode, data, write_begin_pos,
++					 write_end_pos);
++	if (status) {
++		/* The layout driver failed to setup the layoutcommit */
++		put_rpccred(data->cred);
++		put_layout(inode);
++		goto out_free;
++	}
++	status = pnfs4_proc_layoutcommit(data, sync);
++out:
++	dprintk("%s end (err:%d)\n", __func__, status);
++	return status;
++out_free:
++	pnfs_layoutcommit_free(data);
++	goto out;
++}
++
++void pnfs_free_fsdata(struct pnfs_fsdata *fsdata)
++{
++	if (fsdata) {
++		/* lseg refcounting handled directly in nfs_write_end */
++		kfree(fsdata);
++	}
++}
++
++/* Callback operations for layout drivers.
++ */
++struct pnfs_client_operations pnfs_ops = {
++	.nfs_getdevicelist = nfs4_pnfs_getdevicelist,
++	.nfs_getdeviceinfo = nfs4_pnfs_getdeviceinfo,
++	.nfs_readlist_complete = pnfs_read_done,
++	.nfs_writelist_complete = pnfs_writeback_done,
++	.nfs_commit_complete = pnfs_commit_done,
++};
++
++EXPORT_SYMBOL(pnfs_unregister_layoutdriver);
++EXPORT_SYMBOL(pnfs_register_layoutdriver);
++
++
++/* Device ID cache. Supports one layout type per struct nfs_client */
++int
++nfs4_alloc_init_deviceid_cache(struct nfs_client *clp,
++			 void (*free_callback)(struct kref *))
++{
++	struct nfs4_deviceid_cache *c;
++
++	c = kzalloc(sizeof(struct nfs4_deviceid_cache), GFP_KERNEL);
++	if (!c)
++		return -ENOMEM;
++	spin_lock(&clp->cl_lock);
++	if (clp->cl_devid_cache != NULL) {
++		kref_get(&clp->cl_devid_cache->dc_kref);
++		spin_unlock(&clp->cl_lock);
++		dprintk("%s [kref [%d]]\n", __func__,
++			atomic_read(&clp->cl_devid_cache->dc_kref.refcount));
++		kfree(c);
++	} else {
++		int i;
++
++		spin_lock_init(&c->dc_lock);
++		for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE ; i++)
++			INIT_HLIST_HEAD(&c->dc_deviceids[i]);
++		kref_init(&c->dc_kref);
++		c->dc_free_callback = free_callback;
++		clp->cl_devid_cache = c;
++		spin_unlock(&clp->cl_lock);
++		dprintk("%s [new]\n", __func__);
++	}
++	return 0;
++}
++EXPORT_SYMBOL(nfs4_alloc_init_deviceid_cache);
++
++void
++nfs4_init_deviceid_node(struct nfs4_deviceid *d)
++{
++	INIT_HLIST_NODE(&d->de_node);
++	kref_init(&d->de_kref);
++}
++EXPORT_SYMBOL(nfs4_init_deviceid_node);
++
++/* Called from layoutdriver_io_operations->alloc_lseg */
++void
++nfs4_set_layout_deviceid(struct pnfs_layout_segment *l, struct nfs4_deviceid *d)
++{
++	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
++	l->deviceid = d;
++	kref_get(&d->de_kref);
++}
++EXPORT_SYMBOL(nfs4_set_layout_deviceid);
++
++/* Called from layoutdriver_io_operations->free_lseg */
++void
++nfs4_unset_layout_deviceid(struct pnfs_layout_segment *l,
++			   struct nfs4_deviceid *d,
++			   void (*free_callback)(struct kref *))
++{
++	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
++	l->deviceid = NULL;
++	kref_put(&d->de_kref, free_callback);
++}
++EXPORT_SYMBOL(nfs4_unset_layout_deviceid);
++
++struct nfs4_deviceid *
++nfs4_find_deviceid(struct nfs4_deviceid_cache *c, struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++	long hash = nfs4_deviceid_hash(id);
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	rcu_read_lock();
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (!memcmp(&d->de_id, id, NFS4_PNFS_DEVICEID4_SIZE)) {
++			rcu_read_unlock();
++			return d;
++		}
++	}
++	rcu_read_unlock();
++	return NULL;
++}
++EXPORT_SYMBOL(nfs4_find_deviceid);
++
++/*
++ * Add or kref_get a deviceid.
++ * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
++ */
++struct nfs4_deviceid *
++nfs4_add_deviceid(struct nfs4_deviceid_cache *c, struct nfs4_deviceid *new)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++	long hash = nfs4_deviceid_hash(&new->de_id);
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	spin_lock(&c->dc_lock);
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (!memcmp(&d->de_id, &new->de_id, NFS4_PNFS_DEVICEID4_SIZE)) {
++			spin_unlock(&c->dc_lock);
++			dprintk("%s [discard]\n", __func__);
++			c->dc_free_callback(&new->de_kref);
++			return d;
++		}
++	}
++	hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
++	spin_unlock(&c->dc_lock);
++	dprintk("%s [new]\n", __func__);
++	return new;
++}
++EXPORT_SYMBOL(nfs4_add_deviceid);
++
++static int
++nfs4_remove_deviceid(struct nfs4_deviceid_cache *c, long hash,
++		     struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	spin_lock(&c->dc_lock);
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (id && memcmp(id, &d->de_id, NFS4_PNFS_DEVICEID4_SIZE))
++			continue;
++		hlist_del_rcu(&d->de_node);
++		spin_unlock(&c->dc_lock);
++		synchronize_rcu();
++		dprintk("%s [%d]\n", __func__,
++			atomic_read(&d->de_kref.refcount));
++		kref_put(&d->de_kref, c->dc_free_callback);
++		return 1;
++	}
++	spin_unlock(&c->dc_lock);
++	return 0;
++}
++
++void
++nfs4_delete_device(struct nfs4_deviceid_cache *c, struct pnfs_deviceid *id)
++{
++	long hash = nfs4_deviceid_hash(id);
++
++	nfs4_remove_deviceid(c, hash, id);
++}
++EXPORT_SYMBOL(nfs4_delete_device);
++
++static void
++nfs4_free_deviceid_cache(struct kref *kref)
++{
++	struct nfs4_deviceid_cache *cache =
++		container_of(kref, struct nfs4_deviceid_cache, dc_kref);
++	long i;
++
++	for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
++		while (nfs4_remove_deviceid(cache, i, NULL))
++			;
++	kfree(cache);
++}
++
++void
++nfs4_put_deviceid_cache(struct nfs_client *clp)
++{
++	struct nfs4_deviceid_cache *tmp = clp->cl_devid_cache;
++	int refcount;
++
++	dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
++	spin_lock(&clp->cl_lock);
++	refcount = atomic_read(&clp->cl_devid_cache->dc_kref.refcount);
++	if (refcount == 1)
++		clp->cl_devid_cache = NULL;
++	spin_unlock(&clp->cl_lock);
++	dprintk("%s [%d]\n", __func__, refcount);
++	kref_put(&tmp->dc_kref, nfs4_free_deviceid_cache);
++}
++EXPORT_SYMBOL(nfs4_put_deviceid_cache);
+diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.h
+--- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-08-23 12:09:03.358501440 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-08-23 12:09:03.358501440 -0400
+@@ -0,0 +1,355 @@
++/*
++ *  fs/nfs/pnfs.h
++ *
++ *  pNFS client data structures.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef FS_NFS_PNFS_H
++#define FS_NFS_PNFS_H
++
++#include <linux/nfs4_pnfs.h>
++
++#ifdef CONFIG_NFS_V4_1
++
++#include <linux/nfs_page.h>
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs_iostat.h>
++#include "iostat.h"
++
++/* nfs4proc.c */
++extern int nfs4_pnfs_getdevicelist(struct nfs_server *server,
++				   const struct nfs_fh *fh,
++				   struct pnfs_devicelist *devlist);
++extern int nfs4_pnfs_getdeviceinfo(struct nfs_server *server,
++				   struct pnfs_device *dev);
++extern int pnfs4_proc_layoutget(struct nfs4_pnfs_layoutget *lgp);
++extern int pnfs4_proc_layoutcommit(struct pnfs_layoutcommit_data *data,
++				   int issync);
++extern int pnfs4_proc_layoutreturn(struct nfs4_pnfs_layoutreturn *lrp, bool wait);
++
++/* pnfs.c */
++extern const nfs4_stateid zero_stateid;
++
++void _pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp);
++
++int _pnfs_return_layout(struct inode *, struct nfs4_pnfs_layout_segment *,
++			const nfs4_stateid *stateid, /* optional */
++			enum pnfs_layoutreturn_type, bool wait);
++void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *mntfh, u32 id);
++void unmount_pnfs_layoutdriver(struct nfs_server *);
++enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
++					     const struct rpc_call_ops *, int);
++enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
++					    const struct rpc_call_ops *);
++int pnfs_initialize(void);
++void pnfs_uninitialize(void);
++void pnfs_layoutcommit_free(struct pnfs_layoutcommit_data *data);
++void pnfs_cleanup_layoutcommit(struct pnfs_layoutcommit_data *data);
++int pnfs_layoutcommit_inode(struct inode *inode, int sync);
++void pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent);
++void pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx);
++unsigned int pnfs_getiosize(struct nfs_server *server);
++void pnfs_set_ds_iosize(struct nfs_server *server);
++enum pnfs_try_status pnfs_try_to_commit(struct nfs_write_data *,
++					 const struct rpc_call_ops *, int);
++void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
++			   struct nfs_open_context *, struct list_head *,
++			   size_t *);
++void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
++			    size_t *);
++void pnfs_free_fsdata(struct pnfs_fsdata *fsdata);
++void pnfs_get_layout_done(struct nfs4_pnfs_layoutget *, int rpc_status);
++int pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp);
++void pnfs_layout_release(struct pnfs_layout_type *, struct nfs4_pnfs_layout_segment *range);
++void pnfs_set_layout_stateid(struct pnfs_layout_type *lo,
++			     const nfs4_stateid *stateid);
++void pnfs_destroy_layout(struct nfs_inode *);
++void pnfs_destroy_all_layouts(struct nfs_client *);
++void put_layout(struct inode *inode);
++void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_type *lo);
++int _pnfs_write_begin(struct inode *inode, struct page *page,
++		      loff_t pos, unsigned len,
++		      struct pnfs_layout_segment *lseg,
++		      struct pnfs_fsdata **fsdata);
++int _pnfs_write_end(struct inode *inode, struct page *page,
++		    loff_t pos, unsigned len, unsigned copied,
++		    struct pnfs_layout_segment *lseg);
++
++#define PNFS_EXISTS_LDIO_OP(srv, opname) ((srv)->pnfs_curr_ld &&	\
++				     (srv)->pnfs_curr_ld->ld_io_ops &&	\
++				     (srv)->pnfs_curr_ld->ld_io_ops->opname)
++#define PNFS_EXISTS_LDPOLICY_OP(srv, opname) ((srv)->pnfs_curr_ld &&	\
++				     (srv)->pnfs_curr_ld->ld_policy_ops && \
++				     (srv)->pnfs_curr_ld->ld_policy_ops->opname)
++
++#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
++
++static inline int lo_fail_bit(u32 iomode)
++{
++	return iomode == IOMODE_RW ?
++			 NFS_INO_RW_LAYOUT_FAILED : NFS_INO_RO_LAYOUT_FAILED;
++}
++
++/* Return true if a layout driver is being used for this mountpoint */
++static inline int pnfs_enabled_sb(struct nfs_server *nfss)
++{
++	return nfss->pnfs_curr_ld != NULL;
++}
++
++static inline int pnfs_grow_ok(struct pnfs_layout_segment *lseg,
++			       struct pnfs_fsdata *fsdata)
++{
++	return !fsdata  || ((struct pnfs_layout_segment *)fsdata == lseg) ||
++		!fsdata->bypass_eof;
++}
++
++/* Should the pNFS client commit and return the layout upon a setattr */
++static inline bool
++pnfs_ld_layoutret_on_setattr(struct inode *inode)
++{
++	if (!pnfs_enabled_sb(NFS_SERVER(inode)))
++		return false;
++	return NFS_SERVER(inode)->pnfs_curr_ld->ld_policy_ops->flags &
++		PNFS_LAYOUTRET_ON_SETATTR;
++}
++
++/* Should the pNFS client commit and return the layout on close
++ */
++static inline int
++pnfs_layout_roc_iomode(struct nfs_inode *nfsi)
++{
++	return nfsi->layout->roc_iomode;
++}
++
++static inline int pnfs_write_begin(struct file *filp, struct page *page,
++				   loff_t pos, unsigned len,
++				   struct pnfs_layout_segment *lseg,
++				   void **fsdata)
++{
++	struct inode *inode = filp->f_dentry->d_inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int status = 0;
++
++	*fsdata = lseg;
++	if (lseg && PNFS_EXISTS_LDIO_OP(nfss, write_begin))
++		status = _pnfs_write_begin(inode, page, pos, len, lseg,
++					   (struct pnfs_fsdata **) fsdata);
++	return status;
++}
++
++static inline int pnfs_write_end(struct file *filp, struct page *page,
++				 loff_t pos, unsigned len, unsigned copied,
++				 struct pnfs_layout_segment *lseg)
++{
++	struct inode *inode = filp->f_dentry->d_inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++
++	if (PNFS_EXISTS_LDIO_OP(nfss, write_end))
++		return _pnfs_write_end(inode, page, pos, len, copied, lseg);
++	else
++		return 0;
++}
++
++static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
++{
++	if (fsdata) {
++		struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode);
++
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_end_cleanup))
++			nfss->pnfs_curr_ld->ld_io_ops->write_end_cleanup(filp, fsdata);
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_begin))
++			pnfs_free_fsdata(fsdata);
++	}
++}
++
++static inline int pnfs_return_layout(struct inode *ino,
++				     struct nfs4_pnfs_layout_segment *lseg,
++				     const nfs4_stateid *stateid, /* optional */
++				     enum pnfs_layoutreturn_type type,
++				     bool wait)
++{
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct nfs_server *nfss = NFS_SERVER(ino);
++
++	if (pnfs_enabled_sb(nfss) &&
++	    (type != RETURN_FILE || has_layout(nfsi)))
++		return _pnfs_return_layout(ino, lseg, stateid, type, wait);
++
++	return 0;
++}
++
++static inline void pnfs_update_layout(struct inode *ino,
++	struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp)
++{
++	struct nfs_server *nfss = NFS_SERVER(ino);
++
++	if (pnfs_enabled_sb(nfss))
++		_pnfs_update_layout(ino, ctx, pos, count, access_type, lsegpp);
++	else {
++		if (lsegpp)
++			*lsegpp = NULL;
++	}
++}
++
++static inline int pnfs_get_write_status(struct nfs_write_data *data)
++{
++	return data->pdata.pnfs_error;
++}
++
++static inline int pnfs_get_read_status(struct nfs_read_data *data)
++{
++	return data->pdata.pnfs_error;
++}
++
++static inline int pnfs_use_rpc(struct nfs_server *nfss)
++{
++	if (pnfs_enabled_sb(nfss))
++		return pnfs_ld_use_rpc_code(nfss->pnfs_curr_ld);
++
++	return 1;
++}
++
++static inline struct pnfs_layout_segment *
++nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata)
++{
++	if (fsdata) {
++		struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode);
++
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_begin))
++			return ((struct pnfs_fsdata *) fsdata)->lseg;
++	}
++	return fsdata;
++}
++#else  /* CONFIG_NFS_V4_1 */
++
++static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
++{
++}
++
++static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
++{
++}
++
++static inline void get_lseg(struct pnfs_layout_segment *lseg)
++{
++}
++
++static inline void put_lseg(struct pnfs_layout_segment *lseg)
++{
++}
++
++static inline void
++pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp)
++{
++	if (lsegpp)
++		*lsegpp = NULL;
++}
++
++static inline int pnfs_grow_ok(struct pnfs_layout_segment *lseg,
++			       struct pnfs_fsdata *fsdata)
++{
++	return 1;
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_read_data(struct nfs_read_data *data,
++		      const struct rpc_call_ops *call_ops)
++{
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_write_data(struct nfs_write_data *data,
++		       const struct rpc_call_ops *call_ops, int how)
++{
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_commit(struct nfs_write_data *data,
++		   const struct rpc_call_ops *call_ops, int how)
++{
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static inline int pnfs_write_begin(struct file *filp, struct page *page,
++				   loff_t pos, unsigned len,
++				   struct pnfs_layout_segment *lseg,
++				   void **fsdata)
++{
++	*fsdata = NULL;
++	return 0;
++}
++
++static inline int pnfs_write_end(struct file *filp, struct page *page,
++				 loff_t pos, unsigned len, unsigned copied,
++				 struct pnfs_layout_segment *lseg)
++{
++	return 0;
++}
++
++static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
++{
++}
++
++static inline int pnfs_get_write_status(struct nfs_write_data *data)
++{
++	return 0;
++}
++
++static inline int pnfs_get_read_status(struct nfs_read_data *data)
++{
++	return 0;
++}
++
++static inline int pnfs_use_rpc(struct nfs_server *nfss)
++{
++	return 1;
++}
++
++static inline int pnfs_layoutcommit_inode(struct inode *inode, int sync)
++{
++	return 0;
++}
++
++static inline bool
++pnfs_ld_layoutret_on_setattr(struct inode *inode)
++{
++	return false;
++}
++
++static inline int
++pnfs_layout_roc_iomode(struct nfs_inode *nfsi)
++{
++	return 0;
++}
++
++static inline int pnfs_return_layout(struct inode *ino,
++				     struct nfs4_pnfs_layout_segment *lseg,
++				     const nfs4_stateid *stateid, /* optional */
++				     enum pnfs_layoutreturn_type type,
++				     bool wait)
++{
++	return 0;
++}
++
++static inline struct pnfs_layout_segment *
++nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata)
++{
++	return NULL;
++}
++
++#endif /* CONFIG_NFS_V4_1 */
++
++#endif /* FS_NFS_PNFS_H */
+diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.c
+--- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-08-23 12:08:29.057511533 -0400
++++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-23 12:09:03.359501471 -0400
+@@ -443,7 +443,7 @@ nfs_proc_symlink(struct inode *dir, stru
+ 	fattr = nfs_alloc_fattr();
+ 	status = -ENOMEM;
+ 	if (fh == NULL || fattr == NULL)
+-		goto out;
++		goto out_free;
+ 
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+@@ -455,7 +455,7 @@ nfs_proc_symlink(struct inode *dir, stru
+ 	 */
+ 	if (status == 0)
+ 		status = nfs_instantiate(dentry, fh, fattr);
+-
++out_free:
+ 	nfs_free_fattr(fattr);
+ 	nfs_free_fhandle(fh);
+ out:
+@@ -694,6 +694,7 @@ const struct nfs_rpc_ops nfs_v2_clientop
+ 	.dentry_ops	= &nfs_dentry_operations,
+ 	.dir_inode_ops	= &nfs_dir_inode_operations,
+ 	.file_inode_ops	= &nfs_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs_proc_get_root,
+ 	.getattr	= nfs_proc_getattr,
+ 	.setattr	= nfs_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.c
+--- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-08-23 12:08:29.057511533 -0400
++++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-23 12:09:03.359501471 -0400
+@@ -18,8 +18,12 @@
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/nfs_page.h>
++#include <linux/smp_lock.h>
++#include <linux/module.h>
+ 
+ #include <asm/system.h>
++#include <linux/module.h>
++#include "pnfs.h"
+ 
+ #include "nfs4_fs.h"
+ #include "internal.h"
+@@ -117,11 +121,14 @@ int nfs_readpage_async(struct nfs_open_c
+ 	LIST_HEAD(one_request);
+ 	struct nfs_page	*new;
+ 	unsigned int len;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	len = nfs_page_length(page);
+ 	if (len == 0)
+ 		return nfs_return_empty_page(page);
+-	new = nfs_create_request(ctx, inode, page, 0, len);
++	pnfs_update_layout(inode, ctx, 0, NFS4_MAX_UINT64, IOMODE_READ, &lseg);
++	new = nfs_create_request(ctx, inode, page, 0, len, lseg);
++	put_lseg(lseg);
+ 	if (IS_ERR(new)) {
+ 		unlock_page(page);
+ 		return PTR_ERR(new);
+@@ -155,24 +162,20 @@ static void nfs_readpage_release(struct 
+ 	nfs_release_request(req);
+ }
+ 
+-/*
+- * Set up the NFS read request struct
+- */
+-static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+-		const struct rpc_call_ops *call_ops,
+-		unsigned int count, unsigned int offset)
++int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++		      const struct rpc_call_ops *call_ops)
+ {
+-	struct inode *inode = req->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = req->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+ 		.task = &data->task,
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.rpc_message = &msg,
+ 		.callback_ops = call_ops,
+ 		.callback_data = data,
+@@ -180,9 +183,46 @@ static int nfs_read_rpcsetup(struct nfs_
+ 		.flags = RPC_TASK_ASYNC | swap_flags,
+ 	};
+ 
++	/* Set up the initial task struct. */
++	NFS_PROTO(inode)->read_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
++			data->task.tk_pid,
++			inode->i_sb->s_id,
++			(long long)NFS_FILEID(inode),
++			data->args.count,
++			(unsigned long long)data->args.offset);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	rpc_put_task(task);
++	return 0;
++}
++EXPORT_SYMBOL(nfs_initiate_read);
++
++int pnfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++		       const struct rpc_call_ops *call_ops)
++{
++	if (data->req->wb_lseg &&
++	    (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
++		return pnfs_get_read_status(data);
++
++	return nfs_initiate_read(data, clnt, call_ops);
++}
++
++/*
++ * Set up the NFS read request struct
++ */
++static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
++		const struct rpc_call_ops *call_ops,
++		unsigned int count, unsigned int offset)
++{
++	struct inode *inode = req->wb_context->path.dentry->d_inode;
++
+ 	data->req	  = req;
+ 	data->inode	  = inode;
+-	data->cred	  = msg.rpc_cred;
++	data->cred	  = req->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(inode);
+ 	data->args.offset = req_offset(req) + offset;
+@@ -190,27 +230,14 @@ static int nfs_read_rpcsetup(struct nfs_
+ 	data->args.pages  = data->pagevec;
+ 	data->args.count  = count;
+ 	data->args.context = get_nfs_open_context(req->wb_context);
++	data->args.lock_context = req->wb_lock_context;
+ 
+ 	data->res.fattr   = &data->fattr;
+ 	data->res.count   = count;
+ 	data->res.eof     = 0;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	/* Set up the initial task struct. */
+-	NFS_PROTO(inode)->read_setup(data, &msg);
+-
+-	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+-			data->task.tk_pid,
+-			inode->i_sb->s_id,
+-			(long long)NFS_FILEID(inode),
+-			count,
+-			(unsigned long long)data->args.offset);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task))
+-		return PTR_ERR(task);
+-	rpc_put_task(task);
+-	return 0;
++	return pnfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
+ }
+ 
+ static void
+@@ -354,7 +381,14 @@ static void nfs_readpage_retry(struct rp
+ {
+ 	struct nfs_readargs *argp = &data->args;
+ 	struct nfs_readres *resp = &data->res;
++	struct nfs_client *clp = NFS_SERVER(data->inode)->nfs_client;
+ 
++#ifdef CONFIG_NFS_V4_1
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		clp = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	if (resp->eof || resp->count == argp->count)
+ 		return;
+ 
+@@ -368,7 +402,10 @@ static void nfs_readpage_retry(struct rp
+ 	argp->offset += resp->count;
+ 	argp->pgbase += resp->count;
+ 	argp->count -= resp->count;
+-	nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client);
++#ifdef CONFIG_NFS_V4_1
++	data->pdata.pnfs_error = -EAGAIN;
++#endif /* CONFIG_NFS_V4_1 */
++	nfs_restart_rpc(task, clp);
+ }
+ 
+ /*
+@@ -409,13 +446,19 @@ static void nfs_readpage_release_partial
+ void nfs_read_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs_read_data *data = calldata;
++	struct nfs4_session *ds_session = NULL;
+ 
+-	if (nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client,
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		ds_session = data->fldata.ds_nfs_client->cl_session;
++	}
++	if (nfs4_setup_sequence(NFS_SERVER(data->inode), ds_session,
+ 				&data->args.seq_args, &data->res.seq_res,
+ 				0, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
++EXPORT_SYMBOL(nfs_read_prepare);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static const struct rpc_call_ops nfs_read_partial_ops = {
+@@ -568,7 +611,8 @@ readpage_async_filler(void *data, struct
+ 	if (len == 0)
+ 		return nfs_return_empty_page(page);
+ 
+-	new = nfs_create_request(desc->ctx, inode, page, 0, len);
++	new = nfs_create_request(desc->ctx, inode, page, 0, len,
++				 desc->pgio->pg_lseg);
+ 	if (IS_ERR(new))
+ 		goto out_error;
+ 
+@@ -624,6 +668,9 @@ int nfs_readpages(struct file *filp, str
+ 	if (ret == 0)
+ 		goto read_complete; /* all pages were read */
+ 
++#ifdef CONFIG_NFS_V4_1
++	pnfs_pageio_init_read(&pgio, inode, desc.ctx, pages, &rsize);
++#endif /* CONFIG_NFS_V4_1 */
+ 	if (rsize < PAGE_CACHE_SIZE)
+ 		nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
+ 	else
+@@ -632,6 +679,7 @@ int nfs_readpages(struct file *filp, str
+ 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
+ 
+ 	nfs_pageio_complete(&pgio);
++	put_lseg(pgio.pg_lseg);
+ 	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ 	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
+ read_complete:
+diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/super.c
+--- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-23 12:08:29.059491391 -0400
++++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-23 12:09:03.361501458 -0400
+@@ -64,6 +64,7 @@
+ #include "iostat.h"
+ #include "internal.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_VFS
+ 
+@@ -669,6 +670,28 @@ static int nfs_show_options(struct seq_f
+ 
+ 	return 0;
+ }
++#ifdef CONFIG_NFS_V4_1
++void show_sessions(struct seq_file *m, struct nfs_server *server)
++{
++	if (nfs4_has_session(server->nfs_client))
++		seq_printf(m, ",sessions");
++}
++#else
++void show_sessions(struct seq_file *m, struct nfs_server *server) {}
++#endif
++
++#ifdef CONFIG_NFS_V4_1
++void show_pnfs(struct seq_file *m, struct nfs_server *server)
++{
++	seq_printf(m, ",pnfs=");
++	if (server->pnfs_curr_ld)
++		seq_printf(m, "%s", server->pnfs_curr_ld->name);
++	else
++		seq_printf(m, "not configured");
++}
++#else  /* CONFIG_NFS_V4_1 */
++void show_pnfs(struct seq_file *m, struct nfs_server *server) {}
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ /*
+  * Present statistical information for this VFS mountpoint
+@@ -707,6 +730,8 @@ static int nfs_show_stats(struct seq_fil
+ 		seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
+ 		seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
+ 		seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
++		show_sessions(m, nfss);
++		show_pnfs(m, nfss);
+ 	}
+ #endif
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unlink.c
+--- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-08-23 12:08:29.060501485 -0400
++++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-23 12:09:03.362419975 -0400
+@@ -110,7 +110,7 @@ void nfs_unlink_prepare(struct rpc_task 
+ 	struct nfs_unlinkdata *data = calldata;
+ 	struct nfs_server *server = NFS_SERVER(data->dir);
+ 
+-	if (nfs4_setup_sequence(server->nfs_client, &data->args.seq_args,
++	if (nfs4_setup_sequence(server, NULL, &data->args.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/write.c
+--- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-08-23 12:08:27.630563929 -0400
++++ linux-2.6.34.noarch/fs/nfs/write.c	2010-08-23 12:09:03.364491337 -0400
+@@ -20,6 +20,7 @@
+ #include <linux/nfs_mount.h>
+ #include <linux/nfs_page.h>
+ #include <linux/backing-dev.h>
++#include <linux/module.h>
+ 
+ #include <asm/uaccess.h>
+ 
+@@ -28,6 +29,7 @@
+ #include "iostat.h"
+ #include "nfs4_fs.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
+ 
+@@ -59,6 +61,7 @@ struct nfs_write_data *nfs_commitdata_al
+ 	}
+ 	return p;
+ }
++EXPORT_SYMBOL(nfs_commitdata_alloc);
+ 
+ void nfs_commit_free(struct nfs_write_data *p)
+ {
+@@ -66,6 +69,7 @@ void nfs_commit_free(struct nfs_write_da
+ 		kfree(p->pagevec);
+ 	mempool_free(p, nfs_commit_mempool);
+ }
++EXPORT_SYMBOL(nfs_commit_free);
+ 
+ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+ {
+@@ -418,6 +422,17 @@ static void nfs_inode_remove_request(str
+ 	nfs_clear_request(req);
+ 	nfs_release_request(req);
+ }
++static void
++nfs_mark_request_nopnfs(struct nfs_page *req)
++{
++	struct pnfs_layout_segment *lseg = req->wb_lseg;
++
++	if (req->wb_lseg == NULL)
++		return;
++	req->wb_lseg = NULL;
++	put_lseg(lseg);
++	dprintk(" retry through MDS\n");
++}
+ 
+ static void
+ nfs_mark_request_dirty(struct nfs_page *req)
+@@ -523,7 +538,7 @@ nfs_need_commit(struct nfs_inode *nfsi)
+  * The requests are *not* checked to ensure that they form a contiguous set.
+  */
+ static int
+-nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
++nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages, int *use_pnfs)
+ {
+ 	struct nfs_inode *nfsi = NFS_I(inode);
+ 	int ret;
+@@ -531,7 +546,8 @@ nfs_scan_commit(struct inode *inode, str
+ 	if (!nfs_need_commit(nfsi))
+ 		return 0;
+ 
+-	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
++	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT,
++			    use_pnfs);
+ 	if (ret > 0)
+ 		nfsi->ncommit -= ret;
+ 	if (nfs_need_commit(NFS_I(inode)))
+@@ -560,7 +576,8 @@ static inline int nfs_scan_commit(struct
+ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
+ 		struct page *page,
+ 		unsigned int offset,
+-		unsigned int bytes)
++		unsigned int bytes,
++		struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_page *req;
+ 	unsigned int rqend;
+@@ -585,8 +602,8 @@ static struct nfs_page *nfs_try_to_updat
+ 		 * Note: nfs_flush_incompatible() will already
+ 		 * have flushed out requests having wrong owners.
+ 		 */
+-		if (offset > rqend
+-		    || end < req->wb_offset)
++		if (offset > rqend || end < req->wb_offset ||
++		    req->wb_lseg != lseg)
+ 			goto out_flushme;
+ 
+ 		if (nfs_set_page_tag_locked(req))
+@@ -634,16 +651,17 @@ out_err:
+  * already called nfs_flush_incompatible() if necessary.
+  */
+ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
+-		struct page *page, unsigned int offset, unsigned int bytes)
++		struct page *page, unsigned int offset, unsigned int bytes,
++		struct pnfs_layout_segment *lseg)
+ {
+ 	struct inode *inode = page->mapping->host;
+ 	struct nfs_page	*req;
+ 	int error;
+ 
+-	req = nfs_try_to_update_request(inode, page, offset, bytes);
++	req = nfs_try_to_update_request(inode, page, offset, bytes, lseg);
+ 	if (req != NULL)
+ 		goto out;
+-	req = nfs_create_request(ctx, inode, page, offset, bytes);
++	req = nfs_create_request(ctx, inode, page, offset, bytes, lseg);
+ 	if (IS_ERR(req))
+ 		goto out;
+ 	error = nfs_inode_add_request(inode, req);
+@@ -656,23 +674,27 @@ out:
+ }
+ 
+ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
+-		unsigned int offset, unsigned int count)
++			       unsigned int offset, unsigned int count,
++			       struct pnfs_layout_segment *lseg,
++			       void *fsdata)
+ {
+ 	struct nfs_page	*req;
+ 
+-	req = nfs_setup_write_request(ctx, page, offset, count);
++	req = nfs_setup_write_request(ctx, page, offset, count, lseg);
+ 	if (IS_ERR(req))
+ 		return PTR_ERR(req);
+ 	nfs_mark_request_dirty(req);
+ 	/* Update file length */
+-	nfs_grow_file(page, offset, count);
++	if (pnfs_grow_ok(lseg, fsdata))
++		nfs_grow_file(page, offset, count);
+ 	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+ 	nfs_mark_request_dirty(req);
+ 	nfs_clear_page_tag_locked(req);
+ 	return 0;
+ }
+ 
+-int nfs_flush_incompatible(struct file *file, struct page *page)
++int nfs_flush_incompatible(struct file *file, struct page *page,
++			   struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_open_context *ctx = nfs_file_open_context(file);
+ 	struct nfs_page	*req;
+@@ -689,7 +711,10 @@ int nfs_flush_incompatible(struct file *
+ 		req = nfs_page_find_request(page);
+ 		if (req == NULL)
+ 			return 0;
+-		do_flush = req->wb_page != page || req->wb_context != ctx;
++		do_flush = req->wb_page != page || req->wb_context != ctx ||
++			req->wb_lock_context->lockowner != current->files ||
++			req->wb_lock_context->pid != current->tgid ||
++			req->wb_lseg != lseg;
+ 		nfs_release_request(req);
+ 		if (!do_flush)
+ 			return 0;
+@@ -716,7 +741,8 @@ static int nfs_write_pageuptodate(struct
+  * things with a page scheduled for an RPC call (e.g. invalidate it).
+  */
+ int nfs_updatepage(struct file *file, struct page *page,
+-		unsigned int offset, unsigned int count)
++		   unsigned int offset, unsigned int count,
++		   struct pnfs_layout_segment *lseg, void *fsdata)
+ {
+ 	struct nfs_open_context *ctx = nfs_file_open_context(file);
+ 	struct inode	*inode = page->mapping->host;
+@@ -741,7 +767,7 @@ int nfs_updatepage(struct file *file, st
+ 		offset = 0;
+ 	}
+ 
+-	status = nfs_writepage_setup(ctx, page, offset, count);
++	status = nfs_writepage_setup(ctx, page, offset, count, lseg, fsdata);
+ 	if (status < 0)
+ 		nfs_set_pageerror(page);
+ 
+@@ -771,25 +797,21 @@ static int flush_task_priority(int how)
+ 	return RPC_PRIORITY_NORMAL;
+ }
+ 
+-/*
+- * Set up the argument/result storage required for the RPC call.
+- */
+-static int nfs_write_rpcsetup(struct nfs_page *req,
+-		struct nfs_write_data *data,
+-		const struct rpc_call_ops *call_ops,
+-		unsigned int count, unsigned int offset,
+-		int how)
++int nfs_initiate_write(struct nfs_write_data *data,
++		       struct rpc_clnt *clnt,
++		       const struct rpc_call_ops *call_ops,
++		       int how)
+ {
+-	struct inode *inode = req->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int priority = flush_task_priority(how);
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = req->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.task = &data->task,
+ 		.rpc_message = &msg,
+ 		.callback_ops = call_ops,
+@@ -800,12 +822,62 @@ static int nfs_write_rpcsetup(struct nfs
+ 	};
+ 	int ret = 0;
+ 
++	/* Set up the initial task struct.  */
++	NFS_PROTO(inode)->write_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated write call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task)) {
++		ret = PTR_ERR(task);
++		goto out;
++	}
++	if (how & FLUSH_SYNC) {
++		ret = rpc_wait_for_completion_task(task);
++		if (ret == 0)
++			ret = task->tk_status;
++	}
++	rpc_put_task(task);
++out:
++	return ret;
++}
++EXPORT_SYMBOL(nfs_initiate_write);
++
++int pnfs_initiate_write(struct nfs_write_data *data,
++			struct rpc_clnt *clnt,
++			const struct rpc_call_ops *call_ops,
++			int how)
++{
++	if (data->req->wb_lseg &&
++	    (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
++		return pnfs_get_write_status(data);
++
++	return nfs_initiate_write(data, clnt, call_ops, how);
++}
++
++/*
++ * Set up the argument/result storage required for the RPC call.
++ */
++static int nfs_write_rpcsetup(struct nfs_page *req,
++		struct nfs_write_data *data,
++		const struct rpc_call_ops *call_ops,
++		unsigned int count, unsigned int offset,
++		int how)
++{
++	struct inode *inode = req->wb_context->path.dentry->d_inode;
++
+ 	/* Set up the RPC argument and reply structs
+ 	 * NB: take care not to mess about with data->commit et al. */
+ 
+ 	data->req = req;
+ 	data->inode = inode = req->wb_context->path.dentry->d_inode;
+-	data->cred = msg.rpc_cred;
++	data->cred = req->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(inode);
+ 	data->args.offset = req_offset(req) + offset;
+@@ -813,6 +885,7 @@ static int nfs_write_rpcsetup(struct nfs
+ 	data->args.pages  = data->pagevec;
+ 	data->args.count  = count;
+ 	data->args.context = get_nfs_open_context(req->wb_context);
++	data->args.lock_context = req->wb_lock_context;
+ 	data->args.stable  = NFS_UNSTABLE;
+ 	if (how & FLUSH_STABLE) {
+ 		data->args.stable = NFS_DATA_SYNC;
+@@ -825,30 +898,7 @@ static int nfs_write_rpcsetup(struct nfs
+ 	data->res.verf    = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	/* Set up the initial task struct.  */
+-	NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-	dprintk("NFS: %5u initiated write call "
+-		"(req %s/%lld, %u bytes @ offset %llu)\n",
+-		data->task.tk_pid,
+-		inode->i_sb->s_id,
+-		(long long)NFS_FILEID(inode),
+-		count,
+-		(unsigned long long)data->args.offset);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task)) {
+-		ret = PTR_ERR(task);
+-		goto out;
+-	}
+-	if (how & FLUSH_SYNC) {
+-		ret = rpc_wait_for_completion_task(task);
+-		if (ret == 0)
+-			ret = task->tk_status;
+-	}
+-	rpc_put_task(task);
+-out:
+-	return ret;
++	return pnfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
+ }
+ 
+ /* If a nfs_flush_* function fails, it should remove reqs from @head and
+@@ -859,6 +909,7 @@ static void nfs_redirty_request(struct n
+ {
+ 	struct page *page = req->wb_page;
+ 
++	nfs_mark_request_nopnfs(req);
+ 	nfs_mark_request_dirty(req);
+ 	nfs_clear_page_tag_locked(req);
+ 	nfs_end_page_writeback(page);
+@@ -971,6 +1022,10 @@ static void nfs_pageio_init_write(struct
+ {
+ 	size_t wsize = NFS_SERVER(inode)->wsize;
+ 
++#ifdef CONFIG_NFS_V4_1
++	pnfs_pageio_init_write(pgio, inode, &wsize);
++#endif /* CONFIG_NFS_V4_1 */
++
+ 	if (wsize < PAGE_CACHE_SIZE)
+ 		nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
+ 	else
+@@ -1036,13 +1091,27 @@ out:
+ void nfs_write_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs_write_data *data = calldata;
+-	struct nfs_client *clp = (NFS_SERVER(data->inode))->nfs_client;
++	struct nfs4_session *ds_session = NULL;
+ 
+-	if (nfs4_setup_sequence(clp, &data->args.seq_args,
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		ds_session = data->fldata.ds_nfs_client->cl_session;
++	} else if (data->args.count > NFS_SERVER(data->inode)->wsize) {
++		/* no DS client; presumably an MDS retry: clamp count to wsize */
++		data->pdata.orig_count = data->args.count;
++		data->args.count = NFS_SERVER(data->inode)->wsize;
++		dprintk("%s: trimmed count %u to wsize %u\n", __func__,
++			data->pdata.orig_count, data->args.count);
++	} else
++		data->pdata.orig_count = 0;
++
++	if (nfs4_setup_sequence(NFS_SERVER(data->inode), ds_session,
++				&data->args.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
++EXPORT_SYMBOL(nfs_write_prepare);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static const struct rpc_call_ops nfs_write_partial_ops = {
+@@ -1126,10 +1195,11 @@ int nfs_writeback_done(struct rpc_task *
+ 	struct nfs_writeargs	*argp = &data->args;
+ 	struct nfs_writeres	*resp = &data->res;
+ 	struct nfs_server	*server = NFS_SERVER(data->inode);
++	struct nfs_client	*clp = server->nfs_client;
+ 	int status;
+ 
+-	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
+-		task->tk_pid, task->tk_status);
++	dprintk("NFS: %5u nfs_writeback_done (status %d count %u)\n",
++		task->tk_pid, task->tk_status, resp->count);
+ 
+ 	/*
+ 	 * ->write_done will attempt to use post-op attributes to detect
+@@ -1142,6 +1212,13 @@ int nfs_writeback_done(struct rpc_task *
+ 	if (status != 0)
+ 		return status;
+ 	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
++#ifdef CONFIG_NFS_V4_1
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		clp = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+ 	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
+@@ -1158,7 +1235,7 @@ int nfs_writeback_done(struct rpc_task *
+ 		if (time_before(complain, jiffies)) {
+ 			dprintk("NFS:       faulty NFS server %s:"
+ 				" (committed = %d) != (stable = %d)\n",
+-				server->nfs_client->cl_hostname,
++				clp->cl_hostname,
+ 				resp->verf->committed, argp->stable);
+ 			complain = jiffies + 300 * HZ;
+ 		}
+@@ -1168,6 +1245,9 @@ int nfs_writeback_done(struct rpc_task *
+ 	if (task->tk_status >= 0 && resp->count < argp->count) {
+ 		static unsigned long    complain;
+ 
++		dprintk("NFS:       short write:"
++			" (resp->count %u) < (argp->count = %u)\n",
++			resp->count, argp->count);
+ 		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+ 
+ 		/* Has the server at least made some progress? */
+@@ -1184,7 +1264,10 @@ int nfs_writeback_done(struct rpc_task *
+ 				 */
+ 				argp->stable = NFS_FILE_SYNC;
+ 			}
+-			nfs_restart_rpc(task, server->nfs_client);
++#ifdef CONFIG_NFS_V4_1
++			data->pdata.pnfs_error = -EAGAIN;
++#endif /* CONFIG_NFS_V4_1 */
++			nfs_restart_rpc(task, clp);
+ 			return -EAGAIN;
+ 		}
+ 		if (time_before(complain, jiffies)) {
+@@ -1228,40 +1311,73 @@ static void nfs_commitdata_release(void 
+ 	nfs_commit_free(wdata);
+ }
+ 
+-/*
+- * Set up the argument/result storage required for the RPC call.
+- */
+-static int nfs_commit_rpcsetup(struct list_head *head,
+-		struct nfs_write_data *data,
+-		int how)
++int nfs_initiate_commit(struct nfs_write_data *data,
++			struct rpc_clnt *clnt,
++			const struct rpc_call_ops *call_ops,
++			int how)
+ {
+-	struct nfs_page *first = nfs_list_entry(head->next);
+-	struct inode *inode = first->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int priority = flush_task_priority(how);
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = first->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+ 		.task = &data->task,
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.rpc_message = &msg,
+-		.callback_ops = &nfs_commit_ops,
++		.callback_ops = call_ops,
+ 		.callback_data = data,
+ 		.workqueue = nfsiod_workqueue,
+ 		.flags = RPC_TASK_ASYNC,
+ 		.priority = priority,
+ 	};
+ 
++	/* Set up the initial task struct.  */
++	NFS_PROTO(inode)->commit_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	rpc_put_task(task);
++	return 0;
++}
++EXPORT_SYMBOL(nfs_initiate_commit);
++
++/* Try the pNFS commit path when @pnfs is set; else send a regular COMMIT. */
++int pnfs_initiate_commit(struct nfs_write_data *data,
++			 struct rpc_clnt *clnt,
++			 const struct rpc_call_ops *call_ops,
++			 int how, int pnfs)
++{
++	if (pnfs &&
++	    (pnfs_try_to_commit(data, call_ops, how) == PNFS_ATTEMPTED))
++		return pnfs_get_write_status(data);
++
++	return nfs_initiate_commit(data, clnt, call_ops, how);
++}
++
++/*
++ * Set up the argument/result storage required for the RPC call.
++ */
++static int nfs_commit_rpcsetup(struct list_head *head,
++		struct nfs_write_data *data,
++		int how, int pnfs)
++{
++	struct nfs_page *first = nfs_list_entry(head->next);
++	struct inode *inode = first->wb_context->path.dentry->d_inode;
++
+ 	/* Set up the RPC argument and reply structs
+ 	 * NB: take care not to mess about with data->commit et al. */
+ 
+ 	list_splice_init(head, &data->pages);
+ 
+ 	data->inode	  = inode;
+-	data->cred	  = msg.rpc_cred;
++	data->cred	  = first->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(data->inode);
+ 	/* Note: we always request a commit of the entire inode */
+@@ -1272,45 +1388,47 @@ static int nfs_commit_rpcsetup(struct li
+ 	data->res.fattr   = &data->fattr;
+ 	data->res.verf    = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
++	kref_init(&data->refcount);
++	data->parent      = NULL;
++	data->args.context = first->wb_context;  /* used by commit done */
+ 
+-	/* Set up the initial task struct.  */
+-	NFS_PROTO(inode)->commit_setup(data, &msg);
++	return pnfs_initiate_commit(data, NFS_CLIENT(inode), &nfs_commit_ops,
++				    how, pnfs);
++}
+ 
+-	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
++/* Handle memory error during commit */
++void nfs_mark_list_commit(struct list_head *head)
++{
++	struct nfs_page         *req;
+ 
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task))
+-		return PTR_ERR(task);
+-	rpc_put_task(task);
+-	return 0;
++	while (!list_empty(head)) {
++		req = nfs_list_entry(head->next);
++		nfs_list_remove_request(req);
++		nfs_mark_request_commit(req);
++		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
++		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
++				BDI_RECLAIMABLE);
++		nfs_clear_page_tag_locked(req);
++	}
+ }
++EXPORT_SYMBOL(nfs_mark_list_commit);
+ 
+ /*
+  * Commit dirty pages
+  */
+ static int
+-nfs_commit_list(struct inode *inode, struct list_head *head, int how)
++nfs_commit_list(struct inode *inode, struct list_head *head, int how, int pnfs)
+ {
+ 	struct nfs_write_data	*data;
+-	struct nfs_page         *req;
+ 
+ 	data = nfs_commitdata_alloc();
+-
+ 	if (!data)
+ 		goto out_bad;
+ 
+ 	/* Set up the argument struct */
+-	return nfs_commit_rpcsetup(head, data, how);
++	return nfs_commit_rpcsetup(head, data, how, pnfs);
+  out_bad:
+-	while (!list_empty(head)) {
+-		req = nfs_list_entry(head->next);
+-		nfs_list_remove_request(req);
+-		nfs_mark_request_commit(req);
+-		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+-		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
+-				BDI_RECLAIMABLE);
+-		nfs_clear_page_tag_locked(req);
+-	}
++	nfs_mark_list_commit(head);
+ 	nfs_commit_clear_lock(NFS_I(inode));
+ 	return -ENOMEM;
+ }
+@@ -1330,6 +1448,19 @@ static void nfs_commit_done(struct rpc_t
+ 		return;
+ }
+ 
++static inline void nfs_commit_cleanup(struct kref *kref)
++{
++	struct nfs_write_data *data;
++
++	data = container_of(kref, struct nfs_write_data, refcount);
++	/* Clear lock only when all cloned commits are finished */
++	if (data->parent)
++		kref_put(&data->parent->refcount, nfs_commit_cleanup);
++	else
++		nfs_commit_clear_lock(NFS_I(data->inode));
++	nfs_commitdata_release(data);
++}
++
+ static void nfs_commit_release(void *calldata)
+ {
+ 	struct nfs_write_data	*data = calldata;
+@@ -1347,6 +1478,11 @@ static void nfs_commit_release(void *cal
+ 			req->wb_bytes,
+ 			(long long)req_offset(req));
+ 		if (status < 0) {
++			if (req->wb_lseg) {
++				nfs_mark_request_nopnfs(req);
++				nfs_mark_request_dirty(req);
++				goto next;
++			}
+ 			nfs_context_set_write_error(req->wb_context, status);
+ 			nfs_inode_remove_request(req);
+ 			dprintk(", error = %d\n", status);
+@@ -1363,12 +1499,12 @@ static void nfs_commit_release(void *cal
+ 		}
+ 		/* We have a mismatch. Write the page again */
+ 		dprintk(" mismatch\n");
++		nfs_mark_request_nopnfs(req);
+ 		nfs_mark_request_dirty(req);
+ 	next:
+ 		nfs_clear_page_tag_locked(req);
+ 	}
+-	nfs_commit_clear_lock(NFS_I(data->inode));
+-	nfs_commitdata_release(calldata);
++	kref_put(&data->refcount, nfs_commit_cleanup);
+ }
+ 
+ static const struct rpc_call_ops nfs_commit_ops = {
+@@ -1384,21 +1520,22 @@ int nfs_commit_inode(struct inode *inode
+ 	LIST_HEAD(head);
+ 	int may_wait = how & FLUSH_SYNC;
+ 	int res = 0;
++	int use_pnfs = 0;
+ 
+ 	if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
+ 		goto out_mark_dirty;
+ 	spin_lock(&inode->i_lock);
+-	res = nfs_scan_commit(inode, &head, 0, 0);
++	res = nfs_scan_commit(inode, &head, 0, 0, &use_pnfs);
+ 	spin_unlock(&inode->i_lock);
+ 	if (res) {
+-		int error = nfs_commit_list(inode, &head, how);
++		int error = nfs_commit_list(inode, &head, how, use_pnfs);
+ 		if (error < 0)
+ 			return error;
+-		if (may_wait)
++		if (may_wait) {
+ 			wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
+ 					nfs_wait_bit_killable,
+ 					TASK_KILLABLE);
+-		else
++		} else
+ 			goto out_mark_dirty;
+ 	} else
+ 		nfs_commit_clear_lock(NFS_I(inode));
+@@ -1451,7 +1588,18 @@ static int nfs_commit_unstable_pages(str
+ 
+ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+ {
+-	return nfs_commit_unstable_pages(inode, wbc);
++	int ret;
++	ret = nfs_commit_unstable_pages(inode, wbc);
++	if (ret >= 0 && layoutcommit_needed(NFS_I(inode))) {
++		int err, sync = wbc->sync_mode;
++
++		if (wbc->nonblocking || wbc->for_background)
++			sync = 0;
++		err = pnfs_layoutcommit_inode(inode, sync);
++		if (err < 0)
++			ret = err;
++	}
++	return ret;
+ }
+ 
+ /*
+@@ -1459,6 +1607,7 @@ int nfs_write_inode(struct inode *inode,
+  */
+ int nfs_wb_all(struct inode *inode)
+ {
++	int ret;
+ 	struct writeback_control wbc = {
+ 		.sync_mode = WB_SYNC_ALL,
+ 		.nr_to_write = LONG_MAX,
+@@ -1466,7 +1615,8 @@ int nfs_wb_all(struct inode *inode)
+ 		.range_end = LLONG_MAX,
+ 	};
+ 
+-	return sync_inode(inode, &wbc);
++	ret = sync_inode(inode, &wbc);
++	return ret;
+ }
+ 
+ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
+diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/include/linux/exportfs.h
+--- linux-2.6.34.noarch/include/linux/exportfs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-08-23 12:09:03.365501459 -0400
+@@ -2,6 +2,7 @@
+ #define LINUX_EXPORTFS_H 1
+ 
+ #include <linux/types.h>
++#include <linux/exp_xdr.h>
+ 
+ struct dentry;
+ struct inode;
+@@ -175,4 +176,62 @@ extern struct dentry *generic_fh_to_pare
+ 	struct fid *fid, int fh_len, int fh_type,
+ 	struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen));
+ 
++#if defined(CONFIG_EXPORTFS_FILE_LAYOUT)
++struct pnfs_filelayout_device;
++struct pnfs_filelayout_layout;
++
++extern int filelayout_encode_devinfo(struct exp_xdr_stream *xdr,
++				     const struct pnfs_filelayout_device *fdev);
++extern enum nfsstat4 filelayout_encode_layout(struct exp_xdr_stream *xdr,
++				      const struct pnfs_filelayout_layout *flp);
++#endif /* defined(CONFIG_EXPORTFS_FILE_LAYOUT) */
++
++#if defined(CONFIG_EXPORTFS_FILE_LAYOUT)
++struct list_head;
++
++extern int blocklayout_encode_devinfo(struct exp_xdr_stream *xdr,
++				      const struct list_head *volumes);
++
++extern enum nfsstat4 blocklayout_encode_layout(struct exp_xdr_stream *xdr,
++					       const struct list_head *layouts);
++#endif /* defined(CONFIG_EXPORTFS_FILE_LAYOUT) */
++
++#if defined(CONFIG_PNFSD)
++#include <linux/module.h>
++
++struct pnfsd_cb_operations;
++
++struct pnfsd_cb_ctl {
++	spinlock_t lock;
++	struct module *module;
++	const struct pnfsd_cb_operations *cb_op;
++};
++
++/* in expfs.c so that file systems can depend on it */
++extern struct pnfsd_cb_ctl pnfsd_cb_ctl;
++
++static inline int
++pnfsd_get_cb_op(struct pnfsd_cb_ctl *ctl)
++{
++	int ret = -ENOENT;
++
++	spin_lock(&pnfsd_cb_ctl.lock);
++	if (!pnfsd_cb_ctl.cb_op)
++		goto out;
++	if (!try_module_get(pnfsd_cb_ctl.module))
++		goto out;
++	ctl->cb_op = pnfsd_cb_ctl.cb_op;
++	ctl->module = pnfsd_cb_ctl.module;
++	ret = 0;
++out:
++	spin_unlock(&pnfsd_cb_ctl.lock);
++	return ret;
++}
++
++static inline void
++pnfsd_put_cb_op(struct pnfsd_cb_ctl *ctl)
++{
++	module_put(ctl->module);
++}
++#endif /* CONFIG_PNFSD */
+ #endif /* LINUX_EXPORTFS_H */
+diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/include/linux/exp_xdr.h
+--- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-08-23 12:09:03.367491365 -0400
++++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-08-23 12:09:03.367491365 -0400
+@@ -0,0 +1,141 @@
++#ifndef _LINUX_EXP_XDR_H
++#define _LINUX_EXP_XDR_H
++
++#include <asm/byteorder.h>
++#include <asm/unaligned.h>
++#include <linux/string.h>
++
++struct exp_xdr_stream {
++	__be32 *p;
++	__be32 *end;
++};
++
++/**
++ * exp_xdr_qwords - Calculate the number of 4-byte words holding nbytes
++ * @nbytes: number of bytes to encode (size_t so callers' counts never truncate)
++ */
++static inline size_t
++exp_xdr_qwords(size_t nbytes)
++{
++	return DIV_ROUND_UP(nbytes, 4);
++}
++
++/**
++ * exp_xdr_qbytes - Calculate the number of bytes holding qwords
++ * @qwords: number of quad-words to encode
++ */
++static inline size_t
++exp_xdr_qbytes(size_t qwords)
++{
++	return qwords << 2;
++}
++
++/**
++ * exp_xdr_reserve_space - Reserve buffer space for sending
++ * @xdr: pointer to exp_xdr_stream
++ * @nbytes: number of bytes to reserve (rounded up to whole 32-bit words)
++ *
++ * Returns the current position and advances @xdr->p past the reservation,
++ * or NULL, leaving @xdr unchanged, if that would pass @xdr->end or wrap.
++ */
++static inline __be32 *
++exp_xdr_reserve_space(struct exp_xdr_stream *xdr, size_t nbytes)
++{
++	__be32 *p = xdr->p;
++	__be32 *q;
++
++	/* round nbytes up to whole 32-bit words; q is the new write position */
++	q = p + exp_xdr_qwords(nbytes);
++	if (unlikely(q > xdr->end || q < p))
++		return NULL;
++	xdr->p = q;
++	return p;
++}
++
++/**
++ * exp_xdr_reserve_qwords - Reserve buffer space for sending
++ * @xdr: pointer to exp_xdr_stream
++ * @qwords: number of quad words (u32's) to reserve
++ */
++static inline __be32 *
++exp_xdr_reserve_qwords(struct exp_xdr_stream *xdr, size_t qwords)
++{
++	return exp_xdr_reserve_space(xdr, exp_xdr_qbytes(qwords));
++}
++
++/**
++ * exp_xdr_encode_u32 - Encode an unsigned 32-bit value onto a xdr stream
++ * @p: pointer to encoding destination
++ * @val: value to encode
++ */
++static inline __be32 *
++exp_xdr_encode_u32(__be32 *p, __u32 val)
++{
++	*p = cpu_to_be32(val);
++	return p + 1;
++}
++
++/**
++ * exp_xdr_encode_u64 - Encode an unsigned 64-bit value onto a xdr stream
++ * @p: pointer to encoding destination
++ * @val: value to encode
++ */
++static inline __be32 *
++exp_xdr_encode_u64(__be32 *p, __u64 val)
++{
++	put_unaligned_be64(val, p);
++	return p + 2;
++}
++
++/**
++ * exp_xdr_encode_bytes - Encode an array of bytes onto a xdr stream
++ * @p: pointer to encoding destination; the advanced pointer is returned
++ * @ptr: pointer to the array of bytes
++ * @nbytes: number of bytes to encode; zero-padded up to a 4-byte multiple
++ */
++static inline __be32 *
++exp_xdr_encode_bytes(__be32 *p, const void *ptr, __u32 nbytes)
++{
++	if (likely(nbytes != 0)) {
++		unsigned int qwords = exp_xdr_qwords(nbytes);
++		unsigned int padding = exp_xdr_qbytes(qwords) - nbytes;
++
++		memcpy(p, ptr, nbytes);
++		if (padding != 0)
++			memset((char *)p + nbytes, 0, padding);
++		p += qwords;
++	}
++	return p;
++}
++
++/**
++ * exp_xdr_encode_opaque - Encode an opaque type onto a xdr stream
++ * @p: pointer to encoding destination
++ * @ptr: pointer to the opaque array
++ * @nbytes: number of bytes to encode
++ *
++ * Encodes the 32-bit opaque size in bytes followed by the opaque value.
++ */
++static inline __be32 *
++exp_xdr_encode_opaque(__be32 *p, const void *ptr, __u32 nbytes)
++{
++	p = exp_xdr_encode_u32(p, nbytes);
++	return exp_xdr_encode_bytes(p, ptr, nbytes);
++}
++
++/**
++ * exp_xdr_encode_opaque_len - Encode the opaque length onto a xdr stream
++ * @lenp: pointer to the opaque length destination
++ * @endp: pointer to the end of the opaque array
++ *
++ * Encodes the 32-bit size in bytes of the data between @lenp + 1 and @endp
++ */
++static inline __be32 *
++exp_xdr_encode_opaque_len(__be32 *lenp, const void *endp)
++{
++	size_t nbytes = (char *)endp - (char *)(lenp + 1);
++
++	exp_xdr_encode_u32(lenp, nbytes);
++	return lenp + 1 + exp_xdr_qwords(nbytes);
++}
++#endif /* _LINUX_EXP_XDR_H */
+diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include/linux/fs.h
+--- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-08-23 12:08:29.021511898 -0400
++++ linux-2.6.34.noarch/include/linux/fs.h	2010-08-23 12:09:03.369481147 -0400
+@@ -387,6 +387,7 @@ struct inodes_stat_t {
+ #include <asm/byteorder.h>
+ 
+ struct export_operations;
++struct pnfs_export_operations;
+ struct hd_geometry;
+ struct iovec;
+ struct nameidata;
+@@ -1329,6 +1330,7 @@ struct super_block {
+ 	const struct dquot_operations	*dq_op;
+ 	const struct quotactl_ops	*s_qcop;
+ 	const struct export_operations *s_export_op;
++	const struct pnfs_export_operations *s_pnfs_op;
+ 	unsigned long		s_flags;
+ 	unsigned long		s_magic;
+ 	struct dentry		*s_root;
+diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/include/linux/nfs4.h
+--- linux-2.6.34.noarch/include/linux/nfs4.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-08-23 12:09:03.371491472 -0400
+@@ -17,7 +17,10 @@
+ 
+ #define NFS4_BITMAP_SIZE	2
+ #define NFS4_VERIFIER_SIZE	8
+-#define NFS4_STATEID_SIZE	16
++#define NFS4_CLIENTID_SIZE	8
++#define NFS4_STATEID_SEQID_SIZE 4
++#define NFS4_STATEID_OTHER_SIZE 12
++#define NFS4_STATEID_SIZE	(NFS4_STATEID_SEQID_SIZE + NFS4_STATEID_OTHER_SIZE)
+ #define NFS4_FHSIZE		128
+ #define NFS4_MAXPATHLEN		PATH_MAX
+ #define NFS4_MAXNAMLEN		NAME_MAX
+@@ -119,6 +122,13 @@
+ #define EXCHGID4_FLAG_MASK_A			0x40070003
+ #define EXCHGID4_FLAG_MASK_R			0x80070003
+ 
++static inline bool
++is_ds_only_session(u32 exchange_flags)
++{
++	u32 mask = EXCHGID4_FLAG_USE_PNFS_DS | EXCHGID4_FLAG_USE_PNFS_MDS;
++	return (exchange_flags & mask) == EXCHGID4_FLAG_USE_PNFS_DS;	/* true only when the DS bit is set and the MDS bit is clear */
++}
++
+ #define SEQ4_STATUS_CB_PATH_DOWN		0x00000001
+ #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING	0x00000002
+ #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED	0x00000004
+@@ -166,8 +176,25 @@ struct nfs4_acl {
+ 	struct nfs4_ace	aces[0];
+ };
+ 
++struct nfs4_fsid {
++	u64	major;
++	u64	minor;
++};
++
+ typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier;
+-typedef struct { char data[NFS4_STATEID_SIZE]; } nfs4_stateid;
++typedef struct { char data[NFS4_CLIENTID_SIZE]; } nfs4_clientid;
++
++struct nfs41_stateid {
++	__be32 seqid;
++	char other[NFS4_STATEID_OTHER_SIZE];
++} __attribute__ ((packed));
++
++typedef struct {
++	union {
++		char data[NFS4_STATEID_SIZE];
++		struct nfs41_stateid stateid;
++	} u;
++} nfs4_stateid;
+ 
+ enum nfs_opnum4 {
+ 	OP_ACCESS = 3,
+@@ -471,6 +498,8 @@ enum lock_type4 {
+ #define FATTR4_WORD1_TIME_MODIFY        (1UL << 21)
+ #define FATTR4_WORD1_TIME_MODIFY_SET    (1UL << 22)
+ #define FATTR4_WORD1_MOUNTED_ON_FILEID  (1UL << 23)
++#define FATTR4_WORD1_FS_LAYOUT_TYPES    (1UL << 30)
++#define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
+ 
+ #define NFSPROC4_NULL 0
+ #define NFSPROC4_COMPOUND 1
+@@ -523,6 +552,7 @@ enum {
+ 	NFSPROC4_CLNT_GETACL,
+ 	NFSPROC4_CLNT_SETACL,
+ 	NFSPROC4_CLNT_FS_LOCATIONS,
++	NFSPROC4_CLNT_RELEASE_LOCKOWNER,
+ 
+ 	/* nfs41 */
+ 	NFSPROC4_CLNT_EXCHANGE_ID,
+@@ -531,6 +561,13 @@ enum {
+ 	NFSPROC4_CLNT_SEQUENCE,
+ 	NFSPROC4_CLNT_GET_LEASE_TIME,
+ 	NFSPROC4_CLNT_RECLAIM_COMPLETE,
++	NFSPROC4_CLNT_PNFS_LAYOUTGET,
++	NFSPROC4_CLNT_PNFS_LAYOUTCOMMIT,
++	NFSPROC4_CLNT_PNFS_LAYOUTRETURN,
++	NFSPROC4_CLNT_PNFS_GETDEVICELIST,
++	NFSPROC4_CLNT_PNFS_GETDEVICEINFO,
++	NFSPROC4_CLNT_PNFS_WRITE,
++	NFSPROC4_CLNT_PNFS_COMMIT,
+ };
+ 
+ /* nfs41 types */
+@@ -549,6 +586,43 @@ enum state_protect_how4 {
+ 	SP4_SSV		= 2
+ };
+ 
++enum pnfs_layouttype {
++	LAYOUT_NFSV4_1_FILES  = 1,
++	LAYOUT_OSD2_OBJECTS = 2,
++	LAYOUT_BLOCK_VOLUME = 3,
++};
++
++/* used for both layout return and recall */
++enum pnfs_layoutreturn_type {
++	RETURN_FILE = 1,
++	RETURN_FSID = 2,
++	RETURN_ALL  = 3
++};
++
++enum pnfs_iomode {
++	IOMODE_READ = 1,
++	IOMODE_RW = 2,
++	IOMODE_ANY = 3,
++};
++
++enum pnfs_notify_deviceid_type4 {
++	NOTIFY_DEVICEID4_CHANGE = 1 << 1,
++	NOTIFY_DEVICEID4_DELETE = 1 << 2,
++};
++
++#define NFL4_UFLG_MASK			0x0000003F
++#define NFL4_UFLG_DENSE			0x00000001
++#define NFL4_UFLG_COMMIT_THRU_MDS	0x00000002
++#define NFL4_UFLG_STRIPE_UNIT_SIZE_MASK	0xFFFFFFC0
++
++/* Encoded in the loh_body field of type layouthint4 */
++enum filelayout_hint_care4 {
++	NFLH4_CARE_DENSE		= NFL4_UFLG_DENSE,
++	NFLH4_CARE_COMMIT_THRU_MDS	= NFL4_UFLG_COMMIT_THRU_MDS,
++	NFLH4_CARE_STRIPE_UNIT_SIZE	= 0x00000040,
++	NFLH4_CARE_STRIPE_COUNT		= 0x00000080
++};
++
+ #endif
+ #endif
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfs4_pnfs.h
+--- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-08-23 12:09:03.372501550 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-08-23 12:09:03.372501550 -0400
+@@ -0,0 +1,330 @@
++/*
++ *  include/linux/nfs4_pnfs.h
++ *
++ *  Common data structures needed by the pnfs client and pnfs layout driver.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef LINUX_NFS4_PNFS_H
++#define LINUX_NFS4_PNFS_H
++
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs_page.h>
++
++enum pnfs_try_status {
++	PNFS_ATTEMPTED     = 0,	/* layout code has taken control of the request */
++	PNFS_NOT_ATTEMPTED = 1,	/* layout code wishes to fall back to normal nfs */
++};
++
++#define NFS4_PNFS_GETDEVLIST_MAXNUM 16
++
++/* Per-layout driver specific registration structure */
++struct pnfs_layoutdriver_type {
++	const u32 id;
++	const char *name;
++	struct layoutdriver_io_operations *ld_io_ops;
++	struct layoutdriver_policy_operations *ld_policy_ops;
++};
++
++struct pnfs_fsdata {
++	int bypass_eof;
++	struct pnfs_layout_segment *lseg;
++	void *private;
++};
++
++#if defined(CONFIG_NFS_V4_1)
++
++static inline struct nfs_inode *
++PNFS_NFS_INODE(struct pnfs_layout_type *lo)
++{
++	return NFS_I(lo->lo_inode);
++}
++
++static inline struct inode *
++PNFS_INODE(struct pnfs_layout_type *lo)
++{
++	return lo->lo_inode;
++}
++
++static inline struct nfs_server *
++PNFS_NFS_SERVER(struct pnfs_layout_type *lo)
++{
++	return NFS_SERVER(PNFS_INODE(lo));
++}
++
++static inline struct pnfs_layoutdriver_type *
++PNFS_LD(struct pnfs_layout_type *lo)
++{
++	return NFS_SERVER(PNFS_INODE(lo))->pnfs_curr_ld;
++}
++
++static inline struct layoutdriver_io_operations *
++PNFS_LD_IO_OPS(struct pnfs_layout_type *lo)
++{
++	return PNFS_LD(lo)->ld_io_ops;
++}
++
++static inline struct layoutdriver_policy_operations *
++PNFS_LD_POLICY_OPS(struct pnfs_layout_type *lo)
++{
++	return PNFS_LD(lo)->ld_policy_ops;
++}
++
++static inline bool
++has_layout(struct nfs_inode *nfsi)
++{
++	return nfsi->layout != NULL;
++}
++
++static inline bool
++layoutcommit_needed(struct nfs_inode *nfsi)
++{
++	return has_layout(nfsi) &&	/* NULL check guards the nfsi->layout dereference below */
++	       test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->pnfs_layout_state);
++}
++
++extern void put_lseg(struct pnfs_layout_segment *lseg);
++extern void get_lseg(struct pnfs_layout_segment *lseg);
++
++#else /* CONFIG_NFS_V4_1 */
++
++static inline bool
++has_layout(struct nfs_inode *nfsi)
++{
++	return false;
++}
++
++static inline bool
++layoutcommit_needed(struct nfs_inode *nfsi)
++{
++	return 0;	/* stub for builds without CONFIG_NFS_V4_1: never needed */
++}
++
++#endif /* CONFIG_NFS_V4_1 */
++
++struct pnfs_layout_segment {
++	struct list_head fi_list;
++	struct nfs4_pnfs_layout_segment range;
++	struct kref kref;
++	bool valid;
++	struct pnfs_layout_type *layout;
++	struct nfs4_deviceid *deviceid;
++	u8 ld_data[];			/* layout driver private data */
++};
++
++static inline void *
++LSEG_LD_DATA(struct pnfs_layout_segment *lseg)
++{
++	return lseg->ld_data;
++}
++
++/* Layout driver I/O operations.
++ * Either the pagecache or non-pagecache read/write operations must be implemented
++ */
++struct layoutdriver_io_operations {
++	/* Functions that use the pagecache.
++	 * If use_pagecache == 1, then these functions must be implemented.
++	 */
++	/* read and write pagelist should return just PNFS_ATTEMPTED (0, the
++	 * layout code has taken control) or PNFS_NOT_ATTEMPTED (1, the
++	 * layout code wishes to fall back to normal nfs).  If 0 is returned,
++	 * information can be passed back through nfs_data->res and
++	 * nfs_data->task.tk_status, and the appropriate pnfs done function
++	 * MUST be called.
++	 */
++	enum pnfs_try_status
++	(*read_pagelist) (struct nfs_read_data *nfs_data, unsigned nr_pages);
++	enum pnfs_try_status
++	(*write_pagelist) (struct nfs_write_data *nfs_data, unsigned nr_pages, int how);
++	int (*write_begin) (struct pnfs_layout_segment *lseg, struct page *page,
++			    loff_t pos, unsigned count,
++			    struct pnfs_fsdata *fsdata);
++	int (*write_end)(struct inode *inode, struct page *page, loff_t pos,
++			 unsigned count, unsigned copied,
++			 struct pnfs_layout_segment *lseg);
++	void (*write_end_cleanup)(struct file *filp,
++				  struct pnfs_fsdata *fsdata);
++
++	/* Consistency ops */
++	/* 2 problems:
++	 * 1) the page list contains nfs_pages, NOT pages
++	 * 2) currently the NFS code doesn't create a page array (as it does with read/write)
++	 */
++	enum pnfs_try_status
++	(*commit) (struct nfs_write_data *nfs_data, int how);
++
++	/* Layout information. For each inode, alloc_layout is executed once to retrieve an
++	 * inode specific layout structure.  Each subsequent layoutget operation results in
++	 * a set_layout call to set the opaque layout in the layout driver.*/
++	struct pnfs_layout_type * (*alloc_layout) (struct inode *inode);
++	void (*free_layout) (struct pnfs_layout_type *);
++	struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_type *layoutid, struct nfs4_pnfs_layoutget_res *lgr);
++	void (*free_lseg) (struct pnfs_layout_segment *lseg);
++
++	int (*setup_layoutcommit) (struct pnfs_layout_type *layoutid,
++				struct pnfs_layoutcommit_arg *args);
++
++	void (*encode_layoutcommit) (struct pnfs_layout_type *layoutid,
++				struct xdr_stream *xdr,
++				const struct pnfs_layoutcommit_arg *args);
++	void (*cleanup_layoutcommit) (struct pnfs_layout_type *layoutid,
++				      struct pnfs_layoutcommit_arg *args,
++				      int status);
++	void (*encode_layoutreturn) (struct pnfs_layout_type *layoutid,
++				struct xdr_stream *xdr,
++				const struct nfs4_pnfs_layoutreturn_arg *args);
++
++	/* Registration information for a new mounted file system
++	 */
++	int (*initialize_mountpoint) (struct nfs_server *,
++				      const struct nfs_fh * mntfh);
++	int (*uninitialize_mountpoint) (struct nfs_server *server);
++};
++
++enum layoutdriver_policy_flags {	/* bit values tested against layoutdriver_policy_operations.flags */
++	/* Should the full nfs rpc cleanup code be used after io */
++	PNFS_USE_RPC_CODE		= 1 << 0,
++
++	/* Should the NFS req. gather algorithm cross stripe boundaries? */
++	PNFS_GATHER_ACROSS_STRIPES	= 1 << 1,
++
++	/* Should the pNFS client commit and return the layout upon a setattr */
++	PNFS_LAYOUTRET_ON_SETATTR	= 1 << 3,	/* NOTE(review): bit 2 is skipped - confirm the gap is intentional */
++};
++
++struct layoutdriver_policy_operations {
++	unsigned flags;	/* tested against enum layoutdriver_policy_flags bits */
++
++	/* The stripe size of the file system */
++	ssize_t (*get_stripesize) (struct pnfs_layout_type *layoutid);
++
++	/* test for nfs page cache coalescing */
++	int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
++
++	/* Retrieve the block size of the file system.
++	 * If gathering across stripes is enabled (presumably the
++	 * PNFS_GATHER_ACROSS_STRIPES flag), requests are gathered into the block size.
++	 * TODO: Where will the layout driver get this info?  It is hard
++	 * coded in PVFS2.
++	 */
++	ssize_t (*get_blocksize) (void);
++};
++
++/* Should the full nfs rpc cleanup code be used after io */
++static inline int
++pnfs_ld_use_rpc_code(struct pnfs_layoutdriver_type *ld)
++{
++	return ld->ld_policy_ops->flags & PNFS_USE_RPC_CODE;	/* raw flag bit (nonzero when set); ld_policy_ops is dereferenced unchecked */
++}
++
++/* Should the NFS req. gather algorithm cross stripe boundaries? */
++static inline int
++pnfs_ld_gather_across_stripes(struct pnfs_layoutdriver_type *ld)
++{
++	return ld->ld_policy_ops->flags & PNFS_GATHER_ACROSS_STRIPES;	/* raw flag bit (nonzero when set), not normalized to 1 */
++}
++
++struct pnfs_device {
++	struct pnfs_deviceid dev_id;
++	unsigned int  layout_type;
++	unsigned int  mincount;
++	struct page **pages;
++	void          *area;
++	unsigned int  pgbase;
++	unsigned int  pglen;
++	unsigned int  dev_notify_types;
++};
++
++struct pnfs_devicelist {
++	unsigned int		eof;
++	unsigned int		num_devs;
++	struct pnfs_deviceid	dev_id[NFS4_PNFS_GETDEVLIST_MAXNUM];
++};
++
++/*
++ * Device ID RCU cache. A device ID is unique per client ID and layout type.
++ */
++#define NFS4_DEVICE_ID_HASH_BITS	5
++#define NFS4_DEVICE_ID_HASH_SIZE	(1 << NFS4_DEVICE_ID_HASH_BITS)
++#define NFS4_DEVICE_ID_HASH_MASK	(NFS4_DEVICE_ID_HASH_SIZE - 1)
++
++static inline u32
++nfs4_deviceid_hash(struct pnfs_deviceid *id)
++{
++	unsigned char *cptr = (unsigned char *)id->data;	/* walk the id as raw bytes */
++	unsigned int nbytes = NFS4_PNFS_DEVICEID4_SIZE;
++	u32 x = 0;
++
++	while (nbytes--) {
++		x *= 37;	/* x = x * 37 + next byte, wrapping in u32 */
++		x += *cptr++;
++	}
++	return x & NFS4_DEVICE_ID_HASH_MASK;	/* bucket index in [0, NFS4_DEVICE_ID_HASH_SIZE) */
++}
++
++struct nfs4_deviceid_cache {
++	spinlock_t		dc_lock;
++	struct kref		dc_kref;
++	void			(*dc_free_callback)(struct kref *);
++	struct hlist_head	dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
++};
++
++/* Device ID cache node */
++struct nfs4_deviceid {
++	struct hlist_node	de_node;
++	struct pnfs_deviceid	de_id;
++	struct kref		de_kref;
++};
++
++extern int nfs4_alloc_init_deviceid_cache(struct nfs_client *,
++				void (*free_callback)(struct kref *));
++extern void nfs4_put_deviceid_cache(struct nfs_client *);
++extern void nfs4_init_deviceid_node(struct nfs4_deviceid *);
++extern struct nfs4_deviceid *nfs4_find_deviceid(struct nfs4_deviceid_cache *,
++				struct pnfs_deviceid *);
++extern struct nfs4_deviceid *nfs4_add_deviceid(struct nfs4_deviceid_cache *,
++				struct nfs4_deviceid *);
++extern void nfs4_set_layout_deviceid(struct pnfs_layout_segment *,
++				struct nfs4_deviceid *);
++extern void nfs4_unset_layout_deviceid(struct pnfs_layout_segment *,
++				struct nfs4_deviceid *,
++				void (*free_callback)(struct kref *));
++extern void nfs4_delete_device(struct nfs4_deviceid_cache *,
++				struct pnfs_deviceid *);
++
++/* pNFS client callback functions.
++ * These operations allow the layout driver to access pNFS client
++ * specific information or call pNFS client->server operations.
++ * E.g., getdeviceinfo, I/O callbacks, etc
++ */
++struct pnfs_client_operations {
++	int (*nfs_getdevicelist) (struct nfs_server *,
++				  const struct nfs_fh *fh,
++				  struct pnfs_devicelist *devlist);
++	int (*nfs_getdeviceinfo) (struct nfs_server *,
++				  struct pnfs_device *dev);
++
++	/* Post read callback. */
++	void (*nfs_readlist_complete) (struct nfs_read_data *nfs_data);
++
++	/* Post write callback. */
++	void (*nfs_writelist_complete) (struct nfs_write_data *nfs_data);
++
++	/* Post commit callback. */
++	void (*nfs_commit_complete) (struct nfs_write_data *nfs_data);
++	void (*nfs_return_layout) (struct inode *);
++};
++
++extern struct pnfs_client_operations pnfs_ops;
++
++extern struct pnfs_client_operations *pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
++extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
++
++#define NFS4_PNFS_MAX_LAYOUTS 4
++#define NFS4_PNFS_PRIVATE_LAYOUT 0x80000000
++
++#endif /* LINUX_NFS4_PNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarch/include/linux/nfsd4_block.h
+--- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-08-23 12:09:03.373491892 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-08-23 12:09:03.374491393 -0400
+@@ -0,0 +1,101 @@
++#ifndef NFSD4_BLOCK
++#define NFSD4_BLOCK
++
++#include <linux/sunrpc/svc.h>
++#include <linux/sunrpc/svcauth.h>
++#include <linux/nfsd/nfsfh.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++#define PNFS_BLOCK_SUCCESS		1
++#define PNFS_BLOCK_FAILURE		0
++
++#define PNFS_BLOCK_CTL_START		1
++#define PNFS_BLOCK_CTL_STOP		2
++#define PNFS_BLOCK_CTL_VERS		3 /* Allows daemon to request current
++					   * version from kernel via an upcall.
++					   */
++
++#define PNFS_UPCALL_MSG_STOP	0
++#define PNFS_UPCALL_MSG_GETSIG	1
++#define PNFS_UPCALL_MSG_GETSLICE	2
++#define PNFS_UPCALL_MSG_DMCHK	3	// See if dev_t is a DM volume
++#define PNFS_UPCALL_MSG_DMGET	4
++#define PNFS_UPCALL_MSG_VERS	5
++
++#define PNFS_UPCALL_VERS		8
++
++typedef struct stripe_dev {
++	int	major,
++		minor,
++		offset;
++} stripe_dev_t;
++
++typedef struct bl_comm_res {
++	int				res_status;
++	union {
++		struct {
++			long long	start,
++					length;
++		} slice;
++		struct {
++			int		num_stripes,
++					stripe_size;
++			stripe_dev_t	devs[];
++		} stripe;
++		struct {
++			long long	sector;
++			int		offset,
++					len;
++			char		sig[];
++		} sig;
++		int			vers,
++					dm_vol;
++	} u;
++} bl_comm_res_t;
++
++typedef struct bl_comm_msg {
++	int		msg_type,
++			msg_status;
++	union {
++		dev_t	msg_dev;
++		int	msg_vers;
++	} u;
++	bl_comm_res_t	*msg_res;
++} bl_comm_msg_t;
++
++#ifdef __KERNEL__
++
++typedef struct bl_comm {
++	/* ---- protects access to this structure ---- */
++	struct mutex		lock;
++	/* ---- protects access to rpc pipe ---- */
++	struct mutex		pipe_lock;
++	struct dentry		*pipe_dentry;
++	wait_queue_head_t	pipe_wq;
++	bl_comm_msg_t		msg;
++} bl_comm_t;
++
++int pnfs_block_enabled(struct inode *, int);
++int bl_layout_type(struct super_block *sb);
++int bl_getdeviceiter(struct super_block *, u32 layout_type,
++		     struct nfsd4_pnfs_dev_iter_res *);
++int bl_getdeviceinfo(struct super_block *, struct exp_xdr_stream *,
++		     u32 layout_type,
++		     const struct nfsd4_pnfs_deviceid *);
++enum nfsstat4 bl_layoutget(struct inode *, struct exp_xdr_stream *,
++			   const struct nfsd4_pnfs_layoutget_arg *,
++			   struct nfsd4_pnfs_layoutget_res *);
++int bl_layoutcommit(struct inode *,
++		    const struct nfsd4_pnfs_layoutcommit_arg *,
++		    struct nfsd4_pnfs_layoutcommit_res *);
++int bl_layoutreturn(struct inode *,
++		    const struct nfsd4_pnfs_layoutreturn_arg *);
++int bl_layoutrecall(struct inode *inode, int type, u64 offset, u64 len);
++int bl_init_proc(void);
++int bl_upcall(bl_comm_t *, bl_comm_msg_t *, bl_comm_res_t **);
++
++extern bl_comm_t	*bl_comm_global;	// Ugly...
++#endif /* __KERNEL__ */
++
++#endif /* NFSD4_BLOCK */
++
+diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h
+--- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-08-23 12:09:03.375501481 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-08-23 12:09:03.375501481 -0400
+@@ -0,0 +1,345 @@
++/*
++ * include/linux/nfsd4_spnfs.h
++ *
++ * spNFS - simple pNFS implementation with userspace daemon
++ *
++ */
++
++/******************************************************************************
++
++(c) 2007 Network Appliance, Inc.  All Rights Reserved.
++
++Network Appliance provides this source code under the GPL v2 License.
++The GPL v2 license is available at
++http://opensource.org/licenses/gpl-license.php.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++******************************************************************************/
++
++#ifndef NFS_SPNFS_H
++#define NFS_SPNFS_H
++
++
++#ifdef __KERNEL__
++#include "exportfs.h"
++#include "sunrpc/svc.h"
++#include "nfsd/nfsfh.h"
++#else
++#include <sys/types.h>
++#endif /* __KERNEL__ */
++
++#define SPNFS_STATUS_INVALIDMSG		0x01
++#define SPNFS_STATUS_AGAIN		0x02
++#define SPNFS_STATUS_FAIL		0x04
++#define SPNFS_STATUS_SUCCESS		0x08
++
++#define SPNFS_TYPE_LAYOUTGET		0x01
++#define SPNFS_TYPE_LAYOUTCOMMIT		0x02
++#define SPNFS_TYPE_LAYOUTRETURN		0x03
++#define SPNFS_TYPE_GETDEVICEITER	0x04
++#define SPNFS_TYPE_GETDEVICEINFO	0x05
++#define SPNFS_TYPE_SETATTR		0x06
++#define SPNFS_TYPE_OPEN			0x07
++#define	SPNFS_TYPE_CLOSE		0x08
++#define SPNFS_TYPE_CREATE		0x09
++#define SPNFS_TYPE_REMOVE		0x0a
++#define SPNFS_TYPE_COMMIT		0x0b
++#define SPNFS_TYPE_READ			0x0c
++#define SPNFS_TYPE_WRITE		0x0d
++
++#define	SPNFS_MAX_DEVICES		1
++#define	SPNFS_MAX_DATA_SERVERS		16
++#define SPNFS_MAX_IO			512
++
++/* layout */
++struct spnfs_msg_layoutget_args {
++	unsigned long inode;
++	unsigned long generation;
++};
++
++struct spnfs_filelayout_list {
++	u_int32_t       fh_len;
++	unsigned char   fh_val[128]; /* DMXXX fix this const */
++};
++
++struct spnfs_msg_layoutget_res {
++	int status;
++	u_int64_t devid;
++	u_int64_t stripe_size;
++	u_int32_t stripe_type;
++	u_int32_t stripe_count;
++	struct spnfs_filelayout_list flist[SPNFS_MAX_DATA_SERVERS];
++};
++
++/* layoutcommit */
++struct spnfs_msg_layoutcommit_args {
++	unsigned long inode;
++	unsigned long generation;
++	u_int64_t file_size;
++};
++
++struct spnfs_msg_layoutcommit_res {
++	int status;
++};
++
++/* layoutreturn */
++/* No op for the daemon */
++/*
++struct spnfs_msg_layoutreturn_args {
++};
++
++struct spnfs_msg_layoutreturn_res {
++};
++*/
++
++/* getdeviceiter */
++struct spnfs_msg_getdeviceiter_args {
++	unsigned long inode;
++	u_int64_t cookie;
++	u_int64_t verf;
++};
++
++struct spnfs_msg_getdeviceiter_res {
++	int status;
++	u_int64_t devid;
++	u_int64_t cookie;
++	u_int64_t verf;
++	u_int32_t eof;
++};
++
++/* getdeviceinfo */
++struct spnfs_data_server {
++	u_int32_t dsid;
++	char netid[5];
++	char addr[29];
++};
++
++struct spnfs_device {
++	u_int64_t devid;
++	int dscount;
++	struct spnfs_data_server dslist[SPNFS_MAX_DATA_SERVERS];
++};
++
++struct spnfs_msg_getdeviceinfo_args {
++	u_int64_t devid;
++};
++
++struct spnfs_msg_getdeviceinfo_res {
++	int status;
++	struct spnfs_device devinfo;
++};
++
++/* setattr */
++struct spnfs_msg_setattr_args {
++	unsigned long inode;
++	unsigned long generation;
++	int file_size;
++};
++
++struct spnfs_msg_setattr_res {
++	int status;
++};
++
++/* open */
++struct spnfs_msg_open_args {
++	unsigned long inode;
++	unsigned long generation;
++	int create;
++	int createmode;
++	int truncate;
++};
++
++struct spnfs_msg_open_res {
++	int status;
++};
++
++/* close */
++/* No op for daemon */
++struct spnfs_msg_close_args {
++	int x;
++};
++
++struct spnfs_msg_close_res {
++	int y;
++};
++
++/* create */
++/*
++struct spnfs_msg_create_args {
++	int x;
++};
++
++struct spnfs_msg_create_res {
++	int y;
++};
++*/
++
++/* remove */
++struct spnfs_msg_remove_args {
++	unsigned long inode;
++	unsigned long generation;
++};
++
++struct spnfs_msg_remove_res {
++	int status;
++};
++
++/* commit */
++/*
++struct spnfs_msg_commit_args {
++	int x;
++};
++
++struct spnfs_msg_commit_res {
++	int y;
++};
++*/
++
++/* read */
++struct spnfs_msg_read_args {
++	unsigned long inode;
++	unsigned long generation;
++	loff_t offset;
++	unsigned long len;
++};
++
++struct spnfs_msg_read_res {
++	int status;
++	char data[SPNFS_MAX_IO];
++};
++
++/* write */
++struct spnfs_msg_write_args {
++	unsigned long inode;
++	unsigned long generation;
++	loff_t offset;
++	unsigned long len;
++	char data[SPNFS_MAX_IO];
++};
++
++struct spnfs_msg_write_res {
++	int status;
++};
++
++/* bundle args and responses */
++union spnfs_msg_args {
++	struct spnfs_msg_layoutget_args		layoutget_args;
++	struct spnfs_msg_layoutcommit_args	layoutcommit_args;
++/*
++	struct spnfs_msg_layoutreturn_args	layoutreturn_args;
++*/
++	struct spnfs_msg_getdeviceiter_args     getdeviceiter_args;
++	struct spnfs_msg_getdeviceinfo_args     getdeviceinfo_args;
++	struct spnfs_msg_setattr_args		setattr_args;
++	struct spnfs_msg_open_args		open_args;
++	struct spnfs_msg_close_args		close_args;
++/*
++	struct spnfs_msg_create_args		create_args;
++*/
++	struct spnfs_msg_remove_args		remove_args;
++/*
++	struct spnfs_msg_commit_args		commit_args;
++*/
++	struct spnfs_msg_read_args		read_args;
++	struct spnfs_msg_write_args		write_args;
++};
++
++union spnfs_msg_res {
++	struct spnfs_msg_layoutget_res		layoutget_res;
++	struct spnfs_msg_layoutcommit_res	layoutcommit_res;
++/*
++	struct spnfs_msg_layoutreturn_res	layoutreturn_res;
++*/
++	struct spnfs_msg_getdeviceiter_res      getdeviceiter_res;
++	struct spnfs_msg_getdeviceinfo_res      getdeviceinfo_res;
++	struct spnfs_msg_setattr_res		setattr_res;
++	struct spnfs_msg_open_res		open_res;
++	struct spnfs_msg_close_res		close_res;
++/*
++	struct spnfs_msg_create_res		create_res;
++*/
++	struct spnfs_msg_remove_res		remove_res;
++/*
++	struct spnfs_msg_commit_res		commit_res;
++*/
++	struct spnfs_msg_read_res		read_res;
++	struct spnfs_msg_write_res		write_res;
++};
++
++/* a spnfs message, args and response */
++struct spnfs_msg {
++	unsigned char		im_type;
++	unsigned char		im_status;
++	union spnfs_msg_args	im_args;
++	union spnfs_msg_res	im_res;
++};
++
++/* spnfs configuration info */
++struct spnfs_config {
++	unsigned char		dense_striping;
++	int			stripe_size;
++	int			num_ds;
++	char			ds_dir[SPNFS_MAX_DATA_SERVERS][80];  /* XXX */
++};
++
++#if defined(__KERNEL__) && defined(CONFIG_SPNFS)
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++/* pipe mgmt structure.  messages flow through here */
++struct spnfs {
++	struct dentry		*spnfs_dentry;    /* dentry for pipe */
++	wait_queue_head_t	spnfs_wq;
++	struct spnfs_msg	spnfs_im;         /* spnfs message */
++	struct mutex		spnfs_lock;       /* Serializes upcalls */
++	struct mutex		spnfs_plock;
++};
++
++struct nfsd4_open;
++
++int spnfs_layout_type(struct super_block *);
++enum nfsstat4 spnfs_layoutget(struct inode *, struct exp_xdr_stream *xdr,
++			      const struct nfsd4_pnfs_layoutget_arg *,
++			      struct nfsd4_pnfs_layoutget_res *);
++int spnfs_layoutcommit(void);
++int spnfs_layoutreturn(struct inode *,
++		       const struct nfsd4_pnfs_layoutreturn_arg *);
++int spnfs_getdeviceiter(struct super_block *,
++			u32 layout_type,
++			struct nfsd4_pnfs_dev_iter_res *);
++int spnfs_getdeviceinfo(struct super_block *, struct exp_xdr_stream *,
++			u32 layout_type,
++			const struct nfsd4_pnfs_deviceid *);
++int spnfs_setattr(void);
++int spnfs_open(struct inode *, struct nfsd4_open *);
++int spnfs_get_state(struct inode *, struct knfsd_fh *, struct pnfs_get_state *);
++int spnfs_remove(unsigned long, unsigned long);
++__be32 spnfs_read(struct inode *, loff_t, unsigned long *,
++		  int, struct svc_rqst *);
++__be32 spnfs_write(struct inode *, loff_t, size_t, int, struct svc_rqst *);
++int spnfs_getfh(int, struct nfs_fh *);
++int spnfs_test_layoutrecall(char *, u64, u64);
++int spnfs_layoutrecall(struct inode *, int, u64, u64);
++
++int nfsd_spnfs_new(void);
++void nfsd_spnfs_delete(void);
++int spnfs_upcall(struct spnfs *, struct spnfs_msg *, union spnfs_msg_res *);
++int spnfs_enabled(void);
++int spnfs_init_proc(void);
++
++extern struct spnfs_config *spnfs_config;
++
++#endif /* __KERNEL__ && CONFIG_SPNFS */
++
++#endif /* NFS_SPNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch/include/linux/nfsd/const.h
+--- linux-2.6.34.noarch/include/linux/nfsd/const.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-08-23 12:09:03.376401789 -0400
+@@ -29,6 +29,7 @@
+ #ifdef __KERNEL__
+ 
+ #include <linux/sunrpc/msg_prot.h>
++#include <linux/sunrpc/svc.h>
+ 
+ /*
+  * Largest number of bytes we need to allocate for an NFS
+diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch/include/linux/nfsd/debug.h
+--- linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-08-23 12:09:03.376401789 -0400
+@@ -32,6 +32,8 @@
+ #define NFSDDBG_REPCACHE	0x0080
+ #define NFSDDBG_XDR		0x0100
+ #define NFSDDBG_LOCKD		0x0200
++#define NFSDDBG_PNFS		0x0400
++#define NFSDDBG_FILELAYOUT	0x0800
+ #define NFSDDBG_ALL		0x7FFF
+ #define NFSDDBG_NOCHANGE	0xFFFF
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarch/include/linux/nfsd/export.h
+--- linux-2.6.34.noarch/include/linux/nfsd/export.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-08-23 12:09:03.377481954 -0400
+@@ -100,6 +100,7 @@ struct svc_export {
+ 	uid_t			ex_anon_uid;
+ 	gid_t			ex_anon_gid;
+ 	int			ex_fsid;
++	int			ex_pnfs;
+ 	unsigned char *		ex_uuid; /* 16 byte fsid */
+ 	struct nfsd4_fs_locations ex_fslocs;
+ 	int			ex_nflavors;
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-08-23 12:09:03.377481954 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-08-23 12:09:03.378501747 -0400
+@@ -0,0 +1,132 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef NFSD_NFS4LAYOUTXDR_H
++#define NFSD_NFS4LAYOUTXDR_H
++
++#include <linux/sunrpc/xdr.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++/* the nfsd4_pnfs_devlist dev_addr for the file layout type */
++struct pnfs_filelayout_devaddr {
++	struct xdr_netobj	r_netid;
++	struct xdr_netobj	r_addr;
++};
++
++/* list of multipath servers */
++struct pnfs_filelayout_multipath {
++	u32				fl_multipath_length;
++	struct pnfs_filelayout_devaddr 	*fl_multipath_list;
++};
++
++struct pnfs_filelayout_device {
++	u32					fl_stripeindices_length;
++	u32       		 		*fl_stripeindices_list;
++	u32					fl_device_length;
++	struct pnfs_filelayout_multipath 	*fl_device_list;
++};
++
++struct pnfs_filelayout_layout {
++	u32                             lg_layout_type; /* response */
++	u32                             lg_stripe_type; /* response */
++	u32                             lg_commit_through_mds; /* response */
++	u64                             lg_stripe_unit; /* response */
++	u64                             lg_pattern_offset; /* response */
++	u32                             lg_first_stripe_index;	/* response */
++	struct nfsd4_pnfs_deviceid	device_id;		/* response */
++	u32                             lg_fh_length;		/* response */
++	struct knfsd_fh                 *lg_fh_list;		/* response */
++};
++
++enum stripetype4 {
++	STRIPE_SPARSE = 1,
++	STRIPE_DENSE = 2
++};
++
++enum pnfs_block_extent_state4 {
++        PNFS_BLOCK_READWRITE_DATA       = 0,
++        PNFS_BLOCK_READ_DATA            = 1,
++        PNFS_BLOCK_INVALID_DATA         = 2,
++        PNFS_BLOCK_NONE_DATA            = 3
++};
++
++enum pnfs_block_volume_type4 {
++        PNFS_BLOCK_VOLUME_SIMPLE = 0,
++        PNFS_BLOCK_VOLUME_SLICE = 1,
++        PNFS_BLOCK_VOLUME_CONCAT = 2,
++        PNFS_BLOCK_VOLUME_STRIPE = 3,
++};
++typedef enum pnfs_block_volume_type4 pnfs_block_volume_type4;
++
++enum bl_cache_state {
++	BLOCK_LAYOUT_NEW	= 0,
++	BLOCK_LAYOUT_CACHE	= 1,
++	BLOCK_LAYOUT_UPDATE	= 2,
++};
++
++typedef struct pnfs_blocklayout_layout {
++        struct list_head                bll_list;
++        struct nfsd4_pnfs_deviceid      bll_vol_id;
++        u64                             bll_foff;	// file offset
++        u64                             bll_len;
++        u64                             bll_soff;	// storage offset
++	int				bll_recalled;
++        enum pnfs_block_extent_state4   bll_es;
++	enum bl_cache_state		bll_cache_state;
++} pnfs_blocklayout_layout_t;
++
++typedef struct pnfs_blocklayout_devinfo {
++        struct list_head                bld_list;
++        pnfs_block_volume_type4         bld_type;
++        struct nfsd4_pnfs_deviceid      bld_devid;
++        int                             bld_index_loc;
++        union {
++                struct {
++                        u64             bld_offset;
++                        u32             bld_sig_len,
++                                        *bld_sig;
++                } simple;
++                struct {
++                        u64             bld_start,
++                                        bld_len;
++                        u32             bld_index;      /* Index of Simple Volume */
++                } slice;
++                struct {
++                        u32             bld_stripes;
++                        u64             bld_chunk_size;
++                        u32             *bld_stripe_indexs;
++                } stripe;
++        } u;
++} pnfs_blocklayout_devinfo_t;
++
++#endif /* NFSD_NFS4LAYOUTXDR_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-08-23 12:09:03.378501747 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-08-23 12:09:03.378501747 -0400
+@@ -0,0 +1,54 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ ******************************************************************************/
++#include <linux/genhd.h>
++
++/*
++ * Length of comma separated pnfs data server IPv4 addresses. Enough room for
++ * 32 addresses.
++ */
++#define NFSD_DLM_DS_LIST_MAX   512
++/*
++ * Length of colon separated pnfs dlm device of the form
++ * disk_name:comma separated data server IPv4 address
++ */
++#define NFSD_PNFS_DLM_DEVICE_MAX (NFSD_DLM_DS_LIST_MAX + DISK_NAME_LEN + 1)
++
++#ifdef CONFIG_PNFSD
++
++/* For use by DLM cluster file systems exported by pNFSD */
++extern const struct pnfs_export_operations pnfs_dlm_export_ops;
++
++int nfsd4_set_pnfs_dlm_device(char *pnfs_dlm_device, int len);
++
++void nfsd4_pnfs_dlm_shutdown(void);
++
++ssize_t nfsd4_get_pnfs_dlm_device_list(char *buf, ssize_t buflen);
++
++#else /* CONFIG_PNFSD */
++
++static inline void nfsd4_pnfs_dlm_shutdown(void)
++{
++	return;
++}
++
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-08-23 12:09:03.379487099 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-08-23 12:09:03.379487099 -0400
+@@ -0,0 +1,271 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef _LINUX_NFSD_NFSD4_PNFS_H
++#define _LINUX_NFSD_NFSD4_PNFS_H
++
++#include <linux/exportfs.h>
++#include <linux/exp_xdr.h>
++#include <linux/nfs_xdr.h>
++
++struct nfsd4_pnfs_deviceid {
++	u64	sbid;			/* per-superblock unique ID */
++	u64	devid;			/* filesystem-wide unique device ID */
++};
++
++struct nfsd4_pnfs_dev_iter_res {
++	u64		gd_cookie;	/* request/response */
++	u64		gd_verf;	/* request/response */
++	u64		gd_devid;	/* response */
++	u32		gd_eof;		/* response */
++};
++
++/* Arguments for set_device_notify */
++struct pnfs_devnotify_arg {
++	struct nfsd4_pnfs_deviceid dn_devid;	/* request */
++	u32 dn_layout_type;			/* request */
++	u32 dn_notify_types;			/* request/response */
++};
++
++struct nfsd4_layout_seg {
++	u64	clientid;
++	u32	layout_type;
++	u32	iomode;
++	u64	offset;
++	u64	length;
++};
++
++/* Used by layout_get to encode layout (loc_body var in spec)
++ * Args:
++ * minlength - min number of accessible bytes given by layout
++ * sbid - Major part of struct nfsd4_pnfs_deviceid.  File system uses
++ * this to build the deviceid returned in the layout.
++ * fh - fs can modify the file handle for use on data servers
++ * seg - layout info requested and layout info returned
++ * xdr - xdr info
++ * return_on_close - true if layout to be returned on file close
++ */
++
++struct nfsd4_pnfs_layoutget_arg {
++	u64			lg_minlength;
++	u64			lg_sbid;
++	const struct knfsd_fh	*lg_fh;
++};
++
++struct nfsd4_pnfs_layoutget_res {
++	struct nfsd4_layout_seg	lg_seg;	/* request/response */
++	u32			lg_return_on_close; /* true if layout to be returned on file close */
++};
++
++struct nfsd4_pnfs_layoutcommit_arg {
++	struct nfsd4_layout_seg	lc_seg;		/* request */
++	u32			lc_reclaim;	/* request */
++	u32			lc_newoffset;	/* request */
++	u64			lc_last_wr;	/* request */
++	struct nfstime4		lc_mtime;	/* request */
++	u32			lc_up_len;	/* layout length */
++	void			*lc_up_layout;	/* decoded by callback */
++};
++
++struct nfsd4_pnfs_layoutcommit_res {
++	u32			lc_size_chg;	/* boolean for response */
++	u64			lc_newsize;	/* response */
++};
++
++#define PNFS_LAST_LAYOUT_NO_RECALLS ((void *)-1) /* used with lr_cookie below */
++
++struct nfsd4_pnfs_layoutreturn_arg {
++	u32			lr_return_type;	/* request */
++	struct nfsd4_layout_seg	lr_seg;		/* request */
++	u32			lr_reclaim;	/* request */
++	u32			lrf_body_len;	/* request */
++	void			*lrf_body;	/* request */
++	void			*lr_cookie;	/* fs private */
++};
++
++/* pNFS Metadata to Data server state communication */
++struct pnfs_get_state {
++	u32			dsid;    /* request */
++	u64			ino;      /* request */
++	nfs4_stateid		stid;     /* request;response */
++	nfs4_clientid		clid;     /* response */
++	u32			access;    /* response */
++	u32			stid_gen;    /* response */
++	u32			verifier[2]; /* response */
++};
++
++/*
++ * pNFS export operations vector.
++ *
++ * The filesystem must implement the following methods:
++ *   layout_type
++ *   get_device_info
++ *   layout_get
++ *
++ * All other methods are optional and can be set to NULL if not implemented.
++ */
++struct pnfs_export_operations {
++	/* Returns the supported pnfs_layouttype4. */
++	int (*layout_type) (struct super_block *);
++
++	/* Encode device info onto the xdr stream. */
++	int (*get_device_info) (struct super_block *,
++				struct exp_xdr_stream *,
++				u32 layout_type,
++				const struct nfsd4_pnfs_deviceid *);
++
++	/* Retrieve all available devices via an iterator.
++	 * arg->cookie == 0 indicates the beginning of the list,
++	 * otherwise arg->verf is used to verify that the list hasn't changed
++	 * while retrieved.
++	 *
++	 * On output, the filesystem sets the devid based on the current cookie
++	 * and sets res->cookie and res->verf corresponding to the next entry.
++	 * When the last entry in the list is retrieved, res->eof is set to 1.
++	 */
++	int (*get_device_iter) (struct super_block *,
++				u32 layout_type,
++				struct nfsd4_pnfs_dev_iter_res *);
++
++	int (*set_device_notify) (struct super_block *,
++				  struct pnfs_devnotify_arg *);
++
++	/* Retrieve and encode a layout for inode onto the xdr stream.
++	 * arg->minlength is the minimum number of accessible bytes required
++	 *   by the client.
++	 * The maximum number of bytes to encode the layout is given by
++	 *   the xdr stream end pointer.
++	 * arg->sbid contains the major part of struct nfsd4_pnfs_deviceid.
++	 *   The file system uses this to build the deviceid returned
++	 *   in the layout.
++	 * res->seg - layout segment requested and layout info returned.
++	 * res->fh can be modified the file handle for use on data servers
++	 * res->return_on_close - true if layout to be returned on file close
++	 *
++	 * return one of the following nfs errors:
++	 * NFS_OK			Success
++	 * NFS4ERR_ACCESS		Permission error
++	 * NFS4ERR_BADIOMODE		Server does not support requested iomode
++	 * NFS4ERR_BADLAYOUT		No layout matching loga_minlength rules
++	 * NFS4ERR_INVAL		Parameter other than layout is invalid
++	 * NFS4ERR_IO			I/O error
++	 * NFS4ERR_LAYOUTTRYLATER	Layout may be retrieved later
++	 * NFS4ERR_LAYOUTUNAVAILABLE	Layout unavailable for this file
++	 * NFS4ERR_LOCKED		Lock conflict
++	 * NFS4ERR_NOSPC		Out-of-space error occurred
++	 * NFS4ERR_RECALLCONFLICT	Layout currently unavailable due to
++	 *				a conflicting CB_LAYOUTRECALL
++	 * NFS4ERR_SERVERFAULT		Server went berserk
++	 * NFS4ERR_TOOSMALL		loga_maxcount too small to fit layout
++	 * NFS4ERR_WRONG_TYPE		Wrong file type (not a regular file)
++	 */
++	enum nfsstat4 (*layout_get) (struct inode *,
++				     struct exp_xdr_stream *xdr,
++				     const struct nfsd4_pnfs_layoutget_arg *,
++				     struct nfsd4_pnfs_layoutget_res *);
++
++	/* Commit changes to layout */
++	int (*layout_commit) (struct inode *,
++			      const struct nfsd4_pnfs_layoutcommit_arg *,
++			      struct nfsd4_pnfs_layoutcommit_res *);
++
++	/* Returns the layout */
++	int (*layout_return) (struct inode *,
++			      const struct nfsd4_pnfs_layoutreturn_arg *);
++
++	/* Can layout segments be merged for this layout type? */
++	int (*can_merge_layouts) (u32 layout_type);
++
++	/* pNFS Files layout specific operations */
++
++	/* Get the write verifier for DS (called on MDS only) */
++	void (*get_verifier) (struct super_block *, u32 *p);
++	/* Call fs on DS only */
++	int (*get_state) (struct inode *, struct knfsd_fh *,
++			  struct pnfs_get_state *);
++};
++
++struct nfsd4_pnfs_cb_layout {
++	u32			cbl_recall_type;	/* request */
++	struct nfsd4_layout_seg cbl_seg;		/* request */
++	u32			cbl_layoutchanged;	/* request */
++	nfs4_stateid		cbl_sid;		/* request */
++	struct nfs4_fsid	cbl_fsid;
++	void			*cbl_cookie;		/* fs private */
++};
++
++/* layoutrecall request (from exported filesystem) */
++struct nfs4_layoutrecall {
++	struct kref			clr_ref;
++	struct nfsd4_pnfs_cb_layout	cb;	/* request */
++	struct list_head		clr_perclnt; /* on cl_layoutrecalls */
++	struct nfs4_client	       *clr_client;
++	struct nfs4_file	       *clr_file;
++	struct timespec			clr_time;	/* last activity */
++	struct super_block 		*clr_sb; /* We might not have a file */
++	struct nfs4_layoutrecall	*parent; /* The initiating recall */
++
++	void				*clr_args;	/* nfsd internal */
++};
++
++struct nfsd4_pnfs_cb_dev_item {
++	u32			cbd_notify_type;	/* request */
++	u32			cbd_layout_type;	/* request */
++	struct nfsd4_pnfs_deviceid cbd_devid;		/* request */
++	u32			cbd_immediate;		/* request */
++};
++
++struct nfsd4_pnfs_cb_dev_list {
++	u32				cbd_len;  /* request */
++	struct nfsd4_pnfs_cb_dev_item  *cbd_list; /* request */
++};
++
++/*
++ * callbacks provided by the nfsd
++ */
++struct pnfsd_cb_operations {
++	/* Generic callbacks */
++	int (*cb_layout_recall) (struct super_block *, struct inode *,
++				 struct nfsd4_pnfs_cb_layout *);
++	int (*cb_device_notify) (struct super_block *,
++				 struct nfsd4_pnfs_cb_dev_list *);
++
++	/* pNFS Files layout specific callbacks */
++
++	/* Callback from fs on MDS only */
++	int (*cb_get_state) (struct super_block *, struct pnfs_get_state *);
++	/* Callback from fs on DS only */
++	int (*cb_change_state) (struct pnfs_get_state *);
++};
++
++#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noarch/include/linux/nfsd/syscall.h
+--- linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-08-23 12:09:03.380502500 -0400
+@@ -29,6 +29,7 @@
+ /*#define NFSCTL_GETFH		6	/ * get an fh by ino DISCARDED */
+ #define NFSCTL_GETFD		7	/* get an fh by path (used by mountd) */
+ #define	NFSCTL_GETFS		8	/* get an fh by path with max FH len */
++#define	NFSCTL_FD2FH		9	/* get a fh from a fd */
+ 
+ /* SVC */
+ struct nfsctl_svc {
+@@ -71,6 +72,11 @@ struct nfsctl_fsparm {
+ 	int			gd_maxlen;
+ };
+ 
++/* FD2FH */
++struct nfsctl_fd2fh {
++	int			fd;
++};
++
+ /*
+  * This is the argument union.
+  */
+@@ -82,6 +88,7 @@ struct nfsctl_arg {
+ 		struct nfsctl_export	u_export;
+ 		struct nfsctl_fdparm	u_getfd;
+ 		struct nfsctl_fsparm	u_getfs;
++		struct nfsctl_fd2fh	u_fd2fh;
+ 		/*
+ 		 * The following dummy member is needed to preserve binary compatibility
+ 		 * on platforms where alignof(void*)>alignof(int).  It's needed because
+@@ -95,6 +102,7 @@ struct nfsctl_arg {
+ #define ca_export	u.u_export
+ #define ca_getfd	u.u_getfd
+ #define	ca_getfs	u.u_getfs
++#define	ca_fd2fh	u.u_fd2fh
+ };
+ 
+ union nfsctl_res {
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/include/linux/nfs_fs.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-23 12:08:29.061494081 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-23 12:09:03.381511751 -0400
+@@ -72,13 +72,20 @@ struct nfs_access_entry {
+ 	int			mask;
+ };
+ 
++struct nfs_lock_context {
++	atomic_t count;
++	struct list_head list;
++	struct nfs_open_context *open_context;
++	fl_owner_t lockowner;
++	pid_t pid;
++};
++
+ struct nfs4_state;
+ struct nfs_open_context {
+-	atomic_t count;
++	struct nfs_lock_context lock_context;
+ 	struct path path;
+ 	struct rpc_cred *cred;
+ 	struct nfs4_state *state;
+-	fl_owner_t lockowner;
+ 	fmode_t mode;
+ 
+ 	unsigned long flags;
+@@ -97,6 +104,26 @@ struct nfs_delegation;
+ 
+ struct posix_acl;
+ 
++struct pnfs_layout_type {
++	int refcount;
++	struct list_head lo_layouts;	/* other client layouts */
++	struct list_head segs;		/* layout segments list */
++	int roc_iomode;			/* iomode to return on close, 0=none */
++	seqlock_t seqlock;		/* Protects the stateid */
++	nfs4_stateid stateid;
++	unsigned long pnfs_layout_state;
++	#define NFS_INO_RO_LAYOUT_FAILED 0      /* get ro layout failed stop trying */
++	#define NFS_INO_RW_LAYOUT_FAILED 1      /* get rw layout failed stop trying */
++	#define NFS_INO_LAYOUTCOMMIT     3      /* LAYOUTCOMMIT needed */
++	struct rpc_cred         *lo_cred; /* layoutcommit credential */
++	/* DH: These vars keep track of the maximum write range
++	 * so the values can be used for layoutcommit.
++	 */
++	loff_t                  pnfs_write_begin_pos;
++	loff_t                  pnfs_write_end_pos;
++	struct inode		*lo_inode;
++};
++
+ /*
+  * nfs fs inode data in memory
+  */
+@@ -181,6 +208,13 @@ struct nfs_inode {
+ 	struct nfs_delegation	*delegation;
+ 	fmode_t			 delegation_state;
+ 	struct rw_semaphore	rwsem;
++
++	/* pNFS layout information */
++#if defined(CONFIG_NFS_V4_1)
++	wait_queue_head_t lo_waitq;
++	struct pnfs_layout_type *layout;
++	time_t pnfs_layout_suspend;
++#endif /* CONFIG_NFS_V4_1 */
+ #endif /* CONFIG_NFS_V4*/
+ #ifdef CONFIG_NFS_FSCACHE
+ 	struct fscache_cookie	*fscache;
+@@ -353,6 +387,8 @@ extern void nfs_setattr_update_inode(str
+ extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
+ extern void put_nfs_open_context(struct nfs_open_context *ctx);
+ extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode);
++extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx);
++extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx);
+ extern u64 nfs_compat_user_ino64(u64 fileid);
+ extern void nfs_fattr_init(struct nfs_fattr *fattr);
+ 
+@@ -481,8 +517,12 @@ extern void nfs_unblock_sillyrename(stru
+ extern int  nfs_congestion_kb;
+ extern int  nfs_writepage(struct page *page, struct writeback_control *wbc);
+ extern int  nfs_writepages(struct address_space *, struct writeback_control *);
+-extern int  nfs_flush_incompatible(struct file *file, struct page *page);
+-extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
++struct pnfs_layout_segment;
++extern int  nfs_flush_incompatible(struct file *file, struct page *page,
++				   struct pnfs_layout_segment *lseg);
++extern int  nfs_updatepage(struct file *, struct page *,
++			   unsigned int offset, unsigned int count,
++			   struct pnfs_layout_segment *lseg, void *fsdata);
+ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
+ 
+ /*
+@@ -604,6 +644,8 @@ extern void * nfs_root_data(void);
+ #define NFSDBG_CLIENT		0x0200
+ #define NFSDBG_MOUNT		0x0400
+ #define NFSDBG_FSCACHE		0x0800
++#define NFSDBG_PNFS		0x1000
++#define NFSDBG_PNFS_LD		0x2000
+ #define NFSDBG_ALL		0xFFFF
+ 
+ #ifdef __KERNEL__
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/include/linux/nfs_fs_sb.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-08-23 12:08:29.062501618 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-23 12:09:03.383491395 -0400
+@@ -15,6 +15,7 @@ struct nlm_host;
+ struct nfs4_sequence_args;
+ struct nfs4_sequence_res;
+ struct nfs_server;
++struct nfs4_minor_version_ops;
+ 
+ /*
+  * The nfs_client identifies our client state to the server.
+@@ -70,11 +71,7 @@ struct nfs_client {
+ 	 */
+ 	char			cl_ipaddr[48];
+ 	unsigned char		cl_id_uniquifier;
+-	int		     (* cl_call_sync)(struct nfs_server *server,
+-					      struct rpc_message *msg,
+-					      struct nfs4_sequence_args *args,
+-					      struct nfs4_sequence_res *res,
+-					      int cache_reply);
++	const struct nfs4_minor_version_ops *cl_mvops;
+ #endif /* CONFIG_NFS_V4 */
+ 
+ #ifdef CONFIG_NFS_V4_1
+@@ -85,6 +82,8 @@ struct nfs_client {
+ 	/* The flags used for obtaining the clientid during EXCHANGE_ID */
+ 	u32			cl_exchange_flags;
+ 	struct nfs4_session	*cl_session; 	/* sharred session */
++	struct list_head	cl_layouts;
++	struct nfs4_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ #ifdef CONFIG_NFS_FSCACHE
+@@ -92,6 +91,16 @@ struct nfs_client {
+ #endif
+ };
+ 
++static inline bool
++is_ds_only_client(struct nfs_client *clp)
++{
++#ifdef CONFIG_NFS_V4_1
++	return is_ds_only_session(clp->cl_exchange_flags);
++#else
++	return false;
++#endif
++}
++
+ /*
+  * NFS client parameters stored in the superblock.
+  */
+@@ -136,7 +145,7 @@ struct nfs_server {
+ #endif
+ 
+ #ifdef CONFIG_NFS_V4
+-	u32			attr_bitmask[2];/* V4 bitmask representing the set
++	u32			attr_bitmask[3];/* V4 bitmask representing the set
+ 						   of attributes supported on this
+ 						   filesystem */
+ 	u32			cache_consistency_bitmask[2];
+@@ -148,6 +157,15 @@ struct nfs_server {
+ 						   that are supported on this
+ 						   filesystem */
+ #endif
++
++#ifdef CONFIG_NFS_V4_1
++	u32				pnfs_blksize; /* layout_blksize attr */
++	struct pnfs_layoutdriver_type  *pnfs_curr_ld; /* Active layout driver */
++	void			       *pnfs_ld_data; /* Per-mount data */
++	unsigned int			ds_rsize;  /* Data server read size */
++	unsigned int			ds_wsize;  /* Data server write size */
++#endif /* CONFIG_NFS_V4_1 */
++
+ 	void (*destroy)(struct nfs_server *);
+ 
+ 	atomic_t active; /* Keep trace of any activity to this server */
+diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch/include/linux/nfs_iostat.h
+--- linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-08-23 12:09:03.384501540 -0400
+@@ -113,6 +113,9 @@ enum nfs_stat_eventcounters {
+ 	NFSIOS_SHORTREAD,
+ 	NFSIOS_SHORTWRITE,
+ 	NFSIOS_DELAY,
++	NFSIOS_PNFS_READ,
++	NFSIOS_PNFS_WRITE,
++	NFSIOS_PNFS_COMMIT,
+ 	__NFSIOS_COUNTSMAX,
+ };
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/include/linux/nfs_page.h
+--- linux-2.6.34.noarch/include/linux/nfs_page.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-08-23 12:09:03.385491518 -0400
+@@ -39,6 +39,7 @@ struct nfs_page {
+ 	struct list_head	wb_list;	/* Defines state of page: */
+ 	struct page		*wb_page;	/* page to read in/write out */
+ 	struct nfs_open_context	*wb_context;	/* File state context info */
++	struct nfs_lock_context	*wb_lock_context;	/* lock context info */
+ 	atomic_t		wb_complete;	/* i/os we're waiting for */
+ 	pgoff_t			wb_index;	/* Offset >> PAGE_CACHE_SHIFT */
+ 	unsigned int		wb_offset,	/* Offset & ~PAGE_CACHE_MASK */
+@@ -47,6 +48,7 @@ struct nfs_page {
+ 	struct kref		wb_kref;	/* reference count */
+ 	unsigned long		wb_flags;
+ 	struct nfs_writeverf	wb_verf;	/* Commit cookie */
++	struct pnfs_layout_segment *wb_lseg;	/* Pnfs layout info */
+ };
+ 
+ struct nfs_pageio_descriptor {
+@@ -60,6 +62,12 @@ struct nfs_pageio_descriptor {
+ 	int			(*pg_doio)(struct inode *, struct list_head *, unsigned int, size_t, int);
+ 	int 			pg_ioflags;
+ 	int			pg_error;
++	struct pnfs_layout_segment *pg_lseg;
++#ifdef CONFIG_NFS_V4_1
++	int			pg_iswrite;
++	int			pg_boundary;
++	int			(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
++#endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+ #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
+@@ -68,13 +76,15 @@ extern	struct nfs_page *nfs_create_reque
+ 					    struct inode *inode,
+ 					    struct page *page,
+ 					    unsigned int offset,
+-					    unsigned int count);
++					    unsigned int count,
++					    struct pnfs_layout_segment *lseg);
+ extern	void nfs_clear_request(struct nfs_page *req);
+ extern	void nfs_release_request(struct nfs_page *req);
+ 
+ 
+ extern	int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
+-			  pgoff_t idx_start, unsigned int npages, int tag);
++			  pgoff_t idx_start, unsigned int npages, int tag,
++			  int *use_pnfs);
+ extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
+ 			     struct inode *inode,
+ 			     int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
+diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/include/linux/nfs_xdr.h
+--- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-08-23 12:08:29.062501618 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-23 12:09:03.387491422 -0400
+@@ -3,6 +3,8 @@
+ 
+ #include <linux/nfsacl.h>
+ #include <linux/nfs3.h>
++#include <linux/nfs4.h>
++#include <linux/sunrpc/sched.h>
+ 
+ /*
+  * To change the maximum rsize and wsize supported by the NFS client, adjust
+@@ -10,7 +12,7 @@
+  * support a megabyte or more.  The default is left at 4096 bytes, which is
+  * reasonable for NFS over UDP.
+  */
+-#define NFS_MAX_FILE_IO_SIZE	(1048576U)
++#define NFS_MAX_FILE_IO_SIZE	(4U * 1048576U)
+ #define NFS_DEF_FILE_IO_SIZE	(4096U)
+ #define NFS_MIN_FILE_IO_SIZE	(1024U)
+ 
+@@ -113,6 +115,10 @@ struct nfs_fsinfo {
+ 	__u32			dtpref;	/* pref. readdir transfer size */
+ 	__u64			maxfilesize;
+ 	__u32			lease_time; /* in seconds */
++#if defined(CONFIG_NFS_V4_1)
++	__u32			layouttype; /* supported pnfs layout driver */
++	__u32			blksize; /* preferred pnfs io block size */
++#endif
+ };
+ 
+ struct nfs_fsstat {
+@@ -196,8 +202,10 @@ struct nfs_openargs {
+ 	__u64                   clientid;
+ 	__u64                   id;
+ 	union {
+-		struct iattr *  attrs;    /* UNCHECKED, GUARDED */
+-		nfs4_verifier   verifier; /* EXCLUSIVE */
++		struct {
++			struct iattr *  attrs;    /* UNCHECKED, GUARDED */
++			nfs4_verifier   verifier; /* EXCLUSIVE */
++		};
+ 		nfs4_stateid	delegation;		/* CLAIM_DELEGATE_CUR */
+ 		fmode_t		delegation_type;	/* CLAIM_PREVIOUS */
+ 	} u;
+@@ -313,6 +321,10 @@ struct nfs_lockt_res {
+ 	struct nfs4_sequence_res	seq_res;
+ };
+ 
++struct nfs_release_lockowner_args {
++	struct nfs_lowner	lock_owner;
++};
++
+ struct nfs4_delegreturnargs {
+ 	const struct nfs_fh *fhandle;
+ 	const nfs4_stateid *stateid;
+@@ -332,6 +344,7 @@ struct nfs4_delegreturnres {
+ struct nfs_readargs {
+ 	struct nfs_fh *		fh;
+ 	struct nfs_open_context *context;
++	struct nfs_lock_context *lock_context;
+ 	__u64			offset;
+ 	__u32			count;
+ 	unsigned int		pgbase;
+@@ -352,6 +365,7 @@ struct nfs_readres {
+ struct nfs_writeargs {
+ 	struct nfs_fh *		fh;
+ 	struct nfs_open_context *context;
++	struct nfs_lock_context *lock_context;
+ 	__u64			offset;
+ 	__u32			count;
+ 	enum nfs3_stable_how	stable;
+@@ -846,7 +860,7 @@ struct nfs4_server_caps_arg {
+ };
+ 
+ struct nfs4_server_caps_res {
+-	u32				attr_bitmask[2];
++	u32				attr_bitmask[3];
+ 	u32				acl_bitmask;
+ 	u32				has_links;
+ 	u32				has_symlinks;
+@@ -961,6 +975,27 @@ struct nfs_page;
+ 
+ #define NFS_PAGEVEC_SIZE	(8U)
+ 
++#if defined(CONFIG_NFS_V4_1)
++/* pnfsflag values */
++#define PNFS_NO_RPC		0x0001   /* non rpc result callback switch */
++
++/* pnfs-specific data needed for read, write, and commit calls */
++struct pnfs_call_data {
++	struct pnfs_layout_segment *lseg;
++	const struct rpc_call_ops *call_ops;
++	u32			orig_count;	/* for retry via MDS */
++	int			pnfs_error;
++	u8			pnfsflags;
++	u8			how;		/* for FLUSH_STABLE */
++};
++
++/* files layout-type specific data for read, write, and commit */
++struct pnfs_fl_call_data {
++	struct nfs_client	*ds_nfs_client;
++	__u64			orig_offset;
++};
++#endif /* CONFIG_NFS_V4_1 */
++
+ struct nfs_read_data {
+ 	int			flags;
+ 	struct rpc_task		task;
+@@ -976,10 +1011,16 @@ struct nfs_read_data {
+ #ifdef CONFIG_NFS_V4
+ 	unsigned long		timestamp;	/* For lease renewal */
+ #endif
++#if defined(CONFIG_NFS_V4_1)
++	struct pnfs_call_data	pdata;
++	struct pnfs_fl_call_data fldata;
++#endif /* CONFIG_NFS_V4_1 */
+ 	struct page		*page_array[NFS_PAGEVEC_SIZE];
+ };
+ 
+ struct nfs_write_data {
++	struct kref		refcount;	/* For pnfs commit splitting */
++	struct nfs_write_data	*parent;	/* For pnfs commit splitting */
+ 	int			flags;
+ 	struct rpc_task		task;
+ 	struct inode		*inode;
+@@ -995,6 +1036,10 @@ struct nfs_write_data {
+ #ifdef CONFIG_NFS_V4
+ 	unsigned long		timestamp;	/* For lease renewal */
+ #endif
++#if defined(CONFIG_NFS_V4_1)
++	struct pnfs_call_data	pdata;
++	struct pnfs_fl_call_data fldata;
++#endif /* CONFIG_NFS_V4_1 */
+ 	struct page		*page_array[NFS_PAGEVEC_SIZE];
+ };
+ 
+@@ -1008,6 +1053,7 @@ struct nfs_rpc_ops {
+ 	const struct dentry_operations *dentry_ops;
+ 	const struct inode_operations *dir_inode_ops;
+ 	const struct inode_operations *file_inode_ops;
++	const struct file_operations *file_ops;
+ 
+ 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
+ 			    struct nfs_fsinfo *);
+@@ -1072,6 +1118,7 @@ struct nfs_rpc_ops {
+ extern const struct nfs_rpc_ops	nfs_v2_clientops;
+ extern const struct nfs_rpc_ops	nfs_v3_clientops;
+ extern const struct nfs_rpc_ops	nfs_v4_clientops;
++extern const struct nfs_rpc_ops	pnfs_v4_clientops;
+ extern struct rpc_version	nfs_version2;
+ extern struct rpc_version	nfs_version3;
+ extern struct rpc_version	nfs_version4;
+diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.noarch/include/linux/panfs_shim_api.h
+--- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-08-23 12:09:03.388491527 -0400
++++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-08-23 12:09:03.388491527 -0400
+@@ -0,0 +1,57 @@
++#ifndef _PANFS_SHIM_API_H
++#define _PANFS_SHIM_API_H
++
++/*
++ * imported panfs functions
++ */
++struct panfs_export_operations {
++	int (*convert_rc)(pan_status_t rc);
++
++	int (*sm_sec_t_get_size_otw)(
++		pan_sm_sec_otw_t *var,
++		pan_size_t *core_sizep,
++		pan_size_t *wire_size,
++		void *buf_end);
++
++	int (*sm_sec_t_unmarshall)(
++		pan_sm_sec_otw_t *in,
++		pan_sm_sec_t *out,
++		void *buf,
++		pan_size_t size,
++		pan_size_t *otw_consumed,
++		pan_size_t *in_core_consumed);
++
++	int (*ucreds_get)(void **ucreds_pp);
++
++	void (*ucreds_put)(void *ucreds);
++
++	int (*sam_read)(
++		pan_sam_access_flags_t  flags,
++		pan_sam_read_args_t    *args_p,
++		pan_sam_obj_sec_t      *obj_sec_p,
++		pan_sg_entry_t         *data_p,
++		void                   *ucreds,
++		pan_sam_read_cb_t       closure,
++		void                   *user_arg1,
++		void                   *user_arg2,
++		pan_sam_read_res_t     *res_p);
++
++	int (*sam_write)(
++		pan_sam_access_flags_t  flags,
++		pan_sam_write_args_t   *args_p,
++		pan_sam_obj_sec_t      *obj_sec_p,
++		pan_sg_entry_t         *data_p,
++		void                   *ucreds,
++		pan_sam_write_cb_t      closure,
++		void                   *user_arg1,
++		void                   *user_arg2,
++		pan_sam_write_res_t    *res_p);
++};
++
++extern int
++panfs_shim_register(struct panfs_export_operations *ops);
++
++extern int
++panfs_shim_unregister(void);
++
++#endif /* _PANFS_SHIM_API_H */
+diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h
+--- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-08-23 12:09:03.390501461 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-08-23 12:09:03.390501461 -0400
+@@ -0,0 +1,440 @@
++/*
++ *  pnfs_osd_xdr.h
++ *
++ *  pNFS-osd on-the-wire data structures
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#ifndef __PNFS_OSD_XDR_H__
++#define __PNFS_OSD_XDR_H__
++
++#include <linux/nfs_fs.h>
++#include <linux/nfs_page.h>
++#include <linux/exp_xdr.h>
++#include <linux/pnfs_xdr.h>
++#include <scsi/osd_protocol.h>
++
++#define PNFS_OSD_OSDNAME_MAXSIZE 256
++
++/*
++ * START OF "GENERIC" DECODE ROUTINES.
++ *   These may look a little ugly since they are imported from a "generic"
++ * set of XDR encode/decode routines which are intended to be shared by
++ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
++ *
++ * If the pain of reading these is too great, it should be a straightforward
++ * task to translate them into Linux-specific versions which are more
++ * consistent with the style used in NFSv2/v3...
++ */
++#define READ32(x)         (x) = ntohl(*p++)
++#define READ64(x)         do {			\
++	(x) = (u64)ntohl(*p++) << 32;		\
++	(x) |= ntohl(*p++);			\
++} while (0)
++#define COPYMEM(x, nbytes) do {			\
++	memcpy((x), p, nbytes);			\
++	p += XDR_QUADLEN(nbytes);		\
++} while (0)
++
++/*
++ * draft-ietf-nfsv4-minorversion-22
++ * draft-ietf-nfsv4-pnfs-obj-12
++ */
++
++/* Layout Structure */
++
++enum pnfs_osd_raid_algorithm4 {
++	PNFS_OSD_RAID_0		= 1,
++	PNFS_OSD_RAID_4		= 2,
++	PNFS_OSD_RAID_5		= 3,
++	PNFS_OSD_RAID_PQ	= 4     /* Reed-Solomon P+Q */
++};
++
++/*   struct pnfs_osd_data_map4 {
++ *       uint32_t                    odm_num_comps;
++ *       length4                     odm_stripe_unit;
++ *       uint32_t                    odm_group_width;
++ *       uint32_t                    odm_group_depth;
++ *       uint32_t                    odm_mirror_cnt;
++ *       pnfs_osd_raid_algorithm4    odm_raid_algorithm;
++ *   };
++ */
++struct pnfs_osd_data_map {
++	u32	odm_num_comps;
++	u64	odm_stripe_unit;
++	u32	odm_group_width;
++	u32	odm_group_depth;
++	u32	odm_mirror_cnt;
++	u32	odm_raid_algorithm;
++};
++
++static inline int
++pnfs_osd_data_map_xdr_sz(void)
++{
++	return 1 + 2 + 1 + 1 + 1 + 1;	/* in u32 words, one term per field; the u64 odm_stripe_unit is 2 */
++}
++
++static inline size_t
++pnfs_osd_data_map_incore_sz(void)
++{
++	return sizeof(struct pnfs_osd_data_map);
++}
++
++/*   struct pnfs_osd_objid4 {
++ *       deviceid4       oid_device_id;
++ *       uint64_t        oid_partition_id;
++ *       uint64_t        oid_object_id;
++ *   };
++ */
++struct pnfs_osd_objid {
++	struct pnfs_deviceid	oid_device_id;
++	u64			oid_partition_id;
++	u64			oid_object_id;
++};
++
++/* For printout. I use "dev(%llx:%llx)", _DEVID_LO(), _DEVID_HI BE style */
++#define _DEVID_LO(oid_device_id) \
++	(unsigned long long)be64_to_cpup((__be64 *)oid_device_id.data)
++
++#define _DEVID_HI(oid_device_id) \
++	(unsigned long long)be64_to_cpup(((__be64 *)oid_device_id.data) + 1)
++
++static inline int
++pnfs_osd_objid_xdr_sz(void)
++{
++	return (NFS4_PNFS_DEVICEID4_SIZE / 4) + 2 + 2;	/* u32 words: device id + two u64 ids */
++}
++
++static inline size_t
++pnfs_osd_objid_incore_sz(void)
++{
++	return sizeof(struct pnfs_osd_objid);
++}
++
++enum pnfs_osd_version {
++	PNFS_OSD_MISSING              = 0,
++	PNFS_OSD_VERSION_1            = 1,
++	PNFS_OSD_VERSION_2            = 2
++};
++
++struct pnfs_osd_opaque_cred {
++	u32 cred_len;
++	u8 *cred;
++};
++
++static inline int
++pnfs_osd_opaque_cred_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++	u32 n;
++
++	READ32(n);		/* leading length word; NOTE(review): not range-checked here */
++	p += XDR_QUADLEN(n);	/* step over the body in whole u32 words */
++	return p - start;	/* u32 words consumed, length word included */
++}
++
++static inline size_t
++pnfs_osd_opaque_cred_incore_sz(u32 *p)
++{
++	u32 n;
++
++	READ32(n);
++	return XDR_QUADLEN(n) * 4;
++}
++
++enum pnfs_osd_cap_key_sec {
++	PNFS_OSD_CAP_KEY_SEC_NONE     = 0,
++	PNFS_OSD_CAP_KEY_SEC_SSV      = 1,
++};
++
++/*   struct pnfs_osd_object_cred4 {
++ *       pnfs_osd_objid4         oc_object_id;
++ *       pnfs_osd_version4       oc_osd_version;
++ *       pnfs_osd_cap_key_sec4   oc_cap_key_sec;
++ *       opaque                  oc_capability_key<>;
++ *       opaque                  oc_capability<>;
++ *   };
++ */
++struct pnfs_osd_object_cred {
++	struct pnfs_osd_objid		oc_object_id;
++	u32				oc_osd_version;
++	u32				oc_cap_key_sec;
++	struct pnfs_osd_opaque_cred	oc_cap_key;
++	struct pnfs_osd_opaque_cred	oc_cap;
++};
++
++static inline int
++pnfs_osd_object_cred_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++
++	p += pnfs_osd_objid_xdr_sz() + 2;
++	p += pnfs_osd_opaque_cred_xdr_sz(p);
++	p += pnfs_osd_opaque_cred_xdr_sz(p);
++	return p - start;
++}
++
++static inline size_t
++pnfs_osd_object_cred_incore_sz(u32 *p)
++{
++	size_t sz = sizeof(struct pnfs_osd_object_cred);
++
++	p += pnfs_osd_objid_xdr_sz() + 2;
++	sz += pnfs_osd_opaque_cred_incore_sz(p);
++	p += pnfs_osd_opaque_cred_xdr_sz(p);
++	sz += pnfs_osd_opaque_cred_incore_sz(p);
++	return sz;
++}
++
++/*   struct pnfs_osd_layout4 {
++ *       pnfs_osd_data_map4      olo_map;
++ *       uint32_t                olo_comps_index;
++ *       pnfs_osd_object_cred4   olo_components<>;
++ *   };
++ */
++struct pnfs_osd_layout {
++	struct pnfs_osd_data_map	olo_map;
++	u32				olo_comps_index;
++	u32				olo_num_comps;
++	struct pnfs_osd_object_cred	*olo_comps;
++};
++
++static inline int
++pnfs_osd_layout_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++	u32 n;
++
++	p += pnfs_osd_data_map_xdr_sz() + 1;	/* skip olo_map and olo_comps_index */
++	READ32(n);				/* count of olo_components entries */
++	while ((int)(n--) > 0)			/* cast: a count above INT_MAX runs no iterations */
++		p += pnfs_osd_object_cred_xdr_sz(p);
++	return p - start;			/* whole layout, in u32 words */
++}
++
++static inline size_t
++pnfs_osd_layout_incore_sz(u32 *p)
++{
++	u32 n;
++	size_t sz;
++
++	p += pnfs_osd_data_map_xdr_sz() + 1;
++	READ32(n);
++	sz = sizeof(struct pnfs_osd_layout);
++	while ((int)(n--) > 0) {
++		sz += pnfs_osd_object_cred_incore_sz(p);
++		p += pnfs_osd_object_cred_xdr_sz(p);
++	}
++	return sz;
++}
++
++/* Device Address */
++
++enum pnfs_osd_targetid_type {
++	OBJ_TARGET_ANON = 1,
++	OBJ_TARGET_SCSI_NAME = 2,
++	OBJ_TARGET_SCSI_DEVICE_ID = 3,
++};
++
++/*   union pnfs_osd_targetid4 switch (pnfs_osd_targetid_type4 oti_type) {
++ *       case OBJ_TARGET_SCSI_NAME:
++ *           string              oti_scsi_name<>;
++ *
++ *       case OBJ_TARGET_SCSI_DEVICE_ID:
++ *           opaque              oti_scsi_device_id<>;
++ *
++ *       default:
++ *           void;
++ *   };
++ *
++ *   union pnfs_osd_targetaddr4 switch (bool ota_available) {
++ *       case TRUE:
++ *           netaddr4            ota_netaddr;
++ *       case FALSE:
++ *           void;
++ *   };
++ *
++ *   struct pnfs_osd_deviceaddr4 {
++ *       pnfs_osd_targetid4      oda_targetid;
++ *       pnfs_osd_targetaddr4    oda_targetaddr;
++ *       uint64_t                oda_lun;
++ *       opaque                  oda_systemid<>;
++ *       pnfs_osd_object_cred4   oda_root_obj_cred;
++ *       opaque                  oda_osdname<>;
++ *   };
++ */
++struct pnfs_osd_targetid {
++	u32				oti_type;
++	struct nfs4_string		oti_scsi_device_id;
++};
++
++enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 };
++
++/*   struct netaddr4 {
++ *       // see struct rpcb in RFC1833
++ *       string r_netid<>;    // network id
++ *       string r_addr<>;     // universal address
++ *   };
++ */
++struct pnfs_osd_net_addr {
++	struct nfs4_string	r_netid;
++	struct nfs4_string	r_addr;
++};
++
++struct pnfs_osd_targetaddr {
++	u32				ota_available;
++	struct pnfs_osd_net_addr	ota_netaddr;
++};
++
++enum {
++	NETWORK_ID_MAX = 16 / 4,
++	UNIVERSAL_ADDRESS_MAX = 64 / 4,
++	PNFS_OSD_TARGETADDR_MAX = 3 +  NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX,
++};
++
++struct pnfs_osd_deviceaddr {
++	struct pnfs_osd_targetid	oda_targetid;
++	struct pnfs_osd_targetaddr	oda_targetaddr;
++	u8				oda_lun[8];
++	struct nfs4_string		oda_systemid;
++	struct pnfs_osd_object_cred	oda_root_obj_cred;
++	struct nfs4_string		oda_osdname;
++};
++
++enum {
++	ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4,
++	PNFS_OSD_DEVICEADDR_MAX =
++		PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX +
++		2 /*oda_lun*/ +
++		1 + OSD_SYSTEMID_LEN +
++		1 + ODA_OSDNAME_MAX,
++};
++
++/* LAYOUTCOMMIT: layoutupdate */
++
++/*   union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) {
++ *       case TRUE:
++ *           int64_t     dsu_delta;
++ *       case FALSE:
++ *           void;
++ *   };
++ *
++ *   struct pnfs_osd_layoutupdate4 {
++ *       pnfs_osd_deltaspaceused4    olu_delta_space_used;
++ *       bool                        olu_ioerr_flag;
++ *   };
++ */
++struct pnfs_osd_layoutupdate {
++	u32	dsu_valid;
++	s64	dsu_delta;
++	u32	olu_ioerr_flag;
++};
++
++/* LAYOUTRETURN: I/O Error Report */
++
++enum pnfs_osd_errno {
++	PNFS_OSD_ERR_EIO		= 1,
++	PNFS_OSD_ERR_NOT_FOUND		= 2,
++	PNFS_OSD_ERR_NO_SPACE		= 3,
++	PNFS_OSD_ERR_BAD_CRED		= 4,
++	PNFS_OSD_ERR_NO_ACCESS		= 5,
++	PNFS_OSD_ERR_UNREACHABLE	= 6,
++	PNFS_OSD_ERR_RESOURCE		= 7
++};
++
++/*   struct pnfs_osd_ioerr4 {
++ *       pnfs_osd_objid4     oer_component;
++ *       length4             oer_comp_offset;
++ *       length4             oer_comp_length;
++ *       bool                oer_iswrite;
++ *       pnfs_osd_errno4     oer_errno;
++ *   };
++ */
++struct pnfs_osd_ioerr {
++	struct pnfs_osd_objid	oer_component;
++	u64			oer_comp_offset;
++	u64			oer_comp_length;
++	u32			oer_iswrite;
++	u32			oer_errno;
++};
++
++static inline unsigned
++pnfs_osd_ioerr_xdr_sz(void)
++{
++	return pnfs_osd_objid_xdr_sz() + 2 + 2 + 1 + 1;	/* u32 words: objid, two u64s, two u32s */
++}
++
++/* OSD XDR API */
++
++/* Layout helpers */
++extern struct pnfs_osd_layout *pnfs_osd_xdr_decode_layout(
++	struct pnfs_osd_layout *layout, u32 *p);
++
++extern int pnfs_osd_xdr_encode_layout(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_layout *layout);
++
++/* Device Info helpers */
++
++/* First pass calculate total size for space needed */
++extern size_t pnfs_osd_xdr_deviceaddr_incore_sz(u32 *p);
++
++/* Note: some strings pointed to inside @deviceaddr might point
++ * to space inside @p. @p should stay valid while @deviceaddr
++ * is in use.
++ * It is assumed that @deviceaddr points to bigger memory of size
++ * calculated in first pass by pnfs_osd_xdr_deviceaddr_incore_sz()
++ */
++extern void pnfs_osd_xdr_decode_deviceaddr(
++	struct pnfs_osd_deviceaddr *deviceaddr, u32 *p);
++
++/* For Servers */
++extern int pnfs_osd_xdr_encode_deviceaddr(
++	struct exp_xdr_stream *xdr, struct pnfs_osd_deviceaddr *devaddr);
++
++/* layoutupdate (layout_commit) xdr helpers */
++extern int
++pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
++				 struct pnfs_osd_layoutupdate *lou);
++extern __be32 *
++pnfs_osd_xdr_decode_layoutupdate(struct pnfs_osd_layoutupdate *lou, __be32 *p);
++
++/* osd_ioerror encoding/decoding (layout_return) */
++extern int
++pnfs_osd_xdr_encode_ioerr(struct xdr_stream *xdr, struct pnfs_osd_ioerr *ioerr);
++extern __be32 *
++pnfs_osd_xdr_decode_ioerr(struct pnfs_osd_ioerr *ioerr, __be32 *p);
++
++#endif /* __PNFS_OSD_XDR_H__ */
+diff -up linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_xdr.h
+--- linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig	2010-08-23 12:09:03.391491550 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_xdr.h	2010-08-23 12:09:03.391491550 -0400
+@@ -0,0 +1,134 @@
++/*
++ *  include/linux/pnfs_xdr.h
++ *
++ *  Common xdr data structures needed by pnfs client.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ * Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef LINUX_PNFS_XDR_H
++#define LINUX_PNFS_XDR_H
++
++#define PNFS_LAYOUT_MAXSIZE 4096
++#define NFS4_PNFS_DEVICEID4_SIZE 16
++
++struct pnfs_deviceid {
++	char data[NFS4_PNFS_DEVICEID4_SIZE];
++};
++
++struct nfs4_pnfs_layout {
++	__u32 len;
++	void *buf;
++};
++
++struct nfs4_pnfs_layout_segment {
++	u32 iomode;
++	u64 offset;
++	u64 length;
++};
++
++struct nfs4_pnfs_layoutget_arg {
++	__u32 type;
++	struct nfs4_pnfs_layout_segment lseg;
++	__u64 minlength;
++	__u32 maxcount;
++	struct inode *inode;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_pnfs_layoutget_res {
++	__u32 return_on_close;
++	struct nfs4_pnfs_layout_segment lseg;
++	__u32 type;
++	nfs4_stateid stateid;
++	struct nfs4_pnfs_layout layout;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct nfs4_pnfs_layoutget {
++	struct nfs4_pnfs_layoutget_arg args;
++	struct nfs4_pnfs_layoutget_res res;
++	struct pnfs_layout_segment **lsegpp;
++	int status;
++};
++
++struct pnfs_layoutcommit_arg {
++	nfs4_stateid stateid;
++	__u64 lastbytewritten;
++	__u32 time_modify_changed;
++	struct timespec time_modify;
++	const u32 *bitmask;
++	struct nfs_fh *fh;
++	struct inode *inode;
++
++	/* Values set by layout driver */
++	struct nfs4_pnfs_layout_segment lseg;
++	__u32 layout_type;
++	void *layoutdriver_data;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct pnfs_layoutcommit_res {
++	__u32 sizechanged;
++	__u64 newsize;
++	struct nfs_fattr *fattr;
++	const struct nfs_server *server;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct pnfs_layoutcommit_data {
++	struct rpc_task task;
++	struct rpc_cred *cred;
++	struct nfs_fattr fattr;
++	struct pnfs_layoutcommit_arg args;
++	struct pnfs_layoutcommit_res res;
++	int status;
++};
++
++struct nfs4_pnfs_layoutreturn_arg {
++	__u32	reclaim;
++	__u32	layout_type;
++	__u32	return_type;
++	struct nfs4_pnfs_layout_segment lseg;
++	struct inode *inode;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_pnfs_layoutreturn_res {
++	struct nfs4_sequence_res seq_res;
++	u32 lrs_present;
++	nfs4_stateid stateid;
++};
++
++struct nfs4_pnfs_layoutreturn {
++	struct nfs4_pnfs_layoutreturn_arg args;
++	struct nfs4_pnfs_layoutreturn_res res;
++	struct rpc_cred *cred;
++	int rpc_status;
++};
++
++struct nfs4_pnfs_getdevicelist_arg {
++	const struct nfs_fh *fh;
++	u32 layoutclass;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_pnfs_getdevicelist_res {
++	struct pnfs_devicelist *devlist;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct nfs4_pnfs_getdeviceinfo_arg {
++	struct pnfs_device *pdev;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_pnfs_getdeviceinfo_res {
++	struct pnfs_device *pdev;
++	struct nfs4_sequence_res seq_res;
++};
++
++#endif /* LINUX_PNFS_XDR_H */
+diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/include/linux/posix_acl.h
+--- linux-2.6.34.noarch/include/linux/posix_acl.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-08-23 12:09:03.393501437 -0400
+@@ -8,6 +8,7 @@
+ #ifndef __LINUX_POSIX_ACL_H
+ #define __LINUX_POSIX_ACL_H
+ 
++#include <linux/fs.h>
+ #include <linux/slab.h>
+ 
+ #define ACL_UNDEFINED_ID	(-1)
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-08-23 12:09:03.393501437 -0400
+@@ -14,6 +14,8 @@
+ /* size of an XDR encoding unit in bytes, i.e. 32bit */
+ #define XDR_UNIT	(4)
+ 
++#include <linux/types.h>
++
+ /* spec defines authentication flavor as an unsigned 32 bit integer */
+ typedef u32	rpc_authflavor_t;
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-08-23 12:09:03.394512138 -0400
+@@ -3,6 +3,7 @@
+ 
+ #ifdef __KERNEL__
+ 
++#include <linux/fs.h>
+ #include <linux/workqueue.h>
+ 
+ struct rpc_pipe_msg {
+@@ -11,6 +12,10 @@ struct rpc_pipe_msg {
+ 	size_t len;
+ 	size_t copied;
+ 	int errno;
++#define PIPEFS_AUTOFREE_RPCMSG       0x01 /* frees rpc_pipe_msg */
++#define PIPEFS_AUTOFREE_RPCMSG_DATA  0x02 /* frees rpc_pipe_msg->data */
++#define PIPEFS_AUTOFREE_UPCALL_MSG   PIPEFS_AUTOFREE_RPCMSG_DATA
++	u8 flags;
+ };
+ 
+ struct rpc_pipe_ops {
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-08-23 12:09:03.394512138 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-08-23 12:09:03.395501822 -0400
+@@ -0,0 +1,111 @@
++/*
++ *  Copyright (c) 2008 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  David M. Richter <richterd@citi.umich.edu>
++ *
++ *  Drawing on work done by Andy Adamson <andros@citi.umich.edu> and
++ *  Marius Eriksen <marius@monkey.org>.  Thanks for the help over the
++ *  years, guys.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *  With thanks to CITI's project sponsor and partner, IBM.
++ */
++
++#ifndef _SIMPLE_RPC_PIPEFS_H_
++#define _SIMPLE_RPC_PIPEFS_H_
++
++#include <linux/fs.h>
++#include <linux/list.h>
++#include <linux/mount.h>
++#include <linux/sched.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++
++
++#define payload_of(headerp)  ((void *)(headerp + 1))
++
++/*
++ * struct pipefs_hdr -- the generic message format for simple_rpc_pipefs.
++ * Messages may simply be the header itself, although having an optional
++ * data payload follow the header allows much more flexibility.
++ *
++ * Messages are created using pipefs_alloc_init_msg() and
++ * pipefs_alloc_init_msg_padded(), both of which accept a pointer to an
++ * (optional) data payload.
++ *
++ * Given a struct pipefs_hdr *msg that has a struct foo payload, the data
++ * can be accessed using: struct foo *foop = payload_of(msg)
++ */
++struct pipefs_hdr {
++	u32 msgid;
++	u8  type;
++	u8  flags;
++	u16 totallen; /* length of entire message, including hdr itself */
++	u32 status;
++};
++
++/*
++ * struct pipefs_list -- a type of list used for tracking callers who've made an
++ * upcall and are blocked waiting for a reply.
++ *
++ * See pipefs_queue_upcall_waitreply() and pipefs_assign_upcall_reply().
++ */
++struct pipefs_list {
++	struct list_head list;
++	spinlock_t list_lock;
++};
++
++
++/* See net/sunrpc/simple_rpc_pipefs.c for more info on using these functions. */
++extern struct dentry *pipefs_mkpipe(const char *name,
++				    const struct rpc_pipe_ops *ops,
++				    int wait_for_open);
++extern void pipefs_closepipe(struct dentry *pipe);
++extern void pipefs_init_list(struct pipefs_list *list);
++extern struct pipefs_hdr *pipefs_alloc_init_msg(u32 msgid, u8 type, u8 flags,
++						void *data, u16 datalen);
++extern struct pipefs_hdr *pipefs_alloc_init_msg_padded(u32 msgid, u8 type,
++						       u8 flags, void *data,
++						       u16 datalen, u16 padlen);
++extern struct pipefs_hdr *pipefs_queue_upcall_waitreply(struct dentry *pipe,
++							struct pipefs_hdr *msg,
++							struct pipefs_list
++							*uplist, u8 upflags,
++							u32 timeout);
++extern int pipefs_queue_upcall_noreply(struct dentry *pipe,
++				       struct pipefs_hdr *msg, u8 upflags);
++extern int pipefs_assign_upcall_reply(struct pipefs_hdr *reply,
++				      struct pipefs_list *uplist);
++extern struct pipefs_hdr *pipefs_readmsg(struct file *filp,
++					 const char __user *src, size_t len);
++extern ssize_t pipefs_generic_upcall(struct file *filp,
++				     struct rpc_pipe_msg *rpcmsg,
++				     char __user *dst, size_t buflen);
++extern void pipefs_generic_destroy_msg(struct rpc_pipe_msg *rpcmsg);
++
++#endif /* _SIMPLE_RPC_PIPEFS_H_ */
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-08-23 12:09:03.395501822 -0400
+@@ -166,4 +166,41 @@ static inline char *__svc_print_addr(con
+ 
+ 	return buf;
+ }
++
++/*
++ * Print a network address in a universal format (see rfc1833 and nfsv4.1)
++ */
++static inline int __svc_print_netaddr(struct sockaddr *addr,
++				      struct xdr_netobj *na)
++{
++	u16 port;
++	ssize_t len;	/* snprintf() result, or -EINVAL for an unhandled family */
++
++	switch (addr->sa_family) {
++	case AF_INET: {
++		struct sockaddr_in *sin = (struct sockaddr_in *)addr;
++		port = ntohs(sin->sin_port);
++
++		len = snprintf(na->data, na->len, "%pI4.%u.%u",
++				&sin->sin_addr,
++				port >> 8, port & 0xff);	/* port as high byte, low byte */
++		break;
++	}
++	case AF_INET6: {
++		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
++		port = ntohs(sin6->sin6_port);
++
++		len = snprintf(na->data, na->len, "%pI6.%u.%u",
++				&sin6->sin6_addr,
++				port >> 8, port & 0xff);	/* port as high byte, low byte */
++		break;
++	}
++	default:
++		snprintf(na->data, na->len, "unknown address type: %d",
++			 addr->sa_family);	/* @na still receives a readable message */
++		len = -EINVAL;
++		break;
++	}
++	return len;	/* NOTE(review): na->len is the buffer size; snprintf() may report more -- confirm callers check */
++}
+ #endif /* SUNRPC_SVC_XPRT_H */
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xdr.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-08-23 12:08:29.066475323 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-23 12:09:03.396464612 -0400
+@@ -131,6 +131,13 @@ xdr_decode_hyper(__be32 *p, __u64 *valp)
+ 	return p + 2;
+ }
+ 
++static inline __be32 *
++xdr_decode_opaque_fixed(__be32 *p, void *ptr, unsigned int len)
++{
++	memcpy(ptr, p, len);
++	return p + XDR_QUADLEN(len);
++}
++
+ /*
+  * Adjust kvec to reflect end of xdr'ed data (RPC client XDR)
+  */
+@@ -197,6 +204,7 @@ struct xdr_stream {
+ 
+ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
++extern __be32 *xdr_rewind_stream(struct xdr_stream *xdr, __be32 *q);
+ extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
+ 		unsigned int base, unsigned int len);
+ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+diff -up linux-2.6.34.noarch/localversion-pnfs.orig linux-2.6.34.noarch/localversion-pnfs
+--- linux-2.6.34.noarch/localversion-pnfs.orig	2010-08-23 12:09:03.396464612 -0400
++++ linux-2.6.34.noarch/localversion-pnfs	2010-08-23 12:09:03.396464612 -0400
+@@ -0,0 +1 @@
++-pnfs
+diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/sunrpc/Makefile
+--- linux-2.6.34.noarch/net/sunrpc/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-08-23 12:09:03.397501662 -0400
+@@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprt
+ 	    svc.o svcsock.o svcauth.o svcauth_unix.o \
+ 	    addr.o rpcb_clnt.o timer.o xdr.o \
+ 	    sunrpc_syms.o cache.o rpc_pipe.o \
+-	    svc_xprt.o
++	    svc_xprt.o simple_rpc_pipefs.o
+ sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o
+ sunrpc-$(CONFIG_PROC_FS) += stats.o
+ sunrpc-$(CONFIG_SYSCTL) += sysctl.o
+diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c
+--- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-08-23 12:09:03.398522348 -0400
++++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-08-23 12:09:03.398522348 -0400
+@@ -0,0 +1,424 @@
++/*
++ *  net/sunrpc/simple_rpc_pipefs.c
++ *
++ *  Copyright (c) 2008 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  David M. Richter <richterd@citi.umich.edu>
++ *
++ *  Drawing on work done by Andy Adamson <andros@citi.umich.edu> and
++ *  Marius Eriksen <marius@monkey.org>.  Thanks for the help over the
++ *  years, guys.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *  With thanks to CITI's project sponsor and partner, IBM.
++ */
++
++#include <linux/completion.h>
++#include <linux/uaccess.h>
++#include <linux/module.h>
++#include <linux/sunrpc/simple_rpc_pipefs.h>
++
++
++/*
++ * Make an rpc_pipefs pipe named @name at the root of the mounted rpc_pipefs
++ * filesystem.  Returns its dentry, or an ERR_PTR() with no mount ref held.
++ *
++ * If @wait_for_open is non-zero and an upcall is later queued but the userland
++ * end of the pipe has not yet been opened, the upcall will remain queued until
++ * the pipe is opened; otherwise, the upcall queueing will return with -EPIPE.
++ */
++struct dentry *pipefs_mkpipe(const char *name, const struct rpc_pipe_ops *ops,
++			     int wait_for_open)
++{
++	struct dentry *dir, *pipe;
++	struct vfsmount *mnt;
++
++	mnt = rpc_get_mount();
++	if (IS_ERR(mnt))
++		return ERR_CAST(mnt);
++	dir = mnt->mnt_root;
++	if (dir)
++		pipe = rpc_mkpipe(dir, name, NULL, ops,
++				  wait_for_open ? RPC_PIPE_WAIT_FOR_OPEN : 0);
++	else
++		pipe = ERR_PTR(-ENOENT);
++
++	/* only a live pipe keeps the mount ref; pipefs_closepipe() drops it */
++	if (IS_ERR(pipe))
++		rpc_put_mount();
++	return pipe;
++}
++EXPORT_SYMBOL(pipefs_mkpipe);
++
++/*
++ * Shutdown a pipe made by pipefs_mkpipe().
++ * XXX: do we need to retain an extra reference on the mount?
++ */
++void pipefs_closepipe(struct dentry *pipe)
++{
++	rpc_unlink(pipe);
++	rpc_put_mount();
++}
++EXPORT_SYMBOL(pipefs_closepipe);
++
++/*
++ * Initialize a struct pipefs_list -- a list used to keep track of callers
++ * who are blocked having made an upcall and are awaiting a reply.
++ *
++ * See pipefs_queue_upcall_waitreply() and pipefs_assign_upcall_reply() for
++ * how to use it.
++ */
++inline void pipefs_init_list(struct pipefs_list *list)
++{
++	INIT_LIST_HEAD(&list->list);
++	spin_lock_init(&list->list_lock);
++}
++EXPORT_SYMBOL(pipefs_init_list);
++
++/*
++ * Alloc/init a generic pipefs message header and copy an arbitrary data
++ * payload of @datalen bytes from @data into the message body; @padlen extra
++ * zeroed bytes are reserved after the payload.
++ *
++ * Returns the new message, ERR_PTR(-E2BIG) if header + payload + padding
++ * exceeds PAGE_SIZE or the u16 totallen field, or ERR_PTR(-ENOMEM).
++ *
++ * When an upcall is made, the struct pipefs_hdr is assigned to a
++ * struct rpc_pipe_msg and delivered therein.  In simple_rpc_pipefs the upcall
++ * "message" is the struct pipefs_hdr (possibly with an attached body); the
++ * struct rpc_pipe_msg is only the -vehicle- by which the "real" message is
++ * delivered and processed.
++ */
++struct pipefs_hdr *pipefs_alloc_init_msg_padded(u32 msgid, u8 type, u8 flags,
++					   void *data, u16 datalen, u16 padlen)
++{
++	size_t totallen;	/* not u16: the sum below must not wrap */
++	struct pipefs_hdr *msg = NULL;
++
++	totallen = sizeof(*msg) + datalen + padlen;
++	if (totallen > PAGE_SIZE || totallen > (u16)~0U) { /* fits hdr field? */
++		msg = ERR_PTR(-E2BIG);
++		goto out;
++	}
++
++	msg = kzalloc(totallen, GFP_KERNEL);
++	if (!msg) {
++		msg = ERR_PTR(-ENOMEM);
++		goto out;
++	}
++
++	msg->msgid = msgid;
++	msg->type = type;
++	msg->flags = flags;
++	msg->totallen = totallen;
++	memcpy(payload_of(msg), data, datalen);
++out:
++	return msg;
++}
++EXPORT_SYMBOL(pipefs_alloc_init_msg_padded);
++
++/*
++ * See the description of pipefs_alloc_init_msg_padded().
++ */
++struct pipefs_hdr *pipefs_alloc_init_msg(u32 msgid, u8 type, u8 flags,
++				    void *data, u16 datalen)
++{
++	return pipefs_alloc_init_msg_padded(msgid, type, flags, data,
++					    datalen, 0);
++}
++EXPORT_SYMBOL(pipefs_alloc_init_msg);
++
++
++static void pipefs_init_rpcmsg(struct rpc_pipe_msg *rpcmsg,
++			       struct pipefs_hdr *msg, u8 upflags)
++{
++	memset(rpcmsg, 0, sizeof(*rpcmsg));
++	rpcmsg->data = msg;
++	rpcmsg->len = msg->totallen;
++	rpcmsg->flags = upflags;
++}
++
++static struct rpc_pipe_msg *pipefs_alloc_init_rpcmsg(struct pipefs_hdr *msg,
++						     u8 upflags)
++{
++	struct rpc_pipe_msg *rpcmsg;
++
++	rpcmsg = kmalloc(sizeof(*rpcmsg), GFP_KERNEL);
++	if (!rpcmsg)
++		return ERR_PTR(-ENOMEM);
++
++	pipefs_init_rpcmsg(rpcmsg, msg, upflags);
++	return rpcmsg;
++}
++
++
++/* represents an upcall that'll block and wait for a reply */
++struct pipefs_upcall {
++	u32 msgid;
++	struct rpc_pipe_msg rpcmsg;
++	struct list_head list;
++	wait_queue_head_t waitq;
++	struct pipefs_hdr *reply;
++};
++
++
++static void pipefs_init_upcall_waitreply(struct pipefs_upcall *upcall,
++					 struct pipefs_hdr *msg, u8 upflags)
++{
++	upcall->reply = NULL;
++	upcall->msgid = msg->msgid;
++	INIT_LIST_HEAD(&upcall->list);
++	init_waitqueue_head(&upcall->waitq);
++	pipefs_init_rpcmsg(&upcall->rpcmsg, msg, upflags);
++}
++
++static int __pipefs_queue_upcall_waitreply(struct dentry *pipe,
++					   struct pipefs_upcall *upcall,
++					   struct pipefs_list *uplist,
++					   u32 timeout)
++{
++	int err = 0;
++	DECLARE_WAITQUEUE(wq, current);
++
++	add_wait_queue(&upcall->waitq, &wq);	/* become a waiter before queueing */
++	spin_lock(&uplist->list_lock);
++	list_add(&upcall->list, &uplist->list);	/* put the upcall on @uplist */
++	spin_unlock(&uplist->list_lock);
++
++	err = rpc_queue_upcall(pipe->d_inode, &upcall->rpcmsg);
++	if (err < 0)
++		goto out;
++
++	if (timeout) {
++		/* retval of 0 means timer expired; any other retval is returned as-is */
++		err = schedule_timeout_uninterruptible(timeout);
++		if (err == 0 && upcall->reply == NULL)
++			err = -ETIMEDOUT;
++	} else {	/* no timeout: sleep until woken */
++		set_current_state(TASK_UNINTERRUPTIBLE);	/* NOTE(review): set after queueing -- can a wakeup be lost? */
++		schedule();
++		__set_current_state(TASK_RUNNING);
++	}
++
++out:	/* reached on success, timeout and queueing failure alike */
++	spin_lock(&uplist->list_lock);
++	list_del_init(&upcall->list);
++	spin_unlock(&uplist->list_lock);
++	remove_wait_queue(&upcall->waitq, &wq);
++	return err;
++}
++
++/*
++ * Queue a pipefs msg for an upcall to userspace, place the calling thread
++ * on @uplist, and block the thread to wait for a reply.  If @timeout is
++ * nonzero, the thread will be blocked for at most @timeout jiffies (see
++ * msecs_to_jiffies(), usecs_to_jiffies(), timeval_to_jiffies() and
++ * timespec_to_jiffies() for converting other time units).
++ *
++ * Once a reply is received by your downcall handler, call
++ * pipefs_assign_upcall_reply() with @uplist to find the corresponding upcall,
++ * assign the reply, and wake the waiting thread.
++ *
++ * The return value may be an ERR_PTR(); check it with IS_ERR() before use.
++ *
++ * Callers are responsible for freeing @msg, unless pipefs_generic_destroy_msg()
++ * is used as the ->destroy_msg() callback and the PIPEFS_AUTOFREE_UPCALL_MSG
++ * flag is set in @upflags.  See also rpc_pipe_fs.h.
++ *
++ * NOTE(review): the rpc_pipe_msg lives on this stack frame; confirm it cannot
++ * still be queued on @pipe when a timeout makes this function return.
++ */
++struct pipefs_hdr *pipefs_queue_upcall_waitreply(struct dentry *pipe,
++					    struct pipefs_hdr *msg,
++					    struct pipefs_list *uplist,
++					    u8 upflags, u32 timeout)
++{
++	int err = 0;
++	struct pipefs_upcall upcall;	/* on-stack; gone once we return */
++
++	pipefs_init_upcall_waitreply(&upcall, msg, upflags);
++	err = __pipefs_queue_upcall_waitreply(pipe, &upcall, uplist, timeout);
++	if (err < 0) {
++		kfree(upcall.reply);	/* drops a reply assigned despite the error, if any */
++		upcall.reply = ERR_PTR(err);
++	}
++
++	return upcall.reply;
++}
++EXPORT_SYMBOL(pipefs_queue_upcall_waitreply);
++
++/*
++ * Queue a pipefs msg for an upcall to userspace and return immediately (no
++ * reply is expected).  Returns -ENOMEM, or the result of rpc_queue_upcall().
++ *
++ * Callers are responsible for freeing @msg, unless pipefs_generic_destroy_msg()
++ * is used as the ->destroy_msg() callback and the PIPEFS_AUTOFREE_UPCALL_MSG
++ * flag is set in @upflags.  See also rpc_pipe_fs.h.
++ */
++int pipefs_queue_upcall_noreply(struct dentry *pipe, struct pipefs_hdr *msg,
++				u8 upflags)
++{
++	int err;
++	struct rpc_pipe_msg *rpcmsg;
++
++	upflags |= PIPEFS_AUTOFREE_RPCMSG;
++	rpcmsg = pipefs_alloc_init_rpcmsg(msg, upflags);
++	if (IS_ERR(rpcmsg))
++		return PTR_ERR(rpcmsg);
++
++	err = rpc_queue_upcall(pipe->d_inode, rpcmsg);
++	if (err < 0)
++		kfree(rpcmsg);	/* never queued, so ->destroy_msg() won't free it */
++	return err;
++}
++EXPORT_SYMBOL(pipefs_queue_upcall_noreply);
++
++
++/* Find the waiting upcall for @msgid; caller must hold uplist->list_lock. */
++static struct pipefs_upcall *pipefs_find_upcall_msgid(u32 msgid,
++						 struct pipefs_list *uplist)
++{
++	struct pipefs_upcall *upcall;
++
++	list_for_each_entry(upcall, &uplist->list, list)
++		if (upcall->msgid == msgid)
++			return upcall;
++	return NULL;
++}
++
++/*
++ * In your rpc_pipe_ops->downcall() handler, once you've read in a downcall
++ * message and have determined that it is a reply to a waiting upcall,
++ * you can use this function to find the appropriate upcall, assign the result,
++ * and wake the upcall thread.  Returns 0, or -ENOENT if none is waiting.
++ *
++ * The reply message must have the same msgid as the original upcall message's.
++ *
++ * See also pipefs_queue_upcall_waitreply() and pipefs_readmsg().
++ */
++int pipefs_assign_upcall_reply(struct pipefs_hdr *reply,
++			       struct pipefs_list *uplist)
++{
++	int err = -ENOENT;
++	struct pipefs_upcall *upcall;
++
++	/* Hold the lock across lookup+assign: the upcall is on its waiter's stack. */
++	spin_lock(&uplist->list_lock);
++	upcall = pipefs_find_upcall_msgid(reply->msgid, uplist);
++	if (upcall) {
++		upcall->reply = reply;
++		wake_up(&upcall->waitq);
++		err = 0;
++	}
++	spin_unlock(&uplist->list_lock);
++
++	if (err)
++		printk(KERN_ERR "%s: ERROR: have reply but no matching upcall "
++			"for msgid %d\n", __func__, reply->msgid);
++	return err;
++}
++EXPORT_SYMBOL(pipefs_assign_upcall_reply);
++
++/*
++ * Generic method to read-in and return a newly-allocated message which begins
++ * with a struct pipefs_hdr.  Returns an ERR_PTR() on failure, never NULL.
++ */
++struct pipefs_hdr *pipefs_readmsg(struct file *filp, const char __user *src,
++			     size_t len)
++{
++	int err = 0, hdrsize;
++	struct pipefs_hdr *msg = NULL;
++
++	hdrsize = sizeof(*msg);
++	if (len < hdrsize) {
++		printk(KERN_ERR "%s: ERROR: header is too short (%d vs %d)\n",
++		       __func__, (int) len, hdrsize);
++		err = -EINVAL;
++		goto out;
++	}
++
++	msg = kzalloc(len, GFP_KERNEL);	/* NOTE(review): no upper bound on len here */
++	if (!msg) {
++		err = -ENOMEM;
++		goto out;
++	}
++	if (copy_from_user(msg, src, len))
++		err = -EFAULT;
++out:
++	if (err) {
++		kfree(msg);	/* msg is still NULL on the two early exits */
++		msg = ERR_PTR(err);
++	}
++	return msg;
++}
++EXPORT_SYMBOL(pipefs_readmsg);
++
++/*
++ * Generic rpc_pipe_ops->upcall() handler implementation: copies the unread
++ * part of @rpcmsg, at most @buflen bytes, to @dst.  Returns the number of
++ * bytes copied, or -EFAULT when none of them could be.  Don't call this
++ * directly: use pipefs_queue_upcall_waitreply() or pipefs_queue_upcall_noreply().
++ */
++ssize_t pipefs_generic_upcall(struct file *filp, struct rpc_pipe_msg *rpcmsg,
++			      char __user *dst, size_t buflen)
++{
++	char *data;
++	ssize_t len, left;
++
++	data = (char *)rpcmsg->data + rpcmsg->copied;
++	len = rpcmsg->len - rpcmsg->copied;
++	if (len > buflen)
++		len = buflen;
++
++	left = copy_to_user(dst, data, len);	/* bytes NOT copied, never < 0 */
++	if (len && left == len) {
++		rpcmsg->errno = -EFAULT;
++		return -EFAULT;
++	}
++
++	len -= left;
++	rpcmsg->copied += len;
++	rpcmsg->errno = 0;
++	return len;
++}
++EXPORT_SYMBOL(pipefs_generic_upcall);
++
++/*
++ * Generic rpc_pipe_ops->destroy_msg() handler implementation.
++ *
++ * Frees @rpcmsg->data if PIPEFS_AUTOFREE_UPCALL_MSG is set in @rpcmsg->flags,
++ * and @rpcmsg itself if PIPEFS_AUTOFREE_RPCMSG is set.  See rpc_pipe_fs.h.
++ */
++void pipefs_generic_destroy_msg(struct rpc_pipe_msg *rpcmsg)
++{
++	if (rpcmsg->flags & PIPEFS_AUTOFREE_UPCALL_MSG)
++		kfree(rpcmsg->data);	/* payload is reached through @rpcmsg ... */
++	if (rpcmsg->flags & PIPEFS_AUTOFREE_RPCMSG)
++		kfree(rpcmsg);		/* ... so the wrapper has to go last */
++}
++EXPORT_SYMBOL(pipefs_generic_destroy_msg);
+diff -up linux-2.6.34.noarch/net/sunrpc/xdr.c.orig linux-2.6.34.noarch/net/sunrpc/xdr.c
+--- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-08-23 12:08:29.081501640 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-23 12:09:03.399443371 -0400
+@@ -403,16 +403,14 @@ xdr_shrink_pagelen(struct xdr_buf *buf, 
+ 
+ 	/* Shift the tail first */
+ 	if (tail->iov_len != 0) {
+-		p = (char *)tail->iov_base + len;
+-		if (tail->iov_len > len) {
+-			copy = tail->iov_len - len;
+-			memmove(p, tail->iov_base, copy);
+-		} else
+-			buf->buflen -= len;
+-		/* Copy from the inlined pages into the tail */
+ 		copy = len;
+-		if (copy > tail->iov_len)
++		if (tail->iov_len > len) {
++			p = (char *)tail->iov_base + len;
++			memmove(p, tail->iov_base, tail->iov_len - len);
++		} else {
+ 			copy = tail->iov_len;
++		}
++		/* Copy from the inlined pages into the tail */
+ 		_copy_from_pages((char *)tail->iov_base,
+ 				buf->pages, buf->page_base + pglen - len,
+ 				copy);
+@@ -496,6 +494,27 @@ __be32 * xdr_reserve_space(struct xdr_st
+ EXPORT_SYMBOL_GPL(xdr_reserve_space);
+ 
+ /**
++ * xdr_rewind_stream - rewind a stream back to some checkpoint
++ * @xdr: pointer to xdr_stream
++ * @q: some checkpoint at historical place of @xdr
++ *
++ * Restores an xdr stream to some historical point, trimming the bytes encoded
++ * since then. @q must be a past position sampled by @q = @xdr->p. Returns @q.
++ */
++__be32 *xdr_rewind_stream(struct xdr_stream *xdr, __be32 *q)
++{
++	size_t nbytes = (xdr->p - q) << 2;	/* __be32 words -> bytes */
++
++	BUG_ON(xdr->p < q);
++	BUG_ON(nbytes > xdr->iov->iov_len || nbytes > xdr->buf->len);
++	xdr->p = q;
++	xdr->iov->iov_len -= nbytes;
++	xdr->buf->len -= nbytes;
++	return q;
++}
++EXPORT_SYMBOL_GPL(xdr_rewind_stream);
++
++/**
+  * xdr_write_pages - Insert a list of pages into an XDR buffer for sending
+  * @xdr: pointer to xdr_stream
+  * @pages: list of pages

From 5c488563eb056aef9c02edc96903714135bf4bd6 Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Mon, 23 Aug 2010 14:15:46 -0400
Subject: [PATCH 02/20] Fixed a couple compile errors in the server code.

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 nfsd-35-fc.patch | 42 +++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/nfsd-35-fc.patch b/nfsd-35-fc.patch
index ef99b4995..2825464af 100644
--- a/nfsd-35-fc.patch
+++ b/nfsd-35-fc.patch
@@ -1,6 +1,6 @@
 diff -up linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt
 --- linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt	2010-08-23 09:57:18.233564439 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt	2010-08-23 14:12:24.165356789 -0400
 @@ -137,7 +137,7 @@ NS*| OPENATTR             | OPT        |
     | READ                 | REQ        |              | Section 18.22  |
     | READDIR              | REQ        |              | Section 18.23  |
@@ -12,7 +12,7 @@ diff -up linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig
     | RENAME               | REQ        |              | Section 18.26  |
 diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
 --- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 09:57:18.234564075 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 14:12:24.519356675 -0400
 @@ -259,10 +259,9 @@ static struct cache_detail svc_expkey_ca
  	.alloc		= expkey_alloc,
  };
@@ -108,7 +108,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/e
  out_put_clp:
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
 --- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 10:00:37.257414684 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 14:12:52.625429773 -0400
 @@ -79,11 +79,6 @@ enum nfs_cb_opnum4 {
  					cb_sequence_dec_sz +            \
  					op_dec_sz)
@@ -211,7 +211,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/
  	int status;
  
 -	status = rpc_call_async(cb->cb_client, &msg,
-+	status = rpc_call_async(cb->cl_cb_client, &msg,
++	status = rpc_call_async(clp->cl_cb_client, &msg,
  				RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
  				&nfsd4_cb_probe_ops, (void *)clp);
 -	if (status) {
@@ -402,7 +402,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/
  }
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
 --- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 09:57:18.237376763 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 14:12:25.698356909 -0400
 @@ -969,20 +969,36 @@ static struct nfsd4_operation nfsd4_ops[
  static const char *nfsd4_op_name(unsigned opnum);
  
@@ -490,7 +490,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd
  static const char *nfsd4_op_name(unsigned opnum)
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
 --- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 09:57:18.240356512 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 14:12:25.700356284 -0400
 @@ -45,8 +45,8 @@
  #define NFSDDBG_FACILITY                NFSDDBG_PROC
  
@@ -1280,9 +1280,21 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs
 -{
 -	user_lease_time = leasetime;
 -}
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-08-23 14:14:22.882428704 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-23 14:14:33.418376589 -0400
+@@ -1900,7 +1900,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s
+ 	if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
+ 		if ((buflen -= 4) < 0)
+ 			goto out_resource;
+-		WRITE32(NFSD_LEASE_TIME);
++		WRITE32(nfsd4_lease);
+ 	}
+ 	if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
+ 		if ((buflen -= 4) < 0)
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
 --- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 09:57:20.629370282 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 14:12:25.821359224 -0400
 @@ -46,6 +46,7 @@ enum {
  	 */
  #ifdef CONFIG_NFSD_V4
@@ -1403,7 +1415,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/n
  		/* last one */ {""}
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
 --- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 09:57:20.629370282 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 14:12:25.835418441 -0400
 @@ -82,7 +82,6 @@ int nfs4_state_init(void);
  void nfsd4_free_slabs(void);
  int nfs4_state_start(void);
@@ -1440,7 +1452,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfs
  /*
 diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
 --- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 09:57:21.807501619 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 14:12:25.836366516 -0400
 @@ -70,6 +70,16 @@ struct nfsd4_cb_sequence {
  	struct nfs4_client	*cbs_clp;
  };
@@ -1558,7 +1570,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/st
  nfs4_put_stateowner(struct nfs4_stateowner *so)
 diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
 --- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 09:57:23.994379831 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 14:12:25.837387292 -0400
 @@ -381,6 +381,10 @@ struct nfsd4_destroy_session {
  	struct nfs4_sessionid	sessionid;
  };
@@ -1600,7 +1612,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr
  extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h
 --- linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h	2010-08-23 09:57:23.994379831 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h	2010-08-23 14:12:25.838377224 -0400
 @@ -40,12 +40,12 @@ struct nfs_fhbase_old {
   * This is the new flexible, extensible style NFSv2/v3 file handle.
   * by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
@@ -1619,7 +1631,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig linux-2.6.34.noarch
   * This might allow a file to be confirmed to be in a writable part of a
 diff -up linux-2.6.34.noarch/net/sunrpc/cache.c.orig linux-2.6.34.noarch/net/sunrpc/cache.c
 --- linux-2.6.34.noarch/net/sunrpc/cache.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/cache.c	2010-08-23 09:57:23.995376793 -0400
++++ linux-2.6.34.noarch/net/sunrpc/cache.c	2010-08-23 14:12:25.839376838 -0400
 @@ -49,11 +49,17 @@ static void cache_init(struct cache_head
  	h->last_refresh = now;
  }
@@ -1686,7 +1698,7 @@ diff -up linux-2.6.34.noarch/net/sunrpc/cache.c.orig linux-2.6.34.noarch/net/sun
  		/* entry is valid */
 diff -up linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig linux-2.6.34.noarch/net/sunrpc/svcsock.c
 --- linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/svcsock.c	2010-08-23 09:57:23.997368707 -0400
++++ linux-2.6.34.noarch/net/sunrpc/svcsock.c	2010-08-23 14:12:25.840384371 -0400
 @@ -547,7 +547,6 @@ static int svc_udp_recvfrom(struct svc_r
  			dprintk("svc: recvfrom returned error %d\n", -err);
  			set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
@@ -1753,7 +1765,7 @@ diff -up linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig linux-2.6.34.noarch/net/s
  error:
 diff -up linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig linux-2.6.34.noarch/net/sunrpc/svc_xprt.c
 --- linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/svc_xprt.c	2010-08-23 09:57:23.996377209 -0400
++++ linux-2.6.34.noarch/net/sunrpc/svc_xprt.c	2010-08-23 14:12:25.841371223 -0400
 @@ -744,8 +744,10 @@ int svc_recv(struct svc_rqst *rqstp, lon
  		if (rqstp->rq_deferred) {
  			svc_xprt_received(xprt);
@@ -1782,7 +1794,7 @@ diff -up linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig linux-2.6.34.noarch/net/
  void svc_close_xprt(struct svc_xprt *xprt)
 diff -up linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c.orig linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
 --- linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	2010-08-23 09:57:23.998377481 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	2010-08-23 14:12:25.842376584 -0400
 @@ -566,7 +566,6 @@ static int rdma_read_complete(struct svc
  		ret, rqstp->rq_arg.len,	rqstp->rq_arg.head[0].iov_base,
  		rqstp->rq_arg.head[0].iov_len);

From 5d3f10424d292ac18b38e364e311df274bad5951 Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Tue, 24 Aug 2010 14:49:23 -0400
Subject: [PATCH 03/20] Removed the localversion-pnfs file from the pnfs patch

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 kernel.spec                          |   2 +-
 pnfs-all-2.6.35-2010-08-19-f13.patch | 395 +++++++++++++--------------
 2 files changed, 196 insertions(+), 201 deletions(-)

diff --git a/kernel.spec b/kernel.spec
index 70b6f45c8..7b72cab84 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -23,7 +23,7 @@ Summary: The Linux kernel
 #
 # (Uncomment the '#' and both spaces below to set the buildid.)
 #
-%define buildid .pnfs_all_2.6.35_2010_08_19
+%define buildid .pnfs34.2010.08.19
 ###################################################################
 
 # The buildid can also be specified on the rpmbuild command line
diff --git a/pnfs-all-2.6.35-2010-08-19-f13.patch b/pnfs-all-2.6.35-2010-08-19-f13.patch
index a9d78ba0e..10df9b15c 100644
--- a/pnfs-all-2.6.35-2010-08-19-f13.patch
+++ b/pnfs-all-2.6.35-2010-08-19-f13.patch
@@ -1,6 +1,6 @@
 diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arch/um/os-Linux/mem.c
---- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-08-23 12:08:27.310584826 -0400
-+++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-08-23 12:09:03.273553977 -0400
+--- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-08-24 14:14:03.643355000 -0400
++++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-08-24 14:17:48.415730000 -0400
 @@ -13,6 +13,7 @@
  #include <sys/stat.h>
  #include <sys/mman.h>
@@ -11,7 +11,7 @@ diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arc
  #include "os.h"
 diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.c
 --- linux-2.6.34.noarch/block/genhd.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/block/genhd.c	2010-08-23 12:09:03.273553977 -0400
++++ linux-2.6.34.noarch/block/genhd.c	2010-08-24 14:17:48.421730000 -0400
 @@ -1009,6 +1009,7 @@ static void disk_release(struct device *
  struct class block_class = {
  	.name		= "block",
@@ -21,8 +21,8 @@ diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.
  static char *block_devnode(struct device *dev, mode_t *mode)
  {
 diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt
---- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-08-23 12:09:03.274563927 -0400
-+++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-08-23 12:09:03.274563927 -0400
+--- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-08-24 14:17:48.423729000 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-08-24 14:17:48.425730000 -0400
 @@ -0,0 +1,211 @@
 +(c) 2007 Network Appliance Inc.
 +
@@ -237,7 +237,7 @@ diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.
 +
 diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/drivers/md/dm-ioctl.c
 --- linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-08-23 12:09:03.275584050 -0400
++++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-08-24 14:17:48.430730000 -0400
 @@ -651,6 +651,12 @@ static int dev_create(struct dm_ioctl *p
  	return r;
  }
@@ -292,7 +292,7 @@ diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/driv
  	int r;
 diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drivers/scsi/hosts.c
 --- linux-2.6.34.noarch/drivers/scsi/hosts.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-08-23 12:09:03.276563906 -0400
++++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-08-24 14:17:48.435733000 -0400
 @@ -49,7 +49,7 @@ static void scsi_host_cls_release(struct
  	put_device(&class_to_shost(dev)->shost_gendev);
  }
@@ -304,7 +304,7 @@ diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drive
  };
 diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/exofs.h
 --- linux-2.6.34.noarch/fs/exofs/exofs.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-08-23 12:09:03.277563890 -0400
++++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-08-24 14:17:48.440733000 -0400
 @@ -36,13 +36,9 @@
  #include <linux/fs.h>
  #include <linux/time.h>
@@ -360,8 +360,8 @@ diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/
 +
  #endif
 diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs/export.c
---- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-08-23 12:09:03.278386746 -0400
-+++ linux-2.6.34.noarch/fs/exofs/export.c	2010-08-23 12:09:03.278386746 -0400
+--- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-08-24 14:17:48.444731000 -0400
++++ linux-2.6.34.noarch/fs/exofs/export.c	2010-08-24 14:17:48.446730000 -0400
 @@ -0,0 +1,396 @@
 +/*
 + * export.c - Implementation of the pnfs_export_operations
@@ -761,7 +761,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs
 +}
 diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/inode.c
 --- linux-2.6.34.noarch/fs/exofs/inode.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-08-23 12:09:03.279502002 -0400
++++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-08-24 14:17:48.452730000 -0400
 @@ -833,7 +833,7 @@ void exofs_truncate(struct inode *inode)
  	if (unlikely(wait_obj_created(oi)))
  		goto fail;
@@ -781,7 +781,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/
   * Fill in an inode read from the OSD and set it up for use
 diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/Kbuild
 --- linux-2.6.34.noarch/fs/exofs/Kbuild.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-08-23 12:09:03.279502002 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-08-24 14:17:48.457733000 -0400
 @@ -13,4 +13,5 @@
  #
  
@@ -790,7 +790,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/K
  obj-$(CONFIG_EXOFS_FS) += exofs.o
 diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/Kconfig
 --- linux-2.6.34.noarch/fs/exofs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-08-23 12:09:03.280553663 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-08-24 14:17:48.462739000 -0400
 @@ -1,6 +1,7 @@
  config EXOFS_FS
  	tristate "exofs: OSD based file system support"
@@ -801,7 +801,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/
  	  as its backing storage.
 diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/super.c
 --- linux-2.6.34.noarch/fs/exofs/super.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/super.c	2010-08-23 12:09:03.281511951 -0400
++++ linux-2.6.34.noarch/fs/exofs/super.c	2010-08-24 14:17:48.468730000 -0400
 @@ -621,6 +621,7 @@ static int exofs_fill_super(struct super
  	sb->s_fs_info = sbi;
  	sb->s_op = &exofs_sops;
@@ -812,7 +812,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/
  		EXOFS_ERR("ERROR: exofs_iget failed\n");
 diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exportfs/expfs.c
 --- linux-2.6.34.noarch/fs/exportfs/expfs.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-08-23 12:09:03.282511528 -0400
++++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-08-24 14:17:48.473730000 -0400
 @@ -16,6 +16,13 @@
  #include <linux/namei.h>
  #include <linux/sched.h>
@@ -829,7 +829,7 @@ diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exp
  
 diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/exportfs/Makefile
 --- linux-2.6.34.noarch/fs/exportfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-08-23 12:09:03.282511528 -0400
++++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-08-24 14:17:48.478733000 -0400
 @@ -3,4 +3,7 @@
  
  obj-$(CONFIG_EXPORTFS) += exportfs.o
@@ -840,8 +840,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/ex
 +exportfs-$(CONFIG_EXPORTFS_OSD_LAYOUT)	+= pnfs_osd_xdr_srv.o
 +exportfs-$(CONFIG_EXPORTFS_BLOCK_LAYOUT) += nfs4blocklayoutxdr.o
 diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c
---- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-08-23 12:09:03.283511561 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-08-23 12:09:03.283511561 -0400
+--- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-08-24 14:17:48.482731000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-08-24 14:17:48.484734000 -0400
 @@ -0,0 +1,158 @@
 +/*
 + *  linux/fs/nfsd/nfs4blocklayoutxdr.c
@@ -1002,8 +1002,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.
 +}
 +EXPORT_SYMBOL_GPL(blocklayout_encode_layout);
 diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c
---- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-08-23 12:09:03.283511561 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-08-23 12:09:03.283511561 -0400
+--- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-08-24 14:17:48.487733000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-08-24 14:17:48.489734000 -0400
 @@ -0,0 +1,218 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -1224,8 +1224,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.n
 +}
 +EXPORT_SYMBOL(filelayout_encode_layout);
 diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c
---- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-08-23 12:09:03.284511493 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-08-23 12:09:03.284511493 -0400
+--- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-08-24 14:17:48.493729000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-08-24 14:17:48.494735000 -0400
 @@ -0,0 +1,289 @@
 +/*
 + *  pnfs_osd_xdr_enc.c
@@ -1518,7 +1518,7 @@ diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.no
 +EXPORT_SYMBOL(pnfs_osd_xdr_decode_ioerr);
 diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gfs2/ops_fstype.c
 --- linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-08-23 12:09:03.285539075 -0400
++++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-08-24 14:17:48.499730000 -0400
 @@ -19,6 +19,7 @@
  #include <linux/gfs2_ondisk.h>
  #include <linux/slow-work.h>
@@ -1539,7 +1539,7 @@ diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gf
  	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
 diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
 --- linux-2.6.34.noarch/fs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/Kconfig	2010-08-23 12:09:03.286512316 -0400
++++ linux-2.6.34.noarch/fs/Kconfig	2010-08-24 14:17:48.505733000 -0400
 @@ -224,6 +224,31 @@ config LOCKD_V4
  config EXPORTFS
  	tristate
@@ -1573,8 +1573,8 @@ diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
  	tristate
  	select FS_POSIX_ACL
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-08-23 12:09:03.287381619 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-08-23 12:09:03.287381619 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-08-24 14:17:48.509734000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-08-24 14:17:48.511732000 -0400
 @@ -0,0 +1,66 @@
 +#include <linux/module.h>
 +#include <linux/uaccess.h>
@@ -1643,8 +1643,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.or
 +	return;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-08-23 12:09:03.288501648 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-08-23 12:09:03.288501648 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-08-24 14:17:48.514733000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-08-24 14:17:48.516731000 -0400
 @@ -0,0 +1,1160 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.c
@@ -2807,8 +2807,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.
 +module_init(nfs4blocklayout_init);
 +module_exit(nfs4blocklayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-08-23 12:09:03.289501933 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-08-23 12:09:03.289501933 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-08-24 14:17:48.519731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-08-24 14:17:48.521730000 -0400
 @@ -0,0 +1,335 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayoutdev.c
@@ -3146,8 +3146,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.
 +	goto out;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-08-23 12:09:03.290395707 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-08-23 12:09:03.290395707 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-08-24 14:17:48.523733000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-08-24 14:17:48.525730000 -0400
 @@ -0,0 +1,120 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayoutdm.c
@@ -3270,8 +3270,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.3
 +	}
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-08-23 12:09:03.290395707 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-08-23 12:09:03.291501560 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-08-24 14:17:48.528729000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-08-24 14:17:48.529735000 -0400
 @@ -0,0 +1,303 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.h
@@ -3577,8 +3577,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.
 +
 +#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-08-23 12:09:03.292511531 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-08-23 12:09:03.292511531 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-08-24 14:17:48.532731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-08-24 14:17:48.534734000 -0400
 @@ -0,0 +1,948 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.h
@@ -4529,8 +4529,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noar
 +	}
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile
---- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-08-23 12:09:03.292511531 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-08-23 12:09:03.293491476 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-08-24 14:17:48.537729000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-08-24 14:17:48.538739000 -0400
 @@ -0,0 +1,6 @@
 +#
 +# Makefile for the pNFS block layout driver kernel module
@@ -4540,7 +4540,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarc
 +			extents.o block-device-discovery-pipe.o
 diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/callback.h
 --- linux-2.6.34.noarch/fs/nfs/callback.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-08-23 12:09:03.293491476 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-08-24 14:17:48.544730000 -0400
 @@ -8,6 +8,8 @@
  #ifndef __LINUX_FS_NFS_CALLBACK_H
  #define __LINUX_FS_NFS_CALLBACK_H
@@ -4613,7 +4613,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/c
  extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
 diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/nfs/callback_proc.c
 --- linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-08-23 12:09:03.294522414 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-08-24 14:17:48.562731000 -0400
 @@ -8,10 +8,15 @@
  #include <linux/nfs4.h>
  #include <linux/nfs_fs.h>
@@ -5096,7 +5096,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/
  	return status;
 diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/nfs/callback_xdr.c
 --- linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-08-23 12:09:03.295502055 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-08-24 14:17:48.568730000 -0400
 @@ -22,6 +22,8 @@
  #define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
  
@@ -5298,8 +5298,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/n
  		.process_op = (callback_process_op_t)nfs4_callback_sequence,
  		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
 diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/client.c
---- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-08-23 12:08:29.037481540 -0400
-+++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-23 12:09:03.297501650 -0400
+--- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-08-24 14:14:13.062705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-24 14:17:48.575730000 -0400
 @@ -39,6 +39,7 @@
  #include <net/ipv6.h>
  #include <linux/nfs_xdr.h>
@@ -5508,8 +5508,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/cli
  		goto error;
  
 diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/bl_com.c
---- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-08-23 12:09:03.297501650 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-08-23 12:09:03.298501447 -0400
+--- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-08-24 14:17:48.578729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-08-24 14:17:48.579735000 -0400
 @@ -0,0 +1,292 @@
 +#if defined(CONFIG_SPNFS_BLOCK)
 +
@@ -5804,8 +5804,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/b
 +}
 +#endif /* CONFIG_SPNFS_BLOCK */
 diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/bl_ops.c
---- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-08-23 12:09:03.299501445 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-08-23 12:09:03.299501445 -0400
+--- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-08-24 14:17:48.584729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-08-24 14:17:48.586730000 -0400
 @@ -0,0 +1,1672 @@
 +/*
 + *  bl_ops.c
@@ -7480,8 +7480,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/b
 +
 +#endif /* CONFIG_SPNFS_BLOCK */
 diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs/delegation.c
---- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-08-23 12:08:29.037481540 -0400
-+++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-23 12:09:03.300491952 -0400
+--- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-08-24 14:14:13.068705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-24 14:17:48.592730000 -0400
 @@ -104,7 +104,8 @@ again:
  			continue;
  		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
@@ -7558,7 +7558,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs
  	rcu_read_unlock();
 diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs/delegation.h
 --- linux-2.6.34.noarch/fs/nfs/delegation.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-08-23 12:09:03.301431797 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-08-24 14:17:48.597733000 -0400
 @@ -34,9 +34,7 @@ enum {
  int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
  void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
@@ -7571,8 +7571,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs
  
  struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
 diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
---- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-08-23 12:08:29.089481525 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 12:09:03.302511603 -0400
+--- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-08-24 14:14:13.612707000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-24 14:17:48.604730000 -0400
 @@ -17,11 +17,19 @@
  #include <linux/module.h>
  #include <linux/exportfs.h>
@@ -7750,7 +7750,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/e
  	svcauth_unix_purge();
 diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/direct.c
 --- linux-2.6.34.noarch/fs/nfs/direct.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-08-23 12:09:03.303491500 -0400
++++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-08-24 14:17:48.610730000 -0400
 @@ -267,6 +267,38 @@ static const struct rpc_call_ops nfs_rea
  	.rpc_release = nfs_direct_read_release,
  };
@@ -7996,7 +7996,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/dir
  		user_addr += bytes;
 diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kconfig
 --- linux-2.6.34.noarch/fs/nfsd/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-08-23 12:09:03.304505472 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-08-24 14:17:48.616730000 -0400
 @@ -79,3 +79,52 @@ config NFSD_V4
  	  available from http://linux-nfs.org/.
  
@@ -8052,7 +8052,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kc
 +	  If unsure, say N.
 diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/Makefile
 --- linux-2.6.34.noarch/fs/nfsd/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-08-23 12:09:03.304505472 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-08-24 14:17:48.621733000 -0400
 @@ -11,3 +11,7 @@ nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs
  nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
  nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
@@ -8062,8 +8062,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/M
 +nfsd-$(CONFIG_SPNFS)	+= spnfs_com.o spnfs_ops.o
 +nfsd-$(CONFIG_SPNFS_BLOCK) += bl_com.o bl_ops.o
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-08-23 12:08:29.090501507 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 12:09:03.306491345 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-08-24 14:14:13.618705000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-24 14:17:48.628730000 -0400
 @@ -40,7 +40,6 @@
  
  #define NFSPROC4_CB_NULL 0
@@ -8603,8 +8603,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/
 +}
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-08-23 12:09:03.307491492 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-08-23 12:09:03.308491262 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-08-24 14:17:48.633729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-08-24 14:17:48.641730000 -0400
 @@ -0,0 +1,1679 @@
 +/******************************************************************************
 + *
@@ -10286,8 +10286,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfs
 +	return status;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-08-23 12:09:03.309501439 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-08-23 12:09:03.309501439 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-08-24 14:17:48.645731000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-08-24 14:17:48.647730000 -0400
 @@ -0,0 +1,461 @@
 +/******************************************************************************
 + *
@@ -10751,8 +10751,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/n
 +};
 +EXPORT_SYMBOL(pnfs_dlm_export_ops);
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-08-23 12:09:03.310501390 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-08-23 12:09:03.310501390 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-08-24 14:17:48.651729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-08-24 14:17:48.652735000 -0400
 @@ -0,0 +1,620 @@
 +/*
 +*  linux/fs/nfsd/nfs4pnfsds.c
@@ -11375,8 +11375,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nf
 +
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-08-23 12:08:29.091491685 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 12:09:03.311501496 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-08-24 14:14:13.623707000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-24 14:17:48.658733000 -0400
 @@ -34,10 +34,14 @@
   */
  #include <linux/file.h>
@@ -11851,8 +11851,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd
  
  static const char *nfsd4_op_name(unsigned opnum)
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-08-23 12:08:29.093491375 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 12:09:03.313491310 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-08-24 14:14:13.632707000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-24 14:17:48.667732000 -0400
 @@ -42,6 +42,8 @@
  #include "xdr4.h"
  #include "vfs.h"
@@ -12368,8 +12368,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs
  }
  
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-23 12:09:03.315491356 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-08-24 14:14:13.639707000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-24 14:17:48.675730000 -0400
 @@ -47,9 +47,14 @@
  #include <linux/nfsd_idmap.h>
  #include <linux/nfs4_acl.h>
@@ -12988,8 +12988,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/
  	[OP_SEQUENCE]		= (nfsd4_enc)nfsd4_encode_sequence,
  	[OP_SET_SSV]		= (nfsd4_enc)nfsd4_encode_noop,
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
---- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-08-23 12:08:29.094491943 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 12:09:03.317501495 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-08-24 14:14:13.645705000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-24 14:17:48.681730000 -0400
 @@ -13,10 +13,15 @@
  #include <linux/nfsd/syscall.h>
  #include <linux/lockd/lockd.h>
@@ -13166,8 +13166,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/n
  	remove_proc_entry("fs/nfs/exports", NULL);
  	remove_proc_entry("fs/nfs", NULL);
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
---- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-08-23 12:08:29.095491390 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 12:09:03.318355741 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-08-24 14:14:13.651705000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-24 14:17:48.687730000 -0400
 @@ -285,11 +285,17 @@ extern time_t nfsd4_grace;
  #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
  	NFSD4_SUPPORTED_ATTRS_WORD0
@@ -13189,7 +13189,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfs
  {
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.c
 --- linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-08-23 12:09:03.319511586 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-08-24 14:17:48.693730000 -0400
 @@ -10,6 +10,7 @@
  #include <linux/exportfs.h>
  
@@ -13227,7 +13227,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nf
  		__u32 tfh[2];
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.h
 --- linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-08-23 12:09:03.319511586 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-08-24 14:17:48.698733000 -0400
 @@ -14,6 +14,7 @@ enum nfsd_fsid {
  	FSID_UUID8,
  	FSID_UUID16,
@@ -13280,8 +13280,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nf
 +
  #endif /* _LINUX_NFSD_FH_INT_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/nfssvc.c
---- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-08-23 12:08:27.631563969 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-08-23 12:09:03.320416974 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-08-24 14:14:06.365163000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-08-24 14:17:48.704731000 -0400
 @@ -115,7 +115,7 @@ struct svc_program		nfsd_program = {
  
  };
@@ -13292,8 +13292,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/n
  int nfsd_vers(int vers, enum vers_op change)
  {
 diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pnfsd.h
---- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-08-23 12:09:03.321376171 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-08-23 12:09:03.321376171 -0400
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-08-24 14:17:48.708729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-08-24 14:17:48.710730000 -0400
 @@ -0,0 +1,143 @@
 +/*
 + *  Copyright (c) 2005 The Regents of the University of Michigan.
@@ -13439,8 +13439,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pn
 +
 +#endif /* LINUX_NFSD_PNFSD_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c
---- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-08-23 12:09:03.321376171 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-08-23 12:09:03.322501672 -0400
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-08-24 14:17:48.713731000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-08-24 14:17:48.715730000 -0400
 @@ -0,0 +1,225 @@
 +/*
 + * linux/fs/nfsd/pnfs_lexp.c
@@ -13668,8 +13668,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nf
 +	inode->i_sb->s_pnfs_op = &pnfsd_lexp_ops;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_com.c
---- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-08-23 12:09:03.322501672 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-08-23 12:09:03.323511608 -0400
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-08-24 14:17:48.719729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-08-24 14:17:48.720735000 -0400
 @@ -0,0 +1,535 @@
 +/*
 + * fs/nfsd/spnfs_com.c
@@ -14207,8 +14207,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfs
 +}
 +#endif /* CONFIG_PROC_FS */
 diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c
---- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-08-23 12:09:03.324501390 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-08-23 12:09:03.324501390 -0400
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-08-24 14:17:48.724733000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-08-24 14:17:48.726730000 -0400
 @@ -0,0 +1,878 @@
 +/*
 + * fs/nfsd/spnfs_ops.c
@@ -15089,8 +15089,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfs
 +	return 0;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
---- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-08-23 12:08:29.096512142 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 12:09:03.325501424 -0400
+--- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-08-24 14:14:13.656705000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-24 14:17:48.731738000 -0400
 @@ -242,6 +242,12 @@ struct nfs4_client {
  	u32			cl_cb_seq_nr;
  	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
@@ -15207,8 +15207,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/st
 +
  #endif   /* NFSD4_STATE_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.c
---- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-08-23 12:08:27.632564132 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-08-23 12:09:03.326501490 -0400
+--- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-08-24 14:14:06.371160000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-08-24 14:17:48.737742000 -0400
 @@ -37,7 +37,12 @@
  #ifdef CONFIG_NFSD_V4
  #include <linux/nfs4_acl.h>
@@ -15335,8 +15335,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.
  out_nfserr:
  	err = nfserrno(host_err);
 diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
---- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-08-23 12:08:29.097425997 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 12:09:03.327451643 -0400
+--- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-08-24 14:14:13.661705000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-24 14:17:48.743747000 -0400
 @@ -37,6 +37,8 @@
  #ifndef _LINUX_NFSD_XDR4_H
  #define _LINUX_NFSD_XDR4_H
@@ -15413,8 +15413,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr
  	struct nfs4_replay *			replay;
  };
 diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.c
---- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-23 12:08:29.039491912 -0400
-+++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-23 12:09:03.328501680 -0400
+--- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-24 14:14:13.079708000 -0400
++++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-24 14:17:48.749746000 -0400
 @@ -28,6 +28,7 @@
  #include <linux/aio.h>
  #include <linux/gfp.h>
@@ -15540,8 +15540,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.
  	if (!ret)
  		return VM_FAULT_LOCKED;
 diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inode.c
---- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-08-23 12:08:29.042511552 -0400
-+++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-23 12:09:03.329501644 -0400
+--- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-08-24 14:14:13.095705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-24 14:17:48.757730000 -0400
 @@ -48,6 +48,7 @@
  #include "internal.h"
  #include "fscache.h"
@@ -15755,8 +15755,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inod
  	nfs_fs_proc_exit();
  	nfsiod_stop();
 diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/internal.h
---- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-08-23 12:08:29.042511552 -0400
-+++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-23 12:09:03.330502148 -0400
+--- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-08-24 14:14:13.100708000 -0400
++++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-24 14:17:48.763734000 -0400
 @@ -139,6 +139,16 @@ extern struct nfs_server *nfs_clone_serv
  					   struct nfs_fattr *);
  extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
@@ -15817,7 +15817,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/i
  		struct page *, struct page *);
 diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kconfig
 --- linux-2.6.34.noarch/fs/nfs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-08-23 12:09:03.331395814 -0400
++++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-08-24 14:17:48.769730000 -0400
 @@ -79,10 +79,48 @@ config NFS_V4_1
  	depends on NFS_V4 && EXPERIMENTAL
  	help
@@ -15870,7 +15870,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kcon
  	depends on NFS_FS=y && IP_PNP
 diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Makefile
 --- linux-2.6.34.noarch/fs/nfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-08-23 12:09:03.331395814 -0400
++++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-08-24 14:17:48.774730000 -0400
 @@ -15,5 +15,12 @@ nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4x
  			   delegation.o idmap.o \
  			   callback.o callback_xdr.o callback_proc.o \
@@ -15885,8 +15885,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Mak
 +obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
 +obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
 diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs3proc.c
---- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-08-23 12:08:29.045525837 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-23 12:09:03.332511640 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-08-24 14:14:13.119708000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-24 14:17:48.780730000 -0400
 @@ -833,6 +833,7 @@ const struct nfs_rpc_ops nfs_v3_clientop
  	.dentry_ops	= &nfs_dentry_operations,
  	.dir_inode_ops	= &nfs3_dir_inode_operations,
@@ -15896,8 +15896,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/n
  	.getattr	= nfs3_proc_getattr,
  	.setattr	= nfs3_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-08-23 12:09:03.333512111 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-08-23 12:09:03.334491472 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-08-24 14:17:48.784731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-08-24 14:17:48.786730000 -0400
 @@ -0,0 +1,765 @@
 +/*
 + *  linux/fs/nfs/nfs4filelayout.c
@@ -16665,8 +16665,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs
 +module_init(nfs4filelayout_init);
 +module_exit(nfs4filelayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-08-23 12:09:03.334491472 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-08-23 12:09:03.335501543 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-08-24 14:17:48.790731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-08-24 14:17:48.792730000 -0400
 @@ -0,0 +1,636 @@
 +/*
 + *  linux/fs/nfs/nfs4filelayoutdev.c
@@ -17305,8 +17305,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch
 +}
 +
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-08-23 12:09:03.335501543 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-08-23 12:09:03.335501543 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-08-24 14:17:48.795731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-08-24 14:17:48.796742000 -0400
 @@ -0,0 +1,97 @@
 +/*
 + *  pnfs_nfs4filelayout.h
@@ -17406,8 +17406,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs
 +
 +#endif /* FS_NFS_NFS4FILELAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nfs4_fs.h
---- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-08-23 12:08:29.047512264 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-23 12:09:03.336490079 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-08-24 14:14:13.130705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-24 14:17:48.802730000 -0400
 @@ -45,8 +45,28 @@ enum nfs4_client_state {
  	NFS4CLNT_RECLAIM_NOGRACE,
  	NFS4CLNT_DELEGRETURN,
@@ -17556,8 +17556,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nf
  
  /* nfs4xdr.c */
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs4proc.c
---- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-08-23 12:08:29.050481368 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-23 12:09:03.339481253 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-08-24 14:14:13.143709000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-24 14:17:48.811734000 -0400
 @@ -49,12 +49,15 @@
  #include <linux/mount.h>
  #include <linux/module.h>
@@ -19223,7 +19223,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/n
  	.setattr	= nfs4_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs/nfs4renewd.c
 --- linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-08-23 12:09:03.341491726 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-08-24 14:17:48.818733000 -0400
 @@ -54,17 +54,17 @@
  void
  nfs4_renew_state(struct work_struct *work)
@@ -19246,8 +19246,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs
  	spin_lock(&clp->cl_lock);
  	lease = clp->cl_lease_time;
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/nfs4state.c
---- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-08-23 12:08:29.052491341 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-23 12:09:03.342373443 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-08-24 14:14:13.150705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-24 14:17:48.825730000 -0400
 @@ -53,6 +53,9 @@
  #include "callback.h"
  #include "delegation.h"
@@ -19566,8 +19566,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/
  			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
  			    test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs4xdr.c
---- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-23 12:08:29.054481400 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-23 12:09:03.346481283 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-24 14:14:13.159705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-24 14:17:48.834738000 -0400
 @@ -50,8 +50,11 @@
  #include <linux/nfs4.h>
  #include <linux/nfs_fs.h>
@@ -21078,8 +21078,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nf
  };
  
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild
---- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-08-23 12:09:03.348511665 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-08-23 12:09:03.348511665 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-08-24 14:17:48.839734000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-08-24 14:17:48.840742000 -0400
 @@ -0,0 +1,11 @@
 +#
 +# Makefile for the pNFS Objects Layout Driver kernel module
@@ -21093,8 +21093,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs
 +panlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o panfs_shim.o
 +obj-$(CONFIG_PNFS_PANLAYOUT) += panlayoutdriver.o
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-08-23 12:09:03.349501459 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-08-23 12:09:03.349501459 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-08-24 14:17:48.843735000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-08-24 14:17:48.845739000 -0400
 @@ -0,0 +1,1087 @@
 +/*
 + *  objio_osd.c
@@ -22184,8 +22184,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noar
 +module_init(objlayout_init);
 +module_exit(objlayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-08-23 12:09:03.350491564 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-08-23 12:09:03.350491564 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-08-24 14:17:48.848735000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-08-24 14:17:48.851730000 -0400
 @@ -0,0 +1,790 @@
 +/*
 + *  objlayout.c
@@ -22978,8 +22978,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noar
 +	.uninitialize_mountpoint = objlayout_uninitialize_mountpoint,
 +};
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h
---- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-08-23 12:09:03.351434439 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-08-23 12:09:03.351434439 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-08-24 14:17:48.852735000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-08-24 14:17:48.854746000 -0400
 @@ -0,0 +1,171 @@
 +/*
 + *  objlayout.h
@@ -23153,8 +23153,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noar
 +
 +#endif /* _OBJLAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-08-23 12:09:03.352501716 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-08-23 12:09:03.352501716 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-08-24 14:17:48.857735000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-08-24 14:17:48.860740000 -0400
 @@ -0,0 +1,734 @@
 +/*
 + *  panfs_shim.c
@@ -23891,8 +23891,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noa
 +module_init(panlayout_init);
 +module_exit(panlayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h
---- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-08-23 12:09:03.353501685 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-08-23 12:09:03.353501685 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-08-24 14:17:48.863734000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-08-24 14:17:48.864730000 -0400
 @@ -0,0 +1,482 @@
 +/*
 + *  panfs_shim.h
@@ -24377,8 +24377,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noa
 +
 +#endif /* _PANLAYOUT_PANFS_SHIM_H */
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-08-23 12:09:03.354501721 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-08-23 12:09:03.354501721 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-08-24 14:17:48.868731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-08-24 14:17:48.869739000 -0400
 @@ -0,0 +1,435 @@
 +/*
 + *  pnfs_osd_xdr.c
@@ -24816,8 +24816,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.
 +	return 0;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/pagelist.c
---- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-08-23 12:08:29.056411363 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-23 12:09:03.355511659 -0400
+--- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-08-24 14:14:13.169705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-24 14:17:48.875733000 -0400
 @@ -20,6 +20,7 @@
  #include <linux/nfs_mount.h>
  
@@ -24940,8 +24940,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/p
  				if (res == INT_MAX)
  					goto out;
 diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.c
---- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-08-23 12:09:03.356501413 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-08-23 12:09:03.357481204 -0400
+--- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-08-24 14:17:48.880733000 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-08-24 14:17:48.883730000 -0400
 @@ -0,0 +1,2027 @@
 +/*
 + *  linux/fs/nfs/pnfs.c
@@ -26971,8 +26971,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.
 +}
 +EXPORT_SYMBOL(nfs4_put_deviceid_cache);
 diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.h
---- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-08-23 12:09:03.358501440 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-08-23 12:09:03.358501440 -0400
+--- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-08-24 14:17:48.886733000 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-08-24 14:17:48.887735000 -0400
 @@ -0,0 +1,355 @@
 +/*
 + *  fs/nfs/pnfs.h
@@ -27330,8 +27330,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.
 +
 +#endif /* FS_NFS_PNFS_H */
 diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.c
---- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-08-23 12:08:29.057511533 -0400
-+++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-23 12:09:03.359501471 -0400
+--- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-08-24 14:14:13.174707000 -0400
++++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-24 14:17:48.893730000 -0400
 @@ -443,7 +443,7 @@ nfs_proc_symlink(struct inode *dir, stru
  	fattr = nfs_alloc_fattr();
  	status = -ENOMEM;
@@ -27359,8 +27359,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.
  	.getattr	= nfs_proc_getattr,
  	.setattr	= nfs_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.c
---- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-08-23 12:08:29.057511533 -0400
-+++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-23 12:09:03.359501471 -0400
+--- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-08-24 14:14:13.179708000 -0400
++++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-24 14:17:48.899733000 -0400
 @@ -18,8 +18,12 @@
  #include <linux/sunrpc/clnt.h>
  #include <linux/nfs_fs.h>
@@ -27575,8 +27575,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.
  	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
  read_complete:
 diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/super.c
---- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-23 12:08:29.059491391 -0400
-+++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-23 12:09:03.361501458 -0400
+--- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-24 14:14:13.186707000 -0400
++++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-24 14:17:48.907729000 -0400
 @@ -64,6 +64,7 @@
  #include "iostat.h"
  #include "internal.h"
@@ -27624,8 +27624,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/supe
  #endif
  
 diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unlink.c
---- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-08-23 12:08:29.060501485 -0400
-+++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-23 12:09:03.362419975 -0400
+--- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-08-24 14:14:13.192705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-24 14:17:48.913730000 -0400
 @@ -110,7 +110,7 @@ void nfs_unlink_prepare(struct rpc_task 
  	struct nfs_unlinkdata *data = calldata;
  	struct nfs_server *server = NFS_SERVER(data->dir);
@@ -27636,8 +27636,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unl
  		return;
  	rpc_call_start(task);
 diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/write.c
---- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-08-23 12:08:27.630563929 -0400
-+++ linux-2.6.34.noarch/fs/nfs/write.c	2010-08-23 12:09:03.364491337 -0400
+--- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-08-24 14:14:06.360160000 -0400
++++ linux-2.6.34.noarch/fs/nfs/write.c	2010-08-24 14:17:48.921712000 -0400
 @@ -20,6 +20,7 @@
  #include <linux/nfs_mount.h>
  #include <linux/nfs_page.h>
@@ -28326,7 +28326,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/writ
  int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/include/linux/exportfs.h
 --- linux-2.6.34.noarch/include/linux/exportfs.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-08-23 12:09:03.365501459 -0400
++++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-08-24 14:17:48.933713000 -0400
 @@ -2,6 +2,7 @@
  #define LINUX_EXPORTFS_H 1
  
@@ -28399,8 +28399,8 @@ diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/i
 +#endif /* CONFIG_PNFSD */
  #endif /* LINUX_EXPORTFS_H */
 diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/include/linux/exp_xdr.h
---- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-08-23 12:09:03.367491365 -0400
-+++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-08-23 12:09:03.367491365 -0400
+--- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-08-24 14:17:48.945690000 -0400
++++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-08-24 14:17:48.946693000 -0400
 @@ -0,0 +1,141 @@
 +#ifndef _LINUX_EXP_XDR_H
 +#define _LINUX_EXP_XDR_H
@@ -28544,8 +28544,8 @@ diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/in
 +}
 +#endif /* _LINUX_EXP_XDR_H */
 diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include/linux/fs.h
---- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-08-23 12:08:29.021511898 -0400
-+++ linux-2.6.34.noarch/include/linux/fs.h	2010-08-23 12:09:03.369481147 -0400
+--- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-08-24 14:14:13.014707000 -0400
++++ linux-2.6.34.noarch/include/linux/fs.h	2010-08-24 14:17:48.961675000 -0400
 @@ -387,6 +387,7 @@ struct inodes_stat_t {
  #include <asm/byteorder.h>
  
@@ -28564,7 +28564,7 @@ diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include
  	struct dentry		*s_root;
 diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/include/linux/nfs4.h
 --- linux-2.6.34.noarch/include/linux/nfs4.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-08-23 12:09:03.371491472 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-08-24 14:17:48.974681000 -0400
 @@ -17,7 +17,10 @@
  
  #define NFS4_BITMAP_SIZE	2
@@ -28694,8 +28694,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/inclu
  #endif
  
 diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfs4_pnfs.h
---- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-08-23 12:09:03.372501550 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-08-23 12:09:03.372501550 -0400
+--- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-08-24 14:17:48.986670000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-08-24 14:17:48.989666000 -0400
 @@ -0,0 +1,330 @@
 +/*
 + *  include/linux/nfs4_pnfs.h
@@ -29028,8 +29028,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/
 +
 +#endif /* LINUX_NFS4_PNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarch/include/linux/nfsd4_block.h
---- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-08-23 12:09:03.373491892 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-08-23 12:09:03.374491393 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-08-24 14:17:48.998668000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-08-24 14:17:49.000665000 -0400
 @@ -0,0 +1,101 @@
 +#ifndef NFSD4_BLOCK
 +#define NFSD4_BLOCK
@@ -29133,8 +29133,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarc
 +#endif /* NFSD4_BLOCK */
 +
 diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h
---- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-08-23 12:09:03.375501481 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-08-23 12:09:03.375501481 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-08-24 14:17:49.012664000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-08-24 14:17:49.013671000 -0400
 @@ -0,0 +1,345 @@
 +/*
 + * include/linux/nfsd4_spnfs.h
@@ -29483,7 +29483,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarc
 +#endif /* NFS_SPNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch/include/linux/nfsd/const.h
 --- linux-2.6.34.noarch/include/linux/nfsd/const.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-08-23 12:09:03.376401789 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-08-24 14:17:49.018668000 -0400
 @@ -29,6 +29,7 @@
  #ifdef __KERNEL__
  
@@ -29494,7 +29494,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch
   * Largest number of bytes we need to allocate for an NFS
 diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch/include/linux/nfsd/debug.h
 --- linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-08-23 12:09:03.376401789 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-08-24 14:17:49.024673000 -0400
 @@ -32,6 +32,8 @@
  #define NFSDDBG_REPCACHE	0x0080
  #define NFSDDBG_XDR		0x0100
@@ -29506,7 +29506,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch
  
 diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarch/include/linux/nfsd/export.h
 --- linux-2.6.34.noarch/include/linux/nfsd/export.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-08-23 12:09:03.377481954 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-08-24 14:17:49.030665000 -0400
 @@ -100,6 +100,7 @@ struct svc_export {
  	uid_t			ex_anon_uid;
  	gid_t			ex_anon_gid;
@@ -29516,8 +29516,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarc
  	struct nfsd4_fs_locations ex_fslocs;
  	int			ex_nflavors;
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h
---- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-08-23 12:09:03.377481954 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-08-23 12:09:03.378501747 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-08-24 14:17:49.033666000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-08-24 14:17:49.034665000 -0400
 @@ -0,0 +1,132 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -29652,8 +29652,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.3
 +
 +#endif /* NFSD_NFS4LAYOUTXDR_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h
---- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-08-23 12:09:03.378501747 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-08-23 12:09:03.378501747 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-08-24 14:17:49.037666000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-08-24 14:17:49.039665000 -0400
 @@ -0,0 +1,54 @@
 +/******************************************************************************
 + *
@@ -29710,8 +29710,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.
 +
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h
---- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-08-23 12:09:03.379487099 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-08-23 12:09:03.379487099 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-08-24 14:17:49.042666000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-08-24 14:17:49.044665000 -0400
 @@ -0,0 +1,271 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -29986,7 +29986,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.n
 +#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noarch/include/linux/nfsd/syscall.h
 --- linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-08-23 12:09:03.380502500 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-08-24 14:17:49.049665000 -0400
 @@ -29,6 +29,7 @@
  /*#define NFSCTL_GETFH		6	/ * get an fh by ino DISCARDED */
  #define NFSCTL_GETFD		7	/* get an fh by path (used by mountd) */
@@ -30024,8 +30024,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noar
  
  union nfsctl_res {
 diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/include/linux/nfs_fs.h
---- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-23 12:08:29.061494081 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-23 12:09:03.381511751 -0400
+--- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-24 14:14:13.201710000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-24 14:17:49.063666000 -0400
 @@ -72,13 +72,20 @@ struct nfs_access_entry {
  	int			mask;
  };
@@ -30124,8 +30124,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/inc
  
  #ifdef __KERNEL__
 diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/include/linux/nfs_fs_sb.h
---- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-08-23 12:08:29.062501618 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-23 12:09:03.383491395 -0400
+--- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-08-24 14:14:13.206708000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-24 14:17:49.077665000 -0400
 @@ -15,6 +15,7 @@ struct nlm_host;
  struct nfs4_sequence_args;
  struct nfs4_sequence_res;
@@ -30200,7 +30200,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/
  	atomic_t active; /* Keep trace of any activity to this server */
 diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch/include/linux/nfs_iostat.h
 --- linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-08-23 12:09:03.384501540 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-08-24 14:17:49.089668000 -0400
 @@ -113,6 +113,9 @@ enum nfs_stat_eventcounters {
  	NFSIOS_SHORTREAD,
  	NFSIOS_SHORTWRITE,
@@ -30213,7 +30213,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch
  
 diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/include/linux/nfs_page.h
 --- linux-2.6.34.noarch/include/linux/nfs_page.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-08-23 12:09:03.385491518 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-08-24 14:17:49.103665000 -0400
 @@ -39,6 +39,7 @@ struct nfs_page {
  	struct list_head	wb_list;	/* Defines state of page: */
  	struct page		*wb_page;	/* page to read in/write out */
@@ -30262,8 +30262,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/i
  			     struct inode *inode,
  			     int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
 diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/include/linux/nfs_xdr.h
---- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-08-23 12:08:29.062501618 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-23 12:09:03.387491422 -0400
+--- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-08-24 14:14:13.211708000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-24 14:17:49.116665000 -0400
 @@ -3,6 +3,8 @@
  
  #include <linux/nfsacl.h>
@@ -30415,8 +30415,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/in
  extern struct rpc_version	nfs_version3;
  extern struct rpc_version	nfs_version4;
 diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.noarch/include/linux/panfs_shim_api.h
---- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-08-23 12:09:03.388491527 -0400
-+++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-08-23 12:09:03.388491527 -0400
+--- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-08-24 14:17:49.128664000 -0400
++++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-08-24 14:17:49.129670000 -0400
 @@ -0,0 +1,57 @@
 +#ifndef _PANFS_SHIM_API_H
 +#define _PANFS_SHIM_API_H
@@ -30476,8 +30476,8 @@ diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.no
 +
 +#endif /* _PANFS_SHIM_API_H */
 diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h
---- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-08-23 12:09:03.390501461 -0400
-+++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-08-23 12:09:03.390501461 -0400
+--- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-08-24 14:17:49.141664000 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-08-24 14:17:49.142670000 -0400
 @@ -0,0 +1,440 @@
 +/*
 + *  pnfs_osd_xdr.h
@@ -30920,8 +30920,8 @@ diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noar
 +
 +#endif /* __PNFS_OSD_XDR_H__ */
 diff -up linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_xdr.h
---- linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig	2010-08-23 12:09:03.391491550 -0400
-+++ linux-2.6.34.noarch/include/linux/pnfs_xdr.h	2010-08-23 12:09:03.391491550 -0400
+--- linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig	2010-08-24 14:17:49.153666000 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_xdr.h	2010-08-24 14:17:49.155665000 -0400
 @@ -0,0 +1,134 @@
 +/*
 + *  include/linux/pnfs_xdr.h
@@ -31059,7 +31059,7 @@ diff -up linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig linux-2.6.34.noarch/i
 +#endif /* LINUX_PNFS_XDR_H */
 diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/include/linux/posix_acl.h
 --- linux-2.6.34.noarch/include/linux/posix_acl.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-08-23 12:09:03.393501437 -0400
++++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-08-24 14:17:49.168668000 -0400
 @@ -8,6 +8,7 @@
  #ifndef __LINUX_POSIX_ACL_H
  #define __LINUX_POSIX_ACL_H
@@ -31070,7 +31070,7 @@ diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/
  #define ACL_UNDEFINED_ID	(-1)
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-08-23 12:09:03.393501437 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-08-24 14:17:49.174665000 -0400
 @@ -14,6 +14,8 @@
  /* size of an XDR encoding unit in bytes, i.e. 32bit */
  #define XDR_UNIT	(4)
@@ -31082,7 +31082,7 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.n
  
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-08-23 12:09:03.394512138 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-08-24 14:17:49.179667000 -0400
 @@ -3,6 +3,7 @@
  
  #ifdef __KERNEL__
@@ -31103,8 +31103,8 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.3
  
  struct rpc_pipe_ops {
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h
---- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-08-23 12:09:03.394512138 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-08-23 12:09:03.395501822 -0400
+--- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-08-24 14:17:49.183664000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-08-24 14:17:49.184674000 -0400
 @@ -0,0 +1,111 @@
 +/*
 + *  Copyright (c) 2008 The Regents of the University of Michigan.
@@ -31219,7 +31219,7 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux
 +#endif /* _SIMPLE_RPC_PIPEFS_H_ */
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-08-23 12:09:03.395501822 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-08-24 14:17:49.190665000 -0400
 @@ -166,4 +166,41 @@ static inline char *__svc_print_addr(con
  
  	return buf;
@@ -31263,8 +31263,8 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.n
 +}
  #endif /* SUNRPC_SVC_XPRT_H */
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xdr.h
---- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-08-23 12:08:29.066475323 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-23 12:09:03.396464612 -0400
+--- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-08-24 14:14:13.258707000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-24 14:17:49.195672000 -0400
 @@ -131,6 +131,13 @@ xdr_decode_hyper(__be32 *p, __u64 *valp)
  	return p + 2;
  }
@@ -31287,14 +31287,9 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch
  extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
  		unsigned int base, unsigned int len);
  extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
-diff -up linux-2.6.34.noarch/localversion-pnfs.orig linux-2.6.34.noarch/localversion-pnfs
---- linux-2.6.34.noarch/localversion-pnfs.orig	2010-08-23 12:09:03.396464612 -0400
-+++ linux-2.6.34.noarch/localversion-pnfs	2010-08-23 12:09:03.396464612 -0400
-@@ -0,0 +1 @@
-+-pnfs
 diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/sunrpc/Makefile
 --- linux-2.6.34.noarch/net/sunrpc/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-08-23 12:09:03.397501662 -0400
++++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-08-24 14:17:49.204668000 -0400
 @@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprt
  	    svc.o svcsock.o svcauth.o svcauth_unix.o \
  	    addr.o rpcb_clnt.o timer.o xdr.o \
@@ -31305,8 +31300,8 @@ diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/su
  sunrpc-$(CONFIG_PROC_FS) += stats.o
  sunrpc-$(CONFIG_SYSCTL) += sysctl.o
 diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c
---- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-08-23 12:09:03.398522348 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-08-23 12:09:03.398522348 -0400
+--- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-08-24 14:17:49.208664000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-08-24 14:17:49.209670000 -0400
 @@ -0,0 +1,424 @@
 +/*
 + *  net/sunrpc/simple_rpc_pipefs.c
@@ -31733,8 +31728,8 @@ diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.no
 +}
 +EXPORT_SYMBOL(pipefs_generic_destroy_msg);
 diff -up linux-2.6.34.noarch/net/sunrpc/xdr.c.orig linux-2.6.34.noarch/net/sunrpc/xdr.c
---- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-08-23 12:08:29.081501640 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-23 12:09:03.399443371 -0400
+--- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-08-24 14:14:13.447705000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-24 14:17:49.215665000 -0400
 @@ -403,16 +403,14 @@ xdr_shrink_pagelen(struct xdr_buf *buf, 
  
  	/* Shift the tail first */

From af636613e811089e31e967eded054a6bb64b25ca Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Tue, 24 Aug 2010 15:13:05 -0400
Subject: [PATCH 04/20] set the kernel flags

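Change the default build options in kernel.spec so that a plain
rpmbuild behaves as if it had been invoked with:

--with firmware
--with debuginfo (both branches now expand to 1, so --without debuginfo has no effect)
--without vdso_install
--without debug
--without headers
--without doc
--without perftool
--without perf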

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 kernel.spec | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/kernel.spec b/kernel.spec
index 7b72cab84..24ed4de0b 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -101,23 +101,23 @@ Summary: The Linux kernel
 # kernel-smp (only valid for ppc 32-bit)
 %define with_smp       %{?_without_smp:       0} %{?!_without_smp:       1}
 # kernel-debug
-%define with_debug     %{?_without_debug:     0} %{?!_without_debug:     1}
+%define with_debug     %{?_without_debug:     0} %{?!_without_debug:     0}
 # kernel-doc
-%define with_doc       %{?_without_doc:       0} %{?!_without_doc:       1}
+%define with_doc       %{?_without_doc:       0} %{?!_without_doc:       0}
 # kernel-headers
-%define with_headers   %{?_without_headers:   0} %{?!_without_headers:   1}
+%define with_headers   %{?_without_headers:   0} %{?!_without_headers:   0}
 # kernel-firmware
 %define with_firmware  %{?_with_firmware:     1} %{?!_with_firmware:     1}
 # tools/perf
-%define with_perftool  %{?_without_perftool:  0} %{?!_without_perftool:  1}
+%define with_perftool  %{?_without_perftool:  0} %{?!_without_perftool:  0}
 # perf noarch subpkg
-%define with_perf      %{?_without_perf:      0} %{?!_without_perf:      1}
+%define with_perf      %{?_without_perf:      0} %{?!_without_perf:      0}
 # kernel-debuginfo
-%define with_debuginfo %{?_without_debuginfo: 0} %{?!_without_debuginfo: 1}
+%define with_debuginfo %{?_without_debuginfo: 1} %{?!_without_debuginfo: 1}
 # kernel-bootwrapper (for creating zImages from kernel + initrd)
 %define with_bootwrapper %{?_without_bootwrapper: 0} %{?!_without_bootwrapper: 1}
 # Want to build a the vsdo directories installed
-%define with_vdso_install %{?_without_vdso_install: 0} %{?!_without_vdso_install: 1}
+%define with_vdso_install %{?_without_vdso_install: 0} %{?!_without_vdso_install: 0}
 
 # Build the kernel-doc package, but don't fail the build if it botches.
 # Here "true" means "continue" and "false" means "fail the build".

From c7b01347fb165ece597d6a5863d4ea4103aa5dfb Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Mon, 23 Aug 2010 12:20:57 -0400
Subject: [PATCH 05/20] Updated to the latest pNFS tag:
 pnfs-all-2.6.35-2010-08-19

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 config-generic                       |    12 +
 kernel.spec                          |    15 +-
 linux-2.6-pnfs-compile.patch         |    13 +
 linux-2.6.35-inline.patch            |    11 +
 nfs-35-fc.patch                      |  7235 ++++++
 nfsd-35-fc.patch                     |  1808 ++
 pnfs-all-2.6.35-2010-08-19-f13.patch | 31788 +++++++++++++++++++++++++
 7 files changed, 40880 insertions(+), 2 deletions(-)
 create mode 100644 linux-2.6-pnfs-compile.patch
 create mode 100644 linux-2.6.35-inline.patch
 create mode 100644 nfs-35-fc.patch
 create mode 100644 nfsd-35-fc.patch
 create mode 100644 pnfs-all-2.6.35-2010-08-19-f13.patch

diff --git a/config-generic b/config-generic
index 3b23aabcc..76379c8eb 100644
--- a/config-generic
+++ b/config-generic
@@ -3322,6 +3322,18 @@ CONFIG_NFSD_V3=y
 CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFS_FSCACHE=y
+# Enable pNFS
+CONFIG_PNFS=y
+CONFIG_PNFSD=y
+CONFIG_PNFSD_LOCAL_EXPORT=y
+CONFIG_SPNFS=y
+CONFIG_SPNFS_LAYOUTSEGMENTS=y
+CONFIG_SPNFS_BLOCK=y
+CONFIG_PNFS_OBJLAYOUT=m
+CONFIG_PNFS_BLOCK=m
+CONFIG_PNFS_PANLAYOUT=m
+CONFIG_PNFS_FILE_LAYOUT=m
+#
 CONFIG_LOCKD=m
 CONFIG_LOCKD_V4=y
 CONFIG_EXPORTFS=m
diff --git a/kernel.spec b/kernel.spec
index 6478f8671..14956777b 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -23,7 +23,7 @@ Summary: The Linux kernel
 #
 # (Uncomment the '#' and both spaces below to set the buildid.)
 #
-# % define buildid .local
+%define buildid .pnfs_all_2.6.35_2010_08_19
 ###################################################################
 
 # The buildid can also be specified on the rpmbuild command line
@@ -107,7 +107,7 @@ Summary: The Linux kernel
 # kernel-headers
 %define with_headers   %{?_without_headers:   0} %{?!_without_headers:   1}
 # kernel-firmware
-%define with_firmware  %{?_with_firmware:     1} %{?!_with_firmware:     0}
+%define with_firmware  %{?_with_firmware:     1} %{?!_with_firmware:     1}
 # tools/perf
 %define with_perftool  %{?_without_perftool:  0} %{?!_without_perftool:  1}
 # perf noarch subpkg
@@ -766,6 +766,12 @@ Patch12460: xfs-move-aio-completion-after-unwritten-extent-conversion.patch
 Patch12470: drivers-hwmon-coretemp-c-detect-the-thermal-sensors-by-cpuid.patch
 Patch12480: kprobes-x86-fix-kprobes-to-skip-prefixes-correctly.patch
 
+Patch30000: nfs-35-fc.patch
+Patch30001: nfsd-35-fc.patch
+Patch30002: pnfs-all-2.6.35-2010-08-19-f13.patch
+Patch30003: linux-2.6-pnfs-compile.patch
+Patch30004: linux-2.6.35-inline.patch
+
 %endif
 
 BuildRoot: %{_tmppath}/kernel-%{KVERREL}-root
@@ -1424,6 +1430,11 @@ ApplyPatch drivers-hwmon-coretemp-c-detect-the-thermal-sensors-by-cpuid.patch
 # bz #610941
 ApplyPatch kprobes-x86-fix-kprobes-to-skip-prefixes-correctly.patch
 
+ApplyPatch nfs-35-fc.patch  
+ApplyPatch nfsd-35-fc.patch  
+ApplyPatch pnfs-all-2.6.35-2010-08-19-f13.patch
+ApplyPatch linux-2.6-pnfs-compile.patch
+ApplyPatch linux-2.6.35-inline.patch
 # END OF PATCH APPLICATIONS
 
 %endif
diff --git a/linux-2.6-pnfs-compile.patch b/linux-2.6-pnfs-compile.patch
new file mode 100644
index 000000000..7c8cc4248
--- /dev/null
+++ b/linux-2.6-pnfs-compile.patch
@@ -0,0 +1,13 @@
+diff -up linux-2.6.32.x86_64/fs/nfs/objlayout/pnfs_osd_xdr.h.orig linux-2.6.32.x86_64/fs/nfs/objlayout/pnfs_osd_xdr.h
+diff -up linux-2.6.32.x86_64/include/net/inet_connection_sock.h.orig linux-2.6.32.x86_64/include/net/inet_connection_sock.h
+--- linux-2.6.32.x86_64/include/net/inet_connection_sock.h.orig	2009-12-02 22:51:21.000000000 -0500
++++ linux-2.6.32.x86_64/include/net/inet_connection_sock.h	2010-04-21 14:26:24.475659551 -0400
+@@ -23,7 +23,7 @@
+ #include <net/inet_sock.h>
+ #include <net/request_sock.h>
+ 
+-#define INET_CSK_DEBUG 1
++//#define INET_CSK_DEBUG 1
+ 
+ /* Cancel timers, when they are not required. */
+ #undef INET_CSK_CLEAR_TIMERS
diff --git a/linux-2.6.35-inline.patch b/linux-2.6.35-inline.patch
new file mode 100644
index 000000000..c56d8da5e
--- /dev/null
+++ b/linux-2.6.35-inline.patch
@@ -0,0 +1,11 @@
+diff -up linux-2.6.34.noarch/arch/x86/Makefile.orig linux-2.6.34.noarch/arch/x86/Makefile
+--- linux-2.6.34.noarch/arch/x86/Makefile.orig	2010-07-01 13:33:21.859627499 -0400
++++ linux-2.6.34.noarch/arch/x86/Makefile	2010-07-01 13:36:26.751576450 -0400
+@@ -81,6 +81,7 @@ ifdef CONFIG_CC_STACKPROTECTOR
+                 $(warning stack protector enabled but no compiler support)
+         endif
+ endif
++KBUILD_CFLAGS += -fno-inline-functions-called-once
+ 
+ # Don't unroll struct assignments with kmemcheck enabled
+ ifeq ($(CONFIG_KMEMCHECK),y)
diff --git a/nfs-35-fc.patch b/nfs-35-fc.patch
new file mode 100644
index 000000000..c3ad25f65
--- /dev/null
+++ b/nfs-35-fc.patch
@@ -0,0 +1,7235 @@
+diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/client.c
+--- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-23 11:01:00.352376393 -0400
+@@ -934,7 +934,6 @@ static int nfs_probe_fsinfo(struct nfs_s
+ 	}
+ 
+ 	fsinfo.fattr = fattr;
+-	nfs_fattr_init(fattr);
+ 	error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
+ 	if (error < 0)
+ 		goto out_error;
+@@ -1047,13 +1046,18 @@ struct nfs_server *nfs_create_server(con
+ 				     struct nfs_fh *mntfh)
+ {
+ 	struct nfs_server *server;
+-	struct nfs_fattr fattr;
++	struct nfs_fattr *fattr;
+ 	int error;
+ 
+ 	server = nfs_alloc_server();
+ 	if (!server)
+ 		return ERR_PTR(-ENOMEM);
+ 
++	error = -ENOMEM;
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto error;
++
+ 	/* Get a client representation */
+ 	error = nfs_init_server(server, data);
+ 	if (error < 0)
+@@ -1064,7 +1068,7 @@ struct nfs_server *nfs_create_server(con
+ 	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+ 
+ 	/* Probe the root fh to retrieve its FSID */
+-	error = nfs_probe_fsinfo(server, mntfh, &fattr);
++	error = nfs_probe_fsinfo(server, mntfh, fattr);
+ 	if (error < 0)
+ 		goto error;
+ 	if (server->nfs_client->rpc_ops->version == 3) {
+@@ -1077,14 +1081,14 @@ struct nfs_server *nfs_create_server(con
+ 			server->namelen = NFS2_MAXNAMLEN;
+ 	}
+ 
+-	if (!(fattr.valid & NFS_ATTR_FATTR)) {
+-		error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
++	if (!(fattr->valid & NFS_ATTR_FATTR)) {
++		error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
+ 		if (error < 0) {
+ 			dprintk("nfs_create_server: getattr error = %d\n", -error);
+ 			goto error;
+ 		}
+ 	}
+-	memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
++	memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
+ 
+ 	dprintk("Server FSID: %llx:%llx\n",
+ 		(unsigned long long) server->fsid.major,
+@@ -1096,9 +1100,11 @@ struct nfs_server *nfs_create_server(con
+ 	spin_unlock(&nfs_client_lock);
+ 
+ 	server->mount_time = jiffies;
++	nfs_free_fattr(fattr);
+ 	return server;
+ 
+ error:
++	nfs_free_fattr(fattr);
+ 	nfs_free_server(server);
+ 	return ERR_PTR(error);
+ }
+@@ -1340,7 +1346,7 @@ error:
+ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
+ 				      struct nfs_fh *mntfh)
+ {
+-	struct nfs_fattr fattr;
++	struct nfs_fattr *fattr;
+ 	struct nfs_server *server;
+ 	int error;
+ 
+@@ -1350,6 +1356,11 @@ struct nfs_server *nfs4_create_server(co
+ 	if (!server)
+ 		return ERR_PTR(-ENOMEM);
+ 
++	error = -ENOMEM;
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto error;
++
+ 	/* set up the general RPC client */
+ 	error = nfs4_init_server(server, data);
+ 	if (error < 0)
+@@ -1364,7 +1375,7 @@ struct nfs_server *nfs4_create_server(co
+ 		goto error;
+ 
+ 	/* Probe the root fh to retrieve its FSID */
+-	error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path);
++	error = nfs4_get_rootfh(server, mntfh);
+ 	if (error < 0)
+ 		goto error;
+ 
+@@ -1375,7 +1386,7 @@ struct nfs_server *nfs4_create_server(co
+ 
+ 	nfs4_session_set_rwsize(server);
+ 
+-	error = nfs_probe_fsinfo(server, mntfh, &fattr);
++	error = nfs_probe_fsinfo(server, mntfh, fattr);
+ 	if (error < 0)
+ 		goto error;
+ 
+@@ -1389,9 +1400,11 @@ struct nfs_server *nfs4_create_server(co
+ 
+ 	server->mount_time = jiffies;
+ 	dprintk("<-- nfs4_create_server() = %p\n", server);
++	nfs_free_fattr(fattr);
+ 	return server;
+ 
+ error:
++	nfs_free_fattr(fattr);
+ 	nfs_free_server(server);
+ 	dprintk("<-- nfs4_create_server() = error %d\n", error);
+ 	return ERR_PTR(error);
+@@ -1405,7 +1418,7 @@ struct nfs_server *nfs4_create_referral_
+ {
+ 	struct nfs_client *parent_client;
+ 	struct nfs_server *server, *parent_server;
+-	struct nfs_fattr fattr;
++	struct nfs_fattr *fattr;
+ 	int error;
+ 
+ 	dprintk("--> nfs4_create_referral_server()\n");
+@@ -1414,6 +1427,11 @@ struct nfs_server *nfs4_create_referral_
+ 	if (!server)
+ 		return ERR_PTR(-ENOMEM);
+ 
++	error = -ENOMEM;
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto error;
++
+ 	parent_server = NFS_SB(data->sb);
+ 	parent_client = parent_server->nfs_client;
+ 
+@@ -1443,12 +1461,12 @@ struct nfs_server *nfs4_create_referral_
+ 	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+ 
+ 	/* Probe the root fh to retrieve its FSID and filehandle */
+-	error = nfs4_path_walk(server, mntfh, data->mnt_path);
++	error = nfs4_get_rootfh(server, mntfh);
+ 	if (error < 0)
+ 		goto error;
+ 
+ 	/* probe the filesystem info for this server filesystem */
+-	error = nfs_probe_fsinfo(server, mntfh, &fattr);
++	error = nfs_probe_fsinfo(server, mntfh, fattr);
+ 	if (error < 0)
+ 		goto error;
+ 
+@@ -1466,10 +1484,12 @@ struct nfs_server *nfs4_create_referral_
+ 
+ 	server->mount_time = jiffies;
+ 
++	nfs_free_fattr(fattr);
+ 	dprintk("<-- nfs_create_referral_server() = %p\n", server);
+ 	return server;
+ 
+ error:
++	nfs_free_fattr(fattr);
+ 	nfs_free_server(server);
+ 	dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
+ 	return ERR_PTR(error);
+@@ -1485,7 +1505,7 @@ struct nfs_server *nfs_clone_server(stru
+ 				    struct nfs_fattr *fattr)
+ {
+ 	struct nfs_server *server;
+-	struct nfs_fattr fattr_fsinfo;
++	struct nfs_fattr *fattr_fsinfo;
+ 	int error;
+ 
+ 	dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
+@@ -1496,6 +1516,11 @@ struct nfs_server *nfs_clone_server(stru
+ 	if (!server)
+ 		return ERR_PTR(-ENOMEM);
+ 
++	error = -ENOMEM;
++	fattr_fsinfo = nfs_alloc_fattr();
++	if (fattr_fsinfo == NULL)
++		goto out_free_server;
++
+ 	/* Copy data from the source */
+ 	server->nfs_client = source->nfs_client;
+ 	atomic_inc(&server->nfs_client->cl_count);
+@@ -1512,7 +1537,7 @@ struct nfs_server *nfs_clone_server(stru
+ 		nfs_init_server_aclclient(server);
+ 
+ 	/* probe the filesystem info for this server filesystem */
+-	error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo);
++	error = nfs_probe_fsinfo(server, fh, fattr_fsinfo);
+ 	if (error < 0)
+ 		goto out_free_server;
+ 
+@@ -1534,10 +1559,12 @@ struct nfs_server *nfs_clone_server(stru
+ 
+ 	server->mount_time = jiffies;
+ 
++	nfs_free_fattr(fattr_fsinfo);
+ 	dprintk("<-- nfs_clone_server() = %p\n", server);
+ 	return server;
+ 
+ out_free_server:
++	nfs_free_fattr(fattr_fsinfo);
+ 	nfs_free_server(server);
+ 	dprintk("<-- nfs_clone_server() = error %d\n", error);
+ 	return ERR_PTR(error);
+diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs/delegation.c
+--- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-23 11:01:00.352376393 -0400
+@@ -213,7 +213,7 @@ int nfs_inode_set_delegation(struct inod
+ 	struct nfs_delegation *freeme = NULL;
+ 	int status = 0;
+ 
+-	delegation = kmalloc(sizeof(*delegation), GFP_KERNEL);
++	delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
+ 	if (delegation == NULL)
+ 		return -ENOMEM;
+ 	memcpy(delegation->stateid.data, res->delegation.data,
+diff -up linux-2.6.34.noarch/fs/nfs/dir.c.orig linux-2.6.34.noarch/fs/nfs/dir.c
+--- linux-2.6.34.noarch/fs/nfs/dir.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/dir.c	2010-08-23 11:01:00.353376419 -0400
+@@ -530,9 +530,7 @@ static int nfs_readdir(struct file *filp
+ 	nfs_readdir_descriptor_t my_desc,
+ 			*desc = &my_desc;
+ 	struct nfs_entry my_entry;
+-	struct nfs_fh	 fh;
+-	struct nfs_fattr fattr;
+-	long		res;
++	int res = -ENOMEM;
+ 
+ 	dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
+ 			dentry->d_parent->d_name.name, dentry->d_name.name,
+@@ -554,9 +552,11 @@ static int nfs_readdir(struct file *filp
+ 
+ 	my_entry.cookie = my_entry.prev_cookie = 0;
+ 	my_entry.eof = 0;
+-	my_entry.fh = &fh;
+-	my_entry.fattr = &fattr;
+-	nfs_fattr_init(&fattr);
++	my_entry.fh = nfs_alloc_fhandle();
++	my_entry.fattr = nfs_alloc_fattr();
++	if (my_entry.fh == NULL || my_entry.fattr == NULL)
++		goto out_alloc_failed;
++
+ 	desc->entry = &my_entry;
+ 
+ 	nfs_block_sillyrename(dentry);
+@@ -598,7 +598,10 @@ out:
+ 	nfs_unblock_sillyrename(dentry);
+ 	if (res > 0)
+ 		res = 0;
+-	dfprintk(FILE, "NFS: readdir(%s/%s) returns %ld\n",
++out_alloc_failed:
++	nfs_free_fattr(my_entry.fattr);
++	nfs_free_fhandle(my_entry.fh);
++	dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n",
+ 			dentry->d_parent->d_name.name, dentry->d_name.name,
+ 			res);
+ 	return res;
+@@ -776,9 +779,9 @@ static int nfs_lookup_revalidate(struct 
+ 	struct inode *dir;
+ 	struct inode *inode;
+ 	struct dentry *parent;
++	struct nfs_fh *fhandle = NULL;
++	struct nfs_fattr *fattr = NULL;
+ 	int error;
+-	struct nfs_fh fhandle;
+-	struct nfs_fattr fattr;
+ 
+ 	parent = dget_parent(dentry);
+ 	dir = parent->d_inode;
+@@ -811,14 +814,22 @@ static int nfs_lookup_revalidate(struct 
+ 	if (NFS_STALE(inode))
+ 		goto out_bad;
+ 
+-	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
++	error = -ENOMEM;
++	fhandle = nfs_alloc_fhandle();
++	fattr = nfs_alloc_fattr();
++	if (fhandle == NULL || fattr == NULL)
++		goto out_error;
++
++	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
+ 	if (error)
+ 		goto out_bad;
+-	if (nfs_compare_fh(NFS_FH(inode), &fhandle))
++	if (nfs_compare_fh(NFS_FH(inode), fhandle))
+ 		goto out_bad;
+-	if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
++	if ((error = nfs_refresh_inode(inode, fattr)) != 0)
+ 		goto out_bad;
+ 
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fhandle);
+ out_set_verifier:
+ 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+  out_valid:
+@@ -842,11 +853,21 @@ out_zap_parent:
+ 		shrink_dcache_parent(dentry);
+ 	}
+ 	d_drop(dentry);
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fhandle);
+ 	dput(parent);
+ 	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
+ 			__func__, dentry->d_parent->d_name.name,
+ 			dentry->d_name.name);
+ 	return 0;
++out_error:
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fhandle);
++	dput(parent);
++	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n",
++			__func__, dentry->d_parent->d_name.name,
++			dentry->d_name.name, error);
++	return error;
+ }
+ 
+ /*
+@@ -911,9 +932,9 @@ static struct dentry *nfs_lookup(struct 
+ 	struct dentry *res;
+ 	struct dentry *parent;
+ 	struct inode *inode = NULL;
++	struct nfs_fh *fhandle = NULL;
++	struct nfs_fattr *fattr = NULL;
+ 	int error;
+-	struct nfs_fh fhandle;
+-	struct nfs_fattr fattr;
+ 
+ 	dfprintk(VFS, "NFS: lookup(%s/%s)\n",
+ 		dentry->d_parent->d_name.name, dentry->d_name.name);
+@@ -923,7 +944,6 @@ static struct dentry *nfs_lookup(struct 
+ 	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
+ 		goto out;
+ 
+-	res = ERR_PTR(-ENOMEM);
+ 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+ 
+ 	/*
+@@ -936,17 +956,23 @@ static struct dentry *nfs_lookup(struct 
+ 		goto out;
+ 	}
+ 
++	res = ERR_PTR(-ENOMEM);
++	fhandle = nfs_alloc_fhandle();
++	fattr = nfs_alloc_fattr();
++	if (fhandle == NULL || fattr == NULL)
++		goto out;
++
+ 	parent = dentry->d_parent;
+ 	/* Protect against concurrent sillydeletes */
+ 	nfs_block_sillyrename(parent);
+-	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
++	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
+ 	if (error == -ENOENT)
+ 		goto no_entry;
+ 	if (error < 0) {
+ 		res = ERR_PTR(error);
+ 		goto out_unblock_sillyrename;
+ 	}
+-	inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
++	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
+ 	res = (struct dentry *)inode;
+ 	if (IS_ERR(res))
+ 		goto out_unblock_sillyrename;
+@@ -962,6 +988,8 @@ no_entry:
+ out_unblock_sillyrename:
+ 	nfs_unblock_sillyrename(parent);
+ out:
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fhandle);
+ 	return res;
+ }
+ 
+@@ -1669,28 +1697,33 @@ static void nfs_access_free_entry(struct
+ 	smp_mb__after_atomic_dec();
+ }
+ 
++static void nfs_access_free_list(struct list_head *head)
++{
++	struct nfs_access_entry *cache;
++
++	while (!list_empty(head)) {
++		cache = list_entry(head->next, struct nfs_access_entry, lru);
++		list_del(&cache->lru);
++		nfs_access_free_entry(cache);
++	}
++}
++
+ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+ {
+ 	LIST_HEAD(head);
+ 	struct nfs_inode *nfsi;
+ 	struct nfs_access_entry *cache;
+ 
+-restart:
++	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
++		return (nr_to_scan == 0) ? 0 : -1;
++
+ 	spin_lock(&nfs_access_lru_lock);
+ 	list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
+-		struct rw_semaphore *s_umount;
+ 		struct inode *inode;
+ 
+ 		if (nr_to_scan-- == 0)
+ 			break;
+-		s_umount = &nfsi->vfs_inode.i_sb->s_umount;
+-		if (!down_read_trylock(s_umount))
+-			continue;
+-		inode = igrab(&nfsi->vfs_inode);
+-		if (inode == NULL) {
+-			up_read(s_umount);
+-			continue;
+-		}
++		inode = &nfsi->vfs_inode;
+ 		spin_lock(&inode->i_lock);
+ 		if (list_empty(&nfsi->access_cache_entry_lru))
+ 			goto remove_lru_entry;
+@@ -1704,61 +1737,47 @@ restart:
+ 		else {
+ remove_lru_entry:
+ 			list_del_init(&nfsi->access_cache_inode_lru);
++			smp_mb__before_clear_bit();
+ 			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
++			smp_mb__after_clear_bit();
+ 		}
+-		spin_unlock(&inode->i_lock);
+-		spin_unlock(&nfs_access_lru_lock);
+-		iput(inode);
+-		up_read(s_umount);
+-		goto restart;
+ 	}
+ 	spin_unlock(&nfs_access_lru_lock);
+-	while (!list_empty(&head)) {
+-		cache = list_entry(head.next, struct nfs_access_entry, lru);
+-		list_del(&cache->lru);
+-		nfs_access_free_entry(cache);
+-	}
++	nfs_access_free_list(&head);
+ 	return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
+ }
+ 
+-static void __nfs_access_zap_cache(struct inode *inode)
++static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
+ {
+-	struct nfs_inode *nfsi = NFS_I(inode);
+ 	struct rb_root *root_node = &nfsi->access_cache;
+-	struct rb_node *n, *dispose = NULL;
++	struct rb_node *n;
+ 	struct nfs_access_entry *entry;
+ 
+ 	/* Unhook entries from the cache */
+ 	while ((n = rb_first(root_node)) != NULL) {
+ 		entry = rb_entry(n, struct nfs_access_entry, rb_node);
+ 		rb_erase(n, root_node);
+-		list_del(&entry->lru);
+-		n->rb_left = dispose;
+-		dispose = n;
++		list_move(&entry->lru, head);
+ 	}
+ 	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+-	spin_unlock(&inode->i_lock);
+-
+-	/* Now kill them all! */
+-	while (dispose != NULL) {
+-		n = dispose;
+-		dispose = n->rb_left;
+-		nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node));
+-	}
+ }
+ 
+ void nfs_access_zap_cache(struct inode *inode)
+ {
++	LIST_HEAD(head);
++
++	if (test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags) == 0)
++		return;
+ 	/* Remove from global LRU init */
+-	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
+-		spin_lock(&nfs_access_lru_lock);
++	spin_lock(&nfs_access_lru_lock);
++	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
+ 		list_del_init(&NFS_I(inode)->access_cache_inode_lru);
+-		spin_unlock(&nfs_access_lru_lock);
+-	}
+ 
+ 	spin_lock(&inode->i_lock);
+-	/* This will release the spinlock */
+-	__nfs_access_zap_cache(inode);
++	__nfs_access_zap_cache(NFS_I(inode), &head);
++	spin_unlock(&inode->i_lock);
++	spin_unlock(&nfs_access_lru_lock);
++	nfs_access_free_list(&head);
+ }
+ 
+ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
+@@ -1809,8 +1828,8 @@ out_stale:
+ 	nfs_access_free_entry(cache);
+ 	return -ENOENT;
+ out_zap:
+-	/* This will release the spinlock */
+-	__nfs_access_zap_cache(inode);
++	spin_unlock(&inode->i_lock);
++	nfs_access_zap_cache(inode);
+ 	return -ENOENT;
+ }
+ 
+@@ -1865,9 +1884,11 @@ static void nfs_access_add_cache(struct 
+ 	smp_mb__after_atomic_inc();
+ 
+ 	/* Add inode to global LRU list */
+-	if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
++	if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
+ 		spin_lock(&nfs_access_lru_lock);
+-		list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list);
++		if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
++			list_add_tail(&NFS_I(inode)->access_cache_inode_lru,
++					&nfs_access_lru_list);
+ 		spin_unlock(&nfs_access_lru_lock);
+ 	}
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.c
+--- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-23 11:00:23.790502081 -0400
++++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-23 11:01:00.354376416 -0400
+@@ -162,14 +162,17 @@ static int nfs_revalidate_file_size(stru
+ 	struct nfs_server *server = NFS_SERVER(inode);
+ 	struct nfs_inode *nfsi = NFS_I(inode);
+ 
+-	if (server->flags & NFS_MOUNT_NOAC)
+-		goto force_reval;
++	if (nfs_have_delegated_attributes(inode))
++		goto out_noreval;
++
+ 	if (filp->f_flags & O_DIRECT)
+ 		goto force_reval;
+-	if (nfsi->npages != 0)
+-		return 0;
+-	if (!(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode))
+-		return 0;
++	if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
++		goto force_reval;
++	if (nfs_attribute_timeout(inode))
++		goto force_reval;
++out_noreval:
++	return 0;
+ force_reval:
+ 	return __nfs_revalidate_inode(server, inode);
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/fscache.c.orig linux-2.6.34.noarch/fs/nfs/fscache.c
+--- linux-2.6.34.noarch/fs/nfs/fscache.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/fscache.c	2010-08-23 11:01:00.355376416 -0400
+@@ -467,7 +467,8 @@ int __nfs_readpages_from_fscache(struct 
+ 				 struct list_head *pages,
+ 				 unsigned *nr_pages)
+ {
+-	int ret, npages = *nr_pages;
++	unsigned npages = *nr_pages;
++	int ret;
+ 
+ 	dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n",
+ 		 NFS_I(inode)->fscache, npages, inode);
+diff -up linux-2.6.34.noarch/fs/nfs/getroot.c.orig linux-2.6.34.noarch/fs/nfs/getroot.c
+--- linux-2.6.34.noarch/fs/nfs/getroot.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/getroot.c	2010-08-23 11:01:00.356376417 -0400
+@@ -78,159 +78,94 @@ struct dentry *nfs_get_root(struct super
+ {
+ 	struct nfs_server *server = NFS_SB(sb);
+ 	struct nfs_fsinfo fsinfo;
+-	struct nfs_fattr fattr;
+-	struct dentry *mntroot;
++	struct dentry *ret;
+ 	struct inode *inode;
+ 	int error;
+ 
+ 	/* get the actual root for this mount */
+-	fsinfo.fattr = &fattr;
++	fsinfo.fattr = nfs_alloc_fattr();
++	if (fsinfo.fattr == NULL)
++		return ERR_PTR(-ENOMEM);
+ 
+ 	error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+ 	if (error < 0) {
+ 		dprintk("nfs_get_root: getattr error = %d\n", -error);
+-		return ERR_PTR(error);
++		ret = ERR_PTR(error);
++		goto out;
+ 	}
+ 
+ 	inode = nfs_fhget(sb, mntfh, fsinfo.fattr);
+ 	if (IS_ERR(inode)) {
+ 		dprintk("nfs_get_root: get root inode failed\n");
+-		return ERR_CAST(inode);
++		ret = ERR_CAST(inode);
++		goto out;
+ 	}
+ 
+ 	error = nfs_superblock_set_dummy_root(sb, inode);
+-	if (error != 0)
+-		return ERR_PTR(error);
++	if (error != 0) {
++		ret = ERR_PTR(error);
++		goto out;
++	}
+ 
+ 	/* root dentries normally start off anonymous and get spliced in later
+ 	 * if the dentry tree reaches them; however if the dentry already
+ 	 * exists, we'll pick it up at this point and use it as the root
+ 	 */
+-	mntroot = d_obtain_alias(inode);
+-	if (IS_ERR(mntroot)) {
++	ret = d_obtain_alias(inode);
++	if (IS_ERR(ret)) {
+ 		dprintk("nfs_get_root: get root dentry failed\n");
+-		return mntroot;
++		goto out;
+ 	}
+ 
+-	security_d_instantiate(mntroot, inode);
+-
+-	if (!mntroot->d_op)
+-		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
++	security_d_instantiate(ret, inode);
+ 
+-	return mntroot;
++	if (ret->d_op == NULL)
++		ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
++out:
++	nfs_free_fattr(fsinfo.fattr);
++	return ret;
+ }
+ 
+ #ifdef CONFIG_NFS_V4
+ 
+-/*
+- * Do a simple pathwalk from the root FH of the server to the nominated target
+- * of the mountpoint
+- * - give error on symlinks
+- * - give error on ".." occurring in the path
+- * - follow traversals
+- */
+-int nfs4_path_walk(struct nfs_server *server,
+-		   struct nfs_fh *mntfh,
+-		   const char *path)
++int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
+ {
+ 	struct nfs_fsinfo fsinfo;
+-	struct nfs_fattr fattr;
+-	struct nfs_fh lastfh;
+-	struct qstr name;
+-	int ret;
+-
+-	dprintk("--> nfs4_path_walk(,,%s)\n", path);
+-
+-	fsinfo.fattr = &fattr;
+-	nfs_fattr_init(&fattr);
+-
+-	/* Eat leading slashes */
+-	while (*path == '/')
+-		path++;
++	int ret = -ENOMEM;
++
++	dprintk("--> nfs4_get_rootfh()\n");
++
++	fsinfo.fattr = nfs_alloc_fattr();
++	if (fsinfo.fattr == NULL)
++		goto out;
+ 
+ 	/* Start by getting the root filehandle from the server */
+ 	ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+ 	if (ret < 0) {
+-		dprintk("nfs4_get_root: getroot error = %d\n", -ret);
+-		return ret;
++		dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
++		goto out;
+ 	}
+ 
+-	if (!S_ISDIR(fattr.mode)) {
+-		printk(KERN_ERR "nfs4_get_root:"
++	if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_MODE)
++			|| !S_ISDIR(fsinfo.fattr->mode)) {
++		printk(KERN_ERR "nfs4_get_rootfh:"
+ 		       " getroot encountered non-directory\n");
+-		return -ENOTDIR;
++		ret = -ENOTDIR;
++		goto out;
+ 	}
+ 
+-	/* FIXME: It is quite valid for the server to return a referral here */
+-	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+-		printk(KERN_ERR "nfs4_get_root:"
++	if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
++		printk(KERN_ERR "nfs4_get_rootfh:"
+ 		       " getroot obtained referral\n");
+-		return -EREMOTE;
++		ret = -EREMOTE;
++		goto out;
+ 	}
+ 
+-next_component:
+-	dprintk("Next: %s\n", path);
+-
+-	/* extract the next bit of the path */
+-	if (!*path)
+-		goto path_walk_complete;
+-
+-	name.name = path;
+-	while (*path && *path != '/')
+-		path++;
+-	name.len = path - (const char *) name.name;
+-
+-	if (name.len > NFS4_MAXNAMLEN)
+-		return -ENAMETOOLONG;
+-
+-eat_dot_dir:
+-	while (*path == '/')
+-		path++;
+-
+-	if (path[0] == '.' && (path[1] == '/' || !path[1])) {
+-		path += 2;
+-		goto eat_dot_dir;
+-	}
+-
+-	/* FIXME: Why shouldn't the user be able to use ".." in the path? */
+-	if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2])
+-	    ) {
+-		printk(KERN_ERR "nfs4_get_root:"
+-		       " Mount path contains reference to \"..\"\n");
+-		return -EINVAL;
+-	}
+-
+-	/* lookup the next FH in the sequence */
+-	memcpy(&lastfh, mntfh, sizeof(lastfh));
+-
+-	dprintk("LookupFH: %*.*s [%s]\n", name.len, name.len, name.name, path);
+-
+-	ret = server->nfs_client->rpc_ops->lookupfh(server, &lastfh, &name,
+-						    mntfh, &fattr);
+-	if (ret < 0) {
+-		dprintk("nfs4_get_root: getroot error = %d\n", -ret);
+-		return ret;
+-	}
+-
+-	if (!S_ISDIR(fattr.mode)) {
+-		printk(KERN_ERR "nfs4_get_root:"
+-		       " lookupfh encountered non-directory\n");
+-		return -ENOTDIR;
+-	}
+-
+-	/* FIXME: Referrals are quite valid here too */
+-	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+-		printk(KERN_ERR "nfs4_get_root:"
+-		       " lookupfh obtained referral\n");
+-		return -EREMOTE;
+-	}
+-
+-	goto next_component;
+-
+-path_walk_complete:
+-	memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
+-	dprintk("<-- nfs4_path_walk() = 0\n");
+-	return 0;
++	memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
++out:
++	nfs_free_fattr(fsinfo.fattr);
++	dprintk("<-- nfs4_get_rootfh() = %d\n", ret);
++	return ret;
+ }
+ 
+ /*
+@@ -239,8 +174,8 @@ path_walk_complete:
+ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
+ {
+ 	struct nfs_server *server = NFS_SB(sb);
+-	struct nfs_fattr fattr;
+-	struct dentry *mntroot;
++	struct nfs_fattr *fattr = NULL;
++	struct dentry *ret;
+ 	struct inode *inode;
+ 	int error;
+ 
+@@ -254,40 +189,50 @@ struct dentry *nfs4_get_root(struct supe
+ 		return ERR_PTR(error);
+ 	}
+ 
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		return ERR_PTR(-ENOMEM);
++
+ 	/* get the actual root for this mount */
+-	error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
++	error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
+ 	if (error < 0) {
+ 		dprintk("nfs_get_root: getattr error = %d\n", -error);
+-		return ERR_PTR(error);
++		ret = ERR_PTR(error);
++		goto out;
+ 	}
+ 
+-	inode = nfs_fhget(sb, mntfh, &fattr);
++	inode = nfs_fhget(sb, mntfh, fattr);
+ 	if (IS_ERR(inode)) {
+ 		dprintk("nfs_get_root: get root inode failed\n");
+-		return ERR_CAST(inode);
++		ret = ERR_CAST(inode);
++		goto out;
+ 	}
+ 
+ 	error = nfs_superblock_set_dummy_root(sb, inode);
+-	if (error != 0)
+-		return ERR_PTR(error);
++	if (error != 0) {
++		ret = ERR_PTR(error);
++		goto out;
++	}
+ 
+ 	/* root dentries normally start off anonymous and get spliced in later
+ 	 * if the dentry tree reaches them; however if the dentry already
+ 	 * exists, we'll pick it up at this point and use it as the root
+ 	 */
+-	mntroot = d_obtain_alias(inode);
+-	if (IS_ERR(mntroot)) {
++	ret = d_obtain_alias(inode);
++	if (IS_ERR(ret)) {
+ 		dprintk("nfs_get_root: get root dentry failed\n");
+-		return mntroot;
++		goto out;
+ 	}
+ 
+-	security_d_instantiate(mntroot, inode);
++	security_d_instantiate(ret, inode);
+ 
+-	if (!mntroot->d_op)
+-		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
++	if (ret->d_op == NULL)
++		ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
+ 
++out:
++	nfs_free_fattr(fattr);
+ 	dprintk("<-- nfs4_get_root()\n");
+-	return mntroot;
++	return ret;
+ }
+ 
+ #endif /* CONFIG_NFS_V4 */
+diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inode.c
+--- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-23 11:01:00.357376378 -0400
+@@ -393,8 +393,8 @@ int
+ nfs_setattr(struct dentry *dentry, struct iattr *attr)
+ {
+ 	struct inode *inode = dentry->d_inode;
+-	struct nfs_fattr fattr;
+-	int error;
++	struct nfs_fattr *fattr;
++	int error = -ENOMEM;
+ 
+ 	nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
+ 
+@@ -417,14 +417,20 @@ nfs_setattr(struct dentry *dentry, struc
+ 		filemap_write_and_wait(inode->i_mapping);
+ 		nfs_wb_all(inode);
+ 	}
++
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto out;
+ 	/*
+ 	 * Return any delegations if we're going to change ACLs
+ 	 */
+ 	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
+ 		nfs_inode_return_delegation(inode);
+-	error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
++	error = NFS_PROTO(inode)->setattr(dentry, fattr, attr);
+ 	if (error == 0)
+-		nfs_refresh_inode(inode, &fattr);
++		nfs_refresh_inode(inode, fattr);
++	nfs_free_fattr(fattr);
++out:
+ 	return error;
+ }
+ 
+@@ -682,7 +688,7 @@ int
+ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
+ {
+ 	int		 status = -ESTALE;
+-	struct nfs_fattr fattr;
++	struct nfs_fattr *fattr = NULL;
+ 	struct nfs_inode *nfsi = NFS_I(inode);
+ 
+ 	dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
+@@ -693,8 +699,13 @@ __nfs_revalidate_inode(struct nfs_server
+ 	if (NFS_STALE(inode))
+ 		goto out;
+ 
++	status = -ENOMEM;
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto out;
++
+ 	nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
+-	status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
++	status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr);
+ 	if (status != 0) {
+ 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
+ 			 inode->i_sb->s_id,
+@@ -707,7 +718,7 @@ __nfs_revalidate_inode(struct nfs_server
+ 		goto out;
+ 	}
+ 
+-	status = nfs_refresh_inode(inode, &fattr);
++	status = nfs_refresh_inode(inode, fattr);
+ 	if (status) {
+ 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
+ 			 inode->i_sb->s_id,
+@@ -723,6 +734,7 @@ __nfs_revalidate_inode(struct nfs_server
+ 		(long long)NFS_FILEID(inode));
+ 
+  out:
++	nfs_free_fattr(fattr);
+ 	return status;
+ }
+ 
+@@ -730,9 +742,14 @@ int nfs_attribute_timeout(struct inode *
+ {
+ 	struct nfs_inode *nfsi = NFS_I(inode);
+ 
++	return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
++}
++
++static int nfs_attribute_cache_expired(struct inode *inode)
++{
+ 	if (nfs_have_delegated_attributes(inode))
+ 		return 0;
+-	return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
++	return nfs_attribute_timeout(inode);
+ }
+ 
+ /**
+@@ -745,7 +762,7 @@ int nfs_attribute_timeout(struct inode *
+ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
+ {
+ 	if (!(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR)
+-			&& !nfs_attribute_timeout(inode))
++			&& !nfs_attribute_cache_expired(inode))
+ 		return NFS_STALE(inode) ? -ESTALE : 0;
+ 	return __nfs_revalidate_inode(server, inode);
+ }
+@@ -782,7 +799,8 @@ int nfs_revalidate_mapping(struct inode 
+ 	int ret = 0;
+ 
+ 	if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
+-			|| nfs_attribute_timeout(inode) || NFS_STALE(inode)) {
++			|| nfs_attribute_cache_expired(inode)
++			|| NFS_STALE(inode)) {
+ 		ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ 		if (ret < 0)
+ 			goto out;
+@@ -916,6 +934,26 @@ void nfs_fattr_init(struct nfs_fattr *fa
+ 	fattr->gencount = nfs_inc_attr_generation_counter();
+ }
+ 
++struct nfs_fattr *nfs_alloc_fattr(void)
++{
++	struct nfs_fattr *fattr;
++
++	fattr = kmalloc(sizeof(*fattr), GFP_NOFS);
++	if (fattr != NULL)
++		nfs_fattr_init(fattr);
++	return fattr;
++}
++
++struct nfs_fh *nfs_alloc_fhandle(void)
++{
++	struct nfs_fh *fh;
++
++	fh = kmalloc(sizeof(struct nfs_fh), GFP_NOFS);
++	if (fh != NULL)
++		fh->size = 0;
++	return fh;
++}
++
+ /**
+  * nfs_inode_attrs_need_update - check if the inode attributes need updating
+  * @inode - pointer to inode
+diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/internal.h
+--- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-23 11:01:00.358564151 -0400
+@@ -244,9 +244,7 @@ extern struct dentry *nfs_get_root(struc
+ #ifdef CONFIG_NFS_V4
+ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *);
+ 
+-extern int nfs4_path_walk(struct nfs_server *server,
+-			  struct nfs_fh *mntfh,
+-			  const char *path);
++extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
+ #endif
+ 
+ /* read.c */
+diff -up linux-2.6.34.noarch/fs/nfs/iostat.h.orig linux-2.6.34.noarch/fs/nfs/iostat.h
+--- linux-2.6.34.noarch/fs/nfs/iostat.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/iostat.h	2010-08-23 11:01:00.358564151 -0400
+@@ -36,14 +36,14 @@ static inline void nfs_inc_stats(const s
+ 
+ static inline void nfs_add_server_stats(const struct nfs_server *server,
+ 					enum nfs_stat_bytecounters stat,
+-					unsigned long addend)
++					long addend)
+ {
+ 	this_cpu_add(server->io_stats->bytes[stat], addend);
+ }
+ 
+ static inline void nfs_add_stats(const struct inode *inode,
+ 				 enum nfs_stat_bytecounters stat,
+-				 unsigned long addend)
++				 long addend)
+ {
+ 	nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
+ }
+@@ -51,7 +51,7 @@ static inline void nfs_add_stats(const s
+ #ifdef CONFIG_NFS_FSCACHE
+ static inline void nfs_add_fscache_stats(struct inode *inode,
+ 					 enum nfs_stat_fscachecounters stat,
+-					 unsigned long addend)
++					 long addend)
+ {
+ 	this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend);
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/namespace.c.orig linux-2.6.34.noarch/fs/nfs/namespace.c
+--- linux-2.6.34.noarch/fs/nfs/namespace.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/namespace.c	2010-08-23 11:01:00.359420147 -0400
+@@ -105,8 +105,8 @@ static void * nfs_follow_mountpoint(stru
+ 	struct vfsmount *mnt;
+ 	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+ 	struct dentry *parent;
+-	struct nfs_fh fh;
+-	struct nfs_fattr fattr;
++	struct nfs_fh *fh = NULL;
++	struct nfs_fattr *fattr = NULL;
+ 	int err;
+ 
+ 	dprintk("--> nfs_follow_mountpoint()\n");
+@@ -115,6 +115,12 @@ static void * nfs_follow_mountpoint(stru
+ 	if (IS_ROOT(dentry))
+ 		goto out_err;
+ 
++	err = -ENOMEM;
++	fh = nfs_alloc_fhandle();
++	fattr = nfs_alloc_fattr();
++	if (fh == NULL || fattr == NULL)
++		goto out_err;
++
+ 	dprintk("%s: enter\n", __func__);
+ 	dput(nd->path.dentry);
+ 	nd->path.dentry = dget(dentry);
+@@ -123,16 +129,16 @@ static void * nfs_follow_mountpoint(stru
+ 	parent = dget_parent(nd->path.dentry);
+ 	err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
+ 						  &nd->path.dentry->d_name,
+-						  &fh, &fattr);
++						  fh, fattr);
+ 	dput(parent);
+ 	if (err != 0)
+ 		goto out_err;
+ 
+-	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL)
++	if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
+ 		mnt = nfs_do_refmount(nd->path.mnt, nd->path.dentry);
+ 	else
+-		mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, &fh,
+-				      &fattr);
++		mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, fh,
++				      fattr);
+ 	err = PTR_ERR(mnt);
+ 	if (IS_ERR(mnt))
+ 		goto out_err;
+@@ -151,6 +157,8 @@ static void * nfs_follow_mountpoint(stru
+ 	nd->path.dentry = dget(mnt->mnt_root);
+ 	schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
+ out:
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fh);
+ 	dprintk("%s: done, returned %d\n", __func__, err);
+ 
+ 	dprintk("<-- nfs_follow_mountpoint() = %d\n", err);
+diff -up linux-2.6.34.noarch/fs/nfs/nfs3acl.c.orig linux-2.6.34.noarch/fs/nfs/nfs3acl.c
+--- linux-2.6.34.noarch/fs/nfs/nfs3acl.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3acl.c	2010-08-23 11:01:00.359420147 -0400
+@@ -185,7 +185,6 @@ static void nfs3_cache_acls(struct inode
+ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
+ {
+ 	struct nfs_server *server = NFS_SERVER(inode);
+-	struct nfs_fattr fattr;
+ 	struct page *pages[NFSACL_MAXPAGES] = { };
+ 	struct nfs3_getaclargs args = {
+ 		.fh = NFS_FH(inode),
+@@ -193,7 +192,7 @@ struct posix_acl *nfs3_proc_getacl(struc
+ 		.pages = pages,
+ 	};
+ 	struct nfs3_getaclres res = {
+-		.fattr =	&fattr,
++		0
+ 	};
+ 	struct rpc_message msg = {
+ 		.rpc_argp	= &args,
+@@ -228,7 +227,10 @@ struct posix_acl *nfs3_proc_getacl(struc
+ 
+ 	dprintk("NFS call getacl\n");
+ 	msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL];
+-	nfs_fattr_init(&fattr);
++	res.fattr = nfs_alloc_fattr();
++	if (res.fattr == NULL)
++		return ERR_PTR(-ENOMEM);
++
+ 	status = rpc_call_sync(server->client_acl, &msg, 0);
+ 	dprintk("NFS reply getacl: %d\n", status);
+ 
+@@ -238,7 +240,7 @@ struct posix_acl *nfs3_proc_getacl(struc
+ 
+ 	switch (status) {
+ 		case 0:
+-			status = nfs_refresh_inode(inode, &fattr);
++			status = nfs_refresh_inode(inode, res.fattr);
+ 			break;
+ 		case -EPFNOSUPPORT:
+ 		case -EPROTONOSUPPORT:
+@@ -278,6 +280,7 @@ struct posix_acl *nfs3_proc_getacl(struc
+ getout:
+ 	posix_acl_release(res.acl_access);
+ 	posix_acl_release(res.acl_default);
++	nfs_free_fattr(res.fattr);
+ 
+ 	if (status != 0) {
+ 		posix_acl_release(acl);
+@@ -290,7 +293,7 @@ static int nfs3_proc_setacls(struct inod
+ 		  struct posix_acl *dfacl)
+ {
+ 	struct nfs_server *server = NFS_SERVER(inode);
+-	struct nfs_fattr fattr;
++	struct nfs_fattr *fattr;
+ 	struct page *pages[NFSACL_MAXPAGES];
+ 	struct nfs3_setaclargs args = {
+ 		.inode = inode,
+@@ -335,8 +338,13 @@ static int nfs3_proc_setacls(struct inod
+ 	}
+ 
+ 	dprintk("NFS call setacl\n");
++	status = -ENOMEM;
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto out_freepages;
++
+ 	msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
+-	nfs_fattr_init(&fattr);
++	msg.rpc_resp = fattr;
+ 	status = rpc_call_sync(server->client_acl, &msg, 0);
+ 	nfs_access_zap_cache(inode);
+ 	nfs_zap_acl_cache(inode);
+@@ -344,7 +352,7 @@ static int nfs3_proc_setacls(struct inod
+ 
+ 	switch (status) {
+ 		case 0:
+-			status = nfs_refresh_inode(inode, &fattr);
++			status = nfs_refresh_inode(inode, fattr);
+ 			nfs3_cache_acls(inode, acl, dfacl);
+ 			break;
+ 		case -EPFNOSUPPORT:
+@@ -355,6 +363,7 @@ static int nfs3_proc_setacls(struct inod
+ 		case -ENOTSUPP:
+ 			status = -EOPNOTSUPP;
+ 	}
++	nfs_free_fattr(fattr);
+ out_freepages:
+ 	while (args.npages != 0) {
+ 		args.npages--;
+diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs3proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-23 11:01:00.360574301 -0400
+@@ -144,14 +144,12 @@ static int
+ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
+ 		 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+ {
+-	struct nfs_fattr	dir_attr;
+ 	struct nfs3_diropargs	arg = {
+ 		.fh		= NFS_FH(dir),
+ 		.name		= name->name,
+ 		.len		= name->len
+ 	};
+ 	struct nfs3_diropres	res = {
+-		.dir_attr	= &dir_attr,
+ 		.fh		= fhandle,
+ 		.fattr		= fattr
+ 	};
+@@ -163,29 +161,30 @@ nfs3_proc_lookup(struct inode *dir, stru
+ 	int			status;
+ 
+ 	dprintk("NFS call  lookup %s\n", name->name);
+-	nfs_fattr_init(&dir_attr);
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.dir_attr == NULL)
++		return -ENOMEM;
++
+ 	nfs_fattr_init(fattr);
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+-	nfs_refresh_inode(dir, &dir_attr);
++	nfs_refresh_inode(dir, res.dir_attr);
+ 	if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
+ 		msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+ 		msg.rpc_argp = fhandle;
+ 		msg.rpc_resp = fattr;
+ 		status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	}
++	nfs_free_fattr(res.dir_attr);
+ 	dprintk("NFS reply lookup: %d\n", status);
+ 	return status;
+ }
+ 
+ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+ {
+-	struct nfs_fattr	fattr;
+ 	struct nfs3_accessargs	arg = {
+ 		.fh		= NFS_FH(inode),
+ 	};
+-	struct nfs3_accessres	res = {
+-		.fattr		= &fattr,
+-	};
++	struct nfs3_accessres	res;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs3_procedures[NFS3PROC_ACCESS],
+ 		.rpc_argp	= &arg,
+@@ -193,7 +192,7 @@ static int nfs3_proc_access(struct inode
+ 		.rpc_cred	= entry->cred,
+ 	};
+ 	int mode = entry->mask;
+-	int status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  access\n");
+ 
+@@ -210,9 +209,13 @@ static int nfs3_proc_access(struct inode
+ 		if (mode & MAY_EXEC)
+ 			arg.access |= NFS3_ACCESS_EXECUTE;
+ 	}
+-	nfs_fattr_init(&fattr);
++
++	res.fattr = nfs_alloc_fattr();
++	if (res.fattr == NULL)
++		goto out;
++
+ 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+-	nfs_refresh_inode(inode, &fattr);
++	nfs_refresh_inode(inode, res.fattr);
+ 	if (status == 0) {
+ 		entry->mask = 0;
+ 		if (res.access & NFS3_ACCESS_READ)
+@@ -222,6 +225,8 @@ static int nfs3_proc_access(struct inode
+ 		if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
+ 			entry->mask |= MAY_EXEC;
+ 	}
++	nfs_free_fattr(res.fattr);
++out:
+ 	dprintk("NFS reply access: %d\n", status);
+ 	return status;
+ }
+@@ -229,7 +234,7 @@ static int nfs3_proc_access(struct inode
+ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
+ 		unsigned int pgbase, unsigned int pglen)
+ {
+-	struct nfs_fattr	fattr;
++	struct nfs_fattr	*fattr;
+ 	struct nfs3_readlinkargs args = {
+ 		.fh		= NFS_FH(inode),
+ 		.pgbase		= pgbase,
+@@ -239,14 +244,19 @@ static int nfs3_proc_readlink(struct ino
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs3_procedures[NFS3PROC_READLINK],
+ 		.rpc_argp	= &args,
+-		.rpc_resp	= &fattr,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  readlink\n");
+-	nfs_fattr_init(&fattr);
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto out;
++	msg.rpc_resp = fattr;
++
+ 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+-	nfs_refresh_inode(inode, &fattr);
++	nfs_refresh_inode(inode, fattr);
++	nfs_free_fattr(fattr);
++out:
+ 	dprintk("NFS reply readlink: %d\n", status);
+ 	return status;
+ }
+@@ -396,12 +406,17 @@ nfs3_proc_remove(struct inode *dir, stru
+ 		.rpc_argp = &arg,
+ 		.rpc_resp = &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  remove %s\n", name->name);
+-	nfs_fattr_init(&res.dir_attr);
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.dir_attr == NULL)
++		goto out;
++
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+-	nfs_post_op_update_inode(dir, &res.dir_attr);
++	nfs_post_op_update_inode(dir, res.dir_attr);
++	nfs_free_fattr(res.dir_attr);
++out:
+ 	dprintk("NFS reply remove: %d\n", status);
+ 	return status;
+ }
+@@ -419,7 +434,7 @@ nfs3_proc_unlink_done(struct rpc_task *t
+ 	if (nfs3_async_handle_jukebox(task, dir))
+ 		return 0;
+ 	res = task->tk_msg.rpc_resp;
+-	nfs_post_op_update_inode(dir, &res->dir_attr);
++	nfs_post_op_update_inode(dir, res->dir_attr);
+ 	return 1;
+ }
+ 
+@@ -427,7 +442,6 @@ static int
+ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
+ 		 struct inode *new_dir, struct qstr *new_name)
+ {
+-	struct nfs_fattr	old_dir_attr, new_dir_attr;
+ 	struct nfs3_renameargs	arg = {
+ 		.fromfh		= NFS_FH(old_dir),
+ 		.fromname	= old_name->name,
+@@ -436,23 +450,27 @@ nfs3_proc_rename(struct inode *old_dir, 
+ 		.toname		= new_name->name,
+ 		.tolen		= new_name->len
+ 	};
+-	struct nfs3_renameres	res = {
+-		.fromattr	= &old_dir_attr,
+-		.toattr		= &new_dir_attr
+-	};
++	struct nfs3_renameres res;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs3_procedures[NFS3PROC_RENAME],
+ 		.rpc_argp	= &arg,
+ 		.rpc_resp	= &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
+-	nfs_fattr_init(&old_dir_attr);
+-	nfs_fattr_init(&new_dir_attr);
++
++	res.fromattr = nfs_alloc_fattr();
++	res.toattr = nfs_alloc_fattr();
++	if (res.fromattr == NULL || res.toattr == NULL)
++		goto out;
++
+ 	status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
+-	nfs_post_op_update_inode(old_dir, &old_dir_attr);
+-	nfs_post_op_update_inode(new_dir, &new_dir_attr);
++	nfs_post_op_update_inode(old_dir, res.fromattr);
++	nfs_post_op_update_inode(new_dir, res.toattr);
++out:
++	nfs_free_fattr(res.toattr);
++	nfs_free_fattr(res.fromattr);
+ 	dprintk("NFS reply rename: %d\n", status);
+ 	return status;
+ }
+@@ -460,30 +478,32 @@ nfs3_proc_rename(struct inode *old_dir, 
+ static int
+ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
+ {
+-	struct nfs_fattr	dir_attr, fattr;
+ 	struct nfs3_linkargs	arg = {
+ 		.fromfh		= NFS_FH(inode),
+ 		.tofh		= NFS_FH(dir),
+ 		.toname		= name->name,
+ 		.tolen		= name->len
+ 	};
+-	struct nfs3_linkres	res = {
+-		.dir_attr	= &dir_attr,
+-		.fattr		= &fattr
+-	};
++	struct nfs3_linkres	res;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs3_procedures[NFS3PROC_LINK],
+ 		.rpc_argp	= &arg,
+ 		.rpc_resp	= &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  link %s\n", name->name);
+-	nfs_fattr_init(&dir_attr);
+-	nfs_fattr_init(&fattr);
++	res.fattr = nfs_alloc_fattr();
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.fattr == NULL || res.dir_attr == NULL)
++		goto out;
++
+ 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+-	nfs_post_op_update_inode(dir, &dir_attr);
+-	nfs_post_op_update_inode(inode, &fattr);
++	nfs_post_op_update_inode(dir, res.dir_attr);
++	nfs_post_op_update_inode(inode, res.fattr);
++out:
++	nfs_free_fattr(res.dir_attr);
++	nfs_free_fattr(res.fattr);
+ 	dprintk("NFS reply link: %d\n", status);
+ 	return status;
+ }
+@@ -554,7 +574,7 @@ out:
+ static int
+ nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
+ {
+-	struct nfs_fattr	dir_attr;
++	struct nfs_fattr	*dir_attr;
+ 	struct nfs3_diropargs	arg = {
+ 		.fh		= NFS_FH(dir),
+ 		.name		= name->name,
+@@ -563,14 +583,19 @@ nfs3_proc_rmdir(struct inode *dir, struc
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs3_procedures[NFS3PROC_RMDIR],
+ 		.rpc_argp	= &arg,
+-		.rpc_resp	= &dir_attr,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  rmdir %s\n", name->name);
+-	nfs_fattr_init(&dir_attr);
++	dir_attr = nfs_alloc_fattr();
++	if (dir_attr == NULL)
++		goto out;
++
++	msg.rpc_resp = dir_attr;
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+-	nfs_post_op_update_inode(dir, &dir_attr);
++	nfs_post_op_update_inode(dir, dir_attr);
++	nfs_free_fattr(dir_attr);
++out:
+ 	dprintk("NFS reply rmdir: %d\n", status);
+ 	return status;
+ }
+@@ -589,7 +614,6 @@ nfs3_proc_readdir(struct dentry *dentry,
+ 		  u64 cookie, struct page *page, unsigned int count, int plus)
+ {
+ 	struct inode		*dir = dentry->d_inode;
+-	struct nfs_fattr	dir_attr;
+ 	__be32			*verf = NFS_COOKIEVERF(dir);
+ 	struct nfs3_readdirargs	arg = {
+ 		.fh		= NFS_FH(dir),
+@@ -600,7 +624,6 @@ nfs3_proc_readdir(struct dentry *dentry,
+ 		.pages		= &page
+ 	};
+ 	struct nfs3_readdirres	res = {
+-		.dir_attr	= &dir_attr,
+ 		.verf		= verf,
+ 		.plus		= plus
+ 	};
+@@ -610,7 +633,7 @@ nfs3_proc_readdir(struct dentry *dentry,
+ 		.rpc_resp	= &res,
+ 		.rpc_cred	= cred
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	if (plus)
+ 		msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS];
+@@ -618,12 +641,17 @@ nfs3_proc_readdir(struct dentry *dentry,
+ 	dprintk("NFS call  readdir%s %d\n",
+ 			plus? "plus" : "", (unsigned int) cookie);
+ 
+-	nfs_fattr_init(&dir_attr);
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.dir_attr == NULL)
++		goto out;
++
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 
+ 	nfs_invalidate_atime(dir);
++	nfs_refresh_inode(dir, res.dir_attr);
+ 
+-	nfs_refresh_inode(dir, &dir_attr);
++	nfs_free_fattr(res.dir_attr);
++out:
+ 	dprintk("NFS reply readdir: %d\n", status);
+ 	return status;
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/nfs3xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs3xdr.c
+--- linux-2.6.34.noarch/fs/nfs/nfs3xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3xdr.c	2010-08-23 11:01:00.361593802 -0400
+@@ -762,7 +762,7 @@ nfs3_xdr_wccstat(struct rpc_rqst *req, _
+ static int
+ nfs3_xdr_removeres(struct rpc_rqst *req, __be32 *p, struct nfs_removeres *res)
+ {
+-	return nfs3_xdr_wccstat(req, p, &res->dir_attr);
++	return nfs3_xdr_wccstat(req, p, res->dir_attr);
+ }
+ 
+ /*
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nfs4_fs.h
+--- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-23 11:01:00.362574935 -0400
+@@ -206,14 +206,14 @@ extern ssize_t nfs4_listxattr(struct den
+ 
+ 
+ /* nfs4proc.c */
+-extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *);
+-extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
++extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
++extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
+ extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
+ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
+ extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
+-extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait);
++extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait);
+ extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
+ extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
+ extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
+@@ -286,7 +286,7 @@ extern void nfs4_put_lock_state(struct n
+ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
+ extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
+ 
+-extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter);
++extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
+ extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
+ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
+ extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4namespace.c.orig linux-2.6.34.noarch/fs/nfs/nfs4namespace.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4namespace.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4namespace.c	2010-08-23 11:01:00.363574219 -0400
+@@ -115,6 +115,7 @@ static struct vfsmount *try_location(str
+ 				     char *page, char *page2,
+ 				     const struct nfs4_fs_location *location)
+ {
++	const size_t addr_bufsize = sizeof(struct sockaddr_storage);
+ 	struct vfsmount *mnt = ERR_PTR(-ENOENT);
+ 	char *mnt_path;
+ 	unsigned int maxbuflen;
+@@ -126,9 +127,12 @@ static struct vfsmount *try_location(str
+ 	mountdata->mnt_path = mnt_path;
+ 	maxbuflen = mnt_path - 1 - page2;
+ 
++	mountdata->addr = kmalloc(addr_bufsize, GFP_KERNEL);
++	if (mountdata->addr == NULL)
++		return ERR_PTR(-ENOMEM);
++
+ 	for (s = 0; s < location->nservers; s++) {
+ 		const struct nfs4_string *buf = &location->servers[s];
+-		struct sockaddr_storage addr;
+ 
+ 		if (buf->len <= 0 || buf->len >= maxbuflen)
+ 			continue;
+@@ -137,11 +141,10 @@ static struct vfsmount *try_location(str
+ 			continue;
+ 
+ 		mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len,
+-				(struct sockaddr *)&addr, sizeof(addr));
++				mountdata->addr, addr_bufsize);
+ 		if (mountdata->addrlen == 0)
+ 			continue;
+ 
+-		mountdata->addr = (struct sockaddr *)&addr;
+ 		rpc_set_port(mountdata->addr, NFS_PORT);
+ 
+ 		memcpy(page2, buf->data, buf->len);
+@@ -156,6 +159,7 @@ static struct vfsmount *try_location(str
+ 		if (!IS_ERR(mnt))
+ 			break;
+ 	}
++	kfree(mountdata->addr);
+ 	return mnt;
+ }
+ 
+@@ -221,8 +225,8 @@ out:
+ 
+ /*
+  * nfs_do_refmount - handle crossing a referral on server
++ * @mnt_parent - mountpoint of referral
+  * @dentry - dentry of referral
+- * @nd - nameidata info
+  *
+  */
+ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry)
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-23 11:01:00.365544029 -0400
+@@ -70,6 +70,9 @@ static int nfs4_do_fsinfo(struct nfs_ser
+ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
+ static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
+ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
++static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
++			    struct nfs_fattr *fattr, struct iattr *sattr,
++			    struct nfs4_state *state);
+ 
+ /* Prevent leaks of NFSv4 errors into userland */
+ static int nfs4_map_errors(int err)
+@@ -714,17 +717,18 @@ static void nfs4_init_opendata_res(struc
+ 
+ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
+ 		struct nfs4_state_owner *sp, fmode_t fmode, int flags,
+-		const struct iattr *attrs)
++		const struct iattr *attrs,
++		gfp_t gfp_mask)
+ {
+ 	struct dentry *parent = dget_parent(path->dentry);
+ 	struct inode *dir = parent->d_inode;
+ 	struct nfs_server *server = NFS_SERVER(dir);
+ 	struct nfs4_opendata *p;
+ 
+-	p = kzalloc(sizeof(*p), GFP_KERNEL);
++	p = kzalloc(sizeof(*p), gfp_mask);
+ 	if (p == NULL)
+ 		goto err;
+-	p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
++	p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask);
+ 	if (p->o_arg.seqid == NULL)
+ 		goto err_free;
+ 	path_get(path);
+@@ -1060,7 +1064,7 @@ static struct nfs4_opendata *nfs4_open_r
+ {
+ 	struct nfs4_opendata *opendata;
+ 
+-	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL);
++	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL, GFP_NOFS);
+ 	if (opendata == NULL)
+ 		return ERR_PTR(-ENOMEM);
+ 	opendata->state = state;
+@@ -1648,7 +1652,7 @@ static int _nfs4_do_open(struct inode *d
+ 	if (path->dentry->d_inode != NULL)
+ 		nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode);
+ 	status = -ENOMEM;
+-	opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr);
++	opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr, GFP_KERNEL);
+ 	if (opendata == NULL)
+ 		goto err_put_state_owner;
+ 
+@@ -1659,15 +1663,24 @@ static int _nfs4_do_open(struct inode *d
+ 	if (status != 0)
+ 		goto err_opendata_put;
+ 
+-	if (opendata->o_arg.open_flags & O_EXCL)
+-		nfs4_exclusive_attrset(opendata, sattr);
+-
+ 	state = nfs4_opendata_to_nfs4_state(opendata);
+ 	status = PTR_ERR(state);
+ 	if (IS_ERR(state))
+ 		goto err_opendata_put;
+ 	if (server->caps & NFS_CAP_POSIX_LOCK)
+ 		set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
++
++	if (opendata->o_arg.open_flags & O_EXCL) {
++		nfs4_exclusive_attrset(opendata, sattr);
++
++		nfs_fattr_init(opendata->o_res.f_attr);
++		status = nfs4_do_setattr(state->inode, cred,
++				opendata->o_res.f_attr, sattr,
++				state);
++		if (status == 0)
++			nfs_setattr_update_inode(state->inode, sattr);
++		nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
++	}
+ 	nfs4_opendata_put(opendata);
+ 	nfs4_put_state_owner(sp);
+ 	*res = state;
+@@ -1914,7 +1927,7 @@ static const struct rpc_call_ops nfs4_cl
+  *
+  * NOTE: Caller must be holding the sp->so_owner semaphore!
+  */
+-int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
++int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait)
+ {
+ 	struct nfs_server *server = NFS_SERVER(state->inode);
+ 	struct nfs4_closedata *calldata;
+@@ -1933,7 +1946,7 @@ int nfs4_do_close(struct path *path, str
+ 	};
+ 	int status = -ENOMEM;
+ 
+-	calldata = kzalloc(sizeof(*calldata), GFP_KERNEL);
++	calldata = kzalloc(sizeof(*calldata), gfp_mask);
+ 	if (calldata == NULL)
+ 		goto out;
+ 	calldata->inode = state->inode;
+@@ -1941,7 +1954,7 @@ int nfs4_do_close(struct path *path, str
+ 	calldata->arg.fh = NFS_FH(state->inode);
+ 	calldata->arg.stateid = &state->open_stateid;
+ 	/* Serialization for the sequence id */
+-	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
++	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid, gfp_mask);
+ 	if (calldata->arg.seqid == NULL)
+ 		goto out_free_calldata;
+ 	calldata->arg.fmode = 0;
+@@ -2404,14 +2417,12 @@ static int nfs4_proc_lookup(struct inode
+ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+ {
+ 	struct nfs_server *server = NFS_SERVER(inode);
+-	struct nfs_fattr fattr;
+ 	struct nfs4_accessargs args = {
+ 		.fh = NFS_FH(inode),
+ 		.bitmask = server->attr_bitmask,
+ 	};
+ 	struct nfs4_accessres res = {
+ 		.server = server,
+-		.fattr = &fattr,
+ 	};
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS],
+@@ -2438,7 +2449,11 @@ static int _nfs4_proc_access(struct inod
+ 		if (mode & MAY_EXEC)
+ 			args.access |= NFS4_ACCESS_EXECUTE;
+ 	}
+-	nfs_fattr_init(&fattr);
++
++	res.fattr = nfs_alloc_fattr();
++	if (res.fattr == NULL)
++		return -ENOMEM;
++
+ 	status = nfs4_call_sync(server, &msg, &args, &res, 0);
+ 	if (!status) {
+ 		entry->mask = 0;
+@@ -2448,8 +2463,9 @@ static int _nfs4_proc_access(struct inod
+ 			entry->mask |= MAY_WRITE;
+ 		if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE))
+ 			entry->mask |= MAY_EXEC;
+-		nfs_refresh_inode(inode, &fattr);
++		nfs_refresh_inode(inode, res.fattr);
+ 	}
++	nfs_free_fattr(res.fattr);
+ 	return status;
+ }
+ 
+@@ -2562,13 +2578,6 @@ nfs4_proc_create(struct inode *dir, stru
+ 	}
+ 	d_add(dentry, igrab(state->inode));
+ 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+-	if (flags & O_EXCL) {
+-		struct nfs_fattr fattr;
+-		status = nfs4_do_setattr(state->inode, cred, &fattr, sattr, state);
+-		if (status == 0)
+-			nfs_setattr_update_inode(state->inode, sattr);
+-		nfs_post_op_update_inode(state->inode, &fattr);
+-	}
+ 	if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
+ 		status = nfs4_intent_set_file(nd, &path, state, fmode);
+ 	else
+@@ -2596,14 +2605,19 @@ static int _nfs4_proc_remove(struct inod
+ 		.rpc_argp = &args,
+ 		.rpc_resp = &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
++
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.dir_attr == NULL)
++		goto out;
+ 
+-	nfs_fattr_init(&res.dir_attr);
+ 	status = nfs4_call_sync(server, &msg, &args, &res, 1);
+ 	if (status == 0) {
+ 		update_changeattr(dir, &res.cinfo);
+-		nfs_post_op_update_inode(dir, &res.dir_attr);
++		nfs_post_op_update_inode(dir, res.dir_attr);
+ 	}
++	nfs_free_fattr(res.dir_attr);
++out:
+ 	return status;
+ }
+ 
+@@ -2638,7 +2652,7 @@ static int nfs4_proc_unlink_done(struct 
+ 	if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
+ 		return 0;
+ 	update_changeattr(dir, &res->cinfo);
+-	nfs_post_op_update_inode(dir, &res->dir_attr);
++	nfs_post_op_update_inode(dir, res->dir_attr);
+ 	return 1;
+ }
+ 
+@@ -2653,29 +2667,31 @@ static int _nfs4_proc_rename(struct inod
+ 		.new_name = new_name,
+ 		.bitmask = server->attr_bitmask,
+ 	};
+-	struct nfs_fattr old_fattr, new_fattr;
+ 	struct nfs4_rename_res res = {
+ 		.server = server,
+-		.old_fattr = &old_fattr,
+-		.new_fattr = &new_fattr,
+ 	};
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME],
+ 		.rpc_argp = &arg,
+ 		.rpc_resp = &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 	
+-	nfs_fattr_init(res.old_fattr);
+-	nfs_fattr_init(res.new_fattr);
+-	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
++	res.old_fattr = nfs_alloc_fattr();
++	res.new_fattr = nfs_alloc_fattr();
++	if (res.old_fattr == NULL || res.new_fattr == NULL)
++		goto out;
+ 
++	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
+ 	if (!status) {
+ 		update_changeattr(old_dir, &res.old_cinfo);
+ 		nfs_post_op_update_inode(old_dir, res.old_fattr);
+ 		update_changeattr(new_dir, &res.new_cinfo);
+ 		nfs_post_op_update_inode(new_dir, res.new_fattr);
+ 	}
++out:
++	nfs_free_fattr(res.new_fattr);
++	nfs_free_fattr(res.old_fattr);
+ 	return status;
+ }
+ 
+@@ -2702,28 +2718,30 @@ static int _nfs4_proc_link(struct inode 
+ 		.name   = name,
+ 		.bitmask = server->attr_bitmask,
+ 	};
+-	struct nfs_fattr fattr, dir_attr;
+ 	struct nfs4_link_res res = {
+ 		.server = server,
+-		.fattr = &fattr,
+-		.dir_attr = &dir_attr,
+ 	};
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK],
+ 		.rpc_argp = &arg,
+ 		.rpc_resp = &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
++
++	res.fattr = nfs_alloc_fattr();
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.fattr == NULL || res.dir_attr == NULL)
++		goto out;
+ 
+-	nfs_fattr_init(res.fattr);
+-	nfs_fattr_init(res.dir_attr);
+ 	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
+ 	if (!status) {
+ 		update_changeattr(dir, &res.cinfo);
+ 		nfs_post_op_update_inode(dir, res.dir_attr);
+ 		nfs_post_op_update_inode(inode, res.fattr);
+ 	}
+-
++out:
++	nfs_free_fattr(res.dir_attr);
++	nfs_free_fattr(res.fattr);
+ 	return status;
+ }
+ 
+@@ -3146,23 +3164,31 @@ static void nfs4_proc_commit_setup(struc
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
+ }
+ 
++struct nfs4_renewdata {
++	struct nfs_client	*client;	/* client the async RENEW was queued for */
++	unsigned long		timestamp;	/* jiffies sampled when the RENEW was queued */
++};
++
+ /*
+  * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special
+  * standalone procedure for queueing an asynchronous RENEW.
+  */
+-static void nfs4_renew_release(void *data)
++static void nfs4_renew_release(void *calldata)
+ {
+-	struct nfs_client *clp = data;
++	struct nfs4_renewdata *data = calldata;
++	struct nfs_client *clp = data->client;
+ 
+ 	if (atomic_read(&clp->cl_count) > 1)
+ 		nfs4_schedule_state_renewal(clp);
+ 	nfs_put_client(clp);
++	kfree(data);
+ }
+ 
+-static void nfs4_renew_done(struct rpc_task *task, void *data)
++static void nfs4_renew_done(struct rpc_task *task, void *calldata)
+ {
+-	struct nfs_client *clp = data;
+-	unsigned long timestamp = task->tk_start;
++	struct nfs4_renewdata *data = calldata;
++	struct nfs_client *clp = data->client;
++	unsigned long timestamp = data->timestamp;
+ 
+ 	if (task->tk_status < 0) {
+ 		/* Unless we're shutting down, schedule state recovery! */
+@@ -3188,11 +3214,17 @@ int nfs4_proc_async_renew(struct nfs_cli
+ 		.rpc_argp	= clp,
+ 		.rpc_cred	= cred,
+ 	};
++	struct nfs4_renewdata *data;
+ 
+ 	if (!atomic_inc_not_zero(&clp->cl_count))
+ 		return -EIO;
++	data = kmalloc(sizeof(*data), GFP_KERNEL);
++	if (data == NULL)
++		return -ENOMEM;
++	data->client = clp;
++	data->timestamp = jiffies;
+ 	return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
+-			&nfs4_renew_ops, clp);
++			&nfs4_renew_ops, data);
+ }
+ 
+ int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
+@@ -3494,7 +3526,9 @@ nfs4_async_handle_error(struct rpc_task 
+ 	return _nfs4_async_handle_error(task, server, server->nfs_client, state);
+ }
+ 
+-int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
++int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
++		unsigned short port, struct rpc_cred *cred,
++		struct nfs4_setclientid_res *res)
+ {
+ 	nfs4_verifier sc_verifier;
+ 	struct nfs4_setclientid setclientid = {
+@@ -3504,7 +3538,7 @@ int nfs4_proc_setclientid(struct nfs_cli
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
+ 		.rpc_argp = &setclientid,
+-		.rpc_resp = clp,
++		.rpc_resp = res,
+ 		.rpc_cred = cred,
+ 	};
+ 	__be32 *p;
+@@ -3547,12 +3581,14 @@ int nfs4_proc_setclientid(struct nfs_cli
+ 	return status;
+ }
+ 
+-static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
++static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp,
++		struct nfs4_setclientid_res *arg,
++		struct rpc_cred *cred)
+ {
+ 	struct nfs_fsinfo fsinfo;
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM],
+-		.rpc_argp = clp,
++		.rpc_argp = arg,
+ 		.rpc_resp = &fsinfo,
+ 		.rpc_cred = cred,
+ 	};
+@@ -3570,12 +3606,14 @@ static int _nfs4_proc_setclientid_confir
+ 	return status;
+ }
+ 
+-int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
++int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
++		struct nfs4_setclientid_res *arg,
++		struct rpc_cred *cred)
+ {
+ 	long timeout = 0;
+ 	int err;
+ 	do {
+-		err = _nfs4_proc_setclientid_confirm(clp, cred);
++		err = _nfs4_proc_setclientid_confirm(clp, arg, cred);
+ 		switch (err) {
+ 			case 0:
+ 				return err;
+@@ -3667,7 +3705,7 @@ static int _nfs4_proc_delegreturn(struct
+ 	};
+ 	int status = 0;
+ 
+-	data = kzalloc(sizeof(*data), GFP_KERNEL);
++	data = kzalloc(sizeof(*data), GFP_NOFS);
+ 	if (data == NULL)
+ 		return -ENOMEM;
+ 	data->args.fhandle = &data->fh;
+@@ -3823,7 +3861,7 @@ static struct nfs4_unlockdata *nfs4_allo
+ 	struct nfs4_unlockdata *p;
+ 	struct inode *inode = lsp->ls_state->inode;
+ 
+-	p = kzalloc(sizeof(*p), GFP_KERNEL);
++	p = kzalloc(sizeof(*p), GFP_NOFS);
+ 	if (p == NULL)
+ 		return NULL;
+ 	p->arg.fh = NFS_FH(inode);
+@@ -3961,7 +3999,7 @@ static int nfs4_proc_unlck(struct nfs4_s
+ 	if (test_bit(NFS_DELEGATED_STATE, &state->flags))
+ 		goto out;
+ 	lsp = request->fl_u.nfs4_fl.owner;
+-	seqid = nfs_alloc_seqid(&lsp->ls_seqid);
++	seqid = nfs_alloc_seqid(&lsp->ls_seqid, GFP_KERNEL);
+ 	status = -ENOMEM;
+ 	if (seqid == NULL)
+ 		goto out;
+@@ -3989,22 +4027,23 @@ struct nfs4_lockdata {
+ };
+ 
+ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
+-		struct nfs_open_context *ctx, struct nfs4_lock_state *lsp)
++		struct nfs_open_context *ctx, struct nfs4_lock_state *lsp,
++		gfp_t gfp_mask)
+ {
+ 	struct nfs4_lockdata *p;
+ 	struct inode *inode = lsp->ls_state->inode;
+ 	struct nfs_server *server = NFS_SERVER(inode);
+ 
+-	p = kzalloc(sizeof(*p), GFP_KERNEL);
++	p = kzalloc(sizeof(*p), gfp_mask);
+ 	if (p == NULL)
+ 		return NULL;
+ 
+ 	p->arg.fh = NFS_FH(inode);
+ 	p->arg.fl = &p->fl;
+-	p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid);
++	p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid, gfp_mask);
+ 	if (p->arg.open_seqid == NULL)
+ 		goto out_free;
+-	p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
++	p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid, gfp_mask);
+ 	if (p->arg.lock_seqid == NULL)
+ 		goto out_free_seqid;
+ 	p->arg.lock_stateid = &lsp->ls_stateid;
+@@ -4158,7 +4197,8 @@ static int _nfs4_do_setlk(struct nfs4_st
+ 
+ 	dprintk("%s: begin!\n", __func__);
+ 	data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
+-			fl->fl_u.nfs4_fl.owner);
++			fl->fl_u.nfs4_fl.owner,
++			recovery_type == NFS_LOCK_NEW ? GFP_KERNEL : GFP_NOFS);
+ 	if (data == NULL)
+ 		return -ENOMEM;
+ 	if (IS_SETLKW(cmd))
+@@ -4647,7 +4687,7 @@ static int nfs4_reset_slot_table(struct 
+ 	if (max_reqs != tbl->max_slots) {
+ 		ret = -ENOMEM;
+ 		new = kmalloc(max_reqs * sizeof(struct nfs4_slot),
+-			      GFP_KERNEL);
++			      GFP_NOFS);
+ 		if (!new)
+ 			goto out;
+ 		ret = 0;
+@@ -4712,7 +4752,7 @@ static int nfs4_init_slot_table(struct n
+ 
+ 	dprintk("--> %s: max_reqs=%u\n", __func__, max_slots);
+ 
+-	slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_KERNEL);
++	slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_NOFS);
+ 	if (!slot)
+ 		goto out;
+ 	ret = 0;
+@@ -4761,7 +4801,7 @@ struct nfs4_session *nfs4_alloc_session(
+ 	struct nfs4_session *session;
+ 	struct nfs4_slot_table *tbl;
+ 
+-	session = kzalloc(sizeof(struct nfs4_session), GFP_KERNEL);
++	session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
+ 	if (!session)
+ 		return NULL;
+ 
+@@ -5105,8 +5145,8 @@ static int nfs41_proc_async_sequence(str
+ 
+ 	if (!atomic_inc_not_zero(&clp->cl_count))
+ 		return -EIO;
+-	args = kzalloc(sizeof(*args), GFP_KERNEL);
+-	res = kzalloc(sizeof(*res), GFP_KERNEL);
++	args = kzalloc(sizeof(*args), GFP_NOFS);
++	res = kzalloc(sizeof(*res), GFP_NOFS);
+ 	if (!args || !res) {
+ 		kfree(args);
+ 		kfree(res);
+@@ -5207,7 +5247,7 @@ static int nfs41_proc_reclaim_complete(s
+ 	int status = -ENOMEM;
+ 
+ 	dprintk("--> %s\n", __func__);
+-	calldata = kzalloc(sizeof(*calldata), GFP_KERNEL);
++	calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
+ 	if (calldata == NULL)
+ 		goto out;
+ 	calldata->clp = clp;
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-23 11:01:00.367574218 -0400
+@@ -62,6 +62,7 @@ static LIST_HEAD(nfs4_clientid_list);
+ 
+ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
+ {
++	struct nfs4_setclientid_res clid;
+ 	unsigned short port;
+ 	int status;
+ 
+@@ -69,11 +70,15 @@ int nfs4_init_clientid(struct nfs_client
+ 	if (clp->cl_addr.ss_family == AF_INET6)
+ 		port = nfs_callback_tcpport6;
+ 
+-	status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred);
+-	if (status == 0)
+-		status = nfs4_proc_setclientid_confirm(clp, cred);
+-	if (status == 0)
+-		nfs4_schedule_state_renewal(clp);
++	status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
++	if (status != 0)
++		goto out;
++	status = nfs4_proc_setclientid_confirm(clp, &clid, cred);
++	if (status != 0)
++		goto out;
++	clp->cl_clientid = clid.clientid;
++	nfs4_schedule_state_renewal(clp);
++out:
+ 	return status;
+ }
+ 
+@@ -361,7 +366,7 @@ nfs4_alloc_state_owner(void)
+ {
+ 	struct nfs4_state_owner *sp;
+ 
+-	sp = kzalloc(sizeof(*sp),GFP_KERNEL);
++	sp = kzalloc(sizeof(*sp),GFP_NOFS);
+ 	if (!sp)
+ 		return NULL;
+ 	spin_lock_init(&sp->so_lock);
+@@ -435,7 +440,7 @@ nfs4_alloc_open_state(void)
+ {
+ 	struct nfs4_state *state;
+ 
+-	state = kzalloc(sizeof(*state), GFP_KERNEL);
++	state = kzalloc(sizeof(*state), GFP_NOFS);
+ 	if (!state)
+ 		return NULL;
+ 	atomic_set(&state->count, 1);
+@@ -537,7 +542,8 @@ void nfs4_put_open_state(struct nfs4_sta
+ /*
+  * Close the current file.
+  */
+-static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fmode, int wait)
++static void __nfs4_close(struct path *path, struct nfs4_state *state,
++		fmode_t fmode, gfp_t gfp_mask, int wait)
+ {
+ 	struct nfs4_state_owner *owner = state->owner;
+ 	int call_close = 0;
+@@ -578,17 +584,17 @@ static void __nfs4_close(struct path *pa
+ 		nfs4_put_open_state(state);
+ 		nfs4_put_state_owner(owner);
+ 	} else
+-		nfs4_do_close(path, state, wait);
++		nfs4_do_close(path, state, gfp_mask, wait);
+ }
+ 
+ void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
+ {
+-	__nfs4_close(path, state, fmode, 0);
++	__nfs4_close(path, state, fmode, GFP_NOFS, 0);
+ }
+ 
+ void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode)
+ {
+-	__nfs4_close(path, state, fmode, 1);
++	__nfs4_close(path, state, fmode, GFP_KERNEL, 1);
+ }
+ 
+ /*
+@@ -618,7 +624,7 @@ static struct nfs4_lock_state *nfs4_allo
+ 	struct nfs4_lock_state *lsp;
+ 	struct nfs_client *clp = state->owner->so_client;
+ 
+-	lsp = kzalloc(sizeof(*lsp), GFP_KERNEL);
++	lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
+ 	if (lsp == NULL)
+ 		return NULL;
+ 	rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue");
+@@ -754,11 +760,11 @@ void nfs4_copy_stateid(nfs4_stateid *dst
+ 	nfs4_put_lock_state(lsp);
+ }
+ 
+-struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
++struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask)
+ {
+ 	struct nfs_seqid *new;
+ 
+-	new = kmalloc(sizeof(*new), GFP_KERNEL);
++	new = kmalloc(sizeof(*new), gfp_mask);
+ 	if (new != NULL) {
+ 		new->sequence = counter;
+ 		INIT_LIST_HEAD(&new->list);
+@@ -1347,7 +1353,7 @@ static int nfs4_recall_slot(struct nfs_c
+ 
+ 	nfs4_begin_drain_session(clp);
+ 	new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot),
+-		      GFP_KERNEL);
++		      GFP_NOFS);
+         if (!new)
+ 		return -ENOMEM;
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-23 11:00:23.792491380 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-23 11:01:00.369544055 -0400
+@@ -1504,14 +1504,14 @@ static void encode_setclientid(struct xd
+ 	hdr->replen += decode_setclientid_maxsz;
+ }
+ 
+-static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state, struct compound_hdr *hdr)
++static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr)
+ {
+ 	__be32 *p;
+ 
+ 	p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE);
+ 	*p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM);
+-	p = xdr_encode_hyper(p, client_state->cl_clientid);
+-	xdr_encode_opaque_fixed(p, client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
++	p = xdr_encode_hyper(p, arg->clientid);
++	xdr_encode_opaque_fixed(p, arg->confirm.data, NFS4_VERIFIER_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_setclientid_confirm_maxsz;
+ }
+@@ -2324,7 +2324,7 @@ static int nfs4_xdr_enc_setclientid(stru
+ /*
+  * a SETCLIENTID_CONFIRM request
+  */
+-static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp)
++static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid_res *arg)
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+@@ -2334,7 +2334,7 @@ static int nfs4_xdr_enc_setclientid_conf
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ 	encode_compound_hdr(&xdr, req, &hdr);
+-	encode_setclientid_confirm(&xdr, clp, &hdr);
++	encode_setclientid_confirm(&xdr, arg, &hdr);
+ 	encode_putrootfh(&xdr, &hdr);
+ 	encode_fsinfo(&xdr, lease_bitmap, &hdr);
+ 	encode_nops(&hdr);
+@@ -4397,7 +4397,7 @@ out_overflow:
+ 	return -EIO;
+ }
+ 
+-static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
++static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_res *res)
+ {
+ 	__be32 *p;
+ 	uint32_t opnum;
+@@ -4417,8 +4417,8 @@ static int decode_setclientid(struct xdr
+ 		p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE);
+ 		if (unlikely(!p))
+ 			goto out_overflow;
+-		p = xdr_decode_hyper(p, &clp->cl_clientid);
+-		memcpy(clp->cl_confirm.data, p, NFS4_VERIFIER_SIZE);
++		p = xdr_decode_hyper(p, &res->clientid);
++		memcpy(res->confirm.data, p, NFS4_VERIFIER_SIZE);
+ 	} else if (nfserr == NFSERR_CLID_INUSE) {
+ 		uint32_t len;
+ 
+@@ -4815,7 +4815,7 @@ static int nfs4_xdr_dec_remove(struct rp
+ 		goto out;
+ 	if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
+ 		goto out;
+-	decode_getfattr(&xdr, &res->dir_attr, res->server,
++	decode_getfattr(&xdr, res->dir_attr, res->server,
+ 			!RPC_IS_ASYNC(rqstp->rq_task));
+ out:
+ 	return status;
+@@ -5498,7 +5498,7 @@ static int nfs4_xdr_dec_renew(struct rpc
+  * Decode SETCLIENTID response
+  */
+ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
+-		struct nfs_client *clp)
++		struct nfs4_setclientid_res *res)
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr;
+@@ -5507,7 +5507,7 @@ static int nfs4_xdr_dec_setclientid(stru
+ 	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+ 	status = decode_compound_hdr(&xdr, &hdr);
+ 	if (!status)
+-		status = decode_setclientid(&xdr, clp);
++		status = decode_setclientid(&xdr, res);
+ 	return status;
+ }
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/nfsroot.c.orig linux-2.6.34.noarch/fs/nfs/nfsroot.c
+--- linux-2.6.34.noarch/fs/nfs/nfsroot.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfsroot.c	2010-08-23 11:01:00.371574358 -0400
+@@ -488,7 +488,6 @@ static int __init root_nfs_ports(void)
+  */
+ static int __init root_nfs_get_handle(void)
+ {
+-	struct nfs_fh fh;
+ 	struct sockaddr_in sin;
+ 	unsigned int auth_flav_len = 0;
+ 	struct nfs_mount_request request = {
+@@ -499,21 +498,24 @@ static int __init root_nfs_get_handle(vo
+ 					NFS_MNT3_VERSION : NFS_MNT_VERSION,
+ 		.protocol	= (nfs_data.flags & NFS_MOUNT_TCP) ?
+ 					XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP,
+-		.fh		= &fh,
+ 		.auth_flav_len	= &auth_flav_len,
+ 	};
+-	int status;
++	int status = -ENOMEM;
+ 
++	request.fh = nfs_alloc_fhandle();
++	if (!request.fh)
++		goto out;
+ 	set_sockaddr(&sin, servaddr, htons(mount_port));
+ 	status = nfs_mount(&request);
+ 	if (status < 0)
+ 		printk(KERN_ERR "Root-NFS: Server returned error %d "
+ 				"while mounting %s\n", status, nfs_export_path);
+ 	else {
+-		nfs_data.root.size = fh.size;
+-		memcpy(nfs_data.root.data, fh.data, fh.size);
++		nfs_data.root.size = request.fh->size;
++		memcpy(&nfs_data.root.data, request.fh->data, request.fh->size);
+ 	}
+-
++	nfs_free_fhandle(request.fh);
++out:
+ 	return status;
+ }
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/pagelist.c
+--- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-23 11:01:00.371574358 -0400
+@@ -60,16 +60,10 @@ nfs_create_request(struct nfs_open_conte
+ {
+ 	struct nfs_page		*req;
+ 
+-	for (;;) {
+-		/* try to allocate the request struct */
+-		req = nfs_page_alloc();
+-		if (req != NULL)
+-			break;
+-
+-		if (fatal_signal_pending(current))
+-			return ERR_PTR(-ERESTARTSYS);
+-		yield();
+-	}
++	/* try to allocate the request struct */
++	req = nfs_page_alloc();
++	if (req == NULL)
++		return ERR_PTR(-ENOMEM);
+ 
+ 	/* Initialize the request struct. Initially, we assume a
+ 	 * long write-back delay. This will be adjusted in
+diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.c
+--- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-23 11:01:00.372574292 -0400
+@@ -224,35 +224,60 @@ static int nfs_proc_readlink(struct inod
+ 	return status;
+ }
+ 
++struct nfs_createdata {
++	struct nfs_createargs arg;	/* RPC arguments */
++	struct nfs_diropok res;		/* RPC result; points at fhandle/fattr below */
++	struct nfs_fh fhandle;		/* storage behind res.fh */
++	struct nfs_fattr fattr;		/* storage behind res.fattr */
++};
++
++static struct nfs_createdata *nfs_alloc_createdata(struct inode *dir,
++		struct dentry *dentry, struct iattr *sattr)
++{
++	struct nfs_createdata *data;
++
++	data = kmalloc(sizeof(*data), GFP_KERNEL);
++
++	if (data != NULL) {
++		data->arg.fh = NFS_FH(dir);
++		data->arg.name = dentry->d_name.name;
++		data->arg.len = dentry->d_name.len;
++		data->arg.sattr = sattr;
++		nfs_fattr_init(&data->fattr);
++		data->fhandle.size = 0;
++		data->res.fh = &data->fhandle;	/* results land inside *data */
++		data->res.fattr = &data->fattr;
++	}
++	return data;	/* NULL if kmalloc() failed; free with nfs_free_createdata() */
++}
++
++static void nfs_free_createdata(const struct nfs_createdata *data)
++{
++	kfree(data);	/* one allocation holds args, result, fh and fattr */
++}
++
+ static int
+ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+ 		int flags, struct nameidata *nd)
+ {
+-	struct nfs_fh		fhandle;
+-	struct nfs_fattr	fattr;
+-	struct nfs_createargs	arg = {
+-		.fh		= NFS_FH(dir),
+-		.name		= dentry->d_name.name,
+-		.len		= dentry->d_name.len,
+-		.sattr		= sattr
+-	};
+-	struct nfs_diropok	res = {
+-		.fh		= &fhandle,
+-		.fattr		= &fattr
+-	};
++	struct nfs_createdata *data;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs_procedures[NFSPROC_CREATE],
+-		.rpc_argp	= &arg,
+-		.rpc_resp	= &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+-	nfs_fattr_init(&fattr);
+ 	dprintk("NFS call  create %s\n", dentry->d_name.name);
++	data = nfs_alloc_createdata(dir, dentry, sattr);
++	if (data == NULL)
++		goto out;
++	msg.rpc_argp = &data->arg;
++	msg.rpc_resp = &data->res;
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+ 	if (status == 0)
+-		status = nfs_instantiate(dentry, &fhandle, &fattr);
++		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
++	nfs_free_createdata(data);
++out:
+ 	dprintk("NFS reply create: %d\n", status);
+ 	return status;
+ }
+@@ -264,24 +289,12 @@ static int
+ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+ 	       dev_t rdev)
+ {
+-	struct nfs_fh fhandle;
+-	struct nfs_fattr fattr;
+-	struct nfs_createargs	arg = {
+-		.fh		= NFS_FH(dir),
+-		.name		= dentry->d_name.name,
+-		.len		= dentry->d_name.len,
+-		.sattr		= sattr
+-	};
+-	struct nfs_diropok	res = {
+-		.fh		= &fhandle,
+-		.fattr		= &fattr
+-	};
++	struct nfs_createdata *data;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs_procedures[NFSPROC_CREATE],
+-		.rpc_argp	= &arg,
+-		.rpc_resp	= &res,
+ 	};
+-	int status, mode;
++	umode_t mode;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  mknod %s\n", dentry->d_name.name);
+ 
+@@ -294,17 +307,24 @@ nfs_proc_mknod(struct inode *dir, struct
+ 		sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */
+ 	}
+ 
+-	nfs_fattr_init(&fattr);
++	data = nfs_alloc_createdata(dir, dentry, sattr);
++	if (data == NULL)
++		goto out;
++	msg.rpc_argp = &data->arg;
++	msg.rpc_resp = &data->res;
++
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+ 
+ 	if (status == -EINVAL && S_ISFIFO(mode)) {
+ 		sattr->ia_mode = mode;
+-		nfs_fattr_init(&fattr);
++		nfs_fattr_init(data->res.fattr);
+ 		status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	}
+ 	if (status == 0)
+-		status = nfs_instantiate(dentry, &fhandle, &fattr);
++		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
++	nfs_free_createdata(data);
++out:
+ 	dprintk("NFS reply mknod: %d\n", status);
+ 	return status;
+ }
+@@ -398,8 +418,8 @@ static int
+ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
+ 		 unsigned int len, struct iattr *sattr)
+ {
+-	struct nfs_fh fhandle;
+-	struct nfs_fattr fattr;
++	struct nfs_fh *fh = NULL;	/* NULL so 'out' can free unconditionally */
++	struct nfs_fattr *fattr = NULL;
+ 	struct nfs_symlinkargs	arg = {
+ 		.fromfh		= NFS_FH(dir),
+ 		.fromname	= dentry->d_name.name,
+@@ -412,12 +432,18 @@ nfs_proc_symlink(struct inode *dir, stru
+ 		.rpc_proc	= &nfs_procedures[NFSPROC_SYMLINK],
+ 		.rpc_argp	= &arg,
+ 	};
+-	int			status;
++	int status = -ENAMETOOLONG;
++
++	dprintk("NFS call  symlink %s\n", dentry->d_name.name);
+ 
+ 	if (len > NFS2_MAXPATHLEN)
+-		return -ENAMETOOLONG;
++		goto out;
+ 
+-	dprintk("NFS call  symlink %s\n", dentry->d_name.name);
++	fh = nfs_alloc_fhandle();
++	fattr = nfs_alloc_fattr();
++	status = -ENOMEM;
++	if (fh == NULL || fattr == NULL)
++		goto out;	/* 'out' frees whichever allocation succeeded */
+ 
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+@@ -427,12 +453,12 @@ nfs_proc_symlink(struct inode *dir, stru
+ 	 * filehandle size to zero indicates to nfs_instantiate that it
+ 	 * should fill in the data with a LOOKUP call on the wire.
+ 	 */
+-	if (status == 0) {
+-		nfs_fattr_init(&fattr);
+-		fhandle.size = 0;
+-		status = nfs_instantiate(dentry, &fhandle, &fattr);
+-	}
++	if (status == 0)
++		status = nfs_instantiate(dentry, fh, fattr);
+ 
++out:
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fh);
+ 	dprintk("NFS reply symlink: %d\n", status);
+ 	return status;
+ }
+@@ -440,31 +466,25 @@ nfs_proc_symlink(struct inode *dir, stru
+ static int
+ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
+ {
+-	struct nfs_fh fhandle;
+-	struct nfs_fattr fattr;
+-	struct nfs_createargs	arg = {
+-		.fh		= NFS_FH(dir),
+-		.name		= dentry->d_name.name,
+-		.len		= dentry->d_name.len,
+-		.sattr		= sattr
+-	};
+-	struct nfs_diropok	res = {
+-		.fh		= &fhandle,
+-		.fattr		= &fattr
+-	};
++	struct nfs_createdata *data;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs_procedures[NFSPROC_MKDIR],
+-		.rpc_argp	= &arg,
+-		.rpc_resp	= &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
+-	nfs_fattr_init(&fattr);
++	data = nfs_alloc_createdata(dir, dentry, sattr);
++	if (data == NULL)
++		goto out;
++	msg.rpc_argp = &data->arg;
++	msg.rpc_resp = &data->res;
++
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+ 	if (status == 0)
+-		status = nfs_instantiate(dentry, &fhandle, &fattr);
++		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
++	nfs_free_createdata(data);
++out:
+ 	dprintk("NFS reply mkdir: %d\n", status);
+ 	return status;
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.c
+--- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-23 11:01:00.373574317 -0400
+@@ -40,7 +40,7 @@ static mempool_t *nfs_rdata_mempool;
+ 
+ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+ {
+-	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);
++	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_KERNEL);
+ 
+ 	if (p) {
+ 		memset(p, 0, sizeof(*p));
+@@ -50,7 +50,7 @@ struct nfs_read_data *nfs_readdata_alloc
+ 		if (pagecount <= ARRAY_SIZE(p->page_array))
+ 			p->pagevec = p->page_array;
+ 		else {
+-			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
++			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+ 			if (!p->pagevec) {
+ 				mempool_free(p, nfs_rdata_mempool);
+ 				p = NULL;
+diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/super.c
+--- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-23 11:00:23.794511661 -0400
++++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-23 11:01:00.374564179 -0400
+@@ -141,7 +141,6 @@ static const match_table_t nfs_mount_opt
+ 	{ Opt_resvport, "resvport" },
+ 	{ Opt_noresvport, "noresvport" },
+ 	{ Opt_fscache, "fsc" },
+-	{ Opt_fscache_uniq, "fsc=%s" },
+ 	{ Opt_nofscache, "nofsc" },
+ 
+ 	{ Opt_port, "port=%s" },
+@@ -171,6 +170,7 @@ static const match_table_t nfs_mount_opt
+ 	{ Opt_mountaddr, "mountaddr=%s" },
+ 
+ 	{ Opt_lookupcache, "lookupcache=%s" },
++	{ Opt_fscache_uniq, "fsc=%s" },
+ 
+ 	{ Opt_err, NULL }
+ };
+@@ -423,15 +423,19 @@ static int nfs_statfs(struct dentry *den
+ 	unsigned char blockbits;
+ 	unsigned long blockres;
+ 	struct nfs_fh *fh = NFS_FH(dentry->d_inode);
+-	struct nfs_fattr fattr;
+-	struct nfs_fsstat res = {
+-			.fattr = &fattr,
+-	};
+-	int error;
++	struct nfs_fsstat res;
++	int error = -ENOMEM;
++
++	res.fattr = nfs_alloc_fattr();
++	if (res.fattr == NULL)
++		goto out_err;
+ 
+ 	error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
++
++	nfs_free_fattr(res.fattr);
+ 	if (error < 0)
+ 		goto out_err;
++
+ 	buf->f_type = NFS_SUPER_MAGIC;
+ 
+ 	/*
+@@ -1060,14 +1064,6 @@ static int nfs_parse_mount_options(char 
+ 			kfree(mnt->fscache_uniq);
+ 			mnt->fscache_uniq = NULL;
+ 			break;
+-		case Opt_fscache_uniq:
+-			string = match_strdup(args);
+-			if (!string)
+-				goto out_nomem;
+-			kfree(mnt->fscache_uniq);
+-			mnt->fscache_uniq = string;
+-			mnt->options |= NFS_OPTION_FSCACHE;
+-			break;
+ 
+ 		/*
+ 		 * options that take numeric values
+@@ -1398,6 +1394,14 @@ static int nfs_parse_mount_options(char 
+ 					return 0;
+ 			};
+ 			break;
++		case Opt_fscache_uniq:
++			string = match_strdup(args);
++			if (string == NULL)
++				goto out_nomem;
++			kfree(mnt->fscache_uniq);
++			mnt->fscache_uniq = string;
++			mnt->options |= NFS_OPTION_FSCACHE;
++			break;
+ 
+ 		/*
+ 		 * Special options
+@@ -2186,7 +2190,7 @@ static int nfs_get_sb(struct file_system
+ 	int error = -ENOMEM;
+ 
+ 	data = nfs_alloc_parsed_mount_data(3);
+-	mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
++	mntfh = nfs_alloc_fhandle();
+ 	if (data == NULL || mntfh == NULL)
+ 		goto out_free_fh;
+ 
+@@ -2261,7 +2265,7 @@ out:
+ 	kfree(data->fscache_uniq);
+ 	security_free_mnt_opts(&data->lsm_opts);
+ out_free_fh:
+-	kfree(mntfh);
++	nfs_free_fhandle(mntfh);
+ 	kfree(data);
+ 	return error;
+ 
+@@ -2570,7 +2574,7 @@ static int nfs4_remote_get_sb(struct fil
+ 	};
+ 	int error = -ENOMEM;
+ 
+-	mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
++	mntfh = nfs_alloc_fhandle();
+ 	if (data == NULL || mntfh == NULL)
+ 		goto out_free_fh;
+ 
+@@ -2628,7 +2632,7 @@ static int nfs4_remote_get_sb(struct fil
+ out:
+ 	security_free_mnt_opts(&data->lsm_opts);
+ out_free_fh:
+-	kfree(mntfh);
++	nfs_free_fhandle(mntfh);
+ 	return error;
+ 
+ out_free:
+@@ -2683,41 +2687,120 @@ out_freepage:
+ 	free_page((unsigned long)page);
+ }
+ 
++struct nfs_referral_count {
++	struct list_head list;		/* link in nfs_referral_count_list */
++	const struct task_struct *task;	/* owning task, matched against current */
++	unsigned int referral_count;	/* nesting depth for that task */
++};
++
++static LIST_HEAD(nfs_referral_count_list);
++static DEFINE_SPINLOCK(nfs_referral_count_list_lock);	/* held around list lookups/updates */
++
++static struct nfs_referral_count *nfs_find_referral_count(void)
++{
++	struct nfs_referral_count *p;
++
++	list_for_each_entry(p, &nfs_referral_count_list, list) {
++		if (p->task == current)	/* entry belonging to the calling task */
++			return p;
++	}
++	return NULL;	/* calling task has no entry on the list */
++}
++
++#define NFS_MAX_NESTED_REFERRALS 2
++
++static int nfs_referral_loop_protect(void)
++{
++	struct nfs_referral_count *p, *new;
++	int ret = -ENOMEM;
++
++	new = kmalloc(sizeof(*new), GFP_KERNEL);	/* allocated before the spinlock is taken */
++	if (!new)
++		goto out;
++	new->task = current;
++	new->referral_count = 1;
++
++	ret = 0;
++	spin_lock(&nfs_referral_count_list_lock);
++	p = nfs_find_referral_count();
++	if (p != NULL) {
++		if (p->referral_count >= NFS_MAX_NESTED_REFERRALS)
++			ret = -ELOOP;	/* this task is already at the nesting limit */
++		else
++			p->referral_count++;
++	} else {
++		list_add(&new->list, &nfs_referral_count_list);
++		new = NULL;	/* now owned by the list; skip the kfree() below */
++	}
++	spin_unlock(&nfs_referral_count_list_lock);
++	kfree(new);	/* frees the unused spare; no-op when new is NULL */
++out:
++	return ret;
++}
++
++static void nfs_referral_loop_unprotect(void)
++{
++	struct nfs_referral_count *p;
++
++	spin_lock(&nfs_referral_count_list_lock);
++	p = nfs_find_referral_count();	/* NOTE(review): not NULL-checked; assumes a prior successful protect() */
++	p->referral_count--;
++	if (p->referral_count == 0)
++		list_del(&p->list);	/* last nesting level: unlink, freed below */
++	else
++		p = NULL;	/* still in use: keep it, kfree(NULL) is a no-op */
++	spin_unlock(&nfs_referral_count_list_lock);
++	kfree(p);
++}
++
+ static int nfs_follow_remote_path(struct vfsmount *root_mnt,
+ 		const char *export_path, struct vfsmount *mnt_target)
+ {
++	struct nameidata *nd = NULL;
+ 	struct mnt_namespace *ns_private;
+-	struct nameidata nd;
+ 	struct super_block *s;
+ 	int ret;
+ 
++	nd = kmalloc(sizeof(*nd), GFP_KERNEL);
++	ret = -ENOMEM;
++	if (nd == NULL)
++		goto out_mntput;	/* release root_mnt, as the next failure path does */
+ 	ns_private = create_mnt_ns(root_mnt);
+ 	ret = PTR_ERR(ns_private);
+ 	if (IS_ERR(ns_private))
+ 		goto out_mntput;
+ 
++	ret = nfs_referral_loop_protect();
++	if (ret != 0)
++		goto out_put_mnt_ns;
++
+ 	ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
+-			export_path, LOOKUP_FOLLOW, &nd);
++			export_path, LOOKUP_FOLLOW, nd);
+ 
++	nfs_referral_loop_unprotect();
+ 	put_mnt_ns(ns_private);
+ 
+ 	if (ret != 0)
+ 		goto out_err;
+ 
+-	s = nd.path.mnt->mnt_sb;
++	s = nd->path.mnt->mnt_sb;
+ 	atomic_inc(&s->s_active);
+ 	mnt_target->mnt_sb = s;
+-	mnt_target->mnt_root = dget(nd.path.dentry);
++	mnt_target->mnt_root = dget(nd->path.dentry);
+ 
+ 	/* Correct the device pathname */
+-	nfs_fix_devname(&nd.path, mnt_target);
++	nfs_fix_devname(&nd->path, mnt_target);
+ 
+-	path_put(&nd.path);
++	path_put(&nd->path);
++	kfree(nd);
+ 	down_write(&s->s_umount);
+ 	return 0;
++out_put_mnt_ns:
++	put_mnt_ns(ns_private);
+ out_mntput:
+ 	mntput(root_mnt);
+ out_err:
++	kfree(nd);	/* nd is NULL here when its allocation failed */
+ 	return ret;
+ }
+ 
+@@ -2888,17 +2971,21 @@ static int nfs4_remote_referral_get_sb(s
+ 	struct super_block *s;
+ 	struct nfs_server *server;
+ 	struct dentry *mntroot;
+-	struct nfs_fh mntfh;
++	struct nfs_fh *mntfh;
+ 	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
+ 	struct nfs_sb_mountdata sb_mntdata = {
+ 		.mntflags = flags,
+ 	};
+-	int error;
++	int error = -ENOMEM;
+ 
+ 	dprintk("--> nfs4_referral_get_sb()\n");
+ 
++	mntfh = nfs_alloc_fhandle();
++	if (mntfh == NULL)
++		goto out_err_nofh;
++
+ 	/* create a new volume representation */
+-	server = nfs4_create_referral_server(data, &mntfh);
++	server = nfs4_create_referral_server(data, mntfh);
+ 	if (IS_ERR(server)) {
+ 		error = PTR_ERR(server);
+ 		goto out_err_noserver;
+@@ -2930,7 +3017,7 @@ static int nfs4_remote_referral_get_sb(s
+ 		nfs_fscache_get_super_cookie(s, NULL, data);
+ 	}
+ 
+-	mntroot = nfs4_get_root(s, &mntfh);
++	mntroot = nfs4_get_root(s, mntfh);
+ 	if (IS_ERR(mntroot)) {
+ 		error = PTR_ERR(mntroot);
+ 		goto error_splat_super;
+@@ -2947,12 +3034,15 @@ static int nfs4_remote_referral_get_sb(s
+ 
+ 	security_sb_clone_mnt_opts(data->sb, s);
+ 
++	nfs_free_fhandle(mntfh);
+ 	dprintk("<-- nfs4_referral_get_sb() = 0\n");
+ 	return 0;
+ 
+ out_err_nosb:
+ 	nfs_free_server(server);
+ out_err_noserver:
++	nfs_free_fhandle(mntfh);
++out_err_nofh:
+ 	dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
+ 	return error;
+ 
+@@ -2961,6 +3051,7 @@ error_splat_super:
+ 		bdi_unregister(&server->backing_dev_info);
+ error_splat_bdi:
+ 	deactivate_locked_super(s);
++	nfs_free_fhandle(mntfh);
+ 	dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
+ 	return error;
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unlink.c
+--- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-23 11:01:00.375554592 -0400
+@@ -23,6 +23,7 @@ struct nfs_unlinkdata {
+ 	struct nfs_removeres res;
+ 	struct inode *dir;
+ 	struct rpc_cred	*cred;
++	struct nfs_fattr dir_attr;
+ };
+ 
+ /**
+@@ -169,7 +170,7 @@ static int nfs_do_call_unlink(struct den
+ 	}
+ 	nfs_sb_active(dir->i_sb);
+ 	data->args.fh = NFS_FH(dir);
+-	nfs_fattr_init(&data->res.dir_attr);
++	nfs_fattr_init(data->res.dir_attr);
+ 
+ 	NFS_PROTO(dir)->unlink_setup(&msg, dir);
+ 
+@@ -259,6 +260,7 @@ nfs_async_unlink(struct inode *dir, stru
+ 		goto out_free;
+ 	}
+ 	data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	data->res.dir_attr = &data->dir_attr;
+ 
+ 	status = -EBUSY;
+ 	spin_lock(&dentry->d_lock);
+diff -up linux-2.6.34.noarch/include/linux/ktime.h.orig linux-2.6.34.noarch/include/linux/ktime.h
+--- linux-2.6.34.noarch/include/linux/ktime.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/ktime.h	2010-08-23 11:01:00.377554285 -0400
+@@ -130,7 +130,7 @@ static inline ktime_t timeval_to_ktime(s
+ /* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */
+ #define ktime_to_ns(kt)			((kt).tv64)
+ 
+-#else
++#else	/* !((BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR)) */
+ 
+ /*
+  * Helper macros/inlines to get the ktime_t math right in the timespec
+@@ -275,7 +275,7 @@ static inline s64 ktime_to_ns(const ktim
+ 	return (s64) kt.tv.sec * NSEC_PER_SEC + kt.tv.nsec;
+ }
+ 
+-#endif
++#endif	/* !((BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR)) */
+ 
+ /**
+  * ktime_equal - Compares two ktime_t variables to see if they are equal
+@@ -295,6 +295,12 @@ static inline s64 ktime_to_us(const ktim
+ 	return (s64) tv.tv_sec * USEC_PER_SEC + tv.tv_usec;
+ }
+ 
++static inline s64 ktime_to_ms(const ktime_t kt)
++{
++	struct timeval tv = ktime_to_timeval(kt);	/* split into sec + usec */
++	return (s64) tv.tv_sec * MSEC_PER_SEC + tv.tv_usec / USEC_PER_MSEC;
++}
++
+ static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier)
+ {
+        return ktime_to_us(ktime_sub(later, earlier));
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/include/linux/nfs_fs.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-23 11:00:23.822502111 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-23 11:01:00.378563926 -0400
+@@ -356,6 +356,20 @@ extern struct nfs_open_context *nfs_find
+ extern u64 nfs_compat_user_ino64(u64 fileid);
+ extern void nfs_fattr_init(struct nfs_fattr *fattr);
+ 
++extern struct nfs_fattr *nfs_alloc_fattr(void);
++/* Free an nfs_fattr with kfree(); presumably the pair of nfs_alloc_fattr(). */
++static inline void nfs_free_fattr(const struct nfs_fattr *fattr)
++{
++	kfree(fattr);
++}
++
++extern struct nfs_fh *nfs_alloc_fhandle(void);
++/* Free an nfs_fh with kfree(); presumably the pair of nfs_alloc_fhandle(). */
++static inline void nfs_free_fhandle(const struct nfs_fh *fh)
++{
++	kfree(fh);
++}
++
+ /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
+ extern __be32 root_nfs_parse_addr(char *name); /*__init*/
+ extern unsigned long nfs_inc_attr_generation_counter(void);
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/include/linux/nfs_fs_sb.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-23 11:01:00.380553887 -0400
+@@ -44,7 +44,6 @@ struct nfs_client {
+ 
+ #ifdef CONFIG_NFS_V4
+ 	u64			cl_clientid;	/* constant */
+-	nfs4_verifier		cl_confirm;
+ 	unsigned long		cl_state;
+ 
+ 	struct rb_root		cl_openowner_id;
+diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/include/linux/nfs_xdr.h
+--- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-23 11:01:00.381564072 -0400
+@@ -386,8 +386,8 @@ struct nfs_removeargs {
+ 
+ struct nfs_removeres {
+ 	const struct nfs_server *server;
++	struct nfs_fattr	*dir_attr;	/* caller-supplied storage, e.g. nfs_unlinkdata.dir_attr */
+ 	struct nfs4_change_info	cinfo;
+-	struct nfs_fattr	dir_attr;
+ 	struct nfs4_sequence_res 	seq_res;
+ };
+ 
+@@ -824,6 +824,11 @@ struct nfs4_setclientid {
+ 	u32				sc_cb_ident;
+ };
+ 
++struct nfs4_setclientid_res {
++	u64				clientid;	/* same type as nfs_client.cl_clientid */
++	nfs4_verifier			confirm;	/* NOTE(review): presumably supersedes nfs_client.cl_confirm - confirm */
++};
++
+ struct nfs4_statfs_arg {
+ 	const struct nfs_fh *		fh;
+ 	const u32 *			bitmask;
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/auth_gss.h.orig linux-2.6.34.noarch/include/linux/sunrpc/auth_gss.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/auth_gss.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/auth_gss.h	2010-08-23 11:01:00.382564026 -0400
+@@ -82,6 +82,7 @@ struct gss_cred {
+ 	enum rpc_gss_svc	gc_service;
+ 	struct gss_cl_ctx	*gc_ctx;
+ 	struct gss_upcall_msg	*gc_upcall;
++	unsigned long		gc_upcall_timestamp;
+ 	unsigned char		gc_machine_cred : 1;
+ };
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/auth.h.orig linux-2.6.34.noarch/include/linux/sunrpc/auth.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/auth.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/auth.h	2010-08-23 11:01:00.382564026 -0400
+@@ -54,6 +54,7 @@ struct rpc_cred {
+ #define RPCAUTH_CRED_NEW	0
+ #define RPCAUTH_CRED_UPTODATE	1
+ #define RPCAUTH_CRED_HASHED	2
++#define RPCAUTH_CRED_NEGATIVE	3
+ 
+ #define RPCAUTH_CRED_MAGIC	0x0f4aa4f0
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/gss_api.h.orig linux-2.6.34.noarch/include/linux/sunrpc/gss_api.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/gss_api.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/gss_api.h	2010-08-23 11:01:00.383574314 -0400
+@@ -35,7 +35,8 @@ int gss_import_sec_context(
+ 		const void*		input_token,
+ 		size_t			bufsize,
+ 		struct gss_api_mech	*mech,
+-		struct gss_ctx		**ctx_id);
++		struct gss_ctx		**ctx_id,
++		gfp_t			gfp_mask);
+ u32 gss_get_mic(
+ 		struct gss_ctx		*ctx_id,
+ 		struct xdr_buf		*message,
+@@ -80,6 +81,8 @@ struct gss_api_mech {
+ 	/* pseudoflavors supported by this mechanism: */
+ 	int			gm_pf_num;
+ 	struct pf_desc *	gm_pfs;
++	/* Should the following be a callback operation instead? */
++	const char		*gm_upcall_enctypes;
+ };
+ 
+ /* and must provide the following operations: */
+@@ -87,7 +90,8 @@ struct gss_api_ops {
+ 	int (*gss_import_sec_context)(
+ 			const void		*input_token,
+ 			size_t			bufsize,
+-			struct gss_ctx		*ctx_id);
++			struct gss_ctx		*ctx_id,
++			gfp_t			gfp_mask);
+ 	u32 (*gss_get_mic)(
+ 			struct gss_ctx		*ctx_id,
+ 			struct xdr_buf		*message,
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/gss_krb5.h.orig linux-2.6.34.noarch/include/linux/sunrpc/gss_krb5.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/gss_krb5.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/gss_krb5.h	2010-08-23 11:01:00.383574314 -0400
+@@ -4,7 +4,7 @@
+  *  Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h,
+  *  lib/gssapi/krb5/gssapiP_krb5.h, and others
+  *
+- *  Copyright (c) 2000 The Regents of the University of Michigan.
++ *  Copyright (c) 2000-2008 The Regents of the University of Michigan.
+  *  All rights reserved.
+  *
+  *  Andy Adamson   <andros@umich.edu>
+@@ -36,17 +36,86 @@
+  *
+  */
+ 
++#include <linux/crypto.h>
+ #include <linux/sunrpc/auth_gss.h>
+ #include <linux/sunrpc/gss_err.h>
+ #include <linux/sunrpc/gss_asn1.h>
+ 
++/* Length of constant used in key derivation */
++#define GSS_KRB5_K5CLENGTH (5)
++
++/* Maximum key length (in bytes) for the supported crypto algorithms */
++#define GSS_KRB5_MAX_KEYLEN (32)
++
++/* Maximum checksum function output for the supported crypto algorithms */
++#define GSS_KRB5_MAX_CKSUM_LEN  (20)
++
++/* Maximum blocksize for the supported crypto algorithms */
++#define GSS_KRB5_MAX_BLOCKSIZE  (16)
++
++struct krb5_ctx;
++/* Per-enctype constants and handlers; struct krb5_ctx refers to one via gk5e. */
++struct gss_krb5_enctype {
++	const u32		etype;		/* encryption (key) type */
++	const u32		ctype;		/* checksum type */
++	const char		*name;		/* "friendly" name */
++	const char		*encrypt_name;	/* crypto encrypt name */
++	const char		*cksum_name;	/* crypto checksum name */
++	const u16		signalg;	/* signing algorithm */
++	const u16		sealalg;	/* sealing algorithm */
++	const u32		blocksize;	/* encryption blocksize */
++	const u32		conflen;	/* confounder length
++						   (normally the same as
++						   the blocksize) */
++	const u32		cksumlength;	/* checksum length */
++	const u32		keyed_cksum;	/* is it a keyed cksum? */
++	const u32		keybytes;	/* raw key len, in bytes */
++	const u32		keylength;	/* final key len, in bytes */
++	u32 (*encrypt) (struct crypto_blkcipher *tfm,
++			void *iv, void *in, void *out,
++			int length);		/* encryption function */
++	u32 (*decrypt) (struct crypto_blkcipher *tfm,
++			void *iv, void *in, void *out,
++			int length);		/* decryption function */
++	u32 (*mk_key) (const struct gss_krb5_enctype *gk5e,
++		       struct xdr_netobj *in,
++		       struct xdr_netobj *out);	/* complete key generation */
++	u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset,
++			   struct xdr_buf *buf, int ec,
++			   struct page **pages); /* v2 encryption function */
++	u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset,
++			   struct xdr_buf *buf, u32 *headskip,
++			   u32 *tailskip);	/* v2 decryption function */
++};
++
++/* krb5_ctx flags definitions */
++#define KRB5_CTX_FLAG_INITIATOR         0x00000001
++#define KRB5_CTX_FLAG_CFX               0x00000002
++#define KRB5_CTX_FLAG_ACCEPTOR_SUBKEY   0x00000004
++
+ struct krb5_ctx {
+ 	int			initiate; /* 1 = initiating, 0 = accepting */
++	u32			enctype;	/* NOTE(review): presumably an ENCTYPE_* value - confirm */
++	u32			flags;		/* KRB5_CTX_FLAG_* bits */
++	const struct gss_krb5_enctype *gk5e; /* enctype-specific info */
+ 	struct crypto_blkcipher	*enc;
+ 	struct crypto_blkcipher	*seq;
++	struct crypto_blkcipher *acceptor_enc;
++	struct crypto_blkcipher *initiator_enc;
++	struct crypto_blkcipher *acceptor_enc_aux;
++	struct crypto_blkcipher *initiator_enc_aux;
++	u8			Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */
++	u8			cksum[GSS_KRB5_MAX_KEYLEN];
+ 	s32			endtime;
+ 	u32			seq_send;
++	u64			seq_send64;	/* 64-bit counterpart of seq_send */
+ 	struct xdr_netobj	mech_used;
++	u8			initiator_sign[GSS_KRB5_MAX_KEYLEN];
++	u8			acceptor_sign[GSS_KRB5_MAX_KEYLEN];
++	u8			initiator_seal[GSS_KRB5_MAX_KEYLEN];
++	u8			acceptor_seal[GSS_KRB5_MAX_KEYLEN];
++	u8			initiator_integ[GSS_KRB5_MAX_KEYLEN];
++	u8			acceptor_integ[GSS_KRB5_MAX_KEYLEN];
+ };
+ 
+ extern spinlock_t krb5_seq_lock;
+@@ -57,6 +126,18 @@ extern spinlock_t krb5_seq_lock;
+ #define KG_TOK_MIC_MSG    0x0101
+ #define KG_TOK_WRAP_MSG   0x0201
+ 
++#define KG2_TOK_INITIAL     0x0101
++#define KG2_TOK_RESPONSE    0x0202
++#define KG2_TOK_MIC         0x0404
++#define KG2_TOK_WRAP        0x0504
++
++#define KG2_TOKEN_FLAG_SENTBYACCEPTOR   0x01
++#define KG2_TOKEN_FLAG_SEALED           0x02
++#define KG2_TOKEN_FLAG_ACCEPTORSUBKEY   0x04
++
++#define KG2_RESP_FLAG_ERROR             0x0001
++#define KG2_RESP_FLAG_DELEG_OK          0x0002
++
+ enum sgn_alg {
+ 	SGN_ALG_DES_MAC_MD5 = 0x0000,
+ 	SGN_ALG_MD2_5 = 0x0001,
+@@ -81,6 +162,9 @@ enum seal_alg {
+ #define CKSUMTYPE_RSA_MD5_DES		0x0008
+ #define CKSUMTYPE_NIST_SHA		0x0009
+ #define CKSUMTYPE_HMAC_SHA1_DES3	0x000c
++#define CKSUMTYPE_HMAC_SHA1_96_AES128   0x000f
++#define CKSUMTYPE_HMAC_SHA1_96_AES256   0x0010
++#define CKSUMTYPE_HMAC_MD5_ARCFOUR      (-138) /* Microsoft md5 hmac cksumtype; parenthesized so the sign binds inside any expression */
+ 
+ /* from gssapi_err_krb5.h */
+ #define KG_CCACHE_NOMATCH                        (39756032L)
+@@ -111,11 +195,56 @@ enum seal_alg {
+ #define ENCTYPE_DES3_CBC_RAW    0x0006	/* DES-3 cbc mode raw */
+ #define ENCTYPE_DES_HMAC_SHA1   0x0008
+ #define ENCTYPE_DES3_CBC_SHA1   0x0010
++#define ENCTYPE_AES128_CTS_HMAC_SHA1_96 0x0011
++#define ENCTYPE_AES256_CTS_HMAC_SHA1_96 0x0012
++#define ENCTYPE_ARCFOUR_HMAC            0x0017
++#define ENCTYPE_ARCFOUR_HMAC_EXP        0x0018
+ #define ENCTYPE_UNKNOWN         0x01ff
+ 
+-s32
+-make_checksum(char *, char *header, int hdrlen, struct xdr_buf *body,
+-		   int body_offset, struct xdr_netobj *cksum);
++/*
++ * Constants used for key derivation
++ */
++/* for 3DES */
++#define KG_USAGE_SEAL (22)
++#define KG_USAGE_SIGN (23)
++#define KG_USAGE_SEQ  (24)
++
++/* from rfc3961 */
++#define KEY_USAGE_SEED_CHECKSUM         (0x99)
++#define KEY_USAGE_SEED_ENCRYPTION       (0xAA)
++#define KEY_USAGE_SEED_INTEGRITY        (0x55)
++
++/* from rfc4121 */
++#define KG_USAGE_ACCEPTOR_SEAL  (22)
++#define KG_USAGE_ACCEPTOR_SIGN  (23)
++#define KG_USAGE_INITIATOR_SEAL (24)
++#define KG_USAGE_INITIATOR_SIGN (25)
++
++/*
++ * This compile-time check verifies that we will not exceed the
++ * slack space allotted by the client and server auth_gss code
++ * before they call gss_wrap().
++ */
++#define GSS_KRB5_MAX_SLACK_NEEDED \
++	(GSS_KRB5_TOK_HDR_LEN     /* gss token header */         \
++	+ GSS_KRB5_MAX_CKSUM_LEN  /* gss token checksum */       \
++	+ GSS_KRB5_MAX_BLOCKSIZE  /* confounder */               \
++	+ GSS_KRB5_MAX_BLOCKSIZE  /* possible padding */         \
++	+ GSS_KRB5_TOK_HDR_LEN    /* encrypted hdr in v2 token */\
++	+ GSS_KRB5_MAX_CKSUM_LEN  /* encryption hmac */          \
++	+ 4 + 4                   /* RPC verifier */             \
++	+ GSS_KRB5_TOK_HDR_LEN                                   \
++	+ GSS_KRB5_MAX_CKSUM_LEN)
++
++u32
++make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
++		struct xdr_buf *body, int body_offset, u8 *cksumkey,
++		unsigned int usage, struct xdr_netobj *cksumout);
++
++u32
++make_checksum_v2(struct krb5_ctx *, char *header, int hdrlen,
++		 struct xdr_buf *body, int body_offset, u8 *key,
++		 unsigned int usage, struct xdr_netobj *cksum);
+ 
+ u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *,
+ 		struct xdr_netobj *);
+@@ -149,11 +278,54 @@ gss_decrypt_xdr_buf(struct crypto_blkcip
+ 		    int offset);
+ 
+ s32
+-krb5_make_seq_num(struct crypto_blkcipher *key,
++krb5_make_seq_num(struct krb5_ctx *kctx,
++		struct crypto_blkcipher *key,
+ 		int direction,
+ 		u32 seqnum, unsigned char *cksum, unsigned char *buf);
+ 
+ s32
+-krb5_get_seq_num(struct crypto_blkcipher *key,
++krb5_get_seq_num(struct krb5_ctx *kctx,
+ 	       unsigned char *cksum,
+ 	       unsigned char *buf, int *direction, u32 *seqnum);
++
++int
++xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen);
++
++u32
++krb5_derive_key(const struct gss_krb5_enctype *gk5e,
++		const struct xdr_netobj *inkey,
++		struct xdr_netobj *outkey,
++		const struct xdr_netobj *in_constant,
++		gfp_t gfp_mask);
++
++u32
++gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e,
++		       struct xdr_netobj *randombits,
++		       struct xdr_netobj *key);
++
++u32
++gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e,
++		      struct xdr_netobj *randombits,
++		      struct xdr_netobj *key);
++
++u32
++gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
++		     struct xdr_buf *buf, int ec,
++		     struct page **pages);
++
++u32
++gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset,
++		     struct xdr_buf *buf, u32 *plainoffset,
++		     u32 *plainlen);
++
++int
++krb5_rc4_setup_seq_key(struct krb5_ctx *kctx,
++		       struct crypto_blkcipher *cipher,
++		       unsigned char *cksum);
++
++int
++krb5_rc4_setup_enc_key(struct krb5_ctx *kctx,
++		       struct crypto_blkcipher *cipher,
++		       s32 seqnum);
++void
++gss_krb5_make_confounder(char *p, u32 conflen);
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/metrics.h.orig linux-2.6.34.noarch/include/linux/sunrpc/metrics.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/metrics.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/metrics.h	2010-08-23 11:01:00.384611889 -0400
+@@ -26,6 +26,7 @@
+ #define _LINUX_SUNRPC_METRICS_H
+ 
+ #include <linux/seq_file.h>
++#include <linux/ktime.h>
+ 
+ #define RPC_IOSTATS_VERS	"1.0"
+ 
+@@ -58,9 +59,9 @@ struct rpc_iostats {
+ 	 * and the total time the request spent from init to release
+ 	 * are measured.
+ 	 */
+-	unsigned long long	om_queue,	/* jiffies queued for xmit */
+-				om_rtt,		/* jiffies for RPC RTT */
+-				om_execute;	/* jiffies for RPC execution */
++	ktime_t			om_queue,	/* queued for xmit */
++				om_rtt,		/* RPC RTT */
++				om_execute;	/* RPC execution */
+ } ____cacheline_aligned;
+ 
+ struct rpc_task;
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/sched.h.orig linux-2.6.34.noarch/include/linux/sunrpc/sched.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/sched.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/sched.h	2010-08-23 11:01:00.385361873 -0400
+@@ -10,6 +10,7 @@
+ #define _LINUX_SUNRPC_SCHED_H_
+ 
+ #include <linux/timer.h>
++#include <linux/ktime.h>
+ #include <linux/sunrpc/types.h>
+ #include <linux/spinlock.h>
+ #include <linux/wait.h>
+@@ -40,21 +41,15 @@ struct rpc_wait {
+  * This is the RPC task struct
+  */
+ struct rpc_task {
+-#ifdef RPC_DEBUG
+-	unsigned long		tk_magic;	/* 0xf00baa */
+-#endif
+ 	atomic_t		tk_count;	/* Reference count */
+ 	struct list_head	tk_task;	/* global list of tasks */
+ 	struct rpc_clnt *	tk_client;	/* RPC client */
+ 	struct rpc_rqst *	tk_rqstp;	/* RPC request */
+-	int			tk_status;	/* result of last operation */
+ 
+ 	/*
+ 	 * RPC call state
+ 	 */
+ 	struct rpc_message	tk_msg;		/* RPC call info */
+-	__u8			tk_garb_retry;
+-	__u8			tk_cred_retry;
+ 
+ 	/*
+ 	 * callback	to be executed after waking up
+@@ -67,7 +62,6 @@ struct rpc_task {
+ 	void *			tk_calldata;
+ 
+ 	unsigned long		tk_timeout;	/* timeout for rpc_sleep() */
+-	unsigned short		tk_flags;	/* misc flags */
+ 	unsigned long		tk_runstate;	/* Task run status */
+ 	struct workqueue_struct	*tk_workqueue;	/* Normally rpciod, but could
+ 						 * be any workqueue
+@@ -78,17 +72,19 @@ struct rpc_task {
+ 		struct rpc_wait		tk_wait;	/* RPC wait */
+ 	} u;
+ 
+-	unsigned short		tk_timeouts;	/* maj timeouts */
+-	size_t			tk_bytes_sent;	/* total bytes sent */
+-	unsigned long		tk_start;	/* RPC task init timestamp */
+-	long			tk_rtt;		/* round-trip time (jiffies) */
++	ktime_t			tk_start;	/* RPC task init timestamp */
+ 
+ 	pid_t			tk_owner;	/* Process id for batching tasks */
+-	unsigned char		tk_priority : 2;/* Task priority */
++	int			tk_status;	/* result of last operation */
++	unsigned short		tk_flags;	/* misc flags */
++	unsigned short		tk_timeouts;	/* maj timeouts */
+ 
+ #ifdef RPC_DEBUG
+ 	unsigned short		tk_pid;		/* debugging aid */
+ #endif
++	unsigned char		tk_priority : 2,/* Task priority */
++				tk_garb_retry : 2,
++				tk_cred_retry : 2;
+ };
+ #define tk_xprt			tk_client->cl_xprt
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xdr.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-23 11:01:00.385361873 -0400
+@@ -1,7 +1,10 @@
+ /*
+- * include/linux/sunrpc/xdr.h
++ * XDR standard data types and function declarations
+  *
+  * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
++ *
++ * Based on:
++ *   RFC 4506 "XDR: External Data Representation Standard", May 2006
+  */
+ 
+ #ifndef _SUNRPC_XDR_H_
+@@ -62,7 +65,6 @@ struct xdr_buf {
+ 
+ 	unsigned int	buflen,		/* Total length of storage buffer */
+ 			len;		/* Length of XDR encoded message */
+-
+ };
+ 
+ /*
+@@ -178,7 +180,7 @@ struct xdr_array2_desc {
+ };
+ 
+ extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
+-                             struct xdr_array2_desc *desc);
++			     struct xdr_array2_desc *desc);
+ extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
+ 			     struct xdr_array2_desc *desc);
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/xprt.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xprt.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/xprt.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xprt.h	2010-08-23 11:01:00.386574704 -0400
+@@ -13,6 +13,7 @@
+ #include <linux/socket.h>
+ #include <linux/in.h>
+ #include <linux/kref.h>
++#include <linux/ktime.h>
+ #include <linux/sunrpc/sched.h>
+ #include <linux/sunrpc/xdr.h>
+ #include <linux/sunrpc/msg_prot.h>
+@@ -65,8 +66,6 @@ struct rpc_rqst {
+ 	struct rpc_task *	rq_task;	/* RPC task data */
+ 	__be32			rq_xid;		/* request XID */
+ 	int			rq_cong;	/* has incremented xprt->cong */
+-	int			rq_reply_bytes_recvd;	/* number of reply */
+-							/* bytes received */
+ 	u32			rq_seqno;	/* gss seq no. used on req. */
+ 	int			rq_enc_pages_num;
+ 	struct page		**rq_enc_pages;	/* scratch pages for use by
+@@ -77,12 +76,16 @@ struct rpc_rqst {
+ 	__u32 *			rq_buffer;	/* XDR encode buffer */
+ 	size_t			rq_callsize,
+ 				rq_rcvsize;
++	size_t			rq_xmit_bytes_sent;	/* total bytes sent */
++	size_t			rq_reply_bytes_recvd;	/* total reply bytes */
++							/* received */
+ 
+ 	struct xdr_buf		rq_private_buf;		/* The receive buffer
+ 							 * used in the softirq.
+ 							 */
+ 	unsigned long		rq_majortimeo;	/* major timeout alarm */
+ 	unsigned long		rq_timeout;	/* Current timeout value */
++	ktime_t			rq_rtt;		/* round-trip time */
+ 	unsigned int		rq_retries;	/* # of retries */
+ 	unsigned int		rq_connect_cookie;
+ 						/* A cookie used to track the
+@@ -94,7 +97,7 @@ struct rpc_rqst {
+ 	 */
+ 	u32			rq_bytes_sent;	/* Bytes we have sent */
+ 
+-	unsigned long		rq_xtime;	/* when transmitted */
++	ktime_t			rq_xtime;	/* transmit time stamp */
+ 	int			rq_ntrans;
+ 
+ #if defined(CONFIG_NFS_V4_1)
+@@ -174,8 +177,7 @@ struct rpc_xprt {
+ 	/*
+ 	 * Connection of transports
+ 	 */
+-	unsigned long		connect_timeout,
+-				bind_timeout,
++	unsigned long		bind_timeout,
+ 				reestablish_timeout;
+ 	unsigned int		connect_cookie;	/* A cookie that gets bumped
+ 						   every time the transport
+@@ -294,7 +296,6 @@ void			xprt_set_retrans_timeout_rtt(stru
+ void			xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
+ void			xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action);
+ void			xprt_write_space(struct rpc_xprt *xprt);
+-void			xprt_update_rtt(struct rpc_task *task);
+ void			xprt_adjust_cwnd(struct rpc_task *task, int result);
+ struct rpc_rqst *	xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid);
+ void			xprt_complete_rqst(struct rpc_task *task, int copied);
+diff -up linux-2.6.34.noarch/net/sunrpc/auth.c.orig linux-2.6.34.noarch/net/sunrpc/auth.c
+--- linux-2.6.34.noarch/net/sunrpc/auth.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth.c	2010-08-23 11:01:00.387574079 -0400
+@@ -236,10 +236,15 @@ rpcauth_prune_expired(struct list_head *
+ 
+ 	list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
+ 
+-		/* Enforce a 60 second garbage collection moratorium */
++		if (nr_to_scan-- == 0)
++			break;
++		/*
++		 * Enforce a 60 second garbage collection moratorium
++		 * Note that the cred_unused list must be time-ordered.
++		 */
+ 		if (time_in_range(cred->cr_expire, expired, jiffies) &&
+ 		    test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
+-			continue;
++			return 0;
+ 
+ 		list_del_init(&cred->cr_lru);
+ 		number_cred_unused--;
+@@ -252,13 +257,10 @@ rpcauth_prune_expired(struct list_head *
+ 			get_rpccred(cred);
+ 			list_add_tail(&cred->cr_lru, free);
+ 			rpcauth_unhash_cred_locked(cred);
+-			nr_to_scan--;
+ 		}
+ 		spin_unlock(cache_lock);
+-		if (nr_to_scan == 0)
+-			break;
+ 	}
+-	return nr_to_scan;
++	return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
+ }
+ 
+ /*
+@@ -270,11 +272,12 @@ rpcauth_cache_shrinker(int nr_to_scan, g
+ 	LIST_HEAD(free);
+ 	int res;
+ 
++	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
++		return (nr_to_scan == 0) ? 0 : -1;
+ 	if (list_empty(&cred_unused))
+ 		return 0;
+ 	spin_lock(&rpc_credcache_lock);
+-	nr_to_scan = rpcauth_prune_expired(&free, nr_to_scan);
+-	res = (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
++	res = rpcauth_prune_expired(&free, nr_to_scan);
+ 	spin_unlock(&rpc_credcache_lock);
+ 	rpcauth_destroy_credlist(&free);
+ 	return res;
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/auth_gss.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/auth_gss.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/auth_gss.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/auth_gss.c	2010-08-23 11:01:00.388574680 -0400
+@@ -57,11 +57,14 @@ static const struct rpc_authops authgss_
+ static const struct rpc_credops gss_credops;
+ static const struct rpc_credops gss_nullops;
+ 
++#define GSS_RETRY_EXPIRED 5
++static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED;
++
+ #ifdef RPC_DEBUG
+ # define RPCDBG_FACILITY	RPCDBG_AUTH
+ #endif
+ 
+-#define GSS_CRED_SLACK		1024
++#define GSS_CRED_SLACK		(RPC_MAX_AUTH_SIZE * 2)
+ /* length of a krb5 verifier (48), plus data added before arguments when
+  * using integrity (two 4-byte integers): */
+ #define GSS_VERF_SLACK		100
+@@ -229,7 +232,7 @@ gss_fill_context(const void *p, const vo
+ 		p = ERR_PTR(-EFAULT);
+ 		goto err;
+ 	}
+-	ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx);
++	ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, GFP_NOFS);
+ 	if (ret < 0) {
+ 		p = ERR_PTR(ret);
+ 		goto err;
+@@ -350,6 +353,24 @@ gss_unhash_msg(struct gss_upcall_msg *gs
+ }
+ 
+ static void
++gss_handle_downcall_result(struct gss_cred *gss_cred, struct gss_upcall_msg *gss_msg)
++{
++	struct rpc_cred *cred = &gss_cred->gc_base;
++
++	/* A delivered context clears the negative mark; an expired key sets it. */
++	if (gss_msg->msg.errno == 0 && gss_msg->ctx != NULL) {
++		clear_bit(RPCAUTH_CRED_NEGATIVE, &cred->cr_flags);
++		gss_cred_set_ctx(cred, gss_msg->ctx);
++	} else if (gss_msg->msg.errno == -EKEYEXPIRED) {
++		set_bit(RPCAUTH_CRED_NEGATIVE, &cred->cr_flags);
++	}
++	/* Stamp and detach the finished upcall, then wake its waiters. */
++	gss_cred->gc_upcall_timestamp = jiffies;
++	gss_cred->gc_upcall = NULL;
++	rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
++}
++
++static void
+ gss_upcall_callback(struct rpc_task *task)
+ {
+ 	struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred,
+@@ -358,13 +379,9 @@ gss_upcall_callback(struct rpc_task *tas
+ 	struct inode *inode = &gss_msg->inode->vfs_inode;
+ 
+ 	spin_lock(&inode->i_lock);
+-	if (gss_msg->ctx)
+-		gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx);
+-	else
+-		task->tk_status = gss_msg->msg.errno;
+-	gss_cred->gc_upcall = NULL;
+-	rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
++	gss_handle_downcall_result(gss_cred, gss_msg);
+ 	spin_unlock(&inode->i_lock);
++	task->tk_status = gss_msg->msg.errno;
+ 	gss_release_msg(gss_msg);
+ }
+ 
+@@ -377,11 +394,12 @@ static void gss_encode_v0_msg(struct gss
+ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
+ 				struct rpc_clnt *clnt, int machine_cred)
+ {
++	struct gss_api_mech *mech = gss_msg->auth->mech;
+ 	char *p = gss_msg->databuf;
+ 	int len = 0;
+ 
+ 	gss_msg->msg.len = sprintf(gss_msg->databuf, "mech=%s uid=%d ",
+-				   gss_msg->auth->mech->gm_name,
++				   mech->gm_name,
+ 				   gss_msg->uid);
+ 	p += gss_msg->msg.len;
+ 	if (clnt->cl_principal) {
+@@ -398,6 +416,11 @@ static void gss_encode_v1_msg(struct gss
+ 		p += len;
+ 		gss_msg->msg.len += len;
+ 	}
++	if (mech->gm_upcall_enctypes) {	/* copied verbatim, never parsed as a format */
++		len = sprintf(p, "%s", mech->gm_upcall_enctypes);
++		p += len;
++		gss_msg->msg.len += len;
++	}
+ 	len = sprintf(p, "\n");
+ 	gss_msg->msg.len += len;
+ 
+@@ -507,18 +530,16 @@ gss_refresh_upcall(struct rpc_task *task
+ 	spin_lock(&inode->i_lock);
+ 	if (gss_cred->gc_upcall != NULL)
+ 		rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL);
+-	else if (gss_msg->ctx != NULL) {
+-		gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx);
+-		gss_cred->gc_upcall = NULL;
+-		rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
+-	} else if (gss_msg->msg.errno >= 0) {
++	else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
+ 		task->tk_timeout = 0;
+ 		gss_cred->gc_upcall = gss_msg;
+ 		/* gss_upcall_callback will release the reference to gss_upcall_msg */
+ 		atomic_inc(&gss_msg->count);
+ 		rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback);
+-	} else
++	} else {
++		gss_handle_downcall_result(gss_cred, gss_msg);
+ 		err = gss_msg->msg.errno;
++	}
+ 	spin_unlock(&inode->i_lock);
+ 	gss_release_msg(gss_msg);
+ out:
+@@ -1117,6 +1138,23 @@ static int gss_renew_cred(struct rpc_tas
+ 	return 0;
+ }
+ 
++/* Is cred a negative entry whose retry delay has not yet elapsed? */
++static int gss_cred_is_negative_entry(struct rpc_cred *cred)
++{
++	unsigned long now, stamped, retry_at;
++	struct gss_cred *gss_cred;
++
++	if (!test_bit(RPCAUTH_CRED_NEGATIVE, &cred->cr_flags))
++		return 0;
++
++	now = jiffies;
++	gss_cred = container_of(cred, struct gss_cred, gc_base);
++	stamped = gss_cred->gc_upcall_timestamp;
++	retry_at = stamped + gss_expired_cred_retry_delay * HZ;
++
++	return time_in_range_open(now, stamped, retry_at) ? 1 : 0;
++}
++
+ /*
+ * Refresh credentials. XXX - finish
+ */
+@@ -1126,6 +1164,9 @@ gss_refresh(struct rpc_task *task)
+ 	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ 	int ret = 0;
+ 
++	if (gss_cred_is_negative_entry(cred))
++		return -EKEYEXPIRED;
++
+ 	if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) &&
+ 			!test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) {
+ 		ret = gss_renew_cred(task);
+@@ -1316,15 +1357,21 @@ gss_wrap_req_priv(struct rpc_cred *cred,
+ 	inpages = snd_buf->pages + first;
+ 	snd_buf->pages = rqstp->rq_enc_pages;
+ 	snd_buf->page_base -= first << PAGE_CACHE_SHIFT;
+-	/* Give the tail its own page, in case we need extra space in the
+-	 * head when wrapping: */
++	/*
++	 * Give the tail its own page, in case we need extra space in the
++	 * head when wrapping:
++	 *
++	 * call_allocate() allocates twice the slack space required
++	 * by the authentication flavor to rq_callsize.
++	 * For GSS, slack is GSS_CRED_SLACK.
++	 */
+ 	if (snd_buf->page_len || snd_buf->tail[0].iov_len) {
+ 		tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]);
+ 		memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
+ 		snd_buf->tail[0].iov_base = tmp;
+ 	}
+ 	maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
+-	/* RPC_SLACK_SPACE should prevent this ever happening: */
++	/* slack space should prevent this ever happening: */
+ 	BUG_ON(snd_buf->len > snd_buf->buflen);
+ 	status = -EIO;
+ 	/* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
+@@ -1573,5 +1620,11 @@ static void __exit exit_rpcsec_gss(void)
+ }
+ 
+ MODULE_LICENSE("GPL");
++module_param_named(expired_cred_retry_delay,
++		   gss_expired_cred_retry_delay,
++		   uint, 0644);
++MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until "
++		"the RPC engine retries an expired credential");
++
+ module_init(init_rpcsec_gss)
+ module_exit(exit_rpcsec_gss)
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_crypto.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_crypto.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_crypto.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_crypto.c	2010-08-23 11:01:00.390553891 -0400
+@@ -1,7 +1,7 @@
+ /*
+  *  linux/net/sunrpc/gss_krb5_crypto.c
+  *
+- *  Copyright (c) 2000 The Regents of the University of Michigan.
++ *  Copyright (c) 2000-2008 The Regents of the University of Michigan.
+  *  All rights reserved.
+  *
+  *  Andy Adamson   <andros@umich.edu>
+@@ -41,6 +41,7 @@
+ #include <linux/crypto.h>
+ #include <linux/highmem.h>
+ #include <linux/pagemap.h>
++#include <linux/random.h>
+ #include <linux/sunrpc/gss_krb5.h>
+ #include <linux/sunrpc/xdr.h>
+ 
+@@ -58,13 +59,13 @@ krb5_encrypt(
+ {
+ 	u32 ret = -EINVAL;
+ 	struct scatterlist sg[1];
+-	u8 local_iv[16] = {0};
++	u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
+ 	struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
+ 
+ 	if (length % crypto_blkcipher_blocksize(tfm) != 0)
+ 		goto out;
+ 
+-	if (crypto_blkcipher_ivsize(tfm) > 16) {
++	if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
+ 		dprintk("RPC:       gss_k5encrypt: tfm iv size too large %d\n",
+ 			crypto_blkcipher_ivsize(tfm));
+ 		goto out;
+@@ -92,13 +93,13 @@ krb5_decrypt(
+ {
+ 	u32 ret = -EINVAL;
+ 	struct scatterlist sg[1];
+-	u8 local_iv[16] = {0};
++	u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
+ 	struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
+ 
+ 	if (length % crypto_blkcipher_blocksize(tfm) != 0)
+ 		goto out;
+ 
+-	if (crypto_blkcipher_ivsize(tfm) > 16) {
++	if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
+ 		dprintk("RPC:       gss_k5decrypt: tfm iv size too large %d\n",
+ 			crypto_blkcipher_ivsize(tfm));
+ 		goto out;
+@@ -123,21 +124,155 @@ checksummer(struct scatterlist *sg, void
+ 	return crypto_hash_update(desc, sg, sg->length);
+ }
+ 
+-/* checksum the plaintext data and hdrlen bytes of the token header */
+-s32
+-make_checksum(char *cksumname, char *header, int hdrlen, struct xdr_buf *body,
+-		   int body_offset, struct xdr_netobj *cksum)
++/* Store the number mapped from @usage in @salt, low byte first; 0 or -EINVAL. */
++static int
++arcfour_hmac_md5_usage_to_salt(unsigned int usage, u8 salt[4])
++{
++	unsigned int ms_usage;
++
++	switch (usage) {
++	case KG_USAGE_SIGN:
++		ms_usage = 15;
++		break;
++	case KG_USAGE_SEAL:
++		ms_usage = 13;
++		break;
++	default:
++		return -EINVAL;	/* only SIGN and SEAL have a mapping */
++	}
++	salt[0] = (ms_usage >> 0) & 0xff;
++	salt[1] = (ms_usage >> 8) & 0xff;
++	salt[2] = (ms_usage >> 16) & 0xff;
++	salt[3] = (ms_usage >> 24) & 0xff;
++	return 0;
++}
++
++/* md5 over salt, header and body; then a keyed hash of that digest. */
++static u32
++make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
++		       struct xdr_buf *body, int body_offset, u8 *cksumkey,
++		       unsigned int usage, struct xdr_netobj *cksumout)
+ {
+-	struct hash_desc                desc; /* XXX add to ctx? */
++	struct hash_desc                desc;
+ 	struct scatterlist              sg[1];
+ 	int err;
++	u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	u8 rc4salt[4];
++	struct crypto_hash *md5;
++	struct crypto_hash *hmac_md5;
++
++	if (cksumkey == NULL)
++		return GSS_S_FAILURE;
++
++	if (cksumout->len < kctx->gk5e->cksumlength) {
++		dprintk("%s: checksum buffer length, %u, too small for %s\n",
++			__func__, cksumout->len, kctx->gk5e->name);
++		return GSS_S_FAILURE;
++	}
++
++	if (arcfour_hmac_md5_usage_to_salt(usage, rc4salt)) {
++		dprintk("%s: invalid usage value %u\n", __func__, usage);
++		return GSS_S_FAILURE;
++	}
++
++	md5 = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
++	if (IS_ERR(md5))
++		return GSS_S_FAILURE;
++
++	hmac_md5 = crypto_alloc_hash(kctx->gk5e->cksum_name, 0,
++				     CRYPTO_ALG_ASYNC);
++	if (IS_ERR(hmac_md5)) {
++		crypto_free_hash(md5);
++		return GSS_S_FAILURE;
++	}
++
++	desc.tfm = md5;			/* pass 1: salt, header, body */
++	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out;
++	sg_init_one(sg, rc4salt, 4);
++	err = crypto_hash_update(&desc, sg, 4);
++	if (err)
++		goto out;
++
++	sg_init_one(sg, header, hdrlen);
++	err = crypto_hash_update(&desc, sg, hdrlen);
++	if (err)
++		goto out;
++	err = xdr_process_buf(body, body_offset, body->len - body_offset,
++			      checksummer, &desc);
++	if (err)
++		goto out;
++	err = crypto_hash_final(&desc, checksumdata);
++	if (err)
++		goto out;
++
++	desc.tfm = hmac_md5;		/* pass 2: keyed, over pass-1 digest */
++	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
++
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out;
++	err = crypto_hash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength);
++	if (err)
++		goto out;
++
++	sg_init_one(sg, checksumdata, crypto_hash_digestsize(md5));
++	err = crypto_hash_digest(&desc, sg, crypto_hash_digestsize(md5),
++				 checksumdata);	/* output overwrites input */
++	if (err)
++		goto out;
++
++	memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
++	cksumout->len = kctx->gk5e->cksumlength;
++out:
++	crypto_free_hash(md5);
++	crypto_free_hash(hmac_md5);
++	return err ? GSS_S_FAILURE : 0;
++}
++
++/*
++ * checksum the plaintext data and hdrlen bytes of the token header
++ * The checksum is performed over the first 8 bytes of the
++ * gss token header and then over the data body
++ */
++u32
++make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
++	      struct xdr_buf *body, int body_offset, u8 *cksumkey,
++	      unsigned int usage, struct xdr_netobj *cksumout)
++{
++	struct hash_desc                desc;
++	struct scatterlist              sg[1];
++	int err;
++	u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	unsigned int checksumlen;
++
++	if (kctx->gk5e->ctype == CKSUMTYPE_HMAC_MD5_ARCFOUR)
++		return make_checksum_hmac_md5(kctx, header, hdrlen,
++					      body, body_offset,
++					      cksumkey, usage, cksumout);
++
++	if (cksumout->len < kctx->gk5e->cksumlength) {
++		dprintk("%s: checksum buffer length, %u, too small for %s\n",
++			__func__, cksumout->len, kctx->gk5e->name);
++		return GSS_S_FAILURE;
++	}
+ 
+-	desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
++	desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+ 	if (IS_ERR(desc.tfm))
+ 		return GSS_S_FAILURE;
+-	cksum->len = crypto_hash_digestsize(desc.tfm);
+ 	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ 
++	checksumlen = crypto_hash_digestsize(desc.tfm);
++
++	if (cksumkey != NULL) {
++		err = crypto_hash_setkey(desc.tfm, cksumkey,
++					 kctx->gk5e->keylength);
++		if (err)
++			goto out;
++	}
++
+ 	err = crypto_hash_init(&desc);
+ 	if (err)
+ 		goto out;
+@@ -149,15 +284,109 @@ make_checksum(char *cksumname, char *hea
+ 			      checksummer, &desc);
+ 	if (err)
+ 		goto out;
+-	err = crypto_hash_final(&desc, cksum->data);
++	err = crypto_hash_final(&desc, checksumdata);
++	if (err)
++		goto out;
+ 
++	switch (kctx->gk5e->ctype) {
++	case CKSUMTYPE_RSA_MD5:
++		err = kctx->gk5e->encrypt(kctx->seq, NULL, checksumdata,
++					  checksumdata, checksumlen);
++		if (err)
++			goto out;
++		memcpy(cksumout->data,
++		       checksumdata + checksumlen - kctx->gk5e->cksumlength,
++		       kctx->gk5e->cksumlength);
++		break;
++	case CKSUMTYPE_HMAC_SHA1_DES3:
++		memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
++		break;
++	default:
++		BUG();
++		break;
++	}
++	cksumout->len = kctx->gk5e->cksumlength;
++out:
++	crypto_free_hash(desc.tfm);
++	return err ? GSS_S_FAILURE : 0;
++}
++
++/*
++ * checksum the plaintext data and hdrlen bytes of the token header
++ * Per rfc4121, sec. 4.2.4, the checksum is performed over the data
++ * body then over the first 16 octets of the MIC token
++ * Inclusion of the header data in the calculation of the
++ * checksum is optional.
++ */
++u32
++make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
++		 struct xdr_buf *body, int body_offset, u8 *cksumkey,
++		 unsigned int usage, struct xdr_netobj *cksumout)
++{
++	struct hash_desc desc;
++	struct scatterlist sg[1];
++	int err;
++	u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	unsigned int checksumlen;
++
++	if (kctx->gk5e->keyed_cksum == 0) {
++		dprintk("%s: expected keyed hash for %s\n",
++			__func__, kctx->gk5e->name);
++		return GSS_S_FAILURE;
++	}
++	if (cksumkey == NULL) {
++		dprintk("%s: no key supplied for %s\n",
++			__func__, kctx->gk5e->name);
++		return GSS_S_FAILURE;
++	}
++
++	desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0,
++							CRYPTO_ALG_ASYNC);
++	if (IS_ERR(desc.tfm))
++		return GSS_S_FAILURE;
++	checksumlen = crypto_hash_digestsize(desc.tfm);
++	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
++
++	err = crypto_hash_setkey(desc.tfm, cksumkey, kctx->gk5e->keylength);
++	if (err)
++		goto out;
++
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out;
++	err = xdr_process_buf(body, body_offset, body->len - body_offset,
++			      checksummer, &desc);
++	if (err)
++		goto out;
++	if (header != NULL) {	/* hashed after the body, when given */
++		sg_init_one(sg, header, hdrlen);
++		err = crypto_hash_update(&desc, sg, hdrlen);
++		if (err)
++			goto out;
++	}
++	err = crypto_hash_final(&desc, checksumdata);
++	if (err)
++		goto out;
++	/* NOTE(review): unlike make_checksum(), cksumout->len is not checked */
++	cksumout->len = kctx->gk5e->cksumlength;
++
++	switch (kctx->gk5e->ctype) {
++	case CKSUMTYPE_HMAC_SHA1_96_AES128:
++	case CKSUMTYPE_HMAC_SHA1_96_AES256:
++		/* note that this truncates the hash */
++		memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
++		break;
++	default:
++		BUG();	/* any other ctype reaching here is treated as a bug */
++		break;
++	}
+ out:
+ 	crypto_free_hash(desc.tfm);
+ 	return err ? GSS_S_FAILURE : 0;
+ }
+ 
+ struct encryptor_desc {
+-	u8 iv[8]; /* XXX hard-coded blocksize */
++	u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
+ 	struct blkcipher_desc desc;
+ 	int pos;
+ 	struct xdr_buf *outbuf;
+@@ -198,7 +427,7 @@ encryptor(struct scatterlist *sg, void *
+ 	desc->fraglen += sg->length;
+ 	desc->pos += sg->length;
+ 
+-	fraglen = thislen & 7; /* XXX hardcoded blocksize */
++	fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1);
+ 	thislen -= fraglen;
+ 
+ 	if (thislen == 0)
+@@ -256,7 +485,7 @@ gss_encrypt_xdr_buf(struct crypto_blkcip
+ }
+ 
+ struct decryptor_desc {
+-	u8 iv[8]; /* XXX hard-coded blocksize */
++	u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
+ 	struct blkcipher_desc desc;
+ 	struct scatterlist frags[4];
+ 	int fragno;
+@@ -278,7 +507,7 @@ decryptor(struct scatterlist *sg, void *
+ 	desc->fragno++;
+ 	desc->fraglen += sg->length;
+ 
+-	fraglen = thislen & 7; /* XXX hardcoded blocksize */
++	fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1);
+ 	thislen -= fraglen;
+ 
+ 	if (thislen == 0)
+@@ -325,3 +554,437 @@ gss_decrypt_xdr_buf(struct crypto_blkcip
+ 
+ 	return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
+ }
++
++/*
++ * This function makes the assumption that it was ultimately called
++ * from gss_wrap().
++ *
++ * The client auth_gss code moves any existing tail data into a
++ * separate page before calling gss_wrap.
++ * The server svcauth_gss code ensures that both the head and the
++ * tail have slack space of RPC_MAX_AUTH_SIZE before calling gss_wrap.
++ *
++ * Even with that guarantee, this function may be called more than
++ * once in the processing of gss_wrap().  The best we can do is
++ * verify at compile-time (see GSS_KRB5_SLACK_CHECK) that the
++ * largest expected shift will fit within RPC_MAX_AUTH_SIZE.
++ * At run-time we can verify that a single invocation of this
++ * function doesn't attempt to use more than RPC_MAX_AUTH_SIZE.
++ */
++
++/* Open a shiftlen-byte gap at offset base in buf's head; always returns 0. */
++int
++xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen)
++{
++	u8 *gap;
++
++	BUILD_BUG_ON(GSS_KRB5_MAX_SLACK_NEEDED > RPC_MAX_AUTH_SIZE);
++
++	if (shiftlen != 0) {
++		BUG_ON(shiftlen > RPC_MAX_AUTH_SIZE);
++
++		/* slide everything from base onward up by shiftlen bytes */
++		gap = buf->head[0].iov_base + base;
++		memmove(gap + shiftlen, gap, buf->head[0].iov_len - base);
++		buf->head[0].iov_len += shiftlen;
++		buf->len += shiftlen;
++	}
++
++	return 0;
++}
++
++/* Encrypt or decrypt, in one call, the last len <= 2 blocks of buf from offset. */
++static u32
++gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf,
++		   u32 offset, u8 *iv, struct page **pages, int encrypt)
++{
++	u32 ret;
++	struct scatterlist sg[1];
++	struct blkcipher_desc desc = { .tfm = cipher, .info = iv };
++	u8 data[crypto_blkcipher_blocksize(cipher) * 2];
++	struct page **save_pages;
++	u32 len = buf->len - offset;
++
++	BUG_ON(len > crypto_blkcipher_blocksize(cipher) * 2);
++
++	/*
++	 * For encryption, we want to read from the cleartext
++	 * page cache pages, and write the encrypted data to
++	 * the supplied xdr_buf pages.
++	 */
++	save_pages = buf->pages;
++	if (encrypt)
++		buf->pages = pages;
++
++	ret = read_bytes_from_xdr_buf(buf, offset, data, len);
++	buf->pages = save_pages;	/* undo the swap before any exit */
++	if (ret)
++		goto out;
++
++	sg_init_one(sg, data, len);
++
++	if (encrypt)
++		ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, len);
++	else
++		ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, len);
++	if (ret)
++		goto out;
++
++	/* the result is written through buf's own (restored) pages */
++	ret = write_bytes_to_xdr_buf(buf, offset, data, len);
++
++out:
++	return ret;
++}
++
++/* Wrap buf in place: confounder, filler, header copy, HMAC, then encrypt. */
++u32
++gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
++		     struct xdr_buf *buf, int ec, struct page **pages)
++{
++	u32 err;
++	struct xdr_netobj hmac;
++	u8 *cksumkey;
++	u8 *ecptr;
++	struct crypto_blkcipher *cipher, *aux_cipher;
++	int blocksize;
++	struct page **save_pages;
++	int nblocks, nbytes;
++	struct encryptor_desc desc;
++	u32 cbcbytes;
++	unsigned int usage;
++
++	if (kctx->initiate) {
++		cipher = kctx->initiator_enc;
++		aux_cipher = kctx->initiator_enc_aux;
++		cksumkey = kctx->initiator_integ;
++		usage = KG_USAGE_INITIATOR_SEAL;
++	} else {
++		cipher = kctx->acceptor_enc;
++		aux_cipher = kctx->acceptor_enc_aux;
++		cksumkey = kctx->acceptor_integ;
++		usage = KG_USAGE_ACCEPTOR_SEAL;
++	}
++	blocksize = crypto_blkcipher_blocksize(cipher);
++
++	/* hide the gss token header and insert the confounder */
++	offset += GSS_KRB5_TOK_HDR_LEN;
++	if (xdr_extend_head(buf, offset, kctx->gk5e->conflen))
++		return GSS_S_FAILURE;
++	gss_krb5_make_confounder(buf->head[0].iov_base + offset, kctx->gk5e->conflen);
++	offset -= GSS_KRB5_TOK_HDR_LEN;
++
++	if (buf->tail[0].iov_base != NULL) {
++		ecptr = buf->tail[0].iov_base + buf->tail[0].iov_len;
++	} else {
++		buf->tail[0].iov_base = buf->head[0].iov_base
++							+ buf->head[0].iov_len;
++		buf->tail[0].iov_len = 0;
++		ecptr = buf->tail[0].iov_base;
++	}
++
++	memset(ecptr, 'X', ec);
++	buf->tail[0].iov_len += ec;
++	buf->len += ec;
++
++	/* copy plaintext gss token header after filler (if any) */
++	memcpy(ecptr + ec, buf->head[0].iov_base + offset,
++						GSS_KRB5_TOK_HDR_LEN);
++	buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN;
++	buf->len += GSS_KRB5_TOK_HDR_LEN;
++
++	/* Do the HMAC */
++	hmac.len = GSS_KRB5_MAX_CKSUM_LEN;
++	hmac.data = buf->tail[0].iov_base + buf->tail[0].iov_len; /* past tail */
++
++	/*
++	 * When we are called, pages points to the real page cache
++	 * data -- which we can't go and encrypt!  buf->pages points
++	 * to scratch pages which we are going to send off to the
++	 * client/server.  Swap in the plaintext pages to calculate
++	 * the hmac.
++	 */
++	save_pages = buf->pages;
++	buf->pages = pages;
++
++	err = make_checksum_v2(kctx, NULL, 0, buf,
++			       offset + GSS_KRB5_TOK_HDR_LEN,
++			       cksumkey, usage, &hmac);
++	buf->pages = save_pages;
++	if (err)
++		return GSS_S_FAILURE;
++
++	nbytes = buf->len - offset - GSS_KRB5_TOK_HDR_LEN;
++	nblocks = (nbytes + blocksize - 1) / blocksize;
++	cbcbytes = 0;
++	if (nblocks > 2)	/* all but the last two blocks use aux_cipher */
++		cbcbytes = (nblocks - 2) * blocksize;
++	memset(desc.iv, 0, sizeof(desc.iv));
++
++	if (cbcbytes) {
++		desc.pos = offset + GSS_KRB5_TOK_HDR_LEN;
++		desc.fragno = 0;
++		desc.fraglen = 0;
++		desc.pages = pages;
++		desc.outbuf = buf;
++		desc.desc.info = desc.iv;
++		desc.desc.flags = 0;
++		desc.desc.tfm = aux_cipher;
++
++		sg_init_table(desc.infrags, 4);
++		sg_init_table(desc.outfrags, 4);
++
++		err = xdr_process_buf(buf, offset + GSS_KRB5_TOK_HDR_LEN,
++				      cbcbytes, encryptor, &desc);
++		if (err)
++			goto out_err;
++	}
++
++	/* Make sure IV carries forward from any CBC results. */
++	err = gss_krb5_cts_crypt(cipher, buf,
++				 offset + GSS_KRB5_TOK_HDR_LEN + cbcbytes,
++				 desc.iv, pages, 1);
++	if (err) {
++		err = GSS_S_FAILURE;
++		goto out_err;
++	}
++
++	/* Now update buf to account for HMAC */
++	buf->tail[0].iov_len += kctx->gk5e->cksumlength;
++	buf->len += kctx->gk5e->cksumlength;
++
++out_err:
++	if (err)
++		err = GSS_S_FAILURE;
++	return err;
++}
++
++/* Decrypt buf in place from offset and verify its HMAC.  Returns 0 and sets
++ * headskip/tailskip, else GSS_S_BAD_SIG or GSS_S_FAILURE. */
++u32
++gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
++		     u32 *headskip, u32 *tailskip)
++{
++	struct xdr_buf subbuf;
++	u32 ret = 0;
++	u8 *cksum_key;
++	struct crypto_blkcipher *cipher, *aux_cipher;
++	struct xdr_netobj our_hmac_obj;
++	u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
++	u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
++	int nblocks, blocksize, cbcbytes;
++	struct decryptor_desc desc;
++	unsigned int usage;
++
++	if (kctx->initiate) {
++		cipher = kctx->acceptor_enc;
++		aux_cipher = kctx->acceptor_enc_aux;
++		cksum_key = kctx->acceptor_integ;
++		usage = KG_USAGE_ACCEPTOR_SEAL;
++	} else {
++		cipher = kctx->initiator_enc;
++		aux_cipher = kctx->initiator_enc_aux;
++		cksum_key = kctx->initiator_integ;
++		usage = KG_USAGE_INITIATOR_SEAL;
++	}
++	blocksize = crypto_blkcipher_blocksize(cipher);
++
++	/* create a segment skipping the header and leaving out the checksum */
++	if (xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN,
++			       (buf->len - offset - GSS_KRB5_TOK_HDR_LEN -
++				kctx->gk5e->cksumlength)))
++		return GSS_S_FAILURE;	/* range does not fit in buf */
++
++	nblocks = (subbuf.len + blocksize - 1) / blocksize;
++	cbcbytes = 0;
++	if (nblocks > 2)	/* all but the last two blocks use aux_cipher */
++		cbcbytes = (nblocks - 2) * blocksize;
++
++	memset(desc.iv, 0, sizeof(desc.iv));
++
++	if (cbcbytes) {
++		desc.fragno = 0;
++		desc.fraglen = 0;
++		desc.desc.info = desc.iv;
++		desc.desc.flags = 0;
++		desc.desc.tfm = aux_cipher;
++
++		sg_init_table(desc.frags, 4);
++
++		ret = xdr_process_buf(&subbuf, 0, cbcbytes, decryptor, &desc);
++		if (ret)
++			goto out_err;
++	}
++
++	/* Make sure IV carries forward from any CBC results. */
++	ret = gss_krb5_cts_crypt(cipher, &subbuf, cbcbytes, desc.iv, NULL, 0);
++	if (ret)
++		goto out_err;
++
++	/* Calculate our hmac over the plaintext data */
++	our_hmac_obj.len = sizeof(our_hmac);
++	our_hmac_obj.data = our_hmac;
++
++	ret = make_checksum_v2(kctx, NULL, 0, &subbuf, 0,
++			       cksum_key, usage, &our_hmac_obj);
++	if (ret)
++		goto out_err;
++
++	/* Get the packet's hmac value */
++	ret = read_bytes_from_xdr_buf(buf, buf->len - kctx->gk5e->cksumlength,
++				      pkt_hmac, kctx->gk5e->cksumlength);
++	if (ret)
++		goto out_err;
++
++	if (memcmp(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
++		ret = GSS_S_BAD_SIG;
++		goto out_err;
++	}
++	*headskip = kctx->gk5e->conflen;
++	*tailskip = kctx->gk5e->cksumlength;
++out_err:
++	/* every failure except a checksum mismatch is reported generically */
++	if (ret && ret != GSS_S_BAD_SIG)
++		ret = GSS_S_FAILURE;
++	return ret;
++}
++
++/*
++ * Compute Kseq from the session key (kctx->Ksess) and the first 8 bytes of
++ * cksum, then set it as the key of the given cipher.  Returns 0 or an error.
++ */
++int
++krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
++		       unsigned char *cksum)
++{
++	struct crypto_hash *hmac;
++	struct hash_desc desc;
++	struct scatterlist sg[1];
++	u8 Kseq[GSS_KRB5_MAX_KEYLEN];
++	u32 zeroconstant = 0;
++	int err;
++
++	dprintk("%s: entered\n", __func__);
++
++	hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
++	if (IS_ERR(hmac)) {
++		dprintk("%s: error %ld, allocating hash '%s'\n",
++			__func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
++		return PTR_ERR(hmac);
++	}
++
++	desc.tfm = hmac;
++	desc.flags = 0;
++
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out_err;
++
++	/* Compute intermediate Kseq from session key */
++	err = crypto_hash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	sg_init_table(sg, 1);
++	sg_set_buf(sg, &zeroconstant, 4);	/* four zero bytes as input */
++
++	err = crypto_hash_digest(&desc, sg, 4, Kseq);
++	if (err)
++		goto out_err;
++
++	/* Compute final Kseq from the checksum and intermediate Kseq */
++	err = crypto_hash_setkey(hmac, Kseq, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	sg_set_buf(sg, cksum, 8);	/* only the first 8 bytes are hashed */
++
++	err = crypto_hash_digest(&desc, sg, 8, Kseq);
++	if (err)
++		goto out_err;
++
++	err = crypto_blkcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	err = 0;
++
++out_err:
++	crypto_free_hash(hmac);
++	dprintk("%s: returning %d\n", __func__, err);
++	return err;
++}
++
++/*
++ * Compute Kcrypt from the session key (each byte XORed with 0xf0) and the
++ * big-endian seqnum, then set it as the key of the given cipher.
++ */
++int
++krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
++		       s32 seqnum)
++{
++	struct crypto_hash *hmac;
++	struct hash_desc desc;
++	struct scatterlist sg[1];
++	u8 Kcrypt[GSS_KRB5_MAX_KEYLEN];
++	u8 zeroconstant[4] = {0};
++	u8 seqnumarray[4];
++	int err, i;
++
++	dprintk("%s: entered, seqnum %u\n", __func__, seqnum);
++
++	hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
++	if (IS_ERR(hmac)) {
++		dprintk("%s: error %ld, allocating hash '%s'\n",
++			__func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
++		return PTR_ERR(hmac);
++	}
++
++	desc.tfm = hmac;
++	desc.flags = 0;
++
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out_err;
++
++	/* Compute intermediate Kcrypt from session key */
++	for (i = 0; i < kctx->gk5e->keylength; i++)
++		Kcrypt[i] = kctx->Ksess[i] ^ 0xf0;
++
++	err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	sg_init_table(sg, 1);
++	sg_set_buf(sg, zeroconstant, 4);
++
++	err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
++	if (err)
++		goto out_err;
++
++	/* Compute final Kcrypt from the seqnum and intermediate Kcrypt */
++	err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	/* most significant byte first */
++	seqnumarray[0] = (unsigned char) ((seqnum >> 24) & 0xff);
++	seqnumarray[1] = (unsigned char) ((seqnum >> 16) & 0xff);
++	seqnumarray[2] = (unsigned char) ((seqnum >> 8) & 0xff);
++	seqnumarray[3] = (unsigned char) ((seqnum >> 0) & 0xff);
++	sg_set_buf(sg, seqnumarray, 4);
++
++	err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
++	if (err)
++		goto out_err;
++
++	err = crypto_blkcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	err = 0;
++
++out_err:
++	crypto_free_hash(hmac);
++	dprintk("%s: returning %d\n", __func__, err);
++	return err;
++}
++
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_keys.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_keys.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_keys.c.orig	2010-08-23 11:01:00.390553891 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_keys.c	2010-08-23 11:01:00.391564137 -0400
+@@ -0,0 +1,336 @@
++/*
++ * COPYRIGHT (c) 2008
++ * The Regents of the University of Michigan
++ * ALL RIGHTS RESERVED
++ *
++ * Permission is granted to use, copy, create derivative works
++ * and redistribute this software and such derivative works
++ * for any purpose, so long as the name of The University of
++ * Michigan is not used in any advertising or publicity
++ * pertaining to the use of distribution of this software
++ * without specific, written prior authorization.  If the
++ * above copyright notice or any other identification of the
++ * University of Michigan is included in any copy of any
++ * portion of this software, then the disclaimer below must
++ * also be included.
++ *
++ * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
++ * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
++ * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
++ * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
++ * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
++ * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
++ * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
++ * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
++ * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
++ * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGES.
++ */
++
++/*
++ * Copyright (C) 1998 by the FundsXpress, INC.
++ *
++ * All rights reserved.
++ *
++ * Export of this software from the United States of America may require
++ * a specific license from the United States Government.  It is the
++ * responsibility of any person or organization contemplating export to
++ * obtain such a license before exporting.
++ *
++ * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
++ * distribute this software and its documentation for any purpose and
++ * without fee is hereby granted, provided that the above copyright
++ * notice appear in all copies and that both that copyright notice and
++ * this permission notice appear in supporting documentation, and that
++ * the name of FundsXpress. not be used in advertising or publicity pertaining
++ * to distribution of the software without specific, written prior
++ * permission.  FundsXpress makes no representations about the suitability of
++ * this software for any purpose.  It is provided "as is" without express
++ * or implied warranty.
++ *
++ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
++ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
++ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
++ */
++
++#include <linux/err.h>
++#include <linux/types.h>
++#include <linux/crypto.h>
++#include <linux/sunrpc/gss_krb5.h>
++#include <linux/sunrpc/xdr.h>
++
++#ifdef RPC_DEBUG
++# define RPCDBG_FACILITY        RPCDBG_AUTH
++#endif
++
++/*
++ * This is the n-fold function as described in rfc3961, sec 5.1
++ * Taken from MIT Kerberos and modified.
++ */
++
++static void krb5_nfold(u32 inbits, const u8 *in,
++		       u32 outbits, u8 *out)
++{
++	int a, b, c, lcm;
++	int byte, i, msbit;
++
++	/* the code below is more readable if I make these bytes
++	   instead of bits */
++
++	inbits >>= 3;
++	outbits >>= 3;
++
++	/* first compute lcm(n,k) */
++
++	a = outbits;
++	b = inbits;
++
++	while (b != 0) {	/* Euclid's algorithm: a ends up as the gcd */
++		c = b;
++		b = a%b;
++		a = c;
++	}
++
++	lcm = outbits*inbits/a;
++
++	/* now do the real work */
++
++	memset(out, 0, outbits);
++	byte = 0;	/* running sum; carries between output bytes */
++
++	/* this will end up cycling through k lcm(k,n)/k times, which
++	   is correct */
++	for (i = lcm-1; i >= 0; i--) {
++		/* compute the msbit in k which gets added into this byte */
++		msbit = (
++			/* first, start with the msbit in the first,
++			 * unrotated byte */
++			 ((inbits << 3) - 1)
++			 /* then, for each byte, shift to the right
++			  * for each repetition */
++			 + (((inbits << 3) + 13) * (i/inbits))
++			 /* last, pick out the correct byte within
++			  * that shifted repetition */
++			 + ((inbits - (i % inbits)) << 3)
++			 ) % (inbits << 3);
++
++		/* pull out the byte value itself */
++		byte += (((in[((inbits - 1) - (msbit >> 3)) % inbits] << 8)|
++				  (in[((inbits) - (msbit >> 3)) % inbits]))
++				 >> ((msbit & 7) + 1)) & 0xff;
++
++		/* do the addition */
++		byte += out[i % outbits];
++		out[i % outbits] = byte & 0xff;
++
++		/* keep around the carry bit, if any */
++		byte >>= 8;
++
++	}
++
++	/* if there's a carry bit left over, add it back in */
++	if (byte) {
++		for (i = outbits - 1; i >= 0; i--) {
++			/* do the addition */
++			byte += out[i];
++			out[i] = byte & 0xff;
++
++			/* keep around the carry bit, if any */
++			byte >>= 8;
++		}
++	}
++}
++
++/*
++ * DK (derive_key) as described in rfc3961, sec 5.1; taken from MIT Kerberos
++ * and modified.  Returns 0 on success, nonzero on any failure.
++ */
++
++u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
++		    const struct xdr_netobj *inkey,
++		    struct xdr_netobj *outkey,
++		    const struct xdr_netobj *in_constant,
++		    gfp_t gfp_mask)
++{
++	size_t blocksize, keybytes, keylength, n;
++	unsigned char *inblockdata, *outblockdata, *rawkey;
++	struct xdr_netobj inblock, outblock;
++	struct crypto_blkcipher *cipher;
++	u32 ret = EINVAL;
++
++	blocksize = gk5e->blocksize;
++	keybytes = gk5e->keybytes;
++	keylength = gk5e->keylength;
++
++	if ((inkey->len != keylength) || (outkey->len != keylength))
++		goto err_return;
++
++	cipher = crypto_alloc_blkcipher(gk5e->encrypt_name, 0,
++					CRYPTO_ALG_ASYNC);
++	if (IS_ERR(cipher))
++		goto err_return;
++	if (crypto_blkcipher_setkey(cipher, inkey->data, inkey->len))
++		goto err_free_cipher;	/* cipher is allocated: release it */
++
++	/* allocate and set up buffers */
++
++	ret = ENOMEM;
++	inblockdata = kmalloc(blocksize, gfp_mask);
++	if (inblockdata == NULL)
++		goto err_free_cipher;
++
++	outblockdata = kmalloc(blocksize, gfp_mask);
++	if (outblockdata == NULL)
++		goto err_free_in;
++
++	rawkey = kmalloc(keybytes, gfp_mask);
++	if (rawkey == NULL)
++		goto err_free_out;
++
++	inblock.data = (char *) inblockdata;
++	inblock.len = blocksize;
++
++	outblock.data = (char *) outblockdata;
++	outblock.len = blocksize;
++
++	/* initialize the input block */
++
++	if (in_constant->len == inblock.len) {
++		memcpy(inblock.data, in_constant->data, inblock.len);
++	} else {
++		krb5_nfold(in_constant->len * 8, in_constant->data,
++			   inblock.len * 8, inblock.data);
++	}
++
++	/* loop encrypting the blocks until enough key bytes are generated */
++	n = 0;
++	while (n < keybytes) {
++		ret = (*(gk5e->encrypt))(cipher, NULL, inblock.data,
++					 outblock.data, inblock.len);
++		if (ret)	/* never build a key from an unwritten block */
++			goto err_free_raw;
++		if ((keybytes - n) <= outblock.len) {
++			memcpy(rawkey + n, outblock.data, (keybytes - n));
++			break;
++		}
++
++		memcpy(rawkey + n, outblock.data, outblock.len);
++		memcpy(inblock.data, outblock.data, outblock.len);
++		n += outblock.len;
++	}
++
++	/* postprocess the key */
++
++	inblock.data = (char *) rawkey;
++	inblock.len = keybytes;
++
++	BUG_ON(gk5e->mk_key == NULL);
++	ret = (*(gk5e->mk_key))(gk5e, &inblock, outkey);
++	if (ret) {
++		dprintk("%s: got %d from mk_key function for '%s'\n",
++			__func__, ret, gk5e->encrypt_name);
++		goto err_free_raw;
++	}
++
++	/* clean memory, free resources and exit */
++
++	ret = 0;
++
++err_free_raw:
++	memset(rawkey, 0, keybytes);
++	kfree(rawkey);
++err_free_out:
++	memset(outblockdata, 0, blocksize);
++	kfree(outblockdata);
++err_free_in:
++	memset(inblockdata, 0, blocksize);
++	kfree(inblockdata);
++err_free_cipher:
++	crypto_free_blkcipher(cipher);
++err_return:
++	return ret;
++}
++
++#define smask(step) ((1 << (step)) - 1)
++#define pstep(x, step) (((x) & smask(step)) ^ (((x) >> (step)) & smask(step)))
++#define parity_char(x) pstep(pstep(pstep((x), 4), 2), 1)
++
++/* Give each of the 8 key bytes odd parity by rewriting its lowest bit. */
++static void mit_des_fixup_key_parity(u8 key[8])
++{
++	u8 *p;
++
++	for (p = key; p < key + 8; p++)
++		*p = (*p & 0xfe) | (1 ^ parity_char(*p & 0xfe));
++}
++
++/*
++ * This is the des3 key derivation postprocess function: it expands 21
++ * bytes of randombits into a 24-byte key.  Returns 0 on success, or
++ * EINVAL (positive) when either length is wrong.
++ */
++u32 gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e,
++			   struct xdr_netobj *randombits,
++			   struct xdr_netobj *key)
++{
++	int group, bit;
++	u8 *k;
++
++	if (key->len != 24) {
++		dprintk("%s: key->len is %d\n", __func__, key->len);
++		return EINVAL;
++	}
++	if (randombits->len != 21) {
++		dprintk("%s: randombits->len is %d\n",
++			__func__, randombits->len);
++		return EINVAL;
++	}
++
++	/*
++	 * Each 7-byte group of random bits becomes one 8-byte DES key:
++	 * the seven bytes are copied as-is, the eighth collects their
++	 * low bits in bit positions 1..7, then parity is fixed up.
++	 */
++	for (group = 0; group < 3; group++) {
++		k = key->data + group * 8;
++		memcpy(k, randombits->data + group * 7, 7);
++
++		k[7] = 0;
++		for (bit = 0; bit < 7; bit++)
++			k[7] |= (k[bit] & 1) << (bit + 1);
++
++		mit_des_fixup_key_parity(k);
++	}
++	return 0;
++}
++
++/*
++ * This is the aes key derivation postprocess function
++ */
++u32 gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e,
++			  struct xdr_netobj *randombits,
++			  struct xdr_netobj *key)
++{
++	u32 ret = EINVAL;
++
++	if (key->len != 16 && key->len != 32) {
++		dprintk("%s: key->len is %d\n", __func__, key->len);
++		goto err_out;
++	}
++	if (randombits->len != 16 && randombits->len != 32) {
++		dprintk("%s: randombits->len is %d\n",
++			__func__, randombits->len);
++		goto err_out;
++	}
++	if (randombits->len != key->len) {
++		dprintk("%s: randombits->len is %d, key->len is %d\n",
++			__func__, randombits->len, key->len);
++		goto err_out;
++	}
++	memcpy(key->data, randombits->data, key->len);
++	ret = 0;
++err_out:
++	return ret;
++}
++
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_mech.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_mech.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_mech.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_mech.c	2010-08-23 11:01:00.392564136 -0400
+@@ -1,7 +1,7 @@
+ /*
+  *  linux/net/sunrpc/gss_krb5_mech.c
+  *
+- *  Copyright (c) 2001 The Regents of the University of Michigan.
++ *  Copyright (c) 2001-2008 The Regents of the University of Michigan.
+  *  All rights reserved.
+  *
+  *  Andy Adamson <andros@umich.edu>
+@@ -48,6 +48,143 @@
+ # define RPCDBG_FACILITY	RPCDBG_AUTH
+ #endif
+ 
++static struct gss_api_mech gss_kerberos_mech;	/* forward declaration */
++
++static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
++	/*
++	 * DES (All DES enctypes are mapped to the same gss functionality)
++	 */
++	{
++	  .etype = ENCTYPE_DES_CBC_RAW,
++	  .ctype = CKSUMTYPE_RSA_MD5,
++	  .name = "des-cbc-crc",
++	  .encrypt_name = "cbc(des)",
++	  .cksum_name = "md5",
++	  .encrypt = krb5_encrypt,
++	  .decrypt = krb5_decrypt,
++	  .mk_key = NULL,
++	  .signalg = SGN_ALG_DES_MAC_MD5,
++	  .sealalg = SEAL_ALG_DES,
++	  .keybytes = 7,
++	  .keylength = 8,
++	  .blocksize = 8,
++	  .conflen = 8,
++	  .cksumlength = 8,
++	  .keyed_cksum = 0,
++	},
++	/*
++	 * RC4-HMAC
++	 */
++	{
++	  .etype = ENCTYPE_ARCFOUR_HMAC,
++	  .ctype = CKSUMTYPE_HMAC_MD5_ARCFOUR,
++	  .name = "rc4-hmac",
++	  .encrypt_name = "ecb(arc4)",
++	  .cksum_name = "hmac(md5)",
++	  .encrypt = krb5_encrypt,
++	  .decrypt = krb5_decrypt,
++	  .mk_key = NULL,
++	  .signalg = SGN_ALG_HMAC_MD5,
++	  .sealalg = SEAL_ALG_MICROSOFT_RC4,
++	  .keybytes = 16,
++	  .keylength = 16,
++	  .blocksize = 1,
++	  .conflen = 8,
++	  .cksumlength = 8,
++	  .keyed_cksum = 1,
++	},
++	/*
++	 * 3DES
++	 */
++	{
++	  .etype = ENCTYPE_DES3_CBC_RAW,
++	  .ctype = CKSUMTYPE_HMAC_SHA1_DES3,
++	  .name = "des3-hmac-sha1",
++	  .encrypt_name = "cbc(des3_ede)",
++	  .cksum_name = "hmac(sha1)",
++	  .encrypt = krb5_encrypt,
++	  .decrypt = krb5_decrypt,
++	  .mk_key = gss_krb5_des3_make_key,
++	  .signalg = SGN_ALG_HMAC_SHA1_DES3_KD,
++	  .sealalg = SEAL_ALG_DES3KD,
++	  .keybytes = 21,
++	  .keylength = 24,
++	  .blocksize = 8,
++	  .conflen = 8,
++	  .cksumlength = 20,
++	  .keyed_cksum = 1,
++	},
++	/*
++	 * AES128
++	 */
++	{
++	  .etype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
++	  .ctype = CKSUMTYPE_HMAC_SHA1_96_AES128,
++	  .name = "aes128-cts",
++	  .encrypt_name = "cts(cbc(aes))",
++	  .cksum_name = "hmac(sha1)",
++	  .encrypt = krb5_encrypt,
++	  .decrypt = krb5_decrypt,
++	  .mk_key = gss_krb5_aes_make_key,
++	  .encrypt_v2 = gss_krb5_aes_encrypt,
++	  .decrypt_v2 = gss_krb5_aes_decrypt,
++	  .signalg = -1,
++	  .sealalg = -1,
++	  .keybytes = 16,
++	  .keylength = 16,
++	  .blocksize = 16,
++	  .conflen = 16,
++	  .cksumlength = 12,
++	  .keyed_cksum = 1,
++	},
++	/*
++	 * AES256
++	 */
++	{
++	  .etype = ENCTYPE_AES256_CTS_HMAC_SHA1_96,
++	  .ctype = CKSUMTYPE_HMAC_SHA1_96_AES256,
++	  .name = "aes256-cts",
++	  .encrypt_name = "cts(cbc(aes))",
++	  .cksum_name = "hmac(sha1)",
++	  .encrypt = krb5_encrypt,
++	  .decrypt = krb5_decrypt,
++	  .mk_key = gss_krb5_aes_make_key,
++	  .encrypt_v2 = gss_krb5_aes_encrypt,
++	  .decrypt_v2 = gss_krb5_aes_decrypt,
++	  .signalg = -1,
++	  .sealalg = -1,
++	  .keybytes = 32,
++	  .keylength = 32,
++	  .blocksize = 16,
++	  .conflen = 16,
++	  .cksumlength = 12,
++	  .keyed_cksum = 1,
++	},
++};
++
++static const int num_supported_enctypes =
++	ARRAY_SIZE(supported_gss_krb5_enctypes);
++
++static int
++supported_gss_krb5_enctype(int etype)
++{
++	int i;
++	for (i = 0; i < num_supported_enctypes; i++)
++		if (supported_gss_krb5_enctypes[i].etype == etype)
++			return 1;
++	return 0;
++}
++
++static const struct gss_krb5_enctype *
++get_gss_krb5_enctype(int etype)
++{
++	int i;
++	for (i = 0; i < num_supported_enctypes; i++)
++		if (supported_gss_krb5_enctypes[i].etype == etype)
++			return &supported_gss_krb5_enctypes[i];
++	return NULL;
++}
++
+ static const void *
+ simple_get_bytes(const void *p, const void *end, void *res, int len)
+ {
+@@ -78,35 +215,45 @@ simple_get_netobj(const void *p, const v
+ }
+ 
+ static inline const void *
+-get_key(const void *p, const void *end, struct crypto_blkcipher **res)
++get_key(const void *p, const void *end,
++	struct krb5_ctx *ctx, struct crypto_blkcipher **res)
+ {
+ 	struct xdr_netobj	key;
+ 	int			alg;
+-	char			*alg_name;
+ 
+ 	p = simple_get_bytes(p, end, &alg, sizeof(alg));
+ 	if (IS_ERR(p))
+ 		goto out_err;
++
++	switch (alg) {
++	case ENCTYPE_DES_CBC_CRC:
++	case ENCTYPE_DES_CBC_MD4:
++	case ENCTYPE_DES_CBC_MD5:
++		/* Map all these key types to ENCTYPE_DES_CBC_RAW */
++		alg = ENCTYPE_DES_CBC_RAW;
++		break;
++	}
++
++	if (!supported_gss_krb5_enctype(alg)) {
++		printk(KERN_WARNING "gss_kerberos_mech: unsupported "
++			"encryption key algorithm %d\n", alg);
++		return ERR_PTR(-EINVAL); /* p is still a valid pointer here */
++	}
+ 	p = simple_get_netobj(p, end, &key);
+ 	if (IS_ERR(p))
+ 		goto out_err;
+ 
+-	switch (alg) {
+-		case ENCTYPE_DES_CBC_RAW:
+-			alg_name = "cbc(des)";
+-			break;
+-		default:
+-			printk("gss_kerberos_mech: unsupported algorithm %d\n", alg);
+-			goto out_err_free_key;
+-	}
+-	*res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC);
++	*res = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
++							CRYPTO_ALG_ASYNC);
+ 	if (IS_ERR(*res)) {
+-		printk("gss_kerberos_mech: unable to initialize crypto algorithm %s\n", alg_name);
++		printk(KERN_WARNING "gss_kerberos_mech: unable to initialize "
++			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
+ 		*res = NULL;
+ 		goto out_err_free_key;
+ 	}
+ 	if (crypto_blkcipher_setkey(*res, key.data, key.len)) {
+-		printk("gss_kerberos_mech: error setting key for crypto algorithm %s\n", alg_name);
++		printk(KERN_WARNING "gss_kerberos_mech: error setting key for "
++			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
+ 		goto out_err_free_tfm;
+ 	}
+ 
+@@ -123,56 +270,55 @@ out_err:
+ }
+ 
+ static int
+-gss_import_sec_context_kerberos(const void *p,
+-				size_t len,
+-				struct gss_ctx *ctx_id)
++gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
+ {
+-	const void *end = (const void *)((const char *)p + len);
+-	struct	krb5_ctx *ctx;
+ 	int tmp;
+ 
+-	if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) {
+-		p = ERR_PTR(-ENOMEM);
+-		goto out_err;
+-	}
+-
+ 	p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
++		goto out_err;
++
++	/* Old format supports only DES!  Any other enctype uses new format */
++	ctx->enctype = ENCTYPE_DES_CBC_RAW;
++
++	ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
++	if (ctx->gk5e == NULL)
++		return -EINVAL;	/* p is not an ERR_PTR; PTR_ERR(p) is garbage */
++
+ 	/* The downcall format was designed before we completely understood
+ 	 * the uses of the context fields; so it includes some stuff we
+ 	 * just give some minimal sanity-checking, and some we ignore
+ 	 * completely (like the next twenty bytes): */
+ 	if (unlikely(p + 20 > end || p + 20 < p))
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	p += 20;
+ 	p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	if (tmp != SGN_ALG_DES_MAC_MD5) {
+ 		p = ERR_PTR(-ENOSYS);
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	}
+ 	p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	if (tmp != SEAL_ALG_DES) {
+ 		p = ERR_PTR(-ENOSYS);
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	}
+ 	p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	p = simple_get_bytes(p, end, &ctx->seq_send, sizeof(ctx->seq_send));
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	p = simple_get_netobj(p, end, &ctx->mech_used);
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
+-	p = get_key(p, end, &ctx->enc);
++		goto out_err;
++	p = get_key(p, end, ctx, &ctx->enc);
+ 	if (IS_ERR(p))
+ 		goto out_err_free_mech;
+-	p = get_key(p, end, &ctx->seq);
++	p = get_key(p, end, ctx, &ctx->seq);
+ 	if (IS_ERR(p))
+ 		goto out_err_free_key1;
+ 	if (p != end) {
+@@ -180,9 +326,6 @@ gss_import_sec_context_kerberos(const vo
+ 		goto out_err_free_key2;
+ 	}
+ 
+-	ctx_id->internal_ctx_id = ctx;
+-
+-	dprintk("RPC:       Successfully imported new context.\n");
+ 	return 0;
+ 
+ out_err_free_key2:
+@@ -191,18 +334,378 @@ out_err_free_key1:
+ 	crypto_free_blkcipher(ctx->enc);
+ out_err_free_mech:
+ 	kfree(ctx->mech_used.data);
+-out_err_free_ctx:
+-	kfree(ctx);
+ out_err:
+ 	return PTR_ERR(p);
+ }
+ 
++struct crypto_blkcipher *
++context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key)
++{
++	struct crypto_blkcipher *cp;
++
++	cp = crypto_alloc_blkcipher(cname, 0, CRYPTO_ALG_ASYNC);
++	if (IS_ERR(cp)) {
++		dprintk("gss_kerberos_mech: unable to initialize "
++			"crypto algorithm %s\n", cname);
++		return NULL;
++	}
++	if (crypto_blkcipher_setkey(cp, key, ctx->gk5e->keylength)) {
++		dprintk("gss_kerberos_mech: error setting key for "
++			"crypto algorithm %s\n", cname);
++		crypto_free_blkcipher(cp);
++		return NULL;
++	}
++	return cp;
++}
++
++static inline void
++set_cdata(u8 cdata[GSS_KRB5_K5CLENGTH], u32 usage, u8 seed)
++{
++	cdata[0] = (usage>>24)&0xff;
++	cdata[1] = (usage>>16)&0xff;
++	cdata[2] = (usage>>8)&0xff;
++	cdata[3] = usage&0xff;
++	cdata[4] = seed;
++}
++
++static int
++context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask)
++{
++	struct xdr_netobj c, keyin, keyout;
++	u8 cdata[GSS_KRB5_K5CLENGTH];
++	u32 err;
++
++	c.len = GSS_KRB5_K5CLENGTH;
++	c.data = cdata;
++
++	keyin.data = ctx->Ksess;
++	keyin.len = ctx->gk5e->keylength;
++	keyout.len = ctx->gk5e->keylength;
++
++	/* seq uses the raw key */
++	ctx->seq = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
++					   ctx->Ksess);
++	if (ctx->seq == NULL)
++		goto out_err;
++
++	ctx->enc = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
++					   ctx->Ksess);
++	if (ctx->enc == NULL)
++		goto out_free_seq;
++
++	/* derive cksum */
++	set_cdata(cdata, KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM);
++	keyout.data = ctx->cksum;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving cksum key\n",
++			__func__, err);
++		goto out_free_enc;
++	}
++
++	return 0;
++
++out_free_enc:
++	crypto_free_blkcipher(ctx->enc);
++out_free_seq:
++	crypto_free_blkcipher(ctx->seq);
++out_err:
++	return -EINVAL;
++}
++
++/*
++ * Note that RC4 depends on deriving keys using the sequence
++ * number or the checksum of a token.  Therefore, the final keys
++ * cannot be calculated until the token is being constructed!
++ */
++static int
++context_derive_keys_rc4(struct krb5_ctx *ctx)
++{
++	struct crypto_hash *hmac;
++	char sigkeyconstant[] = "signaturekey";
++	int slen = strlen(sigkeyconstant) + 1;	/* include null terminator */
++	struct hash_desc desc;
++	struct scatterlist sg[1];
++	int err;
++
++	dprintk("RPC:       %s: entered\n", __func__);
++	/*
++	 * derive cksum (aka Ksign) key
++	 */
++	hmac = crypto_alloc_hash(ctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
++	if (IS_ERR(hmac)) {
++		dprintk("%s: error %ld allocating hash '%s'\n",
++			__func__, PTR_ERR(hmac), ctx->gk5e->cksum_name);
++		err = PTR_ERR(hmac);
++		goto out_err;
++	}
++
++	err = crypto_hash_setkey(hmac, ctx->Ksess, ctx->gk5e->keylength);
++	if (err)
++		goto out_err_free_hmac;
++
++	sg_init_table(sg, 1);
++	sg_set_buf(sg, sigkeyconstant, slen);
++
++	desc.tfm = hmac;
++	desc.flags = 0;
++
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out_err_free_hmac;
++
++	err = crypto_hash_digest(&desc, sg, slen, ctx->cksum);
++	if (err)
++		goto out_err_free_hmac;
++	/*
++	 * allocate hash, and blkciphers for data and seqnum encryption
++	 */
++	ctx->enc = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
++					  CRYPTO_ALG_ASYNC);
++	if (IS_ERR(ctx->enc)) {
++		err = PTR_ERR(ctx->enc);
++		goto out_err_free_hmac;
++	}
++
++	ctx->seq = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
++					  CRYPTO_ALG_ASYNC);
++	if (IS_ERR(ctx->seq)) {
++		crypto_free_blkcipher(ctx->enc);
++		err = PTR_ERR(ctx->seq);
++		goto out_err_free_hmac;
++	}
++
++	dprintk("RPC:       %s: returning success\n", __func__);
++
++	err = 0;
++
++out_err_free_hmac:
++	crypto_free_hash(hmac);
++out_err:
++	dprintk("RPC:       %s: returning %d\n", __func__, err);
++	return err;
++}
++
++static int
++context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
++{
++	struct xdr_netobj c, keyin, keyout;
++	u8 cdata[GSS_KRB5_K5CLENGTH];
++	u32 err;
++
++	c.len = GSS_KRB5_K5CLENGTH;
++	c.data = cdata;
++
++	keyin.data = ctx->Ksess;
++	keyin.len = ctx->gk5e->keylength;
++	keyout.len = ctx->gk5e->keylength;
++
++	/* initiator seal encryption */
++	set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
++	keyout.data = ctx->initiator_seal;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving initiator_seal key\n",
++			__func__, err);
++		goto out_err;
++	}
++	ctx->initiator_enc = context_v2_alloc_cipher(ctx,
++						     ctx->gk5e->encrypt_name,
++						     ctx->initiator_seal);
++	if (ctx->initiator_enc == NULL)
++		goto out_err;
++
++	/* acceptor seal encryption */
++	set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
++	keyout.data = ctx->acceptor_seal;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving acceptor_seal key\n",
++			__func__, err);
++		goto out_free_initiator_enc;
++	}
++	ctx->acceptor_enc = context_v2_alloc_cipher(ctx,
++						    ctx->gk5e->encrypt_name,
++						    ctx->acceptor_seal);
++	if (ctx->acceptor_enc == NULL)
++		goto out_free_initiator_enc;
++
++	/* initiator sign checksum */
++	set_cdata(cdata, KG_USAGE_INITIATOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
++	keyout.data = ctx->initiator_sign;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving initiator_sign key\n",
++			__func__, err);
++		goto out_free_acceptor_enc;
++	}
++
++	/* acceptor sign checksum */
++	set_cdata(cdata, KG_USAGE_ACCEPTOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
++	keyout.data = ctx->acceptor_sign;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving acceptor_sign key\n",
++			__func__, err);
++		goto out_free_acceptor_enc;
++	}
++
++	/* initiator seal integrity */
++	set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
++	keyout.data = ctx->initiator_integ;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving initiator_integ key\n",
++			__func__, err);
++		goto out_free_acceptor_enc;
++	}
++
++	/* acceptor seal integrity */
++	set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
++	keyout.data = ctx->acceptor_integ;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving acceptor_integ key\n",
++			__func__, err);
++		goto out_free_acceptor_enc;
++	}
++
++	switch (ctx->enctype) {
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		ctx->initiator_enc_aux =
++			context_v2_alloc_cipher(ctx, "cbc(aes)",
++						ctx->initiator_seal);
++		if (ctx->initiator_enc_aux == NULL)
++			goto out_free_acceptor_enc;
++		ctx->acceptor_enc_aux =
++			context_v2_alloc_cipher(ctx, "cbc(aes)",
++						ctx->acceptor_seal);
++		if (ctx->acceptor_enc_aux == NULL) {
++			crypto_free_blkcipher(ctx->initiator_enc_aux);
++			goto out_free_acceptor_enc;
++		}
++	}
++
++	return 0;
++
++out_free_acceptor_enc:
++	crypto_free_blkcipher(ctx->acceptor_enc);
++out_free_initiator_enc:
++	crypto_free_blkcipher(ctx->initiator_enc);
++out_err:
++	return -EINVAL;
++}
++
++static int
++gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
++		gfp_t gfp_mask)
++{
++	int keylen;
++
++	p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
++	if (IS_ERR(p))
++		goto out_err;
++	ctx->initiate = ctx->flags & KRB5_CTX_FLAG_INITIATOR;
++
++	p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
++	if (IS_ERR(p))
++		goto out_err;
++	p = simple_get_bytes(p, end, &ctx->seq_send64, sizeof(ctx->seq_send64));
++	if (IS_ERR(p))
++		goto out_err;
++	/* set seq_send for use by "older" enctypes */
++	ctx->seq_send = ctx->seq_send64;
++	if (ctx->seq_send64 != ctx->seq_send) {
++		dprintk("%s: seq_send64 %lx, seq_send %x overflow?\n", __func__,
++			(long unsigned)ctx->seq_send64, ctx->seq_send);
++		return -EINVAL;	/* p is not an ERR_PTR here */
++	}
++	p = simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype));
++	if (IS_ERR(p))
++		goto out_err;
++	/* Map ENCTYPE_DES3_CBC_SHA1 to ENCTYPE_DES3_CBC_RAW */
++	if (ctx->enctype == ENCTYPE_DES3_CBC_SHA1)
++		ctx->enctype = ENCTYPE_DES3_CBC_RAW;
++	ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
++	if (ctx->gk5e == NULL) {
++		dprintk("gss_kerberos_mech: unsupported krb5 enctype %u\n",
++			ctx->enctype);
++		p = ERR_PTR(-EINVAL);
++		goto out_err;
++	}
++	keylen = ctx->gk5e->keylength;
++
++	p = simple_get_bytes(p, end, ctx->Ksess, keylen);
++	if (IS_ERR(p))
++		goto out_err;
++
++	if (p != end) {
++		p = ERR_PTR(-EINVAL);
++		goto out_err;
++	}
++
++	ctx->mech_used.data = kmemdup(gss_kerberos_mech.gm_oid.data,
++				      gss_kerberos_mech.gm_oid.len, gfp_mask);
++	if (unlikely(ctx->mech_used.data == NULL)) {
++		p = ERR_PTR(-ENOMEM);
++		goto out_err;
++	}
++	ctx->mech_used.len = gss_kerberos_mech.gm_oid.len;
++
++	switch (ctx->enctype) {
++	case ENCTYPE_DES3_CBC_RAW:
++		return context_derive_keys_des3(ctx, gfp_mask);
++	case ENCTYPE_ARCFOUR_HMAC:
++		return context_derive_keys_rc4(ctx);
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		return context_derive_keys_new(ctx, gfp_mask);
++	default:
++		return -EINVAL;
++	}
++
++out_err:
++	return PTR_ERR(p);
++}
++
++static int
++gss_import_sec_context_kerberos(const void *p, size_t len,
++				struct gss_ctx *ctx_id, gfp_t gfp_mask)
++{
++	const void *end = (const void *)((const char *)p + len);
++	struct  krb5_ctx *ctx;
++	int ret;
++
++	ctx = kzalloc(sizeof(*ctx), gfp_mask);
++	if (ctx == NULL)
++		return -ENOMEM;
++
++	if (len == 85)
++		ret = gss_import_v1_context(p, end, ctx);
++	else
++		ret = gss_import_v2_context(p, end, ctx, gfp_mask);
++	if (ret == 0) {
++		ctx_id->internal_ctx_id = ctx;
++	} else {
++		if (len != 85)	/* v1 frees mech_used.data itself, v2 does not */
++			kfree(ctx->mech_used.data);
++		kfree(ctx);
++	}
++	dprintk("RPC:       %s: returning %d\n", __func__, ret);
++	return ret;
++}
++
+ static void
+ gss_delete_sec_context_kerberos(void *internal_ctx) {
+ 	struct krb5_ctx *kctx = internal_ctx;
+ 
+ 	crypto_free_blkcipher(kctx->seq);
+ 	crypto_free_blkcipher(kctx->enc);
++	crypto_free_blkcipher(kctx->acceptor_enc);
++	crypto_free_blkcipher(kctx->initiator_enc);
++	crypto_free_blkcipher(kctx->acceptor_enc_aux);
++	crypto_free_blkcipher(kctx->initiator_enc_aux);
+ 	kfree(kctx->mech_used.data);
+ 	kfree(kctx);
+ }
+@@ -241,6 +744,7 @@ static struct gss_api_mech gss_kerberos_
+ 	.gm_ops		= &gss_kerberos_ops,
+ 	.gm_pf_num	= ARRAY_SIZE(gss_kerberos_pfs),
+ 	.gm_pfs		= gss_kerberos_pfs,
++	.gm_upcall_enctypes = "enctypes=18,17,16,23,3,1,2 ",
+ };
+ 
+ static int __init init_kerberos_module(void)
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seal.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seal.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seal.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seal.c	2010-08-23 11:01:00.392564136 -0400
+@@ -3,7 +3,7 @@
+  *
+  *  Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5seal.c
+  *
+- *  Copyright (c) 2000 The Regents of the University of Michigan.
++ *  Copyright (c) 2000-2008 The Regents of the University of Michigan.
+  *  All rights reserved.
+  *
+  *  Andy Adamson	<andros@umich.edu>
+@@ -70,53 +70,154 @@
+ 
+ DEFINE_SPINLOCK(krb5_seq_lock);
+ 
+-u32
+-gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
++static char *
++setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
++{
++	__be16 *ptr, *krb5_hdr;
++	int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
++
++	token->len = g_token_size(&ctx->mech_used, body_size);
++
++	ptr = (__be16 *)token->data;
++	g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr);
++
++	/* ptr now at start of header described in rfc 1964, section 1.2.1: */
++	krb5_hdr = ptr;
++	*ptr++ = KG_TOK_MIC_MSG;
++	*ptr++ = cpu_to_le16(ctx->gk5e->signalg);
++	*ptr++ = SEAL_ALG_NONE;
++	*ptr++ = 0xffff;
++
++	return (char *)krb5_hdr;
++}
++
++static void *
++setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
++{
++	__be16 *ptr, *krb5_hdr;
++	u8 *p, flags = 0x00;
++
++	if ((ctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
++		flags |= 0x01;
++	if (ctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY)
++		flags |= 0x04;
++
++	/* Per rfc 4121, sec 4.2.6.1, there is no header,
++	 * just start the token */
++	krb5_hdr = ptr = (__be16 *)token->data;
++
++	*ptr++ = KG2_TOK_MIC;
++	p = (u8 *)ptr;
++	*p++ = flags;
++	*p++ = 0xff;
++	ptr = (__be16 *)p;
++	*ptr++ = 0xffff;
++	*ptr++ = 0xffff;
++
++	token->len = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
++	return krb5_hdr;
++}
++
++static u32
++gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
+ 		struct xdr_netobj *token)
+ {
+-	struct krb5_ctx		*ctx = gss_ctx->internal_ctx_id;
+-	char			cksumdata[16];
+-	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
+-	unsigned char		*ptr, *msg_start;
++	char			cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj	md5cksum = {.len = sizeof(cksumdata),
++					    .data = cksumdata};
++	void			*ptr;
+ 	s32			now;
+ 	u32			seq_send;
++	u8			*cksumkey;
+ 
+-	dprintk("RPC:       gss_krb5_seal\n");
++	dprintk("RPC:       %s\n", __func__);
+ 	BUG_ON(ctx == NULL);
+ 
+ 	now = get_seconds();
+ 
+-	token->len = g_token_size(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8);
++	ptr = setup_token(ctx, token);
+ 
+-	ptr = token->data;
+-	g_make_token_header(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8, &ptr);
++	if (ctx->gk5e->keyed_cksum)
++		cksumkey = ctx->cksum;
++	else
++		cksumkey = NULL;
+ 
+-	/* ptr now at header described in rfc 1964, section 1.2.1: */
+-	ptr[0] = (unsigned char) ((KG_TOK_MIC_MSG >> 8) & 0xff);
+-	ptr[1] = (unsigned char) (KG_TOK_MIC_MSG & 0xff);
++	if (make_checksum(ctx, ptr, 8, text, 0, cksumkey,
++			  KG_USAGE_SIGN, &md5cksum))
++		return GSS_S_FAILURE;
+ 
+-	msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8;
++	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
+ 
+-	*(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
+-	memset(ptr + 4, 0xff, 4);
++	spin_lock(&krb5_seq_lock);
++	seq_send = ctx->seq_send++;
++	spin_unlock(&krb5_seq_lock);
+ 
+-	if (make_checksum("md5", ptr, 8, text, 0, &md5cksum))
++	if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
++			      seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
+ 		return GSS_S_FAILURE;
+ 
+-	if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
+-			  md5cksum.data, md5cksum.len))
+-		return GSS_S_FAILURE;
++	return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
++}
++
++u32
++gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
++		struct xdr_netobj *token)
++{
++	char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj cksumobj = { .len = sizeof(cksumdata),
++				       .data = cksumdata};
++	void *krb5_hdr;
++	s32 now;
++	u64 seq_send;
++	u8 *cksumkey;
++	unsigned int cksum_usage;
++
++	dprintk("RPC:       %s\n", __func__);
+ 
+-	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
++	krb5_hdr = setup_token_v2(ctx, token);
+ 
++	/* Set up the sequence number. Now 64-bits in clear
++	 * text and w/o direction indicator */
+ 	spin_lock(&krb5_seq_lock);
+-	seq_send = ctx->seq_send++;
++	seq_send = ctx->seq_send64++;
+ 	spin_unlock(&krb5_seq_lock);
++	*((u64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send);
+ 
+-	if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
+-			      seq_send, ptr + GSS_KRB5_TOK_HDR_LEN,
+-			      ptr + 8))
++	if (ctx->initiate) {
++		cksumkey = ctx->initiator_sign;
++		cksum_usage = KG_USAGE_INITIATOR_SIGN;
++	} else {
++		cksumkey = ctx->acceptor_sign;
++		cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
++	}
++
++	if (make_checksum_v2(ctx, krb5_hdr, GSS_KRB5_TOK_HDR_LEN,
++			     text, 0, cksumkey, cksum_usage, &cksumobj))
+ 		return GSS_S_FAILURE;
+ 
++	memcpy(krb5_hdr + GSS_KRB5_TOK_HDR_LEN, cksumobj.data, cksumobj.len);
++
++	now = get_seconds();
++
+ 	return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
+ }
++
++u32
++gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
++		     struct xdr_netobj *token)
++{
++	struct krb5_ctx		*ctx = gss_ctx->internal_ctx_id;
++
++	switch (ctx->enctype) {
++	default:
++		BUG();
++	case ENCTYPE_DES_CBC_RAW:
++	case ENCTYPE_DES3_CBC_RAW:
++	case ENCTYPE_ARCFOUR_HMAC:
++		return gss_get_mic_v1(ctx, text, token);
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		return gss_get_mic_v2(ctx, text, token);
++	}
++}
++
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seqnum.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seqnum.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seqnum.c	2010-08-23 11:01:00.393496180 -0400
+@@ -39,14 +39,51 @@
+ # define RPCDBG_FACILITY        RPCDBG_AUTH
+ #endif
+ 
++static s32
++krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
++		      unsigned char *cksum, unsigned char *buf)
++{
++	struct crypto_blkcipher *cipher;
++	unsigned char plain[8];
++	s32 code;
++
++	dprintk("RPC:       %s:\n", __func__);
++	cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
++					CRYPTO_ALG_ASYNC);
++	if (IS_ERR(cipher))
++		return PTR_ERR(cipher);
++
++	plain[0] = (unsigned char) ((seqnum >> 24) & 0xff);
++	plain[1] = (unsigned char) ((seqnum >> 16) & 0xff);
++	plain[2] = (unsigned char) ((seqnum >> 8) & 0xff);
++	plain[3] = (unsigned char) ((seqnum >> 0) & 0xff);
++	plain[4] = direction;
++	plain[5] = direction;
++	plain[6] = direction;
++	plain[7] = direction;
++
++	code = krb5_rc4_setup_seq_key(kctx, cipher, cksum);
++	if (code)
++		goto out;
++
++	code = krb5_encrypt(cipher, cksum, plain, buf, 8);
++out:
++	crypto_free_blkcipher(cipher);
++	return code;
++}
+ s32
+-krb5_make_seq_num(struct crypto_blkcipher *key,
++krb5_make_seq_num(struct krb5_ctx *kctx,
++		struct crypto_blkcipher *key,
+ 		int direction,
+ 		u32 seqnum,
+ 		unsigned char *cksum, unsigned char *buf)
+ {
+ 	unsigned char plain[8];
+ 
++	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
++		return krb5_make_rc4_seq_num(kctx, direction, seqnum,
++					     cksum, buf);
++
+ 	plain[0] = (unsigned char) (seqnum & 0xff);
+ 	plain[1] = (unsigned char) ((seqnum >> 8) & 0xff);
+ 	plain[2] = (unsigned char) ((seqnum >> 16) & 0xff);
+@@ -60,17 +97,59 @@ krb5_make_seq_num(struct crypto_blkciphe
+ 	return krb5_encrypt(key, cksum, plain, buf, 8);
+ }
+ 
++static s32
++krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
++		     unsigned char *buf, int *direction, s32 *seqnum)
++{
++	struct crypto_blkcipher *cipher;
++	unsigned char plain[8];
++	s32 code;
++
++	dprintk("RPC:       %s:\n", __func__);
++	cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
++					CRYPTO_ALG_ASYNC);
++	if (IS_ERR(cipher))
++		return PTR_ERR(cipher);
++
++	code = krb5_rc4_setup_seq_key(kctx, cipher, cksum);
++	if (code)
++		goto out;
++
++	code = krb5_decrypt(cipher, cksum, buf, plain, 8);
++	if (code)
++		goto out;
++
++	if ((plain[4] != plain[5]) || (plain[4] != plain[6])
++				   || (plain[4] != plain[7])) {
++		code = (s32)KG_BAD_SEQ;
++		goto out;
++	}
++
++	*direction = plain[4];
++
++	*seqnum = ((plain[0] << 24) | (plain[1] << 16) |
++					(plain[2] << 8) | (plain[3]));
++out:
++	crypto_free_blkcipher(cipher);
++	return code;
++}
++
+ s32
+-krb5_get_seq_num(struct crypto_blkcipher *key,
++krb5_get_seq_num(struct krb5_ctx *kctx,
+ 	       unsigned char *cksum,
+ 	       unsigned char *buf,
+ 	       int *direction, u32 *seqnum)
+ {
+ 	s32 code;
+ 	unsigned char plain[8];
++	struct crypto_blkcipher *key = kctx->seq;
+ 
+ 	dprintk("RPC:       krb5_get_seq_num:\n");
+ 
++	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
++		return krb5_get_rc4_seq_num(kctx, cksum, buf,
++					    direction, seqnum);
++
+ 	if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
+ 		return code;
+ 
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_unseal.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_unseal.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_unseal.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_unseal.c	2010-08-23 11:01:00.393496180 -0400
+@@ -3,7 +3,7 @@
+  *
+  *  Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5unseal.c
+  *
+- *  Copyright (c) 2000 The Regents of the University of Michigan.
++ *  Copyright (c) 2000-2008 The Regents of the University of Michigan.
+  *  All rights reserved.
+  *
+  *  Andy Adamson   <andros@umich.edu>
+@@ -70,20 +70,21 @@
+ /* read_token is a mic token, and message_buffer is the data that the mic was
+  * supposedly taken over. */
+ 
+-u32
+-gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
++static u32
++gss_verify_mic_v1(struct krb5_ctx *ctx,
+ 		struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
+ {
+-	struct krb5_ctx		*ctx = gss_ctx->internal_ctx_id;
+ 	int			signalg;
+ 	int			sealalg;
+-	char			cksumdata[16];
+-	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
++	char			cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj	md5cksum = {.len = sizeof(cksumdata),
++					    .data = cksumdata};
+ 	s32			now;
+ 	int			direction;
+ 	u32			seqnum;
+ 	unsigned char		*ptr = (unsigned char *)read_token->data;
+ 	int			bodysize;
++	u8			*cksumkey;
+ 
+ 	dprintk("RPC:       krb5_read_token\n");
+ 
+@@ -98,7 +99,7 @@ gss_verify_mic_kerberos(struct gss_ctx *
+ 	/* XXX sanity-check bodysize?? */
+ 
+ 	signalg = ptr[2] + (ptr[3] << 8);
+-	if (signalg != SGN_ALG_DES_MAC_MD5)
++	if (signalg != ctx->gk5e->signalg)
+ 		return GSS_S_DEFECTIVE_TOKEN;
+ 
+ 	sealalg = ptr[4] + (ptr[5] << 8);
+@@ -108,13 +109,17 @@ gss_verify_mic_kerberos(struct gss_ctx *
+ 	if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
+ 		return GSS_S_DEFECTIVE_TOKEN;
+ 
+-	if (make_checksum("md5", ptr, 8, message_buffer, 0, &md5cksum))
+-		return GSS_S_FAILURE;
++	if (ctx->gk5e->keyed_cksum)
++		cksumkey = ctx->cksum;
++	else
++		cksumkey = NULL;
+ 
+-	if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16))
++	if (make_checksum(ctx, ptr, 8, message_buffer, 0,
++			  cksumkey, KG_USAGE_SIGN, &md5cksum))
+ 		return GSS_S_FAILURE;
+ 
+-	if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
++	if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
++					ctx->gk5e->cksumlength))
+ 		return GSS_S_BAD_SIG;
+ 
+ 	/* it got through unscathed.  Make sure the context is unexpired */
+@@ -126,7 +131,8 @@ gss_verify_mic_kerberos(struct gss_ctx *
+ 
+ 	/* do sequencing checks */
+ 
+-	if (krb5_get_seq_num(ctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, &direction, &seqnum))
++	if (krb5_get_seq_num(ctx, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
++			     &direction, &seqnum))
+ 		return GSS_S_FAILURE;
+ 
+ 	if ((ctx->initiate && direction != 0xff) ||
+@@ -135,3 +141,86 @@ gss_verify_mic_kerberos(struct gss_ctx *
+ 
+ 	return GSS_S_COMPLETE;
+ }
++
++/* Check a KG2_TOK_MIC token over message_buffer; returns a GSS_S_* code. */
++static u32
++gss_verify_mic_v2(struct krb5_ctx *ctx,
++		struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
++{
++	char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj cksumobj = {.len = sizeof(cksumdata),
++				      .data = cksumdata};
++	s32 now;
++	u64 seqnum;
++	u8 *ptr = read_token->data;
++	u8 *cksumkey;
++	u8 flags;
++	int i;
++	unsigned int cksum_usage;
++
++	dprintk("RPC:       %s\n", __func__);
++
++	if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_MIC)
++		return GSS_S_DEFECTIVE_TOKEN;
++
++	flags = ptr[2];	/* reject tokens flagged as sent by our own side */
++	if ((!ctx->initiate && (flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)) ||
++	    (ctx->initiate && !(flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)))
++		return GSS_S_BAD_SIG;
++
++	if (flags & KG2_TOKEN_FLAG_SEALED) {
++		dprintk("%s: token has unexpected sealed flag\n", __func__);
++		return GSS_S_FAILURE;
++	}
++
++	for (i = 3; i < 8; i++)	/* header bytes 3..7 must all be 0xff */
++		if (ptr[i] != 0xff)
++			return GSS_S_DEFECTIVE_TOKEN;
++
++	if (ctx->initiate) {	/* verify with the sending side's key */
++		cksumkey = ctx->acceptor_sign;
++		cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
++	} else {
++		cksumkey = ctx->initiator_sign;
++		cksum_usage = KG_USAGE_INITIATOR_SIGN;
++	}
++
++	if (make_checksum_v2(ctx, ptr, GSS_KRB5_TOK_HDR_LEN, message_buffer, 0,
++			     cksumkey, cksum_usage, &cksumobj))
++		return GSS_S_FAILURE;
++
++	if (memcmp(cksumobj.data, ptr + GSS_KRB5_TOK_HDR_LEN,
++				ctx->gk5e->cksumlength))
++		return GSS_S_BAD_SIG;
++
++	/* it got through unscathed.  Make sure the context is unexpired */
++	now = get_seconds();
++	if (now > ctx->endtime)
++		return GSS_S_CONTEXT_EXPIRED;
++
++	/* seqnum is the __be64 at header byte 8; it is not checked yet */
++	seqnum = be64_to_cpup((__be64 *)(ptr + 8));
++
++	return GSS_S_COMPLETE;
++}
++
++u32
++gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
++			struct xdr_buf *message_buffer,
++			struct xdr_netobj *read_token)
++{
++	struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
++	/* Verify a MIC token with the v1 or v2 routine chosen by enctype. */
++	switch (ctx->enctype) {
++	default:	/* not a handled enctype; no break before the v1 cases */
++		BUG();
++	case ENCTYPE_DES_CBC_RAW:
++	case ENCTYPE_DES3_CBC_RAW:
++	case ENCTYPE_ARCFOUR_HMAC:
++		return gss_verify_mic_v1(ctx, message_buffer, read_token);
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		return gss_verify_mic_v2(ctx, message_buffer, read_token);
++	}
++}
++
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_wrap.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_wrap.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_wrap.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_wrap.c	2010-08-23 11:01:00.394576083 -0400
+@@ -1,3 +1,33 @@
++/*
++ * COPYRIGHT (c) 2008
++ * The Regents of the University of Michigan
++ * ALL RIGHTS RESERVED
++ *
++ * Permission is granted to use, copy, create derivative works
++ * and redistribute this software and such derivative works
++ * for any purpose, so long as the name of The University of
++ * Michigan is not used in any advertising or publicity
++ * pertaining to the use of distribution of this software
++ * without specific, written prior authorization.  If the
++ * above copyright notice or any other identification of the
++ * University of Michigan is included in any copy of any
++ * portion of this software, then the disclaimer below must
++ * also be included.
++ *
++ * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
++ * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
++ * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
++ * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
++ * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
++ * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
++ * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
++ * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
++ * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
++ * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGES.
++ */
++
+ #include <linux/types.h>
+ #include <linux/jiffies.h>
+ #include <linux/sunrpc/gss_krb5.h>
+@@ -12,10 +42,7 @@
+ static inline int
+ gss_krb5_padding(int blocksize, int length)
+ {
+-	/* Most of the code is block-size independent but currently we
+-	 * use only 8: */
+-	BUG_ON(blocksize != 8);
+-	return 8 - (length & 7);
++	return blocksize - (length % blocksize);
+ }
+ 
+ static inline void
+@@ -86,8 +113,8 @@ out:
+ 	return 0;
+ }
+ 
+-static void
+-make_confounder(char *p, u32 conflen)
++void
++gss_krb5_make_confounder(char *p, u32 conflen)
+ {
+ 	static u64 i = 0;
+ 	u64 *q = (u64 *)p;
+@@ -127,69 +154,73 @@ make_confounder(char *p, u32 conflen)
+ 
+ /* XXX factor out common code with seal/unseal. */
+ 
+-u32
+-gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
++static u32
++gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
+ 		struct xdr_buf *buf, struct page **pages)
+ {
+-	struct krb5_ctx		*kctx = ctx->internal_ctx_id;
+-	char			cksumdata[16];
+-	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
++	char			cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj	md5cksum = {.len = sizeof(cksumdata),
++					    .data = cksumdata};
+ 	int			blocksize = 0, plainlen;
+ 	unsigned char		*ptr, *msg_start;
+ 	s32			now;
+ 	int			headlen;
+ 	struct page		**tmp_pages;
+ 	u32			seq_send;
++	u8			*cksumkey;
++	u32			conflen = kctx->gk5e->conflen;
+ 
+-	dprintk("RPC:       gss_wrap_kerberos\n");
++	dprintk("RPC:       %s\n", __func__);
+ 
+ 	now = get_seconds();
+ 
+ 	blocksize = crypto_blkcipher_blocksize(kctx->enc);
+ 	gss_krb5_add_padding(buf, offset, blocksize);
+ 	BUG_ON((buf->len - offset) % blocksize);
+-	plainlen = blocksize + buf->len - offset;
++	plainlen = conflen + buf->len - offset;
+ 
+-	headlen = g_token_size(&kctx->mech_used, 24 + plainlen) -
+-						(buf->len - offset);
++	headlen = g_token_size(&kctx->mech_used,
++		GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength + plainlen) -
++		(buf->len - offset);
+ 
+ 	ptr = buf->head[0].iov_base + offset;
+ 	/* shift data to make room for header. */
++	xdr_extend_head(buf, offset, headlen);
++
+ 	/* XXX Would be cleverer to encrypt while copying. */
+-	/* XXX bounds checking, slack, etc. */
+-	memmove(ptr + headlen, ptr, buf->head[0].iov_len - offset);
+-	buf->head[0].iov_len += headlen;
+-	buf->len += headlen;
+ 	BUG_ON((buf->len - offset - headlen) % blocksize);
+ 
+ 	g_make_token_header(&kctx->mech_used,
+-				GSS_KRB5_TOK_HDR_LEN + 8 + plainlen, &ptr);
++				GSS_KRB5_TOK_HDR_LEN +
++				kctx->gk5e->cksumlength + plainlen, &ptr);
+ 
+ 
+ 	/* ptr now at header described in rfc 1964, section 1.2.1: */
+ 	ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff);
+ 	ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff);
+ 
+-	msg_start = ptr + 24;
++	msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength;
+ 
+-	*(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
++	*(__be16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg);
+ 	memset(ptr + 4, 0xff, 4);
+-	*(__be16 *)(ptr + 4) = htons(SEAL_ALG_DES);
++	*(__be16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg);
+ 
+-	make_confounder(msg_start, blocksize);
++	gss_krb5_make_confounder(msg_start, conflen);
++
++	if (kctx->gk5e->keyed_cksum)
++		cksumkey = kctx->cksum;
++	else
++		cksumkey = NULL;
+ 
+ 	/* XXXJBF: UGH!: */
+ 	tmp_pages = buf->pages;
+ 	buf->pages = pages;
+-	if (make_checksum("md5", ptr, 8, buf,
+-				offset + headlen - blocksize, &md5cksum))
++	if (make_checksum(kctx, ptr, 8, buf, offset + headlen - conflen,
++					cksumkey, KG_USAGE_SEAL, &md5cksum))
+ 		return GSS_S_FAILURE;
+ 	buf->pages = tmp_pages;
+ 
+-	if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
+-			  md5cksum.data, md5cksum.len))
+-		return GSS_S_FAILURE;
+-	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
++	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
+ 
+ 	spin_lock(&krb5_seq_lock);
+ 	seq_send = kctx->seq_send++;
+@@ -197,25 +228,42 @@ gss_wrap_kerberos(struct gss_ctx *ctx, i
+ 
+ 	/* XXX would probably be more efficient to compute checksum
+ 	 * and encrypt at the same time: */
+-	if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
++	if ((krb5_make_seq_num(kctx, kctx->seq, kctx->initiate ? 0 : 0xff,
+ 			       seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
+ 		return GSS_S_FAILURE;
+ 
+-	if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
+-									pages))
+-		return GSS_S_FAILURE;
++	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
++		struct crypto_blkcipher *cipher;
++		int err;
++		cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
++						CRYPTO_ALG_ASYNC);
++		if (IS_ERR(cipher))
++			return GSS_S_FAILURE;
++
++		krb5_rc4_setup_enc_key(kctx, cipher, seq_send);
++
++		err = gss_encrypt_xdr_buf(cipher, buf,
++					  offset + headlen - conflen, pages);
++		crypto_free_blkcipher(cipher);
++		if (err)
++			return GSS_S_FAILURE;
++	} else {
++		if (gss_encrypt_xdr_buf(kctx->enc, buf,
++					offset + headlen - conflen, pages))
++			return GSS_S_FAILURE;
++	}
+ 
+ 	return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
+ }
+ 
+-u32
+-gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
++static u32
++gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
+ {
+-	struct krb5_ctx		*kctx = ctx->internal_ctx_id;
+ 	int			signalg;
+ 	int			sealalg;
+-	char			cksumdata[16];
+-	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
++	char			cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj	md5cksum = {.len = sizeof(cksumdata),
++					    .data = cksumdata};
+ 	s32			now;
+ 	int			direction;
+ 	s32			seqnum;
+@@ -224,6 +272,9 @@ gss_unwrap_kerberos(struct gss_ctx *ctx,
+ 	void			*data_start, *orig_start;
+ 	int			data_len;
+ 	int			blocksize;
++	u32			conflen = kctx->gk5e->conflen;
++	int			crypt_offset;
++	u8			*cksumkey;
+ 
+ 	dprintk("RPC:       gss_unwrap_kerberos\n");
+ 
+@@ -241,29 +292,65 @@ gss_unwrap_kerberos(struct gss_ctx *ctx,
+ 	/* get the sign and seal algorithms */
+ 
+ 	signalg = ptr[2] + (ptr[3] << 8);
+-	if (signalg != SGN_ALG_DES_MAC_MD5)
++	if (signalg != kctx->gk5e->signalg)
+ 		return GSS_S_DEFECTIVE_TOKEN;
+ 
+ 	sealalg = ptr[4] + (ptr[5] << 8);
+-	if (sealalg != SEAL_ALG_DES)
++	if (sealalg != kctx->gk5e->sealalg)
+ 		return GSS_S_DEFECTIVE_TOKEN;
+ 
+ 	if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
+ 		return GSS_S_DEFECTIVE_TOKEN;
+ 
+-	if (gss_decrypt_xdr_buf(kctx->enc, buf,
+-			ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base))
+-		return GSS_S_DEFECTIVE_TOKEN;
++	/*
++	 * Data starts after token header and checksum.  ptr points
++	 * to the beginning of the token header
++	 */
++	crypt_offset = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) -
++					(unsigned char *)buf->head[0].iov_base;
++
++	/*
++	 * Need plaintext seqnum to derive encryption key for arcfour-hmac
++	 */
++	if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN,
++			     ptr + 8, &direction, &seqnum))
++		return GSS_S_BAD_SIG;
+ 
+-	if (make_checksum("md5", ptr, 8, buf,
+-		 ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base, &md5cksum))
+-		return GSS_S_FAILURE;
++	if ((kctx->initiate && direction != 0xff) ||
++	    (!kctx->initiate && direction != 0))
++		return GSS_S_BAD_SIG;
++
++	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
++		struct crypto_blkcipher *cipher;
++		int err;
++
++		cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
++						CRYPTO_ALG_ASYNC);
++		if (IS_ERR(cipher))
++			return GSS_S_FAILURE;
++
++		krb5_rc4_setup_enc_key(kctx, cipher, seqnum);
++
++		err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset);
++		crypto_free_blkcipher(cipher);
++		if (err)
++			return GSS_S_DEFECTIVE_TOKEN;
++	} else {
++		if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset))
++			return GSS_S_DEFECTIVE_TOKEN;
++	}
+ 
+-	if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
+-			   md5cksum.data, md5cksum.len))
++	if (kctx->gk5e->keyed_cksum)
++		cksumkey = kctx->cksum;
++	else
++		cksumkey = NULL;
++
++	if (make_checksum(kctx, ptr, 8, buf, crypt_offset,
++					cksumkey, KG_USAGE_SEAL, &md5cksum))
+ 		return GSS_S_FAILURE;
+ 
+-	if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
++	if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
++						kctx->gk5e->cksumlength))
+ 		return GSS_S_BAD_SIG;
+ 
+ 	/* it got through unscathed.  Make sure the context is unexpired */
+@@ -275,19 +362,12 @@ gss_unwrap_kerberos(struct gss_ctx *ctx,
+ 
+ 	/* do sequencing checks */
+ 
+-	if (krb5_get_seq_num(kctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
+-				    &direction, &seqnum))
+-		return GSS_S_BAD_SIG;
+-
+-	if ((kctx->initiate && direction != 0xff) ||
+-	    (!kctx->initiate && direction != 0))
+-		return GSS_S_BAD_SIG;
+-
+ 	/* Copy the data back to the right position.  XXX: Would probably be
+ 	 * better to copy and encrypt at the same time. */
+ 
+ 	blocksize = crypto_blkcipher_blocksize(kctx->enc);
+-	data_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8 + blocksize;
++	data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
++					conflen;
+ 	orig_start = buf->head[0].iov_base + offset;
+ 	data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
+ 	memmove(orig_start, data_start, data_len);
+@@ -299,3 +379,209 @@ gss_unwrap_kerberos(struct gss_ctx *ctx,
+ 
+ 	return GSS_S_COMPLETE;
+ }
++
++/*
++ * We cannot currently handle tokens with rotated data (that needs a
++ * generalized in-place rotate; rotated data is not expected in the
++ * general case).  Returns 0 when the effective rotation is zero, else 1.
++ */
++static u32
++rotate_left(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, u16 rrc)
++{
++	unsigned int len = buf->len - offset - GSS_KRB5_TOK_HDR_LEN;
++	unsigned int realrrc = len ? rrc % len : rrc;	/* len 0: no modulo */
++
++	if (realrrc == 0)
++		return 0;
++	dprintk("%s: cannot process token with rotated data: "
++		"rrc %u, realrrc %u\n", __func__, rrc, realrrc);
++	return 1;
++}
++
++/* Emit a KG2_TOK_WRAP header at offset, then seal buf via gk5e->encrypt_v2. */
++static u32
++gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
++		     struct xdr_buf *buf, struct page **pages)
++{
++	u8		*ptr;
++	s32		now;
++	u8		flags = 0x00;
++	__be16		*be16ptr, ec = 0;
++	__be64		*be64ptr;
++	u32		err;
++
++	dprintk("RPC:       %s\n", __func__);
++
++	if (kctx->gk5e->encrypt_v2 == NULL)
++		return GSS_S_FAILURE;
++
++	/* make room for gss token header */
++	if (xdr_extend_head(buf, offset, GSS_KRB5_TOK_HDR_LEN))
++		return GSS_S_FAILURE;
++
++	/* construct gss token header */
++	ptr = buf->head[0].iov_base + offset;
++	*ptr++ = (unsigned char) ((KG2_TOK_WRAP>>8) & 0xff);
++	*ptr++ = (unsigned char) (KG2_TOK_WRAP & 0xff);
++
++	if ((kctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
++		flags |= KG2_TOKEN_FLAG_SENTBYACCEPTOR;
++	if ((kctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY) != 0)
++		flags |= KG2_TOKEN_FLAG_ACCEPTORSUBKEY;
++	/* We always do confidentiality in wrap tokens */
++	flags |= KG2_TOKEN_FLAG_SEALED;
++
++	*ptr++ = flags;
++	*ptr++ = 0xff;
++	be16ptr = (__be16 *)ptr;
++
++	/* the extra-count field is written from ec, which is always 0 here */
++	*be16ptr++ = cpu_to_be16(ec);
++	/* "inner" token header always uses 0 for RRC */
++	*be16ptr++ = cpu_to_be16(0);
++
++	be64ptr = (__be64 *)be16ptr;
++	spin_lock(&krb5_seq_lock);
++	*be64ptr = cpu_to_be64(kctx->seq_send64++);
++	spin_unlock(&krb5_seq_lock);
++
++	err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, ec, pages);
++	if (err)
++		return err;
++
++	now = get_seconds();
++	return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
++}
++
++/* Decrypt and check a KG2_TOK_WRAP token at offset, then drop its header. */
++static u32
++gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
++{
++	s32		now;
++	u64		seqnum;
++	u8		*ptr;
++	u8		flags = 0x00;
++	u16		ec, rrc;
++	int		err;
++	u32		headskip, tailskip;
++	u8		decrypted_hdr[GSS_KRB5_TOK_HDR_LEN];
++	unsigned int	movelen;
++
++	dprintk("RPC:       %s\n", __func__);
++
++	if (kctx->gk5e->decrypt_v2 == NULL)
++		return GSS_S_FAILURE;
++
++	ptr = buf->head[0].iov_base + offset;
++
++	if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_WRAP)
++		return GSS_S_DEFECTIVE_TOKEN;
++
++	flags = ptr[2];
++	if ((!kctx->initiate && (flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)) ||
++	    (kctx->initiate && !(flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)))
++		return GSS_S_BAD_SIG;
++
++	if ((flags & KG2_TOKEN_FLAG_SEALED) == 0) {
++		dprintk("%s: token missing expected sealed flag\n", __func__);
++		return GSS_S_DEFECTIVE_TOKEN;
++	}
++
++	if (ptr[3] != 0xff)
++		return GSS_S_DEFECTIVE_TOKEN;
++
++	ec = be16_to_cpup((__be16 *)(ptr + 4));
++	rrc = be16_to_cpup((__be16 *)(ptr + 6));
++
++	seqnum = be64_to_cpup((__be64 *)(ptr + 8));
++
++	if (rrc != 0) {
++		err = rotate_left(kctx, offset, buf, rrc);
++		if (err)
++			return GSS_S_FAILURE;
++	}
++
++	err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf,
++					&headskip, &tailskip);
++	if (err)
++		return GSS_S_FAILURE;
++
++	/*
++	 * Retrieve the decrypted gss token header and verify it against
++	 * the original, skipping bytes 6-7 (the rrc field read above)
++	 */
++	err = read_bytes_from_xdr_buf(buf,
++				buf->len - GSS_KRB5_TOK_HDR_LEN - tailskip,
++				decrypted_hdr, GSS_KRB5_TOK_HDR_LEN);
++	if (err) {
++		dprintk("%s: error %u getting decrypted_hdr\n", __func__, err);
++		return GSS_S_FAILURE;
++	}
++	if (memcmp(ptr, decrypted_hdr, 6)
++				|| memcmp(ptr + 8, decrypted_hdr + 8, 8)) {
++		dprintk("%s: token hdr, plaintext hdr mismatch!\n", __func__);
++		return GSS_S_FAILURE;
++	}
++
++	/* do sequencing checks (none yet: seqnum is parsed but not used) */
++
++	/* it got through unscathed.  Make sure the context is unexpired */
++	now = get_seconds();
++	if (now > kctx->endtime)
++		return GSS_S_CONTEXT_EXPIRED;
++
++	/*
++	 * Move the head data back to the right position in xdr_buf.
++	 * We ignore any "ec" data since it might be in the head or
++	 * the tail, and we really don't need to deal with it.
++	 * Note that buf->head[0].iov_len may indicate the available
++	 * head buffer space rather than that actually occupied.
++	 */
++	movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len);
++	movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip;
++	BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
++							buf->head[0].iov_len);
++	memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
++	buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
++	buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
++
++	return GSS_S_COMPLETE;
++}
++
++u32
++gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
++		  struct xdr_buf *buf, struct page **pages)
++{
++	struct krb5_ctx	*kctx = gctx->internal_ctx_id;
++	/* Wrap buf with the v1 or v2 routine chosen by the context enctype. */
++	switch (kctx->enctype) {
++	default:	/* not a handled enctype; no break before the v1 cases */
++		BUG();
++	case ENCTYPE_DES_CBC_RAW:
++	case ENCTYPE_DES3_CBC_RAW:
++	case ENCTYPE_ARCFOUR_HMAC:
++		return gss_wrap_kerberos_v1(kctx, offset, buf, pages);
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		return gss_wrap_kerberos_v2(kctx, offset, buf, pages);
++	}
++}
++
++u32
++gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
++{
++	struct krb5_ctx	*kctx = gctx->internal_ctx_id;
++	/* Unwrap buf with the v1 or v2 routine chosen by the context enctype. */
++	switch (kctx->enctype) {
++	default:	/* not a handled enctype; no break before the v1 cases */
++		BUG();
++	case ENCTYPE_DES_CBC_RAW:
++	case ENCTYPE_DES3_CBC_RAW:
++	case ENCTYPE_ARCFOUR_HMAC:
++		return gss_unwrap_kerberos_v1(kctx, offset, buf);
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		return gss_unwrap_kerberos_v2(kctx, offset, buf);
++	}
++}
++
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_mech_switch.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_mech_switch.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_mech_switch.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_mech_switch.c	2010-08-23 11:01:00.395574706 -0400
+@@ -249,14 +249,15 @@ EXPORT_SYMBOL_GPL(gss_mech_put);
+ int
+ gss_import_sec_context(const void *input_token, size_t bufsize,
+ 		       struct gss_api_mech	*mech,
+-		       struct gss_ctx		**ctx_id)
++		       struct gss_ctx		**ctx_id,
++		       gfp_t gfp_mask)
+ {
+-	if (!(*ctx_id = kzalloc(sizeof(**ctx_id), GFP_KERNEL)))
++	if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask)))
+ 		return -ENOMEM;
+ 	(*ctx_id)->mech_type = gss_mech_get(mech);
+ 
+ 	return mech->gm_ops
+-		->gss_import_sec_context(input_token, bufsize, *ctx_id);
++		->gss_import_sec_context(input_token, bufsize, *ctx_id, gfp_mask);
+ }
+ 
+ /* gss_get_mic: compute a mic over message and return mic_token. */
+@@ -285,6 +286,20 @@ gss_verify_mic(struct gss_ctx		*context_
+ 				 mic_token);
+ }
+ 
++/*
++ * This function is called from both the client and server code.
++ * Each makes guarantees about how much "slack" space is available
++ * for the underlying function in "buf"'s head and tail while
++ * performing the wrap.
++ *
++ * The client and server code allocate RPC_MAX_AUTH_SIZE extra
++ * space in both the head and tail which is available for use by
++ * the wrap function.
++ *
++ * Underlying functions should verify they do not use more than
++ * RPC_MAX_AUTH_SIZE of extra space in either the head or tail
++ * when performing the wrap.
++ */
+ u32
+ gss_wrap(struct gss_ctx	*ctx_id,
+ 	 int		offset,
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_spkm3_mech.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_spkm3_mech.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_spkm3_mech.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_spkm3_mech.c	2010-08-23 11:01:00.396574085 -0400
+@@ -84,13 +84,14 @@ simple_get_netobj(const void *p, const v
+ 
+ static int
+ gss_import_sec_context_spkm3(const void *p, size_t len,
+-				struct gss_ctx *ctx_id)
++				struct gss_ctx *ctx_id,
++				gfp_t gfp_mask)
+ {
+ 	const void *end = (const void *)((const char *)p + len);
+ 	struct	spkm3_ctx *ctx;
+ 	int	version;
+ 
+-	if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS)))
++	if (!(ctx = kzalloc(sizeof(*ctx), gfp_mask)))
+ 		goto out_err;
+ 
+ 	p = simple_get_bytes(p, end, &version, sizeof(version));
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/Makefile.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/Makefile
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/Makefile	2010-08-23 11:01:00.387574079 -0400
+@@ -10,7 +10,7 @@ auth_rpcgss-objs := auth_gss.o gss_gener
+ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
+ 
+ rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
+-	gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o
++	gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
+ 
+ obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
+ 
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/svcauth_gss.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/svcauth_gss.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/svcauth_gss.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/svcauth_gss.c	2010-08-23 11:01:00.396574085 -0400
+@@ -494,7 +494,7 @@ static int rsc_parse(struct cache_detail
+ 		len = qword_get(&mesg, buf, mlen);
+ 		if (len < 0)
+ 			goto out;
+-		status = gss_import_sec_context(buf, len, gm, &rsci.mechctx);
++		status = gss_import_sec_context(buf, len, gm, &rsci.mechctx, GFP_KERNEL);
+ 		if (status)
+ 			goto out;
+ 
+@@ -1315,6 +1315,14 @@ svcauth_gss_wrap_resp_priv(struct svc_rq
+ 	inpages = resbuf->pages;
+ 	/* XXX: Would be better to write some xdr helper functions for
+ 	 * nfs{2,3,4}xdr.c that place the data right, instead of copying: */
++
++	/*
++	 * If there is currently tail data, make sure there is
++	 * room for the head, tail, and 2 * RPC_MAX_AUTH_SIZE in
++	 * the page, and move the current tail data such that
++	 * there is RPC_MAX_AUTH_SIZE slack space available in
++	 * both the head and tail.
++	 */
+ 	if (resbuf->tail[0].iov_base) {
+ 		BUG_ON(resbuf->tail[0].iov_base >= resbuf->head[0].iov_base
+ 							+ PAGE_SIZE);
+@@ -1327,6 +1335,13 @@ svcauth_gss_wrap_resp_priv(struct svc_rq
+ 			resbuf->tail[0].iov_len);
+ 		resbuf->tail[0].iov_base += RPC_MAX_AUTH_SIZE;
+ 	}
++	/*
++	 * If there is no current tail data, make sure there is
++	 * room for the head data, and 2 * RPC_MAX_AUTH_SIZE in the
++	 * allotted page, and set up tail information such that there
++	 * is RPC_MAX_AUTH_SIZE slack space available in both the
++	 * head and tail.
++	 */
+ 	if (resbuf->tail[0].iov_base == NULL) {
+ 		if (resbuf->head[0].iov_len + 2*RPC_MAX_AUTH_SIZE > PAGE_SIZE)
+ 			return -ENOMEM;
+diff -up linux-2.6.34.noarch/net/sunrpc/clnt.c.orig linux-2.6.34.noarch/net/sunrpc/clnt.c
+--- linux-2.6.34.noarch/net/sunrpc/clnt.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/clnt.c	2010-08-23 11:01:00.397622347 -0400
+@@ -556,26 +556,16 @@ static const struct rpc_call_ops rpc_def
+  */
+ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
+ {
+-	struct rpc_task *task, *ret;
++	struct rpc_task *task;
+ 
+ 	task = rpc_new_task(task_setup_data);
+-	if (task == NULL) {
+-		rpc_release_calldata(task_setup_data->callback_ops,
+-				task_setup_data->callback_data);
+-		ret = ERR_PTR(-ENOMEM);
++	if (IS_ERR(task))
+ 		goto out;
+-	}
+ 
+-	if (task->tk_status != 0) {
+-		ret = ERR_PTR(task->tk_status);
+-		rpc_put_task(task);
+-		goto out;
+-	}
+ 	atomic_inc(&task->tk_count);
+ 	rpc_execute(task);
+-	ret = task;
+ out:
+-	return ret;
++	return task;
+ }
+ EXPORT_SYMBOL_GPL(rpc_run_task);
+ 
+@@ -657,9 +647,8 @@ struct rpc_task *rpc_run_bc_task(struct 
+ 	 * Create an rpc_task to send the data
+ 	 */
+ 	task = rpc_new_task(&task_setup_data);
+-	if (!task) {
++	if (IS_ERR(task)) {
+ 		xprt_free_bc_request(req);
+-		task = ERR_PTR(-ENOMEM);
+ 		goto out;
+ 	}
+ 	task->tk_rqstp = req;
+diff -up linux-2.6.34.noarch/net/sunrpc/sched.c.orig linux-2.6.34.noarch/net/sunrpc/sched.c
+--- linux-2.6.34.noarch/net/sunrpc/sched.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/sched.c	2010-08-23 11:01:00.398564598 -0400
+@@ -25,7 +25,6 @@
+ 
+ #ifdef RPC_DEBUG
+ #define RPCDBG_FACILITY		RPCDBG_SCHED
+-#define RPC_TASK_MAGIC_ID	0xf00baa
+ #endif
+ 
+ /*
+@@ -237,7 +236,6 @@ static void rpc_task_set_debuginfo(struc
+ {
+ 	static atomic_t rpc_pid;
+ 
+-	task->tk_magic = RPC_TASK_MAGIC_ID;
+ 	task->tk_pid = atomic_inc_return(&rpc_pid);
+ }
+ #else
+@@ -360,9 +358,6 @@ static void __rpc_do_wake_up_task(struct
+ 	dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n",
+ 			task->tk_pid, jiffies);
+ 
+-#ifdef RPC_DEBUG
+-	BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
+-#endif
+ 	/* Has the task been executed yet? If not, we cannot wake it up! */
+ 	if (!RPC_IS_ACTIVATED(task)) {
+ 		printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
+@@ -834,7 +829,7 @@ static void rpc_init_task(struct rpc_tas
+ 	}
+ 
+ 	/* starting timestamp */
+-	task->tk_start = jiffies;
++	task->tk_start = ktime_get();
+ 
+ 	dprintk("RPC:       new task initialized, procpid %u\n",
+ 				task_pid_nr(current));
+@@ -856,16 +851,23 @@ struct rpc_task *rpc_new_task(const stru
+ 
+ 	if (task == NULL) {
+ 		task = rpc_alloc_task();
+-		if (task == NULL)
+-			goto out;
++		if (task == NULL) {
++			rpc_release_calldata(setup_data->callback_ops,
++					setup_data->callback_data);
++			return ERR_PTR(-ENOMEM);
++		}
+ 		flags = RPC_TASK_DYNAMIC;
+ 	}
+ 
+ 	rpc_init_task(task, setup_data);
++	if (task->tk_status < 0) {
++		int err = task->tk_status;
++		rpc_put_task(task);
++		return ERR_PTR(err);
++	}
+ 
+ 	task->tk_flags |= flags;
+ 	dprintk("RPC:       allocated task %p\n", task);
+-out:
+ 	return task;
+ }
+ 
+@@ -909,9 +911,6 @@ EXPORT_SYMBOL_GPL(rpc_put_task);
+ 
+ static void rpc_release_task(struct rpc_task *task)
+ {
+-#ifdef RPC_DEBUG
+-	BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
+-#endif
+ 	dprintk("RPC: %5u release task\n", task->tk_pid);
+ 
+ 	if (!list_empty(&task->tk_task)) {
+@@ -923,9 +922,6 @@ static void rpc_release_task(struct rpc_
+ 	}
+ 	BUG_ON (RPC_IS_QUEUED(task));
+ 
+-#ifdef RPC_DEBUG
+-	task->tk_magic = 0;
+-#endif
+ 	/* Wake up anyone who is waiting for task completion */
+ 	rpc_mark_complete_task(task);
+ 
+diff -up linux-2.6.34.noarch/net/sunrpc/stats.c.orig linux-2.6.34.noarch/net/sunrpc/stats.c
+--- linux-2.6.34.noarch/net/sunrpc/stats.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/stats.c	2010-08-23 11:01:00.399574225 -0400
+@@ -144,7 +144,7 @@ void rpc_count_iostats(struct rpc_task *
+ 	struct rpc_rqst *req = task->tk_rqstp;
+ 	struct rpc_iostats *stats;
+ 	struct rpc_iostats *op_metrics;
+-	long rtt, execute, queue;
++	ktime_t delta;
+ 
+ 	if (!task->tk_client || !task->tk_client->cl_metrics || !req)
+ 		return;
+@@ -156,23 +156,16 @@ void rpc_count_iostats(struct rpc_task *
+ 	op_metrics->om_ntrans += req->rq_ntrans;
+ 	op_metrics->om_timeouts += task->tk_timeouts;
+ 
+-	op_metrics->om_bytes_sent += task->tk_bytes_sent;
++	op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent;
+ 	op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
+ 
+-	queue = (long)req->rq_xtime - task->tk_start;
+-	if (queue < 0)
+-		queue = -queue;
+-	op_metrics->om_queue += queue;
+-
+-	rtt = task->tk_rtt;
+-	if (rtt < 0)
+-		rtt = -rtt;
+-	op_metrics->om_rtt += rtt;
+-
+-	execute = (long)jiffies - task->tk_start;
+-	if (execute < 0)
+-		execute = -execute;
+-	op_metrics->om_execute += execute;
++	delta = ktime_sub(req->rq_xtime, task->tk_start);
++	op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
++
++	op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt);
++
++	delta = ktime_sub(ktime_get(), task->tk_start);
++	op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta);
+ }
+ 
+ static void _print_name(struct seq_file *seq, unsigned int op,
+@@ -186,8 +179,6 @@ static void _print_name(struct seq_file 
+ 		seq_printf(seq, "\t%12u: ", op);
+ }
+ 
+-#define MILLISECS_PER_JIFFY	(1000 / HZ)
+-
+ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
+ {
+ 	struct rpc_iostats *stats = clnt->cl_metrics;
+@@ -214,9 +205,9 @@ void rpc_print_iostats(struct seq_file *
+ 				metrics->om_timeouts,
+ 				metrics->om_bytes_sent,
+ 				metrics->om_bytes_recv,
+-				metrics->om_queue * MILLISECS_PER_JIFFY,
+-				metrics->om_rtt * MILLISECS_PER_JIFFY,
+-				metrics->om_execute * MILLISECS_PER_JIFFY);
++				ktime_to_ms(metrics->om_queue),
++				ktime_to_ms(metrics->om_rtt),
++				ktime_to_ms(metrics->om_execute));
+ 	}
+ }
+ EXPORT_SYMBOL_GPL(rpc_print_iostats);
+diff -up linux-2.6.34.noarch/net/sunrpc/xdr.c.orig linux-2.6.34.noarch/net/sunrpc/xdr.c
+--- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-23 11:01:00.400574086 -0400
+@@ -762,6 +762,7 @@ int write_bytes_to_xdr_buf(struct xdr_bu
+ 	__write_bytes_to_xdr_buf(&subbuf, obj, len);
+ 	return 0;
+ }
++EXPORT_SYMBOL_GPL(write_bytes_to_xdr_buf);
+ 
+ int
+ xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj)
+diff -up linux-2.6.34.noarch/net/sunrpc/xprt.c.orig linux-2.6.34.noarch/net/sunrpc/xprt.c
+--- linux-2.6.34.noarch/net/sunrpc/xprt.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xprt.c	2010-08-23 11:01:00.401372963 -0400
+@@ -43,6 +43,7 @@
+ #include <linux/interrupt.h>
+ #include <linux/workqueue.h>
+ #include <linux/net.h>
++#include <linux/ktime.h>
+ 
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/sunrpc/metrics.h>
+@@ -62,7 +63,6 @@
+  * Local functions
+  */
+ static void	xprt_request_init(struct rpc_task *, struct rpc_xprt *);
+-static inline void	do_xprt_reserve(struct rpc_task *);
+ static void	xprt_connect_status(struct rpc_task *task);
+ static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
+ 
+@@ -711,12 +711,16 @@ void xprt_connect(struct rpc_task *task)
+ 		if (task->tk_rqstp)
+ 			task->tk_rqstp->rq_bytes_sent = 0;
+ 
+-		task->tk_timeout = xprt->connect_timeout;
++		task->tk_timeout = task->tk_rqstp->rq_timeout;
+ 		rpc_sleep_on(&xprt->pending, task, xprt_connect_status);
++
++		if (test_bit(XPRT_CLOSING, &xprt->state))
++			return;
++		if (xprt_test_and_set_connecting(xprt))
++			return;
+ 		xprt->stat.connect_start = jiffies;
+ 		xprt->ops->connect(task);
+ 	}
+-	return;
+ }
+ 
+ static void xprt_connect_status(struct rpc_task *task)
+@@ -771,25 +775,19 @@ struct rpc_rqst *xprt_lookup_rqst(struct
+ }
+ EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
+ 
+-/**
+- * xprt_update_rtt - update an RPC client's RTT state after receiving a reply
+- * @task: RPC request that recently completed
+- *
+- */
+-void xprt_update_rtt(struct rpc_task *task)
++static void xprt_update_rtt(struct rpc_task *task)
+ {
+ 	struct rpc_rqst *req = task->tk_rqstp;
+ 	struct rpc_rtt *rtt = task->tk_client->cl_rtt;
+ 	unsigned timer = task->tk_msg.rpc_proc->p_timer;
++	long m = usecs_to_jiffies(ktime_to_us(req->rq_rtt));
+ 
+ 	if (timer) {
+ 		if (req->rq_ntrans == 1)
+-			rpc_update_rtt(rtt, timer,
+-					(long)jiffies - req->rq_xtime);
++			rpc_update_rtt(rtt, timer, m);
+ 		rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
+ 	}
+ }
+-EXPORT_SYMBOL_GPL(xprt_update_rtt);
+ 
+ /**
+  * xprt_complete_rqst - called when reply processing is complete
+@@ -807,7 +805,9 @@ void xprt_complete_rqst(struct rpc_task 
+ 			task->tk_pid, ntohl(req->rq_xid), copied);
+ 
+ 	xprt->stat.recvs++;
+-	task->tk_rtt = (long)jiffies - req->rq_xtime;
++	req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime);
++	if (xprt->ops->timer != NULL)
++		xprt_update_rtt(task);
+ 
+ 	list_del_init(&req->rq_list);
+ 	req->rq_private_buf.len = copied;
+@@ -906,7 +906,7 @@ void xprt_transmit(struct rpc_task *task
+ 		return;
+ 
+ 	req->rq_connect_cookie = xprt->connect_cookie;
+-	req->rq_xtime = jiffies;
++	req->rq_xtime = ktime_get();
+ 	status = xprt->ops->send_request(task);
+ 	if (status != 0) {
+ 		task->tk_status = status;
+@@ -935,7 +935,7 @@ void xprt_transmit(struct rpc_task *task
+ 	spin_unlock_bh(&xprt->transport_lock);
+ }
+ 
+-static inline void do_xprt_reserve(struct rpc_task *task)
++static void xprt_alloc_slot(struct rpc_task *task)
+ {
+ 	struct rpc_xprt	*xprt = task->tk_xprt;
+ 
+@@ -955,6 +955,16 @@ static inline void do_xprt_reserve(struc
+ 	rpc_sleep_on(&xprt->backlog, task, NULL);
+ }
+ 
++static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
++{
++	memset(req, 0, sizeof(*req));	/* mark unused */
++
++	spin_lock(&xprt->reserve_lock);
++	list_add(&req->rq_list, &xprt->free);
++	rpc_wake_up_next(&xprt->backlog);
++	spin_unlock(&xprt->reserve_lock);
++}
++
+ /**
+  * xprt_reserve - allocate an RPC request slot
+  * @task: RPC task requesting a slot allocation
+@@ -968,7 +978,7 @@ void xprt_reserve(struct rpc_task *task)
+ 
+ 	task->tk_status = -EIO;
+ 	spin_lock(&xprt->reserve_lock);
+-	do_xprt_reserve(task);
++	xprt_alloc_slot(task);
+ 	spin_unlock(&xprt->reserve_lock);
+ }
+ 
+@@ -1006,14 +1016,10 @@ void xprt_release(struct rpc_task *task)
+ {
+ 	struct rpc_xprt	*xprt;
+ 	struct rpc_rqst	*req;
+-	int is_bc_request;
+ 
+ 	if (!(req = task->tk_rqstp))
+ 		return;
+ 
+-	/* Preallocated backchannel request? */
+-	is_bc_request = bc_prealloc(req);
+-
+ 	xprt = req->rq_xprt;
+ 	rpc_count_iostats(task);
+ 	spin_lock_bh(&xprt->transport_lock);
+@@ -1027,21 +1033,16 @@ void xprt_release(struct rpc_task *task)
+ 		mod_timer(&xprt->timer,
+ 				xprt->last_used + xprt->idle_timeout);
+ 	spin_unlock_bh(&xprt->transport_lock);
+-	if (!bc_prealloc(req))
++	if (req->rq_buffer)
+ 		xprt->ops->buf_free(req->rq_buffer);
+ 	task->tk_rqstp = NULL;
+ 	if (req->rq_release_snd_buf)
+ 		req->rq_release_snd_buf(req);
+ 
+ 	dprintk("RPC: %5u release request %p\n", task->tk_pid, req);
+-	if (likely(!is_bc_request)) {
+-		memset(req, 0, sizeof(*req));	/* mark unused */
+-
+-		spin_lock(&xprt->reserve_lock);
+-		list_add(&req->rq_list, &xprt->free);
+-		rpc_wake_up_next(&xprt->backlog);
+-		spin_unlock(&xprt->reserve_lock);
+-	} else
++	if (likely(!bc_prealloc(req)))
++		xprt_free_slot(xprt, req);
++	else
+ 		xprt_free_bc_request(req);
+ }
+ 
+diff -up linux-2.6.34.noarch/net/sunrpc/xprtrdma/transport.c.orig linux-2.6.34.noarch/net/sunrpc/xprtrdma/transport.c
+--- linux-2.6.34.noarch/net/sunrpc/xprtrdma/transport.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xprtrdma/transport.c	2010-08-23 11:01:00.402563985 -0400
+@@ -305,7 +305,6 @@ xprt_setup_rdma(struct xprt_create *args
+ 	/* 60 second timeout, no retries */
+ 	xprt->timeout = &xprt_rdma_default_timeout;
+ 	xprt->bind_timeout = (60U * HZ);
+-	xprt->connect_timeout = (60U * HZ);
+ 	xprt->reestablish_timeout = (5U * HZ);
+ 	xprt->idle_timeout = (5U * 60 * HZ);
+ 
+@@ -449,21 +448,19 @@ xprt_rdma_connect(struct rpc_task *task)
+ 	struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt;
+ 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ 
+-	if (!xprt_test_and_set_connecting(xprt)) {
+-		if (r_xprt->rx_ep.rep_connected != 0) {
+-			/* Reconnect */
+-			schedule_delayed_work(&r_xprt->rdma_connect,
+-				xprt->reestablish_timeout);
+-			xprt->reestablish_timeout <<= 1;
+-			if (xprt->reestablish_timeout > (30 * HZ))
+-				xprt->reestablish_timeout = (30 * HZ);
+-			else if (xprt->reestablish_timeout < (5 * HZ))
+-				xprt->reestablish_timeout = (5 * HZ);
+-		} else {
+-			schedule_delayed_work(&r_xprt->rdma_connect, 0);
+-			if (!RPC_IS_ASYNC(task))
+-				flush_scheduled_work();
+-		}
++	if (r_xprt->rx_ep.rep_connected != 0) {
++		/* Reconnect */
++		schedule_delayed_work(&r_xprt->rdma_connect,
++			xprt->reestablish_timeout);
++		xprt->reestablish_timeout <<= 1;
++		if (xprt->reestablish_timeout > (30 * HZ))
++			xprt->reestablish_timeout = (30 * HZ);
++		else if (xprt->reestablish_timeout < (5 * HZ))
++			xprt->reestablish_timeout = (5 * HZ);
++	} else {
++		schedule_delayed_work(&r_xprt->rdma_connect, 0);
++		if (!RPC_IS_ASYNC(task))
++			flush_scheduled_work();
+ 	}
+ }
+ 
+@@ -677,7 +674,7 @@ xprt_rdma_send_request(struct rpc_task *
+ 	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
+ 		goto drop_connection;
+ 
+-	task->tk_bytes_sent += rqst->rq_snd_buf.len;
++	rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
+ 	rqst->rq_bytes_sent = 0;
+ 	return 0;
+ 
+diff -up linux-2.6.34.noarch/net/sunrpc/xprtsock.c.orig linux-2.6.34.noarch/net/sunrpc/xprtsock.c
+--- linux-2.6.34.noarch/net/sunrpc/xprtsock.c.orig	2010-08-23 11:00:23.890501549 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xprtsock.c	2010-08-23 11:01:00.403564023 -0400
+@@ -138,20 +138,6 @@ static ctl_table sunrpc_table[] = {
+ #endif
+ 
+ /*
+- * Time out for an RPC UDP socket connect.  UDP socket connects are
+- * synchronous, but we set a timeout anyway in case of resource
+- * exhaustion on the local host.
+- */
+-#define XS_UDP_CONN_TO		(5U * HZ)
+-
+-/*
+- * Wait duration for an RPC TCP connection to be established.  Solaris
+- * NFS over TCP uses 60 seconds, for example, which is in line with how
+- * long a server takes to reboot.
+- */
+-#define XS_TCP_CONN_TO		(60U * HZ)
+-
+-/*
+  * Wait duration for a reply from the RPC portmapper.
+  */
+ #define XS_BIND_TO		(60U * HZ)
+@@ -543,7 +529,7 @@ static int xs_udp_send_request(struct rp
+ 			xdr->len - req->rq_bytes_sent, status);
+ 
+ 	if (status >= 0) {
+-		task->tk_bytes_sent += status;
++		req->rq_xmit_bytes_sent += status;
+ 		if (status >= req->rq_slen)
+ 			return 0;
+ 		/* Still some bytes left; set up for a retry later. */
+@@ -639,7 +625,7 @@ static int xs_tcp_send_request(struct rp
+ 		/* If we've sent the entire packet, immediately
+ 		 * reset the count of bytes sent. */
+ 		req->rq_bytes_sent += status;
+-		task->tk_bytes_sent += status;
++		req->rq_xmit_bytes_sent += status;
+ 		if (likely(req->rq_bytes_sent >= req->rq_slen)) {
+ 			req->rq_bytes_sent = 0;
+ 			return 0;
+@@ -859,7 +845,6 @@ static void xs_udp_data_ready(struct soc
+ 	dst_confirm(skb_dst(skb));
+ 
+ 	xprt_adjust_cwnd(task, copied);
+-	xprt_update_rtt(task);
+ 	xprt_complete_rqst(task, copied);
+ 
+  out_unlock:
+@@ -2022,9 +2007,6 @@ static void xs_connect(struct rpc_task *
+ 	struct rpc_xprt *xprt = task->tk_xprt;
+ 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ 
+-	if (xprt_test_and_set_connecting(xprt))
+-		return;
+-
+ 	if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) {
+ 		dprintk("RPC:       xs_connect delayed xprt %p for %lu "
+ 				"seconds\n",
+@@ -2044,16 +2026,6 @@ static void xs_connect(struct rpc_task *
+ 	}
+ }
+ 
+-static void xs_tcp_connect(struct rpc_task *task)
+-{
+-	struct rpc_xprt *xprt = task->tk_xprt;
+-
+-	/* Exit if we need to wait for socket shutdown to complete */
+-	if (test_bit(XPRT_CLOSING, &xprt->state))
+-		return;
+-	xs_connect(task);
+-}
+-
+ /**
+  * xs_udp_print_stats - display UDP socket-specifc stats
+  * @xprt: rpc_xprt struct containing statistics
+@@ -2252,7 +2224,7 @@ static struct rpc_xprt_ops xs_tcp_ops = 
+ 	.release_xprt		= xs_tcp_release_xprt,
+ 	.rpcbind		= rpcb_getport_async,
+ 	.set_port		= xs_set_port,
+-	.connect		= xs_tcp_connect,
++	.connect		= xs_connect,
+ 	.buf_alloc		= rpc_malloc,
+ 	.buf_free		= rpc_free,
+ 	.send_request		= xs_tcp_send_request,
+@@ -2343,7 +2315,6 @@ static struct rpc_xprt *xs_setup_udp(str
+ 	xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
+ 
+ 	xprt->bind_timeout = XS_BIND_TO;
+-	xprt->connect_timeout = XS_UDP_CONN_TO;
+ 	xprt->reestablish_timeout = XS_UDP_REEST_TO;
+ 	xprt->idle_timeout = XS_IDLE_DISC_TO;
+ 
+@@ -2418,7 +2389,6 @@ static struct rpc_xprt *xs_setup_tcp(str
+ 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
+ 
+ 	xprt->bind_timeout = XS_BIND_TO;
+-	xprt->connect_timeout = XS_TCP_CONN_TO;
+ 	xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+ 	xprt->idle_timeout = XS_IDLE_DISC_TO;
+ 
+@@ -2478,9 +2448,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(
+ 	struct sock_xprt *transport;
+ 	struct svc_sock *bc_sock;
+ 
+-	if (!args->bc_xprt)
+-		ERR_PTR(-EINVAL);
+-
+ 	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+ 	if (IS_ERR(xprt))
+ 		return xprt;
+@@ -2494,7 +2461,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(
+ 	/* backchannel */
+ 	xprt_set_bound(xprt);
+ 	xprt->bind_timeout = 0;
+-	xprt->connect_timeout = 0;
+ 	xprt->reestablish_timeout = 0;
+ 	xprt->idle_timeout = 0;
+ 
diff --git a/nfsd-35-fc.patch b/nfsd-35-fc.patch
new file mode 100644
index 000000000..ef99b4995
--- /dev/null
+++ b/nfsd-35-fc.patch
@@ -0,0 +1,1808 @@
+diff -up linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt
+--- linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt	2010-08-23 09:57:18.233564439 -0400
+@@ -137,7 +137,7 @@ NS*| OPENATTR             | OPT        |
+    | READ                 | REQ        |              | Section 18.22  |
+    | READDIR              | REQ        |              | Section 18.23  |
+    | READLINK             | OPT        |              | Section 18.24  |
+-NS | RECLAIM_COMPLETE     | REQ        |              | Section 18.51  |
++   | RECLAIM_COMPLETE     | REQ        |              | Section 18.51  |
+    | RELEASE_LOCKOWNER    | MNI        |              | N/A            |
+    | REMOVE               | REQ        |              | Section 18.25  |
+    | RENAME               | REQ        |              | Section 18.26  |
+diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
+--- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 09:57:18.234564075 -0400
+@@ -259,10 +259,9 @@ static struct cache_detail svc_expkey_ca
+ 	.alloc		= expkey_alloc,
+ };
+ 
+-static struct svc_expkey *
+-svc_expkey_lookup(struct svc_expkey *item)
++static int
++svc_expkey_hash(struct svc_expkey *item)
+ {
+-	struct cache_head *ch;
+ 	int hash = item->ek_fsidtype;
+ 	char * cp = (char*)item->ek_fsid;
+ 	int len = key_len(item->ek_fsidtype);
+@@ -270,6 +269,14 @@ svc_expkey_lookup(struct svc_expkey *ite
+ 	hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
+ 	hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS);
+ 	hash &= EXPKEY_HASHMASK;
++	return hash;
++}
++
++static struct svc_expkey *
++svc_expkey_lookup(struct svc_expkey *item)
++{
++	struct cache_head *ch;
++	int hash = svc_expkey_hash(item);
+ 
+ 	ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h,
+ 				 hash);
+@@ -283,13 +290,7 @@ static struct svc_expkey *
+ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
+ {
+ 	struct cache_head *ch;
+-	int hash = new->ek_fsidtype;
+-	char * cp = (char*)new->ek_fsid;
+-	int len = key_len(new->ek_fsidtype);
+-
+-	hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
+-	hash ^= hash_ptr(new->ek_client, EXPKEY_HASHBITS);
+-	hash &= EXPKEY_HASHMASK;
++	int hash = svc_expkey_hash(new);
+ 
+ 	ch = sunrpc_cache_update(&svc_expkey_cache, &new->h,
+ 				 &old->h, hash);
+@@ -738,14 +739,22 @@ struct cache_detail svc_export_cache = {
+ 	.alloc		= svc_export_alloc,
+ };
+ 
+-static struct svc_export *
+-svc_export_lookup(struct svc_export *exp)
++static int
++svc_export_hash(struct svc_export *exp)
+ {
+-	struct cache_head *ch;
+ 	int hash;
++
+ 	hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS);
+ 	hash ^= hash_ptr(exp->ex_path.dentry, EXPORT_HASHBITS);
+ 	hash ^= hash_ptr(exp->ex_path.mnt, EXPORT_HASHBITS);
++	return hash;
++}
++
++static struct svc_export *
++svc_export_lookup(struct svc_export *exp)
++{
++	struct cache_head *ch;
++	int hash = svc_export_hash(exp);
+ 
+ 	ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h,
+ 				 hash);
+@@ -759,10 +768,7 @@ static struct svc_export *
+ svc_export_update(struct svc_export *new, struct svc_export *old)
+ {
+ 	struct cache_head *ch;
+-	int hash;
+-	hash = hash_ptr(old->ex_client, EXPORT_HASHBITS);
+-	hash ^= hash_ptr(old->ex_path.dentry, EXPORT_HASHBITS);
+-	hash ^= hash_ptr(old->ex_path.mnt, EXPORT_HASHBITS);
++	int hash = svc_export_hash(old);
+ 
+ 	ch = sunrpc_cache_update(&svc_export_cache, &new->h,
+ 				 &old->h,
+@@ -1071,9 +1077,9 @@ exp_export(struct nfsctl_export *nxp)
+ 		err = 0;
+ finish:
+ 	kfree(new.ex_pathname);
+-	if (exp)
++	if (!IS_ERR_OR_NULL(exp))
+ 		exp_put(exp);
+-	if (fsid_key && !IS_ERR(fsid_key))
++	if (!IS_ERR_OR_NULL(fsid_key))
+ 		cache_put(&fsid_key->h, &svc_expkey_cache);
+ 	path_put(&path);
+ out_put_clp:
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 10:00:37.257414684 -0400
+@@ -79,11 +79,6 @@ enum nfs_cb_opnum4 {
+ 					cb_sequence_dec_sz +            \
+ 					op_dec_sz)
+ 
+-struct nfs4_rpc_args {
+-	void				*args_op;
+-	struct nfsd4_cb_sequence	args_seq;
+-};
+-
+ /*
+ * Generic encode routines from fs/nfs/nfs4xdr.c
+ */
+@@ -428,13 +423,19 @@ static struct rpc_procinfo     nfs4_cb_p
+ };
+ 
+ static struct rpc_version       nfs_cb_version4 = {
++/*
++ * Note on the callback rpc program version number: despite language in rfc
++ * 5661 section 18.36.3 requiring servers to use 4 in this field, the
++ * official xdr descriptions for both 4.0 and 4.1 specify version 1, and
++ * in practice that appears to be what implementations use.  The section
++ * 18.36.3 language is expected to be fixed in an erratum.
++ */
+         .number                 = 1,
+         .nrprocs                = ARRAY_SIZE(nfs4_cb_procedures),
+         .procs                  = nfs4_cb_procedures
+ };
+ 
+ static struct rpc_version *	nfs_cb_version[] = {
+-	NULL,
+ 	&nfs_cb_version4,
+ };
+ 
+@@ -456,15 +457,14 @@ static struct rpc_program cb_program = {
+ 
+ static int max_cb_time(void)
+ {
+-	return max(NFSD_LEASE_TIME/10, (time_t)1) * HZ;
++	return max(nfsd4_lease/10, (time_t)1) * HZ;
+ }
+ 
+ /* Reference counting, callback cleanup, etc., all look racy as heck.
+- * And why is cb_set an atomic? */
++ * And why is cl_cb_set an atomic? */
+ 
+-int setup_callback_client(struct nfs4_client *clp)
++int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
+ {
+-	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
+ 	struct rpc_timeout	timeparms = {
+ 		.to_initval	= max_cb_time(),
+ 		.to_retries	= 0,
+@@ -476,7 +476,7 @@ int setup_callback_client(struct nfs4_cl
+ 		.timeout	= &timeparms,
+ 		.program	= &cb_program,
+ 		.prognumber	= cb->cb_prog,
+-		.version	= nfs_cb_version[1]->number,
++		.version	= 0,
+ 		.authflavor	= clp->cl_flavor,
+ 		.flags		= (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
+ 		.client_name    = clp->cl_principal,
+@@ -486,7 +486,7 @@ int setup_callback_client(struct nfs4_cl
+ 	if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
+ 		return -EINVAL;
+ 	if (cb->cb_minorversion) {
+-		args.bc_xprt = clp->cl_cb_xprt;
++		args.bc_xprt = cb->cb_xprt;
+ 		args.protocol = XPRT_TRANSPORT_BC_TCP;
+ 	}
+ 	/* Create RPC client */
+@@ -496,7 +496,7 @@ int setup_callback_client(struct nfs4_cl
+ 			PTR_ERR(client));
+ 		return PTR_ERR(client);
+ 	}
+-	cb->cb_client = client;
++	nfsd4_set_callback_client(clp, client);
+ 	return 0;
+ 
+ }
+@@ -514,8 +514,7 @@ static void nfsd4_cb_probe_done(struct r
+ 	if (task->tk_status)
+ 		warn_no_callback_path(clp, task->tk_status);
+ 	else
+-		atomic_set(&clp->cl_cb_conn.cb_set, 1);
+-	put_nfs4_client(clp);
++		atomic_set(&clp->cl_cb_set, 1);
+ }
+ 
+ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
+@@ -537,7 +536,6 @@ int set_callback_cred(void)
+ 
+ void do_probe_callback(struct nfs4_client *clp)
+ {
+-	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
+ 	struct rpc_message msg = {
+ 		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
+ 		.rpc_argp       = clp,
+@@ -545,34 +543,28 @@ void do_probe_callback(struct nfs4_clien
+ 	};
+ 	int status;
+ 
+-	status = rpc_call_async(cb->cb_client, &msg,
++	status = rpc_call_async(cb->cl_cb_client, &msg,
+ 				RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
+ 				&nfsd4_cb_probe_ops, (void *)clp);
+-	if (status) {
++	if (status)
+ 		warn_no_callback_path(clp, status);
+-		put_nfs4_client(clp);
+-	}
+ }
+ 
+ /*
+  * Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
+  */
+-void
+-nfsd4_probe_callback(struct nfs4_client *clp)
++void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
+ {
+ 	int status;
+ 
+-	BUG_ON(atomic_read(&clp->cl_cb_conn.cb_set));
++	BUG_ON(atomic_read(&clp->cl_cb_set));
+ 
+-	status = setup_callback_client(clp);
++	status = setup_callback_client(clp, cb);
+ 	if (status) {
+ 		warn_no_callback_path(clp, status);
+ 		return;
+ 	}
+ 
+-	/* the task holds a reference to the nfs4_client struct */
+-	atomic_inc(&clp->cl_count);
+-
+ 	do_probe_callback(clp);
+ }
+ 
+@@ -658,18 +650,32 @@ static void nfsd4_cb_done(struct rpc_tas
+ 	}
+ }
+ 
++
+ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs4_delegation *dp = calldata;
+ 	struct nfs4_client *clp = dp->dl_client;
++	struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
+ 
+ 	nfsd4_cb_done(task, calldata);
+ 
++	if (current_rpc_client == NULL) {
++		/* We're shutting down; give up. */
++		/* XXX: err, or is it ok just to fall through
++		 * and rpc_restart_call? */
++		return;
++	}
++
+ 	switch (task->tk_status) {
+ 	case -EIO:
+ 		/* Network partition? */
+-		atomic_set(&clp->cl_cb_conn.cb_set, 0);
++		atomic_set(&clp->cl_cb_set, 0);
+ 		warn_no_callback_path(clp, task->tk_status);
++		if (current_rpc_client != task->tk_client) {
++			/* queue a callback on the new connection: */
++			nfsd4_cb_recall(dp);
++			return;
++		}
+ 	case -EBADHANDLE:
+ 	case -NFS4ERR_BAD_STATEID:
+ 		/* Race: client probably got cb_recall
+@@ -677,7 +683,7 @@ static void nfsd4_cb_recall_done(struct 
+ 		break;
+ 	default:
+ 		/* success, or error we can't handle */
+-		goto done;
++		return;
+ 	}
+ 	if (dp->dl_retries--) {
+ 		rpc_delay(task, 2*HZ);
+@@ -685,20 +691,16 @@ static void nfsd4_cb_recall_done(struct 
+ 		rpc_restart_call(task);
+ 		return;
+ 	} else {
+-		atomic_set(&clp->cl_cb_conn.cb_set, 0);
++		atomic_set(&clp->cl_cb_set, 0);
+ 		warn_no_callback_path(clp, task->tk_status);
+ 	}
+-done:
+-	kfree(task->tk_msg.rpc_argp);
+ }
+ 
+ static void nfsd4_cb_recall_release(void *calldata)
+ {
+ 	struct nfs4_delegation *dp = calldata;
+-	struct nfs4_client *clp = dp->dl_client;
+ 
+ 	nfs4_put_delegation(dp);
+-	put_nfs4_client(clp);
+ }
+ 
+ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
+@@ -707,33 +709,75 @@ static const struct rpc_call_ops nfsd4_c
+ 	.rpc_release = nfsd4_cb_recall_release,
+ };
+ 
++static struct workqueue_struct *callback_wq;
++
++int nfsd4_create_callback_queue(void)
++{
++	callback_wq = create_singlethread_workqueue("nfsd4_callbacks");
++	if (!callback_wq)
++		return -ENOMEM;
++	return 0;
++}
++
++void nfsd4_destroy_callback_queue(void)
++{
++	destroy_workqueue(callback_wq);
++}
++
++/* must be called under the state lock */
++void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new)
++{
++	struct rpc_clnt *old = clp->cl_cb_client;
++
++	clp->cl_cb_client = new;
++	/*
++	 * After this, any work that saw the old value of cl_cb_client will
++	 * be gone:
++	 */
++	flush_workqueue(callback_wq);
++	/* So we can safely shut it down: */
++	if (old)
++		rpc_shutdown_client(old);
++}
++
+ /*
+  * called with dp->dl_count inc'ed.
+  */
+-void
+-nfsd4_cb_recall(struct nfs4_delegation *dp)
++static void _nfsd4_cb_recall(struct nfs4_delegation *dp)
+ {
+ 	struct nfs4_client *clp = dp->dl_client;
+-	struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
+-	struct nfs4_rpc_args *args;
++	struct rpc_clnt *clnt = clp->cl_cb_client;
++	struct nfs4_rpc_args *args = &dp->dl_recall.cb_args;
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
+ 		.rpc_cred = callback_cred
+ 	};
+-	int status = -ENOMEM;
++	int status;
++
++	if (clnt == NULL)
++		return; /* Client is shutting down; give up. */
+ 
+-	args = kzalloc(sizeof(*args), GFP_KERNEL);
+-	if (!args)
+-		goto out;
+ 	args->args_op = dp;
+ 	msg.rpc_argp = args;
+ 	dp->dl_retries = 1;
+ 	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
+ 				&nfsd4_cb_recall_ops, dp);
+-out:
+-	if (status) {
+-		kfree(args);
+-		put_nfs4_client(clp);
++	if (status)
+ 		nfs4_put_delegation(dp);
+-	}
++}
++
++void nfsd4_do_callback_rpc(struct work_struct *w)
++{
++	/* XXX: for now, just send off delegation recall. */
++	/* In future, generalize to handle any sort of callback. */
++	struct nfsd4_callback *c = container_of(w, struct nfsd4_callback, cb_work);
++	struct nfs4_delegation *dp = container_of(c, struct nfs4_delegation, dl_recall);
++
++	_nfsd4_cb_recall(dp);
++}
++
++
++void nfsd4_cb_recall(struct nfs4_delegation *dp)
++{
++	queue_work(callback_wq, &dp->dl_recall.cb_work);
+ }
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 09:57:18.237376763 -0400
+@@ -969,20 +969,36 @@ static struct nfsd4_operation nfsd4_ops[
+ static const char *nfsd4_op_name(unsigned opnum);
+ 
+ /*
+- * Enforce NFSv4.1 COMPOUND ordering rules.
++ * Enforce NFSv4.1 COMPOUND ordering rules:
+  *
+- * TODO:
+- * - enforce NFS4ERR_NOT_ONLY_OP,
+- * - DESTROY_SESSION MUST be the final operation in the COMPOUND request.
++ * Also note, enforced elsewhere:
++ *	- SEQUENCE other than as first op results in
++ *	  NFS4ERR_SEQUENCE_POS. (Enforced in nfsd4_sequence().)
++ *	- BIND_CONN_TO_SESSION must be the only op in its compound
++ *	  (Will be enforced in nfsd4_bind_conn_to_session().)
++ *	- DESTROY_SESSION must be the final operation in a compound, if
++ *	  sessionid's in SEQUENCE and DESTROY_SESSION are the same.
++ *	  (Enforced in nfsd4_destroy_session().)
+  */
+-static bool nfs41_op_ordering_ok(struct nfsd4_compoundargs *args)
++static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
+ {
+-	if (args->minorversion && args->opcnt > 0) {
+-		struct nfsd4_op *op = &args->ops[0];
+-		return (op->status == nfserr_op_illegal) ||
+-		       (nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP);
+-	}
+-	return true;
++	struct nfsd4_op *op = &args->ops[0];
++
++	/* These ordering requirements don't apply to NFSv4.0: */
++	if (args->minorversion == 0)
++		return nfs_ok;
++	/* This is weird, but OK, not our problem: */
++	if (args->opcnt == 0)
++		return nfs_ok;
++	if (op->status == nfserr_op_illegal)
++		return nfs_ok;
++	if (!(nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP))
++		return nfserr_op_not_in_session;
++	if (op->opnum == OP_SEQUENCE)
++		return nfs_ok;
++	if (args->opcnt != 1)
++		return nfserr_not_only_op;
++	return nfs_ok;
+ }
+ 
+ /*
+@@ -1012,6 +1028,7 @@ nfsd4_proc_compound(struct svc_rqst *rqs
+ 	resp->rqstp = rqstp;
+ 	resp->cstate.minorversion = args->minorversion;
+ 	resp->cstate.replay_owner = NULL;
++	resp->cstate.session = NULL;
+ 	fh_init(&resp->cstate.current_fh, NFS4_FHSIZE);
+ 	fh_init(&resp->cstate.save_fh, NFS4_FHSIZE);
+ 	/* Use the deferral mechanism only for NFSv4.0 compounds */
+@@ -1024,13 +1041,13 @@ nfsd4_proc_compound(struct svc_rqst *rqs
+ 	if (args->minorversion > nfsd_supported_minorversion)
+ 		goto out;
+ 
+-	if (!nfs41_op_ordering_ok(args)) {
++	status = nfs41_check_op_ordering(args);
++	if (status) {
+ 		op = &args->ops[0];
+-		op->status = nfserr_sequence_pos;
++		op->status = status;
+ 		goto encode_op;
+ 	}
+ 
+-	status = nfs_ok;
+ 	while (!status && resp->opcnt < args->opcnt) {
+ 		op = &args->ops[resp->opcnt++];
+ 
+@@ -1295,6 +1312,11 @@ static struct nfsd4_operation nfsd4_ops[
+ 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
+ 		.op_name = "OP_SEQUENCE",
+ 	},
++	[OP_RECLAIM_COMPLETE] = {
++		.op_func = (nfsd4op_func)nfsd4_reclaim_complete,
++		.op_flags = ALLOWED_WITHOUT_FH,
++		.op_name = "OP_RECLAIM_COMPLETE",
++	},
+ };
+ 
+ static const char *nfsd4_op_name(unsigned opnum)
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 09:57:18.240356512 -0400
+@@ -45,8 +45,8 @@
+ #define NFSDDBG_FACILITY                NFSDDBG_PROC
+ 
+ /* Globals */
+-static time_t lease_time = 90;     /* default lease time */
+-static time_t user_lease_time = 90;
++time_t nfsd4_lease = 90;     /* default lease time */
++time_t nfsd4_grace = 90;
+ static time_t boot_time;
+ static u32 current_ownerid = 1;
+ static u32 current_fileid = 1;
+@@ -190,7 +190,7 @@ alloc_init_deleg(struct nfs4_client *clp
+ 	dp->dl_vfs_file = stp->st_vfs_file;
+ 	dp->dl_type = type;
+ 	dp->dl_ident = cb->cb_ident;
+-	dp->dl_stateid.si_boot = get_seconds();
++	dp->dl_stateid.si_boot = boot_time;
+ 	dp->dl_stateid.si_stateownerid = current_delegid++;
+ 	dp->dl_stateid.si_fileid = 0;
+ 	dp->dl_stateid.si_generation = 0;
+@@ -199,6 +199,7 @@ alloc_init_deleg(struct nfs4_client *clp
+ 	atomic_set(&dp->dl_count, 1);
+ 	list_add(&dp->dl_perfile, &fp->fi_delegations);
+ 	list_add(&dp->dl_perclnt, &clp->cl_delegations);
++	INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
+ 	return dp;
+ }
+ 
+@@ -249,6 +250,9 @@ unhash_delegation(struct nfs4_delegation
+  * SETCLIENTID state 
+  */
+ 
++/* client_lock protects the client lru list and session hash table */
++static DEFINE_SPINLOCK(client_lock);
++
+ /* Hash tables for nfs4_clientid state */
+ #define CLIENT_HASH_BITS                 4
+ #define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
+@@ -367,7 +371,6 @@ static void release_openowner(struct nfs
+ 	nfs4_put_stateowner(sop);
+ }
+ 
+-static DEFINE_SPINLOCK(sessionid_lock);
+ #define SESSION_HASH_SIZE	512
+ static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE];
+ 
+@@ -565,10 +568,10 @@ alloc_init_session(struct svc_rqst *rqst
+ 
+ 	new->se_flags = cses->flags;
+ 	kref_init(&new->se_ref);
+-	spin_lock(&sessionid_lock);
++	spin_lock(&client_lock);
+ 	list_add(&new->se_hash, &sessionid_hashtbl[idx]);
+ 	list_add(&new->se_perclnt, &clp->cl_sessions);
+-	spin_unlock(&sessionid_lock);
++	spin_unlock(&client_lock);
+ 
+ 	status = nfs_ok;
+ out:
+@@ -579,7 +582,7 @@ out_free:
+ 	goto out;
+ }
+ 
+-/* caller must hold sessionid_lock */
++/* caller must hold client_lock */
+ static struct nfsd4_session *
+ find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
+ {
+@@ -602,7 +605,7 @@ find_in_sessionid_hashtbl(struct nfs4_se
+ 	return NULL;
+ }
+ 
+-/* caller must hold sessionid_lock */
++/* caller must hold client_lock */
+ static void
+ unhash_session(struct nfsd4_session *ses)
+ {
+@@ -610,15 +613,6 @@ unhash_session(struct nfsd4_session *ses
+ 	list_del(&ses->se_perclnt);
+ }
+ 
+-static void
+-release_session(struct nfsd4_session *ses)
+-{
+-	spin_lock(&sessionid_lock);
+-	unhash_session(ses);
+-	spin_unlock(&sessionid_lock);
+-	nfsd4_put_session(ses);
+-}
+-
+ void
+ free_session(struct kref *kref)
+ {
+@@ -634,9 +628,18 @@ free_session(struct kref *kref)
+ 	kfree(ses);
+ }
+ 
++/* must be called under the client_lock */
+ static inline void
+-renew_client(struct nfs4_client *clp)
++renew_client_locked(struct nfs4_client *clp)
+ {
++	if (is_client_expired(clp)) {
++		dprintk("%s: client (clientid %08x/%08x) already expired\n",
++			__func__,
++			clp->cl_clientid.cl_boot,
++			clp->cl_clientid.cl_id);
++		return;
++	}
++
+ 	/*
+ 	* Move client to the end to the LRU list.
+ 	*/
+@@ -647,6 +650,14 @@ renew_client(struct nfs4_client *clp)
+ 	clp->cl_time = get_seconds();
+ }
+ 
++static inline void
++renew_client(struct nfs4_client *clp)
++{
++	spin_lock(&client_lock);
++	renew_client_locked(clp);
++	spin_unlock(&client_lock);
++}
++
+ /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
+ static int
+ STALE_CLIENTID(clientid_t *clid)
+@@ -680,27 +691,9 @@ static struct nfs4_client *alloc_client(
+ 	return clp;
+ }
+ 
+-static void
+-shutdown_callback_client(struct nfs4_client *clp)
+-{
+-	struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
+-
+-	if (clnt) {
+-		/*
+-		 * Callback threads take a reference on the client, so there
+-		 * should be no outstanding callbacks at this point.
+-		 */
+-		clp->cl_cb_conn.cb_client = NULL;
+-		rpc_shutdown_client(clnt);
+-	}
+-}
+-
+ static inline void
+ free_client(struct nfs4_client *clp)
+ {
+-	shutdown_callback_client(clp);
+-	if (clp->cl_cb_xprt)
+-		svc_xprt_put(clp->cl_cb_xprt);
+ 	if (clp->cl_cred.cr_group_info)
+ 		put_group_info(clp->cl_cred.cr_group_info);
+ 	kfree(clp->cl_principal);
+@@ -709,10 +702,34 @@ free_client(struct nfs4_client *clp)
+ }
+ 
+ void
+-put_nfs4_client(struct nfs4_client *clp)
++release_session_client(struct nfsd4_session *session)
+ {
+-	if (atomic_dec_and_test(&clp->cl_count))
++	struct nfs4_client *clp = session->se_client;
++
++	if (!atomic_dec_and_lock(&clp->cl_refcount, &client_lock))
++		return;
++	if (is_client_expired(clp)) {
+ 		free_client(clp);
++		session->se_client = NULL;
++	} else
++		renew_client_locked(clp);
++	spin_unlock(&client_lock);
++	nfsd4_put_session(session);
++}
++
++/* Expire clp, unlink its cl_lru entry, put its sessions; needs client_lock */
++static inline void
++unhash_client_locked(struct nfs4_client *clp)
++{
++	mark_client_expired(clp);
++	list_del(&clp->cl_lru);
++	while (!list_empty(&clp->cl_sessions)) {
++		struct nfsd4_session  *ses;
++		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
++				 se_perclnt);
++		unhash_session(ses);	/* NOTE(review): presumably unlinks ses from cl_sessions - confirm */
++		nfsd4_put_session(ses);
++	}
+ }
+ 
+ static void
+@@ -722,9 +739,6 @@ expire_client(struct nfs4_client *clp)
+ 	struct nfs4_delegation *dp;
+ 	struct list_head reaplist;
+ 
+-	dprintk("NFSD: expire_client cl_count %d\n",
+-	                    atomic_read(&clp->cl_count));
+-
+ 	INIT_LIST_HEAD(&reaplist);
+ 	spin_lock(&recall_lock);
+ 	while (!list_empty(&clp->cl_delegations)) {
+@@ -740,20 +754,20 @@ expire_client(struct nfs4_client *clp)
+ 		list_del_init(&dp->dl_recall_lru);
+ 		unhash_delegation(dp);
+ 	}
+-	list_del(&clp->cl_idhash);
+-	list_del(&clp->cl_strhash);
+-	list_del(&clp->cl_lru);
+ 	while (!list_empty(&clp->cl_openowners)) {
+ 		sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
+ 		release_openowner(sop);
+ 	}
+-	while (!list_empty(&clp->cl_sessions)) {
+-		struct nfsd4_session  *ses;
+-		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
+-				 se_perclnt);
+-		release_session(ses);
+-	}
+-	put_nfs4_client(clp);
++	nfsd4_set_callback_client(clp, NULL);
++	if (clp->cl_cb_conn.cb_xprt)
++		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
++	list_del(&clp->cl_idhash);
++	list_del(&clp->cl_strhash);
++	spin_lock(&client_lock);
++	unhash_client_locked(clp);
++	if (atomic_read(&clp->cl_refcount) == 0)
++		free_client(clp);
++	spin_unlock(&client_lock);
+ }
+ 
+ static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
+@@ -839,14 +853,15 @@ static struct nfs4_client *create_client
+ 	}
+ 
+ 	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
+-	atomic_set(&clp->cl_count, 1);
+-	atomic_set(&clp->cl_cb_conn.cb_set, 0);
++	atomic_set(&clp->cl_refcount, 0);
++	atomic_set(&clp->cl_cb_set, 0);
+ 	INIT_LIST_HEAD(&clp->cl_idhash);
+ 	INIT_LIST_HEAD(&clp->cl_strhash);
+ 	INIT_LIST_HEAD(&clp->cl_openowners);
+ 	INIT_LIST_HEAD(&clp->cl_delegations);
+ 	INIT_LIST_HEAD(&clp->cl_sessions);
+ 	INIT_LIST_HEAD(&clp->cl_lru);
++	clp->cl_time = get_seconds();
+ 	clear_bit(0, &clp->cl_cb_slot_busy);
+ 	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
+ 	copy_verf(clp, verf);
+@@ -877,8 +892,7 @@ add_to_unconfirmed(struct nfs4_client *c
+ 	list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]);
+ 	idhashval = clientid_hashval(clp->cl_clientid.cl_id);
+ 	list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]);
+-	list_add_tail(&clp->cl_lru, &client_lru);
+-	clp->cl_time = get_seconds();
++	renew_client(clp);
+ }
+ 
+ static void
+@@ -888,10 +902,9 @@ move_to_confirmed(struct nfs4_client *cl
+ 	unsigned int strhashval;
+ 
+ 	dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
+-	list_del_init(&clp->cl_strhash);
+ 	list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
+ 	strhashval = clientstr_hashval(clp->cl_recdir);
+-	list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
++	list_move(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
+ 	renew_client(clp);
+ }
+ 
+@@ -1327,15 +1340,9 @@ nfsd4_create_session(struct svc_rqst *rq
+ 		cs_slot->sl_seqid++; /* from 0 to 1 */
+ 		move_to_confirmed(unconf);
+ 
+-		/*
+-		 * We do not support RDMA or persistent sessions
+-		 */
+-		cr_ses->flags &= ~SESSION4_PERSIST;
+-		cr_ses->flags &= ~SESSION4_RDMA;
+-
+ 		if (cr_ses->flags & SESSION4_BACK_CHAN) {
+-			unconf->cl_cb_xprt = rqstp->rq_xprt;
+-			svc_xprt_get(unconf->cl_cb_xprt);
++			unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
++			svc_xprt_get(rqstp->rq_xprt);
+ 			rpc_copy_addr(
+ 				(struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
+ 				sa);
+@@ -1344,7 +1351,7 @@ nfsd4_create_session(struct svc_rqst *rq
+ 				cstate->minorversion;
+ 			unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
+ 			unconf->cl_cb_seq_nr = 1;
+-			nfsd4_probe_callback(unconf);
++			nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
+ 		}
+ 		conf = unconf;
+ 	} else {
+@@ -1352,6 +1359,12 @@ nfsd4_create_session(struct svc_rqst *rq
+ 		goto out;
+ 	}
+ 
++	/*
++	 * We do not support RDMA or persistent sessions
++	 */
++	cr_ses->flags &= ~SESSION4_PERSIST;
++	cr_ses->flags &= ~SESSION4_RDMA;
++
+ 	status = alloc_init_session(rqstp, conf, cr_ses);
+ 	if (status)
+ 		goto out;
+@@ -1369,6 +1382,21 @@ out:
+ 	return status;
+ }
+ 
++static bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
++{
++	struct nfsd4_compoundres *resp = rqstp->rq_resp;
++	struct nfsd4_compoundargs *argp = rqstp->rq_argp;
++
++	return argp->opcnt == resp->opcnt;	/* request op count == reply op count */
++}
++
++static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
++{
++	if (!session)
++		return false;	/* compound has no session, so sid cannot name it */
++	return !memcmp(sid, &session->se_sessionid, sizeof(*sid));
++}
++
+ __be32
+ nfsd4_destroy_session(struct svc_rqst *r,
+ 		      struct nfsd4_compound_state *cstate,
+@@ -1384,19 +1412,25 @@ nfsd4_destroy_session(struct svc_rqst *r
+ 	 * - Do we need to clear any callback info from previous session?
+ 	 */
+ 
++	if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) {
++		if (!nfsd4_last_compound_op(r))
++			return nfserr_not_only_op;
++	}
+ 	dump_sessionid(__func__, &sessionid->sessionid);
+-	spin_lock(&sessionid_lock);
++	spin_lock(&client_lock);
+ 	ses = find_in_sessionid_hashtbl(&sessionid->sessionid);
+ 	if (!ses) {
+-		spin_unlock(&sessionid_lock);
++		spin_unlock(&client_lock);
+ 		goto out;
+ 	}
+ 
+ 	unhash_session(ses);
+-	spin_unlock(&sessionid_lock);
++	spin_unlock(&client_lock);
+ 
++	nfs4_lock_state();
+ 	/* wait for callbacks */
+-	shutdown_callback_client(ses->se_client);
++	nfsd4_set_callback_client(ses->se_client, NULL);
++	nfs4_unlock_state();
+ 	nfsd4_put_session(ses);
+ 	status = nfs_ok;
+ out:
+@@ -1417,7 +1451,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
+ 	if (resp->opcnt != 1)
+ 		return nfserr_sequence_pos;
+ 
+-	spin_lock(&sessionid_lock);
++	spin_lock(&client_lock);
+ 	status = nfserr_badsession;
+ 	session = find_in_sessionid_hashtbl(&seq->sessionid);
+ 	if (!session)
+@@ -1456,23 +1490,47 @@ nfsd4_sequence(struct svc_rqst *rqstp,
+ 	cstate->slot = slot;
+ 	cstate->session = session;
+ 
+-	/* Hold a session reference until done processing the compound:
+-	 * nfsd4_put_session called only if the cstate slot is set.
+-	 */
+-	nfsd4_get_session(session);
+ out:
+-	spin_unlock(&sessionid_lock);
+-	/* Renew the clientid on success and on replay */
++	/* Hold a session reference until done processing the compound. */
+ 	if (cstate->session) {
+-		nfs4_lock_state();
+-		renew_client(session->se_client);
+-		nfs4_unlock_state();
++		nfsd4_get_session(cstate->session);
++		atomic_inc(&session->se_client->cl_refcount);
+ 	}
++	spin_unlock(&client_lock);
+ 	dprintk("%s: return %d\n", __func__, ntohl(status));
+ 	return status;
+ }
+ 
+ __be32
++nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc)
++{
++	if (rc->rca_one_fs) {
++		if (!cstate->current_fh.fh_dentry)
++			return nfserr_nofilehandle;
++		/*
++		 * We don't take advantage of the rca_one_fs case.
++		 * That's OK, it's optional, we can safely ignore it.
++		 */
++		return nfs_ok;
++	}
++	nfs4_lock_state();
++	if (is_client_expired(cstate->session->se_client)) {
++		nfs4_unlock_state();
++		/*
++		 * The following error isn't really legal.
++		 * But we only get here if the client just explicitly
++		 * destroyed the client.  Surely it no longer cares what
++		 * error it gets back on an operation for the dead
++		 * client.
++		 */
++		return nfserr_stale_clientid;
++	}
++	nfsd4_create_clid_dir(cstate->session->se_client);	/* NOTE(review): presumably records the client for reboot recovery - confirm */
++	nfs4_unlock_state();
++	return nfs_ok;
++}
++
++__be32
+ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ 		  struct nfsd4_setclientid *setclid)
+ {
+@@ -1631,9 +1689,8 @@ nfsd4_setclientid_confirm(struct svc_rqs
+ 		if (!same_creds(&conf->cl_cred, &unconf->cl_cred))
+ 			status = nfserr_clid_inuse;
+ 		else {
+-			/* XXX: We just turn off callbacks until we can handle
+-			  * change request correctly. */
+-			atomic_set(&conf->cl_cb_conn.cb_set, 0);
++			atomic_set(&conf->cl_cb_set, 0);
++			nfsd4_probe_callback(conf, &unconf->cl_cb_conn);
+ 			expire_client(unconf);
+ 			status = nfs_ok;
+ 
+@@ -1667,7 +1724,7 @@ nfsd4_setclientid_confirm(struct svc_rqs
+ 			}
+ 			move_to_confirmed(unconf);
+ 			conf = unconf;
+-			nfsd4_probe_callback(conf);
++			nfsd4_probe_callback(conf, &conf->cl_cb_conn);
+ 			status = nfs_ok;
+ 		}
+ 	} else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
+@@ -1700,12 +1757,12 @@ alloc_init_file(struct inode *ino)
+ 		INIT_LIST_HEAD(&fp->fi_hash);
+ 		INIT_LIST_HEAD(&fp->fi_stateids);
+ 		INIT_LIST_HEAD(&fp->fi_delegations);
+-		spin_lock(&recall_lock);
+-		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
+-		spin_unlock(&recall_lock);
+ 		fp->fi_inode = igrab(ino);
+ 		fp->fi_id = current_fileid++;
+ 		fp->fi_had_conflict = false;
++		spin_lock(&recall_lock);
++		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
++		spin_unlock(&recall_lock);
+ 		return fp;
+ 	}
+ 	return NULL;
+@@ -1827,7 +1884,7 @@ init_stateid(struct nfs4_stateid *stp, s
+ 	stp->st_stateowner = sop;
+ 	get_nfs4_file(fp);
+ 	stp->st_file = fp;
+-	stp->st_stateid.si_boot = get_seconds();
++	stp->st_stateid.si_boot = boot_time;
+ 	stp->st_stateid.si_stateownerid = sop->so_id;
+ 	stp->st_stateid.si_fileid = fp->fi_id;
+ 	stp->st_stateid.si_generation = 0;
+@@ -2028,7 +2085,6 @@ void nfsd_break_deleg_cb(struct file_loc
+ 	 * lock) we know the server hasn't removed the lease yet, we know
+ 	 * it's safe to take a reference: */
+ 	atomic_inc(&dp->dl_count);
+-	atomic_inc(&dp->dl_client->cl_count);
+ 
+ 	spin_lock(&recall_lock);
+ 	list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
+@@ -2347,7 +2403,7 @@ nfs4_open_delegation(struct svc_fh *fh, 
+ {
+ 	struct nfs4_delegation *dp;
+ 	struct nfs4_stateowner *sop = stp->st_stateowner;
+-	struct nfs4_cb_conn *cb = &sop->so_client->cl_cb_conn;
++	int cb_up = atomic_read(&sop->so_client->cl_cb_set);
+ 	struct file_lock fl, *flp = &fl;
+ 	int status, flag = 0;
+ 
+@@ -2355,7 +2411,7 @@ nfs4_open_delegation(struct svc_fh *fh, 
+ 	open->op_recall = 0;
+ 	switch (open->op_claim_type) {
+ 		case NFS4_OPEN_CLAIM_PREVIOUS:
+-			if (!atomic_read(&cb->cb_set))
++			if (!cb_up)
+ 				open->op_recall = 1;
+ 			flag = open->op_delegate_type;
+ 			if (flag == NFS4_OPEN_DELEGATE_NONE)
+@@ -2366,7 +2422,7 @@ nfs4_open_delegation(struct svc_fh *fh, 
+ 			 * had the chance to reclaim theirs.... */
+ 			if (locks_in_grace())
+ 				goto out;
+-			if (!atomic_read(&cb->cb_set) || !sop->so_confirmed)
++			if (!cb_up || !sop->so_confirmed)
+ 				goto out;
+ 			if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+ 				flag = NFS4_OPEN_DELEGATE_WRITE;
+@@ -2483,10 +2539,8 @@ nfsd4_process_open2(struct svc_rqst *rqs
+ 	}
+ 	memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t));
+ 
+-	if (nfsd4_has_session(&resp->cstate)) {
++	if (nfsd4_has_session(&resp->cstate))
+ 		open->op_stateowner->so_confirmed = 1;
+-		nfsd4_create_clid_dir(open->op_stateowner->so_client);
+-	}
+ 
+ 	/*
+ 	* Attempt to hand out a delegation. No error return, because the
+@@ -2537,7 +2591,7 @@ nfsd4_renew(struct svc_rqst *rqstp, stru
+ 	renew_client(clp);
+ 	status = nfserr_cb_path_down;
+ 	if (!list_empty(&clp->cl_delegations)
+-			&& !atomic_read(&clp->cl_cb_conn.cb_set))
++			&& !atomic_read(&clp->cl_cb_set))
+ 		goto out;
+ 	status = nfs_ok;
+ out:
+@@ -2554,6 +2608,12 @@ nfsd4_end_grace(void)
+ 	dprintk("NFSD: end of grace period\n");
+ 	nfsd4_recdir_purge_old();
+ 	locks_end_grace(&nfsd4_manager);
++	/*
++	 * Now that every NFSv4 client has had the chance to recover and
++	 * to see the (possibly new, possibly shorter) lease time, we
++	 * can safely set the next grace time to the current lease time:
++	 */
++	nfsd4_grace = nfsd4_lease;
+ }
+ 
+ static time_t
+@@ -2563,15 +2623,17 @@ nfs4_laundromat(void)
+ 	struct nfs4_stateowner *sop;
+ 	struct nfs4_delegation *dp;
+ 	struct list_head *pos, *next, reaplist;
+-	time_t cutoff = get_seconds() - NFSD_LEASE_TIME;
+-	time_t t, clientid_val = NFSD_LEASE_TIME;
+-	time_t u, test_val = NFSD_LEASE_TIME;
++	time_t cutoff = get_seconds() - nfsd4_lease;
++	time_t t, clientid_val = nfsd4_lease;
++	time_t u, test_val = nfsd4_lease;
+ 
+ 	nfs4_lock_state();
+ 
+ 	dprintk("NFSD: laundromat service - starting\n");
+ 	if (locks_in_grace())
+ 		nfsd4_end_grace();
++	INIT_LIST_HEAD(&reaplist);
++	spin_lock(&client_lock);
+ 	list_for_each_safe(pos, next, &client_lru) {
+ 		clp = list_entry(pos, struct nfs4_client, cl_lru);
+ 		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
+@@ -2580,12 +2642,22 @@ nfs4_laundromat(void)
+ 				clientid_val = t;
+ 			break;
+ 		}
++		if (atomic_read(&clp->cl_refcount)) {
++			dprintk("NFSD: client in use (clientid %08x)\n",
++				clp->cl_clientid.cl_id);
++			continue;
++		}
++		unhash_client_locked(clp);
++		list_add(&clp->cl_lru, &reaplist);
++	}
++	spin_unlock(&client_lock);
++	list_for_each_safe(pos, next, &reaplist) {
++		clp = list_entry(pos, struct nfs4_client, cl_lru);
+ 		dprintk("NFSD: purging unused client (clientid %08x)\n",
+ 			clp->cl_clientid.cl_id);
+ 		nfsd4_remove_clid_dir(clp);
+ 		expire_client(clp);
+ 	}
+-	INIT_LIST_HEAD(&reaplist);
+ 	spin_lock(&recall_lock);
+ 	list_for_each_safe(pos, next, &del_recall_lru) {
+ 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+@@ -2605,7 +2677,7 @@ nfs4_laundromat(void)
+ 		list_del_init(&dp->dl_recall_lru);
+ 		unhash_delegation(dp);
+ 	}
+-	test_val = NFSD_LEASE_TIME;
++	test_val = nfsd4_lease;
+ 	list_for_each_safe(pos, next, &close_lru) {
+ 		sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
+ 		if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
+@@ -2661,39 +2733,11 @@ nfs4_check_fh(struct svc_fh *fhp, struct
+ static int
+ STALE_STATEID(stateid_t *stateid)
+ {
+-	if (time_after((unsigned long)boot_time,
+-			(unsigned long)stateid->si_boot)) {
+-		dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
+-			STATEID_VAL(stateid));
+-		return 1;
+-	}
+-	return 0;
+-}
+-
+-static int
+-EXPIRED_STATEID(stateid_t *stateid)
+-{
+-	if (time_before((unsigned long)boot_time,
+-			((unsigned long)stateid->si_boot)) &&
+-	    time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
+-		dprintk("NFSD: expired stateid " STATEID_FMT "!\n",
+-			STATEID_VAL(stateid));
+-		return 1;
+-	}
+-	return 0;
+-}
+-
+-static __be32
+-stateid_error_map(stateid_t *stateid)
+-{
+-	if (STALE_STATEID(stateid))
+-		return nfserr_stale_stateid;
+-	if (EXPIRED_STATEID(stateid))
+-		return nfserr_expired;
+-
+-	dprintk("NFSD: bad stateid " STATEID_FMT "!\n",
++	if (stateid->si_boot == boot_time)
++		return 0;
++	dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
+ 		STATEID_VAL(stateid));
+-	return nfserr_bad_stateid;
++	return 1;
+ }
+ 
+ static inline int
+@@ -2817,10 +2861,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_
+ 	status = nfserr_bad_stateid;
+ 	if (is_delegation_stateid(stateid)) {
+ 		dp = find_delegation_stateid(ino, stateid);
+-		if (!dp) {
+-			status = stateid_error_map(stateid);
++		if (!dp)
+ 			goto out;
+-		}
+ 		status = check_stateid_generation(stateid, &dp->dl_stateid,
+ 						  flags);
+ 		if (status)
+@@ -2833,10 +2875,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_
+ 			*filpp = dp->dl_vfs_file;
+ 	} else { /* open or lock stateid */
+ 		stp = find_stateid(stateid, flags);
+-		if (!stp) {
+-			status = stateid_error_map(stateid);
++		if (!stp)
+ 			goto out;
+-		}
+ 		if (nfs4_check_fh(current_fh, stp))
+ 			goto out;
+ 		if (!stp->st_stateowner->so_confirmed)
+@@ -2908,7 +2948,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_co
+ 		 */
+ 		sop = search_close_lru(stateid->si_stateownerid, flags);
+ 		if (sop == NULL)
+-			return stateid_error_map(stateid);
++			return nfserr_bad_stateid;
+ 		*sopp = sop;
+ 		goto check_replay;
+ 	}
+@@ -3175,10 +3215,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp
+ 	if (!is_delegation_stateid(stateid))
+ 		goto out;
+ 	dp = find_delegation_stateid(inode, stateid);
+-	if (!dp) {
+-		status = stateid_error_map(stateid);
++	if (!dp)
+ 		goto out;
+-	}
+ 	status = check_stateid_generation(stateid, &dp->dl_stateid, flags);
+ 	if (status)
+ 		goto out;
+@@ -3404,7 +3442,7 @@ alloc_init_lock_stateid(struct nfs4_stat
+ 	stp->st_stateowner = sop;
+ 	get_nfs4_file(fp);
+ 	stp->st_file = fp;
+-	stp->st_stateid.si_boot = get_seconds();
++	stp->st_stateid.si_boot = boot_time;
+ 	stp->st_stateid.si_stateownerid = sop->so_id;
+ 	stp->st_stateid.si_fileid = fp->fi_id;
+ 	stp->st_stateid.si_generation = 0;
+@@ -3976,12 +4014,6 @@ nfsd4_load_reboot_recovery_data(void)
+ 		printk("NFSD: Failure reading reboot recovery data\n");
+ }
+ 
+-unsigned long
+-get_nfs4_grace_period(void)
+-{
+-	return max(user_lease_time, lease_time) * HZ;
+-}
+-
+ /*
+  * Since the lifetime of a delegation isn't limited to that of an open, a
+  * client may quite reasonably hang on to a delegation as long as it has
+@@ -4008,20 +4040,27 @@ set_max_delegations(void)
+ static int
+ __nfs4_state_start(void)
+ {
+-	unsigned long grace_time;
++	int ret;
+ 
+ 	boot_time = get_seconds();
+-	grace_time = get_nfs4_grace_period();
+-	lease_time = user_lease_time;
+ 	locks_start_grace(&nfsd4_manager);
+ 	printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
+-	       grace_time/HZ);
++	       nfsd4_grace);
++	ret = set_callback_cred();
++	if (ret)
++		return -ENOMEM;
+ 	laundry_wq = create_singlethread_workqueue("nfsd4");
+ 	if (laundry_wq == NULL)
+ 		return -ENOMEM;
+-	queue_delayed_work(laundry_wq, &laundromat_work, grace_time);
++	ret = nfsd4_create_callback_queue();
++	if (ret)
++		goto out_free_laundry;
++	queue_delayed_work(laundry_wq, &laundromat_work, nfsd4_grace * HZ);
+ 	set_max_delegations();
+-	return set_callback_cred();
++	return 0;
++out_free_laundry:
++	destroy_workqueue(laundry_wq);
++	return ret;
+ }
+ 
+ int
+@@ -4039,12 +4078,6 @@ nfs4_state_start(void)
+ 	return 0;
+ }
+ 
+-time_t
+-nfs4_lease_time(void)
+-{
+-	return lease_time;
+-}
+-
+ static void
+ __nfs4_state_shutdown(void)
+ {
+@@ -4089,6 +4122,7 @@ nfs4_state_shutdown(void)
+ 	nfs4_lock_state();
+ 	nfs4_release_reclaim();
+ 	__nfs4_state_shutdown();
++	nfsd4_destroy_callback_queue();
+ 	nfs4_unlock_state();
+ }
+ 
+@@ -4128,21 +4162,3 @@ nfs4_recoverydir(void)
+ {
+ 	return user_recovery_dirname;
+ }
+-
+-/*
+- * Called when leasetime is changed.
+- *
+- * The only way the protocol gives us to handle on-the-fly lease changes is to
+- * simulate a reboot.  Instead of doing that, we just wait till the next time
+- * we start to register any changes in lease time.  If the administrator
+- * really wants to change the lease time *now*, they can go ahead and bring
+- * nfsd down and then back up again after changing the lease time.
+- *
+- * user_lease_time is protected by nfsd_mutex since it's only really accessed
+- * when nfsd is starting
+- */
+-void
+-nfs4_reset_lease(time_t leasetime)
+-{
+-	user_lease_time = leasetime;
+-}
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
+--- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 09:57:20.629370282 -0400
+@@ -46,6 +46,7 @@ enum {
+ 	 */
+ #ifdef CONFIG_NFSD_V4
+ 	NFSD_Leasetime,
++	NFSD_Gracetime,
+ 	NFSD_RecoveryDir,
+ #endif
+ };
+@@ -70,6 +71,7 @@ static ssize_t write_ports(struct file *
+ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
+ #ifdef CONFIG_NFSD_V4
+ static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
++static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
+ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
+ #endif
+ 
+@@ -91,6 +93,7 @@ static ssize_t (*write_op[])(struct file
+ 	[NFSD_MaxBlkSize] = write_maxblksize,
+ #ifdef CONFIG_NFSD_V4
+ 	[NFSD_Leasetime] = write_leasetime,
++	[NFSD_Gracetime] = write_gracetime,
+ 	[NFSD_RecoveryDir] = write_recoverydir,
+ #endif
+ };
+@@ -1204,29 +1207,45 @@ static ssize_t write_maxblksize(struct f
+ }
+ 
+ #ifdef CONFIG_NFSD_V4
+-extern time_t nfs4_leasetime(void);
+-
+-static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
++static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
+ {
+-	/* if size > 10 seconds, call
+-	 * nfs4_reset_lease() then write out the new lease (seconds) as reply
+-	 */
+ 	char *mesg = buf;
+-	int rv, lease;
++	int rv, i;
+ 
+ 	if (size > 0) {
+ 		if (nfsd_serv)
+ 			return -EBUSY;
+-		rv = get_int(&mesg, &lease);
++		rv = get_int(&mesg, &i);
+ 		if (rv)
+ 			return rv;
+-		if (lease < 10 || lease > 3600)
++		/*
++		 * Some sanity checking.  We don't have a reason for
++		 * these particular numbers, but problems with the
++		 * extremes are:
++		 *	- Too short: the briefest network outage may
++		 *	  cause clients to lose all their locks.  Also,
++		 *	  the frequent polling may be wasteful.
++		 *	- Too long: do you really want reboot recovery
++		 *	  to take more than an hour?  Or to make other
++		 *	  clients wait an hour before being able to
++		 *	  revoke a dead client's locks?
++		 */
++		if (i < 10 || i > 3600)
+ 			return -EINVAL;
+-		nfs4_reset_lease(lease);
++		*time = i;
+ 	}
+ 
+-	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n",
+-							nfs4_lease_time());
++	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time);
++}
++
++static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
++{
++	ssize_t rv;
++
++	mutex_lock(&nfsd_mutex);	/* held across the nfsd_serv check and the store to *time */
++	rv = __nfsd4_write_time(file, buf, size, time);
++	mutex_unlock(&nfsd_mutex);
++	return rv;
+ }
+ 
+ /**
+@@ -1252,12 +1271,22 @@ static ssize_t __write_leasetime(struct 
+  */
+ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
+ {
+-	ssize_t rv;
++	return nfsd4_write_time(file, buf, size, &nfsd4_lease);
++}
+ 
+-	mutex_lock(&nfsd_mutex);
+-	rv = __write_leasetime(file, buf, size);
+-	mutex_unlock(&nfsd_mutex);
+-	return rv;
++/**
++ * write_gracetime - Set or report current NFSv4 grace period time
++ *
++ * As above, but sets the time of the NFSv4 grace period.
++ *
++ * Note this should never be set to less than the *previous*
++ * lease-period time, but we don't try to enforce this.  (In the common
++ * case (a new boot), we don't know what the previous lease time was
++ * anyway.)
++ */
++static ssize_t write_gracetime(struct file *file, char *buf, size_t size)
++{
++	return nfsd4_write_time(file, buf, size, &nfsd4_grace);
+ }
+ 
+ extern char *nfs4_recoverydir(void);
+@@ -1351,6 +1380,7 @@ static int nfsd_fill_super(struct super_
+ 		[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
+ #ifdef CONFIG_NFSD_V4
+ 		[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
++		[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
+ 		[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
+ #endif
+ 		/* last one */ {""}
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
+--- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 09:57:20.629370282 -0400
+@@ -82,7 +82,6 @@ int nfs4_state_init(void);
+ void nfsd4_free_slabs(void);
+ int nfs4_state_start(void);
+ void nfs4_state_shutdown(void);
+-time_t nfs4_lease_time(void);
+ void nfs4_reset_lease(time_t leasetime);
+ int nfs4_reset_recoverydir(char *recdir);
+ #else
+@@ -90,7 +89,6 @@ static inline int nfs4_state_init(void) 
+ static inline void nfsd4_free_slabs(void) { }
+ static inline int nfs4_state_start(void) { return 0; }
+ static inline void nfs4_state_shutdown(void) { }
+-static inline time_t nfs4_lease_time(void) { return 0; }
+ static inline void nfs4_reset_lease(time_t leasetime) { }
+ static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
+ #endif
+@@ -229,6 +227,9 @@ extern struct timeval	nfssvc_boot;
+ 
+ #ifdef CONFIG_NFSD_V4
+ 
++extern time_t nfsd4_lease;
++extern time_t nfsd4_grace;
++
+ /* before processing a COMPOUND operation, we have to check that there
+  * is enough space in the buffer for XDR encode to succeed.  otherwise,
+  * we might process an operation with side effects, and be unable to
+@@ -247,7 +248,6 @@ extern struct timeval	nfssvc_boot;
+ #define	COMPOUND_SLACK_SPACE		140    /* OP_GETFH */
+ #define COMPOUND_ERR_SLACK_SPACE	12     /* OP_SETATTR */
+ 
+-#define NFSD_LEASE_TIME                 (nfs4_lease_time())
+ #define NFSD_LAUNDROMAT_MINTIMEOUT      10   /* seconds */
+ 
+ /*
+diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
+--- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 09:57:21.807501619 -0400
+@@ -70,6 +70,16 @@ struct nfsd4_cb_sequence {
+ 	struct nfs4_client	*cbs_clp;
+ };
+ 
++struct nfs4_rpc_args {
++	void				*args_op;
++	struct nfsd4_cb_sequence	args_seq;
++};
++
++struct nfsd4_callback {
++	struct nfs4_rpc_args cb_args;
++	struct work_struct cb_work;
++};
++
+ struct nfs4_delegation {
+ 	struct list_head	dl_perfile;
+ 	struct list_head	dl_perclnt;
+@@ -86,6 +96,7 @@ struct nfs4_delegation {
+ 	stateid_t		dl_stateid;
+ 	struct knfsd_fh		dl_fh;
+ 	int			dl_retries;
++	struct nfsd4_callback	dl_recall;
+ };
+ 
+ /* client delegation callback info */
+@@ -96,9 +107,7 @@ struct nfs4_cb_conn {
+ 	u32                     cb_prog;
+ 	u32			cb_minorversion;
+ 	u32                     cb_ident;	/* minorversion 0 only */
+-	/* RPC client info */
+-	atomic_t		cb_set;     /* successful CB_NULL call */
+-	struct rpc_clnt *       cb_client;
++	struct svc_xprt		*cb_xprt;	/* minorversion 1 only */
+ };
+ 
+ /* Maximum number of slots per session. 160 is useful for long haul TCP */
+@@ -157,7 +166,7 @@ struct nfsd4_session {
+ 	struct list_head	se_hash;	/* hash by sessionid */
+ 	struct list_head	se_perclnt;
+ 	u32			se_flags;
+-	struct nfs4_client	*se_client;	/* for expire_client */
++	struct nfs4_client	*se_client;
+ 	struct nfs4_sessionid	se_sessionid;
+ 	struct nfsd4_channel_attrs se_fchannel;
+ 	struct nfsd4_channel_attrs se_bchannel;
+@@ -212,25 +221,41 @@ struct nfs4_client {
+ 	struct svc_cred		cl_cred; 	/* setclientid principal */
+ 	clientid_t		cl_clientid;	/* generated by server */
+ 	nfs4_verifier		cl_confirm;	/* generated by server */
+-	struct nfs4_cb_conn	cl_cb_conn;     /* callback info */
+-	atomic_t		cl_count;	/* ref count */
+ 	u32			cl_firststate;	/* recovery dir creation */
+ 
++	/* for v4.0 and v4.1 callbacks: */
++	struct nfs4_cb_conn	cl_cb_conn;
++	struct rpc_clnt		*cl_cb_client;
++	atomic_t		cl_cb_set;
++
+ 	/* for nfs41 */
+ 	struct list_head	cl_sessions;
+ 	struct nfsd4_clid_slot	cl_cs_slot;	/* create_session slot */
+ 	u32			cl_exchange_flags;
+ 	struct nfs4_sessionid	cl_sessionid;
++	/* number of rpc's in progress over an associated session: */
++	atomic_t		cl_refcount;
+ 
+ 	/* for nfs41 callbacks */
+ 	/* We currently support a single back channel with a single slot */
+ 	unsigned long		cl_cb_slot_busy;
+ 	u32			cl_cb_seq_nr;
+-	struct svc_xprt		*cl_cb_xprt;	/* 4.1 callback transport */
+ 	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
+ 						/* wait here for slots */
+ };
+ 
++static inline void
++mark_client_expired(struct nfs4_client *clp)
++{
++	clp->cl_time = 0;	/* 0 is the "expired" value tested by is_client_expired() */
++}
++
++static inline bool
++is_client_expired(struct nfs4_client *clp)
++{
++	return clp->cl_time == 0;	/* cl_time is zeroed by mark_client_expired() */
++}
++
+ /* struct nfs4_client_reset
+  * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl
+  * upon lease reset, or from upcall to state_daemon (to read in state
+@@ -377,11 +402,14 @@ extern void nfs4_lock_state(void);
+ extern void nfs4_unlock_state(void);
+ extern int nfs4_in_grace(void);
+ extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
+-extern void put_nfs4_client(struct nfs4_client *clp);
+ extern void nfs4_free_stateowner(struct kref *kref);
+ extern int set_callback_cred(void);
+-extern void nfsd4_probe_callback(struct nfs4_client *clp);
++extern void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
++extern void nfsd4_do_callback_rpc(struct work_struct *);
+ extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
++extern int nfsd4_create_callback_queue(void);
++extern void nfsd4_destroy_callback_queue(void);
++extern void nfsd4_set_callback_client(struct nfs4_client *, struct rpc_clnt *);
+ extern void nfs4_put_delegation(struct nfs4_delegation *dp);
+ extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
+ extern void nfsd4_init_recdir(char *recdir_name);
+@@ -392,6 +420,7 @@ extern int nfs4_has_reclaimed_state(cons
+ extern void nfsd4_recdir_purge_old(void);
+ extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
+ extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
++extern void release_session_client(struct nfsd4_session *);
+ 
+ static inline void
+ nfs4_put_stateowner(struct nfs4_stateowner *so)
+diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
+--- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 09:57:23.994379831 -0400
+@@ -381,6 +381,10 @@ struct nfsd4_destroy_session {
+ 	struct nfs4_sessionid	sessionid;
+ };
+ 
++struct nfsd4_reclaim_complete {
++	u32 rca_one_fs;
++};
++
+ struct nfsd4_op {
+ 	int					opnum;
+ 	__be32					status;
+@@ -421,6 +425,7 @@ struct nfsd4_op {
+ 		struct nfsd4_create_session	create_session;
+ 		struct nfsd4_destroy_session	destroy_session;
+ 		struct nfsd4_sequence		sequence;
++		struct nfsd4_reclaim_complete	reclaim_complete;
+ 	} u;
+ 	struct nfs4_replay *			replay;
+ };
+@@ -513,9 +518,8 @@ extern void nfsd4_store_cache_entry(stru
+ extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
+ 		struct nfsd4_sequence *seq);
+ extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
+-		struct nfsd4_compound_state *,
+-struct nfsd4_exchange_id *);
+-		extern __be32 nfsd4_create_session(struct svc_rqst *,
++		struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
++extern __be32 nfsd4_create_session(struct svc_rqst *,
+ 		struct nfsd4_compound_state *,
+ 		struct nfsd4_create_session *);
+ extern __be32 nfsd4_sequence(struct svc_rqst *,
+@@ -524,6 +528,7 @@ extern __be32 nfsd4_sequence(struct svc_
+ extern __be32 nfsd4_destroy_session(struct svc_rqst *,
+ 		struct nfsd4_compound_state *,
+ 		struct nfsd4_destroy_session *);
++__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *);
+ extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
+ 		struct nfsd4_open *open);
+ extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h	2010-08-23 09:57:23.994379831 -0400
+@@ -40,12 +40,12 @@ struct nfs_fhbase_old {
+  * This is the new flexible, extensible style NFSv2/v3 file handle.
+  * by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
+  *
+- * The file handle is seens as a list of 4byte words.
+- * The first word contains a version number (1) and four descriptor bytes
++ * The file handle starts with a sequence of four-byte words.
++ * The first word contains a version number (1) and three descriptor bytes
+  * that tell how the remaining 3 variable length fields should be handled.
+  * These three bytes are auth_type, fsid_type and fileid_type.
+  *
+- * All 4byte values are in host-byte-order.
++ * All four-byte values are in host-byte-order.
+  *
+  * The auth_type field specifies how the filehandle can be authenticated
+  * This might allow a file to be confirmed to be in a writable part of a
+diff -up linux-2.6.34.noarch/net/sunrpc/cache.c.orig linux-2.6.34.noarch/net/sunrpc/cache.c
+--- linux-2.6.34.noarch/net/sunrpc/cache.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/cache.c	2010-08-23 09:57:23.995376793 -0400
+@@ -49,11 +49,17 @@ static void cache_init(struct cache_head
+ 	h->last_refresh = now;
+ }
+ 
++static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
++{
++	return  (h->expiry_time < get_seconds()) ||
++		(detail->flush_time > h->last_refresh);
++}
++
+ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
+ 				       struct cache_head *key, int hash)
+ {
+ 	struct cache_head **head,  **hp;
+-	struct cache_head *new = NULL;
++	struct cache_head *new = NULL, *freeme = NULL;
+ 
+ 	head = &detail->hash_table[hash];
+ 
+@@ -62,6 +68,9 @@ struct cache_head *sunrpc_cache_lookup(s
+ 	for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
+ 		struct cache_head *tmp = *hp;
+ 		if (detail->match(tmp, key)) {
++			if (cache_is_expired(detail, tmp))
++				/* This entry is expired, we will discard it. */
++				break;
+ 			cache_get(tmp);
+ 			read_unlock(&detail->hash_lock);
+ 			return tmp;
+@@ -86,6 +95,13 @@ struct cache_head *sunrpc_cache_lookup(s
+ 	for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
+ 		struct cache_head *tmp = *hp;
+ 		if (detail->match(tmp, key)) {
++			if (cache_is_expired(detail, tmp)) {
++				*hp = tmp->next;
++				tmp->next = NULL;
++				detail->entries --;
++				freeme = tmp;
++				break;
++			}
+ 			cache_get(tmp);
+ 			write_unlock(&detail->hash_lock);
+ 			cache_put(new, detail);
+@@ -98,6 +114,8 @@ struct cache_head *sunrpc_cache_lookup(s
+ 	cache_get(new);
+ 	write_unlock(&detail->hash_lock);
+ 
++	if (freeme)
++		cache_put(freeme, detail);
+ 	return new;
+ }
+ EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
+@@ -183,10 +201,7 @@ static int cache_make_upcall(struct cach
+ 
+ static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h)
+ {
+-	if (!test_bit(CACHE_VALID, &h->flags) ||
+-	    h->expiry_time < get_seconds())
+-		return -EAGAIN;
+-	else if (detail->flush_time > h->last_refresh)
++	if (!test_bit(CACHE_VALID, &h->flags))
+ 		return -EAGAIN;
+ 	else {
+ 		/* entry is valid */
+diff -up linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig linux-2.6.34.noarch/net/sunrpc/svcsock.c
+--- linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/svcsock.c	2010-08-23 09:57:23.997368707 -0400
+@@ -547,7 +547,6 @@ static int svc_udp_recvfrom(struct svc_r
+ 			dprintk("svc: recvfrom returned error %d\n", -err);
+ 			set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
+ 		}
+-		svc_xprt_received(&svsk->sk_xprt);
+ 		return -EAGAIN;
+ 	}
+ 	len = svc_addr_len(svc_addr(rqstp));
+@@ -562,11 +561,6 @@ static int svc_udp_recvfrom(struct svc_r
+ 	svsk->sk_sk->sk_stamp = skb->tstamp;
+ 	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
+ 
+-	/*
+-	 * Maybe more packets - kick another thread ASAP.
+-	 */
+-	svc_xprt_received(&svsk->sk_xprt);
+-
+ 	len  = skb->len - sizeof(struct udphdr);
+ 	rqstp->rq_arg.len = len;
+ 
+@@ -917,7 +911,6 @@ static int svc_tcp_recv_record(struct sv
+ 		if (len < want) {
+ 			dprintk("svc: short recvfrom while reading record "
+ 				"length (%d of %d)\n", len, want);
+-			svc_xprt_received(&svsk->sk_xprt);
+ 			goto err_again; /* record header not complete */
+ 		}
+ 
+@@ -953,7 +946,6 @@ static int svc_tcp_recv_record(struct sv
+ 	if (len < svsk->sk_reclen) {
+ 		dprintk("svc: incomplete TCP record (%d of %d)\n",
+ 			len, svsk->sk_reclen);
+-		svc_xprt_received(&svsk->sk_xprt);
+ 		goto err_again;	/* record not complete */
+ 	}
+ 	len = svsk->sk_reclen;
+@@ -961,10 +953,8 @@ static int svc_tcp_recv_record(struct sv
+ 
+ 	return len;
+  error:
+-	if (len == -EAGAIN) {
++	if (len == -EAGAIN)
+ 		dprintk("RPC: TCP recv_record got EAGAIN\n");
+-		svc_xprt_received(&svsk->sk_xprt);
+-	}
+ 	return len;
+  err_delete:
+ 	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+@@ -1110,7 +1100,6 @@ out:
+ 	svsk->sk_tcplen = 0;
+ 
+ 	svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
+-	svc_xprt_received(&svsk->sk_xprt);
+ 	if (serv->sv_stats)
+ 		serv->sv_stats->nettcpcnt++;
+ 
+@@ -1119,7 +1108,6 @@ out:
+ err_again:
+ 	if (len == -EAGAIN) {
+ 		dprintk("RPC: TCP recvfrom got EAGAIN\n");
+-		svc_xprt_received(&svsk->sk_xprt);
+ 		return len;
+ 	}
+ error:
+diff -up linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig linux-2.6.34.noarch/net/sunrpc/svc_xprt.c
+--- linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/svc_xprt.c	2010-08-23 09:57:23.996377209 -0400
+@@ -744,8 +744,10 @@ int svc_recv(struct svc_rqst *rqstp, lon
+ 		if (rqstp->rq_deferred) {
+ 			svc_xprt_received(xprt);
+ 			len = svc_deferred_recv(rqstp);
+-		} else
++		} else {
+ 			len = xprt->xpt_ops->xpo_recvfrom(rqstp);
++			svc_xprt_received(xprt);
++		}
+ 		dprintk("svc: got len=%d\n", len);
+ 	}
+ 
+@@ -893,12 +895,12 @@ void svc_delete_xprt(struct svc_xprt *xp
+ 	 */
+ 	if (test_bit(XPT_TEMP, &xprt->xpt_flags))
+ 		serv->sv_tmpcnt--;
++	spin_unlock_bh(&serv->sv_lock);
+ 
+ 	while ((dr = svc_deferred_dequeue(xprt)) != NULL)
+ 		kfree(dr);
+ 
+ 	svc_xprt_put(xprt);
+-	spin_unlock_bh(&serv->sv_lock);
+ }
+ 
+ void svc_close_xprt(struct svc_xprt *xprt)
+diff -up linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c.orig linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+--- linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	2010-08-23 09:57:23.998377481 -0400
+@@ -566,7 +566,6 @@ static int rdma_read_complete(struct svc
+ 		ret, rqstp->rq_arg.len,	rqstp->rq_arg.head[0].iov_base,
+ 		rqstp->rq_arg.head[0].iov_len);
+ 
+-	svc_xprt_received(rqstp->rq_xprt);
+ 	return ret;
+ }
+ 
+@@ -665,7 +664,6 @@ int svc_rdma_recvfrom(struct svc_rqst *r
+ 		rqstp->rq_arg.head[0].iov_len);
+ 	rqstp->rq_prot = IPPROTO_MAX;
+ 	svc_xprt_copy_addrs(rqstp, xprt);
+-	svc_xprt_received(xprt);
+ 	return ret;
+ 
+  close_out:
+@@ -678,6 +676,5 @@ int svc_rdma_recvfrom(struct svc_rqst *r
+ 	 */
+ 	set_bit(XPT_CLOSE, &xprt->xpt_flags);
+ defer:
+-	svc_xprt_received(xprt);
+ 	return 0;
+ }
diff --git a/pnfs-all-2.6.35-2010-08-19-f13.patch b/pnfs-all-2.6.35-2010-08-19-f13.patch
new file mode 100644
index 000000000..a9d78ba0e
--- /dev/null
+++ b/pnfs-all-2.6.35-2010-08-19-f13.patch
@@ -0,0 +1,31788 @@
+diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arch/um/os-Linux/mem.c
+--- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-08-23 12:08:27.310584826 -0400
++++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-08-23 12:09:03.273553977 -0400
+@@ -13,6 +13,7 @@
+ #include <sys/stat.h>
+ #include <sys/mman.h>
+ #include <sys/param.h>
++#include <sys/stat.h>
+ #include "init.h"
+ #include "kern_constants.h"
+ #include "os.h"
+diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.c
+--- linux-2.6.34.noarch/block/genhd.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/block/genhd.c	2010-08-23 12:09:03.273553977 -0400
+@@ -1009,6 +1009,7 @@ static void disk_release(struct device *
+ struct class block_class = {
+ 	.name		= "block",
+ };
++EXPORT_SYMBOL(block_class);
+ 
+ static char *block_devnode(struct device *dev, mode_t *mode)
+ {
+diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt
+--- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-08-23 12:09:03.274563927 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-08-23 12:09:03.274563927 -0400
+@@ -0,0 +1,211 @@
++(c) 2007 Network Appliance Inc.
++
++spNFS
++-----
++
++An spNFS system consists of a Meta Data Server (MDS), a number of Client machines (C) and a number of Data Servers (DS).
++
++A file system is mounted by the clients from the MDS, and all file data
++is striped across the DSs.
++
++Identify the machines that will be filling each of these roles.
++
++The spnfs kernel will be installed on all machines: clients, the MDS and DSs.
++
++
++Building and installing the spNFS kernel
++----------------------------------------
++
++Get the spNFS kernel from:
++
++	git://linux-nfs.org/~bhalevy/linux-pnfs.git
++
++Use the pnfs-all-latest branch and add these options to your .config file
++
++	CONFIG_NETWORK_FILESYSTEMS=y
++	CONFIG_NFS_FS=m
++	CONFIG_NFS_V4=y
++	CONFIG_NFS_V4_1=y
++	CONFIG_PNFS=y
++	CONFIG_NFSD=m
++	CONFIG_PNFSD=y
++	# CONFIG_PNFSD_LOCAL_EXPORT is not set
++	CONFIG_SPNFS=y
++
++By default, spNFS uses whole-file layouts.  Layout segments can be enabled
++by adding:
++
++	CONFIG_SPNFS_LAYOUTSEGMENTS=y
++
++to your .config file.
++
++Building and installation of kernel+modules is as usual.
++This kernel should be installed and booted on the client, MDS and DSs.
++
++Note that CONFIG_PNFSD_LOCAL_EXPORT must be disabled for spnfs as it
++takes over the pnfs export interface.
++
++Building nfs-utils
++------------------
++
++Get the nfs-utils package containing spnfsd from:
++
++	git://linux-nfs.org/~bhalevy/pnfs-nfs-utils.git
++
++Follow the standard instructions for building nfs-utils.
++
++After building, the spnfsd daemon will be located in utils/spnfsd.  The spnfsd
++daemon will only be needed on the MDS.
++
++
++Installation
++------------
++
++The nfs-utils package contains a default spnfsd.conf file in
++utils/spnfsd/spnfsd.conf.  Copy this file to /etc/spnfsd.conf.
++
++By default, the DS-Mount-Directory is set to /spnfs (see spnfsd.conf).  Under
++this directory, mount points must be created for each DS to
++be used for pNFS data stripes.  These mount points are named by the ip address
++of the corresponding DS.  In the sample spnfsd.conf, there are two
++DSs defined (172.16.28.134 and 172.16.28.141).
++
++Following the sample spnfsd.conf,
++
++	mkdir /spnfs
++
++on the MDS (corresponding to DS-Mount-Directory).  Then
++
++	mkdir /spnfs/172.16.28.134
++	mkdir /spnfs/172.16.28.141
++
++to create the mount points for the DSs.
++
++On the DSs, choose a directory where data stripes will be created by the MDS.
++For the sample file, this directory is /pnfs, so on each DS execute:
++
++	mkdir /pnfs
++
++This directory is specified in the spnfsd.conf file by the DS*_ROOT option
++(where * is replaced by the DS number).  DS_ROOT is specified relative to
++the directory being exported by the DSs.  In our example, our DSs are exporting
++the root directory (/) and therefore our DS_ROOT is /pnfs.  On the DSs, we have
++the following entry in /etc/exports:
++
++	/ *(rw,fsid=0,insecure,no_root_squash,sync,no_subtree_check)
++
++N.B. If we had created a /exports directory and a /pnfs directory under
++/exports, and if we were exporting /exports, then DS_ROOT would still be /pnfs
++(not /exports/pnfs).
++
++It may be useful to add entries to /etc/fstab on the MDS to automatically
++mount the DS_ROOT file systems.  For this example, our MDS fstab would
++contain:
++
++	172.16.28.134:/pnfs /spnfs/172.16.28.134 nfs    defaults        1 2
++	172.16.28.141:/pnfs /spnfs/172.16.28.141 nfs    defaults        1 2
++
++The DS mounts must be performed manually or via fstab at this time (automatic
++mounting, directory creation, etc. are on the todo list).  To perform I/O
++through the MDS, the DS mounts MUST use NFSv3 at this time (this restriction
++will eventually be removed).
++
++
++On the MDS, choose a file system to use with spNFS and export it, e.g.:
++
++	/ *(rw,fsid=0,insecure,no_root_squash,sync,no_subtree_check,pnfs)
++
++Make sure nfsd and all supporting processes are running on the MDS and DSs.
++
++
++Running
++-------
++
++If rpc_pipefs is not already mounted (if you're running idmapd it probably is),
++you may want to add the following line to /etc/fstab:
++
++	rpc_pipefs    /var/lib/nfs/rpc_pipefs rpc_pipefs defaults     0 0
++
++to automatically mount rpc_pipefs.
++
++With spnfsd.conf configured for your environment and the mounts mounted as
++described above, spnfsd can now be started.
++
++On the MDS, execute spnfsd:
++
++	spnfsd
++
++The executable is located in the directory where it was built, and
++may also have been installed elsewhere depending on how you built nfs-utils.
++It will run in the foreground by default, and in fact will do so despite
++any options suggesting the contrary (it's still a debugging build).
++
++On the client, make sure the nfslayoutdriver module is loaded:
++
++	modprobe nfslayoutdriver
++
++Then mount the file system from the MDS:
++
++	mount -t nfs4 -o minorversion=1 mds:/ /mnt
++
++I/O through the MDS is now supported.  To use it, do not load the
++nfslayoutdriver on the client, and mount the MDS using NFSv4 or 4.1
++(NFSv2 and v3 are not yet supported).
++
++You may now use spNFS by performing file system activities in /mnt.
++If you create files in /mnt, you should see stripe files corresponding to
++new files being created on the DSs.  The current implementation names the
++stripe files based on the inode number of the file on the MDS.  For example,
++if you create a file foo in /mnt and do an 'ls -li /mnt/foo':
++
++	# ls -li foo
++	1233 -rw-r--r-- 1 root root 0 Nov 29 15:54 foo
++
++You should see stripe files on each DS under /pnfs (per the sample) named
++1233.  The file /pnfs/1233 on DS1 will contain the first <stripe size> bytes
++of data written to foo, DS2 will contain the next <stripe size> bytes, etc.
++Removing /mnt/foo will remove the corresponding stripe files on the DSs.
++Other file system operations should behave (mostly :-) as expected.
++
++
++Layout Segments
++---------------
++
++If the kernel is compiled to support layout segments, there will
++be two files created under /proc/fs/spnfs for controlling layout
++segment functionality.
++
++To enable layout segments, write a '1' to /proc/fs/spnfs/layoutseg, e.g.:
++
++	echo 1 > /proc/fs/spnfs/layoutseg
++
++Layout segments can be disabled (returning to whole-file layouts) by
++writing a '0' to /proc/fs/spnfs/layoutseg:
++
++	echo 0 > /proc/fs/spnfs/layoutseg
++
++When layout segments are enabled, the size of the layouts returned can
++be specified by writing a decimal number (ascii representation) to
++/proc/fs/spnfs/layoutsegsize:
++
++	echo 1024 > /proc/fs/spnfs/layoutsegsize
++
++The value '0' has a special meaning--it causes the server to return a
++layout that is exactly the size requested by the client:
++
++	echo 0 > /proc/fs/spnfs/layoutsegsize
++
++
++Troubleshooting
++---------------
++
++If you see data being written to the files on the MDS rather than
++the stripe files, make sure the nfslayoutdriver is loaded on the client
++(see above).
++
++If you get a "permission denied" error, make sure mountd is running on the MDS
++(it occasionally fails to start).
++
++Bugs, enhancements, compliments, complaints to: dmuntz@netapp.com
++
++
+diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/drivers/md/dm-ioctl.c
+--- linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-08-23 12:09:03.275584050 -0400
+@@ -651,6 +651,12 @@ static int dev_create(struct dm_ioctl *p
+ 	return r;
+ }
+ 
++int dm_dev_create(struct dm_ioctl *param)
++{
++	return dev_create(param, sizeof(*param));
++}
++EXPORT_SYMBOL(dm_dev_create);
++
+ /*
+  * Always use UUID for lookups if it's present, otherwise use name or dev.
+  */
+@@ -745,6 +751,12 @@ static int dev_remove(struct dm_ioctl *p
+ 	return 0;
+ }
+ 
++int dm_dev_remove(struct dm_ioctl *param)
++{
++	return dev_remove(param, sizeof(*param));
++}
++EXPORT_SYMBOL(dm_dev_remove);
++
+ /*
+  * Check a string doesn't overrun the chunk of
+  * memory we copied from userland.
+@@ -917,6 +929,12 @@ static int do_resume(struct dm_ioctl *pa
+ 	return r;
+ }
+ 
++int dm_do_resume(struct dm_ioctl *param)
++{
++	return do_resume(param);
++}
++EXPORT_SYMBOL(dm_do_resume);
++
+ /*
+  * Set or unset the suspension state of a device.
+  * If the device already is in the requested state we just return its status.
+@@ -1194,6 +1212,12 @@ out:
+ 	return r;
+ }
+ 
++int dm_table_load(struct dm_ioctl *param, size_t param_size)
++{
++	return table_load(param, param_size);
++}
++EXPORT_SYMBOL(dm_table_load);
++
+ static int table_clear(struct dm_ioctl *param, size_t param_size)
+ {
+ 	int r;
+diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drivers/scsi/hosts.c
+--- linux-2.6.34.noarch/drivers/scsi/hosts.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-08-23 12:09:03.276563906 -0400
+@@ -49,7 +49,7 @@ static void scsi_host_cls_release(struct
+ 	put_device(&class_to_shost(dev)->shost_gendev);
+ }
+ 
+-static struct class shost_class = {
++struct class shost_class = {
+ 	.name		= "scsi_host",
+ 	.dev_release	= scsi_host_cls_release,
+ };
+diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/exofs.h
+--- linux-2.6.34.noarch/fs/exofs/exofs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-08-23 12:09:03.277563890 -0400
+@@ -36,13 +36,9 @@
+ #include <linux/fs.h>
+ #include <linux/time.h>
+ #include <linux/backing-dev.h>
++#include <linux/pnfs_osd_xdr.h>
+ #include "common.h"
+ 
+-/* FIXME: Remove once pnfs hits mainline
+- * #include <linux/exportfs/pnfs_osd_xdr.h>
+- */
+-#include "pnfs.h"
+-
+ #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
+ 
+ #ifdef CONFIG_EXOFS_DEBUG
+@@ -103,6 +99,7 @@ struct exofs_sb_info {
+ struct exofs_i_info {
+ 	struct inode   vfs_inode;          /* normal in-memory inode          */
+ 	wait_queue_head_t i_wq;            /* wait queue for inode            */
++	spinlock_t     i_layout_lock;      /* lock for layout/return/recall   */
+ 	unsigned long  i_flags;            /* various atomic flags            */
+ 	uint32_t       i_data[EXOFS_IDATA];/*short symlink names and device #s*/
+ 	uint32_t       i_dir_start_lookup; /* which page to start lookup      */
+@@ -166,6 +163,9 @@ static inline unsigned exofs_io_state_si
+  */
+ #define OBJ_2BCREATED	0	/* object will be created soon*/
+ #define OBJ_CREATED	1	/* object has been created on the osd*/
++/* The flags below are not used atomically but reuse the same i_flags */
++#define OBJ_LAYOUT_IS_GIVEN  2  /* inode has given layouts to clients*/
++#define OBJ_IN_LAYOUT_RECALL 3  /* inode is in the middle of a layout recall*/
+ 
+ static inline int obj_2bcreated(struct exofs_i_info *oi)
+ {
+@@ -304,4 +304,20 @@ extern const struct inode_operations exo
+ extern const struct inode_operations exofs_symlink_inode_operations;
+ extern const struct inode_operations exofs_fast_symlink_inode_operations;
+ 
++/* export.c */
++typedef int (exofs_recall_fn)(struct inode *inode);
++#ifdef CONFIG_PNFSD
++int exofs_inode_recall_layout(struct inode *inode, enum pnfs_iomode iomode,
++			      exofs_recall_fn todo);
++void exofs_init_export(struct super_block *sb);
++#else
++static inline int exofs_inode_recall_layout(struct inode *inode,
++				enum pnfs_iomode iomode, exofs_recall_fn todo)
++{
++	return todo(inode);
++}
++
++static inline void exofs_init_export(struct super_block *sb) {}
++#endif
++
+ #endif
+diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs/export.c
+--- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-08-23 12:09:03.278386746 -0400
++++ linux-2.6.34.noarch/fs/exofs/export.c	2010-08-23 12:09:03.278386746 -0400
+@@ -0,0 +1,396 @@
++/*
++ * export.c - Implementation of the pnfs_export_operations
++ *
++ * Copyright (C) 2009 Panasas Inc.
++ * All rights reserved.
++ *
++ * Boaz Harrosh <bharrosh@panasas.com>
++ *
++ * This file is part of exofs.
++ *
++ * exofs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation.  Since it is based on ext2, and the only
++ * valid version of GPL for the Linux kernel is version 2, the only valid
++ * version of GPL for exofs is version 2.
++ *
++ * exofs is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with exofs; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
++ */
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include "exofs.h"
++
++static int exofs_layout_type(struct super_block *sb)
++{
++	return LAYOUT_OSD2_OBJECTS;
++}
++
++static void set_dev_id(struct pnfs_deviceid *pnfs_devid, u64 sbid, u64 devid)
++{
++	struct nfsd4_pnfs_deviceid *dev_id =
++		(struct nfsd4_pnfs_deviceid *)pnfs_devid;
++
++	dev_id->sbid  = sbid;
++	dev_id->devid = devid;
++}
++
++static int cb_layout_recall(struct inode *inode, enum pnfs_iomode iomode,
++			    u64 offset, u64 length, void *cookie)
++{
++	struct nfsd4_pnfs_cb_layout cbl;
++	struct pnfsd_cb_ctl cb_ctl;
++	int status;
++
++	memset(&cb_ctl, 0, sizeof(cb_ctl));
++	status = pnfsd_get_cb_op(&cb_ctl);
++	if (unlikely(status)) {
++		EXOFS_ERR("%s: nfsd unloaded!! inode (0x%lx) status=%d\n",
++			  __func__, inode->i_ino, status);
++		goto err;
++	}
++
++	memset(&cbl, 0, sizeof(cbl));
++	cbl.cbl_recall_type = RETURN_FILE;
++	cbl.cbl_seg.layout_type = LAYOUT_OSD2_OBJECTS;
++	cbl.cbl_seg.iomode = iomode;
++	cbl.cbl_seg.offset = offset;
++	cbl.cbl_seg.length = length;
++	cbl.cbl_cookie = cookie;
++
++	status = cb_ctl.cb_op->cb_layout_recall(inode->i_sb, inode, &cbl);
++	pnfsd_put_cb_op(&cb_ctl);
++
++err:
++	return status;
++}
++
++static enum nfsstat4 exofs_layout_get(
++	struct inode *inode,
++	struct exp_xdr_stream *xdr,
++	const struct nfsd4_pnfs_layoutget_arg *args,
++	struct nfsd4_pnfs_layoutget_res *res)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
++	struct exofs_layout *el = &sbi->layout;
++	struct pnfs_osd_object_cred *creds = NULL;
++	struct pnfs_osd_layout layout;
++	__be32 *start;
++	bool in_recall;
++	int i, err;
++	enum nfsstat4 nfserr;
++
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;
++	res->lg_seg.iomode = IOMODE_RW;
++	res->lg_return_on_close = true; /* TODO: unused but will be soon */
++
++	/* skip opaque size, will be filled-in later */
++	start = exp_xdr_reserve_qwords(xdr, 1);
++	if (!start) {
++		nfserr = NFS4ERR_TOOSMALL;
++		goto out;
++	}
++
++	creds = kcalloc(el->s_numdevs, sizeof(*creds), GFP_KERNEL);
++	if (!creds) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto out;
++	}
++
++	/* Fill in a pnfs_osd_layout struct */
++	layout.olo_map = sbi->data_map;
++
++	for (i = 0; i < el->s_numdevs; i++) {
++		struct pnfs_osd_object_cred *cred = &creds[i];
++		osd_id id = exofs_oi_objno(oi);
++		unsigned dev = exofs_layout_od_id(el, id, i);
++
++		set_dev_id(&cred->oc_object_id.oid_device_id, args->lg_sbid,
++			   dev);
++		cred->oc_object_id.oid_partition_id = el->s_pid;
++		cred->oc_object_id.oid_object_id = id;
++		cred->oc_osd_version = osd_dev_is_ver1(el->s_ods[dev]) ?
++						PNFS_OSD_VERSION_1 :
++						PNFS_OSD_VERSION_2;
++		cred->oc_cap_key_sec = PNFS_OSD_CAP_KEY_SEC_NONE;
++
++		cred->oc_cap_key.cred_len	= 0;
++		cred->oc_cap_key.cred		= NULL;
++
++		cred->oc_cap.cred_len	= OSD_CAP_LEN;
++		cred->oc_cap.cred	= oi->i_cred;
++	}
++
++	layout.olo_comps_index = 0;
++	layout.olo_num_comps = el->s_numdevs;
++	layout.olo_comps = creds;
++
++	err = pnfs_osd_xdr_encode_layout(xdr, &layout);
++	if (err) {
++		nfserr = NFS4ERR_TOOSMALL; /* FIXME: Change osd_xdr error codes */
++		goto out;
++	}
++
++	exp_xdr_encode_opaque_len(start, xdr->p);
++
++	spin_lock(&oi->i_layout_lock);
++	in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	if (!in_recall) {
++		__set_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++		nfserr = NFS4_OK;
++	} else {
++		nfserr = NFS4ERR_RECALLCONFLICT;
++	}
++	spin_unlock(&oi->i_layout_lock);
++
++out:
++	kfree(creds);
++	EXOFS_DBGMSG("(0x%lx) nfserr=%u xdr_bytes=%zu\n",
++		     inode->i_ino, nfserr, exp_xdr_qbytes(xdr->p - start));
++	return nfserr;
++}
++
++/* Commit client-reported mtime/size; NOTE: inode mutex must NOT be held */
++static int exofs_layout_commit(
++	struct inode *inode,
++	const struct nfsd4_pnfs_layoutcommit_arg *args,
++	struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	struct timespec mtime;
++	loff_t i_size;
++	int in_recall;
++
++	/* In case of a recall we ignore the new size and mtime since they
++	 * are going to be changed again by truncate, and since we cannot take
++	 * the inode lock in that case.
++	 */
++	spin_lock(&oi->i_layout_lock);
++	in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++	if (in_recall) {
++		EXOFS_DBGMSG("(0x%lx) commit was called during recall\n",
++			     inode->i_ino);
++		return 0;
++	}
++
++	/* NOTE: I would love to call inode_setattr here
++	 *	 but I cannot since this will cause an eventual vmtruncate,
++	 *	 which will cause a layout_recall. So open code the i_size
++	 *	 and mtime/atime changes under i_mutex.
++	 */
++	mutex_lock_nested(&inode->i_mutex, I_MUTEX_NORMAL);
++
++	if (args->lc_mtime.seconds) {
++		mtime.tv_sec = args->lc_mtime.seconds;
++		mtime.tv_nsec = args->lc_mtime.nseconds;
++
++		/* layout commit may only make time bigger, since there might
++		 * be reordering of the notifications and it might arrive after
++		 * a local change: keep i_mtime when the client's is older.
++		 * TODO: if mtime > ctime then we know set_attr did an mtime
++		 * in the future. and we can let this update through
++		 */
++		if (timespec_compare(&mtime, &inode->i_mtime) < 0)
++			mtime = inode->i_mtime;
++	} else {
++		mtime = current_fs_time(inode->i_sb);
++	}
++
++	/* TODO: Will below work? since mark_inode_dirty has its own
++	 *       Time handling
++	 */
++	inode->i_atime = inode->i_mtime = mtime;
++
++	i_size = i_size_read(inode);
++	if (args->lc_newoffset) {
++		loff_t new_size = args->lc_last_wr + 1;
++
++		if (i_size < new_size) {
++			i_size_write(inode, i_size = new_size);
++			res->lc_size_chg = 1;
++			res->lc_newsize = new_size;
++		}
++	}
++	/* TODO: else { i_size = osd_get_object_length() } */
++
++/* TODO: exofs does not currently use the osd_xdr part of the layout_commit */
++
++	mark_inode_dirty_sync(inode);
++
++	mutex_unlock(&inode->i_mutex);
++	EXOFS_DBGMSG("(0x%lx) i_size=0x%llx lcp->off=0x%llx\n",
++		     inode->i_ino, i_size, args->lc_last_wr);
++	return 0;
++}
++
++static void exofs_handle_error(struct pnfs_osd_ioerr *ioerr)
++{
++	EXOFS_ERR("exofs_handle_error: errno=%d is_write=%d obj=0x%llx "
++		  "offset=0x%llx length=0x%llx\n",
++		  ioerr->oer_errno, ioerr->oer_iswrite,
++		  _LLU(ioerr->oer_component.oid_object_id),
++		  _LLU(ioerr->oer_comp_offset),
++		  _LLU(ioerr->oer_comp_length));
++}
++
++static int exofs_layout_return(
++	struct inode *inode,
++	const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	__be32 *p = args->lrf_body;
++	unsigned len = exp_xdr_qwords(args->lrf_body_len);
++
++	EXOFS_DBGMSG("(0x%lx) cookie %p xdr_len %d\n",
++		     inode->i_ino, args->lr_cookie, len);
++
++	while (len >= pnfs_osd_ioerr_xdr_sz()) {
++		struct pnfs_osd_ioerr ioerr;
++
++		p = pnfs_osd_xdr_decode_ioerr(&ioerr, p);
++		len -= pnfs_osd_ioerr_xdr_sz();
++		exofs_handle_error(&ioerr);
++	}
++
++	if (args->lr_cookie) {
++		struct exofs_i_info *oi = exofs_i(inode);
++		bool in_recall;
++
++		spin_lock(&oi->i_layout_lock);
++		in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++		__clear_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++		spin_unlock(&oi->i_layout_lock);
++
++		/* TODO: how to communicate cookie with the waiter */
++		if (in_recall)
++			wake_up(&oi->i_wq); /* wakeup any recalls */
++	}
++
++	return 0;
++}
++
++int exofs_get_device_info(struct super_block *sb, struct exp_xdr_stream *xdr,
++			  u32 layout_type,
++			  const struct nfsd4_pnfs_deviceid *devid)
++{
++	struct exofs_sb_info *sbi = sb->s_fs_info;
++	struct pnfs_osd_deviceaddr devaddr;
++	const struct osd_dev_info *odi;
++	u64 devno = devid->devid;
++	__be32 *start;
++	int err;
++
++	memset(&devaddr, 0, sizeof(devaddr));
++
++	if (unlikely(devno >= sbi->layout.s_numdevs))
++		return -ENODEV;
++
++	odi = osduld_device_info(sbi->layout.s_ods[devno]);
++
++	devaddr.oda_systemid.len = odi->systemid_len;
++	devaddr.oda_systemid.data = (void *)odi->systemid; /* !const cast */
++
++	devaddr.oda_osdname.len = odi->osdname_len ;
++	devaddr.oda_osdname.data = (void *)odi->osdname;/* !const cast */
++
++	/* skip opaque size, will be filled-in later */
++	start = exp_xdr_reserve_qwords(xdr, 1);
++	if (!start) {
++		err = -E2BIG;
++		goto err;
++	}
++
++	err = pnfs_osd_xdr_encode_deviceaddr(xdr, &devaddr);
++	if (err)
++		goto err;
++
++	exp_xdr_encode_opaque_len(start, xdr->p);
++
++	EXOFS_DBGMSG("xdr_bytes=%Zu devno=%lld osdname-%s\n",
++		     exp_xdr_qbytes(xdr->p - start), devno, odi->osdname);
++	return 0;
++
++err:
++	EXOFS_DBGMSG("Error: err=%d at_byte=%zu\n",
++		     err, exp_xdr_qbytes(xdr->p - start));
++	return err;
++}
++
++struct pnfs_export_operations exofs_pnfs_ops = {
++	.layout_type	= exofs_layout_type,
++	.layout_get	= exofs_layout_get,
++	.layout_commit	= exofs_layout_commit,
++	.layout_return	= exofs_layout_return,
++	.get_device_info = exofs_get_device_info,
++};
++
++static bool is_layout_returned(struct exofs_i_info *oi)
++{
++	bool layout_given;
++
++	spin_lock(&oi->i_layout_lock);
++	layout_given = test_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++
++	return !layout_given;
++}
++
++int exofs_inode_recall_layout(struct inode *inode, enum pnfs_iomode iomode,
++			      exofs_recall_fn todo)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	int layout_given;
++	int error = 0;
++
++	spin_lock(&oi->i_layout_lock);
++	layout_given = test_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++	__set_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++
++	if (!layout_given)
++		goto exec;
++
++	for (;;) {
++		EXOFS_DBGMSG("(0x%lx) has_layout issue a recall\n",
++			     inode->i_ino);
++		error = cb_layout_recall(inode, iomode, 0, NFS4_MAX_UINT64,
++					 &oi->i_wq);
++		switch (error) {
++		case 0:
++		case -EAGAIN:
++			break;
++		case -ENOENT:
++			goto exec;
++		default:
++			goto err;
++		}
++
++		error = wait_event_interruptible(oi->i_wq,
++						 is_layout_returned(oi));
++		if (error)
++			goto err;
++	}
++
++exec:
++	error = todo(inode);
++
++err:
++	spin_lock(&oi->i_layout_lock);
++	__clear_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++	EXOFS_DBGMSG("(0x%lx) return=>%d\n", inode->i_ino, error);
++	return error;
++}
++
++void exofs_init_export(struct super_block *sb)
++{
++	sb->s_pnfs_op = &exofs_pnfs_ops;
++}
+diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/inode.c
+--- linux-2.6.34.noarch/fs/exofs/inode.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-08-23 12:09:03.279502002 -0400
+@@ -833,7 +833,7 @@ void exofs_truncate(struct inode *inode)
+ 	if (unlikely(wait_obj_created(oi)))
+ 		goto fail;
+ 
+-	ret = _do_truncate(inode);
++	ret = exofs_inode_recall_layout(inode, IOMODE_ANY, _do_truncate);
+ 	if (ret)
+ 		goto fail;
+ 
+@@ -964,6 +964,7 @@ static void __oi_init(struct exofs_i_inf
+ {
+ 	init_waitqueue_head(&oi->i_wq);
+ 	oi->i_flags = 0;
++	spin_lock_init(&oi->i_layout_lock);
+ }
+ /*
+  * Fill in an inode read from the OSD and set it up for use
+diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/Kbuild
+--- linux-2.6.34.noarch/fs/exofs/Kbuild.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-08-23 12:09:03.279502002 -0400
+@@ -13,4 +13,5 @@
+ #
+ 
+ exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o
++exofs-$(CONFIG_PNFSD) +=  export.o
+ obj-$(CONFIG_EXOFS_FS) += exofs.o
+diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/Kconfig
+--- linux-2.6.34.noarch/fs/exofs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-08-23 12:09:03.280553663 -0400
+@@ -1,6 +1,7 @@
+ config EXOFS_FS
+ 	tristate "exofs: OSD based file system support"
+ 	depends on SCSI_OSD_ULD
++	select EXPORTFS_OSD_LAYOUT if PNFSD
+ 	help
+ 	  EXOFS is a file system that uses an OSD storage device,
+ 	  as its backing storage.
+diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/super.c
+--- linux-2.6.34.noarch/fs/exofs/super.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/super.c	2010-08-23 12:09:03.281511951 -0400
+@@ -621,6 +621,7 @@ static int exofs_fill_super(struct super
+ 	sb->s_fs_info = sbi;
+ 	sb->s_op = &exofs_sops;
+ 	sb->s_export_op = &exofs_export_ops;
++	exofs_init_export(sb);
+ 	root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF);
+ 	if (IS_ERR(root)) {
+ 		EXOFS_ERR("ERROR: exofs_iget failed\n");
+diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exportfs/expfs.c
+--- linux-2.6.34.noarch/fs/exportfs/expfs.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-08-23 12:09:03.282511528 -0400
+@@ -16,6 +16,13 @@
+ #include <linux/namei.h>
+ #include <linux/sched.h>
+ 
++#if defined(CONFIG_PNFSD)
++struct pnfsd_cb_ctl pnfsd_cb_ctl = {
++	.lock = __SPIN_LOCK_UNLOCKED(pnfsd_cb_ctl.lock)
++};
++EXPORT_SYMBOL(pnfsd_cb_ctl);
++#endif /* CONFIG_PNFSD */
++
+ #define dprintk(fmt, args...) do{}while(0)
+ 
+ 
+diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/exportfs/Makefile
+--- linux-2.6.34.noarch/fs/exportfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-08-23 12:09:03.282511528 -0400
+@@ -3,4 +3,7 @@
+ 
+ obj-$(CONFIG_EXPORTFS) += exportfs.o
+ 
+-exportfs-objs := expfs.o
++exportfs-y				:= expfs.o
++exportfs-$(CONFIG_EXPORTFS_FILE_LAYOUT)	+= nfs4filelayoutxdr.o
++exportfs-$(CONFIG_EXPORTFS_OSD_LAYOUT)	+= pnfs_osd_xdr_srv.o
++exportfs-$(CONFIG_EXPORTFS_BLOCK_LAYOUT) += nfs4blocklayoutxdr.o
+diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c
+--- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-08-23 12:09:03.283511561 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-08-23 12:09:03.283511561 -0400
+@@ -0,0 +1,158 @@
++/*
++ *  linux/fs/exportfs/nfs4blocklayoutxdr.c
++ *
++ *
++ *  Created by Rick McNeal on 3/31/08.
++ *  Copyright 2008 __MyCompanyName__. All rights reserved.
++ *
++ */
++#include <linux/module.h>
++#include <linux/sunrpc/svc.h>
++#include <linux/nfs4.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++static int
++bl_encode_simple(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	__be32 *p = exp_xdr_reserve_space(xdr,
++					  12 + 4 + bld->u.simple.bld_sig_len);
++
++	if (!p)
++		return -ETOOSMALL;
++
++	p = exp_xdr_encode_u32(p, 1);
++	p = exp_xdr_encode_u64(p, bld->u.simple.bld_offset);
++	exp_xdr_encode_opaque(p, bld->u.simple.bld_sig,
++			      bld->u.simple.bld_sig_len);
++
++	return 0;
++}
++
++static int
++bl_encode_slice(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 2 + 2 + 1);
++
++	if (!p)
++		return -ETOOSMALL;
++
++	p = exp_xdr_encode_u64(p, bld->u.slice.bld_start);
++	p = exp_xdr_encode_u64(p, bld->u.slice.bld_len);
++	exp_xdr_encode_u32(p, bld->u.slice.bld_index);
++
++	return 0;
++}
++
++static int
++bl_encode_concat(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	return -ENOTSUPP;
++}
++
++static int
++bl_encode_stripe(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	int i;
++	/* Reserve in 32-bit words (not bytes): 2 + 1 + one per stripe index */
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 3 + bld->u.stripe.bld_stripes);
++
++	if (!p)
++		return -ETOOSMALL; /* no room left in xdr, as in the siblings */
++	p = exp_xdr_encode_u64(p, bld->u.stripe.bld_chunk_size);
++	p = exp_xdr_encode_u32(p, bld->u.stripe.bld_stripes);
++	for (i = 0; i < bld->u.stripe.bld_stripes; i++)
++		p = exp_xdr_encode_u32(p, bld->u.stripe.bld_stripe_indexs[i]);
++	return 0;
++}
++
++int
++blocklayout_encode_devinfo(struct exp_xdr_stream *xdr,
++			   const struct list_head *volumes)
++{
++	u32				num_vols	= 0,
++					*layoutlen_p	= xdr->p;
++	pnfs_blocklayout_devinfo_t	*bld;
++	int				status		= 0;
++	__be32 *p;
++
++	p = exp_xdr_reserve_qwords(xdr, 2);
++	if (!p)
++		return -ETOOSMALL;
++	p += 2;
++
++	/*
++	 * All simple volumes with their signature are required to be listed
++	 * first.
++	 */
++	list_for_each_entry(bld, volumes, bld_list) {
++		num_vols++;
++		p = exp_xdr_reserve_qwords(xdr, 1);
++		if (!p)
++			return -ETOOSMALL;
++		p = exp_xdr_encode_u32(p, bld->bld_type);
++		switch (bld->bld_type) {
++			case PNFS_BLOCK_VOLUME_SIMPLE:
++				status = bl_encode_simple(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_SLICE:
++				status = bl_encode_slice(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_CONCAT:
++				status = bl_encode_concat(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_STRIPE:
++				status = bl_encode_stripe(xdr, bld);
++				break;
++			default:
++				BUG();
++		}
++		if (status)
++			goto error;
++	}
++
++	/* ---- Fill in the overall length and number of volumes ---- */
++	p = exp_xdr_encode_u32(layoutlen_p, (xdr->p - layoutlen_p - 1) * 4);
++	exp_xdr_encode_u32(p, num_vols);
++
++error:
++	return status;
++}
++EXPORT_SYMBOL_GPL(blocklayout_encode_devinfo);
++
++enum nfsstat4
++blocklayout_encode_layout(struct exp_xdr_stream *xdr,
++			  const struct list_head *bl_head)
++{
++	struct pnfs_blocklayout_layout	*b;
++	u32				*layoutlen_p	= xdr->p,
++					extents		= 0;
++	__be32 *p;
++
++	/*
++	 * Save spot for opaque block layout length and number of extents,
++	 * fill-in later.
++	 */
++	p = exp_xdr_reserve_qwords(xdr, 2);
++	if (!p)
++		return NFS4ERR_TOOSMALL;
++	p += 2;
++
++	list_for_each_entry(b, bl_head, bll_list) {
++		extents++;
++		p = exp_xdr_reserve_qwords(xdr, 5 * 2 + 1);
++		if (!p)
++			return NFS4ERR_TOOSMALL;
++		p = exp_xdr_encode_u64(p, b->bll_vol_id.sbid);
++		p = exp_xdr_encode_u64(p, b->bll_vol_id.devid);
++		p = exp_xdr_encode_u64(p, b->bll_foff);
++		p = exp_xdr_encode_u64(p, b->bll_len);
++		p = exp_xdr_encode_u64(p, b->bll_soff);
++		p = exp_xdr_encode_u32(p, b->bll_es);
++	}
++
++	/* ---- Fill in the overall length and number of extents ---- */
++	p = exp_xdr_encode_u32(layoutlen_p, (p - layoutlen_p - 1) * 4);
++	exp_xdr_encode_u32(p, extents);
++
++	return NFS4_OK;
++}
++EXPORT_SYMBOL_GPL(blocklayout_encode_layout);
+diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c
+--- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-08-23 12:09:03.283511561 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-08-23 12:09:03.283511561 -0400
+@@ -0,0 +1,218 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#include <linux/exp_xdr.h>
++#include <linux/module.h>
++#include <linux/nfs4.h>
++#include <linux/nfsd/nfsfh.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++/* Define our own no-op dprintk so filesystems do not depend on sunrpc */
++#ifdef dprintk
++#undef dprintk
++#endif
++#define dprintk(fmt, args...)	do { } while (0)
++
++/* Calculate the XDR length (in words) of the GETDEVICEINFO4resok structure
++ * excluding the gdir_notification and the gdir_device_addr da_layout_type.
++ */
++static int fl_devinfo_xdr_words(const struct pnfs_filelayout_device *fdev)
++{
++	struct pnfs_filelayout_devaddr *fl_addr;
++	struct pnfs_filelayout_multipath *mp;
++	int i, j, nwords;
++
++	/* da_addr_body length, index count, indices, multipath_list4 length */
++	nwords = 1 + 1 + fdev->fl_stripeindices_length + 1;
++	for (i = 0; i < fdev->fl_device_length; i++) {
++		mp = &fdev->fl_device_list[i];
++		nwords++; /* multipath list length */
++		for (j = 0; j < mp->fl_multipath_length; j++) {
++			/* size entry j itself, matching what gets encoded */
++			fl_addr = &mp->fl_multipath_list[j];
++			nwords += 1 + exp_xdr_qwords(fl_addr->r_netid.len);
++			nwords += 1 + exp_xdr_qwords(fl_addr->r_addr.len);
++		}
++	}
++	dprintk("<-- %s nwords %d\n", __func__, nwords);
++	return nwords;
++}
++
++/* Encodes the nfsv4_1_file_layout_ds_addr4 structure from draft 13
++ * on the response stream.
++ * Use linux error codes (not nfs) since these values are being
++ * returned to the file system.
++ */
++int
++filelayout_encode_devinfo(struct exp_xdr_stream *xdr,
++			  const struct pnfs_filelayout_device *fdev)
++{
++	unsigned int i, j, len = 0, opaque_words;
++	u32 *p_in;
++	u32 index_count = fdev->fl_stripeindices_length;
++	u32 dev_count = fdev->fl_device_length;
++	int error = 0;
++	__be32 *p;
++
++	opaque_words = fl_devinfo_xdr_words(fdev);
++	dprintk("%s: Begin indx_cnt: %u dev_cnt: %u total size %u\n",
++		__func__,
++		index_count,
++		dev_count,
++		opaque_words*4);
++
++	/* check space for opaque length */
++	p = p_in = exp_xdr_reserve_qwords(xdr, opaque_words);
++	if (!p) {
++		error =  -ETOOSMALL;
++		goto out;
++	}
++
++	/* Fill in length later */
++	p++;
++
++	/* encode device list indices */
++	p = exp_xdr_encode_u32(p, index_count);
++	for (i = 0; i < index_count; i++)
++		p = exp_xdr_encode_u32(p, fdev->fl_stripeindices_list[i]);
++
++	/* encode device list */
++	p = exp_xdr_encode_u32(p, dev_count);
++	for (i = 0; i < dev_count; i++) {
++		struct pnfs_filelayout_multipath *mp = &fdev->fl_device_list[i];
++
++		p = exp_xdr_encode_u32(p, mp->fl_multipath_length);
++		for (j = 0; j < mp->fl_multipath_length; j++) {
++			struct pnfs_filelayout_devaddr *da =
++						&mp->fl_multipath_list[j];
++
++			/* Encode device info */
++			p = exp_xdr_encode_opaque(p, da->r_netid.data,
++						     da->r_netid.len);
++			p = exp_xdr_encode_opaque(p, da->r_addr.data,
++						     da->r_addr.len);
++		}
++	}
++
++	/* backfill in length. Subtract 4 for da_addr_body size */
++	len = (char *)p - (char *)p_in;
++	exp_xdr_encode_u32(p_in, len - 4);
++
++	error = 0;
++out:
++	dprintk("%s: End err %d xdrlen %d\n",
++		__func__, error, len);
++	return error;
++}
++EXPORT_SYMBOL(filelayout_encode_devinfo);
++
++/* Encodes the loc_body structure from draft 13
++ * on the response stream.
++ * Returns an NFSv4 status (enum nfsstat4), not a linux error code:
++ * NFS4_OK, NFS4ERR_LAYOUTUNAVAILABLE or NFS4ERR_TOOSMALL.
++ */
++enum nfsstat4
++filelayout_encode_layout(struct exp_xdr_stream *xdr,
++			 const struct pnfs_filelayout_layout *flp)
++{
++	u32 len = 0, nfl_util, fhlen, i;
++	u32 *layoutlen_p;
++	enum nfsstat4 nfserr;
++	__be32 *p;
++
++	dprintk("%s: device_id %llx:%llx fsi %u, numfh %u\n",
++		__func__,
++		flp->device_id.pnfs_fsid,
++		flp->device_id.pnfs_devid,
++		flp->lg_first_stripe_index,
++		flp->lg_fh_length);
++
++	/* Ensure file system added at least one file handle */
++	if (flp->lg_fh_length <= 0) {
++		dprintk("%s: File Layout has no file handles!!\n", __func__);
++		nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		goto out;
++	}
++
++	/* Ensure room for len, devid, util, first_stripe_index,
++	 * pattern_offset, number of filehandles */
++	p = layoutlen_p = exp_xdr_reserve_qwords(xdr, 1+2+2+1+1+2+1);
++	if (!p) {
++		nfserr = NFS4ERR_TOOSMALL;
++		goto out;
++	}
++
++	/* save spot for opaque file layout length, fill-in later*/
++	p++;
++
++	/* encode device id */
++	p = exp_xdr_encode_u64(p, flp->device_id.sbid);
++	p = exp_xdr_encode_u64(p, flp->device_id.devid);
++
++	/* set and encode flags */
++	nfl_util = flp->lg_stripe_unit;
++	if (flp->lg_commit_through_mds)
++		nfl_util |= NFL4_UFLG_COMMIT_THRU_MDS;
++	if (flp->lg_stripe_type == STRIPE_DENSE)
++		nfl_util |= NFL4_UFLG_DENSE;
++	p = exp_xdr_encode_u32(p, nfl_util);
++
++	/* encode first stripe index */
++	p = exp_xdr_encode_u32(p, flp->lg_first_stripe_index);
++
++	/* encode striping pattern start */
++	p = exp_xdr_encode_u64(p, flp->lg_pattern_offset);
++
++	/* encode number of file handles */
++	p = exp_xdr_encode_u32(p, flp->lg_fh_length);
++
++	/* encode file handles */
++	for (i = 0; i < flp->lg_fh_length; i++) {
++		fhlen = flp->lg_fh_list[i].fh_size;
++		p = exp_xdr_reserve_space(xdr, 4 + fhlen);
++		if (!p) {
++			nfserr = NFS4ERR_TOOSMALL;
++			goto out;
++		}
++		p = exp_xdr_encode_opaque(p, &flp->lg_fh_list[i].fh_base, fhlen);
++	}
++
++	/* Set number of bytes encoded =  total_bytes_encoded - length var */
++	len = (char *)p - (char *)layoutlen_p;
++	exp_xdr_encode_u32(layoutlen_p, len - 4);
++
++	nfserr = NFS4_OK;
++out:
++	dprintk("%s: End err %u xdrlen %d\n",
++		__func__, nfserr, len);
++	return nfserr;
++}
++EXPORT_SYMBOL(filelayout_encode_layout);
+diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c
+--- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-08-23 12:09:03.284511493 -0400
++++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-08-23 12:09:03.284511493 -0400
+@@ -0,0 +1,289 @@
++/*
++ *  pnfs_osd_xdr_srv.c
++ *
++ *  Object-Based pNFS Layout XDR layer
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/pnfs_osd_xdr.h>
++
++/*
++ * struct pnfs_osd_data_map {
++ * 	u32	odm_num_comps;
++ * 	u64	odm_stripe_unit;
++ * 	u32	odm_group_width;
++ * 	u32	odm_group_depth;
++ * 	u32	odm_mirror_cnt;
++ * 	u32	odm_raid_algorithm;
++ * };
++ */
++static int pnfs_osd_xdr_encode_data_map(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_data_map *data_map)
++{
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 1+2+1+1+1+1);
++
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u32(p, data_map->odm_num_comps);
++	p = exp_xdr_encode_u64(p, data_map->odm_stripe_unit);
++	p = exp_xdr_encode_u32(p, data_map->odm_group_width);
++	p = exp_xdr_encode_u32(p, data_map->odm_group_depth);
++	p = exp_xdr_encode_u32(p, data_map->odm_mirror_cnt);
++	p = exp_xdr_encode_u32(p, data_map->odm_raid_algorithm);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline int pnfs_osd_xdr_encode_objid(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_objid *object_id)
++{
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 2+2+2+2);
++	struct nfsd4_pnfs_deviceid *dev_id =
++		(struct nfsd4_pnfs_deviceid *)&object_id->oid_device_id;
++
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u64(p, dev_id->sbid);
++	p = exp_xdr_encode_u64(p, dev_id->devid);
++	p = exp_xdr_encode_u64(p, object_id->oid_partition_id);
++	p = exp_xdr_encode_u64(p, object_id->oid_object_id);
++
++	return 0;
++}
++
++/*
++ * enum pnfs_osd_cap_key_sec4 {
++ * 	PNFS_OSD_CAP_KEY_SEC_NONE = 0,
++ * 	PNFS_OSD_CAP_KEY_SEC_SSV  = 1
++ * };
++ *
++ * struct pnfs_osd_object_cred {
++ * 	struct pnfs_osd_objid		oc_object_id;
++ * 	u32				oc_osd_version;
++ * 	u32				oc_cap_key_sec;
++ * 	struct pnfs_osd_opaque_cred	oc_cap_key
++ * 	struct pnfs_osd_opaque_cred	oc_cap;
++ * };
++ */
++static int pnfs_osd_xdr_encode_object_cred(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_object_cred *olo_comp)
++{
++	__be32 *p;
++	int err;
++
++	err = pnfs_osd_xdr_encode_objid(xdr, &olo_comp->oc_object_id);
++	if (err)
++		return err;
++
++	p = exp_xdr_reserve_space(xdr, 3*4 + 4+olo_comp->oc_cap.cred_len);
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u32(p, olo_comp->oc_osd_version);
++
++	/* No sec for now */
++	p = exp_xdr_encode_u32(p, PNFS_OSD_CAP_KEY_SEC_NONE);
++	p = exp_xdr_encode_u32(p, 0); /* opaque oc_capability_key<> */
++
++	exp_xdr_encode_opaque(p, olo_comp->oc_cap.cred,
++			      olo_comp->oc_cap.cred_len);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_layout {
++ * 	struct pnfs_osd_data_map	olo_map;
++ * 	u32				olo_comps_index;
++ * 	u32				olo_num_comps;
++ * 	struct pnfs_osd_object_cred	*olo_comps;
++ * };
++ */
++int pnfs_osd_xdr_encode_layout(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_layout *pol)
++{
++	__be32 *p;
++	u32 i;
++	int err;
++
++	err = pnfs_osd_xdr_encode_data_map(xdr, &pol->olo_map);
++	if (err)
++		return err;
++
++	p = exp_xdr_reserve_qwords(xdr, 2);
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u32(p, pol->olo_comps_index);
++	p = exp_xdr_encode_u32(p, pol->olo_num_comps);
++
++	for (i = 0; i < pol->olo_num_comps; i++) {
++		err = pnfs_osd_xdr_encode_object_cred(xdr, &pol->olo_comps[i]);
++		if (err)
++			return err;
++	}
++
++	return 0;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_encode_layout);
++
++static int _encode_string(struct exp_xdr_stream *xdr,
++			  const struct nfs4_string *str)
++{
++	__be32 *p = exp_xdr_reserve_space(xdr, 4 + str->len);
++
++	if (!p)
++		return -E2BIG;
++	exp_xdr_encode_opaque(p, str->data, str->len);
++	return 0;
++}
++
++/* struct pnfs_osd_deviceaddr {
++ * 	struct pnfs_osd_targetid	oda_targetid;
++ * 	struct pnfs_osd_targetaddr	oda_targetaddr;
++ * 	u8				oda_lun[8];
++ * 	struct nfs4_string		oda_systemid;
++ * 	struct pnfs_osd_object_cred	oda_root_obj_cred;
++ * 	struct nfs4_string		oda_osdname;
++ * };
++ */
++int pnfs_osd_xdr_encode_deviceaddr(
++	struct exp_xdr_stream *xdr, struct pnfs_osd_deviceaddr *devaddr)
++{
++	__be32 *p;
++	int err;
++
++	p = exp_xdr_reserve_space(xdr, 4 + 4 + sizeof(devaddr->oda_lun));
++	if (!p)
++		return -E2BIG;
++
++	/* Empty oda_targetid */
++	p = exp_xdr_encode_u32(p, OBJ_TARGET_ANON);
++
++	/* Empty oda_targetaddr for now */
++	p = exp_xdr_encode_u32(p, 0);
++
++	/* oda_lun */
++	exp_xdr_encode_bytes(p, devaddr->oda_lun, sizeof(devaddr->oda_lun));
++
++	err = _encode_string(xdr, &devaddr->oda_systemid);
++	if (err)
++		return err;
++
++	err = pnfs_osd_xdr_encode_object_cred(xdr,
++					      &devaddr->oda_root_obj_cred);
++	if (err)
++		return err;
++
++	err = _encode_string(xdr, &devaddr->oda_osdname);
++	if (err)
++		return err;
++
++	return 0;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_encode_deviceaddr);
++
++/*
++ * struct pnfs_osd_layoutupdate {
++ * 	u32	dsu_valid;
++ * 	s64	dsu_delta;
++ * 	u32	olu_ioerr_flag;
++ * };
++ */
++__be32 *
++pnfs_osd_xdr_decode_layoutupdate(struct pnfs_osd_layoutupdate *lou, __be32 *p)
++{
++	lou->dsu_valid = be32_to_cpu(*p++);
++	if (lou->dsu_valid)
++		p = xdr_decode_hyper(p, &lou->dsu_delta);
++	lou->olu_ioerr_flag = be32_to_cpu(*p++);
++	return p;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_decode_layoutupdate);
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline __be32 *
++pnfs_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid)
++{
++	/* FIXME: p = xdr_decode_fixed(...) */
++	memcpy(objid->oid_device_id.data, p, sizeof(objid->oid_device_id.data));
++	p += XDR_QUADLEN(sizeof(objid->oid_device_id.data));
++
++	p = xdr_decode_hyper(p, &objid->oid_partition_id);
++	p = xdr_decode_hyper(p, &objid->oid_object_id);
++	return p;
++}
++
++/*
++ * struct pnfs_osd_ioerr {
++ * 	struct pnfs_osd_objid	oer_component;
++ * 	u64			oer_comp_offset;
++ * 	u64			oer_comp_length;
++ * 	u32			oer_iswrite;
++ * 	u32			oer_errno;
++ * };
++ */
++__be32 *
++pnfs_osd_xdr_decode_ioerr(struct pnfs_osd_ioerr *ioerr, __be32 *p)
++{
++	p = pnfs_osd_xdr_decode_objid(p, &ioerr->oer_component);
++	p = xdr_decode_hyper(p, &ioerr->oer_comp_offset);
++	p = xdr_decode_hyper(p, &ioerr->oer_comp_length);
++	ioerr->oer_iswrite = be32_to_cpu(*p++);
++	ioerr->oer_errno = be32_to_cpu(*p++);
++	return p;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_decode_ioerr);
+diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gfs2/ops_fstype.c
+--- linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-08-23 12:09:03.285539075 -0400
+@@ -19,6 +19,7 @@
+ #include <linux/gfs2_ondisk.h>
+ #include <linux/slow-work.h>
+ #include <linux/quotaops.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
+ 
+ #include "gfs2.h"
+ #include "incore.h"
+@@ -1146,6 +1147,9 @@ static int fill_super(struct super_block
+ 	sb->s_magic = GFS2_MAGIC;
+ 	sb->s_op = &gfs2_super_ops;
+ 	sb->s_export_op = &gfs2_export_ops;
++#if defined(CONFIG_PNFSD)
++	sb->s_pnfs_op = &pnfs_dlm_export_ops;
++#endif /* CONFIG_PNFSD */
+ 	sb->s_xattr = gfs2_xattr_handlers;
+ 	sb->s_qcop = &gfs2_quotactl_ops;
+ 	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
+--- linux-2.6.34.noarch/fs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/Kconfig	2010-08-23 12:09:03.286512316 -0400
+@@ -224,6 +224,31 @@ config LOCKD_V4
+ config EXPORTFS
+ 	tristate
+ 
++config EXPORTFS_FILE_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 files layout type.
++	  Must be automatically selected by supporting filesystems.
++
++config EXPORTFS_OSD_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 objects layout type.
++	  Must be automatically selected by supporting osd
++	  filesystems.
++
++	  If unsure, say N.
++
++config EXPORTFS_BLOCK_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 blocks layout type.
++	  Must be automatically selected by supporting filesystems.
++
++
+ config NFS_ACL_SUPPORT
+ 	tristate
+ 	select FS_POSIX_ACL
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-08-23 12:09:03.287381619 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-08-23 12:09:03.287381619 -0400
+@@ -0,0 +1,66 @@
++#include <linux/module.h>
++#include <linux/uaccess.h>
++#include <linux/proc_fs.h>
++#include <linux/string.h>
++#include <linux/slab.h>
++#include <linux/ctype.h>
++#include <linux/sched.h>
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY NFSDBG_PNFS_LD
++
++struct pipefs_list bl_device_list;
++struct dentry *bl_device_pipe;
++
++ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, size_t len)
++{
++	int err;
++	struct pipefs_hdr *msg;
++
++	dprintk("Entering %s...\n", __func__);
++
++	msg = pipefs_readmsg(filp, src, len);
++	if (IS_ERR(msg)) {
++		dprintk("ERROR: unable to read pipefs message.\n");
++		return PTR_ERR(msg);
++	}
++
++	/* now assign the result, which wakes the blocked thread */
++	err = pipefs_assign_upcall_reply(msg, &bl_device_list);
++	if (err) {
++		dprintk("ERROR: failed to assign upcall with id %u\n",
++			msg->msgid);
++		kfree(msg); /* msg is freed here only when assignment fails */
++	}
++	return len; /* NOTE(review): err is dropped; len is returned anyway */
++}
++
++static const struct rpc_pipe_ops bl_pipe_ops = {
++	.upcall         = pipefs_generic_upcall,
++	.downcall       = bl_pipe_downcall,
++	.destroy_msg    = pipefs_generic_destroy_msg,
++};
++
++int bl_pipe_init(void)
++{
++	dprintk("%s: block_device pipefs registering...\n", __func__);
++	bl_device_pipe = pipefs_mkpipe("bl_device_pipe", &bl_pipe_ops, 1);
++	/* A failed mkpipe is only logged; init continues and returns 0 */
++	if (IS_ERR(bl_device_pipe))
++		dprintk("ERROR, unable to make block_device pipe\n");
++	else if (!bl_device_pipe)
++		dprintk("bl_device_pipe is NULL!\n");
++	else
++		dprintk("bl_device_pipe created!\n");
++	pipefs_init_list(&bl_device_list);
++	return 0;
++}
++
++/* Close bl_device_pipe unless it holds an error pointer. */
++void bl_pipe_exit(void)
++{
++	dprintk("%s: block_device pipefs unregistering...\n", __func__);
++	/* nothing to close when the stored pointer is an ERR_PTR */
++	if (!IS_ERR(bl_device_pipe))
++		pipefs_closepipe(bl_device_pipe);
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-08-23 12:09:03.288501648 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-08-23 12:09:03.288501648 -0400
+@@ -0,0 +1,1160 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayout.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++
++#include <linux/buffer_head.h> /* various write calls */
++#include <linux/bio.h> /* struct bio */
++#include <linux/vmalloc.h>
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
++MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
++
++/* Callback operations to the pNFS client */
++static struct pnfs_client_operations *pnfs_block_callback_ops;
++
++static void print_page(struct page *page)
++{
++	dprintk("PRINTPAGE page %p\n", page);
++	dprintk("        PagePrivate %d\n", PagePrivate(page));
++	dprintk("        PageUptodate %d\n", PageUptodate(page));
++	dprintk("        PageError %d\n", PageError(page));
++	dprintk("        PageDirty %d\n", PageDirty(page));
++	dprintk("        PageReferenced %d\n", PageReferenced(page));
++	dprintk("        PageLocked %d\n", PageLocked(page));
++	dprintk("        PageWriteback %d\n", PageWriteback(page));
++	dprintk("        PageMappedToDisk %d\n", PageMappedToDisk(page));
++	dprintk("\n");
++}
++
++/* Given the block extent (be) associated with isect, determine if page
++ * data needs to be initialized: always for NONE_DATA extents, and for
++ * INVALID_DATA extents while the sector is not yet initialized.
++ */
++static int is_hole(struct pnfs_block_extent *be, sector_t isect)
++{
++	/* only an INVALID_DATA extent needs the per-sector lookup */
++	if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++		return !is_sector_initialized(be->be_inval, isect);
++
++	return be->be_state == PNFS_BLOCK_NONE_DATA;
++}
++
++/* Given the block extent (be) associated with isect, determine if page
++ * data can be written to disk: always for READWRITE_DATA extents, and
++ * for INVALID_DATA extents once the sector has been initialized.
++ */
++static int is_writable(struct pnfs_block_extent *be, sector_t isect)
++{
++	/* only an INVALID_DATA extent needs the per-sector lookup */
++	if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++		return is_sector_initialized(be->be_inval, isect);
++
++	return be->be_state == PNFS_BLOCK_READWRITE_DATA;
++}
++
++/*
++ * Tell the two callers apart: wb_complete is non-zero when called by
++ * _multi (return 1) and zero when called by _one (return 0).
++ */
++static int
++dont_like_caller(struct nfs_page *req)
++{
++	int from_multi = atomic_read(&req->wb_complete) != 0;
++
++	return from_multi;
++}
++
++static enum pnfs_try_status
++bl_commit(struct nfs_write_data *nfs_data,
++	  int sync)
++{
++	dprintk("%s enter\n", __func__);
++	return PNFS_NOT_ATTEMPTED;
++}
++
++/* The data we are handed might be spread across several bios.  We need
++ * to track when the last one is finished.
++ */
++struct parallel_io {
++	struct kref refcnt;
++	struct rpc_call_ops call_ops;
++	void (*pnfs_callback) (void *data);
++	void *data;
++};
++
++/* Allocate a parallel_io; only ->data and ->refcnt are set, NULL on failure */
++static inline struct parallel_io *alloc_parallel(void *data)
++{
++	struct parallel_io *par = kmalloc(sizeof(*par), GFP_KERNEL);
++
++	if (!par)
++		return NULL;
++	par->data = data;
++	kref_init(&par->refcnt);
++	return par;
++}
++
++static inline void get_parallel(struct parallel_io *p)
++{
++	kref_get(&p->refcnt);
++}
++
++static void destroy_parallel(struct kref *kref)
++{
++	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
++
++	dprintk("%s enter\n", __func__);
++	p->pnfs_callback(p->data);
++	kfree(p);
++}
++
++static inline void put_parallel(struct parallel_io *p)
++{
++	kref_put(&p->refcnt, destroy_parallel);
++}
++
++static struct bio *
++bl_submit_bio(int rw, struct bio *bio)
++{
++	if (bio) {
++		get_parallel(bio->bi_private);
++		dprintk("%s submitting %s bio %u@%llu\n", __func__,
++			rw == READ ? "read" : "write",
++			bio->bi_size, (u64)bio->bi_sector);
++		submit_bio(rw, bio);
++	}
++	return NULL;
++}
++
++static inline void
++bl_done_with_rpage(struct page *page, const int ok)
++{
++	if (ok) {
++		ClearPagePnfsErr(page);
++		SetPageUptodate(page);
++	} else {
++		ClearPageUptodate(page);
++		SetPageError(page);
++		SetPagePnfsErr(page);
++	}
++	/* Page is unlocked via rpc_release.  Should really be done here. */
++}
++
++/* This is basically copied from mpage_end_io_read */
++static void bl_end_io_read(struct bio *bio, int err)
++{
++	void *data = bio->bi_private;
++	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
++	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
++
++	do {
++		struct page *page = bvec->bv_page;
++
++		if (--bvec >= bio->bi_io_vec)
++			prefetchw(&bvec->bv_page->flags);
++		bl_done_with_rpage(page, uptodate);
++	} while (bvec >= bio->bi_io_vec);
++	bio_put(bio);
++	put_parallel(data);
++}
++
++static void bl_read_cleanup(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_read_data *rdata;
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	rdata = container_of(task, struct nfs_read_data, task);
++	pnfs_block_callback_ops->nfs_readlist_complete(rdata);
++}
++
++static void
++bl_end_par_io_read(void *data)
++{
++	struct nfs_read_data *rdata = data;
++
++	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
++	schedule_work(&rdata->task.u.tk_work);
++}
++
++/* We don't want normal .rpc_call_done callback used, so we replace it
++ * with this stub.
++ */
++static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata)
++{
++	return;
++}
++
++static enum pnfs_try_status
++bl_read_pagelist(struct nfs_read_data *rdata,
++		 unsigned nr_pages)
++{
++	int i, hole;
++	struct bio *bio = NULL;
++	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
++	sector_t isect, extent_length = 0;
++	struct parallel_io *par;
++	loff_t f_offset = rdata->args.offset;
++	size_t count = rdata->args.count;
++	struct page **pages = rdata->args.pages;
++	int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
++
++	dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__,
++	       nr_pages, f_offset, count);
++
++	if (dont_like_caller(rdata->req)) {
++		dprintk("%s dont_like_caller failed\n", __func__);
++		goto use_mds;
++	}
++	if ((nr_pages == 1) && PagePnfsErr(rdata->req->wb_page)) {
++		/* We want to fall back to mds in case of read_page
++		 * after error on read_pages.
++		 */
++		dprintk("%s PG_pnfserr set\n", __func__);
++		goto use_mds;
++	}
++	par = alloc_parallel(rdata);
++	if (!par)
++		goto use_mds;
++	par->call_ops = *rdata->pdata.call_ops;
++	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
++	par->pnfs_callback = bl_end_par_io_read;
++	/* At this point, we can no longer jump to use_mds */
++
++	isect = (sector_t) (f_offset >> 9);
++	/* Code assumes extents are page-aligned */
++	for (i = pg_index; i < nr_pages; i++) {
++		if (!extent_length) {
++			/* We've used up the previous extent */
++			put_extent(be);
++			put_extent(cow_read);
++			bio = bl_submit_bio(READ, bio);
++			/* Get the next one */
++			be = find_get_extent(BLK_LSEG2EXT(rdata->pdata.lseg),
++					     isect, &cow_read);
++			if (!be) {
++				/* Error out this page */
++				bl_done_with_rpage(pages[i], 0);
++				break;
++			}
++			extent_length = be->be_length -
++				(isect - be->be_f_offset);
++			if (cow_read) {
++				sector_t cow_length = cow_read->be_length -
++					(isect - cow_read->be_f_offset);
++				extent_length = min(extent_length, cow_length);
++			}
++		}
++		hole = is_hole(be, isect);
++		if (hole && !cow_read) {
++			bio = bl_submit_bio(READ, bio);
++			/* Fill hole w/ zeroes w/o accessing device */
++			dprintk("%s Zeroing page for hole\n", __func__);
++			zero_user(pages[i], 0,
++				  min_t(int, PAGE_CACHE_SIZE, count));
++			print_page(pages[i]);
++			bl_done_with_rpage(pages[i], 1);
++		} else {
++			struct pnfs_block_extent *be_read;
++
++			be_read = (hole && cow_read) ? cow_read : be;
++			for (;;) {
++				if (!bio) {
++					bio = bio_alloc(GFP_NOIO, nr_pages - i);
++					if (!bio) {
++						/* Error out this page */
++						bl_done_with_rpage(pages[i], 0);
++						break;
++					}
++					bio->bi_sector = isect -
++						be_read->be_f_offset +
++						be_read->be_v_offset;
++					bio->bi_bdev = be_read->be_mdev;
++					bio->bi_end_io = bl_end_io_read;
++					bio->bi_private = par;
++				}
++				if (bio_add_page(bio, pages[i], PAGE_SIZE, 0))
++					break;
++				bio = bl_submit_bio(READ, bio);
++			}
++		}
++		isect += PAGE_CACHE_SIZE >> 9;
++		extent_length -= PAGE_CACHE_SIZE >> 9;
++	}
++	if ((isect << 9) >= rdata->inode->i_size) {
++		rdata->res.eof = 1;
++		rdata->res.count = rdata->inode->i_size - f_offset;
++	} else {
++		rdata->res.count = (isect << 9) - f_offset;
++	}
++	put_extent(be);
++	put_extent(cow_read);
++	bl_submit_bio(READ, bio);
++	put_parallel(par);
++	return PNFS_ATTEMPTED;
++
++ use_mds:
++	dprintk("Giving up and using normal NFS\n");
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static void mark_extents_written(struct pnfs_block_layout *bl,
++				 __u64 offset, __u32 count)
++{
++	sector_t isect, end;
++	struct pnfs_block_extent *be;
++
++	dprintk("%s(%llu, %u)\n", __func__, offset, count);
++	if (count == 0)
++		return;
++	isect = (offset & (long)(PAGE_CACHE_MASK)) >> 9;
++	end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK);
++	end >>= 9;
++	while (isect < end) {
++		sector_t len;
++		be = find_get_extent(bl, isect, NULL);
++		BUG_ON(!be); /* FIXME */
++		len = min(end, be->be_f_offset + be->be_length) - isect;
++		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++			mark_for_commit(be, isect, len); /* What if fails? */
++		isect += len;
++		put_extent(be);
++	}
++}
++
++/* STUB - this needs thought */
++static inline void
++bl_done_with_wpage(struct page *page, const int ok)
++{
++	if (!ok) {
++		SetPageError(page);
++		SetPagePnfsErr(page);
++		/* This is an inline copy of nfs_zap_mapping */
++		/* This is oh so fishy, and needs deep thought */
++		if (page->mapping->nrpages != 0) {
++			struct inode *inode = page->mapping->host;
++			spin_lock(&inode->i_lock);
++			NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
++			spin_unlock(&inode->i_lock);
++		}
++	}
++	/* end_page_writeback called in rpc_release.  Should be done here. */
++}
++
++/* This is basically copied from mpage_end_io_write */
++static void bl_end_io_write(struct bio *bio, int err)
++{
++	void *data = bio->bi_private;
++	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
++	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
++
++	do {
++		struct page *page = bvec->bv_page;
++
++		if (--bvec >= bio->bi_io_vec)
++			prefetchw(&bvec->bv_page->flags);
++		bl_done_with_wpage(page, uptodate);
++	} while (bvec >= bio->bi_io_vec);
++	bio_put(bio);
++	put_parallel(data);
++}
++
++/* Function scheduled for call during bl_end_par_io_write,
++ * it marks sectors as written and extends the commitlist.
++ */
++static void bl_write_cleanup(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++	if (!wdata->task.tk_status) {
++		/* Marks for LAYOUTCOMMIT */
++		/* BUG - this should be called after each bio, not after
++		 * all finish, unless have some way of storing success/failure
++		 */
++		mark_extents_written(BLK_LSEG2EXT(wdata->pdata.lseg),
++				     wdata->args.offset, wdata->args.count);
++	}
++	pnfs_block_callback_ops->nfs_writelist_complete(wdata);
++}
++
++/* Called when last of bios associated with a bl_write_pagelist call finishes */
++static void
++bl_end_par_io_write(void *data)
++{
++	struct nfs_write_data *wdata = data;
++
++	/* STUB - ignoring error handling */
++	wdata->task.tk_status = 0;
++	wdata->verf.committed = NFS_FILE_SYNC;
++	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
++	schedule_work(&wdata->task.u.tk_work);
++}
++
++static enum pnfs_try_status
++bl_write_pagelist(struct nfs_write_data *wdata,
++		  unsigned nr_pages,
++		  int sync)
++{
++	int i;
++	struct bio *bio = NULL;
++	struct pnfs_block_extent *be = NULL;
++	sector_t isect, extent_length = 0;
++	struct parallel_io *par;
++	loff_t offset = wdata->args.offset;
++	size_t count = wdata->args.count;
++	struct page **pages = wdata->args.pages;
++	int pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
++
++	dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
++	if (!wdata->req->wb_lseg) {
++		dprintk("%s no lseg, falling back to MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	if (dont_like_caller(wdata->req)) {
++		dprintk("%s dont_like_caller failed\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	/* At this point, wdata->pages is a (sequential) list of nfs_pages.
++	 * We want to write each, and if there is an error remove it from
++	 * list and call
++	 * nfs_retry_request(req) to have it redone using nfs.
++	 * QUEST? Do as block or per req?  Think have to do per block
++	 * as part of end_bio
++	 */
++	par = alloc_parallel(wdata);
++	if (!par)
++		return PNFS_NOT_ATTEMPTED;
++	par->call_ops = *wdata->pdata.call_ops;
++	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
++	par->pnfs_callback = bl_end_par_io_write;
++	/* At this point, have to be more careful with error handling */
++
++	isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> 9);
++	for (i = pg_index; i < nr_pages; i++) {
++		if (!extent_length) {
++			/* We've used up the previous extent */
++			put_extent(be);
++			bio = bl_submit_bio(WRITE, bio);
++			/* Get the next one */
++			be = find_get_extent(BLK_LSEG2EXT(wdata->pdata.lseg),
++					     isect, NULL);
++			if (!be || !is_writable(be, isect)) {
++				/* FIXME */
++				bl_done_with_wpage(pages[i], 0);
++				break;
++			}
++			extent_length = be->be_length -
++				(isect - be->be_f_offset);
++		}
++		for (;;) {
++			if (!bio) {
++				bio = bio_alloc(GFP_NOIO, nr_pages - i);
++				if (!bio) {
++					/* Error out this page */
++					/* FIXME */
++					bl_done_with_wpage(pages[i], 0);
++					break;
++				}
++				bio->bi_sector = isect - be->be_f_offset +
++					be->be_v_offset;
++				bio->bi_bdev = be->be_mdev;
++				bio->bi_end_io = bl_end_io_write;
++				bio->bi_private = par;
++			}
++			if (bio_add_page(bio, pages[i], PAGE_SIZE, 0))
++				break;
++			bio = bl_submit_bio(WRITE, bio);
++		}
++		isect += PAGE_CACHE_SIZE >> 9;
++		extent_length -= PAGE_CACHE_SIZE >> 9;
++	}
++	wdata->res.count = (isect << 9) - (offset & (long)PAGE_CACHE_MASK);
++	put_extent(be);
++	bl_submit_bio(WRITE, bio);
++	put_parallel(par);
++	return PNFS_ATTEMPTED;
++}
++
++/* FIXME - range ignored */
++static void
++release_extents(struct pnfs_block_layout *bl,
++		struct nfs4_pnfs_layout_segment *range)
++{
++	int i;
++	struct pnfs_block_extent *be;
++
++	spin_lock(&bl->bl_ext_lock);
++	for (i = 0; i < EXTENT_LISTS; i++) {
++		while (!list_empty(&bl->bl_extents[i])) {
++			be = list_first_entry(&bl->bl_extents[i],
++					      struct pnfs_block_extent,
++					      be_node);
++			list_del(&be->be_node);
++			put_extent(be);
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++}
++
++static void
++release_inval_marks(struct pnfs_inval_markings *marks)
++{
++	struct pnfs_inval_tracking *pos, *temp;
++
++	list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
++		list_del(&pos->it_link);
++		kfree(pos);
++	}
++	return;
++}
++
++/* Note we are relying on caller locking to prevent nasty races. */
++static void
++bl_free_layout(struct pnfs_layout_type *lo)
++{
++	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
++
++	dprintk("%s enter\n", __func__);
++	release_extents(bl, NULL);
++	release_inval_marks(&bl->bl_inval);
++	kfree(bl);
++}
++
++static struct pnfs_layout_type *
++bl_alloc_layout(struct inode *inode)
++{
++	struct pnfs_block_layout	*bl;
++
++	dprintk("%s enter\n", __func__);
++	bl = kzalloc(sizeof(*bl), GFP_KERNEL);
++	if (!bl)
++		return NULL;
++	spin_lock_init(&bl->bl_ext_lock);
++	INIT_LIST_HEAD(&bl->bl_extents[0]);
++	INIT_LIST_HEAD(&bl->bl_extents[1]);
++	INIT_LIST_HEAD(&bl->bl_commit);
++	bl->bl_count = 0;
++	bl->bl_blocksize = NFS_SERVER(inode)->pnfs_blksize >> 9;
++	INIT_INVAL_MARKS(&bl->bl_inval, bl->bl_blocksize);
++	return &bl->bl_layout;
++}
++
++static void
++bl_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	dprintk("%s enter\n", __func__);
++	kfree(lseg);
++}
++
++/* Because the generic infrastructure does not correctly merge layouts,
++ * we pretty much ignore lseg, and store all data layout wide, so we
++ * can correctly merge.  Eventually we should push some correct merge
++ * behavior up to the generic code, as the current behavior tends to
++ * cause lots of unnecessary overlapping LAYOUTGET requests.
++ */
++static struct pnfs_layout_segment *
++bl_alloc_lseg(struct pnfs_layout_type *lo,
++	      struct nfs4_pnfs_layoutget_res *lgr)
++{
++	struct pnfs_layout_segment *lseg;
++	int status;
++
++	dprintk("%s enter\n", __func__);
++	lseg = kzalloc(sizeof(*lseg) + 0, GFP_KERNEL);
++	if (!lseg)
++		return NULL;
++	status = nfs4_blk_process_layoutget(lo, lgr);
++	if (status) {
++		/* We don't want to call the full-blown bl_free_lseg,
++		 * since on error extents were not touched.
++		 */
++		/* STUB - we really want to distinguish between 2 error
++		 * conditions here.  This lseg failed, but lo data structures
++		 * are OK, or we hosed the lo data structures.  The calling
++		 * code probably needs to distinguish this too.
++		 */
++		kfree(lseg);
++		return ERR_PTR(status);
++	}
++	return lseg;
++}
++
++static int
++bl_setup_layoutcommit(struct pnfs_layout_type *lo,
++		      struct pnfs_layoutcommit_arg *arg)
++{
++	struct nfs_server *nfss = PNFS_NFS_SERVER(lo);
++	struct bl_layoutupdate_data *layoutupdate_data;
++
++	dprintk("%s enter\n", __func__);
++	/* Need to ensure commit is block-size aligned */
++	if (nfss->pnfs_blksize) {
++		u64 mask = nfss->pnfs_blksize - 1;
++		u64 offset = arg->lseg.offset & mask;
++
++		arg->lseg.offset -= offset;
++		arg->lseg.length += offset + mask;
++		arg->lseg.length &= ~mask;
++	}
++
++	layoutupdate_data = kmalloc(sizeof(struct bl_layoutupdate_data),
++					 GFP_KERNEL);
++	if (unlikely(!layoutupdate_data))
++		return -ENOMEM;
++	INIT_LIST_HEAD(&layoutupdate_data->ranges);
++	arg->layoutdriver_data = layoutupdate_data;
++
++	return 0;
++}
++
++static void
++bl_encode_layoutcommit(struct pnfs_layout_type *lo, struct xdr_stream *xdr,
++		       const struct pnfs_layoutcommit_arg *arg)
++{
++	dprintk("%s enter\n", __func__);
++	encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg);
++}
++
++static void
++bl_cleanup_layoutcommit(struct pnfs_layout_type *lo,
++			struct pnfs_layoutcommit_arg *arg, int status)
++{
++	dprintk("%s enter\n", __func__);
++	clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), arg, status);
++	kfree(arg->layoutdriver_data);
++}
++
++static void free_blk_mountid(struct block_mount_id *mid)
++{
++	if (mid) {
++		struct pnfs_block_dev *dev;
++		spin_lock(&mid->bm_lock);
++		while (!list_empty(&mid->bm_devlist)) {
++			dev = list_first_entry(&mid->bm_devlist,
++					       struct pnfs_block_dev,
++					       bm_node);
++			list_del(&dev->bm_node);
++			free_block_dev(dev);
++		}
++		spin_unlock(&mid->bm_lock);
++		kfree(mid);
++	}
++}
++
++/* Fetch and decode GETDEVICEINFO for d_id; returns NULL on any failure.
++ * Mostly copied from filelayout's get_device_info; belongs in generic pnfs.
++ */
++static struct pnfs_block_dev *
++nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
++			struct pnfs_deviceid *d_id,
++			struct list_head *sdlist)
++{
++	struct pnfs_device *dev;
++	struct pnfs_block_dev *rv = NULL;
++	u32 max_resp_sz;
++	struct page **pages = NULL;
++	int max_pages, i, rc;
++
++	/* Use the session max response size as the basis for setting
++	 * GETDEVICEINFO's maxcount
++	 */
++	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
++	max_pages = max_resp_sz >> PAGE_SHIFT;
++	dprintk("%s max_resp_sz %u max_pages %d\n",
++		__func__, max_resp_sz, max_pages);
++
++	/* Zeroed so out_free sees dev->area == NULL if vmap() was not reached */
++	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
++	if (!dev) {
++		dprintk("%s kmalloc failed\n", __func__);
++		return NULL;
++	}
++
++	pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
++	if (pages == NULL) {
++		kfree(dev);
++		return NULL;
++	}
++	for (i = 0; i < max_pages; i++) {
++		pages[i] = alloc_page(GFP_KERNEL);
++		if (!pages[i])
++			goto out_free;
++	}
++
++	/* dev->area: one contiguous kernel mapping over the reply pages */
++	dev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
++	if (!dev->area)
++		goto out_free;
++
++	memcpy(&dev->dev_id, d_id, sizeof(*d_id));
++	dev->layout_type = LAYOUT_BLOCK_VOLUME;
++	dev->dev_notify_types = 0;
++	dev->pages = pages;
++	dev->pgbase = 0;
++	dev->pglen = PAGE_SIZE * max_pages;
++	dev->mincount = 0;
++
++	dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
++	rc = pnfs_block_callback_ops->nfs_getdeviceinfo(server, dev);
++	dprintk("%s getdevice info returns %d\n", __func__, rc);
++	if (rc)
++		goto out_free;
++
++	rv = nfs4_blk_decode_device(server, dev, sdlist);
++ out_free:
++	if (dev->area != NULL)
++		vunmap(dev->area);
++	/* pages[] is filled front to back; stop at the first unallocated slot */
++	for (i = 0; i < max_pages && pages[i]; i++)
++		__free_page(pages[i]);
++	kfree(pages);
++	kfree(dev);
++	return rv;
++}
++
++
++/* Retrieve the list of available devices for the mountpoint.
++ * Returns 0 with server->pnfs_ld_data set, or a negative errno on failure.
++ */
++static int
++bl_initialize_mountpoint(struct nfs_server *server, const struct nfs_fh *fh)
++{
++	struct block_mount_id *b_mt_id = NULL;
++	struct pnfs_devicelist *dlist = NULL;
++	struct pnfs_block_dev *bdev;
++	LIST_HEAD(block_disklist);
++	int status = 0, i;
++
++	dprintk("%s enter\n", __func__);
++
++	if (server->pnfs_blksize == 0) {
++		dprintk("%s Server did not return blksize\n", __func__);
++		return -EINVAL;
++	}
++	b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_KERNEL);
++	if (!b_mt_id)
++		return -ENOMEM;
++	/* Initialize nfs4 block layout mount id */
++	spin_lock_init(&b_mt_id->bm_lock);
++	INIT_LIST_HEAD(&b_mt_id->bm_devlist);
++
++	dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_KERNEL);
++	if (!dlist) {
++		status = -ENOMEM;
++		goto out_error;
++	}
++	dlist->eof = 0;
++	while (!dlist->eof) {
++		status = pnfs_block_callback_ops->nfs_getdevicelist(
++							server, fh, dlist);
++		if (status)
++			goto out_error;
++		dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n",
++			__func__, dlist->num_devs, dlist->eof);
++		/* For each device returned in dlist, call GETDEVICEINFO, and
++		 * decode the opaque topology encoding to create a flat
++		 * volume topology, matching VOLUME_SIMPLE disk signatures
++		 * to disks in the visible block disk list.
++		 * Construct an LVM meta device from the flat volume topology.
++		 */
++		for (i = 0; i < dlist->num_devs; i++) {
++			bdev = nfs4_blk_get_deviceinfo(server, fh,
++						     &dlist->dev_id[i],
++						     &block_disklist);
++			if (!bdev) {
++				status = -ENODEV;	/* status is still 0 here */
++				goto out_error;
++			}
++			spin_lock(&b_mt_id->bm_lock);
++			list_add(&bdev->bm_node, &b_mt_id->bm_devlist);
++			spin_unlock(&b_mt_id->bm_lock);
++		}
++	}
++	dprintk("%s SUCCESS\n", __func__);
++	server->pnfs_ld_data = b_mt_id;
++
++ out_return:
++	kfree(dlist);
++	return status;
++
++ out_error:
++	free_blk_mountid(b_mt_id);
++	goto out_return;
++}
++
++static int
++bl_uninitialize_mountpoint(struct nfs_server *server)
++{
++	struct block_mount_id *b_mt_id = server->pnfs_ld_data;
++
++	dprintk("%s enter\n", __func__);
++	free_blk_mountid(b_mt_id);
++	dprintk("%s RETURNS\n", __func__);
++	return 0;
++}
++
++/* STUB - mark intersection of layout and page as bad, so is not
++ * used again.
++ */
++static void mark_bad_read(void)
++{
++	return;
++}
++
++/* Copied from buffer.c */
++static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
++{
++	if (uptodate) {
++		set_buffer_uptodate(bh);
++	} else {
++		/* This happens, due to failed READA attempts. */
++		clear_buffer_uptodate(bh);
++	}
++	unlock_buffer(bh);
++}
++
++/* Copied from buffer.c */
++static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
++{
++	__end_buffer_read_notouch(bh, uptodate);
++}
++
++/*
++ * map_block:  map a requested I/O block (isect) into an offset in the LVM
++ * meta block_device
++ */
++static void
++map_block(sector_t isect, struct pnfs_block_extent *be, struct buffer_head *bh)
++{
++	dprintk("%s enter be=%p\n", __func__, be);
++
++	set_buffer_mapped(bh);
++	bh->b_bdev = be->be_mdev;
++	bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >>
++		(be->be_mdev->bd_inode->i_blkbits - 9);
++
++	dprintk("%s isect %ld, bh->b_blocknr %ld, using bsize %Zd\n",
++				__func__, (long)isect,
++				(long)bh->b_blocknr,
++				bh->b_size);
++	return;
++}
++
++/* Given an unmapped page, zero it (or read in page for COW),
++ * and set appropriate flags/markings, but it is safe to not initialize
++ * the range given in [from, to).  Returns 0 on success or a negative
++ * errno.  This is loosely based on nobh_write_begin.
++ */
++static int
++init_page_for_write(struct pnfs_block_layout *bl, struct page *page,
++		    unsigned from, unsigned to, sector_t **pages_to_mark)
++{
++	struct buffer_head *bh;
++	int inval, ret = -EIO;
++	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
++	sector_t isect;
++
++	dprintk("%s enter, %p\n", __func__, page);
++	bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0);
++	if (!bh) {
++		mark_bad_read();
++		return -ENOMEM;	/* cleanup would free a NULL bh */
++	}
++
++	isect = (sector_t)page->index << (PAGE_CACHE_SHIFT - 9);
++	be = find_get_extent(bl, isect, &cow_read);
++	if (!be)
++		goto cleanup;
++	inval = is_hole(be, isect);
++	dprintk("%s inval=%i, from=%u, to=%u\n", __func__, inval, from, to);
++	if (inval) {
++		if (be->be_state == PNFS_BLOCK_NONE_DATA) {
++			dprintk("%s PANIC - got NONE_DATA extent %p\n",
++				__func__, be);
++			goto cleanup;
++		}
++		map_block(isect, be, bh);
++		unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
++	}
++	if (PageUptodate(page)) {
++		/* Do nothing */
++	} else if (inval && !cow_read) {
++		zero_user_segments(page, 0, from, to, PAGE_CACHE_SIZE);
++	} else if (0 < from || PAGE_CACHE_SIZE > to) {
++		struct pnfs_block_extent *read_extent;
++
++		read_extent = (inval && cow_read) ? cow_read : be;
++		map_block(isect, read_extent, bh);
++		lock_buffer(bh);
++		bh->b_end_io = end_buffer_read_nobh;
++		submit_bh(READ, bh);
++		dprintk("%s: Waiting for buffer read\n", __func__);
++		/* XXX Don't really want to hold layout lock here */
++		wait_on_buffer(bh);
++		if (!buffer_uptodate(bh))
++			goto cleanup;
++	}
++	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
++		/* There is a BUG here if is a short copy after write_begin,
++		 * but I think this is a generic fs bug.  The problem is that
++		 * we have marked the page as initialized, but it is possible
++		 * that the section not copied may never get copied.
++		 */
++		ret = mark_initialized_sectors(be->be_inval, isect,
++					       PAGE_CACHE_SECTORS,
++					       pages_to_mark);
++		/* Want to preallocate mem so above can't fail */
++		if (ret)
++			goto cleanup;
++	}
++	SetPageMappedToDisk(page);
++	ret = 0;
++
++cleanup:
++	free_buffer_head(bh);
++	put_extent(be);
++	put_extent(cow_read);
++	if (ret) {
++		/* Need to mark layout with bad read...should now
++		 * just use nfs4 for reads and writes.
++		 */
++		mark_bad_read();
++	}
++	return ret;
++}
++
++static int
++bl_write_begin(struct pnfs_layout_segment *lseg, struct page *page, loff_t pos,
++	       unsigned count, struct pnfs_fsdata *fsdata)
++{
++	unsigned from, to;
++	int ret;
++	sector_t *pages_to_mark = NULL;
++	struct pnfs_block_layout *bl = BLK_LSEG2EXT(lseg);
++
++	dprintk("%s enter, %u@%lld\n", __func__, count, pos);
++	print_page(page);
++	/* The following code assumes blocksize >= PAGE_CACHE_SIZE */
++	if (bl->bl_blocksize < (PAGE_CACHE_SIZE >> 9)) {
++		dprintk("%s Can't handle blocksize %llu\n", __func__,
++			(u64)bl->bl_blocksize);
++		put_lseg(fsdata->lseg);
++		fsdata->lseg = NULL;
++		return 0;
++	}
++	if (PageMappedToDisk(page)) {
++		/* Basically, this is a flag that says we have
++		 * successfully called write_begin already on this page.
++		 */
++		/* NOTE - there are cache consistency issues here.
++		 * For example, what if the layout is recalled, then regained?
++		 * If the file is closed and reopened, will the page flags
++		 * be reset?  If not, we'll have to use layout info instead of
++		 * the page flag.
++		 */
++		return 0;
++	}
++	from = pos & (PAGE_CACHE_SIZE - 1);
++	to = from + count;
++	ret = init_page_for_write(bl, page, from, to, &pages_to_mark);
++	if (ret) {
++		dprintk("%s init page failed with %i", __func__, ret);
++		/* Revert back to plain NFS and just continue on with
++		 * write.  This assumes there is no request attached, which
++		 * should be true if we get here.
++		 */
++		BUG_ON(PagePrivate(page));
++		put_lseg(fsdata->lseg);
++		fsdata->lseg = NULL;
++		kfree(pages_to_mark);
++		ret = 0;
++	} else {
++		fsdata->private = pages_to_mark;
++	}
++	return ret;
++}
++
++/* CAREFUL - what happens if copied < count??? */
++static int
++bl_write_end(struct inode *inode, struct page *page, loff_t pos,
++	     unsigned count, unsigned copied, struct pnfs_layout_segment *lseg)
++{
++	dprintk("%s enter, %u@%lld, lseg=%p\n", __func__, count, pos, lseg);
++	print_page(page);
++	if (lseg)
++		SetPageUptodate(page);
++	return 0;
++}
++
++/* Return any memory allocated to fsdata->private, and take advantage
++ * of no page locks to mark pages noted in write_begin as needing
++ * initialization.  Every page grabbed here is unlocked and released.
++ */
++static void
++bl_write_end_cleanup(struct file *filp, struct pnfs_fsdata *fsdata)
++{
++	struct page *page;
++	pgoff_t index;
++	sector_t *pos;
++	struct address_space *mapping = filp->f_mapping;
++	struct pnfs_fsdata *fake_data;
++	struct pnfs_layout_segment *lseg;
++
++	if (!fsdata)
++		return;
++	lseg = fsdata->lseg;
++	if (!lseg)
++		return;
++	pos = fsdata->private;
++	if (!pos)
++		return;
++	dprintk("%s enter with pos=%llu\n", __func__, (u64)(*pos));
++	for (; *pos != ~0; pos++) {
++		index = *pos >> (PAGE_CACHE_SHIFT - 9);
++		/* XXX How do we properly deal with failures here??? */
++		page = grab_cache_page_write_begin(mapping, index, 0);
++		if (!page) {
++			printk(KERN_ERR "%s BUG BUG BUG NoMem\n", __func__);
++			continue;
++		}
++		dprintk("%s: Examining block page\n", __func__);
++		print_page(page);
++		if (PageMappedToDisk(page)) {
++			unlock_page(page);
++			page_cache_release(page);
++			continue;
++		}
++		/* XXX How do we properly deal with failures here??? */
++		dprintk("%s Marking block page\n", __func__);
++		init_page_for_write(BLK_LSEG2EXT(fsdata->lseg), page,
++				    PAGE_CACHE_SIZE, PAGE_CACHE_SIZE, NULL);
++		print_page(page);
++		fake_data = kzalloc(sizeof(*fake_data), GFP_KERNEL);
++		if (!fake_data) {
++			printk(KERN_ERR "%s BUG BUG BUG NoMem\n", __func__);
++			unlock_page(page);
++			page_cache_release(page);
++			continue;
++		}
++		get_lseg(lseg);
++		fake_data->lseg = lseg;
++		fake_data->bypass_eof = 1;
++		mapping->a_ops->write_end(filp, mapping,
++					  (loff_t)index << PAGE_CACHE_SHIFT,
++					  PAGE_CACHE_SIZE, PAGE_CACHE_SIZE,
++					  page, fake_data);
++		/* Note fake_data is freed by nfs_write_end */
++	}
++	kfree(fsdata->private);
++	fsdata->private = NULL;
++}
++
++static ssize_t
++bl_get_stripesize(struct pnfs_layout_type *lo)
++{
++	dprintk("%s enter\n", __func__);
++	return 0;
++}
++
++/* This is called by nfs_can_coalesce_requests via nfs_pageio_do_add_request.
++ * Should return False if there is a reason requests can not be coalesced,
++ * otherwise, should default to returning True.
++ */
++static int
++bl_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
++	   struct nfs_page *req)
++{
++	dprintk("%s enter\n", __func__);
++	if (pgio->pg_iswrite)
++		return prev->wb_lseg == req->wb_lseg;
++	else
++		return 1;
++}
++
++static struct layoutdriver_io_operations blocklayout_io_operations = {
++	.commit				= bl_commit,
++	.read_pagelist			= bl_read_pagelist,
++	.write_pagelist			= bl_write_pagelist,
++	.write_begin			= bl_write_begin,
++	.write_end			= bl_write_end,
++	.write_end_cleanup		= bl_write_end_cleanup,
++	.alloc_layout			= bl_alloc_layout,
++	.free_layout			= bl_free_layout,
++	.alloc_lseg			= bl_alloc_lseg,
++	.free_lseg			= bl_free_lseg,
++	.setup_layoutcommit		= bl_setup_layoutcommit,
++	.encode_layoutcommit		= bl_encode_layoutcommit,
++	.cleanup_layoutcommit		= bl_cleanup_layoutcommit,
++	.initialize_mountpoint		= bl_initialize_mountpoint,
++	.uninitialize_mountpoint	= bl_uninitialize_mountpoint,
++};
++
++static struct layoutdriver_policy_operations blocklayout_policy_operations = {
++	.get_stripesize			= bl_get_stripesize,
++	.pg_test			= bl_pg_test,
++};
++
++static struct pnfs_layoutdriver_type blocklayout_type = {
++	.id = LAYOUT_BLOCK_VOLUME,
++	.name = "LAYOUT_BLOCK_VOLUME",
++	.ld_io_ops = &blocklayout_io_operations,
++	.ld_policy_ops = &blocklayout_policy_operations,
++};
++
++static int __init nfs4blocklayout_init(void)
++{
++	dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
++
++	pnfs_block_callback_ops = pnfs_register_layoutdriver(&blocklayout_type);
++	bl_pipe_init();
++	return 0;
++}
++
++static void __exit nfs4blocklayout_exit(void)
++{
++	dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
++	       __func__);
++
++	pnfs_unregister_layoutdriver(&blocklayout_type);
++	bl_pipe_exit();
++}
++
++module_init(nfs4blocklayout_init);
++module_exit(nfs4blocklayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-08-23 12:09:03.289501933 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-08-23 12:09:03.289501933 -0400
+@@ -0,0 +1,335 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayoutdev.c
++ *
++ *  Device operations for the pnfs nfs4 block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#include <linux/module.h>
++#include <linux/buffer_head.h> /* __bread */
++
++#include <linux/genhd.h>
++#include <linux/blkdev.h>
++#include <linux/hash.h>
++
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++uint32_t *blk_overflow(uint32_t *p, uint32_t *end, size_t nbytes)
++{
++	uint32_t *q = p + XDR_QUADLEN(nbytes);	/* one past the requested span */
++	if (unlikely(q > end || q < p))	/* runs past end, or the pointer wrapped */
++		return NULL;
++	return p;	/* p itself, not q: this only checks, it does not advance */
++}
++EXPORT_SYMBOL(blk_overflow);
++
++/* Open dev read-only by number.  Returns NULL, never an ERR_PTR, on failure. */
++struct block_device *nfs4_blkdev_get(dev_t dev)
++{
++	struct block_device *bd;
++
++	dprintk("%s enter\n", __func__);
++	bd = open_by_devnum(dev, FMODE_READ);
++	if (IS_ERR(bd))
++		goto fail;
++	return bd;
++fail:
++	dprintk("%s failed to open device : %ld\n",
++			__func__, PTR_ERR(bd));
++	return NULL;	/* the errno carried in bd is only logged, not propagated */
++}
++
++/*
++ * Release the block device; returns whatever blkdev_put() returns.
++ */
++int nfs4_blkdev_put(struct block_device *bdev)
++{
++	dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev),
++			MINOR(bdev->bd_dev));
++	bd_release(bdev);	/* NOTE(review): nfs4_blkdev_get() shows no bd_claim() - confirm balanced */
++	return blkdev_put(bdev, FMODE_READ);	/* same mode as the open in nfs4_blkdev_get() */
++}
++
++/* Sends dev->area to the userspace daemon via pipefs, opens the major:minor
++ * it replies with and wraps it in a new pnfs_block_dev (NULL on any failure).
++ */
++struct pnfs_block_dev *
++nfs4_blk_decode_device(struct nfs_server *server,
++		       struct pnfs_device *dev,
++		       struct list_head *sdlist)
++{
++	struct pnfs_block_dev *rv = NULL;
++	struct block_device *bd = NULL;
++	struct pipefs_hdr *msg = NULL, *reply = NULL;
++	uint32_t major, minor;
++
++	dprintk("%s enter\n", __func__);
++
++	if (IS_ERR(bl_device_pipe))
++		return NULL;
++	dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
++	dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
++		dev->mincount);
++	msg = pipefs_alloc_init_msg(0, BL_DEVICE_MOUNT, 0, dev->area,
++				    dev->mincount);
++	if (IS_ERR(msg)) {
++		dprintk("ERROR: couldn't make pipefs message.\n");
++		goto out_err;
++	}
++	msg->msgid = hash_ptr(&msg, sizeof(msg->msgid) * 8);
++	msg->status = BL_DEVICE_REQUEST_INIT;
++
++	dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
++	reply = pipefs_queue_upcall_waitreply(bl_device_pipe, msg,
++					      &bl_device_list, 0, 0);
++	if (IS_ERR(reply)) {
++		dprintk("ERROR: upcall_waitreply failed\n");
++		goto out_err;
++	}
++	if (reply->status != BL_DEVICE_REQUEST_PROC) {
++		dprintk("%s upcall failed, status %d\n",
++			__func__, (int)reply->status);
++		goto out_err;
++	}
++	memcpy(&major, (uint32_t *)(payload_of(reply)), sizeof(uint32_t));
++	memcpy(&minor, (uint32_t *)(payload_of(reply) + sizeof(uint32_t)),
++		sizeof(uint32_t));
++	bd = nfs4_blkdev_get(MKDEV(major, minor));
++	if (!bd) {	/* nfs4_blkdev_get() reports failure as NULL, not ERR_PTR */
++		dprintk("%s failed to open device %u:%u\n",
++			__func__, major, minor);
++		goto out_err;
++	}
++
++	rv = kzalloc(sizeof(*rv), GFP_KERNEL);
++	if (!rv)
++		goto out_err;
++
++	rv->bm_mdev = bd;
++	memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct pnfs_deviceid));
++	dprintk("%s Created device %s with bd_block_size %u\n",
++		__func__,
++		bd->bd_disk->disk_name,
++		bd->bd_block_size);
++	kfree(reply);
++	kfree(msg);
++	return rv;
++
++out_err:
++	if (bd)		/* opened, but rv could not be allocated: don't leak it */
++		nfs4_blkdev_put(bd);
++	if (!IS_ERR(reply))
++		kfree(reply);
++	if (!IS_ERR(msg))
++		kfree(msg);
++	return NULL;
++}
++
++/* Map deviceid returned by the server to constructed block_device, or NULL */
++static struct block_device *translate_devid(struct pnfs_layout_type *lo,
++					    struct pnfs_deviceid *id)
++{
++	struct block_device *rv = NULL;
++	struct block_mount_id *mid;
++	struct pnfs_block_dev *dev;
++
++	dprintk("%s enter, lo=%p, id=%p\n", __func__, lo, id);
++	mid = BLK_ID(lo);
++	spin_lock(&mid->bm_lock);	/* bm_lock protects bm_devlist */
++	list_for_each_entry(dev, &mid->bm_devlist, bm_node) {
++		if (memcmp(id->data, dev->bm_mdevid.data,
++			   NFS4_PNFS_DEVICEID4_SIZE) == 0) {
++			rv = dev->bm_mdev;	/* plain pointer copy, no reference taken here */
++			goto out;
++		}
++	}
++ out:
++	spin_unlock(&mid->bm_lock);
++	dprintk("%s returning %p\n", __func__, rv);
++	return rv;
++}
++
++/* Tracks info needed to ensure extents in layout obey constraints of spec.
++ * The u64 fields are 512-byte sector numbers (seeded from lseg.offset >> 9). */
++struct layout_verification {
++	u32 mode;	/* R or RW */
++	u64 start;	/* Expected start of next non-COW extent */
++	u64 inval;	/* Start of INVAL coverage */
++	u64 cowread;	/* End of COW read coverage */
++};
++
++/* Verify the extent meets the layout requirements of the pnfs-block draft,
++ * section 2.3.1.  Returns 0 and advances lv if it does, -EIO if it does not.
++ */
++static int verify_extent(struct pnfs_block_extent *be,
++			 struct layout_verification *lv)
++{
++	if (lv->mode == IOMODE_READ) {
++		if (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
++		    be->be_state == PNFS_BLOCK_INVALID_DATA)
++			return -EIO;	/* these states are rejected in a read layout */
++		if (be->be_f_offset != lv->start)
++			return -EIO;	/* must start where the previous extent ended */
++		lv->start += be->be_length;
++		return 0;
++	}
++	/* lv->mode == IOMODE_RW */
++	if (be->be_state == PNFS_BLOCK_READWRITE_DATA) {
++		if (be->be_f_offset != lv->start)
++			return -EIO;
++		if (lv->cowread > lv->start)
++			return -EIO;	/* earlier READ extents reach past this start */
++		lv->start += be->be_length;
++		lv->inval = lv->start;
++		return 0;
++	} else if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
++		if (be->be_f_offset != lv->start)
++			return -EIO;
++		lv->start += be->be_length;	/* lv->inval stays put: INVAL run continues */
++		return 0;
++	} else if (be->be_state == PNFS_BLOCK_READ_DATA) {
++		if (be->be_f_offset > lv->start)
++			return -EIO;	/* READ extent may not begin beyond covered area */
++		if (be->be_f_offset < lv->inval)
++			return -EIO;
++		if (be->be_f_offset < lv->cowread)
++			return -EIO;	/* READ extents may not overlap each other */
++		/* It looks like you might want to min this with lv->start,
++		 * but you really don't.
++		 */
++		lv->inval = lv->inval + be->be_length;
++		lv->cowread = be->be_f_offset + be->be_length;
++		return 0;
++	} else
++		return -EIO;	/* any other state, e.g. NONE_DATA, in an RW layout */
++}
++
++/* XDR decode pnfs_block_layout4, verify it, then merge its extents into lo */
++int
++nfs4_blk_process_layoutget(struct pnfs_layout_type *lo,
++			   struct nfs4_pnfs_layoutget_res *lgr)
++{
++	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
++	uint32_t *p = (uint32_t *)lgr->layout.buf;
++	uint32_t *end = (uint32_t *)((char *)lgr->layout.buf + lgr->layout.len);
++	int i, status = -EIO;
++	uint32_t count;
++	struct pnfs_block_extent *be = NULL, *save;
++	uint64_t tmp; /* Used by READ_SECTOR */
++	struct layout_verification lv = {
++		.mode = lgr->lseg.iomode,
++		.start = lgr->lseg.offset >> 9,
++		.inval = lgr->lseg.offset >> 9,
++		.cowread = lgr->lseg.offset >> 9,
++	};
++	LIST_HEAD(extents);
++
++	BLK_READBUF(p, end, 4);
++	READ32(count);
++
++	dprintk("%s enter, number of extents %u\n", __func__, count);
++	if (count > (end - p) / XDR_QUADLEN(28 + NFS4_PNFS_DEVICEID4_SIZE))
++		goto out_err;	/* divide: multiplying by a bogus count can wrap */
++
++	/* Decode individual extents, putting them in temporary
++	 * staging area until whole layout is decoded to make error
++	 * recovery easier.
++	 */
++	for (i = 0; i < count; i++) {
++		be = alloc_extent();
++		if (!be) {
++			status = -ENOMEM;
++			goto out_err;
++		}
++		READ_DEVID(&be->be_devid);
++		be->be_mdev = translate_devid(lo, &be->be_devid);
++		if (!be->be_mdev)
++			goto out_err;	/* unknown deviceid; status is still -EIO */
++		/* The next three values are read in as bytes,
++		 * but stored as 512-byte sector lengths
++		 */
++		READ_SECTOR(be->be_f_offset);
++		READ_SECTOR(be->be_length);
++		READ_SECTOR(be->be_v_offset);
++		READ32(be->be_state);
++		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++			be->be_inval = &bl->bl_inval;
++		if (verify_extent(be, &lv)) {
++			dprintk("%s verify failed\n", __func__);
++			goto out_err;
++		}
++		list_add_tail(&be->be_node, &extents);
++	}
++	if (p != end) {
++		dprintk("%s Undecoded cruft at end of opaque\n", __func__);
++		be = NULL;	/* last extent is already on the list: avoid a double put */
++		goto out_err;
++	}
++	if (lgr->lseg.offset + lgr->lseg.length != lv.start << 9) {
++		dprintk("%s Final length mismatch\n", __func__);
++		be = NULL;
++		goto out_err;
++	}
++	if (lv.start < lv.cowread) {
++		dprintk("%s Final uncovered COW extent\n", __func__);
++		be = NULL;
++		goto out_err;
++	}
++	/* Extents decoded properly, now try to merge them in to
++	 * existing layout extents.
++	 */
++	spin_lock(&bl->bl_ext_lock);
++	list_for_each_entry_safe(be, save, &extents, be_node) {
++		list_del(&be->be_node);
++		status = add_and_merge_extent(bl, be);
++		if (status) {
++			spin_unlock(&bl->bl_ext_lock);
++			/* This is a fairly catastrophic error, as the
++			 * entire layout extent lists are now corrupted.
++			 * We should have some way to distinguish this.
++			 */
++			be = NULL;
++			goto out_err;
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++	status = 0;
++ out:
++	dprintk("%s returns %i\n", __func__, status);
++	return status;
++
++ out_err:
++	put_extent(be);	/* the extent being decoded, if any; may be NULL here */
++	while (!list_empty(&extents)) {	/* drop everything still staged */
++		be = list_first_entry(&extents, struct pnfs_block_extent,
++				      be_node);
++		list_del(&be->be_node);
++		put_extent(be);
++	}
++	goto out;
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-08-23 12:09:03.290395707 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-08-23 12:09:03.290395707 -0400
+@@ -0,0 +1,120 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayoutdm.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2007 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Fred Isaman <iisaman@umich.edu>
++ *  Andy Adamson <andros@citi.umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++
++#include <linux/genhd.h> /* gendisk - used in a dprintk*/
++#include <linux/sched.h>
++#include <linux/hash.h>
++
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/* Defines used for calculating memory usage in nfs4_blk_flatten() */
++#define ARGSIZE   24    /* Max bytes needed for linear target arg string */
++#define SPECSIZE (sizeof8(struct dm_target_spec) + ARGSIZE)	/* 8-byte-rounded spec plus its arg string */
++#define SPECS_PER_PAGE (PAGE_SIZE / SPECSIZE)	/* whole specs that fit in one page */
++#define SPEC_HEADER_ADJUST (SPECS_PER_PAGE - \
++			    (PAGE_SIZE - sizeof8(struct dm_ioctl)) / SPECSIZE)	/* specs lost to a dm_ioctl header */
++#define roundup8(x) (((x)+7) & ~7)	/* round x up to a multiple of 8 */
++#define sizeof8(x) roundup8(sizeof(x))	/* sizeof, rounded up to a multiple of 8 */
++
++static int dev_remove(dev_t dev)
++{
++	int ret = 1;	/* 1 = failure; cleared only when the reply status is PROC */
++	struct pipefs_hdr *msg = NULL, *reply = NULL;
++	uint64_t bl_dev;	/* upcall payload: major, then minor, 32 bits each */
++	uint32_t major = MAJOR(dev), minor = MINOR(dev);
++
++	dprintk("Entering %s\n", __func__);
++
++	if (IS_ERR(bl_device_pipe))
++		return ret;
++
++	memcpy((void *)&bl_dev, &major, sizeof(uint32_t));
++	memcpy((void *)&bl_dev + sizeof(uint32_t), &minor, sizeof(uint32_t));
++	msg = pipefs_alloc_init_msg(0, BL_DEVICE_UMOUNT, 0, (void *)&bl_dev,
++				    sizeof(uint64_t));
++	if (IS_ERR(msg)) {
++		dprintk("ERROR: couldn't make pipefs message.\n");
++		goto out;
++	}
++	msg->msgid = hash_ptr(&msg, sizeof(msg->msgid) * 8);	/* NOTE(review): hashes &msg (the local), not msg - confirm */
++	msg->status = BL_DEVICE_REQUEST_INIT;
++
++	reply = pipefs_queue_upcall_waitreply(bl_device_pipe, msg,
++					      &bl_device_list, 0, 0);
++	if (IS_ERR(reply)) {
++		dprintk("ERROR: upcall_waitreply failed\n");
++		goto out;
++	}
++
++	if (reply->status == BL_DEVICE_REQUEST_PROC)
++		ret = 0; /*TODO: what to return*/
++out:
++	if (!IS_ERR(reply))	/* reply may still be NULL here; kfree(NULL) is a no-op */
++		kfree(reply);
++	if (!IS_ERR(msg))
++		kfree(msg);
++	return ret;
++}
++
++/*
++ * Release meta device: put bm_mdev, then have the daemon remove it (0 on success)
++ */
++static int nfs4_blk_metadev_release(struct pnfs_block_dev *bdev)
++{
++	dev_t dev = bdev->bm_mdev->bd_dev;	/* read now: not safe after the put */
++	int rv;
++
++	dprintk("%s Releasing\n", __func__);
++	rv = nfs4_blkdev_put(bdev->bm_mdev);	/* XXX Check return? */
++	dprintk("%s nfs4_blkdev_put returns %d\n", __func__, rv);
++
++	rv = dev_remove(dev);
++	dprintk("%s Returns %d\n", __func__, rv);
++	return rv;
++}
++
++void free_block_dev(struct pnfs_block_dev *bdev)
++{
++	if (bdev) {	/* NULL is accepted and ignored */
++		if (bdev->bm_mdev) {
++			dprintk("%s Removing DM device: %d:%d\n",
++				__func__,
++				MAJOR(bdev->bm_mdev->bd_dev),
++				MINOR(bdev->bm_mdev->bd_dev));
++			/* XXX Check status ?? */
++			nfs4_blk_metadev_release(bdev);
++		}
++		kfree(bdev);	/* freed whether or not the release above succeeded */
++	}
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-08-23 12:09:03.290395707 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-08-23 12:09:03.291501560 -0400
+@@ -0,0 +1,303 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayout.h
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#ifndef FS_NFS_NFS4BLOCKLAYOUT_H
++#define FS_NFS_NFS4BLOCKLAYOUT_H
++
++#include <linux/nfs_fs.h>
++#include <linux/pnfs_xdr.h> /* Needed by nfs4_pnfs.h */
++#include <linux/nfs4_pnfs.h>
++#include <linux/dm-ioctl.h> /* Needed for struct dm_ioctl*/
++
++#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> 9)	/* 512-byte sectors per page-cache page */
++
++#define PG_pnfserr PG_owner_priv_1	/* reuses the PG_owner_priv_1 page flag bit */
++#define PagePnfsErr(page)	test_bit(PG_pnfserr, &(page)->flags)
++#define SetPagePnfsErr(page)	set_bit(PG_pnfserr, &(page)->flags)
++#define ClearPagePnfsErr(page)	clear_bit(PG_pnfserr, &(page)->flags)
++
++extern int dm_dev_create(struct dm_ioctl *param); /* from dm-ioctl.c */
++extern int dm_dev_remove(struct dm_ioctl *param); /* from dm-ioctl.c */
++extern int dm_do_resume(struct dm_ioctl *param);
++extern int dm_table_load(struct dm_ioctl *param, size_t param_size);
++
++struct block_mount_id {	/* reached from a layout through BLK_ID(lo) */
++	spinlock_t			bm_lock;    /* protects list */
++	struct list_head		bm_devlist; /* holds pnfs_block_dev */
++};
++
++struct pnfs_block_dev {
++	struct list_head		bm_node;      /* link in block_mount_id.bm_devlist */
++	struct pnfs_deviceid		bm_mdevid;    /* associated devid */
++	struct block_device		*bm_mdev;     /* meta device itself */
++};
++
++/* holds visible disks that can be matched against VOLUME_SIMPLE signatures */
++struct visible_block_device {
++	struct list_head	vi_node;
++	struct block_device	*vi_bdev;
++	int			vi_mapped;
++	int			vi_put_done;
++};
++
++enum blk_vol_type {
++	PNFS_BLOCK_VOLUME_SIMPLE   = 0,	/* maps to a single LU */
++	PNFS_BLOCK_VOLUME_SLICE    = 1,	/* slice of another volume */
++	PNFS_BLOCK_VOLUME_CONCAT   = 2,	/* concatenation of multiple volumes */
++	PNFS_BLOCK_VOLUME_STRIPE   = 3	/* striped across multiple volumes */
++};
++
++/* All disk offset/lengths are stored in 512-byte sectors */
++struct pnfs_blk_volume {
++	uint32_t		bv_type;
++	sector_t 		bv_size;
++	struct pnfs_blk_volume 	**bv_vols;
++	int 			bv_vol_n;
++	union {
++		dev_t			bv_dev;
++		sector_t		bv_stripe_unit;
++		sector_t 		bv_offset;
++	};
++};
++
++/* Since components need not be aligned, cannot use sector_t */
++struct pnfs_blk_sig_comp {
++	int64_t 	bs_offset;  /* In bytes */
++	uint32_t   	bs_length;  /* In bytes */
++	char 		*bs_string;
++};
++
++/* Maximum number of signature components in a simple volume */
++# define PNFS_BLOCK_MAX_SIG_COMP 16
++
++struct pnfs_blk_sig {
++	int 				si_num_comps;
++	struct pnfs_blk_sig_comp	si_comps[PNFS_BLOCK_MAX_SIG_COMP];
++};
++
++enum exstate4 {
++	PNFS_BLOCK_READWRITE_DATA	= 0,
++	PNFS_BLOCK_READ_DATA		= 1,
++	PNFS_BLOCK_INVALID_DATA		= 2, /* mapped, but data is invalid */
++	PNFS_BLOCK_NONE_DATA		= 3  /* unmapped, it's a hole */
++};
++
++#define MY_MAX_TAGS (15) /* tag bitnums used must be less than this */
++
++struct my_tree_t {
++	sector_t		mtt_step_size;	/* Internal sector alignment */
++	struct list_head	mtt_stub; /* Should be a radix tree; for now a list of pnfs_inval_tracking */
++};
++
++struct pnfs_inval_markings {
++	spinlock_t	im_lock;	/* held around im_tree lookups and updates */
++	struct my_tree_t im_tree;	/* Sectors that need LAYOUTCOMMIT */
++	sector_t	im_block_size;	/* Server blocksize in sectors */
++};
++
++struct pnfs_inval_tracking {
++	struct list_head it_link;	/* link in my_tree_t.mtt_stub */
++	u64		 it_sector;	/* 64-bit: sector numbers must not be truncated */
++	int		 it_tags;	/* bitmask, one bit per tag number */
++};
++
++/* sector_t fields are all in 512-byte sectors */
++struct pnfs_block_extent {
++	struct kref	be_refcnt;
++	struct list_head be_node;	/* link into lseg list */
++	struct pnfs_deviceid be_devid;  /* STUB - removable??? */
++	struct block_device *be_mdev;	/* device that be_devid was resolved to */
++	sector_t	be_f_offset;	/* the starting offset in the file */
++	sector_t	be_length;	/* the size of the extent */
++	sector_t	be_v_offset;	/* the starting offset in the volume */
++	enum exstate4	be_state;	/* the state of this extent */
++	struct pnfs_inval_markings *be_inval; /* tracks INVAL->RW transition */
++};
++
++/* Shortened extent used by LAYOUTCOMMIT */
++struct pnfs_block_short_extent {
++	struct list_head bse_node;
++	struct pnfs_deviceid bse_devid;	/* STUB - removable??? */
++	struct block_device *bse_mdev;
++	sector_t	bse_f_offset;	/* the starting offset in the file */
++	sector_t	bse_length;	/* the size of the extent */
++};
++
++static inline void
++INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
++{	/* blocksize is in 512-byte sectors, like im_block_size */
++	spin_lock_init(&marks->im_lock);
++	INIT_LIST_HEAD(&marks->im_tree.mtt_stub);	/* start with no tracked sectors */
++	marks->im_block_size = blocksize;
++	marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
++					   blocksize);	/* track at the finer of page and block size */
++}
++
++enum extentclass4 {
++	RW_EXTENT	= 0, /* READWRITE and INVAL */
++	RO_EXTENT	= 1, /* READ and NONE */
++	EXTENT_LISTS	= 2, /* number of lists; sizes bl_extents[] */
++};
++
++static inline int choose_list(enum exstate4 state)
++{
++	/* READ_DATA/NONE_DATA -> read-only list; any other state -> RW list */
++	const int read_only = state == PNFS_BLOCK_READ_DATA ||
++			      state == PNFS_BLOCK_NONE_DATA;
++	return read_only ? RO_EXTENT : RW_EXTENT;
++}
++
++struct pnfs_block_layout {
++	struct pnfs_layout_type bl_layout;
++	struct pnfs_inval_markings bl_inval; /* tracks INVAL->RW transition */
++	spinlock_t		bl_ext_lock;   /* Protects list manipulation */
++	struct list_head	bl_extents[EXTENT_LISTS]; /* R and RW extents */
++	struct list_head	bl_commit;	/* Needs layout commit */
++	unsigned int		bl_count;	/* entries in bl_commit */
++	sector_t		bl_blocksize;  /* Server blocksize in sectors */
++};
++
++/* this struct is communicated between:
++ * bl_setup_layoutcommit && bl_encode_layoutcommit && bl_cleanup_layoutcommit
++ */
++struct bl_layoutupdate_data {
++	struct list_head ranges;
++};
++
++#define BLK_ID(lo) ((struct block_mount_id *)(PNFS_NFS_SERVER(lo)->pnfs_ld_data))
++
++static inline struct pnfs_block_layout *
++BLK_LO2EXT(struct pnfs_layout_type *lo)
++{
++	return container_of(lo, struct pnfs_block_layout, bl_layout);
++}
++
++static inline struct pnfs_block_layout *
++BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
++{
++	return BLK_LO2EXT(lseg->layout);
++}
++
++uint32_t *blk_overflow(uint32_t *p, uint32_t *end, size_t nbytes);
++
++#define BLK_READBUF(p, e, nbytes)  do { \
++	p = blk_overflow(p, e, nbytes); \
++	if (!p) { \
++		printk(KERN_WARNING \
++			"%s: reply buffer overflowed in line %d.\n", \
++			__func__, __LINE__); \
++		goto out_err; \
++	} \
++} while (0)	/* needs an out_err label in the caller; p is NULL when it jumps */
++
++#define READ32(x)         (x) = ntohl(*p++)	/* p: decode cursor in the caller's scope */
++#define READ64(x)         do {                  \
++	(x) = (uint64_t)ntohl(*p++) << 32;           \
++	(x) |= ntohl(*p++);                     \
++} while (0)	/* high word first, then low word */
++#define COPYMEM(x, nbytes) do {                 \
++	memcpy((x), p, nbytes);                 \
++	p += XDR_QUADLEN(nbytes);               \
++} while (0)
++#define READ_DEVID(x)	COPYMEM((x)->data, NFS4_PNFS_DEVICEID4_SIZE)
++#define READ_SECTOR(x)     do { \
++	READ64(tmp); \
++	if (tmp & 0x1ff) { \
++		printk(KERN_WARNING \
++		       "%s Value not 512-byte aligned at line %d\n", \
++		       __func__, __LINE__);			     \
++		goto out_err; \
++	} \
++	(x) = tmp >> 9; \
++} while (0)	/* needs a uint64_t tmp and an out_err label in the caller */
++
++#define WRITE32(n)               do { \
++	*p++ = htonl(n); \
++	} while (0)
++#define WRITE64(n)               do {                           \
++	*p++ = htonl((uint32_t)((n) >> 32));			\
++	*p++ = htonl((uint32_t)(n));				\
++} while (0)
++#define WRITEMEM(ptr, nbytes)     do {                          \
++	p = xdr_encode_opaque_fixed(p, ptr, nbytes);	\
++} while (0)
++#define WRITE_DEVID(x)  WRITEMEM((x)->data, NFS4_PNFS_DEVICEID4_SIZE)
++
++/* blocklayoutdev.c */
++struct block_device *nfs4_blkdev_get(dev_t dev);
++int nfs4_blkdev_put(struct block_device *bdev);
++struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server,
++					      struct pnfs_device *dev,
++					      struct list_head *sdlist);
++int nfs4_blk_process_layoutget(struct pnfs_layout_type *lo,
++			       struct nfs4_pnfs_layoutget_res *lgr);
++int nfs4_blk_create_block_disk_list(struct list_head *);
++void nfs4_blk_destroy_disk_list(struct list_head *);
++/* blocklayoutdm.c */
++int nfs4_blk_flatten(struct pnfs_blk_volume *, int, struct pnfs_block_dev *);
++void free_block_dev(struct pnfs_block_dev *bdev);
++/* extents.c */
++struct pnfs_block_extent *
++find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
++		struct pnfs_block_extent **cow_read);
++int mark_initialized_sectors(struct pnfs_inval_markings *marks,
++			     sector_t offset, sector_t length,
++			     sector_t **pages);
++void put_extent(struct pnfs_block_extent *be);
++struct pnfs_block_extent *alloc_extent(void);
++struct pnfs_block_extent *get_extent(struct pnfs_block_extent *be);
++int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect);
++int encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++				   struct xdr_stream *xdr,
++				   const struct pnfs_layoutcommit_arg *arg);
++void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++				   const struct pnfs_layoutcommit_arg *arg,
++				   int status);
++int add_and_merge_extent(struct pnfs_block_layout *bl,
++			 struct pnfs_block_extent *new);
++int mark_for_commit(struct pnfs_block_extent *be,
++		    sector_t offset, sector_t length);
++
++#include <linux/sunrpc/simple_rpc_pipefs.h>
++
++extern struct pipefs_list bl_device_list;
++extern struct dentry *bl_device_pipe;
++
++int bl_pipe_init(void);
++void bl_pipe_exit(void);
++
++#define BL_DEVICE_UMOUNT               0x0 /* Umount--delete devices */
++#define BL_DEVICE_MOUNT                0x1 /* Mount--create devices*/
++#define BL_DEVICE_REQUEST_INIT         0x0 /* Start request */
++#define BL_DEVICE_REQUEST_PROC         0x1 /* User level process succeeds */
++#define BL_DEVICE_REQUEST_ERR          0x2 /* User level process fails */
++
++#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-08-23 12:09:03.292511531 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-08-23 12:09:03.292511531 -0400
+@@ -0,0 +1,948 @@
++/*
++ *  linux/fs/nfs/blocklayout/extents.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++
++#include "blocklayout.h"
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/* Bit numbers used in pnfs_inval_tracking.it_tags */
++#define EXTENT_INITIALIZED 0
++#define EXTENT_WRITTEN     1
++#define EXTENT_IN_COMMIT   2
++#define INTERNAL_EXISTS    MY_MAX_TAGS	/* placeholder tag used by _preload_range() */
++#define INTERNAL_MASK      ((1 << INTERNAL_EXISTS) - 1)	/* every bit below INTERNAL_EXISTS */
++
++/* Returns largest t<=s s.t. t%base==0 (base must be non-zero) */
++static inline sector_t normalize(sector_t s, int base)
++{
++	sector_t tmp = s; /* Since do_div modifies its argument */
++	return s - do_div(tmp, base);	/* do_div() returns the remainder */
++}
++
++static inline sector_t normalize_up(sector_t s, int base)
++{
++	return normalize(s + base - 1, base);	/* smallest multiple of base that is >= s */
++}
++
++/* Complete stub: uses a plain list until the wanted API is determined */
++
++/* Returns tags (INTERNAL_EXISTS masked off) of the entry at s, or -ENOENT */
++static int32_t _find_entry(struct my_tree_t *tree, u64 s)
++{
++	struct pnfs_inval_tracking *pos;
++
++	dprintk("%s(%llu) enter\n", __func__, s);
++	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
++		if (pos->it_sector > s)
++			continue;	/* walking backwards: still above s */
++		else if (pos->it_sector == s)
++			return pos->it_tags & INTERNAL_MASK;
++		else
++			break;	/* already below s, so s has no entry */
++	}
++	return -ENOENT;
++}
++
++static inline
++int _has_tag(struct my_tree_t *tree, u64 s, int32_t tag)
++{	/* 1 if the step containing sector s has bit 'tag' set, else 0 */
++	int32_t tags;
++
++	dprintk("%s(%llu, %i) enter\n", __func__, s, tag);
++	s = normalize(s, tree->mtt_step_size);	/* round down to the start of its step */
++	tags = _find_entry(tree, s);
++	if ((tags < 0) || !(tags & (1 << tag)))	/* no entry at all, or tag not set */
++		return 0;
++	else
++		return 1;
++}
++
++/* Creates entry with tag, or if entry already exists, unions tag to it.
++ * If storage is not NULL, newly created entry will use it.
++ * Returns number of entries added (0 or 1), or negative on error.
++ */
++static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag,
++		      struct pnfs_inval_tracking *storage)
++{
++	int found = 0;
++	struct pnfs_inval_tracking *pos;
++
++	dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage);
++	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
++		if (pos->it_sector > s)
++			continue;
++		else if (pos->it_sector == s) {
++			found = 1;
++			break;
++		} else
++			break;	/* pos is the closest entry below s */
++	}
++	if (found) {
++		pos->it_tags |= (1 << tag);
++		return 0;	/* storage, if any, was not consumed */
++	} else {
++		struct pnfs_inval_tracking *new;
++		if (storage)
++			new = storage;
++		else {
++			new = kmalloc(sizeof(*new), GFP_KERNEL);	/* NOTE(review): GFP_KERNEL; _set_range() gets here with im_lock held */
++			if (!new)
++				return -ENOMEM;
++		}
++		new->it_sector = s;
++		new->it_tags = (1 << tag);
++		list_add(&new->it_link, &pos->it_link);	/* after pos; if the loop ran out, pos aliases the list head */
++		return 1;
++	}
++}
++
++/* XXXX Really want option to not create */
++/* Over range, unions tag with existing entries, else creates entry with tag; 0 or -ENOMEM */
++static int _set_range(struct my_tree_t *tree, int32_t tag, u64 s, u64 length)
++{
++	u64 i;
++
++	dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length);
++	for (i = normalize(s, tree->mtt_step_size); i < s + length;
++	     i += tree->mtt_step_size)
++		if (_add_entry(tree, i, tag, NULL) < 0)	/* 1 means "created": not an error */
++			return -ENOMEM;
++	return 0;
++}
++
++/* Ensure that future operations on given range of tree will not malloc */
++static int _preload_range(struct my_tree_t *tree, u64 offset, u64 length)
++{
++	u64 start, end, s;
++	int count, i, used = 0, status = -ENOMEM;
++	struct pnfs_inval_tracking **storage;
++
++	dprintk("%s(%llu, %llu) enter\n", __func__, offset, length);
++	start = normalize(offset, tree->mtt_step_size);
++	end = normalize_up(offset + length, tree->mtt_step_size);
++	count = (int)(end - start) / (int)tree->mtt_step_size;	/* steps in [start, end) */
++
++	/* Pre-malloc what memory we might need */
++	storage = kmalloc(sizeof(*storage) * count, GFP_KERNEL);
++	if (!storage)
++		return -ENOMEM;
++	for (i = 0; i < count; i++) {
++		storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking),
++				     GFP_KERNEL);
++		if (!storage[i])
++			goto out_cleanup;
++	}
++
++	/* Now need lock - HOW???  (mark_initialized_sectors() calls us unlocked) */
++
++	for (s = start; s < end; s += tree->mtt_step_size)
++		used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);	/* 1 only when storage[used] was consumed */
++
++	/* Unlock - HOW??? */
++	status = 0;
++
++ out_cleanup:
++	for (i = used; i < count; i++) {	/* free whatever the tree did not take */
++		if (!storage[i])
++			break;	/* the allocation that failed: nothing valid beyond it */
++		kfree(storage[i]);
++	}
++	kfree(storage);
++	return status;
++}
++
++static void set_needs_init(sector_t *array, sector_t offset)
++{	/* array: ascending sector list ending in ~0; insert offset unless present */
++	sector_t *p = array;
++
++	dprintk("%s enter\n", __func__);
++	if (!p)
++		return;	/* caller did not ask for the list */
++	while (*p < offset)
++		p++;	/* stops at the ~0 terminator at the latest */
++	if (*p == offset)
++		return;
++	else if (*p == ~0) {
++		*p++ = offset;	/* append, then re-terminate */
++		*p = ~0;
++		return;
++	} else {
++		sector_t *save = p;
++		dprintk("%s Adding %llu\n", __func__, (u64)offset);
++		while (*p != ~0)
++			p++;
++		p++;	/* include the terminator in the block that moves */
++		memmove(save + 1, save, (char *)p - (char *)save);	/* NOTE(review): no capacity check visible here */
++		*save = offset;
++		return;
++	}
++}
++
++/* We are relying on page lock to serialize this; returns 1 if tagged INITIALIZED, else 0 */
++int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect)
++{
++	int rv;
++
++	spin_lock(&marks->im_lock);
++	rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
++	spin_unlock(&marks->im_lock);
++	return rv;
++}
++
++/* Assume start, end already sector aligned.  1 if all of [start, end) has tag */
++static int
++_range_has_tag(struct my_tree_t *tree, u64 start, u64 end, int32_t tag)
++{
++	struct pnfs_inval_tracking *pos;
++	u64 expect = end;	/* the entry we need next must end exactly here */
++
++	dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag);
++	/* Walk the list backwards, from the step just below end down to
++	 * start: every step on the way must be present, contiguous with the
++	 * one above it, and carry the tag.
++	 */
++	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
++		if (pos->it_sector >= end)
++			continue;
++		if (pos->it_sector + tree->mtt_step_size != expect ||
++		    !(pos->it_tags & (1 << tag)))
++			return 0;	/* gap in the chain, or tag not set */
++		expect = pos->it_sector;
++		/* Done once the next step down would lie below start.  Test
++		 * against the step size first: computing expect - step for
++		 * an entry at sector 0 would wrap around and never compare
++		 * below start, so ranges beginning at 0 could never succeed.
++		 */
++		if (expect < tree->mtt_step_size ||
++		    expect - tree->mtt_step_size < start)
++			return 1;
++	}
++	return 0;	/* ran out of entries before reaching start */
++}
++
++static int is_range_written(struct pnfs_inval_markings *marks,
++			    sector_t start, sector_t end)
++{	/* 1 if every step in [start, end) is tagged EXTENT_WRITTEN, else 0 */
++	int rv;
++
++	spin_lock(&marks->im_lock);	/* im_lock held across the list walk */
++	rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN);
++	spin_unlock(&marks->im_lock);
++	return rv;
++}
++
++/* Marks sectors in [offset, offset+length) as having been initialized.
++ * All lengths are step-aligned, where step is min(pagesize, blocksize).
++ * Notes where partial block is initialized, and helps prepare it for
++ * complete initialization later.
++ */
++/* Currently assumes offset is page-aligned.  Returns 0, or -ENOMEM on failure. */
++int mark_initialized_sectors(struct pnfs_inval_markings *marks,
++			     sector_t offset, sector_t length,
++			     sector_t **pages)
++{
++	sector_t s, start, end;
++	sector_t *array = NULL; /* Pages to mark */
++
++	dprintk("%s(offset=%llu,len=%llu) enter\n",
++		__func__, (u64)offset, (u64)length);
++	s = max((sector_t) 3,	/* number of entries allocated for array below */
++		2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
++	dprintk("%s set max=%llu\n", __func__, (u64)s);
++	if (pages) {
++		array = kmalloc(s * sizeof(sector_t), GFP_KERNEL);
++		if (!array)
++			goto outerr;
++		array[0] = ~0;	/* empty list: sentinel only */
++	}
++
++	start = normalize(offset, marks->im_block_size);
++	end = normalize_up(offset + length, marks->im_block_size);
++	if (_preload_range(&marks->im_tree, start, end - start))
++		goto outerr;
++
++	spin_lock(&marks->im_lock);
++
++	for (s = normalize_up(start, PAGE_CACHE_SECTORS);
++	     s < offset; s += PAGE_CACHE_SECTORS) {
++		dprintk("%s pre-area pages\n", __func__);
++		/* Portion of used block is not initialized */
++		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
++			set_needs_init(array, s);
++	}
++	if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
++		goto out_unlock;
++	for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS);
++	     s < end; s += PAGE_CACHE_SECTORS) {
++		dprintk("%s post-area pages\n", __func__);
++		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
++			set_needs_init(array, s);
++	}
++
++	spin_unlock(&marks->im_lock);
++
++	if (pages) {
++		if (array[0] == ~0) {	/* nothing was recorded */
++			kfree(array);
++			*pages = NULL;
++		} else
++			*pages = array;	/* handed to the caller, not freed here */
++	}
++	return 0;
++
++ out_unlock:
++	spin_unlock(&marks->im_lock);
++ outerr:
++	if (pages) {
++		kfree(array);	/* array may still be NULL here; kfree() accepts that */
++		*pages = NULL;
++	}
++	return -ENOMEM;
++}
++
++/* Marks sectors in [offset, offset+length) as having been written to disk.
++ * All lengths should be block aligned.  Returns the _set_range() status.
++ */
++int mark_written_sectors(struct pnfs_inval_markings *marks,
++			 sector_t offset, sector_t length)
++{
++	int status;
++
++	dprintk("%s(offset=%llu,len=%llu) enter\n", __func__,
++		(u64)offset, (u64)length);
++	spin_lock(&marks->im_lock);	/* im_tree is updated under im_lock */
++	status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length);
++	spin_unlock(&marks->im_lock);
++	return status;
++}
++
++static void print_short_extent(struct pnfs_block_short_extent *be)
++{
++	dprintk("PRINT SHORT EXTENT extent %p\n", be);
++	if (be) {	/* debug dump only; a NULL be prints just the pointer */
++		dprintk("        be_f_offset %llu\n", (u64)be->bse_f_offset);
++		dprintk("        be_length   %llu\n", (u64)be->bse_length);
++	}
++}
++
++void print_clist(struct list_head *list, unsigned int count)
++{
++	struct pnfs_block_short_extent *be;
++	unsigned int i = 0;	/* entries actually found on the list */
++
++	dprintk("****************\n");
++	dprintk("Extent list looks like:\n");
++	list_for_each_entry(be, list, bse_node) {
++		i++;
++		print_short_extent(be);
++	}
++	if (i != count)	/* report a mismatch with the caller's expected count */
++		dprintk("\n\nExpected %u entries\n\n\n", count);
++	dprintk("****************\n");
++}
++
++/* Note: In theory, we should do more checking that devid's match between
++ * old and new, but if they don't, the lists are too corrupt to salvage anyway.
++ */
++/* Note this is very similar to add_and_merge_extent */
++static void add_to_commitlist(struct pnfs_block_layout *bl,
++			      struct pnfs_block_short_extent *new)
++{
++	struct list_head *clist = &bl->bl_commit;
++	struct pnfs_block_short_extent *old, *save;
++	sector_t end = new->bse_f_offset + new->bse_length;
++
++	dprintk("%s enter\n", __func__);
++	print_short_extent(new);
++	print_clist(clist, bl->bl_count);
++	bl->bl_count++;	/* count new up front; undone below if new is dropped */
++	/* Scan for proper place to insert, extending new to the left
++	 * as much as possible.
++	 */
++	list_for_each_entry_safe(old, save, clist, bse_node) {
++		if (new->bse_f_offset < old->bse_f_offset)
++			break;	/* new goes in front of old */
++		if (end <= old->bse_f_offset + old->bse_length) {
++			/* Range is already in list */
++			bl->bl_count--;
++			kfree(new);	/* new is consumed here when it is not used */
++			return;
++		} else if (new->bse_f_offset <=
++				old->bse_f_offset + old->bse_length) {
++			/* new overlaps or abuts existing be */
++			if (new->bse_mdev == old->bse_mdev) {
++				/* extend new to fully replace old */
++				new->bse_length += new->bse_f_offset -
++						old->bse_f_offset;
++				new->bse_f_offset = old->bse_f_offset;
++				list_del(&old->bse_node);
++				bl->bl_count--;
++				kfree(old);
++			}
++		}
++	}
++	/* Note that if we never hit the above break, old will not point to a
++	 * valid extent.  However, in that case &old->bse_node==list.
++	 */
++	list_add_tail(&new->bse_node, &old->bse_node);
++	/* Scan forward for overlaps.  If we find any, extend new and
++	 * remove the overlapped extent.
++	 */
++	old = list_prepare_entry(new, clist, bse_node);
++	list_for_each_entry_safe_continue(old, save, clist, bse_node) {
++		if (end < old->bse_f_offset)
++			break;	/* old starts past new: nothing more to merge */
++		/* new overlaps or abuts old */
++		if (new->bse_mdev == old->bse_mdev) {
++			if (end < old->bse_f_offset + old->bse_length) {
++				/* extend new to fully cover old */
++				end = old->bse_f_offset + old->bse_length;
++				new->bse_length = end - new->bse_f_offset;
++			}
++			list_del(&old->bse_node);
++			bl->bl_count--;
++			kfree(old);
++		}
++	}
++	dprintk("%s: after merging\n", __func__);
++	print_clist(clist, bl->bl_count);
++}
++
++/* Note the range described by offset, length is guaranteed to be contained
++ * within be.  Returns 0, or -ENOMEM if the commit entry cannot be allocated.
++ */
++int mark_for_commit(struct pnfs_block_extent *be,
++		    sector_t offset, sector_t length)
++{
++	sector_t new_end, end = offset + length;
++	struct pnfs_block_short_extent *new;
++	struct pnfs_block_layout *bl = container_of(be->be_inval,
++						    struct pnfs_block_layout,
++						    bl_inval);
++
++	new = kmalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return -ENOMEM;
++
++	mark_written_sectors(be->be_inval, offset, length);	/* NOTE(review): status ignored */
++	/* We want to add the range to commit list, but it must be
++	 * block-normalized, and verified that the normalized range has
++	 * been entirely written to disk.
++	 */
++	new->bse_f_offset = offset;
++	offset = normalize(offset, bl->bl_blocksize);
++	if (offset < new->bse_f_offset) {
++		if (is_range_written(be->be_inval, offset, new->bse_f_offset))
++			new->bse_f_offset = offset;	/* head of block written: include it */
++		else
++			new->bse_f_offset = offset + bl->bl_blocksize;	/* skip partial block */
++	}
++	new_end = normalize_up(end, bl->bl_blocksize);
++	if (end < new_end) {
++		if (is_range_written(be->be_inval, end, new_end))
++			end = new_end;	/* tail of block written: include it */
++		else
++			end = new_end - bl->bl_blocksize;	/* drop partial block */
++	}
++	if (end <= new->bse_f_offset) {	/* no whole block left to commit */
++		kfree(new);
++		return 0;
++	}
++	new->bse_length = end - new->bse_f_offset;
++	new->bse_devid = be->be_devid;
++	new->bse_mdev = be->be_mdev;
++
++	spin_lock(&bl->bl_ext_lock);
++	/* new will be freed, either by add_to_commitlist if it decides not
++	 * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
++	 */
++	add_to_commitlist(bl, new);
++	spin_unlock(&bl->bl_ext_lock);
++	return 0;
++}
++
++static void print_bl_extent(struct pnfs_block_extent *be)
++{
++	dprintk("PRINT EXTENT extent %p\n", be);
++	if (be) {	/* debug dump only; a NULL be prints just the pointer */
++		dprintk("        be_f_offset %llu\n", (u64)be->be_f_offset);
++		dprintk("        be_length   %llu\n", (u64)be->be_length);
++		dprintk("        be_v_offset %llu\n", (u64)be->be_v_offset);
++		dprintk("        be_state    %d\n", be->be_state);
++	}
++}
++
++static void
++destroy_extent(struct kref *kref)	/* release callback passed to kref_put() */
++{
++	struct pnfs_block_extent *be;
++
++	be = container_of(kref, struct pnfs_block_extent, be_refcnt);
++	dprintk("%s be=%p\n", __func__, be);
++	kfree(be);	/* frees only the extent struct itself */
++}
++
++void
++put_extent(struct pnfs_block_extent *be)
++{
++	if (be) {	/* NULL is accepted and ignored */
++		dprintk("%s enter %p (%i)\n", __func__, be,
++			atomic_read(&be->be_refcnt.refcount));
++		kref_put(&be->be_refcnt, destroy_extent);	/* frees be on last put */
++	}
++}
++
++struct pnfs_block_extent *alloc_extent(void)
++{
++	struct pnfs_block_extent *be;
++
++	be = kmalloc(sizeof(struct pnfs_block_extent), GFP_KERNEL);
++	if (!be)
++		return NULL;	/* allocation failure is reported as NULL */
++	INIT_LIST_HEAD(&be->be_node);
++	kref_init(&be->be_refcnt);	/* this initial reference is returned with be */
++	be->be_inval = NULL;
++	return be;	/* other fields are not initialized by this function */
++}
++
++struct pnfs_block_extent *
++get_extent(struct pnfs_block_extent *be)
++{
++	if (be)	/* NULL is accepted and passed straight through */
++		kref_get(&be->be_refcnt);
++	return be;
++}
++
++void print_elist(struct list_head *list)
++{
++	struct pnfs_block_extent *be;
++	dprintk("****************\n");
++	dprintk("Extent list looks like:\n");
++	list_for_each_entry(be, list, be_node) {	/* debug dump, head to tail */
++		print_bl_extent(be);
++	}
++	dprintk("****************\n");
++}
++
++static inline int
++extents_consistent(struct pnfs_block_extent *old, struct pnfs_block_extent *new)
++{
++	/* Note this assumes new->be_f_offset >= old->be_f_offset */
++	return (new->be_state == old->be_state) &&	/* states must match */
++		((new->be_state == PNFS_BLOCK_NONE_DATA) ||	/* no further check for NONE */
++		 ((new->be_v_offset - old->be_v_offset ==	/* same file-to-volume delta */
++		   new->be_f_offset - old->be_f_offset) &&
++		  new->be_mdev == old->be_mdev));	/* and the same device */
++}
++
++/* Adds new to appropriate list in bl, modifying new and removing existing
++ * extents as appropriate to deal with overlaps.
++ *
++ * See find_get_extent for list constraints.
++ *
++ * Refcount on new is already set.  If end up not using it, or error out,
++ * need to put the reference.
++ *
++ * Lock is held by caller.  Returns 0, or -EIO on an inconsistent overlap.
++ */
++int
++add_and_merge_extent(struct pnfs_block_layout *bl,
++		     struct pnfs_block_extent *new)
++{
++	struct pnfs_block_extent *be, *tmp;
++	sector_t end = new->be_f_offset + new->be_length;
++	struct list_head *list;
++
++	dprintk("%s enter with be=%p\n", __func__, new);
++	print_bl_extent(new);
++	list = &bl->bl_extents[choose_list(new->be_state)];
++	print_elist(list);
++
++	/* Scan for proper place to insert, extending new to the left
++	 * as much as possible.
++	 */
++	list_for_each_entry_safe_reverse(be, tmp, list, be_node) {
++		if (new->be_f_offset >= be->be_f_offset + be->be_length)
++			break;	/* be ends at or before new starts: insert after be */
++		if (new->be_f_offset >= be->be_f_offset) {
++			if (end <= be->be_f_offset + be->be_length) {
++				/* new is a subset of existing be*/
++				if (extents_consistent(be, new)) {
++					dprintk("%s: new is subset, ignoring\n",
++						__func__);
++					put_extent(new);	/* new is not used */
++					return 0;
++				} else {
++					goto out_err;
++				}
++			} else {
++				/* |<--   be   -->|
++				 *          |<--   new   -->| */
++				if (extents_consistent(be, new)) {
++					/* extend new to fully replace be */
++					new->be_length += new->be_f_offset -
++						be->be_f_offset;
++					new->be_f_offset = be->be_f_offset;
++					new->be_v_offset = be->be_v_offset;
++					dprintk("%s: removing %p\n", __func__, be);
++					list_del(&be->be_node);
++					put_extent(be);	/* drop the reference the list held */
++				} else {
++					goto out_err;
++				}
++			}
++		} else if (end >= be->be_f_offset + be->be_length) {
++			/* new extent overlap existing be */
++			if (extents_consistent(be, new)) {
++				/* extend new to fully replace be */
++				dprintk("%s: removing %p\n", __func__, be);
++				list_del(&be->be_node);
++				put_extent(be);
++			} else {
++				goto out_err;
++			}
++		} else if (end > be->be_f_offset) {
++			/*           |<--   be   -->|
++			 *|<--   new   -->| */
++			if (extents_consistent(new, be)) {
++				/* extend new to fully replace be */
++				new->be_length += be->be_f_offset + be->be_length -
++					new->be_f_offset - new->be_length;
++				dprintk("%s: removing %p\n", __func__, be);
++				list_del(&be->be_node);
++				put_extent(be);
++			} else {
++				goto out_err;
++			}
++		}
++	}
++	/* Note that if we never hit the above break, be will not point to a
++	 * valid extent.  However, in that case &be->be_node==list.
++	 */
++	list_add(&new->be_node, &be->be_node);
++	dprintk("%s: inserting new\n", __func__);
++	print_elist(list);
++	/* STUB - The per-list consistency checks have all been done,
++	 * should now check cross-list consistency.
++	 */
++	return 0;
++
++ out_err:
++	put_extent(new);	/* new is not inserted on error */
++	return -EIO;
++}
++
++/* Returns extent, or NULL.  If a second READ extent exists, it is returned
++ * in cow_read, if given.
++ *
++ * The extents are kept in two separate ordered lists, one for READ and NONE,
++ * one for READWRITE and INVALID.  Within each list, we assume:
++ * 1. Extents are ordered by file offset.
++ * 2. For any given isect, there is at most one extent that matches.
++ */
++struct pnfs_block_extent *
++find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
++	    struct pnfs_block_extent **cow_read)
++{
++	struct pnfs_block_extent *be, *cow, *ret;
++	int i;
++
++	dprintk("%s enter with isect %llu\n", __func__, (u64)isect);
++	cow = ret = NULL;
++	spin_lock(&bl->bl_ext_lock);
++	for (i = 0; i < EXTENT_LISTS; i++) {
++		if (ret &&	/* keep looking only for a cow partner of an INVALID hit */
++		    (!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA))
++			break;
++		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
++			if (isect >= be->be_f_offset + be->be_length)
++				break;	/* be ends at or before isect: stop this list */
++			if (isect >= be->be_f_offset) {
++				/* We have found an extent */
++				dprintk("%s Get %p (%i)\n", __func__, be,
++					atomic_read(&be->be_refcnt.refcount));
++				kref_get(&be->be_refcnt);	/* ref goes with the returned pointer */
++				if (!ret)
++					ret = be;
++				else if (be->be_state != PNFS_BLOCK_READ_DATA)
++					put_extent(be);	/* not usable as cow source */
++				else
++					cow = be;
++				break;
++			}
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++	if (cow_read)
++		*cow_read = cow;	/* NULL when no second READ extent was found */
++	print_bl_extent(ret);
++	return ret;
++}
++
++/* Similar to find_get_extent, but called with lock held, and ignores cow */
++static struct pnfs_block_extent *
++find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect)
++{
++	struct pnfs_block_extent *be, *ret = NULL;
++	int i;
++
++	dprintk("%s enter with isect %llu\n", __func__, (u64)isect);
++	for (i = 0; i < EXTENT_LISTS; i++) {
++		if (ret)	/* first match wins */
++			break;
++		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
++			if (isect >= be->be_f_offset + be->be_length)
++				break;	/* be ends at or before isect: stop this list */
++			if (isect >= be->be_f_offset) {
++				/* We have found an extent */
++				dprintk("%s Get %p (%i)\n", __func__, be,
++					atomic_read(&be->be_refcnt.refcount));
++				kref_get(&be->be_refcnt);	/* ref goes with the returned pointer */
++				ret = be;
++				break;
++			}
++		}
++	}
++	print_bl_extent(ret);
++	return ret;	/* NULL when no extent covers isect */
++}
++
++int
++encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++			       struct xdr_stream *xdr,
++			       const struct pnfs_layoutcommit_arg *arg)
++{
++	sector_t start, end;	/* only used for the debug print below */
++	struct pnfs_block_short_extent *lce, *save;
++	unsigned int count = 0;	/* extents encoded so far */
++	struct bl_layoutupdate_data *bld = arg->layoutdriver_data;
++	struct list_head *ranges = &bld->ranges;
++	__be32 *p, *xdr_start;
++
++	dprintk("%s enter\n", __func__);
++	start = arg->lseg.offset >> 9;
++	end = start + (arg->lseg.length >> 9);
++	dprintk("%s set start=%llu, end=%llu\n",
++		__func__, (u64)start, (u64)end);
++
++	/* BUG - creation of bl_commit is buggy - need to wait for
++	 * entire block to be marked WRITTEN before it can be added.
++	 */
++	spin_lock(&bl->bl_ext_lock);
++	/* Want to adjust for possible truncate */
++	/* We now want to adjust argument range */
++
++	/* XDR encode the ranges found */
++	xdr_start = xdr_reserve_space(xdr, 8);	/* byte length + extent count */
++	if (!xdr_start)
++		goto out;	/* NOTE(review): 0 is still returned in this case */
++	list_for_each_entry_safe(lce, save, &bl->bl_commit, bse_node) {
++		p = xdr_reserve_space(xdr, 7 * 4 + sizeof(lce->bse_devid.data));
++		if (!p)
++			break;	/* out of room: remaining extents stay on bl_commit */
++		WRITE_DEVID(&lce->bse_devid);
++		WRITE64(lce->bse_f_offset << 9);
++		WRITE64(lce->bse_length << 9);
++		WRITE64(0LL);
++		WRITE32(PNFS_BLOCK_READWRITE_DATA);
++		list_del(&lce->bse_node);	/* move the entry from bl_commit to ranges */
++		list_add_tail(&lce->bse_node, ranges);
++		bl->bl_count--;
++		count++;
++	}
++	xdr_start[0] = cpu_to_be32((xdr->p - xdr_start - 1) * 4);
++	xdr_start[1] = cpu_to_be32(count);
++out:
++	spin_unlock(&bl->bl_ext_lock);
++	dprintk("%s found %u ranges\n", __func__, count);
++	return 0;
++}
++
++/* Helper function to set_to_rw that initialize a new extent */
++static void
++_prep_new_extent(struct pnfs_block_extent *new,
++		 struct pnfs_block_extent *orig,
++		 sector_t offset, sector_t length, int state)
++{
++	kref_init(&new->be_refcnt);	/* new starts out with one reference */
++	/* don't need to INIT_LIST_HEAD(&new->be_node) */
++	memcpy(&new->be_devid, &orig->be_devid, sizeof(struct pnfs_deviceid));
++	new->be_mdev = orig->be_mdev;
++	new->be_f_offset = offset;
++	new->be_length = length;
++	new->be_v_offset = orig->be_v_offset - orig->be_f_offset + offset;	/* keep orig's mapping */
++	new->be_state = state;
++	new->be_inval = orig->be_inval;
++}
++
++/* Tries to merge be with extent in front of it in list.
++ * Frees storage if not used.  Returns the merged extent, or be if no merge.
++ */
++static struct pnfs_block_extent *
++_front_merge(struct pnfs_block_extent *be, struct list_head *head,
++	     struct pnfs_block_extent *storage)
++{
++	struct pnfs_block_extent *prev;
++
++	if (!storage)	/* nothing to build the merged extent in */
++		goto no_merge;
++	if (&be->be_node == head || be->be_node.prev == head)
++		goto no_merge;	/* be is the list head, or has no predecessor */
++	prev = list_entry(be->be_node.prev, struct pnfs_block_extent, be_node);
++	if ((prev->be_f_offset + prev->be_length != be->be_f_offset) ||
++	    !extents_consistent(prev, be))
++		goto no_merge;	/* not adjacent, or not mergeable */
++	_prep_new_extent(storage, prev, prev->be_f_offset,
++			 prev->be_length + be->be_length, prev->be_state);
++	list_replace(&prev->be_node, &storage->be_node);
++	put_extent(prev);	/* drop the references the list held on prev and be */
++	list_del(&be->be_node);
++	put_extent(be);
++	return storage;
++
++ no_merge:
++	kfree(storage);	/* storage may be NULL; kfree() accepts that */
++	return be;
++}
++
++/* Turns [offset, offset + length) of the INVALID extent at offset into
++ * READWRITE, splitting it as needed; returns where the caller resumes. */
++static u64
++set_to_rw(struct pnfs_block_layout *bl, u64 offset, u64 length)
++{
++	u64 rv = offset + length;
++	struct pnfs_block_extent *be, *e1, *e2, *e3, *new, *old;
++	struct pnfs_block_extent *children[3];
++	struct pnfs_block_extent *merge1 = NULL, *merge2 = NULL;
++	int i = 0, j;
++
++	dprintk("%s(%llu, %llu)\n", __func__, offset, length);
++	/* Create storage for up to three new extents e1, e2, e3 */
++	e1 = kmalloc(sizeof(*e1), GFP_KERNEL);
++	e2 = kmalloc(sizeof(*e2), GFP_KERNEL);
++	e3 = kmalloc(sizeof(*e3), GFP_KERNEL);
++	/* BUG - we are ignoring any failure */
++	if (!e1 || !e2 || !e3)
++		goto out_nosplit;
++
++	spin_lock(&bl->bl_ext_lock);
++	be = find_get_extent_locked(bl, offset);
++	if (be)	/* no extent: rv stays offset + length */
++		rv = be->be_f_offset + be->be_length;
++	if (!be || be->be_state != PNFS_BLOCK_INVALID_DATA) {
++		spin_unlock(&bl->bl_ext_lock);
++		put_extent(be);	/* drop the lookup ref; NULL is accepted */
++		goto out_nosplit;
++	}
++	/* Add e* to children, bumping e*'s krefs */
++	if (be->be_f_offset != offset) {
++		_prep_new_extent(e1, be, be->be_f_offset,
++				 offset - be->be_f_offset,
++				 PNFS_BLOCK_INVALID_DATA);
++		children[i++] = e1;
++		print_bl_extent(e1);
++	} else
++		merge1 = e1;
++	_prep_new_extent(e2, be, offset,
++			 min(length, be->be_f_offset + be->be_length - offset),
++			 PNFS_BLOCK_READWRITE_DATA);
++	children[i++] = e2;
++	print_bl_extent(e2);
++	if (offset + length < be->be_f_offset + be->be_length) {
++		_prep_new_extent(e3, be, e2->be_f_offset + e2->be_length,
++				 be->be_f_offset + be->be_length -
++				 offset - length,
++				 PNFS_BLOCK_INVALID_DATA);
++		children[i++] = e3;
++		print_bl_extent(e3);
++	} else
++		merge2 = e3;
++
++	/* Remove be from list, and insert the e* */
++	/* No refs taken on e*: the list owns the one set when init'ed. */
++	if (i < 3)
++		children[i] = NULL;
++	new = children[0];
++	list_replace(&be->be_node, &new->be_node);
++	put_extent(be);
++	new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge1);
++	for (j = 1; j < i; j++) {
++		old = new;
++		new = children[j];
++		list_add(&new->be_node, &old->be_node);
++	}
++	if (merge2) {
++		/* This is a HACK, should just create a _back_merge function */
++		new = list_entry(new->be_node.next,
++				 struct pnfs_block_extent, be_node);
++		new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge2);
++	}
++	spin_unlock(&bl->bl_ext_lock);
++
++	/* The base reference was dropped above, so this put destroys be. */
++	put_extent(be);
++	dprintk("%s returns %llu after split\n", __func__, rv);
++	return rv;
++
++ out_nosplit:
++	kfree(e1);
++	kfree(e2);
++	kfree(e3);
++	dprintk("%s returns %llu without splitting\n", __func__, rv);
++	return rv;
++}
++
++void
++clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++			      const struct pnfs_layoutcommit_arg *arg,
++			      int status)
++{
++	struct bl_layoutupdate_data *bld = arg->layoutdriver_data;
++	struct pnfs_block_short_extent *lce, *save;
++
++	dprintk("%s status %d\n", __func__, status);
++	list_for_each_entry_safe_reverse(lce, save, &bld->ranges, bse_node) {
++		if (likely(!status)) {	/* status 0: convert the range to READWRITE */
++			u64 offset = lce->bse_f_offset;
++			u64 end = offset + lce->bse_length;
++
++			do {	/* set_to_rw() returns where to resume */
++				offset = set_to_rw(bl, offset, end - offset);
++			} while (offset < end);
++
++			kfree(lce);	/* NOTE(review): freed without list_del() from ranges */
++		} else {	/* nonzero status: put the range back on the commit list */
++			spin_lock(&bl->bl_ext_lock);
++			add_to_commitlist(bl, lce);
++			spin_unlock(&bl->bl_ext_lock);
++		}
++	}
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-08-23 12:09:03.292511531 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-08-23 12:09:03.293491476 -0400
+@@ -0,0 +1,6 @@
++#
++# Makefile for the pNFS block layout driver kernel module
++#
++obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o
++blocklayoutdriver-objs := blocklayout.o blocklayoutdev.o blocklayoutdm.o \
++			extents.o block-device-discovery-pipe.o
+diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/callback.h
+--- linux-2.6.34.noarch/fs/nfs/callback.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-08-23 12:09:03.293491476 -0400
+@@ -8,6 +8,8 @@
+ #ifndef __LINUX_FS_NFS_CALLBACK_H
+ #define __LINUX_FS_NFS_CALLBACK_H
+ 
++#include <linux/pnfs_xdr.h>
++
+ #define NFS4_CALLBACK 0x40000000
+ #define NFS4_CALLBACK_XDRSIZE 2048
+ #define NFS4_CALLBACK_BUFSIZE (1024 + NFS4_CALLBACK_XDRSIZE)
+@@ -72,6 +74,8 @@ struct cb_recallargs {
+ 
+ #if defined(CONFIG_NFS_V4_1)
+ 
++#include <linux/pnfs_xdr.h>
++
+ struct referring_call {
+ 	uint32_t			rc_sequenceid;
+ 	uint32_t			rc_slotid;
+@@ -111,6 +115,13 @@ extern int nfs41_validate_delegation_sta
+ 
+ #define RCA4_TYPE_MASK_RDATA_DLG	0
+ #define RCA4_TYPE_MASK_WDATA_DLG	1
++#define RCA4_TYPE_MASK_DIR_DLG         2
++#define RCA4_TYPE_MASK_FILE_LAYOUT     3
++#define RCA4_TYPE_MASK_BLK_LAYOUT      4
++#define RCA4_TYPE_MASK_OBJ_LAYOUT_MIN  8
++#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX  9
++#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12
++#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15
+ 
+ struct cb_recallanyargs {
+ 	struct sockaddr	*craa_addr;
+@@ -127,6 +138,37 @@ struct cb_recallslotargs {
+ extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args,
+ 					  void *dummy);
+ 
++struct cb_pnfs_layoutrecallargs {
++	struct sockaddr		*cbl_addr;	/* passed to nfs_find_client() */
++	struct nfs_fh		cbl_fh;		/* compared for RETURN_FILE */
++	struct nfs4_pnfs_layout_segment cbl_seg;	/* range handed to pnfs_return_layout() */
++	struct nfs_fsid		cbl_fsid;	/* compared for RETURN_FSID */
++	uint32_t		cbl_recall_type;	/* RETURN_FILE, RETURN_FSID or RETURN_ALL */
++	uint32_t		cbl_layout_type;	/* copied into the final LAYOUTRETURN */
++	uint32_t		cbl_layoutchanged;
++	nfs4_stateid		cbl_stateid;	/* checked for RETURN_FILE recalls */
++};
++
++extern unsigned pnfs_cb_layoutrecall(struct cb_pnfs_layoutrecallargs *args,
++				     void *dummy);
++
++struct cb_pnfs_devicenotifyitem {
++	uint32_t		cbd_notify_type;
++	uint32_t		cbd_layout_type;
++	struct pnfs_deviceid	cbd_dev_id;
++	uint32_t		cbd_immediate;
++};
++
++/* XXX: Should be dynamic up to max compound size */
++#define NFS4_DEV_NOTIFY_MAXENTRIES 10
++struct cb_pnfs_devicenotifyargs {
++	struct sockaddr			*addr;
++	int				 ndevs;
++	struct cb_pnfs_devicenotifyitem	 devs[NFS4_DEV_NOTIFY_MAXENTRIES];
++};
++
++extern unsigned pnfs_cb_devicenotify(struct cb_pnfs_devicenotifyargs *args,
++				     void *dummy);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
+diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/nfs/callback_proc.c
+--- linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-08-23 12:09:03.294522414 -0400
+@@ -8,10 +8,15 @@
+ #include <linux/nfs4.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/slab.h>
++#include <linux/kthread.h>
++#include <linux/module.h>
++#include <linux/writeback.h>
++#include <linux/nfs4_pnfs.h>
+ #include "nfs4_fs.h"
+ #include "callback.h"
+ #include "delegation.h"
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ #ifdef NFS_DEBUG
+ #define NFSDBG_FACILITY NFSDBG_CALLBACK
+@@ -62,16 +67,6 @@ out:
+ 	return res->status;
+ }
+ 
+-static int (*nfs_validate_delegation_stateid(struct nfs_client *clp))(struct nfs_delegation *, const nfs4_stateid *)
+-{
+-#if defined(CONFIG_NFS_V4_1)
+-	if (clp->cl_minorversion > 0)
+-		return nfs41_validate_delegation_stateid;
+-#endif
+-	return nfs4_validate_delegation_stateid;
+-}
+-
+-
+ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
+ {
+ 	struct nfs_client *clp;
+@@ -92,8 +87,7 @@ __be32 nfs4_callback_recall(struct cb_re
+ 		inode = nfs_delegation_find_inode(clp, &args->fh);
+ 		if (inode != NULL) {
+ 			/* Set up a helper thread to actually return the delegation */
+-			switch (nfs_async_inode_return_delegation(inode, &args->stateid,
+-								  nfs_validate_delegation_stateid(clp))) {
++			switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
+ 				case 0:
+ 					res = 0;
+ 					break;
+@@ -116,24 +110,364 @@ out:
+ 
+ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
+ {
+-	if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data,
+-					 sizeof(delegation->stateid.data)) != 0)
++	if (delegation == NULL || memcmp(delegation->stateid.u.data,
++					 stateid->u.data,
++					 sizeof(delegation->stateid.u.data)))
+ 		return 0;
+ 	return 1;
+ }
+ 
+ #if defined(CONFIG_NFS_V4_1)
+ 
++static bool
++pnfs_is_next_layout_stateid(const struct pnfs_layout_type *lo,
++			    const nfs4_stateid stateid)
++{
++	int seqlock;
++	bool res;
++	u32 oldseqid, newseqid;
++
++	do {	/* retried if lo->seqlock changes during the read */
++		seqlock = read_seqbegin(&lo->seqlock);
++		oldseqid = be32_to_cpu(lo->stateid.u.stateid.seqid);
++		newseqid = be32_to_cpu(stateid.u.stateid.seqid);
++		res = !memcmp(lo->stateid.u.stateid.other,
++			      stateid.u.stateid.other,
++			      NFS4_STATEID_OTHER_SIZE);
++		if (res) { /* comparing layout stateids */
++			if (oldseqid == ~0)	/* seqid wraps from all-ones to 1 */
++				res = (newseqid == 1);
++			else
++				res = (newseqid == oldseqid + 1);
++		} else { /* open stateid */
++			res = !memcmp(lo->stateid.u.data,	/* layout stateid still zero? */
++				      &zero_stateid,
++				      NFS4_STATEID_SIZE);
++			if (res)
++				res = (newseqid == 1);
++		}
++	} while (read_seqretry(&lo->seqlock, seqlock));
++
++	return res;	/* true when stateid is the successor of lo's stateid */
++}
++
++/*
++ * Retrieve an inode based on layout recall parameters
++ *
++ * Note: caller must iput(inode) to dereference the inode.
++ */
++static struct inode *
++nfs_layoutrecall_find_inode(struct nfs_client *clp,
++			    const struct cb_pnfs_layoutrecallargs *args)
++{
++	struct nfs_inode *nfsi;
++	struct pnfs_layout_type *layout;
++	struct nfs_server *server;
++	struct inode *ino = NULL;
++
++	dprintk("%s: Begin recall_type=%d clp %p\n",
++		__func__, args->cbl_recall_type, clp);
++
++	spin_lock(&clp->cl_lock);	/* cl_layouts is walked under cl_lock */
++	list_for_each_entry(layout, &clp->cl_layouts, lo_layouts) {
++		nfsi = PNFS_NFS_INODE(layout);
++		if (!nfsi)
++			continue;
++
++		dprintk("%s: Searching inode=%lu\n",
++			__func__, nfsi->vfs_inode.i_ino);
++
++		if (args->cbl_recall_type == RETURN_FILE) {
++		    if (nfs_compare_fh(&args->cbl_fh, &nfsi->fh))
++			continue;	/* file handle differs */
++		} else if (args->cbl_recall_type == RETURN_FSID) {
++			server = NFS_SERVER(&nfsi->vfs_inode);
++			if (server->fsid.major != args->cbl_fsid.major ||
++			    server->fsid.minor != args->cbl_fsid.minor)
++				continue;	/* fsid differs */
++		}	/* any other recall type matches every layout */
++
++		/* Make sure client didn't clean up layout without
++		 * telling the server */
++		if (!has_layout(nfsi))
++			continue;
++
++		ino = igrab(&nfsi->vfs_inode);	/* NOTE(review): NULL result still ends the search */
++		dprintk("%s: Found inode=%p\n", __func__, ino);
++		break;
++	}
++	spin_unlock(&clp->cl_lock);
++	return ino;	/* NULL when nothing matched */
++}
++
++struct recall_layout_threadargs {
++	struct inode *inode;	/* inode the recall was matched to */
++	struct nfs_client *clp;
++	struct completion started;	/* completed by the thread once result is set */
++	struct cb_pnfs_layoutrecallargs *rl;	/* copied by the thread before completing */
++	int result;	/* returned by pnfs_async_return_layout() */
++};
++
++static int pnfs_recall_layout(void *data)	/* thread body for a layout recall */
++{
++	struct inode *inode, *ino;
++	struct nfs_client *clp;
++	struct cb_pnfs_layoutrecallargs rl;
++	struct nfs4_pnfs_layoutreturn *lrp;
++	struct recall_layout_threadargs *args =
++		(struct recall_layout_threadargs *)data;
++	int status = 0;
++
++	daemonize("nfsv4-layoutreturn");
++
++	dprintk("%s: recall_type=%d fsid 0x%llx-0x%llx start\n",
++		__func__, args->rl->cbl_recall_type,
++		args->rl->cbl_fsid.major, args->rl->cbl_fsid.minor);
++
++	clp = args->clp;
++	inode = args->inode;
++	rl = *args->rl;	/* private copy; args is not used after complete() below */
++
++	/* support whole file layouts only */
++	rl.cbl_seg.offset = 0;
++	rl.cbl_seg.length = NFS4_MAX_UINT64;
++
++	if (rl.cbl_recall_type == RETURN_FILE) {
++		if (pnfs_is_next_layout_stateid(NFS_I(inode)->layout,
++						rl.cbl_stateid))
++			status = pnfs_return_layout(inode, &rl.cbl_seg,
++						    &rl.cbl_stateid, RETURN_FILE,
++						    false);
++		else
++			status = cpu_to_be32(NFS4ERR_DELAY);	/* stateid is not the next one */
++		if (status)
++			dprintk("%s RETURN_FILE error: %d\n", __func__, status);
++		else	/* return succeeded: NOMATCHING_LAYOUT is the result reported */
++			status =  cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
++		args->result = status;
++		complete(&args->started);	/* wakes pnfs_async_return_layout() */
++		goto out;
++	}
++
++	status = cpu_to_be32(NFS4_OK);
++	args->result = status;
++	complete(&args->started);	/* reply first; the returns below run afterwards */
++	args = NULL;
++
++	/* IMPROVEME: This loop is inefficient, running in O(|s_inodes|^2) */
++	while ((ino = nfs_layoutrecall_find_inode(clp, &rl)) != NULL) {
++		/* FIXME: need to check status on pnfs_return_layout */
++		pnfs_return_layout(ino, &rl.cbl_seg, NULL, RETURN_FILE, false);
++		iput(ino);
++	}
++
++	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
++	if (!lrp) {
++		dprintk("%s: allocation failed. Cannot send last LAYOUTRETURN\n",
++			__func__);
++		goto out;
++	}
++
++	/* send final layoutreturn */
++	lrp->args.reclaim = 0;
++	lrp->args.layout_type = rl.cbl_layout_type;
++	lrp->args.return_type = rl.cbl_recall_type;
++	lrp->args.lseg = rl.cbl_seg;
++	lrp->args.inode = inode;	/* NOTE(review): no reference on inode is taken here */
++	pnfs4_proc_layoutreturn(lrp, true);
++
++out:
++	clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);	/* allow the next recall */
++	nfs_put_client(clp);
++	dprintk("%s: exit status %d\n", __func__, 0);
++	module_put_and_exit(0);	/* does not return, so log before it */
++	return 0;
++}
++
++/*
++ * Asynchronous layout recall!  Returns the thread's result, or an error.
++ */
++static int pnfs_async_return_layout(struct nfs_client *clp, struct inode *inode,
++				    struct cb_pnfs_layoutrecallargs *rl)
++{
++	struct recall_layout_threadargs data = {	/* on stack: valid until we return */
++		.clp = clp,
++		.inode = inode,
++		.rl = rl,
++	};
++	struct task_struct *t;
++	int status = -EAGAIN;
++
++	dprintk("%s: -->\n", __func__);
++
++	/* FIXME: do not allow two concurrent layout recalls */
++	if (test_and_set_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state))
++		return status;	/* a recall is already running: -EAGAIN */
++
++	init_completion(&data.started);
++	__module_get(THIS_MODULE);	/* released by the thread or on the error paths */
++	if (!atomic_inc_not_zero(&clp->cl_count))
++		goto out_put_no_client;	/* client refcount already zero */
++
++	t = kthread_run(pnfs_recall_layout, &data, "%s", "pnfs_recall_layout");
++	if (IS_ERR(t)) {
++		printk(KERN_INFO "NFS: Layout recall callback thread failed "
++			"for client (clientid %08x/%08x)\n",
++			(unsigned)(clp->cl_clientid >> 32),
++			(unsigned)(clp->cl_clientid));
++		status = PTR_ERR(t);
++		goto out_module_put;
++	}
++	wait_for_completion(&data.started);	/* thread sets data.result first */
++	return data.result;
++out_module_put:
++	nfs_put_client(clp);	/* undo the cl_count increment above */
++out_put_no_client:
++	clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
++	module_put(THIS_MODULE);
++	return status;
++}
++
++static int pnfs_recall_all_layouts(struct nfs_client *clp)
++{
++	struct cb_pnfs_layoutrecallargs rl = {	/* unnamed fields are zeroed */
++		.cbl_recall_type = RETURN_ALL,
++		.cbl_seg.iomode = IOMODE_ANY,
++		.cbl_seg.offset = 0,
++		.cbl_seg.length = NFS4_MAX_UINT64,
++	};	/* the recall thread copies and reads the whole struct */
++	struct inode *inode;
++	int status = 0;
++
++	/* we need the inode to get the nfs_server struct */
++	inode = nfs_layoutrecall_find_inode(clp, &rl);
++	if (!inode)
++		return status;	/* no layouts held: nothing to recall, returns 0 */
++	status = pnfs_async_return_layout(clp, inode, &rl);
++	iput(inode);
++
++	return status;
++}
++
++/*
++ * CB_LAYOUTRECALL handler: walk every nfs_client found for the caller's
++ * address and start an asynchronous layout return on each MDS client for
++ * which nfs_layoutrecall_find_inode() yields an inode.  @dummy is unused.
++ *
++ * The reply is NFS4ERR_OP_NOT_IN_SESSION when no client is found,
++ * NFS4ERR_DELAY when any pnfs_async_return_layout() call returned non-zero,
++ * and NFS4ERR_NOMATCHING_LAYOUT in every other case (including when a
++ * layout return was started and reported status 0).
++ */
++__be32 pnfs_cb_layoutrecall(struct cb_pnfs_layoutrecallargs *args,
++			    void *dummy)
++{
++	struct nfs_client *clp, *next;
++	struct inode *inode;
++	unsigned int num_client = 0;
++	__be32 res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
++
++	dprintk("%s: -->\n", __func__);
++
++	clp = nfs_find_client(args->cbl_addr, 4);
++	if (clp == NULL) {
++		dprintk("%s: no client for addr %u.%u.%u.%u\n",
++			__func__, NIPQUAD(args->cbl_addr));
++		goto out;
++	}
++
++	res = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
++	for (; clp != NULL; clp = next) {
++		num_client++;
++
++		/*
++		 * The callback must come from the MDS personality.  All
++		 * recall types (_FILE, _FSID, _ALL) take the same path: an
++		 * inode is needed to get at the nfs_server struct.
++		 */
++		inode = NULL;
++		if (clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS)
++			inode = nfs_layoutrecall_find_inode(clp, args);
++		if (inode != NULL) {
++			if (pnfs_async_return_layout(clp, inode, args))
++				res = cpu_to_be32(NFS4ERR_DELAY);
++			iput(inode);
++		}
++
++		/* Look up the successor before dropping our reference. */
++		next = nfs_find_client_next(clp);
++		nfs_put_client(clp);
++	}
++
++out:
++	dprintk("%s: exit with status = %d numclient %u\n",
++		__func__, ntohl(res), num_client);
++	return res;
++}
++
++/* Apply each decoded notification to @clp: a DELETE drops the device id from
++ * clp->cl_devid_cache (if there is one); a CHANGE is only logged.  Returns 0. */
++static __be32 pnfs_devicenotify_client(struct nfs_client *clp,
++				       struct cb_pnfs_devicenotifyargs *args)
++{
++	struct cb_pnfs_devicenotifyitem *item;
++	int idx;
++
++	dprintk("%s: --> clp %p\n", __func__, clp);
++	for (idx = 0; idx < args->ndevs; idx++) {
++		item = &args->devs[idx];
++		if (item->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE)
++			printk(KERN_ERR "%s: NOTIFY_DEVICEID4_CHANGE "
++					"not supported\n", __func__);
++		else if (item->cbd_notify_type == NOTIFY_DEVICEID4_DELETE &&
++			 clp->cl_devid_cache)
++			nfs4_delete_device(clp->cl_devid_cache,
++					   &item->cbd_dev_id);
++	}
++	return 0;
++}
++
++/* CB_NOTIFY_DEVICEID handler: run pnfs_devicenotify_client() on every client
++ * found for args->addr; NFS4ERR_INVAL if none, else the last client's result. */
++__be32 pnfs_cb_devicenotify(struct cb_pnfs_devicenotifyargs *args,
++			    void *dummy)
++{
++	struct nfs_client *clp, *next;
++	unsigned int num_client = 0;
++	__be32 res = __constant_htonl(NFS4ERR_INVAL);
++
++	dprintk("%s: -->\n", __func__);
++
++	clp = nfs_find_client(args->addr, 4);
++	if (clp == NULL) {
++		dprintk("%s: no client for addr %u.%u.%u.%u\n",
++			__func__, NIPQUAD(args->addr));
++		goto out;
++	}
++
++	for (; clp != NULL; clp = next) {
++		num_client++;
++		res = pnfs_devicenotify_client(clp, args);
++		next = nfs_find_client_next(clp);
++		nfs_put_client(clp);
++	}
++
++out:
++	dprintk("%s: exit with status = %d numclient %u\n",
++		__func__, ntohl(res), num_client);
++	return res;
++}
++
+ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
+ {
+ 	if (delegation == NULL)
+ 		return 0;
+ 
+-	/* seqid is 4-bytes long */
+-	if (((u32 *) &stateid->data)[0] != 0)
++	if (stateid->u.stateid.seqid != 0)
+ 		return 0;
+-	if (memcmp(&delegation->stateid.data[4], &stateid->data[4],
+-		   sizeof(stateid->data)-4))
++	if (memcmp(&delegation->stateid.u.stateid.other,
++		   &stateid->u.stateid.other,
++		   NFS4_STATEID_OTHER_SIZE))
+ 		return 0;
+ 
+ 	return 1;
+@@ -335,13 +669,37 @@ out:
+ 	return status;
+ }
+ 
++/* True when @mask is empty or has at least one known RCA4_TYPE_MASK_* bit. */
++static inline bool
++validate_bitmap_values(const unsigned long *mask)
++{
++	int bit;
++
++	if (*mask == 0 ||
++	    test_bit(RCA4_TYPE_MASK_RDATA_DLG, mask) ||
++	    test_bit(RCA4_TYPE_MASK_WDATA_DLG, mask) ||
++	    test_bit(RCA4_TYPE_MASK_DIR_DLG, mask) ||
++	    test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, mask) ||
++	    test_bit(RCA4_TYPE_MASK_BLK_LAYOUT, mask))
++		return true;
++	for (bit = RCA4_TYPE_MASK_OBJ_LAYOUT_MIN;
++	     bit <= RCA4_TYPE_MASK_OBJ_LAYOUT_MAX; bit++)
++		if (test_bit(bit, mask))
++			return true;
++	for (bit = RCA4_TYPE_MASK_OTHER_LAYOUT_MIN;
++	     bit <= RCA4_TYPE_MASK_OTHER_LAYOUT_MAX; bit++)
++		if (test_bit(bit, mask))
++			return true;
++	return false;
++}
++
+ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
+ {
+ 	struct nfs_client *clp;
+ 	__be32 status;
+ 	fmode_t flags = 0;
+ 
+-	status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
++	status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
+ 	clp = nfs_find_client(args->craa_addr, 4);
+ 	if (clp == NULL)
+ 		goto out;
+@@ -349,16 +707,25 @@ __be32 nfs4_callback_recallany(struct cb
+ 	dprintk("NFS: RECALL_ANY callback request from %s\n",
+ 		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+ 
++	status = cpu_to_be32(NFS4ERR_INVAL);
++	if (!validate_bitmap_values((const unsigned long *)
++				    &args->craa_type_mask))
++		return status;
++
++	status = cpu_to_be32(NFS4_OK);
+ 	if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
+ 		     &args->craa_type_mask))
+ 		flags = FMODE_READ;
+ 	if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
+ 		     &args->craa_type_mask))
+ 		flags |= FMODE_WRITE;
++	if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
++		     &args->craa_type_mask))
++		if (pnfs_recall_all_layouts(clp) == -EAGAIN)
++			status = cpu_to_be32(NFS4ERR_DELAY);
+ 
+ 	if (flags)
+ 		nfs_expire_all_delegation_types(clp, flags);
+-	status = htonl(NFS4_OK);
+ out:
+ 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
+ 	return status;
+diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/nfs/callback_xdr.c
+--- linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-08-23 12:09:03.295502055 -0400
+@@ -22,6 +22,8 @@
+ #define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+ 
+ #if defined(CONFIG_NFS_V4_1)
++#define CB_OP_LAYOUTRECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
++#define CB_OP_DEVICENOTIFY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+ #define CB_OP_SEQUENCE_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ + \
+ 					4 + 1 + 3)
+ #define CB_OP_RECALLANY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+@@ -136,7 +138,7 @@ static __be32 decode_stateid(struct xdr_
+ 	p = read_buf(xdr, 16);
+ 	if (unlikely(p == NULL))
+ 		return htonl(NFS4ERR_RESOURCE);
+-	memcpy(stateid->data, p, 16);
++	memcpy(stateid->u.data, p, 16);
+ 	return 0;
+ }
+ 
+@@ -220,6 +222,148 @@ out:
+ 
+ #if defined(CONFIG_NFS_V4_1)
+ 
++static __be32 decode_pnfs_layoutrecall_args(struct svc_rqst *rqstp,
++					    struct xdr_stream *xdr,
++					    struct cb_pnfs_layoutrecallargs *args)
++{
++	__be32 *p;
++	__be32 status = 0;	/* 0, or a network-order NFS4ERR_* value */
++
++	args->cbl_addr = svc_addr(rqstp);	/* pnfs_cb_layoutrecall() looks the client up by this */
++	p = read_buf(xdr, 4 * sizeof(uint32_t));	/* four fixed 32-bit words, read below */
++	if (unlikely(p == NULL)) {
++		status = htonl(NFS4ERR_BADXDR);
++		goto out;
++	}
++
++	args->cbl_layout_type = ntohl(*p++);
++	args->cbl_seg.iomode = ntohl(*p++);
++	args->cbl_layoutchanged = ntohl(*p++);
++	args->cbl_recall_type = ntohl(*p++);
++
++	if (likely(args->cbl_recall_type == RETURN_FILE)) {	/* fh, offset, length, stateid follow */
++		status = decode_fh(xdr, &args->cbl_fh);
++		if (unlikely(status != 0))
++			goto out;
++
++		p = read_buf(xdr, 2 * sizeof(uint64_t));
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_BADXDR);
++			goto out;
++		}
++		p = xdr_decode_hyper(p, &args->cbl_seg.offset);
++		p = xdr_decode_hyper(p, &args->cbl_seg.length);
++		status = decode_stateid(xdr, &args->cbl_stateid);
++		if (unlikely(status != 0))
++			goto out;
++	} else if (args->cbl_recall_type == RETURN_FSID) {	/* fsid major + minor follow */
++		p = read_buf(xdr, 2 * sizeof(uint64_t));
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_BADXDR);
++			goto out;
++		}
++		p = xdr_decode_hyper(p, &args->cbl_fsid.major);
++		p = xdr_decode_hyper(p, &args->cbl_fsid.minor);
++	}	/* any other recall type: nothing further is decoded here */
++	dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d "	/* NOTE(review): fsid/fh are printed even when not decoded above */
++		"fsid %llx-%llx fhsize %d\n", __func__,
++		args->cbl_layout_type, args->cbl_seg.iomode,
++		args->cbl_layoutchanged, args->cbl_recall_type,
++		args->cbl_fsid.major, args->cbl_fsid.minor,
++		args->cbl_fh.size);
++out:
++	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
++	return status;
++}
++
++/* Decode CB_NOTIFY_DEVICEID args into @args (at most NFS4_DEV_NOTIFY_MAXENTRIES
++ * entries; args->ndevs counts those fully decoded).  Returns 0 or a
++ * network-order NFS4ERR_* status. */
++static
++__be32 decode_pnfs_devicenotify_args(struct svc_rqst *rqstp,
++				     struct xdr_stream *xdr,
++				     struct cb_pnfs_devicenotifyargs *args)
++{
++	__be32 *p;
++	__be32 status = 0;
++	u32 tmp;
++	int n, i;
++
++	args->ndevs = 0;
++	args->addr = svc_addr(rqstp);
++
++	/* Num of device notifications */
++	p = read_buf(xdr, sizeof(uint32_t));
++	if (unlikely(p == NULL)) {
++		status = htonl(NFS4ERR_RESOURCE);
++		goto out;
++	}
++	n = ntohl(*p++);
++	if (n <= 0)
++		goto out;
++
++	/* XXX: need to possibly return error in this case */
++	if (n > NFS4_DEV_NOTIFY_MAXENTRIES) {
++		dprintk("%s: Processing (%d) notifications out of (%d)\n",
++			__func__, NFS4_DEV_NOTIFY_MAXENTRIES, n);
++		n = NFS4_DEV_NOTIFY_MAXENTRIES;
++	}
++
++	/* Decode each dev notification */
++	for (i = 0; i < n; i++) {
++		struct cb_pnfs_devicenotifyitem *dev = &args->devs[i];
++
++		p = read_buf(xdr, (4 * sizeof(uint32_t))
++			     + NFS4_PNFS_DEVICEID4_SIZE);
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_RESOURCE);
++			goto out;
++		}
++		tmp = ntohl(*p++);	/* bitmap size */
++		if (tmp != 1) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++		dev->cbd_notify_type = ntohl(*p++);
++		if (dev->cbd_notify_type != NOTIFY_DEVICEID4_CHANGE &&
++		    dev->cbd_notify_type != NOTIFY_DEVICEID4_DELETE) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++		tmp = ntohl(*p++);	/* opaque size */
++		if (((dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) &&
++		     (tmp != NFS4_PNFS_DEVICEID4_SIZE + 8)) ||
++		    ((dev->cbd_notify_type == NOTIFY_DEVICEID4_DELETE) &&
++		     (tmp != NFS4_PNFS_DEVICEID4_SIZE + 4))) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++		dev->cbd_layout_type = ntohl(*p++);
++		memcpy(dev->cbd_dev_id.data, p, NFS4_PNFS_DEVICEID4_SIZE);
++		p += XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE);
++
++		/* The trailing "immediate" word exists only for CHANGE (the
++		 * +8 opaque size above), so test the notify type here. */
++		dev->cbd_immediate = 0;
++		if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) {
++			p = read_buf(xdr, sizeof(uint32_t));
++			if (unlikely(p == NULL)) {
++				status = htonl(NFS4ERR_DELAY);
++				goto out;
++			}
++			dev->cbd_immediate = ntohl(*p++);
++		}
++		args->ndevs++;
++		dprintk("%s: type %d layout 0x%x immediate %d\n",
++			__func__, dev->cbd_notify_type, dev->cbd_layout_type,
++			dev->cbd_immediate);
++	}
++out:
++	dprintk("%s: status %d ndevs %d\n",
++		__func__, ntohl(status), args->ndevs);
++	return status;
++}
++
+ static __be32 decode_sessionid(struct xdr_stream *xdr,
+ 				 struct nfs4_sessionid *sid)
+ {
+@@ -574,11 +718,11 @@ preprocess_nfs41_op(int nop, unsigned in
+ 	case OP_CB_SEQUENCE:
+ 	case OP_CB_RECALL_ANY:
+ 	case OP_CB_RECALL_SLOT:
++	case OP_CB_LAYOUTRECALL:
++	case OP_CB_NOTIFY_DEVICEID:
+ 		*op = &callback_ops[op_nr];
+ 		break;
+ 
+-	case OP_CB_LAYOUTRECALL:
+-	case OP_CB_NOTIFY_DEVICEID:
+ 	case OP_CB_NOTIFY:
+ 	case OP_CB_PUSH_DELEG:
+ 	case OP_CB_RECALLABLE_OBJ_AVAIL:
+@@ -739,6 +883,18 @@ static struct callback_op callback_ops[]
+ 		.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
+ 	},
+ #if defined(CONFIG_NFS_V4_1)
++	[OP_CB_LAYOUTRECALL] = {
++		.process_op = (callback_process_op_t)pnfs_cb_layoutrecall,
++		.decode_args =
++			(callback_decode_arg_t)decode_pnfs_layoutrecall_args,
++		.res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
++	},
++	[OP_CB_NOTIFY_DEVICEID] = {
++		.process_op = (callback_process_op_t)pnfs_cb_devicenotify,
++		.decode_args =
++			(callback_decode_arg_t)decode_pnfs_devicenotify_args,
++		.res_maxsize = CB_OP_DEVICENOTIFY_RES_MAXSZ,
++	},
+ 	[OP_CB_SEQUENCE] = {
+ 		.process_op = (callback_process_op_t)nfs4_callback_sequence,
+ 		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
+diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/client.c
+--- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-08-23 12:08:29.037481540 -0400
++++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-23 12:09:03.297501650 -0400
+@@ -39,6 +39,7 @@
+ #include <net/ipv6.h>
+ #include <linux/nfs_xdr.h>
+ #include <linux/sunrpc/bc_xprt.h>
++#include <linux/nfs4_pnfs.h>
+ 
+ #include <asm/system.h>
+ 
+@@ -48,6 +49,7 @@
+ #include "iostat.h"
+ #include "internal.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_CLIENT
+ 
+@@ -150,11 +152,14 @@ static struct nfs_client *nfs_alloc_clie
+ 	clp->cl_boot_time = CURRENT_TIME;
+ 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
+ 	clp->cl_minorversion = cl_init->minorversion;
++	clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
+ #endif
+ 	cred = rpc_lookup_machine_cred();
+ 	if (!IS_ERR(cred))
+ 		clp->cl_machine_cred = cred;
+-
++#if defined(CONFIG_NFS_V4_1)
++	INIT_LIST_HEAD(&clp->cl_layouts);
++#endif
+ 	nfs_fscache_get_client_cookie(clp);
+ 
+ 	return clp;
+@@ -178,7 +183,7 @@ static void nfs4_clear_client_minor_vers
+ 		clp->cl_session = NULL;
+ 	}
+ 
+-	clp->cl_call_sync = _nfs4_call_sync;
++	clp->cl_mvops = nfs_v4_minor_ops[0];
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
+@@ -188,7 +193,7 @@ static void nfs4_clear_client_minor_vers
+ static void nfs4_destroy_callback(struct nfs_client *clp)
+ {
+ 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+-		nfs_callback_down(clp->cl_minorversion);
++		nfs_callback_down(clp->cl_mvops->minor_version);
+ }
+ 
+ static void nfs4_shutdown_client(struct nfs_client *clp)
+@@ -251,6 +256,7 @@ void nfs_put_client(struct nfs_client *c
+ 		nfs_free_client(clp);
+ 	}
+ }
++EXPORT_SYMBOL(nfs_put_client);
+ 
+ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ /*
+@@ -343,7 +349,7 @@ static int nfs_sockaddr_match_ipaddr(con
+  * Test if two socket addresses represent the same actual socket,
+  * by comparing (only) relevant fields, including the port number.
+  */
+-static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
++int nfs_sockaddr_cmp(const struct sockaddr *sa1,
+ 			    const struct sockaddr *sa2)
+ {
+ 	if (sa1->sa_family != sa2->sa_family)
+@@ -357,6 +363,7 @@ static int nfs_sockaddr_cmp(const struct
+ 	}
+ 	return 0;
+ }
++EXPORT_SYMBOL(nfs_sockaddr_cmp);
+ 
+ /*
+  * Find a client by IP address and protocol version
+@@ -548,6 +555,7 @@ int nfs4_check_client_ready(struct nfs_c
+ 		return -EPROTONOSUPPORT;
+ 	return 0;
+ }
++EXPORT_SYMBOL(nfs4_check_client_ready);
+ 
+ /*
+  * Initialise the timeout values for a connection
+@@ -865,9 +873,34 @@ error:
+ }
+ 
+ /*
++ * Initialize the pNFS layout driver and setup pNFS related parameters
++ */
++static void nfs4_init_pnfs(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fsinfo *fsinfo)
++{
++#if defined(CONFIG_NFS_V4_1)	/* the body is empty without NFSv4.1 support */
++	struct nfs_client *clp = server->nfs_client;
++
++	if (nfs4_has_session(clp) &&
++	    (clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS)) {	/* session + USE_PNFS_MDS flag only */
++		server->pnfs_blksize = fsinfo->blksize;
++		set_pnfs_layoutdriver(server, mntfh, fsinfo->layouttype);	/* driver chosen from fsinfo's layout type */
++		pnfs_set_ds_iosize(server);
++	}
++#endif /* CONFIG_NFS_V4_1 */
++}
++
++static void nfs4_uninit_pnfs(struct nfs_server *server)	/* called first thing in nfs_free_server() */
++{
++#if defined(CONFIG_NFS_V4_1)	/* the body is empty without NFSv4.1 support */
++	if (server->nfs_client && nfs4_has_session(server->nfs_client))	/* nfs_client may be unset */
++		unmount_pnfs_layoutdriver(server);
++#endif /* CONFIG_NFS_V4_1 */
++}
++
++/*
+  * Load up the server record from information gained in an fsinfo record
+  */
+-static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo)
++static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fsinfo *fsinfo)
+ {
+ 	unsigned long max_rpc_payload;
+ 
+@@ -897,6 +930,8 @@ static void nfs_server_set_fsinfo(struct
+ 	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
+ 		server->wsize = NFS_MAX_FILE_IO_SIZE;
+ 	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
++	nfs4_init_pnfs(server, mntfh, fsinfo);
++
+ 	server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
+ 
+ 	server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
+@@ -938,7 +973,7 @@ static int nfs_probe_fsinfo(struct nfs_s
+ 	if (error < 0)
+ 		goto out_error;
+ 
+-	nfs_server_set_fsinfo(server, &fsinfo);
++	nfs_server_set_fsinfo(server, mntfh, &fsinfo);
+ 
+ 	/* Get some general file system info */
+ 	if (server->namelen == 0) {
+@@ -1016,6 +1051,7 @@ void nfs_free_server(struct nfs_server *
+ {
+ 	dprintk("--> nfs_free_server()\n");
+ 
++	nfs4_uninit_pnfs(server);
+ 	spin_lock(&nfs_client_lock);
+ 	list_del(&server->client_link);
+ 	list_del(&server->master_link);
+@@ -1126,7 +1162,7 @@ static int nfs4_init_callback(struct nfs
+ 				return error;
+ 		}
+ 
+-		error = nfs_callback_up(clp->cl_minorversion,
++		error = nfs_callback_up(clp->cl_mvops->minor_version,
+ 					clp->cl_rpcclient->cl_xprt);
+ 		if (error < 0) {
+ 			dprintk("%s: failed to start callback. Error = %d\n",
+@@ -1143,10 +1179,8 @@ static int nfs4_init_callback(struct nfs
+  */
+ static int nfs4_init_client_minor_version(struct nfs_client *clp)
+ {
+-	clp->cl_call_sync = _nfs4_call_sync;
+-
+ #if defined(CONFIG_NFS_V4_1)
+-	if (clp->cl_minorversion) {
++	if (clp->cl_mvops->minor_version) {
+ 		struct nfs4_session *session = NULL;
+ 		/*
+ 		 * Create the session and mark it expired.
+@@ -1158,7 +1192,13 @@ static int nfs4_init_client_minor_versio
+ 			return -ENOMEM;
+ 
+ 		clp->cl_session = session;
+-		clp->cl_call_sync = _nfs4_call_sync_session;
++		/*
++		 * The create session reply races with the server back
++		 * channel probe. Mark the client NFS_CS_SESSION_INITING
++		 * so that the client back channel can find the
++		 * nfs_client struct
++		 */
++		clp->cl_cons_state = NFS_CS_SESSION_INITING;
+ 	}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+@@ -1216,7 +1256,7 @@ error:
+ /*
+  * Set up an NFS4 client
+  */
+-static int nfs4_set_client(struct nfs_server *server,
++int nfs4_set_client(struct nfs_server *server,
+ 		const char *hostname,
+ 		const struct sockaddr *addr,
+ 		const size_t addrlen,
+@@ -1259,6 +1299,7 @@ error:
+ 	dprintk("<-- nfs4_set_client() = xerror %d\n", error);
+ 	return error;
+ }
++EXPORT_SYMBOL(nfs4_set_client);
+ 
+ 
+ /*
+@@ -1448,7 +1489,7 @@ struct nfs_server *nfs4_create_referral_
+ 				data->authflavor,
+ 				parent_server->client->cl_xprt->prot,
+ 				parent_server->client->cl_timeout,
+-				parent_client->cl_minorversion);
++				parent_client->cl_mvops->minor_version);
+ 	if (error < 0)
+ 		goto error;
+ 
+diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/bl_com.c
+--- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-08-23 12:09:03.297501650 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-08-23 12:09:03.298501447 -0400
+@@ -0,0 +1,292 @@
++#if defined(CONFIG_SPNFS_BLOCK)
++
++#include <linux/module.h>
++#include <linux/mutex.h>
++#include <linux/init.h>
++#include <linux/types.h>
++#include <linux/slab.h>
++#include <linux/socket.h>
++#include <linux/in.h>
++#include <linux/sched.h>
++#include <linux/exportfs.h>
++#include <linux/namei.h>
++#include <linux/mount.h>
++#include <linux/path.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/workqueue.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++#include <linux/proc_fs.h>
++#include <linux/nfs_fs.h>
++
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd4_block.h>
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++static ssize_t bl_pipe_upcall(struct file *, struct rpc_pipe_msg *,
++    char __user *, size_t);
++static ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t);
++static void bl_pipe_destroy_msg(struct rpc_pipe_msg *);
++
++static struct rpc_pipe_ops bl_upcall_ops = {
++	.upcall		= bl_pipe_upcall,
++	.downcall	= bl_pipe_downcall,
++	.destroy_msg	= bl_pipe_destroy_msg,
++};
++
++bl_comm_t	*bl_comm_global;
++
++/* Create the rpc_pipefs "/nfs/pnfs_block" pipe and publish it in
++ * bl_comm_global; returns 0, -EEXIST if already started, or -errno. */
++int
++nfsd_bl_start(void)
++{
++	bl_comm_t	*bl_comm = NULL;
++	struct path path;
++	struct nameidata nd;
++	int rc;
++
++	dprintk("%s: starting pipe\n", __func__);
++	if (bl_comm_global)
++		return -EEXIST;
++
++	path.mnt = rpc_get_mount();
++	if (IS_ERR(path.mnt))
++		return PTR_ERR(path.mnt);
++
++	/* FIXME: do not abuse rpc_pipefs/nfs */
++	rc = vfs_path_lookup(path.mnt->mnt_root, path.mnt, "/nfs", 0, &nd);
++	if (rc)
++		goto err;
++
++	rc = -ENOMEM;
++	bl_comm = kzalloc(sizeof (*bl_comm), GFP_KERNEL);
++	if (bl_comm) {
++		/* Initialise before the pipe becomes visible to user space. */
++		mutex_init(&bl_comm->lock);
++		mutex_init(&bl_comm->pipe_lock);
++		init_waitqueue_head(&bl_comm->pipe_wq);
++		/* FIXME: rename to "spnfs_block" */
++		bl_comm->pipe_dentry = rpc_mkpipe(nd.path.dentry, "pnfs_block",
++						  bl_comm, &bl_upcall_ops, 0);
++		rc = IS_ERR(bl_comm->pipe_dentry) ? -EPIPE : 0;
++	}
++	path_put(&nd.path);	/* drop the reference from vfs_path_lookup() */
++	if (rc)
++		goto err;
++	bl_comm_global = bl_comm;
++	return 0;
++err:
++	rpc_put_mount();
++	kfree(bl_comm);
++	return rc;
++}
++
++void
++nfsd_bl_stop(void)	/* tear down what nfsd_bl_start() published */
++{
++	bl_comm_t	*c = bl_comm_global;
++
++	dprintk("%s: stopping pipe\n", __func__);
++	if (!c)		/* no pipe was published: nothing to undo */
++		return;
++	rpc_unlink(c->pipe_dentry);
++	rpc_put_mount();	/* pairs with rpc_get_mount() in nfsd_bl_start() */
++	bl_comm_global = NULL;	/* NOTE(review): cleared only after the unlink; no lock is taken here */
++	kfree(c);
++}
++
++/* Copy the unread part of @msg to @dst; -EFAULT when no byte could be copied. */
++static ssize_t
++bl_pipe_upcall(struct file *file, struct rpc_pipe_msg *msg, char __user *dst,
++    size_t buflen)
++{
++	char	*data	= (char *)msg->data + msg->copied;
++	ssize_t	mlen	= msg->len - msg->copied,
++		left;
++
++	if (mlen > buflen)
++		mlen = buflen;
++	/* copy_to_user() returns the number of bytes NOT copied, never < 0 */
++	left = copy_to_user(dst, data, mlen);
++	if (mlen > 0 && left == mlen) {
++		msg->errno = -EFAULT;
++		return -EFAULT;
++	}
++	mlen		-= left;
++	msg->copied	+= mlen;
++	msg->errno	= 0;
++	return mlen;
++}
++
++/* Daemon reply: pass a kmalloc'ed copy of @src to the bl_upcall() waiter via
++ * bc->msg.  Replies too short to hold res_status wake the waiter with
++ * PNFS_BLOCK_FAILURE and return -EFAULT. */
++static ssize_t
++bl_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
++{
++	struct rpc_inode	*rpci	= RPC_I(filp->f_dentry->d_inode);
++	bl_comm_t		*bc	= (bl_comm_t *)rpci->private;
++	bl_comm_msg_t		*im	= &bc->msg;
++	bl_comm_res_t		*res;
++
++	if (mlen < offsetof(bl_comm_res_t, res_status) +
++		   sizeof(res->res_status)) {
++		mutex_lock(&bc->pipe_lock);	/* as bl_pipe_destroy_msg() does */
++		im->msg_status = PNFS_BLOCK_FAILURE;
++		im->msg_res = NULL;
++		wake_up(&bc->pipe_wq);
++		mutex_unlock(&bc->pipe_lock);
++		return -EFAULT;
++	}
++
++	res = kmalloc(mlen, GFP_KERNEL);
++	if (res == NULL)
++		return -ENOMEM;
++	if (copy_from_user(res, src, mlen)) {
++		kfree(res);
++		return -EFAULT;
++	}
++	mutex_lock(&bc->pipe_lock);
++	im->msg_status	= res->res_status;
++	im->msg_res	= res;	/* ownership passes to the bl_upcall() caller */
++	wake_up(&bc->pipe_wq);
++	mutex_unlock(&bc->pipe_lock);
++	return mlen;
++}
++
++/* A message destroyed with a negative errno fails the pending upcall. */
++static void
++bl_pipe_destroy_msg(struct rpc_pipe_msg *msg)
++{
++	bl_comm_msg_t	*upmsg = msg->data;
++	bl_comm_t	*comm = container_of(upmsg, struct bl_comm, msg);
++
++	if (msg->errno < 0) {
++		mutex_lock(&comm->pipe_lock);
++		upmsg->msg_status = PNFS_BLOCK_FAILURE;
++		wake_up(&comm->pipe_wq);
++		mutex_unlock(&comm->pipe_lock);
++	}
++}
++
++/* Send @upmsg to the block daemon and sleep until it answers.  Returns 0 with
++ * *res set to the daemon's reply (the caller kfree()s it); otherwise returns
++ * non-zero (1, or rpc_queue_upcall()'s negative error) and leaves *res NULL. */
++int
++bl_upcall(bl_comm_t *bc, bl_comm_msg_t *upmsg, bl_comm_res_t **res)
++{
++	struct rpc_pipe_msg	msg;
++	DECLARE_WAITQUEUE(wq, current);
++	int			rval;
++	bl_comm_msg_t		*m;
++
++	*res = NULL;	/* callers kfree(*res) even after a failed upcall */
++	if (bc == NULL) {
++		dprintk("%s: No pNFS block daemon available\n", __func__);
++		return 1;
++	}
++	m = &bc->msg;	/* only form the member address once bc is known valid */
++
++	mutex_lock(&bc->lock);
++	mutex_lock(&bc->pipe_lock);
++	memcpy(m, upmsg, sizeof (*m));
++	memset(&msg, 0, sizeof (msg));
++	msg.data = m;
++	msg.len = sizeof (*m);
++	add_wait_queue(&bc->pipe_wq, &wq);
++	rval = rpc_queue_upcall(bc->pipe_dentry->d_inode, &msg);
++	if (rval < 0) {
++		remove_wait_queue(&bc->pipe_wq, &wq);
++		goto out;
++	}
++
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	mutex_unlock(&bc->pipe_lock);
++	schedule();
++	__set_current_state(TASK_RUNNING);
++	remove_wait_queue(&bc->pipe_wq, &wq);
++	mutex_lock(&bc->pipe_lock);
++	rval = 1;
++	if (m->msg_status == PNFS_BLOCK_SUCCESS) {
++		*res = m->msg_res;
++		rval = 0;
++	}
++out:
++	mutex_unlock(&bc->pipe_lock);
++	mutex_unlock(&bc->lock);
++	return rval;
++}
++
++/* Write handler for the ctl proc file: the first int of @buf selects START,
++ * STOP or VERS.  Returns @len, 0 if the VERS upcall fails, or a -errno. */
++static ssize_t ctl_write(struct file *file, const char __user *buf, size_t len,
++    loff_t *offset)
++{
++	int		cmd,
++			rc;
++	bl_comm_t	*bc	= bl_comm_global;
++	bl_comm_msg_t	msg;
++	bl_comm_res_t	*res	= NULL;	/* kfree()d even if the upcall fails */
++
++	if (copy_from_user(&cmd, buf, sizeof (int)))
++		return -EFAULT;
++	memset(&msg, 0, sizeof (msg));	/* msg is copied whole to the daemon */
++	switch (cmd) {
++	case PNFS_BLOCK_CTL_STOP:
++		msg.msg_type = PNFS_UPCALL_MSG_STOP;
++		(void) bl_upcall(bc, &msg, &res);
++		kfree(res);
++		nfsd_bl_stop();
++		break;
++	case PNFS_BLOCK_CTL_START:
++		rc = nfsd_bl_start();
++		if (rc != 0)
++			return rc;
++		break;
++	case PNFS_BLOCK_CTL_VERS:
++		msg.msg_type = PNFS_UPCALL_MSG_VERS;
++		msg.u.msg_vers = PNFS_UPCALL_VERS;
++		if (bl_upcall(bc, &msg, &res)) {
++			dprintk("%s: Failed to contact pNFS block daemon\n",
++			    __func__);
++			return 0;
++		}
++		kfree(res);
++		break;
++	default:
++		dprintk("%s: unknown ctl command %d\n", __func__, cmd);
++		break;
++	}
++	return len;
++}
++
++static struct file_operations ctl_ops = {
++	.write	= ctl_write,
++};
++
++/*
++ * bl_init_proc -- set up proc interfaces; returns 0 or -ENOMEM
++ *
++ * Creating a pnfs_block directory isn't really required at this point
++ * since we've only got a single node in that directory. If the need for
++ * more nodes doesn't present itself shortly this code should revert
++ * to a single top level node. McNeal 11-Aug-2008.
++ */
++int
++bl_init_proc(void)
++{
++	struct proc_dir_entry *e;
++
++	e = proc_mkdir("fs/pnfs_block", NULL);
++	if (!e)
++		return -ENOMEM;
++	e = create_proc_entry("fs/pnfs_block/ctl", 0, NULL);
++	if (!e) {
++		remove_proc_entry("fs/pnfs_block", NULL);	/* undo the mkdir */
++		return -ENOMEM;
++	}
++	e->proc_fops = &ctl_ops;
++	return 0;
++}
++#endif /* CONFIG_SPNFS_BLOCK */
+diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/bl_ops.c
+--- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-08-23 12:09:03.299501445 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-08-23 12:09:03.299501445 -0400
+@@ -0,0 +1,1672 @@
++/*
++ *  bl_ops.c
++ *  spNFS
++ *
++ *  Created by Rick McNeal on 4/1/08.
++ *  Copyright 2008 __MyCompanyName__. All rights reserved.
++ *
++ */
++
++/*
++ * Block layout operations.
++ *
++ * These functions, with the exception of pnfs_block_enabled, are assigned to
++ * the super block s_export_op structure.
++ */
++#if defined(CONFIG_SPNFS_BLOCK)
++
++#include <linux/module.h>
++#include <linux/genhd.h>
++#include <linux/fs.h>
++#include <linux/exportfs.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd/export.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/nfsd/debug.h>
++#include <linux/spinlock_types.h>
++#include <linux/dm-ioctl.h>
++#include <asm/uaccess.h>
++#include <linux/falloc.h>
++#include <linux/nfsd4_block.h>
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY	NFSDDBG_PNFS
++
++#define MIN(a, b) ((a) < (b) ? (a) : (b))
++
++#define BL_LAYOUT_HASH_BITS	4
++#define BL_LAYOUT_HASH_SIZE	(1 << BL_LAYOUT_HASH_BITS)
++#define BL_LAYOUT_HASH_MASK	(BL_LAYOUT_HASH_SIZE - 1)
++#define BL_LIST_REQ	(sizeof (struct dm_ioctl) + 256)
++
++#define bl_layout_hashval(id) \
++	((id) & BL_LAYOUT_HASH_MASK)
++
++#define BLL_F_END(p) ((p)->bll_foff + (p)->bll_len)
++#define BLL_S_END(p) ((p)->bll_soff + (p)->bll_len)
++#define _2SECTS(v) ((v) >> 9)
++
++#ifndef READ32
++#define READ32(x)	(x) = ntohl(*p++)
++#define READ64(x)	do {			\
++(x) = (u64)ntohl(*p++) << 32;	\
++(x) |= ntohl(*p++);		\
++} while (0)
++#endif
++
++
++typedef enum {True, False} boolean_t;	/* NB: True is 0 and False is 1 */
++/* ---- block layoutget and commit structure ---- */
++typedef struct bl_layout_rec {
++	struct list_head	blr_hash,
++				blr_layouts;
++	dev_t			blr_rdev;
++	struct inode		*blr_inode;
++	int			blr_recalled;	// debug
++	u64			blr_orig_size,
++				blr_commit_size,
++				blr_ext_size;
++	spinlock_t		blr_lock;	// Protects blr_layouts
++} bl_layout_rec_t;
++
++static struct list_head layout_hash;
++static struct list_head layout_hashtbl[BL_LAYOUT_HASH_SIZE];
++static spinlock_t layout_hashtbl_lock;
++
++/* ---- prototypes ---- */
++static boolean_t device_slice(dev_t devid);
++static boolean_t device_dm(dev_t devid);
++static boolean_t layout_inode_add(struct inode *i, bl_layout_rec_t **);
++static bl_layout_rec_t *layout_inode_find(struct inode *i);
++static void layout_inode_del(struct inode *i);
++static char *map_state2name(enum pnfs_block_extent_state4 s);
++static pnfs_blocklayout_devinfo_t *bld_alloc(struct list_head *volume, int type);
++static void bld_free(pnfs_blocklayout_devinfo_t *bld);
++static pnfs_blocklayout_devinfo_t *bld_simple(struct list_head *volumes,
++    dev_t devid, int local_index);
++static pnfs_blocklayout_devinfo_t *bld_slice(struct list_head *volumes,
++    dev_t devid, int my_loc, int idx);
++static int layout_cache_fill_from(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg);
++struct list_head *layout_cache_iter(bl_layout_rec_t *r,
++    struct list_head *bl_possible, struct nfsd4_layout_seg *seg);
++static void layout_cache_merge(bl_layout_rec_t *r, struct list_head *h);
++static int layout_cache_update(bl_layout_rec_t *r, struct list_head *h);
++static void layout_cache_del(bl_layout_rec_t *r, const struct nfsd4_layout_seg *seg);
++static void print_bll(pnfs_blocklayout_layout_t *b, char *);
++static inline boolean_t layout_cache_fill_from_list(bl_layout_rec_t *r,
++    struct list_head *h, struct nfsd4_layout_seg *seg);
++static inline void bll_collapse(bl_layout_rec_t *r,
++    pnfs_blocklayout_layout_t *c);
++static pnfs_blocklayout_layout_t *bll_alloc(u64 offset, u64 len,
++    enum bl_cache_state state, struct list_head *h);
++static pnfs_blocklayout_layout_t *bll_alloc_dup(pnfs_blocklayout_layout_t *b,
++    enum bl_cache_state c, struct list_head *h);
++static inline boolean_t layout_conflict(pnfs_blocklayout_layout_t *b, u32 iomode,
++    enum pnfs_block_extent_state4 *s);
++static void extents_setup(struct fiemap_extent_info *fei);
++static void extents_count(struct fiemap_extent_info *fei, struct inode *i,
++    u64 foff, u64 len);
++static boolean_t extents_get(struct fiemap_extent_info *fei, struct inode *i,
++    u64 foff, u64 len);
++static boolean_t extents_process(struct fiemap_extent_info *fei,
++    struct list_head *bl_candidates, struct nfsd4_layout_seg *, dev_t dev,
++    pnfs_blocklayout_layout_t *b);
++static void extents_cleanup(struct fiemap_extent_info *fei);
++
++void
++nfsd_bl_init(void)	/* set up the layout hash state and the proc interface */
++{
++	int	i;
++	dprintk("%s loaded\n", __func__);
++
++	spin_lock_init(&layout_hashtbl_lock);
++	INIT_LIST_HEAD(&layout_hash);
++	for (i = 0; i < BL_LAYOUT_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&layout_hashtbl[i]);
++	bl_init_proc();	/* NOTE(review): the return value is ignored */
++}
++
++/*
++ * pnfs_block_enabled -- check to see if this file system should be exported
++ * as block pnfs; returns 1 if so, 0 otherwise
++ */
++int
++pnfs_block_enabled(struct inode *inode, int ex_flags)
++{
++	bl_comm_msg_t	msg;
++	bl_comm_res_t	*res	= NULL;
++	static int bl_comm_once	= 0;	/* set after the first successful version check */
++	
++	dprintk("--> %s\n", __func__);
++	/*
++	 * FIXME: Figure out method to determine if this file system should
++	 * be exported. The following areas need to be checked.
++	 * (1) Validate that this file system was exported as a pNFS
++	 *     block-layout
++	 * (2) Has there been successful communication with the
++	 *     volume daemon?
++	 */
++	/* Check #1 */
++#ifdef notyet
++	if (!(ex_flags & NFSEXP_PNFS_BLOCK)) {
++		dprintk("%s: pnfs_block not set in export\n", __func__);
++		return 0;
++	}
++#endif
++	
++	/* Check #2 */
++	if (!bl_comm_once) {
++		msg.msg_type = PNFS_UPCALL_MSG_VERS;
++		msg.u.msg_vers = PNFS_UPCALL_VERS;
++		if (bl_upcall(bl_comm_global, &msg, &res)) {
++			dprintk("%s: Failed to contact pNFS block daemon\n",
++				__func__);
++			return 0;
++		}
++		if (msg.u.msg_vers != res->u.vers) {	/* daemon's version must equal ours */
++			dprintk("%s: vers mismatch, kernel != daemon\n",
++				__func__);
++			kfree(res);
++			return 0;
++		}
++	}
++	bl_comm_once = 1;
++
++	kfree(res);	/* res is still NULL when the check was skipped */
++	
++	dprintk("<-- %s okay\n", __func__);
++	return 1;
++}
++
++int
++bl_layout_type(struct super_block *sb)	/* @sb is not consulted */
++{
++	return LAYOUT_BLOCK_VOLUME;	/* constant for every super block */
++}
++
++int
++bl_getdeviceiter(struct super_block *sb,
++		 u32 layout_type,	/* not consulted */
++		 struct nfsd4_pnfs_dev_iter_res *res)
++{
++	res->gd_eof = 1;	/* set on every call: only one device is ever reported */
++	if (res->gd_cookie)	/* non-zero cookie: the single device was already returned */
++		return -ENOENT;
++	res->gd_devid	= sb->s_dev;	/* the device id is the super block's dev_t */
++	res->gd_verf	= 1;
++	res->gd_cookie	= 1;
++	return 0;
++}
++
++/*
++ * bl_getdeviceinfo_slice -- encode a simple volume and the slice on top of it
++ */
++static int
++bl_getdeviceinfo_slice(struct super_block *sb, struct exp_xdr_stream *xdr,
++		       const struct nfsd4_pnfs_deviceid *devid)
++{
++	pnfs_blocklayout_devinfo_t	*simple,
++					*slice,
++					*vol;
++	int				rval	= -EIO,
++					slot	= 0;
++	struct list_head		volumes;
++
++	dprintk("--> %s\n", __func__);
++	INIT_LIST_HEAD(&volumes);
++
++	/* The slice is handed bld_index_loc of the simple volume built first. */
++	simple = bld_simple(&volumes, devid->devid, slot++);
++	if (simple != NULL) {
++		slice = bld_slice(&volumes, devid->devid, slot++,
++		    simple->bld_index_loc);
++		if (slice != NULL)
++			rval = blocklayout_encode_devinfo(xdr, &volumes);
++	}
++
++	/* Tear the volume list down again, whether or not encoding worked. */
++	while (!list_empty(&volumes)) {
++		vol = list_entry(volumes.next, pnfs_blocklayout_devinfo_t,
++		    bld_list);
++		if (vol->bld_type == PNFS_BLOCK_VOLUME_SIMPLE)
++			kfree(vol->u.simple.bld_sig);
++		bld_free(vol);
++	}
++
++	dprintk("<-- %s (rval %d)\n", __func__, rval);
++	return rval;
++}
++
++static int
++bl_getdeviceinfo_dm(struct super_block *sb, struct exp_xdr_stream *xdr,
++		    const struct nfsd4_pnfs_deviceid *devid)
++{
++	pnfs_blocklayout_devinfo_t	*bld		= NULL,	// stripe volume
++					*vol;
++	int				status		= -EIO,	// default to error
++					i,
++					location	= 0;
++	struct list_head		volumes;
++	bl_comm_msg_t			msg;
++	bl_comm_res_t			*res		= NULL;	// kfree()d at out:
++
++	dprintk("--> %s\n", __func__);
++	INIT_LIST_HEAD(&volumes);
++
++	msg.msg_type = PNFS_UPCALL_MSG_DMGET;
++	msg.u.msg_dev = devid->devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("%s: upcall for DMGET failed\n", __func__);
++		goto out;
++	}
++
++	/*
++	 * Don't use bld_alloc() here. If used this will be the first volume
++	 * type added to the list whereas the protocol requires it to be the
++	 * last. Until it is linked in below, 'bld' is freed by hand at out:.
++	 */
++	bld = kzalloc(sizeof (*bld), GFP_KERNEL);
++	if (!bld)
++		goto out;
++	bld->bld_type			= PNFS_BLOCK_VOLUME_STRIPE;
++	bld->u.stripe.bld_stripes	= res->u.stripe.num_stripes;
++	bld->u.stripe.bld_chunk_size	= res->u.stripe.stripe_size * 512LL;
++	dprintk("%s: stripes %d, chunk_size %Lu\n", __func__,
++	    bld->u.stripe.bld_stripes, bld->u.stripe.bld_chunk_size / 512LL);
++	bld->u.stripe.bld_stripe_indexs = kcalloc(bld->u.stripe.bld_stripes,
++						  sizeof (int), GFP_KERNEL);
++	if (!bld->u.stripe.bld_stripe_indexs)
++		goto out;
++
++	for (i = 0; i < bld->u.stripe.bld_stripes; i++) {
++		dev_t			dev;
++		pnfs_blocklayout_devinfo_t	*bldp;
++
++		dev = MKDEV(res->u.stripe.devs[i].major,
++			    res->u.stripe.devs[i].minor);
++		if (dev == 0)
++			goto out;
++		bldp = bld_simple(&volumes, dev, location++);
++		if (!bldp) {
++			dprintk("%s: bld_simple failed\n", __func__);
++			goto out;
++		}
++		bldp = bld_slice(&volumes, dev, location++, bldp->bld_index_loc);
++		if (!bldp) {
++			dprintk("%s: bld_slice failed\n", __func__);
++			goto out;
++		}
++		bld->u.stripe.bld_stripe_indexs[i] = bldp->bld_index_loc;
++	}
++	list_add_tail(&bld->bld_list, &volumes);
++	bld = NULL;	// the list owns the stripe volume from here on
++	status = blocklayout_encode_devinfo(xdr, &volumes);
++
++out:
++	if (bld) {	// failed before the stripe volume was linked in
++		kfree(bld->u.stripe.bld_stripe_indexs);
++		kfree(bld);
++	}
++	while (!list_empty(&volumes)) {
++		vol = list_entry(volumes.next, pnfs_blocklayout_devinfo_t,
++		    bld_list);
++		switch (vol->bld_type) {
++			case PNFS_BLOCK_VOLUME_SLICE:
++			case PNFS_BLOCK_VOLUME_CONCAT:
++				break;	// No memory to release for these
++			case PNFS_BLOCK_VOLUME_SIMPLE:
++				kfree(vol->u.simple.bld_sig);
++				break;
++			case PNFS_BLOCK_VOLUME_STRIPE:
++				kfree(vol->u.stripe.bld_stripe_indexs);
++				break;
++		}
++		bld_free(vol);
++	}
++	kfree(res);
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++/*
++ * bl_getdeviceinfo -- encode the device tree for the requested devid
++ */
++int
++bl_getdeviceinfo(struct super_block *sb, struct exp_xdr_stream *xdr,
++		 u32 layout_type,
++		 const struct nfsd4_pnfs_deviceid *devid)
++{
++	if (device_slice(devid->devid) == True)
++		return bl_getdeviceinfo_slice(sb, xdr, devid);
++	if (device_dm(devid->devid) != True)
++		return -EINVAL;	/* neither a slice nor a dm device */
++	return bl_getdeviceinfo_dm(sb, xdr, devid);
++}
++
++enum nfsstat4
++bl_layoutget(struct inode *i, struct exp_xdr_stream *xdr,
++	     const struct nfsd4_pnfs_layoutget_arg *arg,
++	     struct nfsd4_pnfs_layoutget_res *res)
++{
++	pnfs_blocklayout_layout_t	*b;
++	bl_layout_rec_t			*r;
++	struct list_head		bl_possible,
++					*bl_candidates	= NULL;
++	boolean_t			del_on_error	= False;
++	int				adj;
++	enum nfsstat4			nfserr		= NFS4_OK;
++
++	dprintk("--> %s (inode=[0x%x:%lu], offset=%Lu, len=%Lu, iomode=%d)\n",
++	    __func__, i->i_sb->s_dev, i->i_ino, _2SECTS(res->lg_seg.offset),
++	    _2SECTS(res->lg_seg.length), res->lg_seg.iomode);
++
++	if (res->lg_seg.length == 0) {
++		printk("%s: request length of 0, error condition\n", __func__);
++		return NFS4ERR_BADLAYOUT;
++	}
++
++	/*
++	 * Adjust the length as required per spec.
++	 * - First case is where the length is set to (u64)-1. Cheap means to
++	 *   define the end of the file.
++	 * - Second case is where the I/O mode is read-only, but the request is
++	 *   past the end of the file so the request needs to be trimmed.
++	 */
++	if ((res->lg_seg.length == NFS4_MAX_UINT64) ||
++	    (((res->lg_seg.offset + res->lg_seg.length) > i->i_size) &&
++	     (res->lg_seg.iomode == IOMODE_READ)))
++		res->lg_seg.length = i->i_size - res->lg_seg.offset;
++	/* NOTE(review): the subtraction above wraps when offset > i_size */
++	adj = res->lg_seg.offset & 511;	/* round out to 512-byte sectors */
++	res->lg_seg.offset -= adj;
++	res->lg_seg.length = (res->lg_seg.length + adj + 511) & ~511;
++	/* Non-read layouts need backing store and a fs that can fallocate. */
++	if (res->lg_seg.iomode != IOMODE_READ)
++		if (!i->i_op->fallocate ||
++		    i->i_op->fallocate(i, FALLOC_FL_KEEP_SIZE,
++				       res->lg_seg.offset, res->lg_seg.length))
++			return NFS4ERR_IO;
++
++	INIT_LIST_HEAD(&bl_possible);
++
++	if ((r = layout_inode_find(i)) == NULL) {
++		if (layout_inode_add(i, &r) == False) {
++			printk("%s: layout_inode_add failed\n", __func__);
++			return NFS4ERR_IO;
++		}
++		del_on_error = True;
++	}
++	BUG_ON(!r);
++	/* NOTE(review): GFP_KERNEL allocations happen under this spinlock */
++	spin_lock(&r->blr_lock);
++	if (layout_cache_fill_from(r, &bl_possible, &res->lg_seg)) {
++		/*
++		 * This will send NFS4ERR_LAYOUTTRYLATER to the client.
++		 */
++		dprintk("%s: layout_cache_fill_from() failed\n", __func__);
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	res->lg_return_on_close	= 1;
++	res->lg_seg.length	= 0;
++
++	bl_candidates = layout_cache_iter(r, &bl_possible, &res->lg_seg);
++	if (!bl_candidates) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	layout_cache_merge(r, bl_candidates);
++	if (layout_cache_update(r, bl_candidates)) {
++		/* ---- Failed to allocate memory. ---- */
++		dprintk("%s: layout_cache_update() failed\n", __func__);
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	nfserr = blocklayout_encode_layout(xdr, bl_candidates);
++	if (nfserr)
++		dprintk("%s: layoutget xdr routine failed\n", __func__);
++
++layoutget_cleanup:
++	/* Free both work lists and the kmalloc()ed candidate list head. */
++	if (bl_candidates)
++		list_splice_init(bl_candidates, &bl_possible);
++	kfree(bl_candidates);
++	while (!list_empty(&bl_possible)) {
++		b = list_entry(bl_possible.next,
++		    struct pnfs_blocklayout_layout, bll_list);
++		list_del(&b->bll_list);
++		kfree(b);
++	}
++	spin_unlock(&r->blr_lock);
++	if (unlikely(nfserr)) {
++		if (del_on_error == True)
++			layout_inode_del(i);
++		res->lg_seg.length = 0;
++		res->lg_seg.offset = 0;
++	}
++	dprintk("<-- %s (rval %u)\n", __func__, nfserr);
++	return nfserr;
++}
++
++/*
++ * bl_layoutcommit -- commit changes, especially size, to file system
++ *
++ * Currently this routine isn't called and everything is handled within
++ * nfsd4_layoutcommit(). By not calling this routine the server doesn't
++ * handle a partial return, a set of extents, of the layout. The extents
++ * are decoded here, but nothing is done with them. If this routine is to
++ * be called the interface must change to pass the 'dentry' pointer such
++ * that notify_change() can be called.
++ */
++int
++bl_layoutcommit(struct inode *i,
++		const struct nfsd4_pnfs_layoutcommit_arg *args,
++		struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	bl_layout_rec_t			*r;
++	int				status	= 0;
++	u64				lw_plus;
++
++	dprintk("--> %s (ino [0x%x:%lu])\n", __func__, i->i_sb->s_dev, i->i_ino);
++	r = layout_inode_find(i);
++	if (r) {
++		lw_plus = args->lc_last_wr + 1;
++		if (args->lc_newoffset) {
++			dprintk("  lc_last_wr %Lu\n", lw_plus);
++			if (r->blr_orig_size < lw_plus) {
++				r->blr_orig_size	= lw_plus;
++				res->lc_size_chg	= 1;
++				res->lc_newsize		= lw_plus;
++			}
++		}
++
++		if (args->lc_up_len) {
++			int	extents,
++				n;
++			struct pnfs_blocklayout_layout *b = NULL;
++			__be32 *p = args->lc_up_layout;
++
++			/*
++			 * Client is returning a set of extents which
++			 * should/could be used to update the file system.
++			 * See section 2.3.2 in draft-ietf-nfsv4-pnfs-block-08
++			 */
++			READ32(extents);
++			dprintk("  Client returning %d extents: data size %d\n",
++			    extents, args->lc_up_len);
++			/* 4-byte count, then 44 bytes (5 x u64 + u32) per extent. */
++			/* NOTE(review): assumes lc_up_len is in bytes -- confirm. */
++			if (extents < 0 ||
++			    4 + (u64)extents * 44 > args->lc_up_len)
++				status = -EINVAL;
++			else if (!(b = kcalloc(extents, sizeof (*b), GFP_KERNEL)))
++				status = -ENOMEM;
++			for (n = 0; b && n < extents; n++) {
++				READ64(b[n].bll_vol_id.sbid);
++				READ64(b[n].bll_vol_id.devid);
++				READ64(b[n].bll_foff);
++				READ64(b[n].bll_len);
++				READ64(b[n].bll_soff);
++				READ32(b[n].bll_es);
++				dprintk("  %d: foff %Lu, len %Lu, soff %Lu "
++				    "state %s\n",
++				    n, _2SECTS(b[n].bll_foff),
++				    _2SECTS(b[n].bll_len),
++				    _2SECTS(b[n].bll_soff),
++				    map_state2name(b[n].bll_es));
++			}
++			kfree(b);
++		}
++	} else
++		dprintk("%s: Unexpected commit to inode %p\n", __func__, i);
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++int
++bl_layoutreturn(struct inode *i,
++		const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	bl_layout_rec_t			*rec;
++	int				rval	= 0;
++
++	dprintk("--> %s (ino [0x%x:%lu])\n", __func__, i->i_sb->s_dev, i->i_ino);
++
++	/* Drop the returned segment from the cache, if the inode has one. */
++	rec = layout_inode_find(i);
++	if (rec != NULL) {
++		spin_lock(&rec->blr_lock);
++		layout_cache_del(rec, &args->lr_seg);
++		spin_unlock(&rec->blr_lock);
++		dprintk("    ext_size %Lu, i_size %Lu, orig_size %Lu\n",
++		    rec->blr_ext_size, i->i_size, rec->blr_orig_size);
++	}
++	layout_inode_del(i);	/* done whether or not a record was found */
++	dprintk("<-- %s (rval %d)\n", __func__, rval);
++	return rval;
++}
++
++int
++bl_layoutrecall(struct inode *inode, int type, u64 offset, u64 len)
++{
++	struct super_block		*sb;
++	struct nfsd4_pnfs_cb_layout	lr;
++	bl_layout_rec_t			*r;
++	pnfs_blocklayout_layout_t	*b;
++	u64				adj;
++	/* Only RETURN_FILE is implemented; the other types just log. */
++	dprintk("--> %s\n", __func__);
++	BUG_ON(!len);
++	switch (type) {
++		case RETURN_FILE:
++			sb = inode->i_sb;
++			dprintk("  recalling layout [0x%x:%lu], %Lu:%Lu\n",
++			    inode->i_sb->s_dev, inode->i_ino,
++				_2SECTS(offset), _2SECTS(len));
++			break;
++		case RETURN_FSID:
++			sb = inode->i_sb;
++			dprintk("%s: recalling layout for fsid x (unimplemented)\n",
++				__func__);
++			return 0;
++		case RETURN_ALL:
++			/*
++			 * XXX figure out how to get a sb since there's no
++			 * inode ptr
++			 */
++			dprintk("%s: recalling all layouts (unimplemented)\n",
++				__func__);
++			return 0;
++		default:
++			return -EINVAL;
++	}
++
++restart:
++	r = layout_inode_find(inode);
++	if (r && len && !r->blr_recalled) {
++		spin_lock(&r->blr_lock);
++		list_for_each_entry(b, &r->blr_layouts, bll_list) {
++			if (!r->blr_recalled && !b->bll_recalled &&
++			    (offset >= b->bll_foff) && (offset < BLL_F_END(b))) {
++				b->bll_recalled		= 1;
++				lr.cbl_recall_type	= type;
++				lr.cbl_seg.layout_type	= LAYOUT_BLOCK_VOLUME;
++				lr.cbl_seg.clientid	= 0;
++				lr.cbl_seg.offset	= 0;
++				lr.cbl_seg.length	= NFS4_MAX_UINT64;
++				r->blr_recalled		= 1;
++				dprintk("  FULL LAYOUTRECALL\n");
++				lr.cbl_seg.iomode = IOMODE_ANY;
++
++				/*
++				 * Currently there are only two cases where the
++				 * layout is being returned.
++				 *    (1) Someone is issuing a NFS_WRITE operation
++				 *        to this layout.
++				 *    (2) The file has been truncated which means
++				 *        the layout is immediately made invalid.
++				 * In both cases the client must write any
++				 * uncommitted modifications to the server via
++				 * NFS_WRITE.
++				 */
++				lr.cbl_layoutchanged = 1;
++
++				/*
++				 * Need to drop the lock because we'll get a
++				 * layoutreturn which will block waiting for
++				 * the lock, on this same thread (deadlock).
++				 * It may free 'b', so size the step first.
++				 */
++				adj = MIN(b->bll_len - (offset - b->bll_foff),
++				    len);
++				spin_unlock(&r->blr_lock);
++				nfsd_layout_recall_cb(sb, inode, &lr);
++				offset += adj;
++				len -= adj;
++				if (!len) {
++					spin_lock(&r->blr_lock);
++					break;
++				}
++				/*
++				 * Since layoutreturn will have been called we
++				 * can't assume blr_layouts is still valid,
++				 * so restart.
++				 */
++				goto restart;
++			}
++		}
++		spin_unlock(&r->blr_lock);
++	}
++
++	dprintk("<-- %s\n", __func__);
++	return 0;
++}
++
++/*
++ * []------------------------------------------------------------------[]
++ * | Support functions from here on down.				|
++ * []------------------------------------------------------------------[]
++ */
++
++/*
++ * bld_simple -- given a dev_t build a simple volume structure
++ *
++ * Simple volume contains the device signature and offset to that data in
++ * the storage volume. Returns NULL if the upcall or an allocation fails.
++ */
++static pnfs_blocklayout_devinfo_t *
++bld_simple(struct list_head *volumes, dev_t devid, int local_index)
++{
++	pnfs_blocklayout_devinfo_t	*bld	= NULL;
++	bl_comm_msg_t			msg;
++	bl_comm_res_t			*res	= NULL;
++
++	msg.msg_type = PNFS_UPCALL_MSG_GETSIG;
++	msg.u.msg_dev = devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("%s: Failed to get signature information\n", __func__);
++		goto error;
++	}
++
++	bld = bld_alloc(volumes, PNFS_BLOCK_VOLUME_SIMPLE);
++	if (!bld)
++		goto error;	/* the upcall reply still has to be freed */
++
++	bld->bld_index_loc = local_index;	/* callers read this back */
++	bld->u.simple.bld_offset = (res->u.sig.sector * 512LL) + res->u.sig.offset;
++	bld->u.simple.bld_sig_len = res->u.sig.len;
++	bld->u.simple.bld_sig = kmalloc(res->u.sig.len, GFP_KERNEL);
++	if (!bld->u.simple.bld_sig)
++		goto error;
++
++	memcpy(bld->u.simple.bld_sig, res->u.sig.sig, res->u.sig.len);
++	kfree(res);
++	return bld;
++
++error:
++	if (bld)
++		bld_free(bld);
++	kfree(res);
++	dprintk("%s: error in bld_simple\n", __func__);
++	return NULL;
++}
++
++/*
++ * bld_slice -- given a dev_t build a slice volume structure
++ *
++ * The slice records its length and its offset from the start of the
++ * storage volume (upcall values multiplied by 512), together with the
++ * index of the "simple" volume which contains it.
++ */
++static pnfs_blocklayout_devinfo_t *
++bld_slice(struct list_head *volumes, dev_t devid, int my_loc, int simple_loc)
++{
++	pnfs_blocklayout_devinfo_t	*slice;
++	bl_comm_msg_t			req;
++	bl_comm_res_t			*reply;
++
++	dprintk("--> %s\n", __func__);
++	slice = bld_alloc(volumes, PNFS_BLOCK_VOLUME_SLICE);
++	if (slice == NULL)
++		return NULL;
++
++	req.msg_type	= PNFS_UPCALL_MSG_GETSLICE;
++	req.u.msg_dev	= devid;
++	if (bl_upcall(bl_comm_global, &req, &reply) != 0) {
++		dprintk("Upcall to get slice info failed\n");
++		bld_free(slice);
++		return NULL;
++	}
++
++	slice->u.slice.bld_index	= simple_loc;
++	slice->u.slice.bld_len		= reply->u.slice.length * 512LL;
++	slice->u.slice.bld_start	= reply->u.slice.start * 512LL;
++	slice->bld_index_loc		= my_loc;
++	slice->bld_devid.devid		= devid;
++
++	dprintk("%s: start %Lu, len %Lu\n", __func__,
++		slice->u.slice.bld_start / 512LL, slice->u.slice.bld_len / 512LL);
++
++	kfree(reply);
++	dprintk("<-- %s (rval %p)\n", __func__, slice);
++	return slice;
++}
++
++static int
++layout_cache_fill_from(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*rest;
++
++	dprintk("--> %s\n", __func__);
++	/* Let the cached layouts, if there are any, claim their part first. */
++	if (!list_empty(&r->blr_layouts) &&
++	    layout_cache_fill_from_list(r, h, seg) == False)
++		return -EIO;
++
++	/*
++	 * Whatever length is left over is queued as one new entry:
++	 *    (1) When blr_layouts is empty this creates the first entry
++	 *    (2) When the range requested falls past the end of any current
++	 *        layout this takes care of the residual.
++	 */
++	if (seg->length != 0) {
++		rest = bll_alloc(seg->offset, seg->length, BLOCK_LAYOUT_NEW, h);
++		if (rest == NULL)
++			return -ENOMEM;
++		dprintk("  remaining at %Lu, len %Lu\n", _2SECTS(rest->bll_foff),
++			_2SECTS(rest->bll_len));
++	}
++
++	dprintk("<-- %s\n", __func__);
++	return 0;
++}
++
++struct list_head *
++layout_cache_iter(bl_layout_rec_t *r, struct list_head *bl_possible,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*n		= NULL;
++	struct list_head		*bl_candidates	= NULL;
++	struct fiemap_extent_info	fei;
++	struct inode			*i;
++	dev_t				dev;
++
++	dev	= r->blr_rdev;
++	i	= r->blr_inode;
++
++	dprintk("--> %s\n", __func__);
++	bl_candidates = kmalloc(sizeof (*bl_candidates), GFP_KERNEL);
++	if (!bl_candidates)
++		return NULL;
++	INIT_LIST_HEAD(bl_candidates);
++	extents_setup(&fei);
++
++	list_for_each_entry(b, bl_possible, bll_list) {
++		if (b->bll_cache_state == BLOCK_LAYOUT_NEW) {
++			extents_count(&fei, i, b->bll_foff, b->bll_len);
++			if (fei.fi_extents_mapped) {
++				/*
++				 * Common case here. Got a range which has
++				 * extents. Now get those extents and process
++				 * them into pNFS extents.
++				 */
++				if (extents_get(&fei, i, b->bll_foff,
++				    b->bll_len) == False)
++					goto cleanup;
++				if (extents_process(&fei, bl_candidates,
++				    seg, dev, b) == False)
++					goto cleanup;
++				extents_cleanup(&fei);
++			} else if (seg->iomode == IOMODE_READ) {
++				/*
++				 * Found a hole in a file while reading. No
++				 * problem, just create a pNFS extent for the
++				 * range and let the client know there's no
++				 * backing store.
++				 */
++				n = bll_alloc(b->bll_foff, b->bll_len,
++				    BLOCK_LAYOUT_NEW, bl_candidates);
++				if (!n)
++					goto cleanup;
++				n->bll_es = PNFS_BLOCK_NONE_DATA;
++				n->bll_vol_id.sbid = 0;
++				n->bll_vol_id.devid = dev;
++				seg->length += b->bll_len;
++			} else {
++				/*
++				 * There's a problem here. Since the iomode
++				 * is read/write fallocate should have allocated
++				 * any necessary storage for the given range.
++				 */
++				dprintk("    Extent count for RW is 0\n");
++				goto cleanup;
++			}
++		} else {
++			n = bll_alloc_dup(b, b->bll_cache_state, bl_candidates);
++			if (!n)
++				goto cleanup;
++			seg->length += n->bll_len;
++		}
++
++		if (r->blr_ext_size < (b->bll_foff + b->bll_len))
++			r->blr_ext_size = b->bll_foff + b->bll_len;
++	}
++
++	b = list_first_entry(bl_candidates, struct pnfs_blocklayout_layout,
++	    bll_list);
++	seg->offset = b->bll_foff;
++	dprintk("<-- %s okay\n", __func__);
++	goto drain;
++
++cleanup:
++	/* Error: fold the partial results into bl_possible to free them too. */
++	extents_cleanup(&fei);
++	list_splice_init(bl_candidates, bl_possible);
++	kfree(bl_candidates);
++	bl_candidates = NULL;
++	dprintk("<-- %s, error occurred\n", __func__);
++drain:
++	while (!list_empty(bl_possible)) {
++		b = list_entry(bl_possible->next,
++		    struct pnfs_blocklayout_layout, bll_list);
++		list_del(&b->bll_list);
++		kfree(b);
++	}
++	return bl_candidates;
++}
++
++/*
++ * layout_cache_merge -- collapse layouts which make up a contiguous range.
++ */
++static void
++layout_cache_merge(bl_layout_rec_t *r, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*p;
++	/* 'p' is the entry before 'b'; 'b' is folded into it when they abut. */
++	dprintk("--> %s\n", __func__);
++restart:
++	p = NULL;
++	list_for_each_entry(b, h, bll_list) {
++		if (p && (BLL_S_END(p) == b->bll_soff) &&
++		    (p->bll_es == b->bll_es) &&
++		    (b->bll_es != PNFS_BLOCK_NONE_DATA)) {
++			/*
++			 * We've got a candidate.
++			 */
++#ifdef too_verbose
++			dprintk("  merge %Lu(f):%Lu(l):%Lu(s) into %Lu(f):%Lu(l):%Lu(s)\n",
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				_2SECTS(b->bll_soff),
++				_2SECTS(p->bll_foff), _2SECTS(p->bll_len),
++				_2SECTS(p->bll_soff));
++#endif
++			/* 'b' is freed below, so the walk starts over each time. */
++			if (p->bll_cache_state == BLOCK_LAYOUT_CACHE)
++				p->bll_cache_state = BLOCK_LAYOUT_UPDATE;
++			p->bll_len += b->bll_len;
++			list_del(&b->bll_list);
++			kfree(b);
++			goto restart;
++		} else if (p && (BLL_F_END(p) == b->bll_foff) &&
++			   (p->bll_es == b->bll_es) &&
++			   (b->bll_es == PNFS_BLOCK_NONE_DATA)) {
++			p->bll_len += b->bll_len;
++			list_del(&b->bll_list);
++			kfree(b);
++			goto restart;
++		} else
++			p = b;
++	}
++	dprintk("<-- %s\n", __func__);
++}
++
++static int
++layout_cache_update(bl_layout_rec_t *r, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*c,
++					*n;
++	int				status = 0;	/* 0 or -ENOMEM */
++
++	dprintk("--> %s\n", __func__);
++	if (list_empty(&r->blr_layouts)) {
++		/* ---- Just add entries and return ---- */
++		dprintk("  cache empty for inode 0x%x:%ld\n", r->blr_rdev,
++			r->blr_inode->i_ino);
++		list_for_each_entry(b, h, bll_list) {
++			c = bll_alloc_dup(b, BLOCK_LAYOUT_CACHE,
++					  &r->blr_layouts);
++			if (!c) {
++				status = -ENOMEM;
++				break;
++			}
++			dprintk("    adding %Lu(f):%Lu(l):%Lu(s):%d\n",
++				_2SECTS(c->bll_foff), _2SECTS(c->bll_len),
++				_2SECTS(c->bll_soff), c->bll_es);
++		}
++		return status;
++	}
++
++	list_for_each_entry(b, h, bll_list) {
++		BUG_ON(!b->bll_vol_id.devid);
++		if (b->bll_cache_state == BLOCK_LAYOUT_UPDATE) {
++			boolean_t found = False;
++			list_for_each_entry(c, &r->blr_layouts, bll_list) {
++				if ((b->bll_soff >= c->bll_soff) &&
++				    (b->bll_soff < BLL_S_END(c)) &&
++				    (b->bll_es != PNFS_BLOCK_NONE_DATA)) {
++					u64	u;
++
++					if ((b->bll_foff < c->bll_foff) ||
++					    (b->bll_foff > BLL_F_END(c)))
++						BUG();
++
++					u = BLL_S_END(b) - BLL_S_END(c);
++					/*
++					 * The updated cache entry has to be
++					 * different than the current.
++					 * Otherwise the cache state for 'b'
++					 * should be BLOCK_LAYOUT_CACHE.
++					 */
++					BUG_ON(BLL_S_END(b) < BLL_S_END(c));
++
++					dprintk("  "
++						"updating %Lu(f):%Lu(l):%Lu(s) to len %Lu\n",
++						_2SECTS(c->bll_foff),
++						_2SECTS(c->bll_len),
++						_2SECTS(c->bll_soff),
++						_2SECTS(c->bll_len + u));
++					c->bll_len += u;
++					bll_collapse(r, c);
++					found = True;
++					break;
++				}
++			}
++
++			if (found == False) {
++				dprintk("  ERROR Expected to find"
++				    " %Lu(f):%Lu(l):%Lu(s), but didn't\n",
++				    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				    _2SECTS(b->bll_soff));
++				list_for_each_entry(c, &r->blr_layouts, bll_list)
++					print_bll(c, "Cached");
++				BUG();
++			}
++		} else if (b->bll_cache_state == BLOCK_LAYOUT_NEW) {
++			c = list_first_entry(&r->blr_layouts,
++			    struct pnfs_blocklayout_layout, bll_list);
++			if (b->bll_foff < c->bll_foff) {
++				/*
++				 * Special case where new entry is before
++				 * first cached entry.
++				 */
++				c = bll_alloc_dup(b, BLOCK_LAYOUT_CACHE, NULL);
++				if (!c)
++					return -ENOMEM;
++				list_add(&c->bll_list, &r->blr_layouts);
++				dprintk("  new entry at head of list at %Lu, "
++					"len %Lu\n",
++					_2SECTS(c->bll_foff), _2SECTS(c->bll_len));
++			} else {
++				list_for_each_entry(c, &r->blr_layouts,
++				    bll_list) {
++					n = list_entry(c->bll_list.next,
++					    struct pnfs_blocklayout_layout,
++					    bll_list);
++					/*
++					 * This is ugly, but can't think of
++					 * another way to examine this case.
++					 * Consider adding an entry which
++					 * starts at 40 to a cache holding:
++					 * Start    Length
++					 * 10       5
++					 * 30       5
++					 * 50       5
++					 * So, need to look and see if the new
++					 * entry starts after the current
++					 * cache, but before the next one.
++					 * There's a catch in that the next
++					 * entry might not be valid as it's
++					 * really just a pointer to the list
++					 * head.
++					 */
++					if (((b->bll_foff >=
++					      BLL_F_END(c)) &&
++					     (c->bll_list.next == &r->blr_layouts)) ||
++					    ((b->bll_foff >=
++					      BLL_F_END(c)) &&
++					     (b->bll_foff < n->bll_foff))) {
++						n = bll_alloc_dup(b,
++								  BLOCK_LAYOUT_CACHE, NULL);
++						if (!n)
++							return -ENOMEM;
++						dprintk("  adding new %Lu:%Lu"
++							" after %Lu:%Lu\n",
++							_2SECTS(n->bll_foff),
++							_2SECTS(n->bll_len),
++							_2SECTS(c->bll_foff),
++							_2SECTS(c->bll_len));
++						list_add(&n->bll_list,
++							 &c->bll_list);
++						break;
++					}
++				}
++			}
++		}
++	}
++	dprintk("<-- %s\n", __func__);
++	return status;
++}
++
++static void
++layout_cache_del(bl_layout_rec_t *r, const struct nfsd4_layout_seg *seg_in)
++{
++	struct pnfs_blocklayout_layout	*b,
++					*n;
++	u64				len;
++	struct nfsd4_layout_seg		seg = *seg_in;
++
++	dprintk("--> %s\n", __func__);
++	if (seg.length == NFS4_MAX_UINT64) {
++		r->blr_recalled = 0;
++		dprintk("  Fast return of all layouts\n");
++		while (!list_empty(&r->blr_layouts)) {
++			b = list_entry(r->blr_layouts.next,
++				       struct pnfs_blocklayout_layout, bll_list);
++			dprintk("    foff %Lu, len %Lu, soff %Lu\n",
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				_2SECTS(b->bll_soff));
++			list_del(&b->bll_list);
++			kfree(b);
++		}
++		dprintk("<-- %s\n", __func__);
++		return;
++	}
++
++restart:
++	list_for_each_entry(b, &r->blr_layouts, bll_list) {
++		if (seg.offset == b->bll_foff) {
++			/*
++			 * This handles the following three cases:
++			 * (1) return layout matches entire cache layout
++			 * (2) return layout matches beginning portion of cache
++			 * (3) return layout matches entire cache layout and
++			 *     into next entry. Varies from #1 in end case.
++			 */
++			dprintk("  match on offsets, %Lu:%Lu\n",
++				_2SECTS(seg.offset), _2SECTS(seg.length));
++			len = MIN(seg.length, b->bll_len);
++			b->bll_foff	+= len;
++			b->bll_soff	+= len;
++			b->bll_len	-= len;
++			seg.length	-= len;
++			seg.offset	+= len;
++			if (!b->bll_len) {
++				list_del(&b->bll_list);
++				kfree(b);
++				dprintk("    removing cache line\n");
++				if (!seg.length) {
++					dprintk("    also finished\n");
++					goto complete;
++				}
++				/*
++				 * 'b' was freed and cannot be iterated from.
++				 */
++				goto restart;
++			}
++			if (!seg.length) {
++				dprintk("    finished, but cache line not"
++					"empty\n");
++				goto complete;
++			}
++		} else if ((seg.offset >= b->bll_foff) &&
++		    (seg.offset < BLL_F_END(b))) {
++			/*
++			 * layout being returned is within this cache line.
++			 */
++			dprintk("  layout %Lu:%Lu within cache line %Lu:%Lu\n",
++				_2SECTS(seg.offset), _2SECTS(seg.length),
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len));
++			BUG_ON(!seg.length);
++			if ((seg.offset + seg.length) >= BLL_F_END(b)) {
++				/*
++				 * Layout returned starts in the middle of
++				 * cache entry and just need to trim back
++				 * cache to shorter length.
++				 */
++				dprintk("    trim back cache line\n");
++				len = seg.offset - b->bll_foff;
++				seg.offset += b->bll_len - len;
++				seg.length -= b->bll_len - len;
++				b->bll_len = len;
++				if (!seg.length)
++					return;
++			} else {
++				/*
++				 * Split: a chunk leaves the middle, 'n' is the
++				 * new tail (dropped if it can't be allocated).
++				 */
++				dprintk("    split cache line\n");
++				len = seg.offset + seg.length;
++				n = bll_alloc(len,
++					      (b->bll_foff + b->bll_len) - len,
++					      BLOCK_LAYOUT_CACHE, NULL);
++				if (n) {
++					n->bll_soff = b->bll_soff +
++					    (len - b->bll_foff);
++					n->bll_vol_id = b->bll_vol_id;
++					n->bll_es = b->bll_es;
++					list_add(&n->bll_list, &b->bll_list);
++				}
++				b->bll_len = seg.offset - b->bll_foff;
++				return;
++			}
++		}
++	}
++complete:
++	if (list_empty(&r->blr_layouts))
++		r->blr_recalled = 0;
++	dprintk("<-- %s\n", __func__);
++}
++
++/*
++ * layout_cache_fill_from_list -- fills from cache list
++ *
++ * NOTE: This routine was only separated out from layout_cache_fill_from()
++ * to reduce the indentation level which makes the code easier to read.
++ */
++static inline boolean_t
++layout_cache_fill_from_list(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*n;
++	enum pnfs_block_extent_state4	s;
++
++	list_for_each_entry(b, &r->blr_layouts, bll_list) {
++		if (seg->offset < b->bll_foff) {
++			n = bll_alloc(seg->offset,
++			    MIN(seg->length, b->bll_foff - seg->offset),
++			    BLOCK_LAYOUT_NEW, NULL);
++			if (!n)
++				return False;
++
++			list_add(&n->bll_list, h->prev);
++			dprintk("  new: %Lu:%Lu, added before %Lu:%Lu\n",
++			    _2SECTS(n->bll_foff), _2SECTS(n->bll_len),
++			    _2SECTS(b->bll_foff), _2SECTS(b->bll_len));
++			seg->offset += n->bll_len;
++			seg->length -= n->bll_len;
++			if (!seg->length)
++				break;
++		}
++
++		if ((seg->offset >= b->bll_foff) &&
++		    (seg->offset < BLL_F_END(b))) {
++			if (layout_conflict(b, seg->iomode, &s) == False) {
++				dprintk("  CONFLICT FOUND: "
++				    "%Lu(f):%Lu(l):%Lu(s) state %d, iomode %d\n",
++				    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				    _2SECTS(b->bll_soff), b->bll_es,
++				    seg->iomode);
++				return False;
++			}
++			n = bll_alloc(seg->offset,
++			    MIN(seg->length, BLL_F_END(b) - seg->offset),
++			    BLOCK_LAYOUT_CACHE, h);
++			if (!n)
++				return False;
++			dprintk("  CACHE hit: Found %Lu(f):%Lu(l): "
++			    "in %Lu(f):%Lu(l):%Lu(s):%d\n",
++			    _2SECTS(n->bll_foff), _2SECTS(n->bll_len),
++			    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++			    _2SECTS(b->bll_soff), b->bll_es);
++
++			n->bll_soff = b->bll_soff + seg->offset - b->bll_foff;
++			n->bll_vol_id.sbid = 0;
++			n->bll_vol_id.devid = b->bll_vol_id.devid;
++			n->bll_es = s;
++			seg->offset += n->bll_len;
++			seg->length -= n->bll_len;
++			if (!seg->length)
++				break;
++		}
++	}
++	return True;
++}
++
++static u64
++bll_alloc_holey(struct list_head *bl_candidates, u64 offset, u64 length,
++    dev_t dev)
++{
++	pnfs_blocklayout_layout_t	*hole;
++
++	/* A zero return tells the caller the allocation failed. */
++	hole = bll_alloc(offset, length, BLOCK_LAYOUT_NEW, bl_candidates);
++	if (hole == NULL)
++		return 0;
++	hole->bll_vol_id.devid = dev;
++	hole->bll_vol_id.sbid = 0;
++	hole->bll_es = PNFS_BLOCK_NONE_DATA;
++	return hole->bll_len;
++}
++
++static void
++extents_setup(struct fiemap_extent_info *fei)
++{
++	fei->fi_extents_start	= NULL;	/* no extent buffer attached yet */
++}
++
++/*
++ * extents_count -- Determine the number of extents for a given range.
++ *
++ * set_fs() is not needed here: when it is only counting the number of
++ * extents needed the function doesn't use copy_to_user(). The count is
++ * left in fei->fi_extents_mapped.
++ */
++static void
++extents_count(struct fiemap_extent_info *fei, struct inode *i, u64 foff, u64 len)
++{
++	u64	span = len + (1 << i->i_sb->s_blocksize_bits) - 1;
++	dprintk("    Need fiemap of %Ld:%Ld\n", _2SECTS(foff), _2SECTS(len));
++	fei->fi_extents_mapped	= 0;
++	fei->fi_extents_start	= NULL;
++	fei->fi_extents_max	= 0;
++	fei->fi_flags		= FIEMAP_FLAG_SYNC;
++	i->i_op->fiemap(i, fei, foff, span);
++}
++
++/*
++ * extents_get -- Get list of extents for range
++ *
++ * extents_count() must have been called before this routine such that
++ * fi_extents_mapped is known. On True the extent array is left in
++ * fei->fi_extents_start; on False it has already been freed here.
++ */
++static boolean_t
++extents_get(struct fiemap_extent_info *fei, struct inode *i, u64 foff, u64 len)
++{
++	int			rval;
++	struct fiemap_extent	*fe;
++	mm_segment_t		old_fs = get_fs();
++
++	/*
++	 * Now allocate the zeroed space needed; kcalloc() refuses a
++	 * count whose size in bytes would overflow. It's possible for
++	 * the file to have changed between calls which would require
++	 * more space for the extents. If that occurs the last extent
++	 * will not have FIEMAP_EXTENT_LAST set and the error will
++	 * be caught in extents_process().
++	 */
++	fe = kcalloc(fei->fi_extents_mapped, sizeof (*fe), GFP_KERNEL);
++	if (!fe)
++		return False;
++
++	/* Hand the buffer to fiemap and restart the mapped count. */
++	fei->fi_extents_max	= fei->fi_extents_mapped;
++	fei->fi_extents_mapped	= 0;
++	fei->fi_extents_start	= fe;
++
++	/* The extents are copied out as if to user space; allow a kernel buffer. */
++	set_fs(KERNEL_DS);
++	rval = i->i_op->fiemap(i, fei, foff, len +
++	    (1 << i->i_sb->s_blocksize_bits) - 1);
++	set_fs(old_fs);
++
++	if (rval || !fei->fi_extents_mapped) {
++		dprintk("    No extents. Wanted %d, got %d\n",
++			fei->fi_extents_max, fei->fi_extents_mapped);
++		kfree(fe);
++		fei->fi_extents_start = NULL;
++		return False;
++	} else
++		return True;
++}
++
++/*
++ * extents_process -- runs through the extent returned from the file system and
++ *	 creates block layout entries.
++ */
++static boolean_t
++extents_process(struct fiemap_extent_info *fei, struct list_head *bl_candidates,
++    struct nfsd4_layout_seg *seg, dev_t dev, pnfs_blocklayout_layout_t *b)
++{
++	struct fiemap_extent		*fep,
++					*fep_last	= NULL;
++	int				i;
++	pnfs_blocklayout_layout_t	*n;
++	u64				last_end,
++					rval;
++	
++	dprintk("--> %s\n", __func__);
++	for (fep = fei->fi_extents_start, i = 0; i < fei->fi_extents_mapped;
++	    i++, fep++) {
++		
++		BUG_ON(!fep->fe_physical);
++		/*
++		 * Deal with corner cases of hole-y files.
++		 */
++		if (fep_last && ((fep_last->fe_logical + fep_last->fe_length) !=
++				 fep->fe_logical)) {
++			
++			/*
++			 * If the last extent doesn't end logically
++			 * at the beginning of the current we've got
++			 * hole and need to create a pNFS extent.
++			 */
++			dprintk("    Got a hole at %Ld:%Ld \n", 
++			    _2SECTS(fep_last->fe_logical),
++			    _2SECTS(fep_last->fe_length));
++			last_end = fep_last->fe_logical + fep_last->fe_length;
++			rval = bll_alloc_holey(bl_candidates, last_end,
++			    fep->fe_logical - last_end, dev);
++			if (!rval)
++				return False;
++			seg->length += rval;
++		}
++		
++		n = bll_alloc(fep->fe_logical, fep->fe_length,
++		    BLOCK_LAYOUT_NEW, bl_candidates);
++		if (unlikely(n == NULL)) {
++			dprintk("%s: bll_alloc failed\n", __func__);
++			return False;
++		}
++		
++		n->bll_soff = fep->fe_physical;
++		n->bll_es = seg->iomode == IOMODE_READ ?
++		    PNFS_BLOCK_READ_DATA : PNFS_BLOCK_READWRITE_DATA;
++		n->bll_vol_id.sbid = 0;
++		n->bll_vol_id.devid = dev;
++		seg->length += fep->fe_length;
++		print_bll(n, "New extent");
++		fep_last = fep;
++	}
++	dprintk("<-- %s (i=%d)\n", __func__, i);
++	
++	return True;
++}
++
++static void
++extents_cleanup(struct fiemap_extent_info *fei)
++{
++	if (fei->fi_extents_start) {
++		kfree(fei->fi_extents_start);
++		fei->fi_extents_start = NULL;
++	}
++}
++
++/*
++ * device_slice -- True if the disk behind devid has more than one minor
++ */
++static boolean_t
++device_slice(dev_t devid)
++{
++	struct block_device	*bd	= open_by_devnum(devid, FMODE_READ);
++	boolean_t		rval	= False;
++	
++	if (!IS_ERR(bd)) {	/* failure is an ERR_PTR(), never NULL */
++		if (bd->bd_disk->minors > 1)
++			rval = True;
++		blkdev_put(bd, FMODE_READ);
++	}
++	return rval;
++}
++
++/*
++ * device_dm -- check to see if device is a Device Mapper volume.
++ *
++ * Returns True for DM, False if not or if the upcall failed.
++ */
++static boolean_t
++device_dm(dev_t devid)
++{
++	boolean_t		rval = False;
++	bl_comm_msg_t		msg;
++	bl_comm_res_t		*res = NULL;	/* kfree()d on every path */
++	
++	msg.msg_type	= PNFS_UPCALL_MSG_DMCHK;
++	msg.u.msg_dev	= devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("Failed upcall to check on DM status\n");
++	} else if (res->u.dm_vol) {
++		rval = True;
++		dprintk("Device is DM volume\n");
++	} else
++		dprintk("Device is not DM volume\n");
++	kfree(res);
++	
++	return rval;
++}
++
++/*
++ * layout_inode_add -- create the layout record for inode i and link it
++ * on layout_hash. Returns True with *p set, or False (*p untouched) if
++ * the fs lacks fiemap/fallocate or the allocation fails.
++ */
++static boolean_t
++layout_inode_add(struct inode *i, bl_layout_rec_t **p)
++{
++	bl_layout_rec_t		*r;
++
++	if (!i->i_op->fiemap || !i->i_op->fallocate) {
++		printk("pNFS: file system doesn't support required fiemap or "
++		    "fallocate methods\n");
++		return False;
++	}
++	
++	r = kmalloc(sizeof (*r), GFP_KERNEL);
++	if (!r)
++		return False;
++
++	r->blr_rdev	= i->i_sb->s_dev;
++	r->blr_inode	= i;
++	r->blr_orig_size = i->i_size;
++	r->blr_ext_size	= 0;
++	r->blr_recalled	= 0;
++	INIT_LIST_HEAD(&r->blr_layouts);
++	spin_lock_init(&r->blr_lock);
++	spin_lock(&layout_hashtbl_lock);
++	list_add_tail(&r->blr_hash, &layout_hash);
++	spin_unlock(&layout_hashtbl_lock);
++	*p = r;
++	return True;
++}
++
++static bl_layout_rec_t *
++__layout_inode_find(struct inode *i)
++{
++	bl_layout_rec_t	*r;
++	
++	if (!list_empty(&layout_hash)) {
++		list_for_each_entry(r, &layout_hash, blr_hash) {
++			if ((r->blr_inode->i_ino == i->i_ino) &&
++			    (r->blr_rdev == i->i_sb->s_dev)) {
++				return r;
++			}
++		}
++	}
++	return NULL;
++}
++
++static bl_layout_rec_t *
++layout_inode_find(struct inode *i)
++{
++	bl_layout_rec_t	*r;
++
++	spin_lock(&layout_hashtbl_lock);
++	r = __layout_inode_find(i);
++	spin_unlock(&layout_hashtbl_lock);
++	
++	return r;
++}
++
++static void
++layout_inode_del(struct inode *i)
++{
++	bl_layout_rec_t	*r;
++	
++	spin_lock(&layout_hashtbl_lock);
++	r = __layout_inode_find(i);
++	if (r) {
++		spin_lock(&r->blr_lock);
++		if (list_empty(&r->blr_layouts)) {
++			list_del(&r->blr_hash);
++			spin_unlock(&r->blr_lock);
++			kfree(r);
++		} else {
++			spin_unlock(&r->blr_lock);
++		}
++	} else {
++		dprintk("%s: failed to find inode [0x%x:%lu] in table for delete\n",
++			__func__, i->i_sb->s_dev, i->i_ino);
++	}
++	spin_unlock(&layout_hashtbl_lock);
++}
++
++/*
++ * map_state2name -- converts state in ascii string.
++ *
++ * Used for debug messages only.
++ */
++static char *
++map_state2name(enum pnfs_block_extent_state4 s)
++{
++	switch (s) {
++	case PNFS_BLOCK_READWRITE_DATA:	return "     RW";
++	case PNFS_BLOCK_READ_DATA:	return "     RO";
++	case PNFS_BLOCK_INVALID_DATA:	return "INVALID";
++	case PNFS_BLOCK_NONE_DATA:	return "   NONE";
++	default:
++		BUG();
++	}
++}
++
++static pnfs_blocklayout_devinfo_t *
++bld_alloc(struct list_head *volumes, int type)
++{
++	pnfs_blocklayout_devinfo_t *bld;
++	
++	/* Zero-filled so that fields not set below read as 0/NULL. */
++	bld = kzalloc(sizeof (*bld), GFP_KERNEL);
++	if (!bld)
++		return NULL;
++
++	bld->bld_type = type;
++	list_add_tail(&bld->bld_list, volumes);
++
++	return bld;
++}
++
++static void
++bld_free(pnfs_blocklayout_devinfo_t *bld)
++{
++	list_del(&bld->bld_list);
++	kfree(bld);
++}
++
++static void
++print_bll(pnfs_blocklayout_layout_t *b, char *text)
++{
++	dprintk("    BLL: %s\n", text);
++	dprintk("    foff %Lu, soff %Lu, len %Lu, state %s\n",
++	    _2SECTS(b->bll_foff), _2SECTS(b->bll_soff), _2SECTS(b->bll_len),
++	    map_state2name(b->bll_es));
++}
++
++static inline void
++bll_collapse(bl_layout_rec_t *r, pnfs_blocklayout_layout_t *c)
++{
++	pnfs_blocklayout_layout_t	*n;
++	int				dbg_count	= 0;
++	u64				endpoint;
++	
++	BUG_ON(c->bll_es == PNFS_BLOCK_NONE_DATA);
++	while (c->bll_list.next != &r->blr_layouts) {
++		n = list_entry(c->bll_list.next,
++			       struct pnfs_blocklayout_layout, bll_list);
++		endpoint = BLL_S_END(c);
++		if ((n->bll_soff >= c->bll_soff) &&
++		    (n->bll_soff < endpoint)) {
++			if (endpoint < BLL_S_END(n)) {
++				/*
++				 * The following is possible.
++				 *
++				 * 
++				 * Existing: +---+                 +---+
++				 *      New: +-----------------------+
++				 * The client request merge entries together
++				 * but didn't require picking up all of the
++				 * last entry. So, we still need to delete
++				 * the last entry and add the remaining space
++				 * to the new entry.
++				 */
++				c->bll_len += BLL_S_END(n) - endpoint;
++			}
++			dbg_count++;
++			list_del(&n->bll_list);
++			kfree(n);
++		} else {
++			break;
++		}
++	}
++	/* ---- Debug only, remove before integration ---- */
++	if (dbg_count)
++		dprintk("  Collapsed %d cache entries between %Lu(s) and %Lu(s)\n",
++			dbg_count, _2SECTS(c->bll_soff), _2SECTS(BLL_S_END(c)));
++}
++
++static pnfs_blocklayout_layout_t *
++bll_alloc(u64 offset, u64 len, enum bl_cache_state state, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*n;
++	
++	n = kzalloc(sizeof (*n), GFP_KERNEL);
++	if (n) {
++		/* Fields not set here stay zero; h == NULL leaves n unlinked. */
++		n->bll_foff		= offset;
++		n->bll_len		= len;
++		n->bll_cache_state	= state;
++		if (h)
++			list_add_tail(&n->bll_list, h);
++	}
++	return n;
++}
++
++static pnfs_blocklayout_layout_t *
++bll_alloc_dup(pnfs_blocklayout_layout_t *b, enum bl_cache_state c,
++	      struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*n	= NULL;
++	
++	n = bll_alloc(b->bll_foff, b->bll_len, c, h);
++	if (n) {
++		n->bll_es			= b->bll_es;
++		n->bll_soff			= b->bll_soff;
++		n->bll_vol_id.devid		= b->bll_vol_id.devid;
++	}
++	return n;
++}
++
++static inline boolean_t
++layout_conflict(pnfs_blocklayout_layout_t *b, u32 iomode,
++		enum pnfs_block_extent_state4 *s)
++{
++	/* ---- Normal case ---- */
++	*s = b->bll_es;
++	
++	switch (b->bll_es) {
++	case PNFS_BLOCK_READWRITE_DATA:
++		if (iomode == IOMODE_READ)
++			*s = PNFS_BLOCK_READ_DATA;
++		/* ---- Any use is permitted. ---- */
++		break;
++	case PNFS_BLOCK_READ_DATA:
++		/* ---- Committed as read only data. ---- */
++		if (iomode == IOMODE_RW)
++			return False;
++		break;
++	case PNFS_BLOCK_INVALID_DATA:
++		/* ---- Blocks have been allocated, but not initialized ---- */
++		if (iomode == IOMODE_READ)
++			*s = PNFS_BLOCK_NONE_DATA;
++		break;
++	case PNFS_BLOCK_NONE_DATA:
++		/* ---- Hole-y file. No backing store avail. ---- */
++		if (iomode != IOMODE_READ)
++			return False;
++		break;
++	default:
++		BUG();
++	}
++	return True;
++}
++
++#endif /* CONFIG_SPNFS_BLOCK */
+diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs/delegation.c
+--- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-08-23 12:08:29.037481540 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-23 12:09:03.300491952 -0400
+@@ -104,7 +104,8 @@ again:
+ 			continue;
+ 		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+ 			continue;
+-		if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
++		if (memcmp(state->stateid.u.data, stateid->u.data,
++			   sizeof(state->stateid.u.data)) != 0)
+ 			continue;
+ 		get_nfs_open_context(ctx);
+ 		spin_unlock(&inode->i_lock);
+@@ -133,8 +134,8 @@ void nfs_inode_reclaim_delegation(struct
+ 	if (delegation != NULL) {
+ 		spin_lock(&delegation->lock);
+ 		if (delegation->inode != NULL) {
+-			memcpy(delegation->stateid.data, res->delegation.data,
+-			       sizeof(delegation->stateid.data));
++			memcpy(delegation->stateid.u.data, res->delegation.u.data,
++			       sizeof(delegation->stateid.u.data));
+ 			delegation->type = res->delegation_type;
+ 			delegation->maxsize = res->maxsize;
+ 			oldcred = delegation->cred;
+@@ -187,8 +188,9 @@ static struct nfs_delegation *nfs_detach
+ 	if (delegation == NULL)
+ 		goto nomatch;
+ 	spin_lock(&delegation->lock);
+-	if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
+-				sizeof(delegation->stateid.data)) != 0)
++	if (stateid != NULL && memcmp(delegation->stateid.u.data,
++				      stateid->u.data,
++				      sizeof(delegation->stateid.u.data)) != 0)
+ 		goto nomatch_unlock;
+ 	list_del_rcu(&delegation->super_list);
+ 	delegation->inode = NULL;
+@@ -216,8 +218,8 @@ int nfs_inode_set_delegation(struct inod
+ 	delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
+ 	if (delegation == NULL)
+ 		return -ENOMEM;
+-	memcpy(delegation->stateid.data, res->delegation.data,
+-			sizeof(delegation->stateid.data));
++	memcpy(delegation->stateid.u.data, res->delegation.u.data,
++			sizeof(delegation->stateid.u.data));
+ 	delegation->type = res->delegation_type;
+ 	delegation->maxsize = res->maxsize;
+ 	delegation->change_attr = nfsi->change_attr;
+@@ -471,9 +473,7 @@ void nfs_expire_unreferenced_delegations
+ /*
+  * Asynchronous delegation recall!
+  */
+-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
+-				      int (*validate_stateid)(struct nfs_delegation *delegation,
+-							      const nfs4_stateid *stateid))
++int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
+ {
+ 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ 	struct nfs_delegation *delegation;
+@@ -481,7 +481,7 @@ int nfs_async_inode_return_delegation(st
+ 	rcu_read_lock();
+ 	delegation = rcu_dereference(NFS_I(inode)->delegation);
+ 
+-	if (!validate_stateid(delegation, stateid)) {
++	if (!clp->cl_mvops->validate_stateid(delegation, stateid)) {
+ 		rcu_read_unlock();
+ 		return -ENOENT;
+ 	}
+@@ -562,7 +562,8 @@ int nfs4_copy_delegation_stateid(nfs4_st
+ 	rcu_read_lock();
+ 	delegation = rcu_dereference(nfsi->delegation);
+ 	if (delegation != NULL) {
+-		memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
++		memcpy(dst->u.data, delegation->stateid.u.data,
++		       sizeof(dst->u.data));
+ 		ret = 1;
+ 	}
+ 	rcu_read_unlock();
+diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs/delegation.h
+--- linux-2.6.34.noarch/fs/nfs/delegation.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-08-23 12:09:03.301431797 -0400
+@@ -34,9 +34,7 @@ enum {
+ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+ int nfs_inode_return_delegation(struct inode *inode);
+-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
+-				      int (*validate_stateid)(struct nfs_delegation *delegation,
+-							      const nfs4_stateid *stateid));
++int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
+ void nfs_inode_return_delegation_noreclaim(struct inode *inode);
+ 
+ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
+diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
+--- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-08-23 12:08:29.089481525 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 12:09:03.302511603 -0400
+@@ -17,11 +17,19 @@
+ #include <linux/module.h>
+ #include <linux/exportfs.h>
+ 
++#include <linux/nfsd/nfsd4_pnfs.h>
++#if defined(CONFIG_SPNFS)
++#include <linux/nfsd4_spnfs.h>
++#if defined(CONFIG_SPNFS_BLOCK)
++#include <linux/nfsd4_block.h>
++#endif
++#endif
+ #include <linux/nfsd/syscall.h>
+ #include <net/ipv6.h>
+ 
+ #include "nfsd.h"
+ #include "nfsfh.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY	NFSDDBG_EXPORT
+ 
+@@ -352,6 +360,40 @@ static int svc_export_upcall(struct cach
+ 	return sunrpc_cache_pipe_upcall(cd, h, svc_export_request);
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static struct pnfsd_cb_operations pnfsd_cb_op = {
++	.cb_layout_recall = nfsd_layout_recall_cb,
++	.cb_device_notify = nfsd_device_notify_cb,
++
++	.cb_get_state = nfs4_pnfs_cb_get_state,
++	.cb_change_state = nfs4_pnfs_cb_change_state,
++};
++
++#if defined(CONFIG_SPNFS)
++static struct pnfs_export_operations spnfs_export_ops = {
++	.layout_type = spnfs_layout_type,
++	.get_device_info = spnfs_getdeviceinfo,
++	.get_device_iter = spnfs_getdeviceiter,
++	.layout_get = spnfs_layoutget,
++	.layout_return = spnfs_layoutreturn,
++};
++
++static struct pnfs_export_operations spnfs_ds_export_ops = {
++	.get_state = spnfs_get_state,
++};
++
++#if defined(CONFIG_SPNFS_BLOCK)
++static struct pnfs_export_operations bl_export_ops = {
++	.layout_type = bl_layout_type,
++	.get_device_info = bl_getdeviceinfo,
++	.get_device_iter = bl_getdeviceiter,
++	.layout_get = bl_layoutget,
++	.layout_return = bl_layoutreturn,
++};
++#endif /* CONFIG_SPNFS_BLOCK */
++#endif /* CONFIG_SPNFS */
++#endif /* CONFIG_PNFSD */
++
+ static struct svc_export *svc_export_update(struct svc_export *new,
+ 					    struct svc_export *old);
+ static struct svc_export *svc_export_lookup(struct svc_export *);
+@@ -395,6 +437,47 @@ static int check_export(struct inode *in
+ 		return -EINVAL;
+ 	}
+ 
++#if !defined(CONFIG_SPNFS)
++	if (inode->i_sb->s_pnfs_op &&
++	    (!inode->i_sb->s_pnfs_op->layout_type ||
++	     !inode->i_sb->s_pnfs_op->get_device_info ||
++	     !inode->i_sb->s_pnfs_op->layout_get)) {
++		dprintk("exp_export: export of invalid fs pnfs export ops.\n");
++		return -EINVAL;
++	}
++#endif /* CONFIG_SPNFS */
++
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++	if (!inode->i_sb->s_pnfs_op)
++		pnfsd_lexp_init(inode);
++	return 0;
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
++#if defined(CONFIG_SPNFS)
++#if defined(CONFIG_SPNFS_BLOCK)
++	if (pnfs_block_enabled(inode, *flags)) {
++		dprintk("set pnfs block export structure... \n");
++		inode->i_sb->s_pnfs_op = &bl_export_ops;
++	} else
++#endif /* CONFIG_SPNFS_BLOCK */
++	/*
++	 * spnfs_enabled() indicates we're an MDS.
++	 * XXX Better to check an export time option as well.
++	 */
++	if (spnfs_enabled()) {
++		dprintk("set spnfs export structure...\n");
++		inode->i_sb->s_pnfs_op = &spnfs_export_ops;
++	} else {
++		dprintk("%s spnfs not in use\n", __func__);
++
++		/*
++		 * get_state is needed if we're a DS using spnfs.
++		 * XXX Better to check an export time option instead.
++		 */
++		inode->i_sb->s_pnfs_op = &spnfs_ds_export_ops;
++	}
++#endif /* CONFIG_SPNFS */
++
+ 	return 0;
+ 
+ }
+@@ -586,6 +669,8 @@ static int svc_export_parse(struct cache
+ 					if (exp.ex_uuid == NULL)
+ 						err = -ENOMEM;
+ 				}
++			} else if (strcmp(buf, "pnfs") == 0) {
++				exp.ex_pnfs = 1;
+ 			} else if (strcmp(buf, "secinfo") == 0)
+ 				err = secinfo_parse(&mesg, buf, &exp);
+ 			else
+@@ -660,6 +745,8 @@ static int svc_export_show(struct seq_fi
+ 				seq_printf(m, "%02x", exp->ex_uuid[i]);
+ 			}
+ 		}
++		if (exp->ex_pnfs)
++			seq_puts(m, ",pnfs");
+ 		show_secinfo(m, exp);
+ 	}
+ 	seq_puts(m, ")\n");
+@@ -687,6 +774,7 @@ static void svc_export_init(struct cache
+ 	new->ex_fslocs.locations = NULL;
+ 	new->ex_fslocs.locations_count = 0;
+ 	new->ex_fslocs.migrated = 0;
++	new->ex_pnfs = 0;
+ }
+ 
+ static void export_update(struct cache_head *cnew, struct cache_head *citem)
+@@ -699,6 +787,7 @@ static void export_update(struct cache_h
+ 	new->ex_anon_uid = item->ex_anon_uid;
+ 	new->ex_anon_gid = item->ex_anon_gid;
+ 	new->ex_fsid = item->ex_fsid;
++	new->ex_pnfs = item->ex_pnfs;
+ 	new->ex_uuid = item->ex_uuid;
+ 	item->ex_uuid = NULL;
+ 	new->ex_pathname = item->ex_pathname;
+@@ -1635,8 +1724,17 @@ nfsd_export_init(void)
+ 	if (rv)
+ 		return rv;
+ 	rv = cache_register(&svc_expkey_cache);
+-	if (rv)
++	if (rv) {
+ 		cache_unregister(&svc_export_cache);
++		goto out;
++	}
++#if defined(CONFIG_PNFSD)
++	spin_lock(&pnfsd_cb_ctl.lock);
++	pnfsd_cb_ctl.module = THIS_MODULE;
++	pnfsd_cb_ctl.cb_op = &pnfsd_cb_op;
++	spin_unlock(&pnfsd_cb_ctl.lock);
++#endif /* CONFIG_PNFSD */
++out:
+ 	return rv;
+ 
+ }
+@@ -1664,6 +1762,12 @@ nfsd_export_shutdown(void)
+ 
+ 	exp_writelock();
+ 
++#if defined(CONFIG_PNFSD)
++	spin_lock(&pnfsd_cb_ctl.lock);
++	pnfsd_cb_ctl.module = NULL;
++	pnfsd_cb_ctl.cb_op = NULL;
++	spin_unlock(&pnfsd_cb_ctl.lock);
++#endif /* CONFIG_PNFSD */
+ 	cache_unregister(&svc_expkey_cache);
+ 	cache_unregister(&svc_export_cache);
+ 	svcauth_unix_purge();
+diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/direct.c
+--- linux-2.6.34.noarch/fs/nfs/direct.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-08-23 12:09:03.303491500 -0400
+@@ -267,6 +267,38 @@ static const struct rpc_call_ops nfs_rea
+ 	.rpc_release = nfs_direct_read_release,
+ };
+ 
++static long nfs_direct_read_execute(struct nfs_read_data *data,
++				    struct rpc_task_setup *task_setup_data,
++				    struct rpc_message *msg)
++{
++	struct inode *inode = data->inode;
++	struct rpc_task *task;
++
++	nfs_fattr_init(&data->fattr);
++	msg->rpc_argp = &data->args;
++	msg->rpc_resp = &data->res;
++
++	task_setup_data->task = &data->task;
++	task_setup_data->callback_data = data;
++	NFS_PROTO(inode)->read_setup(data, msg);
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++
++	rpc_put_task(task);
++
++	dprintk("NFS: %5u initiated direct read call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++
++	return 0;
++}
++
+ /*
+  * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
+  * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
+@@ -283,7 +315,6 @@ static ssize_t nfs_direct_read_schedule_
+ 	unsigned long user_addr = (unsigned long)iov->iov_base;
+ 	size_t count = iov->iov_len;
+ 	size_t rsize = NFS_SERVER(inode)->rsize;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = ctx->cred,
+ 	};
+@@ -343,26 +374,9 @@ static ssize_t nfs_direct_read_schedule_
+ 		data->res.fattr = &data->fattr;
+ 		data->res.eof = 0;
+ 		data->res.count = bytes;
+-		nfs_fattr_init(&data->fattr);
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+ 
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		NFS_PROTO(inode)->read_setup(data, &msg);
+-
+-		task = rpc_run_task(&task_setup_data);
+-		if (IS_ERR(task))
+-			break;
+-		rpc_put_task(task);
+-
+-		dprintk("NFS: %5u initiated direct read call "
+-			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				bytes,
+-				(unsigned long long)data->args.offset);
++		if (nfs_direct_read_execute(data, &task_setup_data, &msg))
++			break;
+ 
+ 		started += bytes;
+ 		user_addr += bytes;
+@@ -448,12 +462,15 @@ static void nfs_direct_free_writedata(st
+ }
+ 
+ #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
++static long nfs_direct_write_execute(struct nfs_write_data *data,
++				     struct rpc_task_setup *task_setup_data,
++				     struct rpc_message *msg);
++
+ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+ {
+ 	struct inode *inode = dreq->inode;
+ 	struct list_head *p;
+ 	struct nfs_write_data *data;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = dreq->ctx->cred,
+ 	};
+@@ -487,25 +504,7 @@ static void nfs_direct_write_reschedule(
+ 		 * Reuse data->task; data->args should not have changed
+ 		 * since the original request was sent.
+ 		 */
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+-		NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-		/*
+-		 * We're called via an RPC callback, so BKL is already held.
+-		 */
+-		task = rpc_run_task(&task_setup_data);
+-		if (!IS_ERR(task))
+-			rpc_put_task(task);
+-
+-		dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				data->args.count,
+-				(unsigned long long)data->args.offset);
++		nfs_direct_write_execute(data, &task_setup_data, &msg);
+ 	}
+ 
+ 	if (put_dreq(dreq))
+@@ -548,10 +547,31 @@ static const struct rpc_call_ops nfs_com
+ 	.rpc_release = nfs_direct_commit_release,
+ };
+ 
++static long nfs_direct_commit_execute(struct nfs_direct_req *dreq,
++				      struct nfs_write_data *data,
++				      struct rpc_task_setup *task_setup_data,
++				      struct rpc_message *msg)
++{
++	struct rpc_task *task;
++
++	NFS_PROTO(data->inode)->commit_setup(data, msg);
++
++	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
++	dreq->commit_data = NULL;
++
++	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++
++	rpc_put_task(task);
++	return 0;
++}
++
+ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
+ {
+ 	struct nfs_write_data *data = dreq->commit_data;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+@@ -579,16 +599,7 @@ static void nfs_direct_commit_schedule(s
+ 	data->res.verf = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	NFS_PROTO(data->inode)->commit_setup(data, &msg);
+-
+-	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
+-	dreq->commit_data = NULL;
+-
+-	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (!IS_ERR(task))
+-		rpc_put_task(task);
++	nfs_direct_commit_execute(dreq, data, &task_setup_data, &msg);
+ }
+ 
+ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+@@ -690,6 +701,36 @@ static const struct rpc_call_ops nfs_wri
+ 	.rpc_release = nfs_direct_write_release,
+ };
+ 
++static long nfs_direct_write_execute(struct nfs_write_data *data,
++				     struct rpc_task_setup *task_setup_data,
++				     struct rpc_message *msg)
++{
++	struct inode *inode = data->inode;
++	struct rpc_task *task;
++
++	task_setup_data->task = &data->task;
++	task_setup_data->callback_data = data;
++	msg->rpc_argp = &data->args;
++	msg->rpc_resp = &data->res;
++	NFS_PROTO(inode)->write_setup(data, msg);
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++
++	rpc_put_task(task);
++
++	dprintk("NFS: %5u initiated direct write call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++
++	return 0;
++}
++
+ /*
+  * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+  * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
+@@ -705,7 +746,6 @@ static ssize_t nfs_direct_write_schedule
+ 	struct inode *inode = ctx->path.dentry->d_inode;
+ 	unsigned long user_addr = (unsigned long)iov->iov_base;
+ 	size_t count = iov->iov_len;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = ctx->cred,
+ 	};
+@@ -771,24 +811,8 @@ static ssize_t nfs_direct_write_schedule
+ 		data->res.verf = &data->verf;
+ 		nfs_fattr_init(&data->fattr);
+ 
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+-		NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-		task = rpc_run_task(&task_setup_data);
+-		if (IS_ERR(task))
+-			break;
+-		rpc_put_task(task);
+-
+-		dprintk("NFS: %5u initiated direct write call "
+-			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				bytes,
+-				(unsigned long long)data->args.offset);
++		if (nfs_direct_write_execute(data, &task_setup_data, &msg))
++			break;
+ 
+ 		started += bytes;
+ 		user_addr += bytes;
+diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kconfig
+--- linux-2.6.34.noarch/fs/nfsd/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-08-23 12:09:03.304505472 -0400
+@@ -79,3 +79,52 @@ config NFSD_V4
+ 	  available from http://linux-nfs.org/.
+ 
+ 	  If unsure, say N.
++
++config PNFSD
++	bool "NFSv4.1 server support for Parallel NFS (pNFS) (DEVELOPER ONLY)"
++	depends on NFSD_V4 && EXPERIMENTAL
++	select EXPORTFS_FILE_LAYOUT
++	help
++	  This option enables support for the parallel NFS features of the
++	  minor version 1 of the NFSv4 protocol (draft-ietf-nfsv4-minorversion1)
++	  in the kernel's NFS server.
++
++	  Unless you're an NFS developer, say N.
++
++config PNFSD_LOCAL_EXPORT
++	bool "Enable pNFS support for exporting local filesystems for debugging purposes"
++	depends on PNFSD
++	help
++	  Say Y here if you want your pNFS server to export local file systems
++	  over the files layout type.  With this option the MDS (metadata
++	  server) functions also as a single DS (data server).  This is mostly
++	  useful for development and debugging purposes.
++
++	  If unsure, say N.
++
++config SPNFS
++	bool "Provide spNFS server support (EXPERIMENTAL)"
++	depends on PNFSD
++	select RPCSEC_GSS_KRB5
++	help
++	  Say Y here if you want spNFS server support.
++
++	  If unsure, say N.
++
++config SPNFS_LAYOUTSEGMENTS
++	bool "Allow spNFS to return partial file layouts (EXPERIMENTAL)"
++	depends on SPNFS
++	select RPCSEC_GSS_KRB5
++	help
++	  Say Y here if you want spNFS to be able to return layout segments.
++
++	  If unsure, say N.
++
++config SPNFS_BLOCK
++	bool "Provide Block Layout server support (EXPERIMENTAL)"
++	depends on SPNFS
++	select EXPORTFS_BLOCK_LAYOUT
++	help
++	  Say Y here if you want spNFS block layout support.
++
++	  If unsure, say N.
+diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/Makefile
+--- linux-2.6.34.noarch/fs/nfsd/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-08-23 12:09:03.304505472 -0400
+@@ -11,3 +11,7 @@ nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs
+ nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
+ nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
+ 			   nfs4acl.o nfs4callback.o nfs4recover.o
++nfsd-$(CONFIG_PNFSD)	+= nfs4pnfsd.o nfs4pnfsdlm.o nfs4pnfsds.o
++nfsd-$(CONFIG_PNFSD_LOCAL_EXPORT) += pnfsd_lexp.o
++nfsd-$(CONFIG_SPNFS)	+= spnfs_com.o spnfs_ops.o
++nfsd-$(CONFIG_SPNFS_BLOCK) += bl_com.o bl_ops.o
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-08-23 12:08:29.090501507 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 12:09:03.306491345 -0400
+@@ -40,7 +40,6 @@
+ 
+ #define NFSPROC4_CB_NULL 0
+ #define NFSPROC4_CB_COMPOUND 1
+-#define NFS4_STATEID_SIZE 16
+ 
+ /* Index of predefined Linux callback client operations */
+ 
+@@ -48,11 +47,17 @@ enum {
+ 	NFSPROC4_CLNT_CB_NULL = 0,
+ 	NFSPROC4_CLNT_CB_RECALL,
+ 	NFSPROC4_CLNT_CB_SEQUENCE,
++#if defined(CONFIG_PNFSD)
++	NFSPROC4_CLNT_CB_LAYOUT,
++	NFSPROC4_CLNT_CB_DEVICE,
++#endif
+ };
+ 
+ enum nfs_cb_opnum4 {
+ 	OP_CB_RECALL            = 4,
++	OP_CB_LAYOUT            = 5,
+ 	OP_CB_SEQUENCE          = 11,
++	OP_CB_DEVICE            = 14,
+ };
+ 
+ #define NFS4_MAXTAGLEN		20
+@@ -78,6 +83,19 @@ enum nfs_cb_opnum4 {
+ #define NFS4_dec_cb_recall_sz		(cb_compound_dec_hdr_sz  +      \
+ 					cb_sequence_dec_sz +            \
+ 					op_dec_sz)
++#define NFS4_enc_cb_layout_sz		(cb_compound_enc_hdr_sz +       \
++					cb_sequence_enc_sz +            \
++					1 + 3 +                         \
++					enc_nfs4_fh_sz + 4)
++#define NFS4_dec_cb_layout_sz		(cb_compound_dec_hdr_sz  +      \
++					cb_sequence_dec_sz +            \
++					op_dec_sz)
++#define NFS4_enc_cb_device_sz		(cb_compound_enc_hdr_sz +       \
++					cb_sequence_enc_sz +            \
++					1 + 6)
++#define NFS4_dec_cb_device_sz		(cb_compound_dec_hdr_sz  +      \
++					cb_sequence_dec_sz +            \
++					op_dec_sz)
+ 
+ /*
+ * Generic encode routines from fs/nfs/nfs4xdr.c
+@@ -94,6 +112,10 @@ xdr_writemem(__be32 *p, const void *ptr,
+ }
+ 
+ #define WRITE32(n)               *p++ = htonl(n)
++#define WRITE64(n)               do {				\
++	*p++ = htonl((u32)((n) >> 32));				\
++	*p++ = htonl((u32)(n));					\
++} while (0)
+ #define WRITEMEM(ptr,nbytes)     do {                           \
+ 	p = xdr_writemem(p, ptr, nbytes);                       \
+ } while (0)
+@@ -204,6 +226,16 @@ nfs_cb_stat_to_errno(int stat)
+  */
+ 
+ static void
++encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
++{
++	__be32 *p;
++
++	RESERVE_SPACE(sizeof(stateid_t));
++	WRITE32(sid->si_generation);
++	WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
++}
++
++static void
+ encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
+ {
+ 	__be32 * p;
+@@ -228,10 +260,10 @@ encode_cb_recall(struct xdr_stream *xdr,
+ 	__be32 *p;
+ 	int len = dp->dl_fh.fh_size;
+ 
+-	RESERVE_SPACE(12+sizeof(dp->dl_stateid) + len);
++	RESERVE_SPACE(4);
+ 	WRITE32(OP_CB_RECALL);
+-	WRITE32(dp->dl_stateid.si_generation);
+-	WRITEMEM(&dp->dl_stateid.si_opaque, sizeof(stateid_opaque_t));
++	encode_stateid(xdr, &dp->dl_stateid);
++	RESERVE_SPACE(8 + (XDR_QUADLEN(len) << 2));
+ 	WRITE32(0); /* truncate optimization not implemented */
+ 	WRITE32(len);
+ 	WRITEMEM(&dp->dl_fh.fh_base, len);
+@@ -259,6 +291,111 @@ encode_cb_sequence(struct xdr_stream *xd
+ 	hdr->nops++;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++#include "pnfsd.h"
++
++static void
++encode_cb_layout(struct xdr_stream *xdr, struct nfs4_layoutrecall *clr,
++		 struct nfs4_cb_compound_hdr *hdr)
++{
++	__be32 *p;	/* XDR cursor, big-endian like encode_stateid() */
++
++	BUG_ON(hdr->minorversion == 0);
++
++	RESERVE_SPACE(20);	/* five 32-bit words follow */
++	WRITE32(OP_CB_LAYOUT);
++	WRITE32(clr->cb.cbl_seg.layout_type);
++	WRITE32(clr->cb.cbl_seg.iomode);
++	WRITE32(clr->cb.cbl_layoutchanged);
++	WRITE32(clr->cb.cbl_recall_type);
++	if (unlikely(clr->cb.cbl_recall_type == RETURN_FSID)) {
++		struct nfs4_fsid fsid = clr->cb.cbl_fsid;
++
++		RESERVE_SPACE(16);	/* two 64-bit fsid halves */
++		WRITE64(fsid.major);
++		WRITE64(fsid.minor);
++		dprintk("%s: type %x iomode %d changed %d recall_type %d "
++			"fsid 0x%llx-0x%llx\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type, fsid.major, fsid.minor);
++	} else if (clr->cb.cbl_recall_type == RETURN_FILE) {
++		int len = clr->clr_file->fi_fhlen;
++		stateid_t *cbl_sid = (stateid_t *)&clr->cb.cbl_sid;
++
++		RESERVE_SPACE(20 + (XDR_QUADLEN(len) << 2)); /* fh is padded */
++		WRITE32(len);
++		WRITEMEM(clr->clr_file->fi_fhval, len);
++		WRITE64(clr->cb.cbl_seg.offset);
++		WRITE64(clr->cb.cbl_seg.length);
++		encode_stateid(xdr, cbl_sid);
++		dprintk("%s: type %x iomode %d changed %d recall_type %d "
++			"offset %lld length %lld stateid " STATEID_FMT "\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type,
++			clr->cb.cbl_seg.offset, clr->cb.cbl_seg.length,
++			STATEID_VAL(cbl_sid));
++	} else {
++		dprintk("%s: type %x iomode %d changed %d recall_type %d\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type);
++	}
++	hdr->nops++;	/* one more op in the compound */
++}
++
++static void
++encode_cb_device(struct xdr_stream *xdr, struct nfs4_notify_device *nd,
++		 struct nfs4_cb_compound_hdr *hdr)
++{
++	__be32 *p;	/* XDR cursor, big-endian like encode_stateid() */
++	int i;
++	int len					= nd->nd_list->cbd_len;
++	struct nfsd4_pnfs_cb_dev_item *cbd	= nd->nd_list->cbd_list;
++
++	dprintk("NFSD %s: --> num %d\n", __func__, len);
++
++	BUG_ON(hdr->minorversion == 0);
++
++	RESERVE_SPACE(8);	/* opcode + array count */
++	WRITE32(OP_CB_DEVICE);
++
++	/* notify4 cnda_changes<>; */
++	WRITE32(len);
++	for (i = 0; i < len; i++) {
++		dprintk("%s: nt %d lt %d devid x%llx-x%llx im %d i %d\n",
++			__func__, cbd[i].cbd_notify_type,
++			cbd[i].cbd_layout_type,
++			cbd[i].cbd_devid.sbid,
++			cbd[i].cbd_devid.devid,
++			cbd[i].cbd_immediate, i);
++
++		BUG_ON(cbd[i].cbd_notify_type != NOTIFY_DEVICEID4_CHANGE &&
++		       cbd[i].cbd_notify_type != NOTIFY_DEVICEID4_DELETE);
++		RESERVE_SPACE(32);	/* 4 words + two 64-bit devid halves */
++		/* bitmap4         notify_mask; */
++		WRITE32(1);
++		WRITE32(cbd[i].cbd_notify_type);
++		/* opaque     notify_vals<>; */
++		if (cbd[i].cbd_notify_type == NOTIFY_DEVICEID4_CHANGE)
++			WRITE32(24);	/* layout type + devid + immediate */
++		else
++			WRITE32(20);	/* layout type + devid */
++		WRITE32(cbd[i].cbd_layout_type);
++		WRITE64(cbd[i].cbd_devid.sbid);
++		WRITE64(cbd[i].cbd_devid.devid);
++
++		if (cbd[i].cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) {
++			RESERVE_SPACE(4);
++			WRITE32(cbd[i].cbd_immediate);
++		}
++	}
++	hdr->nops++;	/* one more op in the compound */
++}
++#endif /* CONFIG_PNFSD */
++
+ static int
+ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
+ {
+@@ -288,6 +425,45 @@ nfs4_xdr_enc_cb_recall(struct rpc_rqst *
+ 	return 0;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static int
++nfs4_xdr_enc_cb_layout(struct rpc_rqst *req, u32 *p,
++		       struct nfs4_rpc_args *rpc_args)
++{
++	struct xdr_stream xdr;
++	struct nfs4_layoutrecall *args = rpc_args->args_op;
++	struct nfs4_cb_compound_hdr hdr = {
++		.ident = 0,
++		.minorversion = rpc_args->args_seq.cbs_minorversion,
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_cb_compound_hdr(&xdr, &hdr);
++	encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
++	encode_cb_layout(&xdr, args, &hdr);
++	encode_cb_nops(&hdr);
++	return 0;
++}
++
++static int
++nfs4_xdr_enc_cb_device(struct rpc_rqst *req, u32 *p,
++		       struct nfs4_rpc_args *rpc_args)
++{
++	struct xdr_stream xdr;
++	struct nfs4_notify_device *args = rpc_args->args_op;
++	struct nfs4_cb_compound_hdr hdr = {
++		.ident = 0,
++		.minorversion = rpc_args->args_seq.cbs_minorversion,
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_cb_compound_hdr(&xdr, &hdr);
++	encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
++	encode_cb_device(&xdr, args, &hdr);
++	encode_cb_nops(&hdr);
++	return 0;
++}
++#endif /* CONFIG_PNFSD */
+ 
+ static int
+ decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){
+@@ -403,6 +579,48 @@ out:
+ 	return status;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static int
++nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp, u32 *p,
++		       struct nfsd4_cb_sequence *seq)
++{
++	struct xdr_stream xdr;
++	struct nfs4_cb_compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_cb_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_cb_sequence(&xdr, seq, rqstp);
++	if (status)
++		goto out;
++	status = decode_cb_op_hdr(&xdr, OP_CB_LAYOUT);
++out:
++	return status;
++}
++
++static int
++nfs4_xdr_dec_cb_device(struct rpc_rqst *rqstp, u32 *p,
++		       struct nfsd4_cb_sequence *seq)
++{
++	struct xdr_stream xdr;
++	struct nfs4_cb_compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_cb_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_cb_sequence(&xdr, seq, rqstp);
++	if (status)
++		goto out;
++	status = decode_cb_op_hdr(&xdr, OP_CB_DEVICE);
++out:
++	return status;
++}
++#endif /* CONFIG_PNFSD */
++
+ /*
+  * RPC procedure tables
+  */
+@@ -420,6 +638,10 @@ out:
+ static struct rpc_procinfo     nfs4_cb_procedures[] = {
+     PROC(CB_NULL,      NULL,     enc_cb_null,     dec_cb_null),
+     PROC(CB_RECALL,    COMPOUND,   enc_cb_recall,      dec_cb_recall),
++#if defined(CONFIG_PNFSD)
++    PROC(CB_LAYOUT,    COMPOUND,   enc_cb_layout,      dec_cb_layout),
++    PROC(CB_DEVICE,    COMPOUND,   enc_cb_device,      dec_cb_device),
++#endif
+ };
+ 
+ static struct rpc_version       nfs_cb_version4 = {
+@@ -606,10 +828,9 @@ out:
+  * TODO: cb_sequence should support referring call lists, cachethis, multiple
+  * slots, and mark callback channel down on communication errors.
+  */
+-static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
++static void nfsd4_cb_prepare_sequence(struct rpc_task *task,
++				      struct nfs4_client *clp)
+ {
+-	struct nfs4_delegation *dp = calldata;
+-	struct nfs4_client *clp = dp->dl_client;
+ 	struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
+ 	u32 minorversion = clp->cl_cb_conn.cb_minorversion;
+ 	int status = 0;
+@@ -629,11 +850,15 @@ static void nfsd4_cb_prepare(struct rpc_
+ 	rpc_call_start(task);
+ }
+ 
+-static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
++static void nfsd4_cb_recall_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs4_delegation *dp = calldata;
+-	struct nfs4_client *clp = dp->dl_client;
++	nfsd4_cb_prepare_sequence(task, dp->dl_client);
++}
+ 
++static void nfsd4_cb_done_sequence(struct rpc_task *task,
++				   struct nfs4_client *clp)
++{
+ 	dprintk("%s: minorversion=%d\n", __func__,
+ 		clp->cl_cb_conn.cb_minorversion);
+ 
+@@ -657,7 +882,7 @@ static void nfsd4_cb_recall_done(struct 
+ 	struct nfs4_client *clp = dp->dl_client;
+ 	struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
+ 
+-	nfsd4_cb_done(task, calldata);
++	nfsd4_cb_done_sequence(task, clp);
+ 
+ 	if (current_rpc_client == NULL) {
+ 		/* We're shutting down; give up. */
+@@ -688,7 +913,7 @@ static void nfsd4_cb_recall_done(struct 
+ 	if (dp->dl_retries--) {
+ 		rpc_delay(task, 2*HZ);
+ 		task->tk_status = 0;
+-		rpc_restart_call(task);
++		rpc_restart_call_prepare(task);
+ 		return;
+ 	} else {
+ 		atomic_set(&clp->cl_cb_set, 0);
+@@ -704,7 +929,7 @@ static void nfsd4_cb_recall_release(void
+ }
+ 
+ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
+-	.rpc_call_prepare = nfsd4_cb_prepare,
++	.rpc_call_prepare = nfsd4_cb_recall_prepare,
+ 	.rpc_call_done = nfsd4_cb_recall_done,
+ 	.rpc_release = nfsd4_cb_recall_release,
+ };
+@@ -781,3 +1006,173 @@ void nfsd4_cb_recall(struct nfs4_delegat
+ {
+ 	queue_work(callback_wq, &dp->dl_recall.cb_work);
+ }
++
++#if defined(CONFIG_PNFSD)
++static void nfsd4_cb_layout_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	nfsd4_cb_prepare_sequence(task, clr->clr_client);
++}
++
++static void nfsd4_cb_layout_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	struct nfs4_client *clp = clr->clr_client;
++
++	nfsd4_cb_done_sequence(task, clp);
++
++	if (!task->tk_status)
++		return;
++
++	printk("%s: clp %p cb_client %p fp %p failed with status %d\n",
++	       __func__,
++	       clp,
++	       clp->cl_cb_client,
++	       clr->clr_file,
++	       task->tk_status);
++
++	switch (task->tk_status) {
++	case -EIO:
++		/* Network partition? */
++		atomic_set(&clp->cl_cb_set, 0);
++		warn_no_callback_path(clp, task->tk_status);
++		/* FIXME:
++		 * The pNFS standard states that we may only expire the
++		 * client after at least "lease time", e.g. lease-time * 2,
++		 * when failing to communicate a recall
++		 */
++		break;
++	case -NFS4ERR_DELAY:
++		/* Poll the client until it's done with the layout */
++		rpc_delay(task, HZ/100); /* 10 milliseconds */
++		task->tk_status = 0;
++		rpc_restart_call_prepare(task);
++		break;
++	case -NFS4ERR_NOMATCHING_LAYOUT:
++		task->tk_status = 0;
++		nomatching_layout(clr);
++	}
++}
++
++static void nfsd4_cb_layout_release(void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	kfree(clr->clr_args);
++	clr->clr_args = NULL;
++	put_layoutrecall(clr);
++}
++
++static const struct rpc_call_ops nfsd4_cb_layout_ops = {
++	.rpc_call_prepare = nfsd4_cb_layout_prepare,
++	.rpc_call_done = nfsd4_cb_layout_done,
++	.rpc_release = nfsd4_cb_layout_release,
++};
++
++/*
++ * Called with state lock.
++ */
++int
++nfsd4_cb_layout(struct nfs4_layoutrecall *clr)
++{
++	struct nfs4_client *clp = clr->clr_client;
++	struct rpc_clnt *clnt = clp->cl_cb_client;
++	struct nfs4_rpc_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_LAYOUT],
++		.rpc_cred = callback_cred
++	};
++	int status;
++
++	args = kzalloc(sizeof(*args), GFP_KERNEL);
++	if (!args) {
++		status = -ENOMEM;
++		goto out;
++	}
++	clr->clr_args = args;
++	args->args_op = clr;
++	msg.rpc_argp = args;
++	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
++				&nfsd4_cb_layout_ops, clr);
++out:
++	if (status) {
++		kfree(args);
++		put_layoutrecall(clr);
++	}
++	dprintk("NFSD: nfsd4_cb_layout: status %d\n", status);
++	return status;
++}
++
++static void nfsd4_cb_device_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	nfsd4_cb_prepare_sequence(task, cbnd->nd_client);
++}
++
++static void nfsd4_cb_device_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	struct nfs4_client *clp = cbnd->nd_client;
++
++	nfsd4_cb_done_sequence(task, clp);
++
++	dprintk("%s: clp %p cb_client %p: status %d\n",
++	       __func__,
++	       clp,
++	       clp->cl_cb_client,
++	       task->tk_status);
++
++	if (task->tk_status == -EIO) {
++		/* Network partition? */
++		atomic_set(&clp->cl_cb_set, 0);
++		warn_no_callback_path(clp, task->tk_status);
++	}
++}
++
++static void nfsd4_cb_device_release(void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	kfree(cbnd->nd_args);
++	cbnd->nd_args = NULL;
++	kfree(cbnd);
++}
++
++static const struct rpc_call_ops nfsd4_cb_device_ops = {
++	.rpc_call_prepare = nfsd4_cb_device_prepare,
++	.rpc_call_done = nfsd4_cb_device_done,
++	.rpc_release = nfsd4_cb_device_release,
++};
++
++/*
++ * Called with state lock.
++ */
++int
++nfsd4_cb_notify_device(struct nfs4_notify_device *cbnd)
++{
++	struct nfs4_client *clp = cbnd->nd_client;
++	struct rpc_clnt *clnt = clp->cl_cb_client;
++	struct nfs4_rpc_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_DEVICE],
++		.rpc_cred = callback_cred
++	};
++	int status = -EIO;
++
++	dprintk("%s: clp %p\n", __func__, clp);
++
++	args = kzalloc(sizeof(*args), GFP_KERNEL);
++	if (!args) {
++		status = -ENOMEM;
++		goto out;
++	}
++	args->args_op = cbnd;
++	cbnd->nd_args = args;	/* freed by nfsd4_cb_device_release() */
++	msg.rpc_argp = args;
++	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
++				&nfsd4_cb_device_ops, cbnd);
++out:
++	if (status)
++		kfree(args);	/* NOTE(review): verify vs ->rpc_release */
++	dprintk("%s: status %d\n", __func__, status);
++	return status;
++}
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-08-23 12:09:03.307491492 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-08-23 12:09:03.308491262 -0400
+@@ -0,0 +1,1679 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *****************************************************************************/
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY                NFSDDBG_PROC
++
++/* Globals */
++static u32 current_layoutid = 1;
++
++/*
++ * Currently used for manipulating the layout state.
++ */
++static DEFINE_SPINLOCK(layout_lock);
++
++#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_SMP)
++#  define BUG_ON_UNLOCKED_LAYOUT() BUG_ON(!spin_is_locked(&layout_lock))
++#else
++#  define BUG_ON_UNLOCKED_LAYOUT()
++#endif
++
++/*
++ * Layout state - NFSv4.1 pNFS
++ */
++static struct kmem_cache *pnfs_layout_slab;
++static struct kmem_cache *pnfs_layoutrecall_slab;
++
++/* hash table for nfsd4_pnfs_deviceid.sbid */
++#define SBID_HASH_BITS	8
++#define SBID_HASH_SIZE	(1 << SBID_HASH_BITS)
++#define SBID_HASH_MASK	(SBID_HASH_SIZE - 1)
++
++struct sbid_tracker {
++	u64 id;
++	struct super_block *sb;
++	struct list_head hash;
++};
++
++static u64 current_sbid;
++static struct list_head sbid_hashtbl[SBID_HASH_SIZE];
++
++static inline unsigned long
++sbid_hashval(struct super_block *sb)
++{
++	return hash_ptr(sb, SBID_HASH_BITS);
++}
++
++static inline struct sbid_tracker *
++alloc_sbid(void)
++{
++	return kmalloc(sizeof(struct sbid_tracker), GFP_KERNEL);
++}
++
++static void
++destroy_sbid(struct sbid_tracker *sbid)
++{
++	spin_lock(&layout_lock);
++	list_del(&sbid->hash);
++	spin_unlock(&layout_lock);
++	kfree(sbid);
++}
++
++void
++nfsd4_free_pnfs_slabs(void)
++{
++	int i;
++	struct sbid_tracker *sbid;
++
++	nfsd4_free_slab(&pnfs_layout_slab);
++	nfsd4_free_slab(&pnfs_layoutrecall_slab);
++
++	for (i = 0; i < SBID_HASH_SIZE; i++) {
++		while (!list_empty(&sbid_hashtbl[i])) {
++			sbid = list_first_entry(&sbid_hashtbl[i],
++						struct sbid_tracker,
++						hash);
++			destroy_sbid(sbid);
++		}
++	}
++}
++
++int
++nfsd4_init_pnfs_slabs(void)
++{
++	int i;
++
++	/* Init buckets first: nfsd4_free_pnfs_slabs() walks them all. */
++	for (i = 0; i < SBID_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&sbid_hashtbl[i]);
++
++	pnfs_layout_slab = kmem_cache_create("pnfs_layouts",
++			sizeof(struct nfs4_layout), 0, 0, NULL);
++	if (pnfs_layout_slab == NULL)
++		return -ENOMEM;
++	pnfs_layoutrecall_slab = kmem_cache_create("pnfs_layoutrecalls",
++			sizeof(struct nfs4_layoutrecall), 0, 0, NULL);
++	if (pnfs_layoutrecall_slab == NULL)
++		return -ENOMEM;
++
++	return 0;
++}
++
++/* XXX: Need to implement the notify types and track which
++ * clients have which devices. */
++void pnfs_set_device_notify(clientid_t *clid, unsigned int types)
++{
++	struct nfs4_client *clp;
++	dprintk("%s: -->\n", __func__);
++
++	nfs4_lock_state();
++	/* Indicate that client has a device so we can only notify
++	 * the correct clients */
++	clp = find_confirmed_client(clid);
++	if (clp) {
++		atomic_inc(&clp->cl_deviceref);
++		dprintk("%s: Incr device count (clnt %p) to %d\n",
++			__func__, clp, atomic_read(&clp->cl_deviceref));
++	}
++	nfs4_unlock_state();
++}
++
++/* Clear notifications for this client
++ * XXX: Do we need to loop through and clean up all
++ *      krefs when nfsd cleans up the client? */
++void pnfs_clear_device_notify(struct nfs4_client *clp)
++{
++	atomic_dec(&clp->cl_deviceref);
++	dprintk("%s: Decr device count (clnt %p) to %d\n",
++		__func__, clp, atomic_read(&clp->cl_deviceref));
++}
++
++static struct nfs4_layout_state *
++alloc_init_layout_state(struct nfs4_client *clp, struct nfs4_file *fp,
++			stateid_t *stateid)
++{
++	struct nfs4_layout_state *new;
++
++	/* FIXME: use a kmem_cache */
++	new = kzalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return new;
++	get_nfs4_file(fp);
++	INIT_LIST_HEAD(&new->ls_perfile);
++	INIT_LIST_HEAD(&new->ls_layouts);
++	kref_init(&new->ls_ref);
++	new->ls_client = clp;
++	new->ls_file = fp;
++	new->ls_stateid.si_boot = stateid->si_boot;
++	new->ls_stateid.si_stateownerid = 0; /* identifies layout stateid */
++	new->ls_stateid.si_generation = 1;
++	spin_lock(&layout_lock);
++	new->ls_stateid.si_fileid = current_layoutid++;
++	list_add(&new->ls_perfile, &fp->fi_layout_states);
++	spin_unlock(&layout_lock);
++	return new;
++}
++
++static inline void
++get_layout_state(struct nfs4_layout_state *ls)
++{
++	kref_get(&ls->ls_ref);
++}
++
++static void
++destroy_layout_state_common(struct nfs4_layout_state *ls)
++{
++	struct nfs4_file *fp = ls->ls_file;
++
++	dprintk("pNFS %s: ls %p fp %p clp %p\n", __func__, ls, fp,
++		ls->ls_client);
++	BUG_ON(!list_empty(&ls->ls_layouts));
++	kfree(ls);
++	put_nfs4_file(fp);
++}
++
++static void
++destroy_layout_state(struct kref *kref)
++{
++	struct nfs4_layout_state *ls =
++			container_of(kref, struct nfs4_layout_state, ls_ref);
++
++	spin_lock(&layout_lock);
++	list_del(&ls->ls_perfile);
++	spin_unlock(&layout_lock);
++	destroy_layout_state_common(ls);
++}
++
++static void
++destroy_layout_state_locked(struct kref *kref)
++{
++	struct nfs4_layout_state *ls =
++			container_of(kref, struct nfs4_layout_state, ls_ref);
++
++	list_del(&ls->ls_perfile);
++	destroy_layout_state_common(ls);
++}
++
++static inline void
++put_layout_state(struct nfs4_layout_state *ls)
++{
++	dprintk("pNFS %s: ls %p ls_ref %d\n", __func__, ls,
++		atomic_read(&ls->ls_ref.refcount));
++	kref_put(&ls->ls_ref, destroy_layout_state);
++}
++
++static inline void
++put_layout_state_locked(struct nfs4_layout_state *ls)
++{
++	dprintk("pNFS %s: ls %p ls_ref %d\n", __func__, ls,
++		atomic_read(&ls->ls_ref.refcount));
++	kref_put(&ls->ls_ref, destroy_layout_state_locked);
++}
++
++/*
++ * Search the fp->fi_layout_states list for a layout state with the clientid.
++ * If not found, then this is a 'first open/delegation/lock stateid' from
++ * the client for this file.
++ * Called under the layout_lock.
++ */
++static struct nfs4_layout_state *
++find_get_layout_state(struct nfs4_client *clp, struct nfs4_file *fp)
++{
++	struct nfs4_layout_state *ls;
++
++	BUG_ON_UNLOCKED_LAYOUT();
++	list_for_each_entry(ls, &fp->fi_layout_states, ls_perfile) {
++		if (ls->ls_client == clp) {
++			dprintk("pNFS %s: before GET ls %p ls_ref %d\n",
++				__func__, ls,
++				atomic_read(&ls->ls_ref.refcount));
++			get_layout_state(ls);
++			return ls;
++		}
++	}
++	return NULL;
++}
++
++static __be32
++verify_stateid(struct nfs4_file *fp, stateid_t *stateid)
++{
++	struct nfs4_stateid *local = NULL;
++	struct nfs4_delegation *temp = NULL;
++
++	/* check if open or lock stateid */
++	local = find_stateid(stateid, RD_STATE);
++	if (local)
++		return 0;
++	temp = find_delegation_stateid(fp->fi_inode, stateid);
++	if (temp)
++		return 0;
++	return nfserr_bad_stateid;
++}
++
++/*
++ * nfs4_process_layout_stateid()
++ *
++ * We have looked up the nfs4_file corresponding to the current_fh, and
++ * confirmed the clientid. Pull the few tests from nfs4_preprocess_stateid_op()
++ * that make sense with a layout stateid.
++ *
++ * Called with the state_lock held
++ * Returns zero and stateid is updated, or error.
++ *
++ * Note: the struct nfs4_layout_state pointer is only set by layoutget.
++ */
++static __be32
++nfs4_process_layout_stateid(struct nfs4_client *clp, struct nfs4_file *fp,
++			    stateid_t *stateid, struct nfs4_layout_state **lsp)
++{
++	struct nfs4_layout_state *ls = NULL;
++	__be32 status = 0;
++
++	dprintk("--> %s clp %p fp %p \n", __func__, clp, fp);
++
++	dprintk("%s: operation stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(stateid));
++
++	status = nfs4_check_stateid(stateid);
++	if (status)
++		goto out;
++
++	/* Is this the first use of this layout ? */
++	spin_lock(&layout_lock);
++	ls = find_get_layout_state(clp, fp);
++	spin_unlock(&layout_lock);
++	if (!ls) {
++		/* Only alloc layout state on layoutget (which sets lsp). */
++		if (!lsp) {
++			dprintk("%s ERROR: Not layoutget & no layout stateid\n",
++				__func__);
++			status = nfserr_bad_stateid;
++			goto out;
++		}
++		dprintk("%s Initial stateid for layout: file %p client %p\n",
++			__func__, fp, clp);
++
++		/* verify input stateid */
++		status = verify_stateid(fp, stateid);
++		if (status) {
++			dprintk("%s ERROR: invalid open/deleg/lock stateid\n",
++				__func__);
++			goto out;
++		}
++		ls = alloc_init_layout_state(clp, fp, stateid);
++		if (!ls) {
++			dprintk("%s pNFS ERROR: no memory for layout state\n",
++				__func__);
++			status = nfserr_resource;
++			goto out;
++		}
++	} else {
++		dprintk("%s Not initial stateid. Layout state %p file %p\n",
++			__func__, ls, fp);
++
++		/* BAD STATEID */
++		status = nfserr_bad_stateid;
++		if (memcmp(&ls->ls_stateid.si_opaque, &stateid->si_opaque,
++			sizeof(stateid_opaque_t)) != 0) {
++
++			/* if a LAYOUTGET operation and stateid is a valid
++			 * open/deleg/lock stateid, accept it as a parallel
++			 * initial layout stateid
++			 */
++			if (lsp && ((verify_stateid(fp, stateid)) == 0)) {
++				dprintk("%s parallel initial layout state\n",
++					__func__);
++				goto update;
++			}
++
++			dprintk("%s ERROR bad opaque in stateid 1\n", __func__);
++			goto out_put;
++		}
++
++		/* stateid is a valid layout stateid for this file. */
++		if (stateid->si_generation > ls->ls_stateid.si_generation) {
++			dprintk("%s bad stateid 1\n", __func__);
++			goto out_put;
++		}
++update:
++		update_stateid(&ls->ls_stateid);
++		dprintk("%s Updated ls_stateid to %d on layoutstate %p\n",
++			__func__, ls->ls_stateid.si_generation, ls);
++	}
++	status = 0;
++	/* Set the stateid to be encoded */
++	memcpy(stateid, &ls->ls_stateid, sizeof(stateid_t));
++
++	/* Return the layout state if requested */
++	if (lsp) {
++		get_layout_state(ls);
++		*lsp = ls;
++	}
++	dprintk("%s: layout stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(&ls->ls_stateid));
++out_put:
++	dprintk("%s PUT LO STATE:\n", __func__);
++	put_layout_state(ls);
++out:
++	dprintk("<-- %s status %d\n", __func__, htonl(status));
++
++	return status;
++}
++
++static inline struct nfs4_layout *
++alloc_layout(void)
++{
++	return kmem_cache_alloc(pnfs_layout_slab, GFP_KERNEL);
++}
++
++static inline void
++free_layout(struct nfs4_layout *lp)
++{
++	kmem_cache_free(pnfs_layout_slab, lp);
++}
++
++static void
++init_layout(struct nfs4_layout_state *ls,
++	    struct nfs4_layout *lp,
++	    struct nfs4_file *fp,
++	    struct nfs4_client *clp,
++	    struct svc_fh *current_fh,
++	    struct nfsd4_layout_seg *seg)
++{
++	dprintk("pNFS %s: ls %p lp %p clp %p fp %p ino %p\n", __func__,
++		ls, lp, clp, fp, fp->fi_inode);
++
++	get_nfs4_file(fp);
++	lp->lo_client = clp;
++	lp->lo_file = fp;
++	get_layout_state(ls);
++	lp->lo_state = ls;
++	memcpy(&lp->lo_seg, seg, sizeof(lp->lo_seg));
++	spin_lock(&layout_lock);
++	list_add_tail(&lp->lo_perstate, &ls->ls_layouts);
++	list_add_tail(&lp->lo_perclnt, &clp->cl_layouts);
++	list_add_tail(&lp->lo_perfile, &fp->fi_layouts);
++	spin_unlock(&layout_lock);
++	dprintk("pNFS %s end\n", __func__);
++}
++
++static void
++dequeue_layout(struct nfs4_layout *lp)
++{
++	BUG_ON_UNLOCKED_LAYOUT();
++	list_del(&lp->lo_perclnt);
++	list_del(&lp->lo_perfile);
++	list_del(&lp->lo_perstate);
++}
++
++static void
++destroy_layout(struct nfs4_layout *lp)
++{
++	struct nfs4_client *clp;
++	struct nfs4_file *fp;
++	struct nfs4_layout_state *ls;
++
++	BUG_ON_UNLOCKED_LAYOUT();
++	clp = lp->lo_client;
++	fp = lp->lo_file;
++	ls = lp->lo_state;
++	dprintk("pNFS %s: lp %p clp %p fp %p ino %p ls_layouts empty %d\n",
++		__func__, lp, clp, fp, fp->fi_inode,
++		list_empty(&ls->ls_layouts));
++
++	kmem_cache_free(pnfs_layout_slab, lp);
++	/* release references taken by init_layout */
++	put_layout_state_locked(ls);
++	put_nfs4_file(fp);
++}
++
++void fs_layout_return(struct super_block *sb, struct inode *ino,
++		      struct nfsd4_pnfs_layoutreturn *lrp, int flags,
++		      void *recall_cookie)
++{
++	int ret;
++
++	if (unlikely(!sb->s_pnfs_op->layout_return))
++		return;
++
++	lrp->lr_flags = flags;
++	lrp->args.lr_cookie = recall_cookie;
++
++	if (!ino) /* FSID or ALL */
++		ino = sb->s_root->d_inode;
++
++	ret = sb->s_pnfs_op->layout_return(ino, &lrp->args);
++	dprintk("%s: inode %lu iomode=%d offset=0x%llx length=0x%llx "
++		"cookie = %p flags 0x%x status=%d\n",
++		__func__, ino->i_ino, lrp->args.lr_seg.iomode,
++		lrp->args.lr_seg.offset, lrp->args.lr_seg.length,
++		recall_cookie, flags, ret);
++}
++
++static u64
++alloc_init_sbid(struct super_block *sb)
++{
++	struct sbid_tracker *sbid;
++	struct sbid_tracker *new = alloc_sbid();
++	unsigned long hash_idx = sbid_hashval(sb);
++	u64 id = 0;
++
++	if (likely(new)) {
++		spin_lock(&layout_lock);
++		id = ++current_sbid;
++		new->id = (id << SBID_HASH_BITS) | (hash_idx & SBID_HASH_MASK);
++		id = new->id;
++		BUG_ON(id == 0);
++		new->sb = sb;
++
++		list_for_each_entry (sbid, &sbid_hashtbl[hash_idx], hash)
++			if (sbid->sb == sb) {
++				kfree(new);
++				id = sbid->id;
++				spin_unlock(&layout_lock);
++				return id;
++			}
++		list_add(&new->hash, &sbid_hashtbl[hash_idx]);
++		spin_unlock(&layout_lock);
++	}
++	return id;
++}
++
++struct super_block *
++find_sbid_id(u64 id)
++{
++	struct sbid_tracker *sbid;
++	struct super_block *sb = NULL;
++	unsigned long hash_idx = id & SBID_HASH_MASK;
++	int pos = 0;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (sbid, &sbid_hashtbl[hash_idx], hash) {
++		pos++;
++		if (sbid->id != id)
++			continue;
++		if (pos > 1)
++			list_move(&sbid->hash, &sbid_hashtbl[hash_idx]);
++		sb = sbid->sb;
++		break;
++	}
++	spin_unlock(&layout_lock);
++	return sb;
++}
++
++u64
++find_create_sbid(struct super_block *sb)
++{
++	struct sbid_tracker *sbid;
++	unsigned long hash_idx = sbid_hashval(sb);
++	int pos = 0;
++	u64 id = 0;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (sbid, &sbid_hashtbl[hash_idx], hash) {
++		pos++;
++		if (sbid->sb != sb)
++			continue;
++		if (pos > 1)
++			list_move(&sbid->hash, &sbid_hashtbl[hash_idx]);
++		id = sbid->id;
++		break;
++	}
++	spin_unlock(&layout_lock);
++
++	if (!id)
++		id = alloc_init_sbid(sb);
++
++	return id;
++}
++
++/*
++ * Create a layoutrecall structure
++ * An optional layoutrecall can be cloned (except for the layoutrecall lists)
++ */
++static struct nfs4_layoutrecall *
++alloc_init_layoutrecall(struct nfsd4_pnfs_cb_layout *cbl,
++			struct nfs4_client *clp,
++			struct nfs4_file *lrfile)
++{
++	struct nfs4_layoutrecall *clr;
++
++	dprintk("NFSD %s\n", __func__);
++	clr = kmem_cache_alloc(pnfs_layoutrecall_slab, GFP_KERNEL);
++	if (clr == NULL)
++		return clr;
++
++	dprintk("NFSD %s -->\n", __func__);
++
++	memset(clr, 0, sizeof(*clr));
++	if (lrfile)
++		get_nfs4_file(lrfile);
++	clr->clr_client = clp;
++	clr->clr_file = lrfile;
++	clr->cb = *cbl;
++
++	kref_init(&clr->clr_ref);
++	INIT_LIST_HEAD(&clr->clr_perclnt);
++
++	dprintk("NFSD %s return %p\n", __func__, clr);
++	return clr;
++}
++
++static void
++get_layoutrecall(struct nfs4_layoutrecall *clr)
++{
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));
++	kref_get(&clr->clr_ref);
++}
++
++static void
++destroy_layoutrecall(struct kref *kref)
++{
++	struct nfs4_layoutrecall *clr =
++			container_of(kref, struct nfs4_layoutrecall, clr_ref);
++	dprintk("pNFS %s: clr %p fp %p clp %p\n", __func__, clr,
++		clr->clr_file, clr->clr_client);
++	BUG_ON(!list_empty(&clr->clr_perclnt));
++	if (clr->clr_file)
++		put_nfs4_file(clr->clr_file);
++	kmem_cache_free(pnfs_layoutrecall_slab, clr);
++}
++
++int
++put_layoutrecall(struct nfs4_layoutrecall *clr)
++{
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));
++	return kref_put(&clr->clr_ref, destroy_layoutrecall);
++}
++
++void *
++layoutrecall_done(struct nfs4_layoutrecall *clr)
++{
++	void *recall_cookie = clr->cb.cbl_cookie;
++	struct nfs4_layoutrecall *parent = clr->parent;
++
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));
++	BUG_ON_UNLOCKED_LAYOUT();
++	list_del_init(&clr->clr_perclnt);
++	put_layoutrecall(clr);
++
++	if (parent && !put_layoutrecall(parent))
++		recall_cookie = NULL;
++
++	return recall_cookie;
++}
++
++/*
++ * get_state() and cb_get_state() are
++ */
++void
++release_pnfs_ds_dev_list(struct nfs4_stateid *stp)
++{
++	struct pnfs_ds_dev_entry *ddp;
++
++	while (!list_empty(&stp->st_pnfs_ds_id)) {
++		ddp = list_entry(stp->st_pnfs_ds_id.next,
++				 struct pnfs_ds_dev_entry, dd_dev_entry);
++		list_del(&ddp->dd_dev_entry);
++		kfree(ddp);
++	}
++}
++
++static int
++nfs4_add_pnfs_ds_dev(struct nfs4_stateid *stp, u32 dsid)
++{
++	struct pnfs_ds_dev_entry *ddp;
++
++	ddp = kmalloc(sizeof(*ddp), GFP_KERNEL);
++	if (!ddp)
++		return -ENOMEM;
++
++	INIT_LIST_HEAD(&ddp->dd_dev_entry);
++	list_add(&ddp->dd_dev_entry, &stp->st_pnfs_ds_id);
++	ddp->dd_dsid = dsid;
++	return 0;
++}
++
++/*
++ * are two octet ranges overlapping?
++ * start1            last1
++ *   |-----------------|
++ *                start2            last2
++ *                  |----------------|
++ */
++static inline int
++lo_seg_overlapping(struct nfsd4_layout_seg *l1, struct nfsd4_layout_seg *l2)
++{
++	u64 start1 = l1->offset;
++	u64 last1 = last_byte_offset(start1, l1->length);
++	u64 start2 = l2->offset;
++	u64 last2 = last_byte_offset(start2, l2->length);
++	int ret;
++
++	/* if last1 == start2 there's a single byte overlap */
++	ret = (last2 >= start1) && (last1 >= start2);
++	dprintk("%s: l1 %llu:%lld l2 %llu:%lld ret=%d\n", __func__,
++		l1->offset, l1->length, l2->offset, l2->length, ret);
++	return ret;
++}
++
++static inline int
++same_fsid_major(struct nfs4_fsid *fsid, u64 major)
++{
++	return fsid->major == major;
++}
++
++static inline int
++same_fsid(struct nfs4_fsid *fsid, struct svc_fh *current_fh)
++{
++	return same_fsid_major(fsid, current_fh->fh_export->ex_fsid);
++}
++
++/*
++ * find a layout recall conflicting with the specified layoutget
++ */
++static int
++is_layout_recalled(struct nfs4_client *clp,
++		   struct svc_fh *current_fh,
++		   struct nfsd4_layout_seg *seg)
++{
++	struct nfs4_layoutrecall *clr;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (clr, &clp->cl_layoutrecalls, clr_perclnt) {
++		if (clr->cb.cbl_seg.layout_type != seg->layout_type)
++			continue;
++		if (clr->cb.cbl_recall_type == RETURN_ALL)
++			goto found;
++		if (clr->cb.cbl_recall_type == RETURN_FSID) {
++			if (same_fsid(&clr->cb.cbl_fsid, current_fh))
++				goto found;
++			else
++				continue;
++		}
++		BUG_ON(clr->cb.cbl_recall_type != RETURN_FILE);
++		if (clr->cb.cbl_seg.clientid == seg->clientid &&
++		    lo_seg_overlapping(&clr->cb.cbl_seg, seg))
++			goto found;
++	}
++	spin_unlock(&layout_lock);
++	return 0;
++found:
++	spin_unlock(&layout_lock);
++	return 1;
++}
++
++/*
++ * are two octet ranges overlapping or adjacent?
++ */
++static inline int
++lo_seg_mergeable(struct nfsd4_layout_seg *l1, struct nfsd4_layout_seg *l2)
++{
++	u64 start1 = l1->offset;
++	u64 end1 = end_offset(start1, l1->length);
++	u64 start2 = l2->offset;
++	u64 end2 = end_offset(start2, l2->length);
++
++	/* if end1 == start2 the ranges are adjacent */
++	return (end2 >= start1) && (end1 >= start2);
++}
++
++static void
++extend_layout(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *lg)
++{
++	u64 lo_start = lo->offset;
++	u64 lo_end = end_offset(lo_start, lo->length);
++	u64 lg_start = lg->offset;
++	u64 lg_end = end_offset(lg_start, lg->length);
++
++	/* lo already covers lg? */
++	if (lo_start <= lg_start && lg_end <= lo_end)
++		return;
++
++	/* extend start offset */
++	if (lo_start > lg_start)
++		lo_start = lg_start;
++
++	/* extend end offset */
++	if (lo_end < lg_end)
++		lo_end = lg_end;
++
++	lo->offset = lo_start;
++	lo->length = (lo_end == NFS4_MAX_UINT64) ?
++		      lo_end : lo_end - lo_start;
++}
++
++static struct nfs4_layout *
++merge_layout(struct nfs4_file *fp,
++	     struct nfs4_client *clp,
++	     struct nfsd4_layout_seg *seg)
++{
++	struct nfs4_layout *lp, *merged = NULL;	/* merged: layout extended, or NULL */
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (lp, &fp->fi_layouts, lo_perfile)
++		if (lp->lo_seg.layout_type == seg->layout_type &&
++		    lp->lo_seg.clientid == seg->clientid &&
++		    lp->lo_seg.iomode == seg->iomode &&
++		    lo_seg_mergeable(&lp->lo_seg, seg)) {
++			extend_layout(&lp->lo_seg, seg);
++			merged = lp;	/* the cursor itself is never NULL */
++			break;
++		}
++	spin_unlock(&layout_lock);
++	return merged;
++}
++
++__be32
++nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *lgp,
++		     struct exp_xdr_stream *xdr)
++{
++	u32 status;
++	__be32 nfserr;
++	struct inode *ino = lgp->lg_fhp->fh_dentry->d_inode;
++	struct super_block *sb = ino->i_sb;
++	int can_merge;
++	struct nfs4_file *fp;
++	struct nfs4_client *clp;
++	struct nfs4_layout *lp = NULL;
++	struct nfs4_layout_state *ls = NULL;
++	struct nfsd4_pnfs_layoutget_arg args = {
++		.lg_minlength = lgp->lg_minlength,
++		.lg_fh = &lgp->lg_fhp->fh_handle,
++	};
++	struct nfsd4_pnfs_layoutget_res res = {
++		.lg_seg = lgp->lg_seg,
++	};
++
++	dprintk("NFSD: %s Begin\n", __func__);
++
++	args.lg_sbid = find_create_sbid(sb);
++	if (!args.lg_sbid) {
++		nfserr = nfserr_layouttrylater;
++		goto out;
++	}
++
++	can_merge = sb->s_pnfs_op->can_merge_layouts != NULL &&
++		    sb->s_pnfs_op->can_merge_layouts(lgp->lg_seg.layout_type);
++
++	nfs4_lock_state();
++	fp = find_alloc_file(ino, lgp->lg_fhp);
++	clp = find_confirmed_client((clientid_t *)&lgp->lg_seg.clientid);
++	dprintk("pNFS %s: fp %p clp %p \n", __func__, fp, clp);
++	if (!fp || !clp) {
++		nfserr = nfserr_inval;
++		goto out_unlock;
++	}
++
++	/* Check decoded layout stateid */
++	nfserr = nfs4_process_layout_stateid(clp, fp, &lgp->lg_sid, &ls);
++	if (nfserr)
++		goto out_unlock;
++
++	if (is_layout_recalled(clp, lgp->lg_fhp, &lgp->lg_seg)) {
++		nfserr = nfserr_recallconflict;
++		goto out_unlock; /* state lock and ls/fp refs are held */
++	}
++
++	/* pre-alloc layout in case we can't merge after we call
++	 * the file system
++	 */
++	lp = alloc_layout();
++	if (!lp) {
++		nfserr = nfserr_layouttrylater;
++		goto out_unlock;
++	}
++
++	dprintk("pNFS %s: pre-export type 0x%x maxcount %Zd "
++		"iomode %u offset %llu length %llu\n",
++		__func__, lgp->lg_seg.layout_type,
++		exp_xdr_qbytes(xdr->end - xdr->p),
++		lgp->lg_seg.iomode, lgp->lg_seg.offset, lgp->lg_seg.length);
++
++	/* FIXME: need to eliminate the use of the state lock */
++	nfs4_unlock_state();
++	status = sb->s_pnfs_op->layout_get(ino, xdr, &args, &res);
++	nfs4_lock_state();
++
++	dprintk("pNFS %s: post-export status %u "
++		"iomode %u offset %llu length %llu\n",
++		__func__, status, res.lg_seg.iomode,
++		res.lg_seg.offset, res.lg_seg.length);
++
++	/*
++	 * The allowable error codes for the layout_get pNFS export
++	 * operations vector function (from the file system) can be
++	 * expanded as needed to include other errors defined for
++	 * the RFC 5661 LAYOUTGET operation.
++	 */
++	switch (status) {
++	case 0:
++		nfserr = NFS4_OK;
++		break;
++	case NFS4ERR_ACCESS:
++	case NFS4ERR_BADIOMODE:
++		/* No support for LAYOUTIOMODE4_RW layouts */
++	case NFS4ERR_BADLAYOUT:
++		/* No layout matching loga_minlength rules */
++	case NFS4ERR_INVAL:
++	case NFS4ERR_IO:
++	case NFS4ERR_LAYOUTTRYLATER:
++	case NFS4ERR_LAYOUTUNAVAILABLE:
++	case NFS4ERR_LOCKED:
++	case NFS4ERR_NOSPC:
++	case NFS4ERR_RECALLCONFLICT:
++	case NFS4ERR_SERVERFAULT:
++	case NFS4ERR_TOOSMALL:
++		/* Requested layout too big for loga_maxcount */
++	case NFS4ERR_WRONG_TYPE:
++		/* Not a regular file */
++		nfserr = cpu_to_be32(status);
++		goto out_freelayout;
++	default:
++		BUG();
++		nfserr = nfserr_serverfault;
++	}
++
++	lgp->lg_seg = res.lg_seg;
++	lgp->lg_roc = res.lg_return_on_close;
++
++	/* SUCCESS!
++	 * Can the new layout be merged into an existing one?
++	 * If so, free unused layout struct
++	 */
++	if (can_merge && merge_layout(fp, clp, &res.lg_seg))
++		goto out_freelayout;
++
++	/* Can't merge, so let's initialize this new layout */
++	init_layout(ls, lp, fp, clp, lgp->lg_fhp, &res.lg_seg);
++out_unlock:
++	if (ls)
++		put_layout_state(ls);
++	if (fp)
++		put_nfs4_file(fp);
++	nfs4_unlock_state();
++out:
++	dprintk("pNFS %s: lp %p exit nfserr %u\n", __func__, lp,
++		be32_to_cpu(nfserr));
++	return nfserr;
++out_freelayout:
++	free_layout(lp);
++	goto out_unlock;
++}
++
++static void
++trim_layout(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *lr)
++{
++	u64 lo_start = lo->offset;
++	u64 lo_end = end_offset(lo_start, lo->length);
++	u64 lr_start = lr->offset;
++	u64 lr_end = end_offset(lr_start, lr->length);
++
++	dprintk("%s:Begin lo %llu:%lld lr %llu:%lld\n", __func__,
++		lo->offset, lo->length, lr->offset, lr->length);
++
++	/* lr fully covers lo? */
++	if (lr_start <= lo_start && lo_end <= lr_end) {
++		lo->length = 0;
++		goto out;
++	}
++
++	/*
++	 * split not supported yet. retain layout segment.
++	 * remains must be returned by the client
++	 * on the final layout return.
++	 */
++	if (lo_start < lr_start && lr_end < lo_end) {
++		dprintk("%s: split not supported\n", __func__);
++		goto out;
++	}
++
++	if (lo_start < lr_start)
++		lo_end = lr_start - 1;
++	else /* lr_end < lo_end */
++		lo_start = lr_end + 1;
++
++	lo->offset = lo_start;
++	lo->length = (lo_end == NFS4_MAX_UINT64) ? lo_end : lo_end - lo_start;
++out:
++	dprintk("%s:End lo %llu:%lld\n", __func__, lo->offset, lo->length);
++}
++
++static int
++pnfs_return_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp,
++			 struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	int layouts_found = 0;
++	struct nfs4_layout *lp, *nextlp;
++
++	dprintk("%s: clp %p fp %p\n", __func__, clp, fp);
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (lp, nextlp, &fp->fi_layouts, lo_perfile) {
++		dprintk("%s: lp %p client %p,%p lo_type %x,%x iomode %d,%d\n",
++			__func__, lp,
++			lp->lo_client, clp,
++			lp->lo_seg.layout_type, lrp->args.lr_seg.layout_type,
++			lp->lo_seg.iomode, lrp->args.lr_seg.iomode);
++		if (lp->lo_client != clp ||
++		    lp->lo_seg.layout_type != lrp->args.lr_seg.layout_type ||
++		    (lp->lo_seg.iomode != lrp->args.lr_seg.iomode &&
++		     lrp->args.lr_seg.iomode != IOMODE_ANY) ||
++		     !lo_seg_overlapping(&lp->lo_seg, &lrp->args.lr_seg))
++			continue;
++		layouts_found++;
++		trim_layout(&lp->lo_seg, &lrp->args.lr_seg);
++		if (!lp->lo_seg.length) {
++			lrp->lrs_present = 0;
++			dequeue_layout(lp);
++			destroy_layout(lp);
++		}
++	}
++	spin_unlock(&layout_lock);
++
++	return layouts_found;
++}
++
++static int
++pnfs_return_client_layouts(struct nfs4_client *clp,
++			   struct nfsd4_pnfs_layoutreturn *lrp, u64 ex_fsid)
++{
++	int layouts_found = 0;
++	struct nfs4_layout *lp, *nextlp;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (lp, nextlp, &clp->cl_layouts, lo_perclnt) {
++		if (lrp->args.lr_seg.layout_type != lp->lo_seg.layout_type ||
++		   (lrp->args.lr_seg.iomode != lp->lo_seg.iomode &&
++		    lrp->args.lr_seg.iomode != IOMODE_ANY))
++			continue;
++
++		if (lrp->args.lr_return_type == RETURN_FSID &&
++		    !same_fsid_major(&lp->lo_file->fi_fsid, ex_fsid))
++			continue;
++
++		layouts_found++;
++		dequeue_layout(lp);
++		destroy_layout(lp);
++	}
++	spin_unlock(&layout_lock);
++
++	return layouts_found;
++}
++
++static int
++recall_return_perfect_match(struct nfs4_layoutrecall *clr,
++			    struct nfsd4_pnfs_layoutreturn *lrp,
++			    struct nfs4_file *fp,
++			    struct svc_fh *current_fh)
++{
++	if (clr->cb.cbl_seg.iomode != lrp->args.lr_seg.iomode ||
++	    clr->cb.cbl_recall_type != lrp->args.lr_return_type)
++		return 0;
++
++	return (clr->cb.cbl_recall_type == RETURN_FILE &&
++		clr->clr_file == fp &&
++		clr->cb.cbl_seg.offset == lrp->args.lr_seg.offset &&
++		clr->cb.cbl_seg.length == lrp->args.lr_seg.length) ||
++
++		(clr->cb.cbl_recall_type == RETURN_FSID &&
++		 same_fsid(&clr->cb.cbl_fsid, current_fh)) ||
++
++		clr->cb.cbl_recall_type == RETURN_ALL;
++}
++
++static int
++recall_return_partial_match(struct nfs4_layoutrecall *clr,
++			    struct nfsd4_pnfs_layoutreturn *lrp,
++			    struct nfs4_file *fp,
++			    struct svc_fh *current_fh)
++{
++	/* iomode matching? */
++	if (clr->cb.cbl_seg.iomode != lrp->args.lr_seg.iomode &&
++	    clr->cb.cbl_seg.iomode != IOMODE_ANY &&
++	    lrp->args.lr_seg.iomode != IOMODE_ANY)
++		return 0;
++
++	if (clr->cb.cbl_recall_type == RETURN_ALL ||
++	    lrp->args.lr_return_type == RETURN_ALL)
++		return 1;
++
++	/* fsid matches? */
++	if (clr->cb.cbl_recall_type == RETURN_FSID ||
++	    lrp->args.lr_return_type == RETURN_FSID)
++		return same_fsid(&clr->cb.cbl_fsid, current_fh);
++
++	/* file matches, range overlapping? */
++	return clr->clr_file == fp &&
++	       lo_seg_overlapping(&clr->cb.cbl_seg, &lrp->args.lr_seg);
++}
++
++int nfs4_pnfs_return_layout(struct super_block *sb, struct svc_fh *current_fh,
++			    struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	int status = 0;
++	int layouts_found = 0;
++	struct inode *ino = current_fh->fh_dentry->d_inode;
++	struct nfs4_file *fp = NULL;
++	struct nfs4_client *clp;
++	struct nfs4_layoutrecall *clr, *nextclr;
++	u64 ex_fsid = current_fh->fh_export->ex_fsid;
++	void *recall_cookie = NULL;
++
++	dprintk("NFSD: %s\n", __func__);
++
++	nfs4_lock_state();
++	clp = find_confirmed_client((clientid_t *)&lrp->args.lr_seg.clientid);
++	if (!clp)
++		goto out;
++
++	if (lrp->args.lr_return_type == RETURN_FILE) {
++		fp = find_file(ino);
++		if (!fp) {
++			printk(KERN_ERR "%s: RETURN_FILE: no nfs4_file for "
++				"ino %p:%lu\n",
++				__func__, ino, ino ? ino->i_ino : 0L);
++			goto out;
++		}
++
++		/* Check the stateid */
++		dprintk("%s PROCESS LO_STATEID inode %p\n", __func__, ino);
++		status = nfs4_process_layout_stateid(clp, fp, &lrp->lr_sid,
++						     NULL);
++		if (status)
++			goto out_put_file;
++
++		/* update layouts */
++		layouts_found = pnfs_return_file_layouts(clp, fp, lrp);
++		/* optimize for the all-empty case */
++		if (list_empty(&fp->fi_layouts))
++			recall_cookie = PNFS_LAST_LAYOUT_NO_RECALLS;
++	} else {
++		layouts_found = pnfs_return_client_layouts(clp, lrp, ex_fsid);
++	}
++
++	dprintk("pNFS %s: clp %p fp %p layout_type 0x%x iomode %d "
++		"return_type %d fsid 0x%llx offset %llu length %llu: "
++		"layouts_found %d\n",
++		__func__, clp, fp, lrp->args.lr_seg.layout_type,
++		lrp->args.lr_seg.iomode, lrp->args.lr_return_type,
++		ex_fsid,
++		lrp->args.lr_seg.offset, lrp->args.lr_seg.length, layouts_found);
++
++	/* update layoutrecalls
++	 * note: for RETURN_{FSID,ALL}, fp may be NULL
++	 */
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (clr, nextclr, &clp->cl_layoutrecalls,
++				  clr_perclnt) {
++		if (clr->cb.cbl_seg.layout_type != lrp->args.lr_seg.layout_type)
++			continue;
++
++		if (recall_return_perfect_match(clr, lrp, fp, current_fh))
++			recall_cookie = layoutrecall_done(clr);
++		else if (layouts_found &&
++			 recall_return_partial_match(clr, lrp, fp, current_fh))
++			clr->clr_time = CURRENT_TIME;
++	}
++	spin_unlock(&layout_lock);
++
++out_put_file:
++	if (fp)
++		put_nfs4_file(fp);
++out:
++	nfs4_unlock_state();
++
++	/* call exported filesystem layout_return (ignore return-code) */
++	fs_layout_return(sb, ino, lrp, 0, recall_cookie);
++
++	dprintk("pNFS %s: exit status %d \n", __func__, status);
++	return status;
++}
++
++/*
++ * PNFS Metadata server export operations callback for get_state
++ *
++ * called by the cluster fs when it receives a get_state() from a data
++ * server.
++ * returns 0 or a negative errno; on a stateid match arg->access/clid are set.
++ *
++ */
++int
++nfs4_pnfs_cb_get_state(struct super_block *sb, struct pnfs_get_state *arg)
++{
++	struct nfs4_stateid *stp;
++	int flags = LOCK_STATE | OPEN_STATE; /* search both hash tables */
++	int status = -EINVAL;
++	struct inode *ino;
++	struct nfs4_delegation *dl;
++	stateid_t *stid = (stateid_t *)&arg->stid;
++
++	dprintk("NFSD: %s sid=" STATEID_FMT " ino %llu\n", __func__,
++		STATEID_VAL(stid), arg->ino);
++
++	nfs4_lock_state();
++	stp = find_stateid(stid, flags);
++	if (!stp) {
++		ino = iget_locked(sb, arg->ino);
++		if (!ino)
++			goto out;
++
++		if (ino->i_state & I_NEW) {
++			iget_failed(ino);
++			goto out;
++		}
++
++		dl = find_delegation_stateid(ino, stid);
++		if (dl)
++			status = 0;
++
++		iput(ino);
++	} else {
++		/* XXX ANDROS: marc removed nfs4_check_fh - how come? */
++
++		/* arg->dsid is the data server id, set by the cluster fs */
++		status = nfs4_add_pnfs_ds_dev(stp, arg->dsid);
++		if (status)
++			goto out;
++
++		arg->access = stp->st_access_bmap;
++		*(clientid_t *)&arg->clid =
++			stp->st_stateowner->so_client->cl_clientid;
++	}
++out:
++	nfs4_unlock_state();
++	return status;
++}
++
++static int
++cl_has_file_layout(struct nfs4_client *clp, struct nfs4_file *lrfile,
++		   stateid_t *lsid)
++{
++	int found = 0;
++	struct nfs4_layout *lp;
++	struct nfs4_layout_state *ls;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry(lp, &clp->cl_layouts, lo_perclnt) {
++		if (lp->lo_file != lrfile)
++			continue;
++
++		ls = find_get_layout_state(clp, lrfile);
++		if (!ls) {
++			/* This shouldn't happen as the file should have a
++			 * layout stateid if it has a layout.
++			 */
++			printk(KERN_ERR "%s: file %p has no layout stateid\n",
++				__func__, lrfile);
++			WARN_ON(1);
++			break;
++		}
++		update_stateid(&ls->ls_stateid);
++		memcpy(lsid, &ls->ls_stateid, sizeof(stateid_t));
++		put_layout_state_locked(ls);
++		found = 1;
++		break;
++	}
++	spin_unlock(&layout_lock);
++
++	return found;
++}
++
++static int
++cl_has_fsid_layout(struct nfs4_client *clp, struct nfs4_fsid *fsid)
++{
++	int found = 0;
++	struct nfs4_layout *lp;
++
++	/* note: minor version unused */
++	spin_lock(&layout_lock);
++	list_for_each_entry(lp, &clp->cl_layouts, lo_perclnt)
++		if (lp->lo_file->fi_fsid.major == fsid->major) {
++			found = 1;
++			break;
++		}
++	spin_unlock(&layout_lock);
++	return found;
++}
++
++static int
++cl_has_any_layout(struct nfs4_client *clp)
++{
++	return !list_empty(&clp->cl_layouts);
++}
++
++static int
++cl_has_layout(struct nfs4_client *clp, struct nfsd4_pnfs_cb_layout *cbl,
++	      struct nfs4_file *lrfile, stateid_t *lsid)
++{
++	switch (cbl->cbl_recall_type) {
++	case RETURN_FILE:
++		return cl_has_file_layout(clp, lrfile, lsid);
++	case RETURN_FSID:
++		return cl_has_fsid_layout(clp, &cbl->cbl_fsid);
++	default:
++		return cl_has_any_layout(clp);
++	}
++}
++
++/*
++ * Called without the layout_lock.
++ */
++void
++nomatching_layout(struct nfs4_layoutrecall *clr)
++{
++	struct nfsd4_pnfs_layoutreturn lr = {
++		.args.lr_return_type = clr->cb.cbl_recall_type,
++		.args.lr_seg = clr->cb.cbl_seg,
++	};
++	struct inode *inode;
++	void *recall_cookie;
++
++	if (clr->clr_file) {
++		inode = igrab(clr->clr_file->fi_inode);
++		if (WARN_ON(!inode))
++			return;
++	} else {
++		inode = NULL;
++	}
++
++	dprintk("%s: clp %p fp %p: simulating layout_return\n", __func__,
++		clr->clr_client, clr->clr_file);
++
++	if (clr->cb.cbl_recall_type == RETURN_FILE)
++		pnfs_return_file_layouts(clr->clr_client, clr->clr_file, &lr);
++	else
++		pnfs_return_client_layouts(clr->clr_client, &lr,
++					   clr->cb.cbl_fsid.major);
++
++	spin_lock(&layout_lock);
++	recall_cookie = layoutrecall_done(clr);
++	spin_unlock(&layout_lock);
++
++	fs_layout_return(clr->clr_sb, inode, &lr, LR_FLAG_INTERN,
++			 recall_cookie);
++	iput(inode);
++}
++
++void pnfs_expire_client(struct nfs4_client *clp)
++{
++	for (;;) {
++		struct nfs4_layoutrecall *lrp = NULL;
++
++		spin_lock(&layout_lock);
++		if (!list_empty(&clp->cl_layoutrecalls)) {
++			lrp = list_entry(clp->cl_layoutrecalls.next,
++					 struct nfs4_layoutrecall, clr_perclnt);
++			get_layoutrecall(lrp);
++		}
++		spin_unlock(&layout_lock);
++		if (!lrp)
++			break;
++
++		dprintk("%s: lrp %p, fp %p\n", __func__, lrp, lrp->clr_file);
++		BUG_ON(lrp->clr_client != clp);
++		nomatching_layout(lrp);
++		put_layoutrecall(lrp);
++	}
++
++	for (;;) {
++		struct nfs4_layout *lp = NULL;
++		struct inode *inode = NULL;
++		struct nfsd4_pnfs_layoutreturn lr;
++		bool empty = false;
++
++		spin_lock(&layout_lock);
++		if (!list_empty(&clp->cl_layouts)) {
++			lp = list_entry(clp->cl_layouts.next,
++					struct nfs4_layout, lo_perclnt);
++			inode = igrab(lp->lo_file->fi_inode);
++			memset(&lr, 0, sizeof(lr));
++			lr.args.lr_return_type = RETURN_FILE;
++			lr.args.lr_seg = lp->lo_seg;
++			empty = list_empty(&lp->lo_file->fi_layouts);
++			BUG_ON(lp->lo_client != clp);
++			dequeue_layout(lp);
++			destroy_layout(lp); /* do not access lp after this */
++		}
++		spin_unlock(&layout_lock);
++		if (!lp)
++			break;
++
++		if (WARN_ON(!inode))
++			break;
++
++		dprintk("%s: inode %lu lp %p clp %p\n", __func__, inode->i_ino,
++			lp, clp);
++
++		fs_layout_return(inode->i_sb, inode, &lr, LR_FLAG_EXPIRE,
++				 empty ? PNFS_LAST_LAYOUT_NO_RECALLS : NULL);
++		iput(inode);
++	}
++}
++
++struct create_recall_list_arg {
++	struct nfsd4_pnfs_cb_layout *cbl;
++	struct nfs4_file *lrfile;
++	struct list_head *todolist;
++	unsigned todo_count;
++};
++
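++/*
++ * Look for a matching layout held by the given client and, if one is
++ * found, add a pending layout recall to the todo list.
++ * A client that holds no matching layout is skipped and also yields 0.
++ * returns:
++ *   0 on success, or -ENOMEM if the recall could not be allocated.
++ */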
++static int
++lo_recall_per_client(struct nfs4_client *clp, void *p)
++{
++	stateid_t lsid;
++	struct nfs4_layoutrecall *pending;
++	struct create_recall_list_arg *arg = p;
++
++	memset(&lsid, 0, sizeof(lsid));
++	if (!cl_has_layout(clp, arg->cbl, arg->lrfile, &lsid))
++		return 0;
++
++	/* Matching put done by layoutreturn */
++	pending = alloc_init_layoutrecall(arg->cbl, clp, arg->lrfile);
++	/* out of memory, drain todo queue */
++	if (!pending)
++		return -ENOMEM;
++
++	*(stateid_t *)&pending->cb.cbl_sid = lsid;
++	list_add(&pending->clr_perclnt, arg->todolist);
++	arg->todo_count++;
++	return 0;
++}
++
++/* Create a layoutrecall structure for each client based on the
++ * original structure. */
++int
++create_layout_recall_list(struct list_head *todolist, unsigned *todo_len,
++			  struct nfsd4_pnfs_cb_layout *cbl,
++			  struct nfs4_file *lrfile)
++{
++	struct nfs4_client *clp;
++	struct create_recall_list_arg arg = {
++		.cbl = cbl,
++		.lrfile = lrfile,
++		.todolist = todolist,
++	};
++	int status = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	/* If client given by fs, just do single client */
++	if (cbl->cbl_seg.clientid) {
++		clp = find_confirmed_client(
++				(clientid_t *)&cbl->cbl_seg.clientid);
++		if (!clp) {
++			status = -ENOENT;
++			dprintk("%s: clientid %llx not found\n", __func__,
++				(unsigned long long)cbl->cbl_seg.clientid);
++			goto out;
++		}
++
++		status = lo_recall_per_client(clp, &arg);
++	} else {
++		/* Check all clients for layout matches */
++		status = filter_confirmed_clients(lo_recall_per_client, &arg);
++	}
++
++out:
++	*todo_len = arg.todo_count;
++	dprintk("%s: <-- list len %u status %d\n", __func__, *todo_len, status);
++	return status;
++}
++
++/*
++ * Recall layouts asynchronously
++ * Called with state lock.
++ */
++static int
++spawn_layout_recall(struct super_block *sb, struct list_head *todolist,
++		    unsigned todo_len)
++{
++	struct nfs4_layoutrecall *pending;
++	struct nfs4_layoutrecall *parent = NULL;
++	int status = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	if (todo_len > 1) {
++		pending = list_entry(todolist->next, struct nfs4_layoutrecall,
++				     clr_perclnt);
++
++		parent = alloc_init_layoutrecall(&pending->cb, NULL,
++						 pending->clr_file);
++		if (unlikely(!parent)) {
++			/* We want forward progress. If parent cannot be
++			 * allocated take the first one as parent but don't
++			 * execute it.  Caller must check for -EAGAIN, if so
++			 * When the partial recalls return,
++			 * nfsd_layout_recall_cb should be called again.
++			 */
++			list_del_init(&pending->clr_perclnt);
++			if (todo_len > 2) {
++				parent = pending;
++			} else {
++				parent = NULL;
++				put_layoutrecall(pending);
++			}
++			--todo_len;
++				status = -ENOMEM;
++		}
++	}
++
++	while (!list_empty(todolist)) {
++		pending = list_entry(todolist->next, struct nfs4_layoutrecall,
++				     clr_perclnt);
++		list_del_init(&pending->clr_perclnt);
++		dprintk("%s: clp %p cb_client %p fp %p\n", __func__,
++			pending->clr_client,
++			pending->clr_client->cl_cb_client,
++			pending->clr_file);
++		if (unlikely(!pending->clr_client->cl_cb_client)) {
++			printk(KERN_INFO
++				"%s: clientid %08x/%08x has no callback path\n",
++				__func__,
++				pending->clr_client->cl_clientid.cl_boot,
++				pending->clr_client->cl_clientid.cl_id);
++			put_layoutrecall(pending);
++			continue;
++		}
++
++		pending->clr_time = CURRENT_TIME;
++		pending->clr_sb = sb;
++		if (parent) {
++			/* If we created a parent its initial ref count is 1.
++			 * We will need to de-ref it eventually. So we just
++			 * don't increment on behalf of the last one.
++			 */
++			if (todo_len != 1)
++				get_layoutrecall(parent);
++		}
++		pending->parent = parent;
++		get_layoutrecall(pending);
++		/* Add to list so corresponding layoutreturn can find req */
++		list_add(&pending->clr_perclnt,
++			 &pending->clr_client->cl_layoutrecalls);
++
++		nfsd4_cb_layout(pending);
++		--todo_len;
++	}
++
++	return status;
++}
++
++/*
++ * Spawn a thread to perform a recall layout
++ *
++ */
++int nfsd_layout_recall_cb(struct super_block *sb, struct inode *inode,
++			  struct nfsd4_pnfs_cb_layout *cbl)
++{
++	int status;
++	struct nfs4_file *lrfile = NULL;
++	struct list_head todolist;
++	unsigned todo_len = 0;
++
++	dprintk("NFSD nfsd_layout_recall_cb: inode %p cbl %p\n", inode, cbl);
++	BUG_ON(!cbl);
++	BUG_ON(cbl->cbl_recall_type != RETURN_FILE &&
++	       cbl->cbl_recall_type != RETURN_FSID &&
++	       cbl->cbl_recall_type != RETURN_ALL);
++	BUG_ON(cbl->cbl_recall_type == RETURN_FILE && !inode);
++	BUG_ON(cbl->cbl_seg.iomode != IOMODE_READ &&
++	       cbl->cbl_seg.iomode != IOMODE_RW &&
++	       cbl->cbl_seg.iomode != IOMODE_ANY);
++
++	if (nfsd_serv == NULL) {
++		dprintk("NFSD nfsd_layout_recall_cb: nfsd_serv == NULL\n");
++		return -ENOENT;
++	}
++
++	nfs4_lock_state();
++	status = -ENOENT;
++	if (inode) {
++		lrfile = find_file(inode);
++		if (!lrfile) {
++			dprintk("NFSD nfsd_layout_recall_cb: "
++				"nfs4_file not found\n");
++			goto err;
++		}
++		if (cbl->cbl_recall_type == RETURN_FSID)
++			cbl->cbl_fsid = lrfile->fi_fsid;
++	}
++
++	INIT_LIST_HEAD(&todolist);
++
++	/* If no cookie provided by FS, return a default one */
++	if (!cbl->cbl_cookie)
++		cbl->cbl_cookie = PNFS_LAST_LAYOUT_NO_RECALLS;
++
++	status = create_layout_recall_list(&todolist, &todo_len, cbl, lrfile);
++	if (list_empty(&todolist)) {
++		status = -ENOENT;
++	} else {
++		/* process todolist even if create_layout_recall_list
++		 * returned an error */
++		int status2 = spawn_layout_recall(sb, &todolist, todo_len);
++		if (status2)
++			status = status2;
++	}
++
++err:
++	nfs4_unlock_state();
++	if (lrfile)
++		put_nfs4_file(lrfile);
++	return (todo_len && status) ? -EAGAIN : status;
++}
++
++struct create_device_notify_list_arg {
++	struct list_head *todolist;
++	struct nfsd4_pnfs_cb_dev_list *ndl;
++};
++
++static int
++create_device_notify_per_cl(struct nfs4_client *clp, void *p)
++{
++	struct nfs4_notify_device *cbnd;
++	struct create_device_notify_list_arg *arg = p;
++
++	if (atomic_read(&clp->cl_deviceref) <= 0)
++		return 0;
++
++	cbnd = kmalloc(sizeof(*cbnd), GFP_KERNEL);
++	if (!cbnd)
++		return -ENOMEM;
++
++	cbnd->nd_list = arg->ndl;
++	cbnd->nd_client = clp;
++	list_add(&cbnd->nd_perclnt, arg->todolist);
++	return 0;
++}
++
++/* Create a list of clients to send device notifications. */
++int
++create_device_notify_list(struct list_head *todolist,
++			  struct nfsd4_pnfs_cb_dev_list *ndl)
++{
++	int status;
++	struct create_device_notify_list_arg arg = {
++		.todolist = todolist,
++		.ndl = ndl,
++	};
++
++	nfs4_lock_state();
++	status = filter_confirmed_clients(create_device_notify_per_cl, &arg);
++	nfs4_unlock_state();
++
++	return status;
++}
++
++/*
++ * For each client that has a device, send a device notification.
++ * XXX: Need to track which clients have which devices.
++ */
++int nfsd_device_notify_cb(struct super_block *sb,
++			  struct nfsd4_pnfs_cb_dev_list *ndl)
++{
++	struct nfs4_notify_device *cbnd;
++	unsigned int notify_num = 0;
++	int status2, status = 0;
++	struct list_head todolist;
++
++	BUG_ON(!ndl || ndl->cbd_len == 0 || !ndl->cbd_list);
++
++	dprintk("NFSD %s: cbl %p len %u\n", __func__, ndl, ndl->cbd_len);
++
++	if (nfsd_serv == NULL)
++		return -ENOENT;
++
++	INIT_LIST_HEAD(&todolist);
++
++	status = create_device_notify_list(&todolist, ndl);
++
++	while (!list_empty(&todolist)) {
++		cbnd = list_entry(todolist.next, struct nfs4_notify_device,
++				  nd_perclnt);
++		list_del_init(&cbnd->nd_perclnt);
++		status2 = nfsd4_cb_notify_device(cbnd);
++		pnfs_clear_device_notify(cbnd->nd_client);
++		if (status2) {
++			kfree(cbnd);
++			status = status2;
++		}
++		notify_num++;
++	}
++
++	dprintk("NFSD %s: status %d clients %u\n",
++		__func__, status, notify_num);
++	return status;
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-08-23 12:09:03.309501439 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-08-23 12:09:03.309501439 -0400
+@@ -0,0 +1,461 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ ******************************************************************************/
++
++#include <linux/nfs4.h>
++#include <linux/nfsd/const.h>
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/sunrpc/clnt.h>
++
++#include "nfsfh.h"
++#include "nfsd.h"
++
++#define NFSDDBG_FACILITY                NFSDDBG_PROC
++
++/* Just use a linked list: we do not expect more than 32 dlm_device_entries.
++ * The first implementation will just use one device per cluster file system.
++ */
++
++static LIST_HEAD(dlm_device_list);
++static DEFINE_SPINLOCK(dlm_device_list_lock);
++
++struct dlm_device_entry {
++	struct list_head	dlm_dev_list;
++	char			disk_name[DISK_NAME_LEN];
++	int			num_ds;
++	char			ds_list[NFSD_DLM_DS_LIST_MAX];
++};
++
++static struct dlm_device_entry *
++_nfsd4_find_pnfs_dlm_device(char *disk_name)
++{
++	struct dlm_device_entry *dlm_pdev;
++
++	dprintk("--> %s  disk name %s\n", __func__, disk_name);
++	spin_lock(&dlm_device_list_lock);
++	list_for_each_entry(dlm_pdev, &dlm_device_list, dlm_dev_list) {
++		dprintk("%s Look for dlm_pdev %s\n", __func__,
++			dlm_pdev->disk_name);
++		if (!strncmp(dlm_pdev->disk_name, disk_name, DISK_NAME_LEN)) {
++			spin_unlock(&dlm_device_list_lock);
++			return dlm_pdev; /* whole name matched, not a prefix */
++		}
++	}
++	spin_unlock(&dlm_device_list_lock);
++	return NULL;
++}
++
++static struct dlm_device_entry *
++nfsd4_find_pnfs_dlm_device(struct super_block *sb) {
++	char dname[BDEVNAME_SIZE];
++
++	bdevname(sb->s_bdev, dname);
++	return _nfsd4_find_pnfs_dlm_device(dname);
++}
++
++ssize_t
++nfsd4_get_pnfs_dlm_device_list(char *buf, ssize_t buflen)
++{
++	char *pos = buf;
++	ssize_t size = 0;
++	struct dlm_device_entry *dlm_pdev;
++	int ret = -EINVAL;
++
++	spin_lock(&dlm_device_list_lock);
++	list_for_each_entry(dlm_pdev, &dlm_device_list, dlm_dev_list)
++	{
++		int advanced;
++		advanced = snprintf(pos, buflen - size, "%s:%s\n", dlm_pdev->disk_name, dlm_pdev->ds_list);
++		if (advanced >= buflen - size)
++			goto out;
++		size += advanced;
++		pos += advanced;
++	}
++	ret = size;
++
++out:
++	spin_unlock(&dlm_device_list_lock);
++	return ret;
++}
++
++bool nfsd4_validate_pnfs_dlm_device(char *ds_list, int *num_ds)
++{
++	char *start = ds_list;
++
++	*num_ds = 0;
++	while (*start) {
++		struct sockaddr_storage tempAddr;
++		int ipLen = strcspn(start, ",");
++
++		if (!rpc_pton(start, ipLen, (struct sockaddr *)&tempAddr, sizeof(tempAddr)))
++			return false;
++		(*num_ds)++;
++		/* skip the ',' separator, but never step over the final NUL */
++		start += ipLen + (start[ipLen] == ',');
++	}
++	return true;
++}
++
++/*
++ * pnfs_dlm_device string format:
++ *     block-device-path:<ds1 ipv4 address>,<ds2 ipv4 address>
++ *
++ * Examples
++ *     '/dev/sda:192.168.1.96,192.168.1.97' creates a data server list with
++ *     two data servers for the dlm cluster file system mounted on /dev/sda.
++ *
++ *     '/dev/sda:192.168.1.96,192.168.1.100'
++ *     replaces the data server list for /dev/sda
++ *
++ *     Only the deviceid == 1 is supported. Can add device id to
++ *     pnfs_dlm_device string when needed.
++ *
++ *     Only the round robin each data server once stripe index is supported.
++ *
++ * Returns 0 on success, -ENOMEM or -EINVAL on failure.
++ */
++int
++nfsd4_set_pnfs_dlm_device(char *pnfs_dlm_device, int len)
++{
++	struct dlm_device_entry *new, *found;
++	char *bufp = pnfs_dlm_device;
++	char *endp = bufp + strlen(bufp);
++	int err = -ENOMEM;
++
++	dprintk("--> %s len %d\n", __func__, len);
++
++	new = kzalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return err;
++
++	err = -EINVAL;
++	/* disk_name */
++	/* FIXME: need to check for valid disk_name. search superblocks?
++	 * check for slash dev slash ?
++	 */
++	len = strcspn(bufp, ":");
++	if (len >= DISK_NAME_LEN)	/* keep room for the NUL terminator */
++		goto out_free;
++	memcpy(new->disk_name, bufp, len);
++
++	err = -EINVAL;
++	bufp += len + 1;
++	if (bufp >= endp)
++		goto out_free;
++
++	/* data server list */
++	/* FIXME: need to check for comma separated valid ip format */
++	len = strcspn(bufp, ":");
++	if (len >= NFSD_DLM_DS_LIST_MAX) /* keep room for the NUL terminator */
++		goto out_free;
++	memcpy(new->ds_list, bufp, len);
++
++	/*  validate the ips */
++	if (!nfsd4_validate_pnfs_dlm_device(new->ds_list, &(new->num_ds)))
++		goto out_free;
++
++	dprintk("%s disk_name %s num_ds %d ds_list %s\n", __func__,
++		new->disk_name, new->num_ds, new->ds_list);
++
++	found = _nfsd4_find_pnfs_dlm_device(new->disk_name);
++	if (found) {
++		/* FIXME: should compare found->ds_list with new->ds_list
++		 * and if it is different, kick off a CB_NOTIFY change
++		 * deviceid.
++		 */
++		dprintk("%s pnfs_dlm_device %s:%s already in cache "
++			" replace ds_list with new ds_list %s\n", __func__,
++			found->disk_name, found->ds_list, new->ds_list);
++		memset(found->ds_list, 0, sizeof(found->ds_list));
++		memcpy(found->ds_list, new->ds_list, strlen(new->ds_list));
++		found->num_ds = new->num_ds;
++		kfree(new);
++	} else {
++		dprintk("%s Adding pnfs_dlm_device %s:%s\n", __func__,
++				new->disk_name, new->ds_list);
++		spin_lock(&dlm_device_list_lock);
++		list_add(&new->dlm_dev_list, &dlm_device_list);
++		spin_unlock(&dlm_device_list_lock);
++	}
++	dprintk("<-- %s Success\n", __func__);
++	return 0;
++
++out_free:
++	kfree(new);
++	dprintk("<-- %s returns %d\n", __func__, err);
++	return err;
++}
++
++/* Empty the pnfs_dlm_device cache, freeing every entry under the list lock. */
++void nfsd4_pnfs_dlm_shutdown(void)
++{
++	struct dlm_device_entry *entry, *tmp;
++
++	dprintk("--> %s\n", __func__);
++
++	spin_lock(&dlm_device_list_lock);
++	list_for_each_entry_safe(entry, tmp, &dlm_device_list, dlm_dev_list) {
++		list_del(&entry->dlm_dev_list);
++		kfree(entry);
++	}
++	spin_unlock(&dlm_device_list_lock);
++}
++
++static int nfsd4_pnfs_dlm_getdeviter(struct super_block *sb,
++				     u32 layout_type,
++				     struct nfsd4_pnfs_dev_iter_res *res)
++{
++	if (layout_type != LAYOUT_NFSV4_1_FILES) {
++		printk(KERN_ERR "%s: ERROR: layout type isn't 'file' "
++			"(type: %x)\n", __func__, layout_type);
++		return -ENOTSUPP;
++	}
++	/* a single device is ever reported, so every reply is flagged as eof */
++	res->gd_eof = 1;
++	if (res->gd_cookie)
++		return -ENOENT;
++	/* first call (cookie 0): report deviceid 1 and advance the cookie */
++	res->gd_cookie = 1;
++	res->gd_verf = 1;
++	res->gd_devid = 1;
++	return 0;
++}
++
++/* Encode the file-layout device info (deviceid 1) for sb's DS list into xdr. */
++static int nfsd4_pnfs_dlm_getdevinfo(struct super_block *sb,
++				     struct exp_xdr_stream *xdr,
++				     u32 layout_type,
++				     const struct nfsd4_pnfs_deviceid *devid)
++{
++	int err, len, i = 0;
++	struct pnfs_filelayout_device fdev;
++	struct pnfs_filelayout_devaddr *daddr;
++	struct dlm_device_entry *dlm_pdev;
++	char   *bufp;
++
++	err = -ENOTSUPP;
++	if (layout_type != LAYOUT_NFSV4_1_FILES) {
++		dprintk("%s: ERROR: layout type isn't 'file' "
++			"(type: %x)\n", __func__, layout_type);
++		return err;
++	}
++
++	/* We only hand out a deviceid of 1 in LAYOUTGET, so a GETDEVICEINFO
++	 * with a gdia_device_id != 1 is invalid.
++	 */
++	err = -EINVAL;
++	if (devid->devid != 1) {
++		dprintk("%s: WARNING: didn't receive a deviceid of "
++			"1 (got: 0x%llx)\n", __func__, devid->devid);
++		return err;
++	}
++
++	/*
++	 * If the DS list has not been established, return -EINVAL
++	 */
++	dlm_pdev = nfsd4_find_pnfs_dlm_device(sb);
++	if (!dlm_pdev) {
++		dprintk("%s: DEBUG: disk %s Not Found\n", __func__,
++			sb->s_bdev->bd_disk->disk_name);
++		return err;
++	}
++
++	dprintk("%s: Found disk %s with DS list |%s|\n",
++		__func__, dlm_pdev->disk_name, dlm_pdev->ds_list);
++
++	memset(&fdev, '\0', sizeof(fdev));
++	fdev.fl_device_length = dlm_pdev->num_ds;
++	err = -ENOMEM;
++	len = sizeof(*fdev.fl_device_list) * fdev.fl_device_length;
++	fdev.fl_device_list = kzalloc(len, GFP_KERNEL);
++	if (!fdev.fl_device_list) {
++		printk(KERN_ERR "%s: ERROR: unable to kmalloc a device list "
++			"buffer for %d DSes.\n", __func__, dlm_pdev->num_ds);
++		fdev.fl_device_length = 0;
++		goto out;
++	}
++
++	/* Set a simple stripe indicie */
++	fdev.fl_stripeindices_length = fdev.fl_device_length;
++	fdev.fl_stripeindices_list = kzalloc(sizeof(u32) *
++				     fdev.fl_stripeindices_length, GFP_KERNEL);
++	if (!fdev.fl_stripeindices_list) {
++		printk(KERN_ERR "%s: ERROR: unable to kmalloc a stripeindices "
++			"list buffer for %d DSes.\n", __func__, dlm_pdev->num_ds);
++		goto out;
++	}
++	for (i = 0; i < fdev.fl_stripeindices_length; i++)
++		fdev.fl_stripeindices_list[i] = i;
++
++	/* Transfer the data server list with a single multipath entry */
++	bufp = dlm_pdev->ds_list;
++	for (i = 0; i < fdev.fl_device_length; i++) {
++		/* zeroed so "out" may kfree r_addr.data unconditionally */
++		daddr = kzalloc(sizeof(*daddr), GFP_KERNEL);
++		if (!daddr) {
++			printk(KERN_ERR "%s: ERROR: unable to kmalloc a device "
++				"addr buffer.\n", __func__);
++			goto out;
++		}
++		/* fdev owns daddr from here on, so the error paths free it */
++		fdev.fl_device_list[i].fl_multipath_length = 1;
++		fdev.fl_device_list[i].fl_multipath_list = daddr;
++		daddr->r_netid.data = "tcp";
++		daddr->r_netid.len = 3;
++		len = strcspn(bufp, ",");
++		daddr->r_addr.data = kmalloc(len + 4, GFP_KERNEL);
++		if (!daddr->r_addr.data)
++			goto out;
++		memcpy(daddr->r_addr.data, bufp, len);
++		/* append the port number as two more bytes beyond the quad:
++		 * ".8.1" -> 0x08.0x01 -> 0x0801 = port 2049. */
++		memcpy(daddr->r_addr.data + len, ".8.1", 4);
++		daddr->r_addr.len = len + 4;
++		dprintk("%s: encoding DS |%s|\n", __func__, bufp);
++		bufp += len + 1;
++	}
++
++	/* have nfsd encode the device info */
++	err = filelayout_encode_devinfo(xdr, &fdev);
++out:
++	for (i = 0; i < fdev.fl_device_length; i++) {
++		daddr = fdev.fl_device_list[i].fl_multipath_list;
++		if (daddr)
++			kfree(daddr->r_addr.data);
++		kfree(daddr);
++	}
++	kfree(fdev.fl_device_list);
++	kfree(fdev.fl_stripeindices_list);
++	dprintk("<-- %s returns %d\n", __func__, err);
++	return err;
++}
++
++/* blocksize if >= NFSSVC_MAXBLKSIZE, else its largest multiple that fits. */
++static int get_stripe_unit(int blocksize)
++{
++	return blocksize >= NFSSVC_MAXBLKSIZE ? blocksize :
++		NFSSVC_MAXBLKSIZE - (NFSSVC_MAXBLKSIZE % blocksize);
++}
++
++/*
++ * Look up inode block device in pnfs_dlm_device list.
++ * Hash on the inode->i_ino and number of data servers: returns the first
++ * stripe index in [0, num_ds), or -1 when no layout should be handed out.
++ */
++static int dlm_ino_hash(struct inode *ino)
++{
++	struct dlm_device_entry *de;
++
++	/* If can't find the inode block device in the pnfs_dlm_device list,
++	 * or its data server list is empty, then don't hand out a layout
++	 */
++	de = nfsd4_find_pnfs_dlm_device(ino->i_sb);
++	if (!de || de->num_ds <= 0)
++		return -1;
++	/* modulo, not a mask: num_ds need not be a power of two */
++	return ino->i_ino % de->num_ds;
++}
++
++static enum nfsstat4 nfsd4_pnfs_dlm_layoutget(struct inode *inode,
++			   struct exp_xdr_stream *xdr,
++			   const struct nfsd4_pnfs_layoutget_arg *args,
++			   struct nfsd4_pnfs_layoutget_res *res)
++{
++	struct pnfs_filelayout_layout *layout = NULL;
++	struct knfsd_fh *fhp = NULL;
++	int index;
++	enum nfsstat4 rc = NFS4_OK;
++
++	dprintk("%s: LAYOUT_GET\n", __func__);
++
++	/* DLM exported file systems only support layouts for READ */
++	if (res->lg_seg.iomode == IOMODE_RW)
++		return NFS4ERR_BADIOMODE;
++
++	index = dlm_ino_hash(inode);
++	dprintk("%s first stripe index %d i_ino %lu\n", __func__, index,
++		inode->i_ino);
++	if (index < 0)
++		return NFS4ERR_LAYOUTUNAVAILABLE;
++	/* a negative index means dlm_ino_hash() found no usable device entry */
++	res->lg_seg.layout_type = LAYOUT_NFSV4_1_FILES;
++	/* Always give out whole file layouts */
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;
++	/* Always give out READ ONLY layouts */
++	res->lg_seg.iomode = IOMODE_READ;
++
++	layout = kzalloc(sizeof(*layout), GFP_KERNEL);
++	if (layout == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;
++		goto error;
++	}
++
++	/* Set file layout response args */
++	layout->lg_layout_type = LAYOUT_NFSV4_1_FILES;
++	layout->lg_stripe_type = STRIPE_SPARSE;
++	layout->lg_commit_through_mds = false;
++	layout->lg_stripe_unit = get_stripe_unit(inode->i_sb->s_blocksize);
++	layout->lg_fh_length = 1;
++	layout->device_id.sbid = args->lg_sbid;
++	layout->device_id.devid = 1;                                /*FSFTEMP*/
++	layout->lg_first_stripe_index = index;                      /*FSFTEMP*/
++	layout->lg_pattern_offset = 0;
++
++	fhp = kmalloc(sizeof(*fhp), GFP_KERNEL);
++	if (fhp == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;
++		goto error;
++	}
++	/* the single layout fh is a copy of the request's fh run through pnfs_fh_mark_ds() */
++	memcpy(fhp, args->lg_fh, sizeof(*fhp));
++	pnfs_fh_mark_ds(fhp);
++	layout->lg_fh_list = fhp;
++
++	/* Call nfsd to encode layout */
++	rc = filelayout_encode_layout(xdr, layout);
++exit:
++	kfree(layout);
++	kfree(fhp);
++	return rc;
++	/* failure: report an empty segment, then free whatever was allocated */
++error:
++	res->lg_seg.length = 0;
++	goto exit;
++}
++
++static int
++nfsd4_pnfs_dlm_layouttype(struct super_block *sb)
++{
++	return LAYOUT_NFSV4_1_FILES;	/* same type for every sb; sb is not consulted */
++}
++
++/* For use by DLM cluster file systems exported by pNFSD (file layouts only) */
++const struct pnfs_export_operations pnfs_dlm_export_ops = {
++	.layout_type = nfsd4_pnfs_dlm_layouttype,	/* always LAYOUT_NFSV4_1_FILES */
++	.get_device_info = nfsd4_pnfs_dlm_getdevinfo,	/* encodes the DS list of deviceid 1 */
++	.get_device_iter = nfsd4_pnfs_dlm_getdeviter,	/* iterates over the single deviceid */
++	.layout_get = nfsd4_pnfs_dlm_layoutget,	/* whole-file, read-only layouts */
++};
++EXPORT_SYMBOL(pnfs_dlm_export_ops);
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-08-23 12:09:03.310501390 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-08-23 12:09:03.310501390 -0400
+@@ -0,0 +1,620 @@
++/*
++*  linux/fs/nfsd/nfs4pnfsds.c
++*
++*  Copyright (c) 2005 The Regents of the University of Michigan.
++*  All rights reserved.
++*
++*  Andy Adamson <andros@umich.edu>
++*
++*  Redistribution and use in source and binary forms, with or without
++*  modification, are permitted provided that the following conditions
++*  are met:
++*
++*  1. Redistributions of source code must retain the above copyright
++*     notice, this list of conditions and the following disclaimer.
++*  2. Redistributions in binary form must reproduce the above copyright
++*     notice, this list of conditions and the following disclaimer in the
++*     documentation and/or other materials provided with the distribution.
++*  3. Neither the name of the University nor the names of its
++*     contributors may be used to endorse or promote products derived
++*     from this software without specific prior written permission.
++*
++*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++*
++*/
++#if defined(CONFIG_PNFSD)
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++#include <linux/param.h>
++#include <linux/sunrpc/svc.h>
++#include <linux/sunrpc/debug.h>
++#include <linux/nfs4.h>
++#include <linux/exportfs.h>
++#include <linux/sched.h>
++
++#include "nfsd.h"
++#include "pnfsd.h"
++#include "state.h"
++
++/*
++ *******************
++ *   	 PNFS
++ *******************
++ */
++/*
++ * Hash tables for pNFS Data Server state
++ *
++ * mds_nodeid:	list of struct pnfs_mds_id one per Metadata server (MDS) using
++ *		this data server (DS).
++ *
++ * mds_clid_hashtbl[]: uses clientid_hashval(), hash of all clientids obtained
++ *			from any MDS.
++ *
++ * ds_stid_hashtbl[]: uses stateid_hashval(), hash of all stateids obtained
++ *			from any MDS.
++ *
++ */
++/* Hash tables for clientid state */
++#define CLIENT_HASH_BITS                 4
++#define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
++#define CLIENT_HASH_MASK                (CLIENT_HASH_SIZE - 1)
++
++#define clientid_hashval(id) \
++	((id) & CLIENT_HASH_MASK)
++
++/* hash table for pnfs_ds_stateid */
++#define STATEID_HASH_BITS              10
++#define STATEID_HASH_SIZE              (1 << STATEID_HASH_BITS)
++#define STATEID_HASH_MASK              (STATEID_HASH_SIZE - 1)
++
++#define stateid_hashval(owner_id, file_id)  \
++	(((owner_id) + (file_id)) & STATEID_HASH_MASK)
++
++static struct list_head mds_id_tbl;
++static struct list_head mds_clid_hashtbl[CLIENT_HASH_SIZE];
++static struct list_head ds_stid_hashtbl[STATEID_HASH_SIZE];
++
++static inline void put_ds_clientid(struct pnfs_ds_clientid *dcp);
++static inline void put_ds_mdsid(struct pnfs_mds_id *mdp);
++
++/* Mutex for data server state.  Needs to be separate from
++ * mds state mutex since a node can be both mds and ds */
++static DEFINE_MUTEX(ds_mutex);
++static struct thread_info *ds_mutex_owner;
++
++static void
++ds_lock_state(void)
++{
++	mutex_lock(&ds_mutex);
++	ds_mutex_owner = current_thread_info();	/* checked by the BUG_ON in ds_unlock_state() */
++}
++
++static void
++ds_unlock_state(void)
++{
++	BUG_ON(ds_mutex_owner != current_thread_info());	/* only the thread that locked may unlock */
++	ds_mutex_owner = NULL;
++	mutex_unlock(&ds_mutex);
++}
++
++/* Return 1 when both clientids have the same cl_boot and cl_id, else 0. */
++static int cmp_clid(const clientid_t *cl1, const clientid_t *cl2)
++{
++	if (cl1->cl_boot != cl2->cl_boot)
++		return 0;
++	return cl1->cl_id == cl2->cl_id;
++}
++
++/* Initialise the MDS id list and both data server hash tables as empty. */
++void nfs4_pnfs_state_init(void)
++{
++	struct list_head *bucket;
++
++	for (bucket = mds_clid_hashtbl;
++	     bucket < mds_clid_hashtbl + CLIENT_HASH_SIZE; bucket++)
++		INIT_LIST_HEAD(bucket);
++	for (bucket = ds_stid_hashtbl;
++	     bucket < ds_stid_hashtbl + STATEID_HASH_SIZE; bucket++)
++		INIT_LIST_HEAD(bucket);
++	INIT_LIST_HEAD(&mds_id_tbl);
++}
++
++/* Return the mds_id_tbl entry whose di_mdsid equals mdsid, or NULL if none. */
++static struct pnfs_mds_id *find_pnfs_mds_id(u32 mdsid)
++{
++	struct pnfs_mds_id *mdp;
++
++	dprintk("pNFSD: %s\n", __func__);
++	list_for_each_entry(mdp, &mds_id_tbl, di_hash)
++		if (mdp->di_mdsid == mdsid)
++			return mdp;
++
++	return NULL;
++}
++
++/* Look clid up in its mds_clid_hashtbl bucket; NULL when it is not hashed. */
++static struct pnfs_ds_clientid *find_pnfs_ds_clientid(const clientid_t *clid)
++{
++	struct list_head *bucket = &mds_clid_hashtbl[clientid_hashval(clid->cl_id)];
++	struct pnfs_ds_clientid *dcp;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	list_for_each_entry(dcp, bucket, dc_hash) {
++		if (!cmp_clid(&dcp->dc_mdsclid, clid))
++			continue;
++		return dcp;
++	}
++	return NULL;
++}
++
++/* Find the hashed DS stateid matching stid's owner id, file id and boot time
++ * (the generation is not compared).  Returns NULL when there is none. */
++static struct pnfs_ds_stateid *find_pnfs_ds_stateid(stateid_t *stid)
++{
++	struct pnfs_ds_stateid *dsp;
++	unsigned int hashval;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	hashval = stateid_hashval(stid->si_stateownerid, stid->si_fileid);
++	list_for_each_entry(dsp, &ds_stid_hashtbl[hashval], ds_hash) {
++		stateid_t *sid = &dsp->ds_stid;
++
++		if (sid->si_stateownerid != stid->si_stateownerid ||
++		    sid->si_fileid != stid->si_fileid ||
++		    sid->si_boot != stid->si_boot)
++			continue;
++		dprintk("NFSD: %s <-- %p ds_flags %lx " STATEID_FMT "\n",
++			__func__, dsp, dsp->ds_flags, STATEID_VAL(sid));
++		return dsp;
++	}
++	return NULL;
++}
++
++/* kref release callback: unlink the MDS id from its lists and free it. */
++static void release_ds_mdsid(struct kref *kref)
++{
++	struct pnfs_mds_id *mdp;
++
++	mdp = container_of(kref, struct pnfs_mds_id, di_ref);
++	dprintk("pNFSD: %s\n", __func__);
++	list_del(&mdp->di_hash);
++	list_del(&mdp->di_mdsclid);
++	kfree(mdp);
++}
++
++/* kref release callback for a DS clientid: put the MDS id found for
++ * dc_mdsid (if any), then unlink the clientid from its lists and free it. */
++static void release_ds_clientid(struct kref *kref)
++{
++	struct pnfs_ds_clientid *dcp;
++	struct pnfs_mds_id *mdp;
++
++	dcp = container_of(kref, struct pnfs_ds_clientid, dc_ref);
++	dprintk("pNFSD: %s\n", __func__);
++	mdp = find_pnfs_mds_id(dcp->dc_mdsid);
++	if (mdp != NULL)
++		put_ds_mdsid(mdp);
++	list_del(&dcp->dc_hash);
++	list_del(&dcp->dc_stateid);
++	list_del(&dcp->dc_permdsid);
++	kfree(dcp);
++}
++
++/* kref release callback for a DS stateid: put the clientid found for
++ * ds_mdsclid (if any), then unhash the stateid and free it. */
++static void release_ds_stateid(struct kref *kref)
++{
++	struct pnfs_ds_stateid *dsp;
++	struct pnfs_ds_clientid *dcp;
++
++	dsp = container_of(kref, struct pnfs_ds_stateid, ds_ref);
++	dprintk("pNFS %s: dsp %p\n", __func__, dsp);
++	dcp = find_pnfs_ds_clientid(&dsp->ds_mdsclid);
++	if (dcp != NULL)
++		put_ds_clientid(dcp);
++	list_del(&dsp->ds_hash);
++	list_del(&dsp->ds_perclid);
++	kfree(dsp);
++}
++
++/* Drop a reference on dcp; release_ds_clientid() runs when it was the last. */
++static inline void put_ds_clientid(struct pnfs_ds_clientid *dcp)
++{
++	dprintk("pNFS %s: dcp %p ref %d\n",
++		__func__, dcp, atomic_read(&dcp->dc_ref.refcount));
++	kref_put(&dcp->dc_ref, release_ds_clientid);
++}
++
++/* Take an additional reference on dcp (logs the count seen beforehand). */
++static inline void get_ds_clientid(struct pnfs_ds_clientid *dcp)
++{
++	dprintk("pNFS %s: dcp %p ref %d\n",
++		__func__, dcp, atomic_read(&dcp->dc_ref.refcount));
++	kref_get(&dcp->dc_ref);
++}
++
++/* Drop a reference on mdp; release_ds_mdsid() runs when it was the last. */
++static inline void put_ds_mdsid(struct pnfs_mds_id *mdp)
++{
++	dprintk("pNFS %s: mdp %p ref %d\n",
++		__func__, mdp, atomic_read(&mdp->di_ref.refcount));
++	kref_put(&mdp->di_ref, release_ds_mdsid);
++}
++
++/* Take an additional reference on mdp (logs the count seen beforehand). */
++static inline void get_ds_mdsid(struct pnfs_mds_id *mdp)
++{
++	dprintk("pNFS %s: mdp %p ref %d\n",
++		__func__, mdp, atomic_read(&mdp->di_ref.refcount));
++	kref_get(&mdp->di_ref);
++}
++
++/* Drop a reference on dsp; release_ds_stateid() runs when it was the last. */
++static inline void put_ds_stateid(struct pnfs_ds_stateid *dsp)
++{
++	dprintk("pNFS %s: dsp %p ref %d\n",
++		__func__, dsp, atomic_read(&dsp->ds_ref.refcount));
++	kref_put(&dsp->ds_ref, release_ds_stateid);
++}
++
++/* Take an additional reference on dsp (logs the count seen beforehand). */
++static inline void get_ds_stateid(struct pnfs_ds_stateid *dsp)
++{
++	dprintk("pNFS %s: dsp %p ref %d\n",
++		__func__, dsp, atomic_read(&dsp->ds_ref.refcount));
++	kref_get(&dsp->ds_ref);
++}
++
++void
++nfs4_pnfs_state_shutdown(void)
++{
++	struct pnfs_ds_stateid *dsp;
++	int i;
++
++	dprintk("pNFSD %s: -->\n", __func__);
++	/* NOTE(review): the loop ends only if each put drops the last reference (release unhashes the entry) - confirm */
++	ds_lock_state();
++	for (i = 0; i < STATEID_HASH_SIZE; i++) {
++		while (!list_empty(&ds_stid_hashtbl[i])) {
++			dsp = list_entry(ds_stid_hashtbl[i].next,
++					 struct pnfs_ds_stateid, ds_hash);
++			put_ds_stateid(dsp);
++		}
++	}
++	ds_unlock_state();
++}
++
++static struct pnfs_mds_id *
++alloc_init_mds_id(struct pnfs_get_state *gsp)
++{
++	struct pnfs_mds_id *mdp;
++
++	dprintk("pNFSD: %s\n", __func__);
++	/* allocate an entry for gsp->dsid and link it on mds_id_tbl; NULL if out of memory */
++	mdp = kmalloc(sizeof(*mdp), GFP_KERNEL);
++	if (!mdp)
++		return NULL;
++	INIT_LIST_HEAD(&mdp->di_hash);
++	INIT_LIST_HEAD(&mdp->di_mdsclid);
++	list_add(&mdp->di_hash, &mds_id_tbl);
++	mdp->di_mdsid = gsp->dsid;
++	mdp->di_mdsboot = 0;
++	kref_init(&mdp->di_ref);
++	return mdp;
++}
++
++static struct pnfs_ds_clientid *
++alloc_init_ds_clientid(struct pnfs_get_state *gsp)
++{
++	struct pnfs_mds_id *mdp;
++	struct pnfs_ds_clientid *dcp;
++	clientid_t *clid = (clientid_t *)&gsp->clid;
++	unsigned int hashval = clientid_hashval(clid->cl_id);
++
++	dprintk("pNFSD: %s\n", __func__);
++	/* take a reference on the MDS id, creating the entry on first use */
++	mdp = find_pnfs_mds_id(gsp->dsid);
++	if (mdp)
++		get_ds_mdsid(mdp);
++	else
++		mdp = alloc_init_mds_id(gsp);
++	if (!mdp)
++		return NULL;
++
++	dcp = kmalloc(sizeof(*dcp), GFP_KERNEL);
++	if (!dcp) {
++		put_ds_mdsid(mdp);	/* give back the reference taken above */
++		return NULL;
++	}
++	/* hash the new clientid and hang it off its MDS id; starts with one ref */
++	INIT_LIST_HEAD(&dcp->dc_hash);
++	INIT_LIST_HEAD(&dcp->dc_stateid);
++	INIT_LIST_HEAD(&dcp->dc_permdsid);
++	list_add(&dcp->dc_hash, &mds_clid_hashtbl[hashval]);
++	list_add(&dcp->dc_permdsid, &mdp->di_mdsclid);
++	dcp->dc_mdsclid = *clid;
++	kref_init(&dcp->dc_ref);
++	dcp->dc_mdsid = gsp->dsid;
++	return dcp;
++}
++
++static struct pnfs_ds_stateid *
++alloc_init_ds_stateid(struct svc_fh *cfh, stateid_t *stidp)
++{
++	struct pnfs_ds_stateid *dsp;
++	u32 st_id = stidp->si_stateownerid;
++	u32 f_id  = stidp->si_fileid;
++	unsigned int hashval;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	dsp = kmalloc(sizeof(*dsp), GFP_KERNEL);
++	if (!dsp)
++		return dsp;
++	/* kmalloc does not zero: ds_verifier and ds_mdsclid stay unset until update_ds_stateid() */
++	INIT_LIST_HEAD(&dsp->ds_hash);
++	INIT_LIST_HEAD(&dsp->ds_perclid);
++	memcpy(&dsp->ds_stid, stidp, sizeof(stateid_t));
++	fh_copy_shallow(&dsp->ds_fh, &cfh->fh_handle);
++	dsp->ds_access = 0;
++	dsp->ds_status = 0;
++	dsp->ds_flags = 0L;
++	kref_init(&dsp->ds_ref);
++	set_bit(DS_STATEID_NEW, &dsp->ds_flags);
++	clear_bit(DS_STATEID_VALID, &dsp->ds_flags);
++	clear_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++	init_waitqueue_head(&dsp->ds_waitq);
++	/* hash on owner id + file id, the key find_pnfs_ds_stateid() uses */
++	hashval = stateid_hashval(st_id, f_id);
++	list_add(&dsp->ds_hash, &ds_stid_hashtbl[hashval]);
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, dsp);
++	return dsp;
++}
++
++static int
++update_ds_stateid(struct pnfs_ds_stateid *dsp, struct svc_fh *cfh,
++		  struct pnfs_get_state *gsp)
++{
++	struct pnfs_ds_clientid *dcp;
++	int new = 0;
++
++	dprintk("pNFSD: %s dsp %p\n", __func__, dsp);
++	/* returns 0 on success, 1 when a clientid entry could not be allocated */
++	dcp = find_pnfs_ds_clientid((clientid_t *)&gsp->clid);
++	if (!dcp) {
++		dcp = alloc_init_ds_clientid(gsp);
++		if (!dcp)
++			return 1;
++		new = 1;
++	}
++	if (test_bit(DS_STATEID_NEW, &dsp->ds_flags)) {
++		list_add(&dsp->ds_perclid, &dcp->dc_stateid);
++		if (!new)
++			get_ds_clientid(dcp);
++	}
++	/* copy the state carried in gsp into dsp and mark the stateid valid */
++	memcpy(&dsp->ds_stid, &gsp->stid, sizeof(stateid_t));
++	dsp->ds_access = gsp->access;
++	dsp->ds_status = 0;
++	dsp->ds_verifier[0] = gsp->verifier[0];
++	dsp->ds_verifier[1] = gsp->verifier[1];
++	memcpy(&dsp->ds_mdsclid, &gsp->clid, sizeof(clientid_t));
++	set_bit(DS_STATEID_VALID, &dsp->ds_flags);
++	clear_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++	clear_bit(DS_STATEID_NEW, &dsp->ds_flags);
++	return 0;
++}
++
++int
++nfs4_pnfs_cb_change_state(struct pnfs_get_state *gs)
++{
++	stateid_t *stid = (stateid_t *)&gs->stid;
++	struct pnfs_ds_stateid *dsp;
++
++	dprintk("pNFSD: %s stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(stid));
++	/* drop one reference on the matching DS stateid, if any, under the DS lock */
++	ds_lock_state();
++	dsp = find_pnfs_ds_stateid(stid);
++	if (dsp)
++		put_ds_stateid(dsp);
++	ds_unlock_state();
++
++	dprintk("pNFSD: %s dsp %p\n", __func__, dsp);
++	/* dsp is only compared against NULL from here on, never dereferenced */
++	if (dsp)
++		return 0;
++	return -ENOENT;
++}
++
++/* Retrieves and validates stateid.
++ * If stateid exists and its fields match, return it.
++ * If stateid exists but either the generation or
++ * ownerids don't match, check with mds to see if it is valid.
++ * If the stateid doesn't exist, the first thread creates an
++ * invalid *marker* stateid, then checks to see if the
++ * stateid exists on the mds.  If so, it validates the *marker*
++ * stateid and updates its fields.  Subsequent threads that
++ * find the *marker* stateid wait until it is valid or an error
++ * occurs.
++ * Called with ds_state_lock.
++ */
++static struct pnfs_ds_stateid *
++nfsv4_ds_get_state(struct svc_fh *cfh, stateid_t *stidp)
++{
++	struct inode *ino = cfh->fh_dentry->d_inode;
++	struct super_block *sb;
++	struct pnfs_ds_stateid *dsp = NULL;
++	struct pnfs_get_state gs = {
++		.access = 0,
++	};
++	int status = 0, waiter = 0, failed;
++
++	dprintk("pNFSD: %s -->\n", __func__);
++
++	dsp = find_pnfs_ds_stateid(stidp);
++	if (dsp && test_bit(DS_STATEID_VALID, &dsp->ds_flags) &&
++	    (stidp->si_generation == dsp->ds_stid.si_generation))
++		goto out_noput;
++
++	sb = ino->i_sb;
++	if (!sb || !sb->s_pnfs_op || !sb->s_pnfs_op->get_state)
++		goto out_noput;
++
++	/* Uninitialize current state if it exists yet it doesn't match.
++	 * If it is already invalid, another thread is checking state */
++	if (dsp) {
++		if (!test_and_clear_bit(DS_STATEID_VALID, &dsp->ds_flags))
++			waiter = 1;
++	} else {
++		dsp = alloc_init_ds_stateid(cfh, stidp);
++		if (!dsp)
++			goto out_noput;
++	}
++
++	dprintk("pNFSD: %s Starting loop\n", __func__);
++	get_ds_stateid(dsp);
++	while (!test_bit(DS_STATEID_VALID, &dsp->ds_flags)) {
++		ds_unlock_state();
++		/* Another thread is checking the state */
++		if (waiter) {
++			dprintk("pNFSD: %s waiting\n", __func__);
++			wait_event_interruptible_timeout(dsp->ds_waitq,
++				(test_bit(DS_STATEID_VALID, &dsp->ds_flags) ||
++				 test_bit(DS_STATEID_ERROR, &dsp->ds_flags)),
++				 msecs_to_jiffies(1024));
++			dprintk("pNFSD: %s awake\n", __func__);
++			ds_lock_state();
++			if (test_bit(DS_STATEID_ERROR, &dsp->ds_flags))
++				goto out;
++			continue;
++		}
++
++		/* Validate stateid on mds */
++		dprintk("pNFSD: %s Checking state on MDS\n", __func__);
++		memcpy(&gs.stid, stidp, sizeof(stateid_t));
++		status = sb->s_pnfs_op->get_state(ino, &cfh->fh_handle, &gs);
++		dprintk("pNFSD: %s from MDS status %d\n", __func__, status);
++		ds_lock_state();
++		/* if !status and stateid is valid, update id and mark valid */
++		if (status || update_ds_stateid(dsp, cfh, &gs)) {
++			set_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++			/* remove invalid stateid from list */
++			put_ds_stateid(dsp);
++			wake_up(&dsp->ds_waitq);
++			goto out;
++		}
++		wake_up(&dsp->ds_waitq);
++	}
++out:
++	/* read the error flag first: this put may free dsp */
++	failed = test_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++	put_ds_stateid(dsp);
++	if (failed)
++		dsp = NULL;
++out_noput:
++	if (dsp)
++		dprintk("pNFSD: %s <-- dsp %p ds_flags %lx " STATEID_FMT "\n",
++			__func__, dsp, dsp->ds_flags, STATEID_VAL(&dsp->ds_stid));
++	/* If error, return null */
++	if (dsp && test_bit(DS_STATEID_ERROR, &dsp->ds_flags))
++		dsp = NULL;
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, dsp);
++	return dsp;
++}
++
++int
++nfs4_preprocess_pnfs_ds_stateid(struct svc_fh *cfh, stateid_t *stateid)
++{
++	struct pnfs_ds_stateid *dsp;
++	int status = 0;
++
++	dprintk("pNFSD: %s --> " STATEID_FMT "\n", __func__,
++		STATEID_VAL(stateid));
++
++	/* Must release state lock while verifying stateid on mds */
++	nfs4_unlock_state();
++	ds_lock_state();
++	dsp = nfsv4_ds_get_state(cfh, stateid);
++	if (dsp) {
++		get_ds_stateid(dsp);
++		dprintk("pNFSD: %s Found " STATEID_FMT "\n", __func__,
++			STATEID_VAL(&dsp->ds_stid));
++
++		dprintk("NFSD: %s: dsp %p fh_size %u:%u "
++			"fh [%08x:%08x:%08x:%08x]:[%08x:%08x:%08x:%08x] "
++			"gen %x:%x\n",
++			__func__, dsp,
++			cfh->fh_handle.fh_size, dsp->ds_fh.fh_size,
++			((unsigned *)&cfh->fh_handle.fh_base)[0],
++			((unsigned *)&cfh->fh_handle.fh_base)[1],
++			((unsigned *)&cfh->fh_handle.fh_base)[2],
++			((unsigned *)&cfh->fh_handle.fh_base)[3],
++			((unsigned *)&dsp->ds_fh.fh_base)[0],
++			((unsigned *)&dsp->ds_fh.fh_base)[1],
++			((unsigned *)&dsp->ds_fh.fh_base)[2],
++			((unsigned *)&dsp->ds_fh.fh_base)[3],
++			stateid->si_generation, dsp->ds_stid.si_generation);
++	}
++	/* !dsp is tested first, so the comparisons below never dereference a NULL dsp */
++	if (!dsp ||
++	    (cfh->fh_handle.fh_size != dsp->ds_fh.fh_size) ||
++	    (memcmp(&cfh->fh_handle.fh_base, &dsp->ds_fh.fh_base,
++		    dsp->ds_fh.fh_size) != 0) ||
++	    (stateid->si_generation > dsp->ds_stid.si_generation))
++		status = nfserr_bad_stateid;
++	else if (stateid->si_generation < dsp->ds_stid.si_generation)
++		status = nfserr_old_stateid;
++	/* drop the reference taken above, then re-take the state lock released on entry */
++	if (dsp)
++		put_ds_stateid(dsp);
++	ds_unlock_state();
++	nfs4_lock_state();
++	dprintk("pNFSD: %s <-- status %d\n", __func__, be32_to_cpu(status));
++	return status;
++}
++
++void
++nfs4_ds_get_verifier(stateid_t *stateid, struct super_block *sb, u32 *p)
++{
++	struct pnfs_ds_stateid *dsp = NULL;
++
++	dprintk("pNFSD: %s --> stid %p\n", __func__, stateid);
++
++	ds_lock_state();
++	if (stateid != NULL) {
++		dsp = find_pnfs_ds_stateid(stateid);
++		if (dsp)
++			get_ds_stateid(dsp);
++	}
++	/* XXX: Should we fetch the stateid or wait if some other
++	 * thread is currently retrieving the stateid ? */
++	if (dsp && test_bit(DS_STATEID_VALID, &dsp->ds_flags)) {
++		*p++ = dsp->ds_verifier[0];
++		*p++ = dsp->ds_verifier[1];
++		put_ds_stateid(dsp);
++	} else {
++		/* stateid not usable: drop our reference while still locked */
++		if (dsp)
++			put_ds_stateid(dsp);
++		/* must be on MDS */
++		ds_unlock_state();
++		sb->s_pnfs_op->get_verifier(sb, p);
++		ds_lock_state();
++	}
++	ds_unlock_state();
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, dsp);
++}
++
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-08-23 12:08:29.091491685 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 12:09:03.311501496 -0400
+@@ -34,10 +34,14 @@
+  */
+ #include <linux/file.h>
+ #include <linux/slab.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd4_block.h>
+ 
+ #include "cache.h"
+ #include "xdr4.h"
+ #include "vfs.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY		NFSDDBG_PROC
+ 
+@@ -372,6 +376,24 @@ nfsd4_open(struct svc_rqst *rqstp, struc
+ 	 * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
+ 	 */
+ 	status = nfsd4_process_open2(rqstp, &cstate->current_fh, open);
++#if defined(CONFIG_SPNFS)
++	if (!status && spnfs_enabled()) {
++		struct inode *inode = cstate->current_fh.fh_dentry->d_inode;
++
++		status = spnfs_open(inode, open);
++		if (status) {
++			dprintk(
++			     "nfsd: pNFS could not be enabled for inode: %lu\n",
++			     inode->i_ino);
++			/*
++			 * XXX When there's a failure then need to indicate to
++			 * future ops that no pNFS is available.  Should I save
++			 * the status in the inode?  It's kind of a big hammer.
++			 * But there may be no stripes available?
++			 */
++		}
++	}
++#endif /* CONFIG_SPNFS */
+ out:
+ 	if (open->op_stateowner) {
+ 		nfs4_get_stateowner(open->op_stateowner);
+@@ -454,16 +476,30 @@ nfsd4_access(struct svc_rqst *rqstp, str
+ 			   &access->ac_supported);
+ }
+ 
++static void
++nfsd4_get_verifier(struct super_block *sb, nfs4_verifier *verf)
++{
++	u32 *p = (u32 *)verf->data;
++
++#if defined(CONFIG_PNFSD)
++	if (sb->s_pnfs_op && sb->s_pnfs_op->get_verifier) {
++		nfs4_ds_get_verifier(NULL, sb, p);
++		return;
++	}
++#endif /* CONFIG_PNFSD */
++	/* otherwise fill the verifier with the nfssvc_boot time (sec, usec) */
++	*p++ = nfssvc_boot.tv_sec;
++	*p++ = nfssvc_boot.tv_usec;
++}
++
+ static __be32
+ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ 	     struct nfsd4_commit *commit)
+ {
+ 	__be32 status;
+ 
+-	u32 *p = (u32 *)commit->co_verf.data;
+-	*p++ = nfssvc_boot.tv_sec;
+-	*p++ = nfssvc_boot.tv_usec;
+-
++	nfsd4_get_verifier(cstate->current_fh.fh_dentry->d_inode->i_sb,
++			   &commit->co_verf);
+ 	status = nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
+ 			     commit->co_count);
+ 	if (status == nfserr_symlink)
+@@ -816,7 +852,6 @@ nfsd4_write(struct svc_rqst *rqstp, stru
+ {
+ 	stateid_t *stateid = &write->wr_stateid;
+ 	struct file *filp = NULL;
+-	u32 *p;
+ 	__be32 status = nfs_ok;
+ 	unsigned long cnt;
+ 
+@@ -838,13 +873,49 @@ nfsd4_write(struct svc_rqst *rqstp, stru
+ 
+ 	cnt = write->wr_buflen;
+ 	write->wr_how_written = write->wr_stable_how;
+-	p = (u32 *)write->wr_verifier.data;
+-	*p++ = nfssvc_boot.tv_sec;
+-	*p++ = nfssvc_boot.tv_usec;
+ 
++	nfsd4_get_verifier(cstate->current_fh.fh_dentry->d_inode->i_sb,
++			   &write->wr_verifier);
++#if defined(CONFIG_SPNFS)
++#if defined(CONFIG_SPNFS_BLOCK)
++	if (pnfs_block_enabled(cstate->current_fh.fh_dentry->d_inode, 0)) {
++                status = bl_layoutrecall(cstate->current_fh.fh_dentry->d_inode,
++		    RETURN_FILE, write->wr_offset, write->wr_buflen);
++                if (!status) {
++                        status =  nfsd_write(rqstp, &cstate->current_fh, filp,
++			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
++			     &cnt, &write->wr_how_written);
++                }
++        } else
++#endif
++		
++	if (spnfs_enabled()) {
++		status = spnfs_write(cstate->current_fh.fh_dentry->d_inode,
++			write->wr_offset, write->wr_buflen, write->wr_vlen,
++			rqstp);
++		if (status == nfs_ok) {
++			/* DMXXX: HACK to get filesize set */
++			/* write one byte at offset+length-1 */
++			struct kvec k[1];
++			char zero = 0;
++			unsigned long cnt = 1;
++
++			k[0].iov_base = (void *)&zero;
++			k[0].iov_len = 1;
++			nfsd_write(rqstp, &cstate->current_fh, filp,
++				   write->wr_offset+write->wr_buflen-1, k, 1,
++				   &cnt, &write->wr_how_written);
++		}
++	} else /* we're not an MDS */
++		status =  nfsd_write(rqstp, &cstate->current_fh, filp,
++			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
++			     &cnt, &write->wr_how_written);
++#else
+ 	status =  nfsd_write(rqstp, &cstate->current_fh, filp,
+ 			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
+ 			     &cnt, &write->wr_how_written);
++#endif /* CONFIG_SPNFS */
++
+ 	if (filp)
+ 		fput(filp);
+ 
+@@ -935,6 +1006,306 @@ nfsd4_verify(struct svc_rqst *rqstp, str
+ 	return status == nfserr_same ? nfs_ok : status;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++static __be32
++nfsd4_layout_verify(struct super_block *sb, struct svc_export *exp,
++		    unsigned int layout_type)
++{
++	int status, type;
++
++	/* check to see if pNFS  is supported. */
++	status = nfserr_layoutunavailable;
++	if (exp && exp->ex_pnfs == 0) {
++		dprintk("%s: Underlying file system "
++			"is not exported over pNFS\n", __func__);
++		goto out;
++	}
++	if (!sb->s_pnfs_op || !sb->s_pnfs_op->layout_type) {
++		dprintk("%s: Underlying file system "
++			"does not support pNFS\n", __func__);
++		goto out;
++	}
++	/* ask the filesystem which layout type it serves for this superblock */
++	type = sb->s_pnfs_op->layout_type(sb);
++
++	/* check to see if requested layout type is supported. */
++	status = nfserr_unknown_layouttype;
++	if (!type)
++		dprintk("BUG: %s: layout_type 0 is reserved and must not be "
++			"used by filesystem\n", __func__);
++	else if (type != layout_type)
++		dprintk("%s: requested layout type %d "
++		       "does not match supported type %d\n",
++			__func__, layout_type, type);
++	else
++		status = nfs_ok;
++out:
++	return status;
++}
++
++static __be32
++nfsd4_getdevlist(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_getdevlist *gdlp)
++{
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++	int status;
++
++	dprintk("%s: type %u maxdevices %u cookie %llu verf %llu\n",
++		__func__, gdlp->gd_layout_type, gdlp->gd_maxdevices,
++		gdlp->gd_cookie, gdlp->gd_verf);
++
++
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* We must be able to encode at least one device */
++	if (!gdlp->gd_maxdevices)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     gdlp->gd_layout_type);
++	if (status)
++		goto out;
++
++	/* Do nothing if underlying file system does not support
++	 * getdevicelist */
++	if (!sb->s_pnfs_op->get_device_iter) {
++		status = nfserr_notsupp;
++		goto out;
++	}
++
++	/* Set up arguments so device can be retrieved at encode time */
++	gdlp->gd_fhp = &cstate->current_fh;
++out:
++	return status;
++}
++
++static __be32
++nfsd4_getdevinfo(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_getdevinfo *gdp)
++{
++	struct super_block *sb;
++	int status;
++	clientid_t clid;
++
++	dprintk("%s: layout_type %u dev_id %llx:%llx maxcnt %u\n",
++	       __func__, gdp->gd_layout_type, gdp->gd_devid.sbid,
++	       gdp->gd_devid.devid, gdp->gd_maxcount);
++
++	status = nfserr_inval;
++	sb = find_sbid_id(gdp->gd_devid.sbid);
++	dprintk("%s: sb %p\n", __func__, sb);
++	if (!sb) {
++		status = nfserr_noent;
++		goto out;
++	}
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, NULL, gdp->gd_layout_type);
++	if (status)
++		goto out;
++
++	/* Set up arguments so device can be retrieved at encode time */
++	gdp->gd_sb = sb;
++
++	/* Update notifications */
++	copy_clientid(&clid, cstate->session);
++	pnfs_set_device_notify(&clid, gdp->gd_notify_types);
++out:
++	return status;
++}
++
++static __be32
++nfsd4_layoutget(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutget *lgp)
++{
++	int status;
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lgp->lg_seg.layout_type);
++	if (status)
++		goto out;
++
++	status = nfserr_badiomode;
++	if (lgp->lg_seg.iomode != IOMODE_READ &&
++	    lgp->lg_seg.iomode != IOMODE_RW) {
++		dprintk("pNFS %s: invalid iomode %d\n", __func__,
++			lgp->lg_seg.iomode);
++		goto out;
++	}
++
++	/* Set up arguments so layout can be retrieved at encode time */
++	lgp->lg_fhp = current_fh;
++	copy_clientid((clientid_t *)&lgp->lg_seg.clientid, cstate->session);
++	status = nfs_ok;
++out:
++	return status;
++}
++
++static __be32
++nfsd4_layoutcommit(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	int status;
++	struct inode *ino = NULL;
++	struct iattr ia;
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	dprintk("NFSD: nfsd4_layoutcommit \n");
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;	/* also the result of the !sb check below */
++	ino = current_fh->fh_dentry->d_inode;
++	if (!ino)
++		goto out;
++
++	sb = ino->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lcp->args.lc_seg.layout_type);
++	if (status)
++		goto out;
++
++	/* This will only extend the file length.  Do a quick
++	 * check to see if there is any point in waiting for the update
++	 * locks.
++	 * TODO: Is this correct for all back ends?
++	 */
++	dprintk("%s:new offset: %d new size: %llu old size: %lld\n",
++		__func__, lcp->args.lc_newoffset, lcp->args.lc_last_wr + 1,
++		ino->i_size);
++
++	/* Set clientid from sessionid */
++	copy_clientid((clientid_t *)&lcp->args.lc_seg.clientid, cstate->session);
++	lcp->res.lc_size_chg = 0;
++	if (sb->s_pnfs_op->layout_commit) {
++		status = sb->s_pnfs_op->layout_commit(ino, &lcp->args, &lcp->res);
++		dprintk("%s:layout_commit result %d\n", __func__, status);
++	} else {
++		fh_lock(current_fh);
++		if ((lcp->args.lc_newoffset == 0) ||
++		    ((lcp->args.lc_last_wr + 1) <= ino->i_size)) {
++			status = 0;
++			lcp->res.lc_size_chg = 0;
++			fh_unlock(current_fh);
++			goto out;
++		}
++
++		/* Try our best to update the file size */
++		dprintk("%s: Modifying file size\n", __func__);
++		ia.ia_valid = ATTR_SIZE;
++		ia.ia_size = lcp->args.lc_last_wr + 1;
++		status = notify_change(current_fh->fh_dentry, &ia);
++		fh_unlock(current_fh);
++		dprintk("%s:notify_change result %d\n", __func__, status);
++		status = nfserrno(status);	/* -errno must not reach the wire */
++	}
++
++	if (!status && lcp->res.lc_size_chg &&
++	    EX_ISSYNC(current_fh->fh_export)) {
++		dprintk("%s: Synchronously writing inode size %llu\n",
++			__func__, ino->i_size);
++		write_inode_now(ino, 1);
++		lcp->res.lc_newsize = i_size_read(ino);
++	}
++out:
++	return status;
++}
++
++static __be32
++nfsd4_layoutreturn(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	int status;
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lrp->args.lr_seg.layout_type);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	if (lrp->args.lr_return_type != RETURN_FILE &&
++	    lrp->args.lr_return_type != RETURN_FSID &&
++	    lrp->args.lr_return_type != RETURN_ALL) {
++		dprintk("pNFS %s: invalid return_type %d\n", __func__,
++			lrp->args.lr_return_type);
++		goto out;
++	}
++
++	status = nfserr_inval;
++	if (lrp->args.lr_seg.iomode != IOMODE_READ &&
++	    lrp->args.lr_seg.iomode != IOMODE_RW &&
++	    lrp->args.lr_seg.iomode != IOMODE_ANY) {
++		dprintk("pNFS %s: invalid iomode %d\n", __func__,
++			lrp->args.lr_seg.iomode);
++		goto out;
++	}
++
++	/* Set clientid from sessionid */
++	copy_clientid((clientid_t *)&lrp->args.lr_seg.clientid, cstate->session);
++	lrp->lrs_present = (lrp->args.lr_return_type == RETURN_FILE);
++	status = nfs4_pnfs_return_layout(sb, current_fh, lrp);
++out:
++	dprintk("pNFS %s: status %d return_type 0x%x lrs_present %d\n",
++		__func__, status, lrp->args.lr_return_type, lrp->lrs_present);
++	return status;
++}
++#endif /* CONFIG_PNFSD */
++
+ /*
+  * NULL call.
+  */
+@@ -1317,6 +1688,29 @@ static struct nfsd4_operation nfsd4_ops[
+ 		.op_flags = ALLOWED_WITHOUT_FH,
+ 		.op_name = "OP_RECLAIM_COMPLETE",
+ 	},
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICELIST] = {
++		.op_func = (nfsd4op_func)nfsd4_getdevlist,
++		.op_name = "OP_GETDEVICELIST",
++	},
++	[OP_GETDEVICEINFO] = {
++		.op_func = (nfsd4op_func)nfsd4_getdevinfo,
++		.op_flags = ALLOWED_WITHOUT_FH,
++		.op_name = "OP_GETDEVICEINFO",
++	},
++	[OP_LAYOUTGET] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutget,
++		.op_name = "OP_LAYOUTGET",
++	},
++	[OP_LAYOUTCOMMIT] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutcommit,
++		.op_name = "OP_LAYOUTCOMMIT",
++	},
++	[OP_LAYOUTRETURN] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutreturn,
++		.op_name = "OP_LAYOUTRETURN",
++	},
++#endif /* CONFIG_PNFSD */
+ };
+ 
+ static const char *nfsd4_op_name(unsigned opnum)
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-08-23 12:08:29.093491375 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 12:09:03.313491310 -0400
+@@ -42,6 +42,8 @@
+ #include "xdr4.h"
+ #include "vfs.h"
+ 
++#include "pnfsd.h"
++
+ #define NFSDDBG_FACILITY                NFSDDBG_PROC
+ 
+ /* Globals */
+@@ -60,8 +62,6 @@ static u64 current_sessionid = 1;
+ #define ONE_STATEID(stateid)  (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
+ 
+ /* forward declarations */
+-static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
+-static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
+ static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
+ static void nfs4_set_recdir(char *recdir);
+ 
+@@ -69,6 +69,7 @@ static void nfs4_set_recdir(char *recdir
+ 
+ /* Currently used for almost all code touching nfsv4 state: */
+ static DEFINE_MUTEX(client_mutex);
++struct task_struct *client_mutex_owner;
+ 
+ /*
+  * Currently used for the del_recall_lru and file hash table.  In an
+@@ -86,11 +87,21 @@ void
+ nfs4_lock_state(void)
+ {
+ 	mutex_lock(&client_mutex);
++	client_mutex_owner = current;
++}
++
++#define BUG_ON_UNLOCKED_STATE() BUG_ON(client_mutex_owner != current)
++
++void
++nfs4_bug_on_unlocked_state(void)
++{
++	BUG_ON(client_mutex_owner != current);
+ }
+ 
+ void
+ nfs4_unlock_state(void)
+ {
++	client_mutex_owner = NULL;
+ 	mutex_unlock(&client_mutex);
+ }
+ 
+@@ -109,7 +120,7 @@ opaque_hashval(const void *ptr, int nbyt
+ 
+ static struct list_head del_recall_lru;
+ 
+-static inline void
++inline void
+ put_nfs4_file(struct nfs4_file *fi)
+ {
+ 	if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) {
+@@ -120,7 +131,7 @@ put_nfs4_file(struct nfs4_file *fi)
+ 	}
+ }
+ 
+-static inline void
++inline void
+ get_nfs4_file(struct nfs4_file *fi)
+ {
+ 	atomic_inc(&fi->fi_ref);
+@@ -230,7 +241,10 @@ nfs4_close_delegation(struct nfs4_delega
+ 	 * but we want to remove the lease in any case. */
+ 	if (dp->dl_flock)
+ 		vfs_setlease(filp, F_UNLCK, &dp->dl_flock);
++	BUG_ON_UNLOCKED_STATE();
++	nfs4_unlock_state();	/* allow nested layout recall/return */
+ 	nfsd_close(filp);
++	nfs4_lock_state();
+ }
+ 
+ /* Called under the state lock. */
+@@ -266,8 +280,8 @@ static DEFINE_SPINLOCK(client_lock);
+  * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
+  * used in reboot/reset lease grace period processing
+  *
+- * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed
+- * setclientid_confirmed info. 
++ * conf_id_hashtbl[], and conf_str_hashtbl[] hold
++ * confirmed setclientid_confirmed info.
+  *
+  * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed 
+  * setclientid info.
+@@ -292,6 +306,7 @@ static void unhash_generic_stateid(struc
+ 	list_del(&stp->st_hash);
+ 	list_del(&stp->st_perfile);
+ 	list_del(&stp->st_perstateowner);
++	release_pnfs_ds_dev_list(stp);
+ }
+ 
+ static void free_generic_stateid(struct nfs4_stateid *stp)
+@@ -345,7 +360,10 @@ static void release_open_stateid(struct 
+ {
+ 	unhash_generic_stateid(stp);
+ 	release_stateid_lockowners(stp);
++	BUG_ON_UNLOCKED_STATE();
++	nfs4_unlock_state();	/* allow nested layout recall/return */
+ 	nfsd_close(stp->st_vfs_file);
++	nfs4_lock_state();
+ 	free_generic_stateid(stp);
+ }
+ 
+@@ -739,6 +757,8 @@ expire_client(struct nfs4_client *clp)
+ 	struct nfs4_delegation *dp;
+ 	struct list_head reaplist;
+ 
++	BUG_ON_UNLOCKED_STATE();
++
+ 	INIT_LIST_HEAD(&reaplist);
+ 	spin_lock(&recall_lock);
+ 	while (!list_empty(&clp->cl_delegations)) {
+@@ -758,6 +778,7 @@ expire_client(struct nfs4_client *clp)
+ 		sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
+ 		release_openowner(sop);
+ 	}
++	pnfs_expire_client(clp);
+ 	nfsd4_set_callback_client(clp, NULL);
+ 	if (clp->cl_cb_conn.cb_xprt)
+ 		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
+@@ -770,6 +791,13 @@ expire_client(struct nfs4_client *clp)
+ 	spin_unlock(&client_lock);
+ }
+ 
++void expire_client_lock(struct nfs4_client *clp)
++{
++	nfs4_lock_state();
++	expire_client(clp);
++	nfs4_unlock_state();
++}
++
+ static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
+ {
+ 	memcpy(target->cl_verifier.data, source->data,
+@@ -859,6 +887,11 @@ static struct nfs4_client *create_client
+ 	INIT_LIST_HEAD(&clp->cl_strhash);
+ 	INIT_LIST_HEAD(&clp->cl_openowners);
+ 	INIT_LIST_HEAD(&clp->cl_delegations);
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&clp->cl_layouts);
++	INIT_LIST_HEAD(&clp->cl_layoutrecalls);
++	atomic_set(&clp->cl_deviceref, 0);
++#endif /* CONFIG_PNFSD */
+ 	INIT_LIST_HEAD(&clp->cl_sessions);
+ 	INIT_LIST_HEAD(&clp->cl_lru);
+ 	clp->cl_time = get_seconds();
+@@ -908,7 +941,7 @@ move_to_confirmed(struct nfs4_client *cl
+ 	renew_client(clp);
+ }
+ 
+-static struct nfs4_client *
++struct nfs4_client *
+ find_confirmed_client(clientid_t *clid)
+ {
+ 	struct nfs4_client *clp;
+@@ -978,6 +1011,24 @@ find_unconfirmed_client_by_str(const cha
+ 	return NULL;
+ }
+ 
++/* Run func on every confirmed client; stop at the first non-zero status. */
++int
++filter_confirmed_clients(int (*func)(struct nfs4_client *, void *), void *arg)
++{
++	struct nfs4_client *clp, *next;
++	int i, status = 0;
++
++	for (i = 0; i < CLIENT_HASH_SIZE; i++)
++		list_for_each_entry_safe (clp, next, &conf_str_hashtbl[i],
++					  cl_strhash) {
++			status = func(clp, arg);
++			if (status)	/* break would only leave this bucket */
++				return status;
++		}
++
++	return status;
++}
++
+ static void
+ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
+ {
+@@ -1110,8 +1161,12 @@ nfsd4_replay_cache_entry(struct nfsd4_co
+ static void
+ nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
+ {
+-	/* pNFS is not supported */
++#if defined(CONFIG_PNFSD)
++	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS |
++				  EXCHGID4_FLAG_USE_PNFS_DS;
++#else  /* CONFIG_PNFSD */
+ 	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
++#endif /* CONFIG_PNFSD */
+ 
+ 	/* Referrals are supported, Migration is not. */
+ 	new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
+@@ -1301,6 +1356,13 @@ nfsd4_create_session(struct svc_rqst *rq
+ 	struct nfsd4_clid_slot *cs_slot = NULL;
+ 	int status = 0;
+ 
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++	/* XXX hack to get local ip address */
++	memcpy(&pnfsd_lexp_addr, &rqstp->rq_xprt->xpt_local,
++		sizeof(pnfsd_lexp_addr));
++	pnfs_lexp_addr_len = rqstp->rq_xprt->xpt_locallen;
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
+ 	nfs4_lock_state();
+ 	unconf = find_unconfirmed_client(&cr_ses->clientid);
+ 	conf = find_confirmed_client(&cr_ses->clientid);
+@@ -1340,25 +1402,26 @@ nfsd4_create_session(struct svc_rqst *rq
+ 		cs_slot->sl_seqid++; /* from 0 to 1 */
+ 		move_to_confirmed(unconf);
+ 
+-		if (cr_ses->flags & SESSION4_BACK_CHAN) {
+-			unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
+-			svc_xprt_get(rqstp->rq_xprt);
+-			rpc_copy_addr(
+-				(struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
+-				sa);
+-			unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
+-			unconf->cl_cb_conn.cb_minorversion =
+-				cstate->minorversion;
+-			unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
+-			unconf->cl_cb_seq_nr = 1;
+-			nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
+-		}
++		if (is_ds_only_session(unconf->cl_exchange_flags))
++			cr_ses->flags &= ~SESSION4_BACK_CHAN;
++
+ 		conf = unconf;
+ 	} else {
+ 		status = nfserr_stale_clientid;
+ 		goto out;
+ 	}
+ 
++	if (cr_ses->flags & SESSION4_BACK_CHAN) {
++		conf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
++		svc_xprt_get(rqstp->rq_xprt);
++		rpc_copy_addr((struct sockaddr *)&conf->cl_cb_conn.cb_addr, sa);
++		conf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
++		conf->cl_cb_conn.cb_minorversion = cstate->minorversion;
++		conf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
++		conf->cl_cb_seq_nr = 1;
++		nfsd4_probe_callback(conf, &conf->cl_cb_conn);
++	}
++
+ 	/*
+ 	 * We do not support RDMA or persistent sessions
+ 	 */
+@@ -1746,7 +1809,7 @@ out:
+ 
+ /* OPEN Share state helper functions */
+ static inline struct nfs4_file *
+-alloc_init_file(struct inode *ino)
++alloc_init_file(struct inode *ino, struct svc_fh *current_fh)
+ {
+ 	struct nfs4_file *fp;
+ 	unsigned int hashval = file_hashval(ino);
+@@ -1760,6 +1823,16 @@ alloc_init_file(struct inode *ino)
+ 		fp->fi_inode = igrab(ino);
+ 		fp->fi_id = current_fileid++;
+ 		fp->fi_had_conflict = false;
++#if defined(CONFIG_PNFSD)
++		INIT_LIST_HEAD(&fp->fi_layouts);
++		INIT_LIST_HEAD(&fp->fi_layout_states);
++		fp->fi_fsid.major = current_fh->fh_export->ex_fsid;
++		fp->fi_fsid.minor = 0;
++		fp->fi_fhlen = current_fh->fh_handle.fh_size;
++		BUG_ON(fp->fi_fhlen > sizeof(fp->fi_fhval));
++		memcpy(fp->fi_fhval, &current_fh->fh_handle.fh_base,
++		       fp->fi_fhlen);
++#endif /* CONFIG_PNFSD */
+ 		spin_lock(&recall_lock);
+ 		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
+ 		spin_unlock(&recall_lock);
+@@ -1768,7 +1841,7 @@ alloc_init_file(struct inode *ino)
+ 	return NULL;
+ }
+ 
+-static void
++void
+ nfsd4_free_slab(struct kmem_cache **slab)
+ {
+ 	if (*slab == NULL)
+@@ -1784,6 +1857,7 @@ nfsd4_free_slabs(void)
+ 	nfsd4_free_slab(&file_slab);
+ 	nfsd4_free_slab(&stateid_slab);
+ 	nfsd4_free_slab(&deleg_slab);
++	nfsd4_free_pnfs_slabs();
+ }
+ 
+ static int
+@@ -1805,6 +1879,8 @@ nfsd4_init_slabs(void)
+ 			sizeof(struct nfs4_delegation), 0, 0, NULL);
+ 	if (deleg_slab == NULL)
+ 		goto out_nomem;
++	if (nfsd4_init_pnfs_slabs())
++		goto out_nomem;
+ 	return 0;
+ out_nomem:
+ 	nfsd4_free_slabs();
+@@ -1878,6 +1954,9 @@ init_stateid(struct nfs4_stateid *stp, s
+ 	INIT_LIST_HEAD(&stp->st_perstateowner);
+ 	INIT_LIST_HEAD(&stp->st_lockowners);
+ 	INIT_LIST_HEAD(&stp->st_perfile);
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&stp->st_pnfs_ds_id);
++#endif /* CONFIG_PNFSD */
+ 	list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
+ 	list_add(&stp->st_perstateowner, &sop->so_stateids);
+ 	list_add(&stp->st_perfile, &fp->fi_stateids);
+@@ -1919,6 +1998,7 @@ find_openstateowner_str(unsigned int has
+ {
+ 	struct nfs4_stateowner *so = NULL;
+ 
++	BUG_ON_UNLOCKED_STATE();
+ 	list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
+ 		if (same_owner_str(so, &open->op_owner, &open->op_clientid))
+ 			return so;
+@@ -1927,7 +2007,7 @@ find_openstateowner_str(unsigned int has
+ }
+ 
+ /* search file_hashtbl[] for file */
+-static struct nfs4_file *
++struct nfs4_file *
+ find_file(struct inode *ino)
+ {
+ 	unsigned int hashval = file_hashval(ino);
+@@ -1945,6 +2025,18 @@ find_file(struct inode *ino)
+ 	return NULL;
+ }
+ 
++struct nfs4_file *
++find_alloc_file(struct inode *ino, struct svc_fh *current_fh)
++{
++	struct nfs4_file *fp;
++
++	fp = find_file(ino);
++	if (fp)
++		return fp;
++
++	return alloc_init_file(ino, current_fh);
++}
++
+ static inline int access_valid(u32 x, u32 minorversion)
+ {
+ 	if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ)
+@@ -2503,7 +2595,7 @@ nfsd4_process_open2(struct svc_rqst *rqs
+ 		if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
+ 			goto out;
+ 		status = nfserr_resource;
+-		fp = alloc_init_file(ino);
++		fp = alloc_init_file(ino, current_fh);
+ 		if (fp == NULL)
+ 			goto out;
+ 	}
+@@ -2730,7 +2822,7 @@ nfs4_check_fh(struct svc_fh *fhp, struct
+ 	return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_path.dentry->d_inode;
+ }
+ 
+-static int
++int
+ STALE_STATEID(stateid_t *stateid)
+ {
+ 	if (stateid->si_boot == boot_time)
+@@ -2740,6 +2832,16 @@ STALE_STATEID(stateid_t *stateid)
+ 	return 1;
+ }
+ 
++__be32
++nfs4_check_stateid(stateid_t *stateid)
++{
++	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
++		return nfserr_bad_stateid;
++	if (STALE_STATEID(stateid))
++		return nfserr_stale_stateid;
++	return 0;
++}
++
+ static inline int
+ access_permit_read(unsigned long access_bmap)
+ {
+@@ -2848,6 +2950,24 @@ nfs4_preprocess_stateid_op(struct nfsd4_
+ 	if (grace_disallows_io(ino))
+ 		return nfserr_grace;
+ 
++#if defined(CONFIG_PNFSD)
++	if (pnfs_fh_is_ds(&current_fh->fh_handle)) {
++		if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
++			status = nfserr_bad_stateid;
++		else
++#ifdef CONFIG_GFS2_FS_LOCKING_DLM
++		{
++			dprintk("%s Don't check DS stateid\n", __func__);
++			return 0;
++		}
++#else /* CONFIG_GFS2_FS_LOCKING_DLM */
++			status = nfs4_preprocess_pnfs_ds_stateid(current_fh,
++								 stateid);
++#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
++		goto out;
++	}
++#endif /* CONFIG_PNFSD */
++
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+ 
+@@ -2924,13 +3044,9 @@ nfs4_preprocess_seqid_op(struct nfsd4_co
+ 	*stpp = NULL;
+ 	*sopp = NULL;
+ 
+-	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
+-		dprintk("NFSD: preprocess_seqid_op: magic stateid!\n");
+-		return nfserr_bad_stateid;
+-	}
+-
+-	if (STALE_STATEID(stateid))
+-		return nfserr_stale_stateid;
++	status = nfs4_check_stateid(stateid);
++	if (status)
++		return status;
+ 
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+@@ -3205,11 +3321,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+ 	nfs4_lock_state();
+-	status = nfserr_bad_stateid;
+-	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
+-		goto out;
+-	status = nfserr_stale_stateid;
+-	if (STALE_STATEID(stateid))
++	status = nfs4_check_stateid(stateid);
++	if (status)
+ 		goto out;
+ 	status = nfserr_bad_stateid;
+ 	if (!is_delegation_stateid(stateid))
+@@ -3238,26 +3351,6 @@ out:
+ #define LOCK_HASH_SIZE             (1 << LOCK_HASH_BITS)
+ #define LOCK_HASH_MASK             (LOCK_HASH_SIZE - 1)
+ 
+-static inline u64
+-end_offset(u64 start, u64 len)
+-{
+-	u64 end;
+-
+-	end = start + len;
+-	return end >= start ? end: NFS4_MAX_UINT64;
+-}
+-
+-/* last octet in a range */
+-static inline u64
+-last_byte_offset(u64 start, u64 len)
+-{
+-	u64 end;
+-
+-	BUG_ON(!len);
+-	end = start + len;
+-	return end > start ? end - 1: NFS4_MAX_UINT64;
+-}
+-
+ #define lockownerid_hashval(id) \
+         ((id) & LOCK_HASH_MASK)
+ 
+@@ -3274,7 +3367,7 @@ static struct list_head lock_ownerid_has
+ static struct list_head	lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
+ static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
+ 
+-static struct nfs4_stateid *
++struct nfs4_stateid *
+ find_stateid(stateid_t *stid, int flags)
+ {
+ 	struct nfs4_stateid *local;
+@@ -3303,7 +3396,7 @@ find_stateid(stateid_t *stid, int flags)
+ 	return NULL;
+ }
+ 
+-static struct nfs4_delegation *
++struct nfs4_delegation *
+ find_delegation_stateid(struct inode *ino, stateid_t *stid)
+ {
+ 	struct nfs4_file *fp;
+@@ -3436,6 +3529,9 @@ alloc_init_lock_stateid(struct nfs4_stat
+ 	INIT_LIST_HEAD(&stp->st_perfile);
+ 	INIT_LIST_HEAD(&stp->st_perstateowner);
+ 	INIT_LIST_HEAD(&stp->st_lockowners); /* not used */
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&stp->st_pnfs_ds_id);
++#endif /* CONFIG_PNFSD */
+ 	list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
+ 	list_add(&stp->st_perfile, &fp->fi_stateids);
+ 	list_add(&stp->st_perstateowner, &sop->so_stateids);
+@@ -3998,6 +4094,9 @@ nfs4_state_init(void)
+ 	INIT_LIST_HEAD(&client_lru);
+ 	INIT_LIST_HEAD(&del_recall_lru);
+ 	reclaim_str_hashtbl_size = 0;
++#if defined(CONFIG_PNFSD)
++	nfs4_pnfs_state_init();
++#endif /* CONFIG_PNFSD */
+ 	return 0;
+ }
+ 
+@@ -4110,6 +4209,7 @@ __nfs4_state_shutdown(void)
+ 	}
+ 
+ 	nfsd4_shutdown_recdir();
++	nfs4_pnfs_state_shutdown();
+ 	nfs4_init = 0;
+ }
+ 
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-23 12:09:03.315491356 -0400
+@@ -47,9 +47,14 @@
+ #include <linux/nfsd_idmap.h>
+ #include <linux/nfs4_acl.h>
+ #include <linux/sunrpc/svcauth_gss.h>
++#include <linux/exportfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd4_block.h>
+ 
+ #include "xdr4.h"
+ #include "vfs.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY		NFSDDBG_XDR
+ 
+@@ -1234,6 +1239,138 @@ nfsd4_decode_sequence(struct nfsd4_compo
+ 	DECODE_TAIL;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static __be32
++nfsd4_decode_getdevlist(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_getdevlist *gdevl)
++{
++	DECODE_HEAD;
++
++	READ_BUF(16 + sizeof(nfs4_verifier));
++	READ32(gdevl->gd_layout_type);
++	READ32(gdevl->gd_maxdevices);
++	READ64(gdevl->gd_cookie);
++	COPYMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
++
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_getdevinfo(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_getdevinfo *gdev)
++{
++	u32 num;
++	DECODE_HEAD;
++
++	READ_BUF(12 + sizeof(struct nfsd4_pnfs_deviceid));
++	READ64(gdev->gd_devid.sbid);
++	READ64(gdev->gd_devid.devid);
++	READ32(gdev->gd_layout_type);
++	READ32(gdev->gd_maxcount);
++	READ32(num);			/* notify bitmap length, in words */
++	gdev->gd_notify_types = 0;
++	if (num > (u32)~0U / 4)		/* 4 * num below must not wrap */
++		goto xdr_error;
++	if (num) {
++		READ_BUF(4 * num);	/* consume every bitmap word... */
++		READ32(gdev->gd_notify_types);	/* ...but keep only word 0 */
++	}
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_layoutget *lgp)
++{
++	DECODE_HEAD;
++
++	READ_BUF(36);
++	READ32(lgp->lg_signal);
++	READ32(lgp->lg_seg.layout_type);
++	READ32(lgp->lg_seg.iomode);
++	READ64(lgp->lg_seg.offset);
++	READ64(lgp->lg_seg.length);
++	READ64(lgp->lg_minlength);
++	if (nfsd4_decode_stateid(argp, &lgp->lg_sid))	/* was ignored */
++		goto xdr_error;
++	READ_BUF(4);
++	READ32(lgp->lg_maxcount);
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
++			  struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	DECODE_HEAD;
++	u32 timechange;
++
++	READ_BUF(20);
++	READ64(lcp->args.lc_seg.offset);
++	READ64(lcp->args.lc_seg.length);
++	READ32(lcp->args.lc_reclaim);
++	if (nfsd4_decode_stateid(argp, &lcp->lc_sid))	/* was ignored */
++		goto xdr_error;
++	READ_BUF(4);
++	READ32(lcp->args.lc_newoffset);
++	if (lcp->args.lc_newoffset) {
++		READ_BUF(8);
++		READ64(lcp->args.lc_last_wr);
++	} else
++		lcp->args.lc_last_wr = 0;
++	READ_BUF(4);
++	READ32(timechange);
++	if (timechange) {
++		READ_BUF(12);
++		READ64(lcp->args.lc_mtime.seconds);
++		READ32(lcp->args.lc_mtime.nseconds);
++	} else {
++		lcp->args.lc_mtime.seconds = 0;
++		lcp->args.lc_mtime.nseconds = 0;
++	}
++	READ_BUF(8);
++	READ32(lcp->args.lc_seg.layout_type);
++	/* XXX: the layout update is saved still XDR-encoded. current_fh, and
++	 * therefore the export_ops, is not known yet, so the layout specific
++	 * decode routines can't be called. File and pVFS2 do not use it.
++	 */
++	READ32(lcp->args.lc_up_len);
++	if (lcp->args.lc_up_len > 0) {
++		READ_BUF(lcp->args.lc_up_len);
++		READMEM(lcp->args.lc_up_layout, lcp->args.lc_up_len);
++	}
++
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
++			  struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	DECODE_HEAD;
++
++	READ_BUF(16);
++	READ32(lrp->args.lr_reclaim);
++	READ32(lrp->args.lr_seg.layout_type);
++	READ32(lrp->args.lr_seg.iomode);
++	READ32(lrp->args.lr_return_type);
++	if (lrp->args.lr_return_type == RETURN_FILE) {
++		READ_BUF(16);
++		READ64(lrp->args.lr_seg.offset);
++		READ64(lrp->args.lr_seg.length);
++		if (nfsd4_decode_stateid(argp, &lrp->lr_sid))	/* was ignored */
++			goto xdr_error;
++		READ_BUF(4);
++		READ32(lrp->args.lrf_body_len);
++		if (lrp->args.lrf_body_len > 0) {
++			READ_BUF(lrp->args.lrf_body_len);
++			READMEM(lrp->args.lrf_body, lrp->args.lrf_body_len);
++		}
++	}
++	DECODE_TAIL;
++}
++#endif /* CONFIG_PNFSD */
++
+ static __be32
+ nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
+ {
+@@ -1335,11 +1472,19 @@ static nfsd4_dec nfsd41_dec_ops[] = {
+ 	[OP_DESTROY_SESSION]	= (nfsd4_dec)nfsd4_decode_destroy_session,
+ 	[OP_FREE_STATEID]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_GET_DIR_DELEGATION]	= (nfsd4_dec)nfsd4_decode_notsupp,
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICEINFO]	= (nfsd4_dec)nfsd4_decode_getdevinfo,
++	[OP_GETDEVICELIST]	= (nfsd4_dec)nfsd4_decode_getdevlist,
++	[OP_LAYOUTCOMMIT]	= (nfsd4_dec)nfsd4_decode_layoutcommit,
++	[OP_LAYOUTGET]		= (nfsd4_dec)nfsd4_decode_layoutget,
++	[OP_LAYOUTRETURN]	= (nfsd4_dec)nfsd4_decode_layoutreturn,
++#else  /* CONFIG_PNFSD */
+ 	[OP_GETDEVICEINFO]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_GETDEVICELIST]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTCOMMIT]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTGET]		= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTRETURN]	= (nfsd4_dec)nfsd4_decode_notsupp,
++#endif /* CONFIG_PNFSD */
+ 	[OP_SECINFO_NO_NAME]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_SEQUENCE]		= (nfsd4_dec)nfsd4_decode_sequence,
+ 	[OP_SET_SSV]		= (nfsd4_dec)nfsd4_decode_notsupp,
+@@ -2136,6 +2281,36 @@ out_acl:
+ 		}
+ 		WRITE64(stat.ino);
+ 	}
++#if defined(CONFIG_PNFSD)
++	if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) {
++		struct super_block *sb = dentry->d_inode->i_sb;
++		int type = 0;
++
++		/* Query the filesystem for supported pNFS layout types.
++		 * Currently, we only support one layout type per file system.
++		 * The export_ops->layout_type() returns the pnfs_layouttype4.
++		 */
++		buflen -= 4;
++		if (buflen < 0)		/* length */
++			goto out_resource;
++
++		if (sb && sb->s_pnfs_op && sb->s_pnfs_op->layout_type)
++			type = sb->s_pnfs_op->layout_type(sb);
++		if (type) {
++			if ((buflen -= 4) < 0)	/* type */
++				goto out_resource;
++			WRITE32(1); 	/* length */
++			WRITE32(type);  /* type */
++		} else
++			WRITE32(0);  /* length */
++	}
++
++	if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) {
++		if ((buflen -= 4) < 0)
++			goto out_resource;
++		WRITE32(stat.blksize);
++	}
++#endif /* CONFIG_PNFSD */
+ 	if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
+ 		WRITE32(3);
+ 		WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
+@@ -2366,6 +2541,10 @@ nfsd4_encode_commit(struct nfsd4_compoun
+ 	if (!nfserr) {
+ 		RESERVE_SPACE(8);
+ 		WRITEMEM(commit->co_verf.data, 8);
++		dprintk("NFSD: nfsd4_encode_commit: verifier %x:%x\n",
++			((u32 *)(&commit->co_verf.data))[0],
++			((u32 *)(&commit->co_verf.data))[1]);
++
+ 		ADJUST_ARGS();
+ 	}
+ 	return nfserr;
+@@ -2620,9 +2799,20 @@ nfsd4_encode_read(struct nfsd4_compoundr
+ 	}
+ 	read->rd_vlen = v;
+ 
++#if defined(CONFIG_SPNFS)
++	if (spnfs_enabled())
++		nfserr = spnfs_read(read->rd_fhp->fh_dentry->d_inode,
++				    read->rd_offset, &maxcount, read->rd_vlen,
++				    resp->rqstp);
++	else /* we're not an MDS */
++		nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
++			read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
++			&maxcount);
++#else
+ 	nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
+ 			read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
+ 			&maxcount);
++#endif /* CONFIG_SPNFS */
+ 
+ 	if (nfserr == nfserr_symlink)
+ 		nfserr = nfserr_inval;
+@@ -2926,6 +3116,9 @@ nfsd4_encode_write(struct nfsd4_compound
+ 		WRITE32(write->wr_bytes_written);
+ 		WRITE32(write->wr_how_written);
+ 		WRITEMEM(write->wr_verifier.data, 8);
++		dprintk("NFSD: nfsd4_encode_write: verifier %x:%x\n",
++			((u32 *)(&write->wr_verifier.data))[0],
++			((u32 *)(&write->wr_verifier.data))[1]);
+ 		ADJUST_ARGS();
+ 	}
+ 	return nfserr;
+@@ -3069,6 +3262,343 @@ nfsd4_encode_sequence(struct nfsd4_compo
+ 	return 0;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++/* Walk the exported file system's devices through ->get_device_iter() and
++ * encode one deviceid per device, stopping at gd_maxdevices or end of list.
++ */
++static  __be32
++nfsd4_encode_devlist_iterator(struct nfsd4_compoundres *resp,
++			      struct nfsd4_pnfs_getdevlist *gdevl,
++			      unsigned int *dev_count)
++{
++	struct super_block *sb = gdevl->gd_fhp->fh_dentry->d_inode->i_sb;
++	__be32 nfserr;
++	int status;
++	__be32 *p;
++	struct nfsd4_pnfs_dev_iter_res res = {	/* start from the cookie/verifier held in gdevl */
++		.gd_cookie = gdevl->gd_cookie,
++		.gd_verf = gdevl->gd_verf,
++		.gd_eof = 0
++	};
++	u64 sbid;
++
++	dprintk("%s: Begin\n", __func__);
++
++	sbid = find_create_sbid(sb);
++	*dev_count = 0;
++	do {
++		status = sb->s_pnfs_op->get_device_iter(sb,
++							gdevl->gd_layout_type,
++							&res);
++		if (status) {
++			if (status == -ENOENT) {
++				res.gd_eof = 1;
++				/* no more devices: end of list, not a failure */
++				break;
++			}
++			nfserr = nfserrno(status);
++			goto out_err;	/* gdevl cookie/verf/eof are left unmodified */
++		}
++
++		/* Encode the deviceid: sbid first, then the file system's devid */
++		RESERVE_SPACE(sizeof(struct nfsd4_pnfs_deviceid));
++		WRITE64((__be64)sbid);
++		WRITE64(res.gd_devid);	/* devid minor */
++		ADJUST_ARGS();
++		(*dev_count)++;
++	} while (*dev_count < gdevl->gd_maxdevices && !res.gd_eof);
++	gdevl->gd_cookie = res.gd_cookie;	/* hand the iterator state back through gdevl */
++	gdevl->gd_verf = res.gd_verf;
++	gdevl->gd_eof = res.gd_eof;
++	nfserr = nfs_ok;
++out_err:
++	dprintk("%s: Encoded %u devices\n", __func__, *dev_count);
++	return nfserr;
++}
++
++/* Encode the GETDEVICELIST reply: cookie, verifier and device count
++ * (backfilled once the devices are known), the deviceids, then the eof
++ * flag.  On iterator failure the stream is rewound to where it started.
++ */
++static __be32
++nfsd4_encode_getdevlist(struct nfsd4_compoundres *resp, __be32 nfserr,
++			struct nfsd4_pnfs_getdevlist *gdevl)
++{
++	unsigned int dev_count = 0, lead_count;
++	__be32 *p_in = resp->p;	/* same type as p, which it is assigned to below */
++	__be32 *p;
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	/* Ensure we have room for cookie, verifier, and devlist len,
++	 * which we will backfill in after we encode as many devices as possible
++	 */
++	lead_count = 8 + sizeof(nfs4_verifier) + 4;
++	RESERVE_SPACE(lead_count);
++	/* skip past these values */
++	p += XDR_QUADLEN(lead_count);
++	ADJUST_ARGS();
++
++	/* Iterate over as many device ids as possible on the xdr stream */
++	nfserr = nfsd4_encode_devlist_iterator(resp, gdevl, &dev_count);
++	if (nfserr)
++		goto out_err;
++
++	/* Backfill in cookie, verf and number of devices encoded */
++	p = p_in;
++	WRITE64(gdevl->gd_cookie);
++	WRITEMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
++	WRITE32(dev_count);
++
++	/* Skip over devices */
++	p += XDR_QUADLEN(dev_count * sizeof(struct nfsd4_pnfs_deviceid));
++	ADJUST_ARGS();
++
++	/* are we at the end of devices? */
++	RESERVE_SPACE(4);
++	WRITE32(gdevl->gd_eof);
++	ADJUST_ARGS();
++
++	dprintk("%s: done.\n", __func__);
++	return nfs_ok;
++out_err:
++	/* Discard everything written so far */
++	p = p_in;
++	ADJUST_ARGS();
++	return nfserr;
++}
++
++/* For a given device id, have the file system retrieve and encode the
++ * associated device.  The file system's ->get_device_info() writes straight
++ * into the reply through an exp_xdr_stream whose end is clamped to maxcount.
++ *
++ * Below, nfserr carries a negative errno (not an NFS status) and is only
++ * converted with nfserrno() on return.  On -ETOOSMALL the reply holds just
++ * the minimum count, computed from how far xdr.p advanced - NOTE(review):
++ * presumably the file system must leave xdr->p at the size it needs; confirm.
++ */
++static __be32
++nfsd4_encode_getdevinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
++			struct nfsd4_pnfs_getdevinfo *gdev)
++{
++	struct super_block *sb;
++	int maxcount = 0, type_notify_len = 12;
++	__be32 *p, *p_save = NULL, *p_in = resp->p;	/* p_in: rewind point for the err path */
++	struct exp_xdr_stream xdr;
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	sb = gdev->gd_sb;
++
++	if (gdev->gd_maxcount != 0) {
++		/* FIXME: this will be bound by the session max response */
++		maxcount = svc_max_payload(resp->rqstp);
++		if (maxcount > gdev->gd_maxcount)
++			maxcount = gdev->gd_maxcount;
++
++		/* Ensure have room for type and notify field */
++		maxcount -= type_notify_len;
++		if (maxcount < 0) {
++			nfserr = -ETOOSMALL;
++			goto toosmall;	/* p_save is still NULL, so only type_notify_len is reported */
++		}
++	}
++
++	RESERVE_SPACE(4);
++	WRITE32(gdev->gd_layout_type);
++	ADJUST_ARGS();
++
++	/* If maxcount is 0 then just update notifications */
++	if (gdev->gd_maxcount == 0)
++		goto handle_notifications;
++
++	xdr.p = p_save = resp->p;
++	xdr.end = resp->end;
++	if (xdr.end - xdr.p > exp_xdr_qwords(maxcount & ~3))	/* clamp the fs to maxcount rounded down to 4 */
++		xdr.end = xdr.p + exp_xdr_qwords(maxcount & ~3);
++
++	nfserr = sb->s_pnfs_op->get_device_info(sb, &xdr, gdev->gd_layout_type,
++						&gdev->gd_devid);
++	if (nfserr)
++		goto err;
++
++	/* The file system should never write 0 bytes without
++	 * returning an error
++	 */
++	BUG_ON(xdr.p == p_save);
++	BUG_ON(xdr.p > xdr.end);
++
++	/* Update the xdr stream with the number of bytes encoded
++	 * by the file system.
++	 */
++	p = xdr.p;
++	ADJUST_ARGS();
++
++handle_notifications:
++	/* Encode supported device notifications */
++	RESERVE_SPACE(4);
++	if (sb->s_pnfs_op->set_device_notify) {
++		struct pnfs_devnotify_arg dn_args;
++
++		dn_args.dn_layout_type = gdev->gd_layout_type;
++		dn_args.dn_devid = gdev->gd_devid;
++		dn_args.dn_notify_types = gdev->gd_notify_types;
++		nfserr = sb->s_pnfs_op->set_device_notify(sb, &dn_args);
++		if (nfserr)
++			goto err;
++		WRITE32(dn_args.dn_notify_types);	/* value as left in dn_args by the file system call */
++	} else {
++		WRITE32(0);
++	}
++	ADJUST_ARGS();
++
++out:
++	return nfserrno(nfserr);	/* errno -> NFS status; 0 maps to success */
++toosmall:
++	dprintk("%s: maxcount too small\n", __func__);
++	RESERVE_SPACE(4);
++	WRITE32((p_save ? (xdr.p - p_save) * 4 : 0) + type_notify_len);	/* words -> bytes, plus fixed fields */
++	ADJUST_ARGS();
++	goto out;
++err:
++	/* Rewind to the beginning */
++	p = p_in;
++	ADJUST_ARGS();
++	if (nfserr == -ETOOSMALL)
++		goto toosmall;
++	printk(KERN_ERR "%s: export ERROR %d\n", __func__, nfserr);
++	goto out;
++}
++
++static __be32	/* LAYOUTGET reply: layout body is encoded first, its header backfilled after */
++nfsd4_encode_layoutget(struct nfsd4_compoundres *resp,
++		       __be32 nfserr,
++		       struct nfsd4_pnfs_layoutget *lgp)
++{
++	int maxcount, leadcount;
++	struct super_block *sb;
++	struct exp_xdr_stream xdr;
++	__be32 *p, *p_save, *p_start = resp->p;	/* p_start: where the header is backfilled / rewind point */
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	sb = lgp->lg_fhp->fh_dentry->d_inode->i_sb;
++	maxcount = PAGE_SIZE;	/* cap at one page, then at lg_maxcount */
++	if (maxcount > lgp->lg_maxcount)
++		maxcount = lgp->lg_maxcount;
++
++	/* Check for space on xdr stream */
++	leadcount = 36 + sizeof(stateid_opaque_t);	/* 4 + 28 written below; rest presumably the stateid - confirm */
++	RESERVE_SPACE(leadcount);
++	/* encode layout metadata after file system encodes layout */
++	p += XDR_QUADLEN(leadcount);
++	ADJUST_ARGS();
++
++	/* Ensure have room for ret_on_close, off, len, iomode, type */
++	maxcount -= leadcount;
++	if (maxcount < 0) {
++		printk(KERN_ERR "%s: buffer too small\n", __func__);
++		nfserr = nfserr_toosmall;
++		goto err;
++	}
++
++	/* Set xdr info so file system can encode layout */
++	xdr.p = p_save = resp->p;
++	xdr.end = resp->end;
++	if (xdr.end - xdr.p > exp_xdr_qwords(maxcount & ~3))	/* clamp to maxcount rounded down to 4 */
++		xdr.end = xdr.p + exp_xdr_qwords(maxcount & ~3);
++
++	/* Retrieve, encode, and merge layout; process stateid */
++	nfserr = nfs4_pnfs_get_layout(lgp, &xdr);
++	if (nfserr)
++		goto err;
++
++	/* Ensure file system returned enough bytes for the client
++	 * to access.
++	 */
++	if (lgp->lg_seg.length < lgp->lg_minlength) {
++		nfserr = nfserr_badlayout;
++		goto err;
++	}
++
++	/* The file system should never write 0 bytes without
++	 * returning an error
++	 */
++	BUG_ON(xdr.p == p_save);
++
++	/* Rewind to beginning and encode attrs */
++	resp->p = p_start;
++	RESERVE_SPACE(4);
++	WRITE32(lgp->lg_roc);	/* return on close */
++	ADJUST_ARGS();
++	nfsd4_encode_stateid(resp, &lgp->lg_sid);
++	RESERVE_SPACE(28);
++	/* Note: response logr_layout array count, always one for now */
++	WRITE32(1);
++	WRITE64(lgp->lg_seg.offset);
++	WRITE64(lgp->lg_seg.length);
++	WRITE32(lgp->lg_seg.iomode);
++	WRITE32(lgp->lg_seg.layout_type);
++
++	/* Update the xdr stream with the number of bytes written
++	 * by the file system
++	 */
++	p = xdr.p;
++	ADJUST_ARGS();
++
++	return nfs_ok;
++err:
++	resp->p = p_start;	/* drop the reserved header and any partial layout body */
++	return nfserr;
++}
++
++/* Encode a LAYOUTCOMMIT reply: size-changed flag, then the new size if set. */
++static __be32 nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp,
++					__be32 nfserr,
++					struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	__be32 *p;
++
++	if (nfserr)
++		return nfserr;
++
++	RESERVE_SPACE(4);
++	WRITE32(lcp->res.lc_size_chg);
++	ADJUST_ARGS();
++	if (!lcp->res.lc_size_chg)
++		return nfserr;
++	RESERVE_SPACE(8);
++	WRITE64(lcp->res.lc_newsize);
++	ADJUST_ARGS();
++	return nfserr;
++}
++
++/* Encode a LAYOUTRETURN reply: stateid-present flag, then the stateid. */
++static __be32 nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp,
++					__be32 nfserr,
++					struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	__be32 *p;
++
++	if (nfserr)
++		return nfserr;
++
++	RESERVE_SPACE(4);
++	WRITE32(lrp->lrs_present != 0);    /* got stateid? */
++	ADJUST_ARGS();
++	if (lrp->lrs_present)
++		nfsd4_encode_stateid(resp, &lrp->lr_sid);
++	return nfserr;
++}
++#endif /* CONFIG_PNFSD */
++
+ static __be32
+ nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
+ {
+@@ -3129,11 +3659,19 @@ static nfsd4_enc nfsd4_enc_ops[] = {
+ 	[OP_DESTROY_SESSION]	= (nfsd4_enc)nfsd4_encode_destroy_session,
+ 	[OP_FREE_STATEID]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_GET_DIR_DELEGATION]	= (nfsd4_enc)nfsd4_encode_noop,
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_getdevinfo,
++	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_getdevlist,
++	[OP_LAYOUTCOMMIT]	= (nfsd4_enc)nfsd4_encode_layoutcommit,
++	[OP_LAYOUTGET]		= (nfsd4_enc)nfsd4_encode_layoutget,
++	[OP_LAYOUTRETURN]	= (nfsd4_enc)nfsd4_encode_layoutreturn,
++#else  /* CONFIG_PNFSD */
+ 	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTCOMMIT]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTGET]		= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTRETURN]	= (nfsd4_enc)nfsd4_encode_noop,
++#endif /* CONFIG_PNFSD */
+ 	[OP_SECINFO_NO_NAME]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_SEQUENCE]		= (nfsd4_enc)nfsd4_encode_sequence,
+ 	[OP_SET_SSV]		= (nfsd4_enc)nfsd4_encode_noop,
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
+--- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-08-23 12:08:29.094491943 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 12:09:03.317501495 -0400
+@@ -13,10 +13,15 @@
+ #include <linux/nfsd/syscall.h>
+ #include <linux/lockd/lockd.h>
+ #include <linux/sunrpc/clnt.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
+ 
+ #include "nfsd.h"
+ #include "cache.h"
+ 
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++#include <linux/nfsd4_spnfs.h>
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
+ /*
+  *	We have a single directory with 9 nodes in it.
+  */
+@@ -49,6 +54,9 @@ enum {
+ 	NFSD_Gracetime,
+ 	NFSD_RecoveryDir,
+ #endif
++#ifdef CONFIG_PNFSD
++	NFSD_pnfs_dlm_device,
++#endif
+ };
+ 
+ /*
+@@ -74,6 +82,9 @@ static ssize_t write_leasetime(struct fi
+ static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
+ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
+ #endif
++#ifdef CONFIG_PNFSD
++static ssize_t write_pnfs_dlm_device(struct file *file, char *buf, size_t size);
++#endif
+ 
+ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
+ 	[NFSD_Svc] = write_svc,
+@@ -96,6 +107,9 @@ static ssize_t (*write_op[])(struct file
+ 	[NFSD_Gracetime] = write_gracetime,
+ 	[NFSD_RecoveryDir] = write_recoverydir,
+ #endif
++#ifdef CONFIG_PNFSD
++	[NFSD_pnfs_dlm_device] = write_pnfs_dlm_device,
++#endif
+ };
+ 
+ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
+@@ -1349,6 +1363,68 @@ static ssize_t write_recoverydir(struct 
+ 
+ #endif
+ 
++#ifdef CONFIG_PNFSD
++
++/* Backend of write_pnfs_dlm_device(), which calls it with nfsd_mutex held. */
++static ssize_t __write_pnfs_dlm_device(struct file *file, char *buf,
++				       size_t size)
++{
++	char *cursor = buf;
++	int limit = NFSD_PNFS_DLM_DEVICE_MAX;
++	int wordlen, rc;
++
++	/* Empty write: report the current device list back in buf */
++	if (size == 0)
++		return nfsd4_get_pnfs_dlm_device_list(buf, SIMPLE_TRANSACTION_LIMIT);
++
++	/* The request must fit the limit and be newline terminated */
++	if (size > limit || buf[size-1] != '\n')
++		return -EINVAL;
++	buf[size-1] = 0;
++
++	/* Extract the first word in place, over the start of buf */
++	wordlen = qword_get(&cursor, buf, size);
++	if (wordlen <= 0)
++		return -EINVAL;
++
++	rc = nfsd4_set_pnfs_dlm_device(buf, wordlen);
++	return rc <= 0 ? rc : strlen(buf);
++}
++
++/**
++ * write_pnfs_dlm_device - Set or report the current pNFS data server list
++ *
++ * Runs __write_pnfs_dlm_device() with nfsd_mutex held.
++ *
++ * Input (report):
++ *			buf:		ignored
++ *			size:		zero
++ *
++ * Input (set):
++ *			buf:		C string containing a block device name,
++ *					a colon, and then a comma separated
++ *					list of pNFS data server IPv4 addresses
++ *			size:		non-zero length of C string in @buf
++ * Output:
++ *	On success:	passed-in buffer filled with '\n'-terminated C
++ *			string containing a block device name, a colon, and
++ *			then a comma separated list of pNFS data server IPv4
++ *			addresses; return code is the size in bytes of the string
++ *	On error:	return code is a negative errno value
++ */
++static ssize_t write_pnfs_dlm_device(struct file *file, char *buf, size_t size)
++{
++	ssize_t result;
++
++	mutex_lock(&nfsd_mutex);
++	result = __write_pnfs_dlm_device(file, buf, size);
++	mutex_unlock(&nfsd_mutex);
++
++	return result;
++}
++
++#endif /* CONFIG_PNFSD */
++
+ /*----------------------------------------------------------------------------*/
+ /*
+  *	populating the filesystem.
+@@ -1383,6 +1459,10 @@ static int nfsd_fill_super(struct super_
+ 		[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
+ 		[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
+ #endif
++#ifdef CONFIG_PNFSD
++		[NFSD_pnfs_dlm_device] = {"pnfs_dlm_device", &transaction_ops,
++					   S_IWUSR|S_IRUSR},
++#endif
+ 		/* last one */ {""}
+ 	};
+ 	return simple_fill_super(sb, 0x6e667364, nfsd_files);
+@@ -1421,6 +1501,9 @@ static int create_proc_exports_entry(voi
+ }
+ #endif
+ 
++#if defined(CONFIG_SPNFS_BLOCK)
++int nfsd_bl_init(void);
++#endif
+ static int __init init_nfsd(void)
+ {
+ 	int retval;
+@@ -1443,6 +1526,15 @@ static int __init init_nfsd(void)
+ 	retval = create_proc_exports_entry();
+ 	if (retval)
+ 		goto out_free_idmap;
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++	retval = spnfs_init_proc();
++	if (retval != 0)	/* exports proc entry exists by now: unwind it too */
++		goto out_free_all;
++#if defined(CONFIG_SPNFS_BLOCK)
++	nfsd_bl_init();	/* NOTE(review): declared as returning int, result is ignored */
++#endif /* CONFIG_SPNFS_BLOCK */
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
+ 	retval = register_filesystem(&nfsd_fs_type);
+ 	if (retval)
+ 		goto out_free_all;
+@@ -1465,7 +1557,22 @@ out_free_stat:
+ 
+ static void __exit exit_nfsd(void)
+ {
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++	remove_proc_entry("fs/nfs/spnfs/recall", NULL);
++	remove_proc_entry("fs/nfs/spnfs/layoutseg", NULL);
++	remove_proc_entry("fs/nfs/spnfs/getfh", NULL);
++	remove_proc_entry("fs/nfs/spnfs/config", NULL);
++	remove_proc_entry("fs/nfs/spnfs/ctl", NULL);
++	remove_proc_entry("fs/nfs/spnfs", NULL);
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS_LAYOUTSEGMENTS)
++	remove_proc_entry("fs/nfs/spnfs/layoutseg", NULL);
++	remove_proc_entry("fs/nfs/spnfs/layoutsegsize", NULL);
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS_LAYOUTSEGMENTS */
++
+ 	nfsd_export_shutdown();
++	nfsd4_pnfs_dlm_shutdown();
+ 	nfsd_reply_cache_shutdown();
+ 	remove_proc_entry("fs/nfs/exports", NULL);
+ 	remove_proc_entry("fs/nfs", NULL);
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
+--- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-08-23 12:08:29.095491390 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 12:09:03.318355741 -0400
+@@ -285,11 +285,17 @@ extern time_t nfsd4_grace;
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
+ 	NFSD4_SUPPORTED_ATTRS_WORD0
+ 
++#if defined(CONFIG_PNFSD)
++#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
++	(NFSD4_SUPPORTED_ATTRS_WORD1 | FATTR4_WORD1_FS_LAYOUT_TYPES)
++#else /* CONFIG_PNFSD */
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
+ 	NFSD4_SUPPORTED_ATTRS_WORD1
++#endif /* CONFIG_PNFSD */
+ 
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
+-	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
++	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT | \
++	 FATTR4_WORD2_LAYOUT_BLKSIZE)
+ 
+ static inline u32 nfsd_suppattrs0(u32 minorversion)
+ {
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.c
+--- linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-08-23 12:09:03.319511586 -0400
+@@ -10,6 +10,7 @@
+ #include <linux/exportfs.h>
+ 
+ #include <linux/sunrpc/svcauth_gss.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
+ #include "nfsd.h"
+ #include "vfs.h"
+ #include "auth.h"
+@@ -139,6 +140,7 @@ static inline __be32 check_pseudo_root(s
+ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
+ {
+ 	struct knfsd_fh	*fh = &fhp->fh_handle;
++	int fsid_type;
+ 	struct fid *fid = NULL, sfid;
+ 	struct svc_export *exp;
+ 	struct dentry *dentry;
+@@ -159,7 +161,8 @@ static __be32 nfsd_set_fh_dentry(struct 
+ 			return error;
+ 		if (fh->fh_auth_type != 0)
+ 			return error;
+-		len = key_len(fh->fh_fsid_type) / 4;
++		fsid_type = pnfs_fh_fsid_type(fh);
++		len = key_len(fsid_type) / 4;
+ 		if (len == 0)
+ 			return error;
+ 		if  (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
+@@ -172,7 +175,7 @@ static __be32 nfsd_set_fh_dentry(struct 
+ 		data_left -= len;
+ 		if (data_left < 0)
+ 			return error;
+-		exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth);
++		exp = rqst_exp_find(rqstp, fsid_type, fh->fh_auth);
+ 		fid = (struct fid *)(fh->fh_auth + len);
+ 	} else {
+ 		__u32 tfh[2];
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.h
+--- linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-08-23 12:09:03.319511586 -0400
+@@ -14,6 +14,7 @@ enum nfsd_fsid {
+ 	FSID_UUID8,
+ 	FSID_UUID16,
+ 	FSID_UUID16_INUM,
++	FSID_MAX
+ };
+ 
+ enum fsid_source {
+@@ -205,4 +206,42 @@ fh_unlock(struct svc_fh *fhp)
+ 	}
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++/*
++ * fh_fsid_type is overloaded to indicate whether a filehandle was one supplied
++ * to a DS by LAYOUTGET.  nfs4_preprocess_stateid_op() uses this to decide how
++ * to handle a given stateid.
++ */
++static inline int pnfs_fh_is_ds(struct knfsd_fh *fh)
++{
++	return fh->fh_fsid_type >= FSID_MAX;	/* pnfs_fh_mark_ds() adds FSID_MAX to mark a handle */
++}
++
++static inline void pnfs_fh_mark_ds(struct knfsd_fh *fh)
++{
++	BUG_ON(fh->fh_version != 1);	/* only version 1 handles may be marked */
++	BUG_ON(pnfs_fh_is_ds(fh));	/* marking twice would add FSID_MAX twice */
++	fh->fh_fsid_type += FSID_MAX;	/* pnfs_fh_fsid_type() subtracts it again */
++}
++
++#else  /* CONFIG_PNFSD */
++
++static inline int pnfs_fh_is_ds(struct knfsd_fh *fh)
++{
++	return 0;	/* built without CONFIG_PNFSD: no handle is reported as a DS handle */
++}
++
++#endif /* CONFIG_PNFSD */
++
++/* Recover the real (non-overloaded) fsid_type of a handle, undoing any DS
++ * marking, so that fh_verify() can check it.
++ */
++static inline int pnfs_fh_fsid_type(struct knfsd_fh *fh)
++{
++	int raw = fh->fh_fsid_type;
++
++	return pnfs_fh_is_ds(fh) ? raw - FSID_MAX : raw;
++}
++
+ #endif /* _LINUX_NFSD_FH_INT_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/nfssvc.c
+--- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-08-23 12:08:27.631563969 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-08-23 12:09:03.320416974 -0400
+@@ -115,7 +115,7 @@ struct svc_program		nfsd_program = {
+ 
+ };
+ 
+-u32 nfsd_supported_minorversion;
++u32 nfsd_supported_minorversion = NFSD_SUPPORTED_MINOR_VERSION;
+ 
+ int nfsd_vers(int vers, enum vers_op change)
+ {
+diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pnfsd.h
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-08-23 12:09:03.321376171 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-08-23 12:09:03.321376171 -0400
+@@ -0,0 +1,143 @@
++/*
++ *  Copyright (c) 2005 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef LINUX_NFSD_PNFSD_H
++#define LINUX_NFSD_PNFSD_H
++
++#include <linux/list.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++#include "state.h"
++#include "xdr4.h"
++
++/* outstanding layout stateid; ls_ref is a kref reference count */
++struct nfs4_layout_state {
++	struct list_head	ls_perfile; /* presumably linked per nfs4_file - confirm */
++	struct list_head	ls_layouts; /* list of nfs4_layouts */
++	struct kref		ls_ref;
++	struct nfs4_client	*ls_client; /* NOTE(review): presumably the owning client - verify */
++	struct nfs4_file	*ls_file;
++	stateid_t		ls_stateid;
++};
++
++/* outstanding layout */
++struct nfs4_layout {
++	struct list_head		lo_perfile;	/* hash by f_id */
++	struct list_head		lo_perclnt;	/* hash by clientid */
++	struct list_head		lo_perstate;	/* presumably on lo_state->ls_layouts - confirm */
++	struct nfs4_file		*lo_file;	/* backpointer */
++	struct nfs4_client		*lo_client;
++	struct nfs4_layout_state	*lo_state;
++	struct nfsd4_layout_seg 	lo_seg;	/* NOTE(review): presumably the segment this layout covers */
++};
++
++struct pnfs_inval_state {	/* NOTE(review): presumably an invalidation request for DS state - confirm */
++	struct knfsd_fh		mdsfh; /* needed only by invalidate all */
++	stateid_t		stid;
++	clientid_t		clid;
++	u32			status;
++};
++
++/* pNFS Data Server state */
++#define DS_STATEID_VALID   0
++#define DS_STATEID_ERROR   1
++#define DS_STATEID_NEW     2
++
++struct pnfs_ds_stateid {
++	struct list_head	ds_hash;        /* ds_stateid hash entry */
++	struct list_head	ds_perclid;     /* per client hash entry */
++	stateid_t		ds_stid;
++	struct knfsd_fh		ds_fh;
++	unsigned long		ds_access;
++	u32			ds_status;      /* from MDS */
++	u32			ds_verifier[2]; /* from MDS */
++	wait_queue_head_t	ds_waitq;
++	unsigned long		ds_flags;	/* presumably DS_STATEID_* bit numbers - confirm */
++	struct kref		ds_ref;		/* kref reference count */
++	clientid_t		ds_mdsclid;
++};
++
++struct pnfs_ds_clientid {
++	struct list_head	dc_hash;        /* mds_clid_hashtbl entry */
++	struct list_head	dc_stateid;     /* ds_stateid head */
++	struct list_head	dc_permdsid;    /* per mdsid hash entry */
++	clientid_t		dc_mdsclid;
++	struct kref		dc_ref;		/* kref reference count */
++	uint32_t		dc_mdsid;
++};
++
++struct pnfs_mds_id {
++	struct list_head	di_hash;        /* mds_nodeid list entry */
++	struct list_head	di_mdsclid;     /* mds_clientid head */
++	uint32_t		di_mdsid;
++	time_t			di_mdsboot;	/* mds boot time */
++	struct kref		di_ref;		/* kref reference count */
++};
++
++/* notify device request (from exported filesystem) */
++struct nfs4_notify_device {
++	struct nfsd4_pnfs_cb_dev_list  *nd_list;
++	struct nfs4_client	       *nd_client;	/* NOTE(review): presumably the client to notify */
++	struct list_head	        nd_perclnt;
++
++	void				*nd_args;	/* nfsd internal */
++};
++
++u64 find_create_sbid(struct super_block *);
++struct super_block *find_sbid_id(u64);
++__be32 nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *, struct exp_xdr_stream *);
++int nfs4_pnfs_return_layout(struct super_block *, struct svc_fh *,
++					struct nfsd4_pnfs_layoutreturn *);
++int nfs4_pnfs_cb_get_state(struct super_block *, struct pnfs_get_state *);
++int nfs4_pnfs_cb_change_state(struct pnfs_get_state *);
++void nfs4_ds_get_verifier(stateid_t *, struct super_block *, u32 *);
++int put_layoutrecall(struct nfs4_layoutrecall *);
++void nomatching_layout(struct nfs4_layoutrecall *);
++void *layoutrecall_done(struct nfs4_layoutrecall *);
++int nfsd4_cb_layout(struct nfs4_layoutrecall *);
++int nfsd_layout_recall_cb(struct super_block *, struct inode *,
++			  struct nfsd4_pnfs_cb_layout *);
++int nfsd_device_notify_cb(struct super_block *,
++			  struct nfsd4_pnfs_cb_dev_list *);
++int nfsd4_cb_notify_device(struct nfs4_notify_device *);
++void pnfs_set_device_notify(clientid_t *, unsigned int types);
++void pnfs_clear_device_notify(struct nfs4_client *);
++
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++extern struct sockaddr pnfsd_lexp_addr;
++extern size_t pnfs_lexp_addr_len;
++
++extern void pnfsd_lexp_init(struct inode *);
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
++#endif /* LINUX_NFSD_PNFSD_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-08-23 12:09:03.321376171 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-08-23 12:09:03.322501672 -0400
+@@ -0,0 +1,225 @@
++/*
++ * linux/fs/nfsd/pnfsd_lexp.c
++ *
++ * pNFS export of local filesystems.
++ *
++ * Export local file systems over the files layout type.
++ * The MDS (metadata server) functions also as a single DS (data server).
++ * This is mostly useful for development and debugging purposes.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * Copyright (C) 2008 Benny Halevy, <bhalevy@panasas.com>
++ *
++ * Initial implementation was based on the pnfs-gfs2 patches done
++ * by David M. Richter <richterd@citi.umich.edu>
++ */
++
++#include <linux/sunrpc/svc_xprt.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++struct sockaddr pnfsd_lexp_addr;
++size_t pnfs_lexp_addr_len;
++
++static int
++pnfsd_lexp_layout_type(struct super_block *sb)
++{
++	/* The local export only ever offers the NFSv4.1 files layout */
++	dprintk("<-- %s: return %d\n", __func__, LAYOUT_NFSV4_1_FILES);
++	return LAYOUT_NFSV4_1_FILES;
++}
++
++/* Device iterator for the local export: reports the single device (id 1). */
++static int pnfsd_lexp_get_device_iter(struct super_block *sb, u32 layout_type,
++				      struct nfsd4_pnfs_dev_iter_res *res)
++{
++	dprintk("--> %s: sb=%p\n", __func__, sb);
++
++	BUG_ON(layout_type != LAYOUT_NFSV4_1_FILES);
++
++	/* Whatever happens below, there is nothing further to iterate over */
++	res->gd_eof = 1;
++	/* A non-zero cookie is treated as "device already handed out" */
++	if (res->gd_cookie != 0)
++		return -ENOENT;
++	res->gd_cookie = 1;
++	res->gd_verf = 1;
++	res->gd_devid = 1;
++	dprintk("<-- %s: return 0\n", __func__);
++	return 0;
++}
++
++static int	/* GETDEVICEINFO hook: describes device 1 as one address, pnfsd_lexp_addr */
++pnfsd_lexp_get_device_info(struct super_block *sb,
++			   struct exp_xdr_stream *xdr,
++			   u32 layout_type,
++			   const struct nfsd4_pnfs_deviceid *devid)
++{
++	int err;
++	struct pnfs_filelayout_device fdev;
++	struct pnfs_filelayout_multipath fl_devices[1];
++	u32 fl_stripe_indices[1] = { 0 };
++	struct pnfs_filelayout_devaddr daddr;
++	/* %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x.%03u.%03u (47 chars + NUL) */
++	char daddr_buf[8*4 + 2*3 + 10];
++
++	dprintk("--> %s: sb=%p\n", __func__, sb);
++
++	BUG_ON(layout_type != LAYOUT_NFSV4_1_FILES);
++
++	memset(&fdev, '\0', sizeof(fdev));
++
++	if (devid->devid != 1) {	/* 1 is the only devid pnfsd_lexp_get_device_iter() hands out */
++		printk(KERN_ERR "%s: WARNING: didn't receive a deviceid of 1 "
++			"(got: 0x%llx)\n", __func__, devid->devid);
++		err = -EINVAL;
++		goto out;
++	}
++
++	/* exactly one device entry and one stripe index, held in local arrays */
++	fdev.fl_device_length = 1;
++	fdev.fl_device_list = fl_devices;
++
++	fdev.fl_stripeindices_length = fdev.fl_device_length;
++	fdev.fl_stripeindices_list = fl_stripe_indices;
++
++	daddr.r_addr.data = daddr_buf;
++	daddr.r_addr.len = sizeof(daddr_buf);
++	err = __svc_print_netaddr(&pnfsd_lexp_addr, &daddr.r_addr);
++	if (err < 0)
++		goto out;
++	daddr.r_addr.len = err;	/* a non-negative result is used as the address length */
++	switch (pnfsd_lexp_addr.sa_family) {
++	case AF_INET:
++		daddr.r_netid.data = "tcp";
++		daddr.r_netid.len = 3;
++		break;
++	case AF_INET6:
++		daddr.r_netid.data = "tcp6";
++		daddr.r_netid.len = 4;
++		break;
++	default:
++		BUG();	/* any other address family is treated as a programming error */
++	}
++	fdev.fl_device_list[0].fl_multipath_length = 1;
++	fdev.fl_device_list[0].fl_multipath_list = &daddr;
++
++	/* have nfsd encode the device info */
++	err = filelayout_encode_devinfo(xdr, &fdev);
++out:
++	dprintk("<-- %s: return %d\n", __func__, err);
++	return err;
++}
++
++static int get_stripe_unit(int blocksize)
++{
++	int unit = blocksize >= NFSSVC_MAXBLKSIZE ? blocksize :
++		   (NFSSVC_MAXBLKSIZE / blocksize) * blocksize; /* round max down to a multiple */
++	dprintk("%s: return %d\n", __func__, unit);
++	return unit;
++}
++
++/* LAYOUTGET for the local export: one whole-file files layout whose only DS
++ * filehandle is arg->lg_fh marked as DS; ENOMEM => NFS4ERR_LAYOUTTRYLATER.
++ */
++static enum nfsstat4
++pnfsd_lexp_layout_get(struct inode *inode,
++		      struct exp_xdr_stream *xdr,
++		      const struct nfsd4_pnfs_layoutget_arg *arg,
++		      struct nfsd4_pnfs_layoutget_res *res)
++{
++	enum nfsstat4 rc = NFS4_OK;
++	struct pnfs_filelayout_layout *layout = NULL;
++	struct knfsd_fh *fhp = NULL;
++
++	dprintk("--> %s: inode=%p\n", __func__, inode);
++
++	res->lg_seg.layout_type = LAYOUT_NFSV4_1_FILES;
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;
++
++	layout = kzalloc(sizeof(*layout), GFP_KERNEL);
++	if (layout == NULL)
++		goto error;
++
++	/* Set file layout response args */
++	layout->lg_layout_type = LAYOUT_NFSV4_1_FILES;
++	layout->lg_stripe_type = STRIPE_SPARSE;
++	layout->lg_commit_through_mds = true;
++	layout->lg_stripe_unit = get_stripe_unit(inode->i_sb->s_blocksize);
++	layout->lg_fh_length = 1;
++	layout->device_id.sbid = arg->lg_sbid;
++	layout->device_id.devid = 1;				/*FSFTEMP*/
++	layout->lg_first_stripe_index = 0;			/*FSFTEMP*/
++	layout->lg_pattern_offset = 0;
++
++	fhp = kmalloc(sizeof(*fhp), GFP_KERNEL);
++	if (fhp == NULL)
++		goto error;
++
++	memcpy(fhp, arg->lg_fh, sizeof(*fhp));
++	pnfs_fh_mark_ds(fhp);
++	layout->lg_fh_list = fhp;
++
++	/* Call nfsd to encode layout */
++	rc = filelayout_encode_layout(xdr, layout);
++exit:
++	kfree(layout);
++	kfree(fhp);
++	dprintk("<-- %s: return %d\n", __func__, rc);
++	return rc;
++
++error:
++	rc = NFS4ERR_LAYOUTTRYLATER;	/* only allocation failures reach here */
++	res->lg_seg.length = 0;
++	goto exit;
++}
++
++/* LAYOUTCOMMIT hook: unimplemented, only logs and reports success. */
++static int pnfsd_lexp_layout_commit(struct inode *inode,
++			const struct nfsd4_pnfs_layoutcommit_arg *args,
++			struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	dprintk("%s: (unimplemented)\n", __func__);
++
++	return 0;
++}
++
++/* LAYOUTRETURN hook: unimplemented, only logs and reports success. */
++static int pnfsd_lexp_layout_return(struct inode *inode,
++			const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	dprintk("%s: (unimplemented)\n", __func__);
++
++	return 0;
++}
++
++static int pnfsd_lexp_get_state(struct inode *inode, struct knfsd_fh *fh,
++				struct pnfs_get_state *p)
++{
++	return 0;	/* just use the current stateid: *p is left untouched */
++}
++
++static struct pnfs_export_operations pnfsd_lexp_ops = {	/* installed by pnfsd_lexp_init() */
++	.layout_type = pnfsd_lexp_layout_type,
++	.get_device_info = pnfsd_lexp_get_device_info,
++	.get_device_iter = pnfsd_lexp_get_device_iter,
++	.layout_get = pnfsd_lexp_layout_get,
++	.layout_commit = pnfsd_lexp_layout_commit,	/* stub: always returns 0 */
++	.layout_return = pnfsd_lexp_layout_return,	/* stub: always returns 0 */
++	.get_state = pnfsd_lexp_get_state,
++};
++
++/* Install the local-export pNFS operations on the inode's superblock. */
++void pnfsd_lexp_init(struct inode *inode)
++{
++	dprintk("%s: &pnfsd_lexp_ops=%p\n", __func__, &pnfsd_lexp_ops);
++	inode->i_sb->s_pnfs_op = &pnfsd_lexp_ops;
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_com.c
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-08-23 12:09:03.322501672 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-08-23 12:09:03.323511608 -0400
+@@ -0,0 +1,535 @@
++/*
++ * fs/nfsd/spnfs_com.c
++ *
++ * Communication layer between spNFS kernel and userspace
++ * Based heavily on idmap.c
++ *
++ */
++
++/*
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Marius Aamodt Eriksen <marius@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#include <linux/namei.h>
++#include <linux/mount.h>
++#include <linux/path.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++#include <linux/nfsd/debug.h>
++
++#include <linux/nfsd4_spnfs.h>
++
++#define	NFSDDBG_FACILITY		NFSDDBG_PROC
++
++static ssize_t   spnfs_pipe_upcall(struct file *, struct rpc_pipe_msg *,
++		     char __user *, size_t);
++static ssize_t   spnfs_pipe_downcall(struct file *, const char __user *,
++		     size_t);
++static void      spnfs_pipe_destroy_msg(struct rpc_pipe_msg *);
++
++static struct rpc_pipe_ops spnfs_upcall_ops = {
++	.upcall		= spnfs_pipe_upcall,
++	.downcall	= spnfs_pipe_downcall,
++	.destroy_msg	= spnfs_pipe_destroy_msg,
++};
++
++/* evil global variable */
++struct spnfs *global_spnfs;
++struct spnfs_config *spnfs_config;
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++int spnfs_use_layoutsegments;
++uint64_t layoutsegment_size;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++/*
++ * Used by spnfs_enabled()
++ * Tracks if the subsystem has been initialized at some point.  It doesn't
++ * matter if it's not currently initialized.
++ */
++static int spnfs_enabled_at_some_point;
++
++/*
++ * Create the "spnfs" pipe under rpc_pipefs "/nfs" and publish the context in
++ * global_spnfs.  Returns 0, -EEXIST if already set up, or a negative errno.
++ */
++int
++nfsd_spnfs_new(void)
++{
++	struct spnfs *spnfs = NULL;
++	struct path path;
++	struct nameidata nd;
++	int rc;
++
++	if (global_spnfs != NULL)
++		return -EEXIST;
++
++	path.mnt = rpc_get_mount();
++	if (IS_ERR(path.mnt))
++		return PTR_ERR(path.mnt);
++
++	/* FIXME: do not abuse rpc_pipefs/nfs */
++	rc = vfs_path_lookup(path.mnt->mnt_root, path.mnt, "/nfs", 0, &nd);
++	if (rc)
++		goto err;
++
++	rc = -ENOMEM;
++	spnfs = kzalloc(sizeof(*spnfs), GFP_KERNEL);
++	if (spnfs == NULL)
++		goto err_path;
++	/* locks and waitqueue must be usable before the pipe is visible */
++	mutex_init(&spnfs->spnfs_lock);
++	mutex_init(&spnfs->spnfs_plock);
++	init_waitqueue_head(&spnfs->spnfs_wq);
++	rc = -EPIPE;
++	spnfs->spnfs_dentry = rpc_mkpipe(nd.path.dentry, "spnfs", spnfs,
++					 &spnfs_upcall_ops, 0);
++	if (IS_ERR(spnfs->spnfs_dentry))
++		goto err_path;
++	path_put(&nd.path);	/* drop the reference taken by the lookup */
++	global_spnfs = spnfs;
++	spnfs_enabled_at_some_point = 1;
++	return 0;
++err_path:
++	path_put(&nd.path);
++err:
++	rpc_put_mount();
++	kfree(spnfs);
++	return rc;
++}
++
++/* again, code it like we're going to remove the global variable */
++void
++nfsd_spnfs_delete(void)
++{
++	struct spnfs *spnfs = global_spnfs;
++
++	if (!spnfs)
++		return;
++	rpc_unlink(spnfs->spnfs_dentry);
++	rpc_put_mount();
++	global_spnfs = NULL;
++	kfree(spnfs);
++}
++
++/*
++ * rpc_pipefs ->upcall handler: copy the unread part of a queued message to
++ * the reader's buffer.  See nfs_idmap_id for an example of enqueueing.
++ */
++static ssize_t
++spnfs_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
++    char __user *dst, size_t buflen)
++{
++	char *data = (char *)msg->data + msg->copied;
++	ssize_t mlen = msg->len - msg->copied;
++	ssize_t left;
++
++	if (mlen > buflen)
++		mlen = buflen;
++
++	left = copy_to_user(dst, data, mlen);
++	if (mlen > 0 && left == mlen) {	/* nothing could be copied */
++		msg->errno = -EFAULT;
++		return -EFAULT;
++	}
++	mlen -= left;
++	msg->copied += mlen;
++	msg->errno = 0;
++	return mlen;
++}
++
++/* rpc_pipefs ->downcall: store userspace's reply for the waiting upcall. */
++static ssize_t
++spnfs_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
++{
++	struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
++	struct spnfs *spnfs = (struct spnfs *)rpci->private;
++	struct spnfs_msg *im_in = NULL, *im = &spnfs->spnfs_im;
++	int ret;
++
++	if (mlen != sizeof(struct spnfs_msg))
++		return -ENOSPC;
++
++	im_in = kmalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im_in == NULL)
++		return -ENOMEM;
++
++	if (copy_from_user(im_in, src, mlen) != 0) {
++		kfree(im_in);	/* do not leak the bounce buffer */
++		return -EFAULT;
++	}
++	mutex_lock(&spnfs->spnfs_plock);
++	ret = mlen;
++	im->im_status = im_in->im_status;
++	/* If we got an error, terminate now, and wake up pending upcalls */
++	if (!(im_in->im_status & SPNFS_STATUS_SUCCESS)) {
++		wake_up(&spnfs->spnfs_wq);
++		goto out;
++	}
++
++	ret = -EINVAL;
++	/* Did we match the current upcall? */
++	/* DMXXX: do not understand the comment above, from original code */
++	/* DMXXX: when do we _not_ match the current upcall? */
++	/* DMXXX: anyway, let's do a simplistic check */
++	if (im_in->im_type == im->im_type) {
++		/* copy the response into the spnfs struct */
++		memcpy(&im->im_res, &im_in->im_res, sizeof(im->im_res));
++		ret = mlen;
++	} else
++		dprintk("spnfs: downcall type != upcall type\n");
++
++	wake_up(&spnfs->spnfs_wq);
++/* DMXXX handle rval processing */
++out:
++	mutex_unlock(&spnfs->spnfs_plock);
++	kfree(im_in);
++	return ret;
++}
++
++static void
++spnfs_pipe_destroy_msg(struct rpc_pipe_msg *msg)
++{
++	struct spnfs_msg *im = msg->data;
++	struct spnfs *spnfs = container_of(im, struct spnfs, spnfs_im);
++
++	if (msg->errno >= 0)
++		return;
++	mutex_lock(&spnfs->spnfs_plock);
++	im->im_status = SPNFS_STATUS_FAIL;  /* DMXXX */
++	wake_up(&spnfs->spnfs_wq);
++	mutex_unlock(&spnfs->spnfs_plock);
++}
++
++/*
++ * Generic upcall, called by functions in spnfs_ops.c: queue @upmsg, sleep
++ * until woken, then copy the reply into @res.  Returns 0 on success or -EIO.
++ */
++int
++spnfs_upcall(struct spnfs *spnfs, struct spnfs_msg *upmsg,
++		union spnfs_msg_res *res)
++{
++	struct rpc_pipe_msg msg;
++	struct spnfs_msg *im;
++	DECLARE_WAITQUEUE(wq, current);
++	int ret = -EIO;
++	int rval;
++
++	/* no pipe context, e.g. spnfs was never started or has been deleted */
++	if (spnfs == NULL)
++		return ret;
++	im = &spnfs->spnfs_im;
++	mutex_lock(&spnfs->spnfs_lock);
++	mutex_lock(&spnfs->spnfs_plock);
++	memset(im, 0, sizeof(*im));
++	memcpy(im, upmsg, sizeof(*upmsg));
++	memset(&msg, 0, sizeof(msg));
++	msg.data = im;
++	msg.len = sizeof(*im);
++	add_wait_queue(&spnfs->spnfs_wq, &wq);
++	rval = rpc_queue_upcall(spnfs->spnfs_dentry->d_inode, &msg);
++	if (rval < 0) {
++		remove_wait_queue(&spnfs->spnfs_wq, &wq);
++		goto out;
++	}
++
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	mutex_unlock(&spnfs->spnfs_plock);
++	schedule();
++	current->state = TASK_RUNNING;
++	remove_wait_queue(&spnfs->spnfs_wq, &wq);
++	mutex_lock(&spnfs->spnfs_plock);
++	if (im->im_status & SPNFS_STATUS_SUCCESS) {
++		/* copy our result from the upcall */
++		memcpy(res, &im->im_res, sizeof(*res));
++		ret = 0;
++	}
++out:
++	memset(im, 0, sizeof(*im));
++	mutex_unlock(&spnfs->spnfs_plock);
++	mutex_unlock(&spnfs->spnfs_lock);
++	return(ret);
++}
++
++/*
++ * This is used to determine if the spnfsd daemon has been started at
++ * least once since the system came up.  This is used by the export
++ * mechanism to decide if spnfs is in use.
++ *
++ * Returns non-zero if the spnfsd has initialized the communication pipe
++ * at least once.
++ */
++int spnfs_enabled(void)
++{
++	return spnfs_enabled_at_some_point;
++}
++
++#ifdef CONFIG_PROC_FS
++
++/*
++ * procfs virtual files for user/kernel space communication:
++ *
++ * ctl - currently just an on/off switch...can be expanded
++ * getfh - fd to fh conversion
++ * recall - recall a layout from the command line, for example:
++ *		echo <path> > /proc/fs/spnfs/recall
++ * config - configuration info, e.g., stripe size, num ds, etc.
++ */
++
++/*-------------- start ctl -------------------------*/
++static ssize_t ctl_write(struct file *file, const char __user *buf,
++			 size_t count, loff_t *offset)
++{
++	int cmd, rc;
++
++	if (copy_from_user((int *)&cmd, (int *)buf, sizeof(int)))
++		return -EFAULT;
++	if (cmd) {
++		rc = nfsd_spnfs_new();
++		if (rc != 0)
++			return rc;
++	} else
++		nfsd_spnfs_delete();
++
++	return count;
++}
++
++static const struct file_operations ctl_ops = {
++	.write		= ctl_write,
++};
++/*-------------- end ctl ---------------------------*/
++
++/*-------------- start config -------------------------*/
++static ssize_t config_write(struct file *file, const char __user *buf,
++			    size_t count, loff_t *offset)
++{
++	static struct spnfs_config cfg;	/* backing store for spnfs_config */
++	if (count > sizeof(cfg))	/* would overrun the static buffer */
++		return -EINVAL;
++	if (copy_from_user(&cfg, buf, count))
++		return -EFAULT;
++	spnfs_config = &cfg;
++	return 0;
++}
++
++static const struct file_operations config_ops = {
++	.write		= config_write,
++};
++/*-------------- end config ---------------------------*/
++
++/*-------------- start getfh -----------------------*/
++static int getfh_open(struct inode *inode, struct file *file)
++{
++	/* zeroed so a read before any write cannot expose stale heap data */
++	file->private_data = kzalloc(sizeof(struct nfs_fh), GFP_KERNEL);
++	if (file->private_data == NULL)
++		return -ENOMEM;
++	return 0;
++}
++
++static ssize_t getfh_read(struct file *file, char __user *buf, size_t count,
++			  loff_t *offset)
++{
++	if (copy_to_user(buf, file->private_data,
++			 min_t(size_t, count, sizeof(struct nfs_fh))))
++		return -EFAULT;
++	return count;	/* as before, even if count > sizeof(struct nfs_fh) */
++}
++
++static ssize_t getfh_write(struct file *file, const char __user *buf,
++			   size_t count, loff_t *offset)
++{
++	int fd;
++
++	if (copy_from_user((int *)&fd, (int *)buf, sizeof(int)))
++		return -EFAULT;
++	if (spnfs_getfh(fd, file->private_data) != 0)
++		return -EIO;
++
++	return count;
++}
++
++static int getfh_release(struct inode *inode, struct file *file)
++{
++	kfree(file->private_data);
++	return 0;
++}
++
++static const struct file_operations getfh_ops = {
++	.open		= getfh_open,
++	.read		= getfh_read,
++	.write		= getfh_write,
++	.release	= getfh_release,
++};
++/*-------------- end getfh ------------------------*/
++
++
++/*-------------- start recall layout --------------*/
++static ssize_t recall_write(struct file *file, const char __user *buf,
++			    size_t count, loff_t *offset)
++{
++	char input[128];
++	char *path, *str, *p;
++	int rc;
++	u64 off = 0, len = 0;
++
++	if (count > 128)
++		return -EINVAL;
++
++	if (copy_from_user(input, buf, count))
++		return -EFAULT;
++
++	/* assumes newline-terminated path */
++	p = memchr(input, '\n', count);
++	if (p == NULL)
++		return -EINVAL;
++	*p = '\0';
++
++	/*
++	 * Scan for path and, optionally, an offset and length
++	 * of a layout segment to be recalled; if there are two
++	 * fields, they're assumed to be path and offset.
++	 */
++	p = input;
++	path = strsep(&p, " ");
++	if (path == NULL)
++		return -EINVAL;
++
++	str = strsep(&p, " ");
++	if (str != NULL) {
++		rc = strict_strtoull(str, 10, &off);
++		if (rc != 0)
++			return -EINVAL;
++
++		str = strsep(&p, " ");
++		if (str != NULL) {
++			rc = strict_strtoull(str, 10, &len);
++			if (rc != 0)
++				return -EINVAL;
++		}
++	}
++
++	rc = spnfs_test_layoutrecall(path, off, len);
++	if (rc != 0)
++		return rc;
++
++	return count;
++}
++
++static const struct file_operations recall_ops = {
++	.write		= recall_write,
++};
++/*-------------- end recall layout --------------*/
++
++
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++/*-------------- start layoutseg -------------------------*/
++static ssize_t layoutseg_write(struct file *file, const char __user *buf,
++			       size_t count, loff_t *offset)
++{
++	char cmd;
++
++	if (count == 0)		/* nothing to parse */
++		return 0;
++	if (copy_from_user(&cmd, buf, 1))
++		return -EFAULT;
++	/* '0' disables layout segments; any other first byte enables them */
++	spnfs_use_layoutsegments = (cmd != '0');
++
++	return count;
++}
++
++static const struct file_operations layoutseg_ops = {
++	.write		= layoutseg_write,
++};
++/*-------------- end layoutseg ---------------------------*/
++
++/*-------------- start layoutsegsize -------------------------*/
++static ssize_t layoutsegsize_write(struct file *file, const char __user *buf,
++				   size_t count, loff_t *offset)
++{
++	char cmd[50] = "";	/* zero-filled, so the copy stays terminated */
++
++	if (copy_from_user(cmd, buf, min_t(size_t, count, sizeof(cmd) - 1)))
++		return -EFAULT;
++	layoutsegment_size = simple_strtoull(cmd, NULL, 10);
++
++	return count;
++}
++
++static const struct file_operations layoutsegsize_ops = {
++	.write		= layoutsegsize_write,
++};
++/*-------------- end layoutsegsize ---------------------------*/
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++int
++spnfs_init_proc(void)
++{
++	struct proc_dir_entry *entry;
++
++	entry = proc_mkdir("fs/spnfs", NULL);
++	if (!entry)
++		return -ENOMEM;
++
++	entry = create_proc_entry("fs/spnfs/ctl", 0, NULL);
++	if (!entry)
++		return -ENOMEM;
++	entry->proc_fops = &ctl_ops;
++
++	entry = create_proc_entry("fs/spnfs/config", 0, NULL);
++	if (!entry)
++		return -ENOMEM;
++	entry->proc_fops = &config_ops;
++
++	entry = create_proc_entry("fs/spnfs/getfh", 0, NULL);
++	if (!entry)
++		return -ENOMEM;
++	entry->proc_fops = &getfh_ops;
++
++	entry = create_proc_entry("fs/spnfs/recall", 0, NULL);
++	if (!entry)
++		return -ENOMEM;
++	entry->proc_fops = &recall_ops;
++
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++	entry = create_proc_entry("fs/spnfs/layoutseg", 0, NULL);
++	if (!entry)
++		return -ENOMEM;
++	entry->proc_fops = &layoutseg_ops;
++
++	entry = create_proc_entry("fs/spnfs/layoutsegsize", 0, NULL);
++	if (!entry)
++		return -ENOMEM;
++	entry->proc_fops = &layoutsegsize_ops;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++	return 0;
++}
++#endif /* CONFIG_PROC_FS */
+diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-08-23 12:09:03.324501390 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-08-23 12:09:03.324501390 -0400
+@@ -0,0 +1,878 @@
++/*
++ * fs/nfsd/spnfs_ops.c
++ *
++ * Communication layer between spNFS kernel and userspace
++ *
++ */
++/******************************************************************************
++
++(c) 2007 Network Appliance, Inc.  All Rights Reserved.
++
++Network Appliance provides this source code under the GPL v2 License.
++The GPL v2 license is available at
++http://opensource.org/licenses/gpl-license.php.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++******************************************************************************/
++
++#include <linux/sched.h>
++#include <linux/file.h>
++#include <linux/namei.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++#include "pnfsd.h"
++
++/* comment out CONFIG_SPNFS_TEST for non-test behaviour */
++/* #define CONFIG_SPNFS_TEST 1 */
++
++#define	NFSDDBG_FACILITY		NFSDDBG_PNFS
++
++/*
++ * The functions that are called from elsewhere in the kernel
++ * to perform tasks in userspace
++ *
++ */
++
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++extern int spnfs_use_layoutsegments;
++extern uint64_t layoutsegment_size;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++extern struct spnfs *global_spnfs;
++
++int
++spnfs_layout_type(struct super_block *sb)
++{
++	return LAYOUT_NFSV4_1_FILES;
++}
++
++enum nfsstat4
++spnfs_layoutget(struct inode *inode, struct exp_xdr_stream *xdr,
++		const struct nfsd4_pnfs_layoutget_arg *lg_arg,
++		struct nfsd4_pnfs_layoutget_res *lg_res)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	struct pnfs_filelayout_layout *flp = NULL;
++	int status, i;
++	enum nfsstat4 nfserr;
++
++	im = kmalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	im->im_type = SPNFS_TYPE_LAYOUTGET;
++	im->im_args.layoutget_args.inode = inode->i_ino;
++	im->im_args.layoutget_args.generation = inode->i_generation;
++
++	/* call function to queue the msg for upcall */
++	if (spnfs_upcall(spnfs, im, res) != 0) {
++		dprintk("failed spnfs upcall: layoutget\n");
++		nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		goto layoutget_cleanup;
++	}
++	status = res->layoutget_res.status;
++	if (status != 0) {
++		/* FIXME? until user mode is fixed, translate system error */
++		switch (status) {
++		case -E2BIG:
++		case -ETOOSMALL:
++			nfserr = NFS4ERR_TOOSMALL;
++			break;
++		case -ENOMEM:
++		case -EAGAIN:
++		case -EINTR:
++			nfserr = NFS4ERR_LAYOUTTRYLATER;
++			break;
++		case -ENOENT:
++			nfserr = NFS4ERR_BADLAYOUT;
++			break;
++ 		default:
++			nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		}
++		dprintk("spnfs layout_get upcall: status=%d nfserr=%u\n",
++			status, nfserr);
++		goto layoutget_cleanup;
++	}
++
++	lg_res->lg_return_on_close = 0;
++#if defined(CONFIG_SPNFS_LAYOUTSEGMENTS)
++	/* if spnfs_use_layoutsegments & layoutsegment_size == 0, use */
++	/* the amount requested by the client.			      */
++	if (spnfs_use_layoutsegments) {
++		if (layoutsegment_size != 0)
++			lg_res->lg_seg.length = layoutsegment_size;
++	} else
++		lg_res->lg_seg.length = NFS4_MAX_UINT64;
++#else
++	lg_res->lg_seg.length = NFS4_MAX_UINT64;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++	flp = kmalloc(sizeof(struct pnfs_filelayout_layout), GFP_KERNEL);
++	if (flp == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	flp->device_id.sbid = lg_arg->lg_sbid;
++	flp->device_id.devid = res->layoutget_res.devid;
++	flp->lg_layout_type = 1; /* XXX */
++	flp->lg_stripe_type = res->layoutget_res.stripe_type;
++	flp->lg_commit_through_mds = 0;
++	flp->lg_stripe_unit =  res->layoutget_res.stripe_size;
++	flp->lg_first_stripe_index = 0;
++	flp->lg_pattern_offset = 0;
++	flp->lg_fh_length = res->layoutget_res.stripe_count;
++
++	flp->lg_fh_list = kmalloc(flp->lg_fh_length * sizeof(struct knfsd_fh),
++				  GFP_KERNEL);
++	if (flp->lg_fh_list == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	/*
++	 * FIX: Doing an extra copy here.  Should group res.flist's fh_len
++	 * and fh_val into a knfsd_fh structure.
++	 */
++	for (i = 0; i < flp->lg_fh_length; i++) {
++		flp->lg_fh_list[i].fh_size = res->layoutget_res.flist[i].fh_len;
++		memcpy(&flp->lg_fh_list[i].fh_base,
++		       res->layoutget_res.flist[i].fh_val,
++		       res->layoutget_res.flist[i].fh_len);
++	}
++
++	/* encode the layoutget body */
++	nfserr = filelayout_encode_layout(xdr, flp);
++
++layoutget_cleanup:
++	if (flp) {
++		if (flp->lg_fh_list)
++			kfree(flp->lg_fh_list);
++		kfree(flp);
++	}
++	kfree(im);
++	kfree(res);
++
++	return nfserr;
++}
++
++int
++spnfs_layoutcommit(void)
++{
++	return 0;
++}
++
++int
++spnfs_layoutreturn(struct inode *inode,
++		   const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	return 0;
++}
++
++int
++spnfs_layoutrecall(struct inode *inode, int type, u64 offset, u64 len)
++{
++	struct super_block *sb;
++	struct nfsd4_pnfs_cb_layout lr;
++
++	switch (type) {
++	case RETURN_FILE:
++		sb = inode->i_sb;
++		dprintk("%s: recalling layout for ino = %lu\n",
++			__func__, inode->i_ino);
++		break;
++	case RETURN_FSID:
++		sb = inode->i_sb;
++		dprintk("%s: recalling layout for fsid x (unimplemented)\n",
++			__func__);
++		return 0;
++	case RETURN_ALL:
++		/* XXX figure out how to get a sb since there's no inode ptr */
++		dprintk("%s: recalling all layouts (unimplemented)\n",
++			__func__);
++		return 0;
++	default:
++		return -EINVAL;
++	}
++
++	lr.cbl_recall_type = type;
++	lr.cbl_seg.layout_type = LAYOUT_NFSV4_1_FILES;
++	lr.cbl_seg.clientid = 0;
++	lr.cbl_seg.offset = offset;
++	lr.cbl_seg.length = len;
++	lr.cbl_seg.iomode = IOMODE_ANY;
++	lr.cbl_layoutchanged = 0;
++
++	nfsd_layout_recall_cb(sb, inode, &lr);
++
++	return 0;
++}
++
++
++int
++spnfs_test_layoutrecall(char *path, u64 offset, u64 len)
++{
++	struct nameidata nd;
++	struct inode *inode;
++	int type, rc;
++
++	dprintk("%s: path=%s, offset=%llu, len=%llu\n",
++		__func__, path, offset, len);
++
++	if (strcmp(path, "all") == 0) {
++		inode = NULL;
++		type = RETURN_ALL;
++	} else {
++		rc = path_lookup(path, 0, &nd);
++		if (rc != 0)
++			return -ENOENT;
++
++		/*
++		 * XXX todo: add a RETURN_FSID scenario here...maybe if
++		 * inode is a dir...
++		 */
++
++		inode = nd.path.dentry->d_inode;
++		type = RETURN_FILE;
++	}
++
++	if (len == 0)
++		len = NFS4_MAX_UINT64;
++
++	rc = spnfs_layoutrecall(inode, type, offset, len);
++
++	if (type != RETURN_ALL)
++		path_put(&nd.path);
++	return rc;
++}
++
++int
++spnfs_getdeviceiter(struct super_block *sb,
++		    u32 layout_type,
++		    struct nfsd4_pnfs_dev_iter_res *gd_res)
++{
++	struct spnfs *spnfs = global_spnfs;   /* XXX keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++
++	im = kmalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto getdeviceiter_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto getdeviceiter_out;
++	}
++
++	im->im_type = SPNFS_TYPE_GETDEVICEITER;
++	im->im_args.getdeviceiter_args.cookie = gd_res->gd_cookie;
++	im->im_args.getdeviceiter_args.verf = gd_res->gd_verf;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto getdeviceiter_out;
++	}
++	status = res->getdeviceiter_res.status;
++
++	if (res->getdeviceiter_res.eof)
++		gd_res->gd_eof = 1;
++	else {
++		gd_res->gd_devid = res->getdeviceiter_res.devid;
++		gd_res->gd_cookie = res->getdeviceiter_res.cookie;
++		gd_res->gd_verf = res->getdeviceiter_res.verf;
++		gd_res->gd_eof = 0;
++	}
++
++getdeviceiter_out:
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++#ifdef CONFIG_SPNFS_TEST
++/*
++ * Setup the rq_res xdr_buf.  The svc_rqst rq_respages[1] page contains the
++ * 1024 encoded stripe indices.
++ *
++ * Skip the devaddr4 length and encode the indicies count (1024) in the
++ * rq_res.head and set the rq_res.head length.
++ *
++ * Set the rq_res page_len to 4096 (for the 1024 stripe indices).
++ * Set the rq_res xdr_buf tail base to rq_respages[0] just after the
++ * rq_res head to hold the rest of the getdeviceinfo return.
++ *
++ * So rq_respages[rq_resused - 1] contains the rq_res.head and rq_res.tail and
++ * rq_respages[rq_resused] contains the rq_res.pages.
++ */
++static int spnfs_test_indices_xdr(struct pnfs_xdr_info *info,
++				  const struct pnfs_filelayout_device *fdev)
++{
++	struct nfsd4_compoundres *resp = info->resp;
++	struct svc_rqst *rqstp = resp->rqstp;
++	struct xdr_buf *xb = &resp->rqstp->rq_res;
++	__be32 *p;
++
++	p = nfsd4_xdr_reserve_space(resp, 8);
++	p++; /* Fill in length later */
++	*p++ = cpu_to_be32(fdev->fl_stripeindices_length); /* 1024 */
++	resp->p = p;
++
++	xb->head[0].iov_len = (char *)resp->p - (char *)xb->head[0].iov_base;
++	xb->pages = &rqstp->rq_respages[rqstp->rq_resused];
++	xb->page_base = 0;
++	xb->page_len = PAGE_SIZE; /* page of 1024 encoded indices */
++	xb->tail[0].iov_base = resp->p;
++	resp->end = xb->head[0].iov_base + PAGE_SIZE;
++	xb->tail[0].iov_len = (char *)resp->end - (char *)resp->p;
++	return 0;
++}
++/*
++ * Return a stripeindices of length 1024 to test
++ * the pNFS client multipage getdeviceinfo implementation.
++ *
++ * Encode a page of stripe indices.
++ */
++static void spnfs_set_test_indices(struct pnfs_filelayout_device *fldev,
++				  struct spnfs_device *dev,
++				  struct pnfs_devinfo_arg *info)
++{
++	struct svc_rqst *rqstp = info->xdr.resp->rqstp;
++	__be32 *p;
++	int i, j = 0;
++
++	p = (__be32 *)page_address(rqstp->rq_respages[rqstp->rq_resused]);
++	fldev->fl_stripeindices_length = 1024;
++	/* round-robin the data servers device index into the stripe indicie */
++	for (i = 0; i < 1024; i++) {
++		*p++ = cpu_to_be32(j);
++		if (j < dev->dscount - 1)
++			j++;
++		else
++			j = 0;
++	}
++	fldev->fl_stripeindices_list = NULL;
++}
++#endif /* CONFIG_SPNFS_TEST */
++
++/* Fetch @devid's device info through an upcall and encode it into @xdr. */
++int
++spnfs_getdeviceinfo(struct super_block *sb, struct exp_xdr_stream *xdr,
++		    u32 layout_type,
++		    const struct nfsd4_pnfs_deviceid *devid)
++{
++	struct spnfs *spnfs = global_spnfs;
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	struct spnfs_device *dev;
++	struct pnfs_filelayout_device *fldev = NULL;
++	struct pnfs_filelayout_multipath *mp = NULL;
++	struct pnfs_filelayout_devaddr *fldap = NULL;
++	int status = 0, i, len;
++
++	im = kmalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++
++	im->im_type = SPNFS_TYPE_GETDEVICEINFO;
++	/* XXX FIX: figure out what to do about fsid */
++	im->im_args.getdeviceinfo_args.devid = devid->devid;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto getdeviceinfo_out;
++	}
++	status = res->getdeviceinfo_res.status;
++	if (status != 0)
++		goto getdeviceinfo_out;
++
++	dev = &res->getdeviceinfo_res.devinfo;
++
++	/* Fill in the device data, i.e., nfs4_1_file_layout_ds_addr4 */
++	fldev = kzalloc(sizeof(struct pnfs_filelayout_device), GFP_KERNEL);
++	if (fldev == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++
++	/*
++	 * Stripe count is the same as data server count for our purposes
++	 */
++	fldev->fl_stripeindices_length = dev->dscount;
++	fldev->fl_device_length = dev->dscount;
++
++	/* Set stripe indices */
++#ifdef CONFIG_SPNFS_TEST
++	spnfs_set_test_indices(fldev, dev, info);
++	fldev->fl_enc_stripe_indices = spnfs_test_indices_xdr;
++#else /* CONFIG_SPNFS_TEST */
++	fldev->fl_stripeindices_list =
++		kmalloc(fldev->fl_stripeindices_length * sizeof(u32),
++			GFP_KERNEL);
++	if (fldev->fl_stripeindices_list == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++	for (i = 0; i < fldev->fl_stripeindices_length; i++)
++		fldev->fl_stripeindices_list[i] = i;
++#endif /* CONFIG_SPNFS_TEST */
++
++	/*
++	 * Set the device's data server addresses  No multipath for spnfs,
++	 * so mp length is always 1.
++	 * The list is zero-allocated so cleanup can run on a partial setup.
++	 */
++	fldev->fl_device_list =
++		kcalloc(fldev->fl_device_length,
++			sizeof(struct pnfs_filelayout_multipath),
++			GFP_KERNEL);
++	if (fldev->fl_device_list == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++	for (i = 0; i < fldev->fl_device_length; i++) {
++		mp = &fldev->fl_device_list[i];
++		mp->fl_multipath_length = 1;
++		mp->fl_multipath_list =
++			kzalloc(sizeof(struct pnfs_filelayout_devaddr),
++				GFP_KERNEL);
++		if (mp->fl_multipath_list == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		fldap = mp->fl_multipath_list;
++
++		/*
++		 * Copy the netid into the device address, for example: "tcp"
++		 */
++		len = strlen(dev->dslist[i].netid);
++		fldap->r_netid.data = kmalloc(len, GFP_KERNEL);
++		if (fldap->r_netid.data == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		memcpy(fldap->r_netid.data, dev->dslist[i].netid, len);
++		fldap->r_netid.len = len;
++
++		/*
++		 * Copy the network address into the device address,
++		 * for example: "10.35.9.16.08.01"
++		 */
++		len = strlen(dev->dslist[i].addr);
++		fldap->r_addr.data = kmalloc(len, GFP_KERNEL);
++		if (fldap->r_addr.data == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		memcpy(fldap->r_addr.data, dev->dslist[i].addr, len);
++		fldap->r_addr.len = len;
++	}
++	/* encode the device data */
++	status = filelayout_encode_devinfo(xdr, fldev);
++
++getdeviceinfo_out:
++	if (fldev) {
++		kfree(fldev->fl_stripeindices_list);
++		if (fldev->fl_device_list) {
++			for (i = 0; i < fldev->fl_device_length; i++) {
++				fldap =
++				    fldev->fl_device_list[i].fl_multipath_list;
++				if (fldap == NULL)	/* never allocated */
++					continue;
++				kfree(fldap->r_netid.data);
++				kfree(fldap->r_addr.data);
++				kfree(fldap);
++			}
++			kfree(fldev->fl_device_list);
++		}
++		kfree(fldev);
++	}
++	kfree(im);
++	kfree(res);
++	return status;
++}
++
++int
++spnfs_setattr(void)
++{
++	return 0;
++}
++
++int
++spnfs_open(struct inode *inode, struct nfsd4_open *open)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++
++	im = kmalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto open_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto open_out;
++	}
++
++	im->im_type = SPNFS_TYPE_OPEN;
++	im->im_args.open_args.inode = inode->i_ino;
++	im->im_args.open_args.generation = inode->i_generation;
++	im->im_args.open_args.create = open->op_create;
++	im->im_args.open_args.createmode = open->op_createmode;
++	im->im_args.open_args.truncate = open->op_truncate;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto open_out;
++	}
++	status = res->open_res.status;
++
++open_out:
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++int
++spnfs_create(void)
++{
++	return 0;
++}
++
++/*
++ * Invokes the spnfsd with the inode number of the object to remove.
++ * The file has already been removed on the MDS, so all the spnfsd
++ * daemon does is remove the stripes.
++ * Returns 0 on success otherwise error code
++ */
++int
++spnfs_remove(unsigned long ino, unsigned long generation)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++
++	im = kmalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto remove_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto remove_out;
++	}
++
++	im->im_type = SPNFS_TYPE_REMOVE;
++	im->im_args.remove_args.inode = ino;
++	im->im_args.remove_args.generation = generation;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto remove_out;
++	}
++	status = res->remove_res.status;
++
++remove_out:
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++static int
++read_one(struct inode *inode, loff_t offset, size_t len, char *buf,
++	 struct file **filp)
++{
++	loff_t bufoffset = 0, soffset, pos, snum, soff, tmp;
++	size_t iolen;
++	int completed = 0, ds, err;
++
++	while (len > 0) {
++		tmp = offset;
++		soff = do_div(tmp, spnfs_config->stripe_size);
++		snum = tmp;
++		ds = do_div(tmp, spnfs_config->num_ds);
++		if (spnfs_config->dense_striping == 0)
++			soffset = offset;
++		else {
++			tmp = snum;
++			do_div(tmp, spnfs_config->num_ds);
++			soffset = tmp * spnfs_config->stripe_size + soff;
++		}
++		if (len < spnfs_config->stripe_size - soff)
++			iolen = len;
++		else
++			iolen = spnfs_config->stripe_size - soff;
++
++		pos = soffset;
++		err = vfs_read(filp[ds], buf + bufoffset, iolen, &pos);
++		if (err < 0)
++			return -EIO;
++		if (err == 0)
++			break;
++		filp[ds]->f_pos = pos;
++		iolen = err;
++		completed += iolen;
++		len -= iolen;
++		offset += iolen;
++		bufoffset += iolen;
++	}
++
++	return completed;
++}
++
++static __be32
++read(struct inode *inode, loff_t offset, unsigned long *lenp, int vlen,
++     struct svc_rqst *rqstp)
++{
++	int i, vnum, err, bytecount = 0;
++	char path[128];
++	struct file *filp[SPNFS_MAX_DATA_SERVERS];
++	size_t iolen;
++	__be32 status = nfs_ok;
++
++	/*
++	 * Open this inode's stripe file under each configured ds_dir, then
++	 * fill the rq_vec segments via read_one().  XXX The opens should
++	 * happen at open time, but storing the files in nfsd's state
++	 * structures and passing them around is messy.  Revisit this.
++	 */
++	memset(filp, 0, sizeof(filp));
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		snprintf(path, sizeof(path), "%s/%ld.%u",
++			 spnfs_config->ds_dir[i], inode->i_ino,
++			 inode->i_generation);
++		filp[i] = filp_open(path, O_RDONLY | O_LARGEFILE, 0);
++		if (IS_ERR(filp[i])) {
++			filp[i] = NULL;	/* filp_open() fails with ERR_PTR */
++			status = nfserr_io;
++			goto read_out;
++		}
++		get_file(filp[i]);
++	}
++
++	for (vnum = 0; vnum < vlen; vnum++) {
++		iolen = rqstp->rq_vec[vnum].iov_len;
++		err = read_one(inode, offset + bytecount, iolen,
++			       (char *)rqstp->rq_vec[vnum].iov_base, filp);
++		if (err < 0) {
++			status = nfserr_io;
++			goto read_out;
++		}
++		bytecount += err;
++		if (err < iolen)	/* short read: stop here */
++			goto read_out;
++	}
++
++read_out:
++	*lenp = bytecount;	/* bytes read so far, also on error */
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		if (filp[i]) {
++			filp_close(filp[i], current->files);
++			fput(filp[i]);	/* pairs with get_file() above */
++		}
++	}
++	return status;
++}
++
++__be32
++spnfs_read(struct inode *inode, loff_t offset, unsigned long *lenp, int vlen,
++	   struct svc_rqst *rqstp)
++{
++	if (spnfs_config)
++		return read(inode, offset, lenp, vlen, rqstp);
++	else {
++		printk(KERN_ERR "Please upgrade to latest spnfsd\n");
++		return nfserr_notsupp;
++	}
++}
++
++static int
++write_one(struct inode *inode, loff_t offset, size_t len, char *buf,
++	  struct file **filp)
++{
++	loff_t bufoffset = 0, soffset, pos, snum, soff, tmp;
++	size_t iolen;
++	int completed = 0, ds, err;
++
++	while (len > 0) {
++		tmp = offset;
++		soff = do_div(tmp, spnfs_config->stripe_size);
++		snum = tmp;
++		ds = do_div(tmp, spnfs_config->num_ds);
++		if (spnfs_config->dense_striping == 0)
++			soffset = offset;
++		else {
++			tmp = snum;
++			do_div(tmp, spnfs_config->num_ds);
++			soffset = tmp * spnfs_config->stripe_size + soff;
++		}
++		/* clamp this chunk to the end of the current stripe unit */
++		if (len < spnfs_config->stripe_size - soff)
++			iolen = len;
++		else
++			iolen = spnfs_config->stripe_size - soff;
++		pos = soffset;
++		err = vfs_write(filp[ds], buf + bufoffset, iolen, &pos);
++		if (err <= 0)	/* a 0-byte write would loop forever */
++			return -EIO;
++		filp[ds]->f_pos = pos;
++		iolen = err;
++		completed += iolen;
++		len -= iolen;
++		offset += iolen;
++		bufoffset += iolen;
++	}
++
++	return completed;
++}
++
++static __be32
++write(struct inode *inode, loff_t offset, size_t len, int vlen,
++      struct svc_rqst *rqstp)
++{
++	int i, vnum, err, bytecount = 0;
++	char path[128];
++	struct file *filp[SPNFS_MAX_DATA_SERVERS];
++	size_t iolen;
++	__be32 status = nfs_ok;
++
++	/*
++	 * Open this inode's stripe file under each configured ds_dir, then
++	 * push the rq_vec segments out via write_one().  XXX The opens
++	 * should happen at open time instead.  Revisit this.
++	 */
++	memset(filp, 0, sizeof(filp));
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		snprintf(path, sizeof(path), "%s/%ld.%u",
++			 spnfs_config->ds_dir[i], inode->i_ino,
++			 inode->i_generation);
++		filp[i] = filp_open(path, O_RDWR | O_LARGEFILE, 0);
++		if (IS_ERR(filp[i])) {
++			filp[i] = NULL;	/* filp_open() fails with ERR_PTR */
++			status = nfserr_io;
++			goto write_out;
++		}
++		get_file(filp[i]);
++	}
++
++	for (vnum = 0; vnum < vlen; vnum++) {
++		iolen = rqstp->rq_vec[vnum].iov_len;
++		err = write_one(inode, offset + bytecount, iolen,
++				(char *)rqstp->rq_vec[vnum].iov_base, filp);
++		if (err != iolen) {
++			dprintk("spnfs_write: err=%d expected %Zd\n", err, iolen);
++			status = nfserr_io;
++			goto write_out;
++		}
++		bytecount += rqstp->rq_vec[vnum].iov_len;
++	}
++
++write_out:
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		if (filp[i]) {
++			filp_close(filp[i], current->files);
++			fput(filp[i]);	/* pairs with get_file() above */
++		}
++	}
++	return status;
++}
++
++__be32
++spnfs_write(struct inode *inode, loff_t offset, size_t len, int vlen,
++	    struct svc_rqst *rqstp)
++{
++	if (spnfs_config)
++		return write(inode, offset, len, vlen, rqstp);
++	else {
++		printk(KERN_ERR "Please upgrade to latest spnfsd\n");
++		return nfserr_notsupp;
++	}
++}
++
++int
++spnfs_commit(void)
++{
++	return 0;
++}
++
++/*
++ * Return the state for this object.
++ * At this time simply return 0 to indicate success and use the existing state
++ */
++int
++spnfs_get_state(struct inode *inode, struct knfsd_fh *fh, struct pnfs_get_state *arg)
++{
++	return 0;
++}
++
++/*
++ * Return the filehandle for the specified file descriptor
++ */
++int
++spnfs_getfh(int fd, struct nfs_fh *fh)
++{
++	struct file *file;
++
++	file = fget(fd);
++	if (file == NULL)
++		return -EIO;
++
++	memcpy(fh, NFS_FH(file->f_dentry->d_inode), sizeof(struct nfs_fh));
++	fput(file);
++	return 0;
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
+--- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-08-23 12:08:29.096512142 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 12:09:03.325501424 -0400
+@@ -242,6 +242,12 @@ struct nfs4_client {
+ 	u32			cl_cb_seq_nr;
+ 	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
+ 						/* wait here for slots */
++#if defined(CONFIG_PNFSD)
++	struct list_head	cl_layouts;	/* outstanding layouts */
++	struct list_head	cl_layoutrecalls; /* outstanding layoutrecall
++						     callbacks */
++	atomic_t		cl_deviceref;	/* Num outstanding devs */
++#endif /* CONFIG_PNFSD */
+ };
+ 
+ static inline void
+@@ -342,12 +348,31 @@ struct nfs4_file {
+ 	struct list_head        fi_hash;    /* hash by "struct inode *" */
+ 	struct list_head        fi_stateids;
+ 	struct list_head	fi_delegations;
++#if defined(CONFIG_PNFSD)
++	struct list_head	fi_layouts;
++	struct list_head	fi_layout_states;
++#endif /* CONFIG_PNFSD */
+ 	struct inode		*fi_inode;
+ 	u32                     fi_id;      /* used with stateowner->so_id 
+ 					     * for stateid_hashtbl hash */
+ 	bool			fi_had_conflict;
++#if defined(CONFIG_PNFSD)
++	/* used by layoutget / layoutrecall */
++	struct nfs4_fsid	fi_fsid;
++	u32			fi_fhlen;
++	u8			fi_fhval[NFS4_FHSIZE];
++#endif /* CONFIG_PNFSD */
+ };
+ 
++#if defined(CONFIG_PNFSD)
++/* pNFS Metadata server state */
++
++struct pnfs_ds_dev_entry {
++	struct list_head	dd_dev_entry; /* st_pnfs_ds_id entry */
++	u32			dd_dsid;
++};
++#endif /* CONFIG_PNFSD */
++
+ /*
+ * nfs4_stateid can either be an open stateid or (eventually) a lock stateid
+ *
+@@ -370,6 +395,9 @@ struct nfs4_stateid {
+ 	struct list_head              st_perfile;
+ 	struct list_head              st_perstateowner;
+ 	struct list_head              st_lockowners;
++#if defined(CONFIG_PNFSD)
++	struct list_head              st_pnfs_ds_id;
++#endif /* CONFIG_PNFSD */
+ 	struct nfs4_stateowner      * st_stateowner;
+ 	struct nfs4_file            * st_file;
+ 	stateid_t                     st_stateid;
+@@ -421,6 +449,34 @@ extern void nfsd4_recdir_purge_old(void)
+ extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
+ extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
+ extern void release_session_client(struct nfsd4_session *);
++extern void nfsd4_free_slab(struct kmem_cache **);
++extern struct nfs4_file *find_file(struct inode *);
++extern struct nfs4_file *find_alloc_file(struct inode *, struct svc_fh *);
++extern void put_nfs4_file(struct nfs4_file *);
++extern void get_nfs4_file(struct nfs4_file *);
++extern struct nfs4_client *find_confirmed_client(clientid_t *);
++extern struct nfs4_stateid *find_stateid(stateid_t *, int flags);
++extern struct nfs4_delegation *find_delegation_stateid(struct inode *, stateid_t *);
++extern __be32 nfs4_check_stateid(stateid_t *);
++extern void expire_client_lock(struct nfs4_client *);
++extern int filter_confirmed_clients(int (* func)(struct nfs4_client *, void *), void *);
++
++#if defined(CONFIG_PNFSD)
++extern int nfsd4_init_pnfs_slabs(void);
++extern void nfsd4_free_pnfs_slabs(void);
++extern void pnfs_expire_client(struct nfs4_client *);
++extern void release_pnfs_ds_dev_list(struct nfs4_stateid *);
++extern void nfs4_pnfs_state_init(void);
++extern void nfs4_pnfs_state_shutdown(void);
++extern void nfs4_ds_get_verifier(stateid_t *, struct super_block *, u32 *);
++extern int nfs4_preprocess_pnfs_ds_stateid(struct svc_fh *, stateid_t *);
++#else /* CONFIG_PNFSD */
++static inline void nfsd4_free_pnfs_slabs(void) {}
++static inline int nfsd4_init_pnfs_slabs(void) { return 0; }
++static inline void pnfs_expire_client(struct nfs4_client *clp) {}
++static inline void release_pnfs_ds_dev_list(struct nfs4_stateid *stp) {}
++static inline void nfs4_pnfs_state_shutdown(void) {}
++#endif /* CONFIG_PNFSD */
+ 
+ static inline void
+ nfs4_put_stateowner(struct nfs4_stateowner *so)
+@@ -434,4 +490,24 @@ nfs4_get_stateowner(struct nfs4_stateown
+ 	kref_get(&so->so_ref);
+ }
+ 
++static inline u64
++end_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	end = start + len;
++	return end >= start ? end : NFS4_MAX_UINT64;
++}
++
++/* last octet in a range */
++static inline u64
++last_byte_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	BUG_ON(!len);
++	end = start + len;
++	return end > start ? end - 1 : NFS4_MAX_UINT64;
++}
++
+ #endif   /* NFSD4_STATE_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.c
+--- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-08-23 12:08:27.632564132 -0400
++++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-08-23 12:09:03.326501490 -0400
+@@ -37,7 +37,12 @@
+ #ifdef CONFIG_NFSD_V4
+ #include <linux/nfs4_acl.h>
+ #include <linux/nfsd_idmap.h>
++#include <linux/security.h>
++#include <linux/nfsd4_spnfs.h>
+ #endif /* CONFIG_NFSD_V4 */
++#if defined(CONFIG_SPNFS_BLOCK)
++#include <linux/nfsd4_block.h>
++#endif
+ 
+ #include "nfsd.h"
+ #include "vfs.h"
+@@ -383,6 +388,12 @@ nfsd_setattr(struct svc_rqst *rqstp, str
+ 					NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
+ 			if (err)
+ 				goto out;
++#if defined(CONFIG_SPNFS_BLOCK)
++			if (pnfs_block_enabled(inode, 0)) {
++				err = bl_layoutrecall(inode, RETURN_FILE,
++				    iap->ia_size, inode->i_size - iap->ia_size);
++			}
++#endif /* CONFIG_SPNFS_BLOCK */
+ 		}
+ 
+ 		/*
+@@ -1703,6 +1714,11 @@ nfsd_rename(struct svc_rqst *rqstp, stru
+ 	struct inode	*fdir, *tdir;
+ 	__be32		err;
+ 	int		host_err;
++#ifdef CONFIG_SPNFS
++	unsigned long ino = 0;
++	unsigned long generation = 0;
++	unsigned int nlink = 0;
++#endif /* CONFIG_SPNFS */
+ 
+ 	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
+ 	if (err)
+@@ -1766,7 +1782,26 @@ nfsd_rename(struct svc_rqst *rqstp, stru
+ 	if (host_err)
+ 		goto out_dput_new;
+ 
++#ifdef CONFIG_SPNFS
++	/*
++	 * if the target is a preexisting regular file, remember the
++	 * inode number and generation so we can delete the stripes;
++	 * save the link count as well so that the stripes only get
++	 * deleted when the last link is deleted
++	 */
++	if (ndentry && ndentry->d_inode && S_ISREG(ndentry->d_inode->i_mode)) {
++		ino = ndentry->d_inode->i_ino;
++		generation = ndentry->d_inode->i_generation;
++		nlink = ndentry->d_inode->i_nlink;
++	}
++#endif /* CONFIG_SPNFS */
++
+ 	host_err = vfs_rename(fdir, odentry, tdir, ndentry);
++#ifdef CONFIG_SPNFS
++	if (spnfs_enabled() && (!host_err && ino && nlink == 1))
++		spnfs_remove(ino, generation);
++#endif /* CONFIG_SPNFS */
++
+ 	if (!host_err) {
+ 		host_err = commit_metadata(tfhp);
+ 		if (!host_err)
+@@ -1807,6 +1842,11 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 	struct inode	*dirp;
+ 	__be32		err;
+ 	int		host_err;
++#if defined(CONFIG_SPNFS)
++	unsigned long	ino;
++	unsigned long	generation;
++	unsigned int	nlink;
++#endif /* defined(CONFIG_SPNFS) */
+ 
+ 	err = nfserr_acces;
+ 	if (!flen || isdotent(fname, flen))
+@@ -1830,6 +1870,17 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 		goto out;
+ 	}
+ 
++#if defined(CONFIG_SPNFS)
++	/*
++	 * Remember the inode number to communicate to the spnfsd
++	 * for removal of stripes; save the link count as well so that
++	 * the stripes only get deleted when the last link is deleted
++	 */
++	ino = rdentry->d_inode->i_ino;
++	generation = rdentry->d_inode->i_generation;
++	nlink = rdentry->d_inode->i_nlink;
++#endif /* defined(CONFIG_SPNFS) */
++
+ 	if (!type)
+ 		type = rdentry->d_inode->i_mode & S_IFMT;
+ 
+@@ -1854,6 +1905,29 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 	if (!host_err)
+ 		host_err = commit_metadata(fhp);
+ 
++#if defined(CONFIG_SPNFS)
++	/*
++	 * spnfs: notify spnfsd of removal to destroy stripes
++	 */
++/*
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (sb->s_export_op->spnfs_remove) {
++*/
++	dprintk("%s check if spnfs_enabled\n", __FUNCTION__);
++	if (spnfs_enabled() && nlink == 1) {
++		BUG_ON(ino == 0);
++		dprintk("%s calling spnfs_remove inumber=%ld\n",
++			__FUNCTION__, ino);
++		if (spnfs_remove(ino, generation) == 0) {
++			dprintk("%s spnfs_remove success\n", __FUNCTION__);
++		} else {
++			/* XXX How do we make this atomic? */
++			printk(KERN_WARNING "nfsd: pNFS could not "
++				"remove stripes for inode: %ld\n", ino);
++		}
++	}
++#endif /* defined(CONFIG_SPNFS) */
++
+ 	mnt_drop_write(fhp->fh_export->ex_path.mnt);
+ out_nfserr:
+ 	err = nfserrno(host_err);
+diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
+--- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-08-23 12:08:29.097425997 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 12:09:03.327451643 -0400
+@@ -37,6 +37,8 @@
+ #ifndef _LINUX_NFSD_XDR4_H
+ #define _LINUX_NFSD_XDR4_H
+ 
++#include <linux/nfsd/nfsd4_pnfs.h>
++
+ #include "state.h"
+ #include "nfsd.h"
+ 
+@@ -385,6 +387,51 @@ struct nfsd4_reclaim_complete {
+ 	u32 rca_one_fs;
+ };
+ 
++struct nfsd4_pnfs_getdevinfo {
++	struct nfsd4_pnfs_deviceid gd_devid;	/* request */
++	u32			gd_layout_type;	/* request */
++	u32			gd_maxcount;	/* request */
++	u32			gd_notify_types;/* request */
++	struct super_block	*gd_sb;
++};
++
++struct nfsd4_pnfs_getdevlist {
++	u32             gd_layout_type;	/* request */
++	u32		gd_maxdevices;	/* request */
++	u64		gd_cookie;	/* request - response */
++	u64		gd_verf;	/* request - response */
++	struct svc_fh 	*gd_fhp;	/* response */
++	u32		gd_eof;		/* response */
++};
++
++struct nfsd4_pnfs_layoutget {
++	u64			lg_minlength;	/* request */
++	u32			lg_signal;	/* request */
++	u32			lg_maxcount;	/* request */
++	struct svc_fh		*lg_fhp;	/* request */
++	stateid_t		lg_sid;		/* request/response */
++	struct nfsd4_layout_seg	lg_seg;		/* request/response */
++	u32			lg_roc;		/* response */
++};
++
++struct nfsd4_pnfs_layoutcommit {
++	struct nfsd4_pnfs_layoutcommit_arg args;
++	stateid_t		lc_sid;		/* request */
++	struct nfsd4_pnfs_layoutcommit_res res;
++};
++
++enum layoutreturn_flags {
++	LR_FLAG_INTERN = 1 << 0,	/* internal return */
++	LR_FLAG_EXPIRE = 1 << 1,	/* return on client expiration */
++};
++
++struct nfsd4_pnfs_layoutreturn {
++	struct nfsd4_pnfs_layoutreturn_arg args;
++	u32			lr_flags;
++	stateid_t		lr_sid;		/* request/response */
++	u32			lrs_present;	/* response */
++};
++
+ struct nfsd4_op {
+ 	int					opnum;
+ 	__be32					status;
+@@ -426,6 +473,13 @@ struct nfsd4_op {
+ 		struct nfsd4_destroy_session	destroy_session;
+ 		struct nfsd4_sequence		sequence;
+ 		struct nfsd4_reclaim_complete	reclaim_complete;
++#if defined(CONFIG_PNFSD)
++		struct nfsd4_pnfs_getdevlist	pnfs_getdevlist;
++		struct nfsd4_pnfs_getdevinfo	pnfs_getdevinfo;
++		struct nfsd4_pnfs_layoutget	pnfs_layoutget;
++		struct nfsd4_pnfs_layoutcommit	pnfs_layoutcommit;
++		struct nfsd4_pnfs_layoutreturn	pnfs_layoutreturn;
++#endif /* CONFIG_PNFSD */
+ 	} u;
+ 	struct nfs4_replay *			replay;
+ };
+diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.c
+--- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-23 12:08:29.039491912 -0400
++++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-23 12:09:03.328501680 -0400
+@@ -28,6 +28,7 @@
+ #include <linux/aio.h>
+ #include <linux/gfp.h>
+ #include <linux/swap.h>
++#include <linux/pnfs_xdr.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/system.h>
+@@ -36,6 +37,7 @@
+ #include "internal.h"
+ #include "iostat.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_FILE
+ 
+@@ -388,12 +390,17 @@ static int nfs_write_begin(struct file *
+ 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ 	struct page *page;
+ 	int once_thru = 0;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
+ 		file->f_path.dentry->d_parent->d_name.name,
+ 		file->f_path.dentry->d_name.name,
+ 		mapping->host->i_ino, len, (long long) pos);
+ 
++	pnfs_update_layout(mapping->host,
++			   nfs_file_open_context(file),
++			   0, NFS4_MAX_UINT64, IOMODE_RW,
++			   &lseg);
+ start:
+ 	/*
+ 	 * Prevent starvation issues if someone is doing a consistency
+@@ -402,17 +409,22 @@ start:
+ 	ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
+ 			nfs_wait_bit_killable, TASK_KILLABLE);
+ 	if (ret)
+-		return ret;
++		goto out;
+ 
+ 	page = grab_cache_page_write_begin(mapping, index, flags);
+-	if (!page)
+-		return -ENOMEM;
++	if (!page) {
++		ret = -ENOMEM;
++		goto out;
++	}
+ 	*pagep = page;
+ 
+-	ret = nfs_flush_incompatible(file, page);
++	ret = nfs_flush_incompatible(file, page, lseg);
+ 	if (ret) {
+ 		unlock_page(page);
+ 		page_cache_release(page);
++		*pagep = NULL;
++		*fsdata = NULL;
++		goto out;
+ 	} else if (!once_thru &&
+ 		   nfs_want_read_modify_write(file, page, pos, len)) {
+ 		once_thru = 1;
+@@ -421,6 +433,12 @@ start:
+ 		if (!ret)
+ 			goto start;
+ 	}
++	ret = pnfs_write_begin(file, page, pos, len, lseg, fsdata);
++ out:
++	if (ret) {
++		put_lseg(lseg);
++		*fsdata = NULL;
++	}
+ 	return ret;
+ }
+ 
+@@ -430,6 +448,7 @@ static int nfs_write_end(struct file *fi
+ {
+ 	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+ 	int status;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n",
+ 		file->f_path.dentry->d_parent->d_name.name,
+@@ -456,10 +475,17 @@ static int nfs_write_end(struct file *fi
+ 			zero_user_segment(page, pglen, PAGE_CACHE_SIZE);
+ 	}
+ 
+-	status = nfs_updatepage(file, page, offset, copied);
++	lseg = nfs4_pull_lseg_from_fsdata(file, fsdata);
++	status = pnfs_write_end(file, page, pos, len, copied, lseg);
++	if (status)
++		goto out;
++	status = nfs_updatepage(file, page, offset, copied, lseg, fsdata);
+ 
++ out:
+ 	unlock_page(page);
+ 	page_cache_release(page);
++	pnfs_write_end_cleanup(file, fsdata);
++	put_lseg(lseg);
+ 
+ 	if (status < 0)
+ 		return status;
+@@ -570,6 +596,8 @@ static int nfs_vm_page_mkwrite(struct vm
+ 	/* make sure the cache has finished storing the page */
+ 	nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page);
+ 
++	/* XXX Do we want to call pnfs_update_layout here? */
++
+ 	lock_page(page);
+ 	mapping = page->mapping;
+ 	if (mapping != dentry->d_inode->i_mapping)
+@@ -580,11 +608,11 @@ static int nfs_vm_page_mkwrite(struct vm
+ 	if (pagelen == 0)
+ 		goto out_unlock;
+ 
+-	ret = nfs_flush_incompatible(filp, page);
++	ret = nfs_flush_incompatible(filp, page, NULL);
+ 	if (ret != 0)
+ 		goto out_unlock;
+ 
+-	ret = nfs_updatepage(filp, page, 0, pagelen);
++	ret = nfs_updatepage(filp, page, 0, pagelen, NULL, NULL);
+ out_unlock:
+ 	if (!ret)
+ 		return VM_FAULT_LOCKED;
+diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inode.c
+--- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-08-23 12:08:29.042511552 -0400
++++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-23 12:09:03.329501644 -0400
+@@ -48,6 +48,7 @@
+ #include "internal.h"
+ #include "fscache.h"
+ #include "dns_resolve.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_VFS
+ 
+@@ -278,7 +279,7 @@ nfs_fhget(struct super_block *sb, struct
+ 		 */
+ 		inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
+ 		if (S_ISREG(inode->i_mode)) {
+-			inode->i_fop = &nfs_file_operations;
++			inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
+ 			inode->i_data.a_ops = &nfs_file_aops;
+ 			inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
+ 		} else if (S_ISDIR(inode->i_mode)) {
+@@ -530,6 +531,68 @@ out:
+ 	return err;
+ }
+ 
++static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
++{
++	atomic_set(&l_ctx->count, 1);
++	l_ctx->lockowner = current->files;
++	l_ctx->pid = current->tgid;
++	INIT_LIST_HEAD(&l_ctx->list);
++}
++
++static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
++{
++	struct nfs_lock_context *pos;
++
++	list_for_each_entry(pos, &ctx->lock_context.list, list) {
++		if (pos->lockowner != current->files)
++			continue;
++		if (pos->pid != current->tgid)
++			continue;
++		atomic_inc(&pos->count);
++		return pos;
++	}
++	return NULL;
++}
++
++struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
++{
++	struct nfs_lock_context *res, *new = NULL;
++	struct inode *inode = ctx->path.dentry->d_inode;
++
++	spin_lock(&inode->i_lock);
++	res = __nfs_find_lock_context(ctx);
++	if (res == NULL) {
++		spin_unlock(&inode->i_lock);
++		new = kmalloc(sizeof(*new), GFP_KERNEL);
++		if (new == NULL)
++			return NULL;
++		nfs_init_lock_context(new);
++		spin_lock(&inode->i_lock);
++		res = __nfs_find_lock_context(ctx);
++		if (res == NULL) {
++			list_add_tail(&new->list, &ctx->lock_context.list);
++			new->open_context = ctx;
++			res = new;
++			new = NULL;
++		}
++	}
++	spin_unlock(&inode->i_lock);
++	kfree(new);
++	return res;
++}
++
++void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
++{
++	struct nfs_open_context *ctx = l_ctx->open_context;
++	struct inode *inode = ctx->path.dentry->d_inode;
++
++	if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock))
++		return;
++	list_del(&l_ctx->list);
++	spin_unlock(&inode->i_lock);
++	kfree(l_ctx);
++}
++
+ /**
+  * nfs_close_context - Common close_context() routine NFSv2/v3
+  * @ctx: pointer to context
+@@ -566,11 +629,11 @@ static struct nfs_open_context *alloc_nf
+ 		path_get(&ctx->path);
+ 		ctx->cred = get_rpccred(cred);
+ 		ctx->state = NULL;
+-		ctx->lockowner = current->files;
+ 		ctx->flags = 0;
+ 		ctx->error = 0;
+ 		ctx->dir_cookie = 0;
+-		atomic_set(&ctx->count, 1);
++		nfs_init_lock_context(&ctx->lock_context);
++		ctx->lock_context.open_context = ctx;
+ 	}
+ 	return ctx;
+ }
+@@ -578,15 +641,16 @@ static struct nfs_open_context *alloc_nf
+ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
+ {
+ 	if (ctx != NULL)
+-		atomic_inc(&ctx->count);
++		atomic_inc(&ctx->lock_context.count);
+ 	return ctx;
+ }
++EXPORT_SYMBOL(get_nfs_open_context);
+ 
+ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
+ {
+ 	struct inode *inode = ctx->path.dentry->d_inode;
+ 
+-	if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
++	if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
+ 		return;
+ 	list_del(&ctx->list);
+ 	spin_unlock(&inode->i_lock);
+@@ -933,6 +997,7 @@ void nfs_fattr_init(struct nfs_fattr *fa
+ 	fattr->time_start = jiffies;
+ 	fattr->gencount = nfs_inc_attr_generation_counter();
+ }
++EXPORT_SYMBOL(nfs_fattr_init);
+ 
+ struct nfs_fattr *nfs_alloc_fattr(void)
+ {
+@@ -1142,6 +1207,14 @@ static int nfs_update_inode(struct inode
+ 		server->fsid = fattr->fsid;
+ 
+ 	/*
++	 * file needs layout commit, server attributes may be stale
++	 */
++	if (layoutcommit_needed(nfsi) && nfsi->change_attr >= fattr->change_attr) {
++		dprintk("NFS: %s: layoutcommit is needed for file %s/%ld\n",
++			__func__, inode->i_sb->s_id, inode->i_ino);
++		return 0;
++	}
++	/*
+ 	 * Update the read time so we don't revalidate too often.
+ 	 */
+ 	nfsi->read_cache_jiffies = fattr->time_start;
+@@ -1340,9 +1413,10 @@ static int nfs_update_inode(struct inode
+  */
+ void nfs4_clear_inode(struct inode *inode)
+ {
++	pnfs_return_layout(inode, NULL, NULL, RETURN_FILE, true);
++
+ 	/* If we are holding a delegation, return it! */
+ 	nfs_inode_return_delegation_noreclaim(inode);
+-	/* First call standard NFS clear_inode() code */
+ 	nfs_clear_inode(inode);
+ }
+ #endif
+@@ -1367,7 +1441,10 @@ struct inode *nfs_alloc_inode(struct sup
+ 
+ void nfs_destroy_inode(struct inode *inode)
+ {
+-	kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
++	struct nfs_inode *nfsi = NFS_I(inode);
++
++	pnfs_destroy_layout(nfsi);
++	kmem_cache_free(nfs_inode_cachep, nfsi);
+ }
+ 
+ static inline void nfs4_init_once(struct nfs_inode *nfsi)
+@@ -1377,6 +1454,11 @@ static inline void nfs4_init_once(struct
+ 	nfsi->delegation = NULL;
+ 	nfsi->delegation_state = 0;
+ 	init_rwsem(&nfsi->rwsem);
++#ifdef CONFIG_NFS_V4_1
++	init_waitqueue_head(&nfsi->lo_waitq);
++	nfsi->pnfs_layout_suspend = 0;
++	nfsi->layout = NULL;
++#endif /* CONFIG_NFS_V4_1 */
+ #endif
+ }
+ 
+@@ -1488,6 +1570,12 @@ static int __init init_nfs_fs(void)
+ 	if (err)
+ 		goto out0;
+ 
++#ifdef CONFIG_NFS_V4_1
++	err = pnfs_initialize();
++	if (err)
++		goto out00;
++#endif /* CONFIG_NFS_V4_1 */
++
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_register(&nfs_rpcstat);
+ #endif
+@@ -1498,6 +1586,10 @@ out:
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_unregister("nfs");
+ #endif
++#ifdef CONFIG_NFS_V4_1
++out00:
++	pnfs_uninitialize();
++#endif /* CONFIG_NFS_V4_1 */
+ 	nfs_destroy_directcache();
+ out0:
+ 	nfs_destroy_writepagecache();
+@@ -1531,6 +1623,9 @@ static void __exit exit_nfs_fs(void)
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_unregister("nfs");
+ #endif
++#ifdef CONFIG_NFS_V4_1
++	pnfs_uninitialize();
++#endif
+ 	unregister_nfs_fs();
+ 	nfs_fs_proc_exit();
+ 	nfsiod_stop();
+diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/internal.h
+--- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-08-23 12:08:29.042511552 -0400
++++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-23 12:09:03.330502148 -0400
+@@ -139,6 +139,16 @@ extern struct nfs_server *nfs_clone_serv
+ 					   struct nfs_fattr *);
+ extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
+ extern int nfs4_check_client_ready(struct nfs_client *clp);
++extern int nfs_sockaddr_cmp(const struct sockaddr *sa1,
++		const struct sockaddr *sa2);
++extern int nfs4_set_client(struct nfs_server *server,
++		const char *hostname,
++		const struct sockaddr *addr,
++		const size_t addrlen,
++		const char *ip_addr,
++		rpc_authflavor_t authflavour,
++		int proto, const struct rpc_timeout *timeparms,
++		u32 minorversion);
+ #ifdef CONFIG_PROC_FS
+ extern int __init nfs_fs_proc_init(void);
+ extern void nfs_fs_proc_exit(void);
+@@ -201,6 +211,8 @@ extern const u32 nfs41_maxwrite_overhead
+ extern struct rpc_procinfo nfs4_procedures[];
+ #endif
+ 
++extern int nfs4_recover_expired_lease(struct nfs_client *clp);
++
+ /* proc.c */
+ void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
+ 
+@@ -248,10 +260,31 @@ extern int nfs4_get_rootfh(struct nfs_se
+ #endif
+ 
+ /* read.c */
++extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++			     const struct rpc_call_ops *call_ops);
++extern int pnfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++			     const struct rpc_call_ops *call_ops);
+ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
+ 
+ /* write.c */
++extern int nfs_initiate_write(struct nfs_write_data *data,
++			      struct rpc_clnt *clnt,
++			      const struct rpc_call_ops *call_ops,
++			      int how);
++extern int pnfs_initiate_write(struct nfs_write_data *data,
++			      struct rpc_clnt *clnt,
++			      const struct rpc_call_ops *call_ops,
++			      int how);
++extern int nfs_initiate_commit(struct nfs_write_data *data,
++			       struct rpc_clnt *clnt,
++			       const struct rpc_call_ops *call_ops,
++			       int how);
++extern int pnfs_initiate_commit(struct nfs_write_data *data,
++			       struct rpc_clnt *clnt,
++			       const struct rpc_call_ops *call_ops,
++				int how, int pnfs);
+ extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
++extern void nfs_mark_list_commit(struct list_head *head);
+ #ifdef CONFIG_MIGRATION
+ extern int nfs_migrate_page(struct address_space *,
+ 		struct page *, struct page *);
+diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kconfig
+--- linux-2.6.34.noarch/fs/nfs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-08-23 12:09:03.331395814 -0400
+@@ -79,10 +79,48 @@ config NFS_V4_1
+ 	depends on NFS_V4 && EXPERIMENTAL
+ 	help
+ 	  This option enables support for minor version 1 of the NFSv4 protocol
+-	  (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client.
++	  (RFC5661) including support for the parallel NFS (pNFS) features
++	  in the kernel's NFS client.
+ 
+ 	  Unless you're an NFS developer, say N.
+ 
++config PNFS_FILE_LAYOUT
++	tristate "NFS client support for the pNFS nfs-files layout (DEVELOPER ONLY)"
++	depends on NFS_FS && NFS_V4_1
++	default y
++	help
++	  This option enables support for the pNFS nfs-files layout.
++
++	  Unless you're an NFS developer, say N.
++
++config PNFS_OBJLAYOUT
++	tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
++	depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD
++	help
++	  Say M here if you want your pNFS client to support the Objects Layout Driver.
++	  Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and
++	  upper level driver (SCSI_OSD_ULD).
++
++	  If unsure, say N.
++
++config PNFS_PANLAYOUT
++	tristate "Provide support for the Panasas OSD Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
++	depends on PNFS_OBJLAYOUT
++	help
++	  Say M or y here if you want your pNFS client to support the Panasas OSD Layout Driver.
++
++	  If unsure, say N.
++
++config PNFS_BLOCK
++	tristate "Provide a pNFS block client (EXPERIMENTAL)"
++	depends on NFS_FS && NFS_V4_1
++	select MD
++	select BLK_DEV_DM
++	help
++	  Say M or y here if you want your pNFS client to support the block protocol.
++
++	  If unsure, say N.
++
+ config ROOT_NFS
+ 	bool "Root file system on NFS"
+ 	depends on NFS_FS=y && IP_PNP
+diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Makefile
+--- linux-2.6.34.noarch/fs/nfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-08-23 12:09:03.331395814 -0400
+@@ -15,5 +15,12 @@ nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4x
+ 			   delegation.o idmap.o \
+ 			   callback.o callback_xdr.o callback_proc.o \
+ 			   nfs4namespace.o
++nfs-$(CONFIG_NFS_V4_1)	+= pnfs.o
+ nfs-$(CONFIG_SYSCTL) += sysctl.o
+ nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
++
++obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
++nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
++
++obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
++obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
+diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs3proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-08-23 12:08:29.045525837 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-23 12:09:03.332511640 -0400
+@@ -833,6 +833,7 @@ const struct nfs_rpc_ops nfs_v3_clientop
+ 	.dentry_ops	= &nfs_dentry_operations,
+ 	.dir_inode_ops	= &nfs3_dir_inode_operations,
+ 	.file_inode_ops	= &nfs3_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs3_proc_get_root,
+ 	.getattr	= nfs3_proc_getattr,
+ 	.setattr	= nfs3_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-08-23 12:09:03.333512111 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-08-23 12:09:03.334491472 -0400
+@@ -0,0 +1,765 @@
++/*
++ *  linux/fs/nfs/nfs4filelayout.c
++ *
++ *  Module for the pnfs nfs4 file layout driver.
++ *  Defines all I/O and Policy interface operations, plus code
++ *  to register itself with the pNFS client.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/time.h>
++#include <linux/kernel.h>
++#include <linux/mm.h>
++#include <linux/string.h>
++#include <linux/vmalloc.h>
++#include <linux/stat.h>
++#include <linux/errno.h>
++#include <linux/unistd.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_page.h>
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs4_pnfs.h>
++
++#include "nfs4filelayout.h"
++#include "nfs4_fs.h"
++#include "internal.h"
++#include "pnfs.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Dean Hildebrand <dhildebz@eecs.umich.edu>");
++MODULE_DESCRIPTION("The NFSv4 file layout driver");
++
++/* Callback operations to the pNFS client */
++struct pnfs_client_operations *pnfs_callback_ops;
++
++/* Forward declaration */
++struct layoutdriver_io_operations filelayout_io_operations;
++
++int
++filelayout_initialize_mountpoint(struct nfs_server *nfss,
++				 const struct nfs_fh *mntfh)
++{
++	int err = nfs4_alloc_init_deviceid_cache(nfss->nfs_client,
++						nfs4_fl_free_deviceid_callback);
++	if (!err) {
++		dprintk("%s: deviceid cache has been initialized successfully\n",
++			__func__);
++		return 0;
++	}
++	printk(KERN_WARNING "%s: deviceid cache could not be "
++		"initialized\n", __func__);
++	return err;
++}
++
++/* Put the client's deviceid cache if this mount has a layout driver */
++int
++filelayout_uninitialize_mountpoint(struct nfs_server *nfss)
++{
++	dprintk("--> %s\n", __func__);
++	if (!nfss->pnfs_curr_ld || !nfss->nfs_client->cl_devid_cache)
++		return 0;
++	nfs4_put_deviceid_cache(nfss->nfs_client);
++	return 0;
++}
++
++/* Map a file offset to the offset to use on the data server.
++ * STRIPE_SPARSE: unchanged.  STRIPE_DENSE, with off = offset - pattern_offset:
++ * (off / stripe_width) * stripe_unit + (off % stripe_unit)
++ */
++static loff_t
++filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++
++	switch (flseg->stripe_type) {
++	case STRIPE_SPARSE:
++		return offset;
++
++	case STRIPE_DENSE:
++	{
++		u32 stripe_width;
++		u64 tmp, off;
++		u32 unit = flseg->stripe_unit;
++
++		stripe_width = unit * FILE_DSADDR(lseg)->stripe_count;
++		tmp = off = offset - flseg->pattern_offset;
++		do_div(tmp, stripe_width);
++		return tmp * unit + do_div(off, unit);
++	}
++	default:
++		BUG();
++	}
++
++	/* Not reached: every case returns or BUG()s; quiets a gcc warning */
++	return 0;
++}
++
++/*
++ * rpc_call_done for data server reads.
++ * The request went out with the data server offset; put the saved file
++ * offset back before handing the result to the saved call_ops.
++ */
++static void filelayout_read_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_read_data *rd = data;
++
++	if (rd->fldata.orig_offset) {
++		dprintk("%s new off %llu orig offset %llu\n", __func__,
++			rd->args.offset, rd->fldata.orig_offset);
++		rd->args.offset = rd->fldata.orig_offset;
++	}
++
++	/* Note this may cause RPC to be resent */
++	rd->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static void filelayout_read_release(void *data)
++{
++	struct nfs_read_data *rd = data;
++
++	put_lseg(rd->pdata.lseg);
++	rd->pdata.lseg = NULL;
++	rd->pdata.call_ops->rpc_release(data);
++}
++
++static void filelayout_write_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_write_data *wd = data;
++
++	if (wd->fldata.orig_offset) {
++		dprintk("%s new off %llu orig offset %llu\n", __func__,
++			wd->args.offset, wd->fldata.orig_offset);
++		wd->args.offset = wd->fldata.orig_offset;
++	}
++
++	/* Hand off to the saved call_ops; this may cause a resend */
++	wd->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static void filelayout_write_release(void *data)
++{
++	struct nfs_write_data *wd = data;
++
++	put_lseg(wd->pdata.lseg);
++	wd->pdata.lseg = NULL;
++	wd->pdata.call_ops->rpc_release(data);
++}
++
++struct rpc_call_ops filelayout_read_call_ops = {
++	.rpc_release      = filelayout_read_release,
++	.rpc_call_done    = filelayout_read_call_done,
++	.rpc_call_prepare = nfs_read_prepare,
++};
++
++struct rpc_call_ops filelayout_write_call_ops = {
++	.rpc_release      = filelayout_write_release,
++	.rpc_call_done    = filelayout_write_call_done,
++	.rpc_call_prepare = nfs_write_prepare,
++};
++
++/* Start an asynchronous read on the data server for data->args.offset.
++ *
++ * Returns PNFS_NOT_ATTEMPTED if nfs4_fl_prepare_ds() finds no data
++ * server (nothing in @data is modified in that case), otherwise
++ * PNFS_ATTEMPTED after nfs_initiate_read() has been called.
++ *
++ * TODO: join with write_pagelist?
++ */
++static enum pnfs_try_status
++filelayout_read_pagelist(struct nfs_read_data *data, unsigned nr_pages)
++{
++	struct pnfs_layout_segment *lseg = data->pdata.lseg;
++	struct nfs4_pnfs_ds *ds;
++	loff_t offset = data->args.offset;
++	u32 idx;
++	struct nfs_fh *fh;
++
++	dprintk("--> %s ino %lu nr_pages %d pgbase %u req %Zu@%llu\n",
++		__func__, data->inode->i_ino, nr_pages,
++		data->args.pgbase, (size_t)data->args.count, offset);
++
++	/* Retrieve the correct rpc_client for the byte range */
++	idx = nfs4_fl_calc_ds_index(lseg, offset);
++	ds = nfs4_fl_prepare_ds(lseg, idx);
++	if (!ds) {
++		printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	dprintk("%s USE DS:ip %x %s\n", __func__,
++		htonl(ds->ds_ip_addr), ds->r_addr);
++
++	/* just try the first data server for the index..*/
++	data->fldata.ds_nfs_client = ds->ds_clp;
++	fh = nfs4_fl_select_ds_fh(lseg, offset);
++	if (fh)
++		data->args.fh = fh;
++
++	/*
++	 * Now get the file offset on the dserver
++	 * Set the read offset to this offset, and
++	 * save the original offset in orig_offset
++	 * In the case of async reads, the offset will be reset in the
++	 * call_ops->rpc_call_done() routine.
++	 */
++	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
++	data->fldata.orig_offset = offset;
++
++	/* Perform an asynchronous read */
++	nfs_initiate_read(data, ds->ds_clp->cl_rpcclient,
++			  &filelayout_read_call_ops);
++
++	data->pdata.pnfs_error = 0;
++
++	return PNFS_ATTEMPTED;
++}
++
++/* Start an async write on the data server chosen for data->args.offset */
++static enum pnfs_try_status
++filelayout_write_pagelist(struct nfs_write_data *data, unsigned nr_pages, int sync)
++{
++	struct pnfs_layout_segment *lseg = data->pdata.lseg;
++	struct nfs4_pnfs_ds *ds;
++	loff_t offset = data->args.offset;
++	u32 idx;
++	struct nfs_fh *fh;
++
++	/* Retrieve the correct rpc_client for the byte range */
++	idx = nfs4_fl_calc_ds_index(lseg, offset);
++	ds = nfs4_fl_prepare_ds(lseg, idx);
++	if (!ds) {
++		printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu %s\n", __func__,
++		data->inode->i_ino, sync, (size_t) data->args.count, offset,
++		htonl(ds->ds_ip_addr), ntohs(ds->ds_port), ds->r_addr);
++
++	data->fldata.ds_nfs_client = ds->ds_clp;
++	fh = nfs4_fl_select_ds_fh(lseg, offset);
++	if (fh)
++		data->args.fh = fh;
++	/*
++	 * Get the file offset on the dserver. Set the write offset to
++	 * this offset and save the original offset.
++	 */
++	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
++	data->fldata.orig_offset = offset;
++
++	/*
++	 * Perform an asynchronous write.  The offset will be reset in the
++	 * call_ops->rpc_call_done() routine.
++	 */
++	nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
++			   &filelayout_write_call_ops, sync);
++
++	data->pdata.pnfs_error = 0;
++	return PNFS_ATTEMPTED;
++}
++
++/*
++ * Allocate a zeroed nfs4_filelayout and return its embedded
++ * pnfs_layout_type (fl_layout), or NULL if the allocation fails.
++ * @inode is not used here.
++ */
++static struct pnfs_layout_type *
++filelayout_alloc_layout(struct inode *inode)
++{
++	struct nfs4_filelayout *flo;
++
++	dprintk("NFS_FILELAYOUT: allocating layout\n");
++	flo = kzalloc(sizeof(*flo), GFP_KERNEL);
++	return !flo ? NULL : &flo->fl_layout;
++}
++
++/* Free the nfs4_filelayout that FILE_LO() yields for @lo */
++static void
++filelayout_free_layout(struct pnfs_layout_type *lo)
++{
++	dprintk("NFS_FILELAYOUT: freeing layout\n");
++	kfree(FILE_LO(lo));
++}
++
++/*
++ * filelayout_check_layout()
++ *
++ * Make sure layout segment parameters are sane WRT the device.
++ * Returns 0 (and sets the segment's deviceid) if so, -EINVAL otherwise.
++ * first_stripe_index is an index, so it must be < dsaddr->stripe_count.
++ * Notes:
++ * 1) current code insists that # stripe index = # data servers in ds_list
++ *    which is wrong.
++ * 2) pattern_offset is ignored and must == 0 which is wrong;
++ * 3) the pattern_offset needs to be a multiple of the stripe unit.
++ * 4) stripe unit is multiple of page size
++ */
++static int
++filelayout_check_layout(struct pnfs_layout_type *lo,
++			struct pnfs_layout_segment *lseg)
++{
++	struct nfs4_filelayout_segment *fl = LSEG_LD_DATA(lseg);
++	struct nfs4_file_layout_dsaddr *dsaddr;
++	int status = -EINVAL;
++	struct nfs_server *nfss = NFS_SERVER(PNFS_INODE(lo));
++
++	dprintk("--> %s\n", __func__);
++	dsaddr = nfs4_pnfs_device_item_find(nfss->nfs_client, &fl->dev_id);
++	if (dsaddr == NULL) {
++		dsaddr = get_device_info(PNFS_INODE(lo), &fl->dev_id);
++		if (dsaddr == NULL) {
++			dprintk("%s NO device for dev_id %s\n",
++				__func__, deviceid_fmt(&fl->dev_id));
++			goto out;
++		}
++	}
++	if (fl->first_stripe_index < 0 ||
++	    fl->first_stripe_index >= dsaddr->stripe_count) {
++		dprintk("%s Bad first_stripe_index %d\n",
++				__func__, fl->first_stripe_index);
++		goto out;
++	}
++
++	if (fl->pattern_offset != 0) {
++		dprintk("%s Unsupported no-zero pattern_offset %Ld\n",
++				__func__, fl->pattern_offset);
++		goto out;
++	}
++
++	if (fl->stripe_unit % PAGE_SIZE) {
++		dprintk("%s Stripe unit (%u) not page aligned\n",
++			__func__, fl->stripe_unit);
++		goto out;
++	}
++
++	/* XXX only support SPARSE packing. Don't support use MDS open fh */
++	if (!(fl->num_fh == 1 || fl->num_fh == dsaddr->ds_num)) {
++		dprintk("%s num_fh %u not equal to 1 or ds_num %u\n",
++			__func__, fl->num_fh, dsaddr->ds_num);
++		goto out;
++	}
++
++	if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
++		dprintk("%s Stripe unit (%u) not aligned with rsize %u "
++			"wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
++			nfss->wsize);
++	}
++
++	/* reference the device */
++	nfs4_set_layout_deviceid(lseg, &dsaddr->deviceid);
++
++	status = 0;
++out:
++	dprintk("--> %s returns %d\n", __func__, status);
++	return status;
++}
++
++static void _filelayout_free_lseg(struct pnfs_layout_segment *lseg);
++static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl);
++
++/* Decode the opaque layout in lgr->layout.buf into @fl and refresh the
++ * stripe unit cached in @flo.  Returns 0, or -ENOMEM if fh_array fails.
++ */
++static int
++filelayout_set_layout(struct nfs4_filelayout *flo,
++		      struct nfs4_filelayout_segment *fl,
++		      struct nfs4_pnfs_layoutget_res *lgr)
++{
++	uint32_t *p = (uint32_t *)lgr->layout.buf;
++	uint32_t nfl_util;
++	int i;
++
++	dprintk("%s: set_layout_map Begin\n", __func__);
++
++	memcpy(&fl->dev_id, p, NFS4_PNFS_DEVICEID4_SIZE);
++	p += XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE);
++	nfl_util = be32_to_cpup(p++);
++	if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
++		fl->commit_through_mds = 1;
++	if (nfl_util & NFL4_UFLG_DENSE)
++		fl->stripe_type = STRIPE_DENSE;
++	else
++		fl->stripe_type = STRIPE_SPARSE;
++	fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;
++
++	if (!flo->stripe_unit)
++		flo->stripe_unit = fl->stripe_unit;
++	else if (flo->stripe_unit != fl->stripe_unit) {
++		printk(KERN_NOTICE "%s: updating strip_unit from %u to %u\n",
++			__func__, flo->stripe_unit, fl->stripe_unit);
++		flo->stripe_unit = fl->stripe_unit;
++	}
++
++	fl->first_stripe_index = be32_to_cpup(p++);
++	p = xdr_decode_hyper(p, &fl->pattern_offset);
++	fl->num_fh = be32_to_cpup(p++);
++
++	dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu dev_id %s\n",
++		__func__, nfl_util, fl->num_fh, fl->first_stripe_index,
++		fl->pattern_offset, deviceid_fmt(&fl->dev_id));
++
++	if (fl->num_fh * sizeof(struct nfs_fh) > 2*PAGE_SIZE) {
++		fl->fh_array = vmalloc(fl->num_fh * sizeof(struct nfs_fh));
++		if (fl->fh_array)
++			memset(fl->fh_array, 0,
++				fl->num_fh * sizeof(struct nfs_fh));
++	} else {
++		fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh),
++					GFP_KERNEL);
++	}
++	if (!fl->fh_array)
++		return -ENOMEM;
++
++	for (i = 0; i < fl->num_fh; i++) {
++		uint32_t fh_len = be32_to_cpup(p++);	/* untruncated */
++
++		if (fh_len > sizeof(fl->fh_array[i].data)) {
++			printk(KERN_ERR "Too big fh %d received %d\n",
++				i, fh_len);
++			/* Layout is now invalid, pretend it doesn't exist */
++			filelayout_free_fh_array(fl);
++			fl->num_fh = 0;
++			break;
++		}
++		fl->fh_array[i].size = fh_len;
++		memcpy(fl->fh_array[i].data, p, fh_len);
++		p += XDR_QUADLEN(fh_len);
++		dprintk("DEBUG: %s: fh len %d\n", __func__, fh_len);
++	}
++
++	return 0;
++}
++
++static struct pnfs_layout_segment *
++filelayout_alloc_lseg(struct pnfs_layout_type *layoutid,
++		      struct nfs4_pnfs_layoutget_res *lgr)
++{
++	struct pnfs_layout_segment *seg;
++
++	dprintk("--> %s\n", __func__);
++	seg = kzalloc(sizeof(struct pnfs_layout_segment) +
++		      sizeof(struct nfs4_filelayout_segment), GFP_KERNEL);
++	if (!seg)
++		return NULL;
++
++	/* Decode the layout first, then validate it against the device */
++	if (filelayout_set_layout(FILE_LO(layoutid), LSEG_LD_DATA(seg), lgr))
++		goto out_free;
++	if (filelayout_check_layout(layoutid, seg))
++		goto out_free;
++	return seg;
++out_free:
++	_filelayout_free_lseg(seg);
++	return NULL;
++}
++
++static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl)
++{
++	/* Arrays of at most 2 pages are kfree'd, larger ones vfree'd */
++	if (fl->num_fh * sizeof(struct nfs_fh) <= 2*PAGE_SIZE)
++		kfree(fl->fh_array);
++	else
++		vfree(fl->fh_array);
++	fl->fh_array = NULL;
++}
++
++/* Free the fh array of the segment's private data, then the segment */
++static void _filelayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	filelayout_free_fh_array(LSEG_LD_DATA(lseg));
++	kfree(lseg);
++}
++
++/* Unset the segment's layout deviceid, then free the segment itself */
++static void filelayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	dprintk("--> %s\n", __func__);
++	nfs4_unset_layout_deviceid(lseg, lseg->deviceid,
++				   nfs4_fl_free_deviceid_callback);
++	_filelayout_free_lseg(lseg);
++}
++
++/* Allocate a new nfs_write_data and initialize it from @old; NULL on OOM */
++static struct nfs_write_data *
++filelayout_clone_write_data(struct nfs_write_data *old)
++{
++	/* Must be automatic: a static local would be shared by all callers */
++	struct nfs_write_data *new;
++
++	new = nfs_commitdata_alloc();
++	if (!new)
++		return NULL;
++	kref_init(&new->refcount);
++	new->parent      = old;
++	kref_get(&old->refcount);
++	new->inode       = old->inode;
++	new->cred        = old->cred;
++	new->args.offset = 0;
++	new->args.count  = 0;
++	new->res.count   = 0;
++	new->res.fattr   = &new->fattr;
++	nfs_fattr_init(&new->fattr);
++	new->res.verf    = &new->verf;
++	new->args.context = get_nfs_open_context(old->args.context);
++	new->pdata.lseg = NULL;
++	new->pdata.call_ops = old->pdata.call_ops;
++	new->pdata.how = old->pdata.how;
++	return new;
++}
++
++static void filelayout_commit_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_write_data *commit = data;
++
++	commit->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static struct rpc_call_ops filelayout_commit_call_ops = {
++	.rpc_release      = filelayout_write_release,
++	.rpc_call_done    = filelayout_commit_call_done,
++	.rpc_call_prepare = nfs_write_prepare,
++};
++
++/*
++ * Send a COMMIT to the MDS and/or to each data server that holds a page
++ * on data->pages.  The last destination reuses @data itself; every other
++ * destination gets a clone.  Always returns PNFS_ATTEMPTED.
++ */
++enum pnfs_try_status
++filelayout_commit(struct nfs_write_data *data, int sync)
++{
++	LIST_HEAD(head);
++	struct nfs_page *req;
++	loff_t file_offset = 0;
++	u16 idx, i;
++	struct list_head **ds_page_list = NULL;
++	u16 *indices_used;
++	int num_indices_seen = 0;
++	const struct rpc_call_ops *call_ops;
++	struct rpc_clnt *clnt;
++	struct nfs_write_data **clone_list = NULL;
++	struct nfs_write_data *dsdata;
++	struct nfs4_pnfs_ds *ds;
++
++	dprintk("%s data %p sync %d\n", __func__, data, sync);
++
++	/* One buffer: ds_page_list[] followed by indices_used[] */
++	ds_page_list = kzalloc((NFS4_PNFS_MAX_MULTI_CNT + 1) *
++			       (sizeof(u16) + sizeof(struct list_head *)),
++			       GFP_KERNEL);
++	if (!ds_page_list)
++		goto mem_error;
++	indices_used = (u16 *) (ds_page_list + NFS4_PNFS_MAX_MULTI_CNT + 1);
++	/*
++	 * Sort pages based on which ds to send to.
++	 * MDS is given index equal to NFS4_PNFS_MAX_MULTI_CNT.
++	 * Note we are assuming there is only a single lseg in play.
++	 * When that is not true, we could first sort on lseg, then
++	 * sort within each as we do here.
++	 */
++	while (!list_empty(&data->pages)) {
++		req = nfs_list_entry(data->pages.next);
++		nfs_list_remove_request(req);
++		if (!req->wb_lseg ||
++		    ((struct nfs4_filelayout_segment *)
++		     LSEG_LD_DATA(req->wb_lseg))->commit_through_mds)
++			idx = NFS4_PNFS_MAX_MULTI_CNT;
++		else {
++			file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
++			idx = nfs4_fl_calc_ds_index(req->wb_lseg, file_offset);
++		}
++		if (ds_page_list[idx]) {
++			/* Already seen this idx */
++			list_add(&req->wb_list, ds_page_list[idx]);
++		} else {
++			/* New idx not seen so far */
++			list_add_tail(&req->wb_list, &head);
++			indices_used[num_indices_seen++] = idx;
++		}
++		ds_page_list[idx] = &req->wb_list;
++	}
++	/* Once created, clone must be released via call_op */
++	clone_list = kzalloc(num_indices_seen *
++			     sizeof(struct nfs_write_data *), GFP_KERNEL);
++	if (!clone_list)
++		goto mem_error;
++	for (i = 0; i < num_indices_seen - 1; i++) {
++		clone_list[i] = filelayout_clone_write_data(data);
++		if (!clone_list[i])
++			goto mem_error;
++	}
++	clone_list[i] = data;
++	/*
++	 * Now send off the RPCs to each ds.  Note that it is important
++	 * that any RPC to the MDS be sent last (or at least after all
++	 * clones have been made.)
++	 */
++	for (i = 0; i < num_indices_seen; i++) {
++		dsdata = clone_list[i];
++		idx = indices_used[i];
++		list_cut_position(&dsdata->pages, &head, ds_page_list[idx]);
++		if (idx == NFS4_PNFS_MAX_MULTI_CNT) {
++			call_ops = data->pdata.call_ops;;
++			clnt = NFS_CLIENT(dsdata->inode);
++			ds = NULL;
++		} else {
++			struct nfs_fh *fh;
++
++			call_ops = &filelayout_commit_call_ops;
++			req = nfs_list_entry(dsdata->pages.next);
++			ds = nfs4_fl_prepare_ds(req->wb_lseg, idx);
++			if (!ds) {
++				/* Trigger retry of this chunk through MDS */
++				dsdata->task.tk_status = -EIO;
++				data->pdata.call_ops->rpc_release(dsdata);
++				continue;
++			}
++			clnt = ds->ds_clp->cl_rpcclient;
++			dsdata->fldata.ds_nfs_client = ds->ds_clp;
++			file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
++			fh = nfs4_fl_select_ds_fh(req->wb_lseg, file_offset);
++			if (fh)
++				dsdata->args.fh = fh;
++		}
++		dprintk("%s: Initiating commit: %llu USE DS:\n",
++			__func__, file_offset);
++		print_ds(ds);
++
++		/* Send COMMIT to the chosen client (data server or MDS) */
++		nfs_initiate_commit(dsdata, clnt, call_ops, sync);
++	}
++	kfree(clone_list);
++	kfree(ds_page_list);
++	data->pdata.pnfs_error = 0;
++	return PNFS_ATTEMPTED;
++
++ mem_error:
++	if (clone_list) {
++		for (i = 0; i < num_indices_seen - 1; i++) {
++			if (!clone_list[i])
++				break;
++			data->pdata.call_ops->rpc_release(clone_list[i]);
++		}
++		kfree(clone_list);
++	}
++	kfree(ds_page_list);
++	/* One of these will be empty, but doesn't hurt to do both */
++	nfs_mark_list_commit(&head);
++	nfs_mark_list_commit(&data->pages);
++	data->pdata.call_ops->rpc_release(data);
++	return PNFS_ATTEMPTED;
++}
++
++/* Return the stripe unit recorded in this layout's nfs4_filelayout */
++ssize_t
++filelayout_get_stripesize(struct pnfs_layout_type *layoutid)
++{
++	struct nfs4_filelayout *layout = FILE_LO(layoutid);
++
++	return layout->stripe_unit;
++}
++
++/*
++ * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
++ *
++ * Returns 1 (coalesce) when no boundary is set or when @prev and @req
++ * fall in the same pg_boundary-sized unit, else 0 (don't coalesce).
++ */
++int
++filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
++		   struct nfs_page *req)
++{
++	u64 prev_unit, req_unit;
++
++	if (!pgio->pg_boundary)
++		return 1;
++	prev_unit = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
++	req_unit = (u64)req->wb_index << PAGE_CACHE_SHIFT;
++
++	/* do_div() leaves the quotient (the unit number) in its first arg */
++	do_div(prev_unit, pgio->pg_boundary);
++	do_div(req_unit, pgio->pg_boundary);
++	return prev_unit == req_unit;
++}
++
++struct layoutdriver_io_operations filelayout_io_operations = {
++	.initialize_mountpoint   = filelayout_initialize_mountpoint,
++	.uninitialize_mountpoint = filelayout_uninitialize_mountpoint,
++	.alloc_layout            = filelayout_alloc_layout,
++	.free_layout             = filelayout_free_layout,
++	.alloc_lseg              = filelayout_alloc_lseg,
++	.free_lseg               = filelayout_free_lseg,
++	.read_pagelist           = filelayout_read_pagelist,
++	.write_pagelist          = filelayout_write_pagelist,
++	.commit                  = filelayout_commit,
++};
++
++struct layoutdriver_policy_operations filelayout_policy_operations = {
++	.pg_test        = filelayout_pg_test,
++	.get_stripesize = filelayout_get_stripesize,
++	.flags          = PNFS_USE_RPC_CODE,
++};
++
++struct pnfs_layoutdriver_type filelayout_type = {
++	.name          = "LAYOUT_NFSV4_1_FILES",
++	.id            = LAYOUT_NFSV4_1_FILES,
++	.ld_policy_ops = &filelayout_policy_operations,
++	.ld_io_ops     = &filelayout_io_operations,
++};
++
++static int __init nfs4filelayout_init(void)
++{
++	printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n",
++	       __func__);
++
++	/*
++	 * Register the file layout driver with the pNFS client and keep the
++	 * returned callback ops. NOTE(review): result is not checked - confirm.
++	 */
++	pnfs_callback_ops = pnfs_register_layoutdriver(&filelayout_type);
++
++	return 0;
++}
++
++static void __exit nfs4filelayout_exit(void)
++{
++	printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n",
++	       __func__);
++
++	/* Unregister the NFSv4 file layout driver from the pNFS client */
++	pnfs_unregister_layoutdriver(&filelayout_type);
++}
++
++module_init(nfs4filelayout_init);
++module_exit(nfs4filelayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-08-23 12:09:03.334491472 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-08-23 12:09:03.335501543 -0400
+@@ -0,0 +1,636 @@
++/*
++ *  linux/fs/nfs/nfs4filelayoutdev.c
++ *
++ *  Device operations for the pnfs nfs4 file layout driver.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *  Garth Goodson   <Garth.Goodson@netapp.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/hash.h>
++
++#include <linux/nfs4.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_xdr.h>
++
++#include <asm/div64.h>
++
++#include <linux/utsname.h>
++#include <linux/vmalloc.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/pnfs_xdr.h>
++#include "nfs4filelayout.h"
++#include "internal.h"
++#include "nfs4_fs.h"
++
++#define NFSDBG_FACILITY		NFSDBG_PNFS_LD
++
++DEFINE_SPINLOCK(nfs4_ds_cache_lock);
++static LIST_HEAD(nfs4_data_server_cache);
++
++/* Debug dump of one data server entry; a NULL @ds is reported, not used */
++void print_ds(struct nfs4_pnfs_ds *ds)
++{
++	if (!ds) {
++		dprintk("%s NULL device \n", __func__);
++		return;
++	}
++	dprintk("        ip_addr %x\n", ntohl(ds->ds_ip_addr));
++	dprintk("        port %hu\n", ntohs(ds->ds_port));
++	dprintk("        client %p\n", ds->ds_clp);
++	dprintk("        ref count %d\n", atomic_read(&ds->ds_count));
++	if (ds->ds_clp != NULL)
++		dprintk("        cl_exchange_flags %x\n",
++			ds->ds_clp->cl_exchange_flags);
++	dprintk("        ip:port %s\n", ds->r_addr);
++}
++
++/* Debug dump of each of the ds_num entries in dsaddr->ds_list */
++void print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
++{
++	int idx;
++
++	dprintk("%s dsaddr->ds_num %d\n", __func__,
++		dsaddr->ds_num);
++	for (idx = 0; idx < dsaddr->ds_num; idx++)
++		print_ds(dsaddr->ds_list[idx]);
++}
++
++/* Debug helper: 64bit major/minor view of the deviceid, in a static buffer */
++char *
++deviceid_fmt(const struct pnfs_deviceid *dev_id)
++{
++	static char buf[2 * 20 + 2];	/* two u64 in decimal, ' ' and NUL */
++	uint32_t *p = (uint32_t *)dev_id->data;
++	uint64_t major, minor;
++
++	p = xdr_decode_hyper(p, &major);
++	p = xdr_decode_hyper(p, &minor);
++	/* Bounded: "%08llu" is a minimum width, values can need 20 digits */
++	snprintf(buf, sizeof(buf), "%08llu %08llu", major, minor);
++	return buf;
++}
++
++/* Look up ip_addr/port in the ds cache; nfs4_ds_cache_lock is held */
++static inline struct nfs4_pnfs_ds *
++_data_server_lookup(u32 ip_addr, u32 port)
++{
++	struct nfs4_pnfs_ds *pos;
++
++	dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
++			ntohl(ip_addr), ntohs(port));
++
++	list_for_each_entry(pos, &nfs4_data_server_cache, ds_node) {
++		if (pos->ds_ip_addr != ip_addr ||
++		    pos->ds_port != port)
++			continue;
++		return pos;
++	}
++	return NULL;
++}
++
++/* Set up ds->ds_clp, the nfs_client for data server @ds; 0 or -errno */
++static int
++nfs4_pnfs_ds_create(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
++{
++	struct nfs_server	*tmp;
++	struct sockaddr_in	sin;
++	struct rpc_clnt		*mds_clnt = mds_srv->client;
++	struct nfs_client	*clp = mds_srv->nfs_client;
++	struct sockaddr		*mds_addr;
++	int err = 0;
++
++	dprintk("--> %s ip:port %s au_flavor %d\n", __func__,
++		ds->r_addr, mds_clnt->cl_auth->au_flavor);
++
++	sin.sin_family = AF_INET;
++	sin.sin_addr.s_addr = ds->ds_ip_addr;
++	sin.sin_port = ds->ds_port;
++
++	/*
++	 * If this DS is also the MDS, use the MDS session only if the
++	 * MDS exchangeid flags show the EXCHGID4_FLAG_USE_PNFS_DS pNFS role.
++	 */
++	mds_addr = (struct sockaddr *)&clp->cl_addr;
++	if (nfs_sockaddr_cmp((struct sockaddr *)&sin, mds_addr)) {
++		if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS)) {
++			printk(KERN_INFO "ip:port %s is not a pNFS Data "
++				"Server\n", ds->r_addr);
++			err = -ENODEV;
++		} else {
++			atomic_inc(&clp->cl_count);
++			ds->ds_clp = clp;
++			dprintk("%s Using MDS Session for DS\n", __func__);
++		}
++		goto out;
++	}
++
++	/* Temporary server for nfs4_set_client */
++	tmp = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
++	if (!tmp) {
++		err = -ENOMEM;	/* do not report success without a client */
++		goto out;
++	}
++
++	/* Match the MDS retrans, timeout and authflavor; pass the MDS
++	 * cl_ipaddr so as to use the same co_ownerid as the MDS.
++	 */
++	err = nfs4_set_client(tmp,
++			      mds_srv->nfs_client->cl_hostname,
++			      (struct sockaddr *)&sin,
++			      sizeof(struct sockaddr),
++			      mds_srv->nfs_client->cl_ipaddr,
++			      mds_clnt->cl_auth->au_flavor,
++			      IPPROTO_TCP,
++			      mds_clnt->cl_xprt->timeout,
++			      1 /* minorversion */);
++	if (err < 0)
++		goto out_free;
++
++	clp = tmp->nfs_client;
++
++	/* Ask for only the EXCHGID4_FLAG_USE_PNFS_DS pNFS role */
++	dprintk("%s EXCHANGE_ID for clp %p\n", __func__, clp);
++	clp->cl_exchange_flags = EXCHGID4_FLAG_USE_PNFS_DS;
++
++	err = nfs4_recover_expired_lease(clp);
++	if (!err)
++		err = nfs4_check_client_ready(clp);
++	if (err)
++		goto out_put;
++
++	if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS)) {
++		printk(KERN_INFO "ip:port %s is not a pNFS Data Server\n",
++			ds->r_addr);
++		err = -ENODEV;
++		goto out_put;
++	}
++	/*
++	 * Mask the (possibly) returned EXCHGID4_FLAG_USE_PNFS_MDS pNFS role
++	 * The is_ds_only_session depends on this.
++	 */
++	clp->cl_exchange_flags &= ~EXCHGID4_FLAG_USE_PNFS_MDS;
++	/*
++	 * Set DS lease equal to the MDS lease, renewal is scheduled in
++	 * create_session
++	 */
++	spin_lock(&mds_srv->nfs_client->cl_lock);
++	clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
++	spin_unlock(&mds_srv->nfs_client->cl_lock);
++	clp->cl_last_renewal = jiffies;
++
++	clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
++	ds->ds_clp = clp;
++
++	dprintk("%s: ip=%x, port=%hu, rpcclient %p\n", __func__,
++				ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
++				clp->cl_rpcclient);
++out_free:
++	kfree(tmp);
++out:
++	dprintk("%s Returns %d\n", __func__, err);
++	return err;
++out_put:
++	nfs_put_client(clp);
++	goto out_free;
++}
++/* Release one data server: drop its nfs_client ref (if set), then free it. */
++static void
++destroy_ds(struct nfs4_pnfs_ds *ds)
++{
++	dprintk("--> %s\n", __func__);
++	print_ds(ds);
++
++	if (ds->ds_clp)	/* ds_clp stays NULL until a client is attached */
++		nfs_put_client(ds->ds_clp);
++	kfree(ds);
++}
++/* Drop this dsaddr's reference on each of its data servers, then free it. */
++static void
++nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
++{
++	struct nfs4_pnfs_ds *ds;
++	int i;
++
++	dprintk("%s: device id=%s\n", __func__,
++		deviceid_fmt(&dsaddr->deviceid.de_id));
++
++	for (i = 0; i < dsaddr->ds_num; i++) {
++		ds = dsaddr->ds_list[i];
++		if (ds != NULL) {	/* slots may be empty after a failed decode */
++			if (atomic_dec_and_lock(&ds->ds_count,
++						&nfs4_ds_cache_lock)) {
++				list_del_init(&ds->ds_node); /* last ref: unlink */
++				spin_unlock(&nfs4_ds_cache_lock);
++				destroy_ds(ds);
++			}
++		}
++	}
++	kfree(dsaddr->stripe_indices);
++	kfree(dsaddr);
++}
++/* kref release hook: map the kref back to its dsaddr and free that dsaddr. */
++void
++nfs4_fl_free_deviceid_callback(struct kref *kref)
++{
++	struct nfs4_deviceid *device =
++		container_of(kref, struct nfs4_deviceid, de_kref);
++	struct nfs4_file_layout_dsaddr *dsaddr =
++		container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
++
++	nfs4_fl_free_deviceid(dsaddr);
++}
++/* Set *dsp to the cached ds for ip_addr/port, adding one if none exists. */
++static void
++nfs4_pnfs_ds_add(struct inode *inode, struct nfs4_pnfs_ds **dsp,
++		 u32 ip_addr, u32 port, char *r_addr, int len)
++{
++	struct nfs4_pnfs_ds *tmp_ds, *ds;
++
++	*dsp = NULL;
++
++	ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL); /* before taking the lock */
++	if (!ds)
++		return;	/* *dsp stays NULL */
++
++	spin_lock(&nfs4_ds_cache_lock);
++	tmp_ds = _data_server_lookup(ip_addr, port);
++	if (tmp_ds == NULL) {
++		ds->ds_ip_addr = ip_addr;
++		ds->ds_port = port;
++		strncpy(ds->r_addr, r_addr, len);	/* ds is pre-zeroed */
++		atomic_set(&ds->ds_count, 1);
++		INIT_LIST_HEAD(&ds->ds_node);
++		ds->ds_clp = NULL;
++		list_add(&ds->ds_node, &nfs4_data_server_cache);
++		*dsp = ds;
++		dprintk("%s add new data server ip 0x%x\n", __func__,
++				ds->ds_ip_addr);
++		spin_unlock(&nfs4_ds_cache_lock);
++	} else {
++		atomic_inc(&tmp_ds->ds_count);	/* share the cached entry */
++		*dsp = tmp_ds;
++		dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
++				__func__, tmp_ds->ds_ip_addr,
++				atomic_read(&tmp_ds->ds_count));
++		spin_unlock(&nfs4_ds_cache_lock);
++		kfree(ds);	/* preallocated entry not needed */
++	}
++}
++
++static struct nfs4_pnfs_ds *
++decode_and_add_ds(uint32_t **pp, struct inode *inode)
++{
++	struct nfs4_pnfs_ds *ds = NULL;
++	char r_addr[29]; /* max size of ip/port string */
++	int len;
++	u32 ip_addr, port;
++	int tmp[6];
++	uint32_t *p = *pp;
++
++	dprintk("%s enter\n", __func__);
++	/* check and skip r_netid */
++	len = be32_to_cpup(p++);
++	/* "tcp" */
++	if (len != 3) {
++		printk("%s: ERROR: non TCP r_netid len %d\n",
++			__func__, len);
++		goto out_err;
++	}
++	/*
++	 * Read the bytes into a temporary buffer
++	 * XXX: should probably sanity check them
++	 */
++	tmp[0] = be32_to_cpup(p++);
++
++	len = be32_to_cpup(p++);
++	if (len >= sizeof(r_addr)) {
++		printk("%s: ERROR: Device ip/port too long (%d)\n",
++			__func__, len);
++		goto out_err;
++	}
++	memcpy(r_addr, p, len);
++	p += XDR_QUADLEN(len);
++	*pp = p;
++	r_addr[len] = '\0';
++	sscanf(r_addr, "%d.%d.%d.%d.%d.%d", &tmp[0], &tmp[1],
++	       &tmp[2], &tmp[3], &tmp[4], &tmp[5]);
++	ip_addr = htonl((tmp[0]<<24) | (tmp[1]<<16) | (tmp[2]<<8) | (tmp[3]));
++	port = htons((tmp[4] << 8) | (tmp[5]));
++
++	nfs4_pnfs_ds_add(inode, &ds, ip_addr, port, r_addr, len);
++
++	dprintk("%s: addr:port string = %s\n", __func__, r_addr);
++	return ds;
++out_err:
++	dprintk("%s returned NULL\n", __func__);
++	return NULL;
++}
++
++/* Decode opaque device data and return the result */
++static struct nfs4_file_layout_dsaddr*
++decode_device(struct inode *ino, struct pnfs_device *pdev)
++{
++	int i, dummy;
++	u32 cnt, num;
++	u8 *indexp;
++	uint32_t *p = (u32 *)pdev->area, *indicesp;
++	struct nfs4_file_layout_dsaddr *dsaddr;
++
++	/* Get the stripe count (number of stripe index) */
++	cnt = be32_to_cpup(p++);
++	dprintk("%s stripe count  %d\n", __func__, cnt);
++	if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
++		printk(KERN_WARNING "%s: stripe count %d greater than "
++		       "supported maximum %d\n", __func__,
++			cnt, NFS4_PNFS_MAX_STRIPE_CNT);
++		goto out_err;
++	}
++
++	/* Check the multipath list count */
++	indicesp = p;
++	p += XDR_QUADLEN(cnt << 2);
++	num = be32_to_cpup(p++);
++	dprintk("%s ds_num %u\n", __func__, num);
++	if (num > NFS4_PNFS_MAX_MULTI_CNT) {
++		printk(KERN_WARNING "%s: multipath count %d greater than "
++			"supported maximum %d\n", __func__,
++			num, NFS4_PNFS_MAX_MULTI_CNT);
++		goto out_err;
++	}
++	dsaddr = kzalloc(sizeof(*dsaddr) +
++			(sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
++			GFP_KERNEL);
++	if (!dsaddr)
++		goto out_err;
++
++	dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
++	if (!dsaddr->stripe_indices)
++		goto out_err_free;
++
++	dsaddr->stripe_count = cnt;
++	dsaddr->ds_num = num;
++
++	memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id,
++	       NFS4_PNFS_DEVICEID4_SIZE);
++
++	/* Go back an read stripe indices */
++	p = indicesp;
++	indexp = &dsaddr->stripe_indices[0];
++	for (i = 0; i < dsaddr->stripe_count; i++) {
++		dummy = be32_to_cpup(p++);
++		*indexp = dummy; /* bound by NFS4_PNFS_MAX_MULTI_CNT */
++		indexp++;
++	}
++	/* Skip already read multipath list count */
++	p++;
++
++	for (i = 0; i < dsaddr->ds_num; i++) {
++		int j;
++
++		dummy = be32_to_cpup(p++); /* multipath count */
++		if (dummy > 1) {
++			printk(KERN_WARNING
++			       "%s: Multipath count %d not supported, "
++			       "skipping all greater than 1\n", __func__,
++				dummy);
++		}
++		for (j = 0; j < dummy; j++) {
++			if (j == 0) {
++				dsaddr->ds_list[i] = decode_and_add_ds(&p, ino);
++				if (dsaddr->ds_list[i] == NULL)
++					goto out_err_free;
++			} else {
++				u32 len;
++				/* skip extra multipath */
++				len = be32_to_cpup(p++);
++				p += XDR_QUADLEN(len);
++				len = be32_to_cpup(p++);
++				p += XDR_QUADLEN(len);
++				continue;
++			}
++		}
++	}
++	nfs4_init_deviceid_node(&dsaddr->deviceid);
++
++	return dsaddr;
++
++out_err_free:
++	nfs4_fl_free_deviceid(dsaddr);
++out_err:
++	dprintk("%s ERROR: returning NULL\n", __func__);
++	return NULL;
++}
++
++/*
++ * Decode the opaque device in 'dev' and add it to the device-id cache.
++ * If the deviceid is already cached, nfs4_add_deviceid will return
++ * a pointer to the cached struct and throw away the new.
++ * Returns NULL if 'dev' cannot be decoded.
++ */
++static struct nfs4_file_layout_dsaddr*
++decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
++{
++	struct nfs4_file_layout_dsaddr *dsaddr;
++	struct nfs4_deviceid *d;
++
++	dsaddr = decode_device(inode, dev);
++	if (!dsaddr) {
++		printk(KERN_WARNING "%s: Could not decode or add device\n",
++			__func__);
++		return NULL;
++	}
++
++	d = nfs4_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
++			      &dsaddr->deviceid);
++
++	return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
++}
++
++/*
++ * Fetch the device info for dev_id from the server, decode it and add it
++ * to the device-id cache.  Returns the dsaddr, or NULL on any failure.
++ */
++struct nfs4_file_layout_dsaddr *
++get_device_info(struct inode *inode, struct pnfs_deviceid *dev_id)
++{
++	struct pnfs_device *pdev = NULL;
++	u32 max_resp_sz;
++	int max_pages;
++	struct page **pages = NULL;
++	struct nfs4_file_layout_dsaddr *dsaddr = NULL;
++	int rc, i;
++	struct nfs_server *server = NFS_SERVER(inode);
++
++	/*
++	 * Use the session max response size as the basis for setting
++	 * GETDEVICEINFO's maxcount
++	 */
++	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
++	max_pages = max_resp_sz >> PAGE_SHIFT;
++	dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
++		__func__, inode, max_resp_sz, max_pages);
++
++	pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
++	if (pdev == NULL)
++		return NULL;
++
++	pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
++	if (pages == NULL) {
++		kfree(pdev);
++		return NULL;
++	}
++	for (i = 0; i < max_pages; i++) {
++		pages[i] = alloc_page(GFP_KERNEL);
++		if (!pages[i])
++			goto out_free;
++	}
++
++	/* set pdev->area: one contiguous mapping of the reply pages */
++	pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
++	if (!pdev->area)
++		goto out_free;
++
++	memcpy(&pdev->dev_id, dev_id, NFS4_PNFS_DEVICEID4_SIZE);
++	pdev->layout_type = LAYOUT_NFSV4_1_FILES;
++	pdev->pages = pages;
++	pdev->pgbase = 0;
++	pdev->pglen = PAGE_SIZE * max_pages;
++	pdev->mincount = 0;
++	/* TODO: Update types when CB_NOTIFY_DEVICEID is available */
++	pdev->dev_notify_types = 0;
++
++	rc = pnfs_callback_ops->nfs_getdeviceinfo(server, pdev);
++	dprintk("%s getdevice info returns %d\n", __func__, rc);
++	if (rc)
++		goto out_free;
++
++	/*
++	 * Found new device, need to decode it and then add it to the
++	 * list of known devices for this mountpoint.
++	 */
++	dsaddr = decode_and_add_device(inode, pdev);
++out_free:
++	if (pdev->area != NULL)
++		vunmap(pdev->area);
++	for (i = 0; i < max_pages && pages[i]; i++) /* stop at failed alloc */
++		__free_page(pages[i]);
++	kfree(pages);
++	kfree(pdev);
++	dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
++	return dsaddr;
++}
++/* Look up 'id' in clp's device-id cache; returns its dsaddr or NULL. */
++struct nfs4_file_layout_dsaddr *
++nfs4_pnfs_device_item_find(struct nfs_client *clp, struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *d;
++
++	d = nfs4_find_deviceid(clp->cl_devid_cache, id);
++	dprintk("%s device id (%s) nfs4_deviceid %p\n", __func__,
++		deviceid_fmt(id), d);
++	return (d == NULL) ? NULL :
++		container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
++}
++
++/*
++ * res = (offset - pattern_offset) / stripe_unit; fsi = first_stripe_index
++ * Returns (res + fsi) % dsaddr->stripe_count (do_div yields the remainder)
++ */
++static inline u32
++_nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	u64 tmp;
++
++	tmp = offset - flseg->pattern_offset;
++	do_div(tmp, flseg->stripe_unit);	/* tmp = res */
++	tmp += flseg->first_stripe_index;	/* tmp = res + fsi */
++	return do_div(tmp, FILE_DSADDR(lseg)->stripe_count);
++}
++/* Map a file offset to the stripe_indices[] entry for its stripe slot j. */
++u32
++nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	u32 j;
++
++	j = _nfs4_fl_calc_j_index(lseg, offset);
++	return FILE_DSADDR(lseg)->stripe_indices[j];
++}
++/* Pick the fh_array[] entry to use at 'offset'; NULL if sparse with no fh. */
++struct nfs_fh *
++nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	u32 i;
++
++	if (flseg->stripe_type == STRIPE_SPARSE) {
++		if (flseg->num_fh == 1)
++			i = 0;	/* single handle serves every offset */
++		else if (flseg->num_fh == 0)
++			return NULL;
++		else
++			i = nfs4_fl_calc_ds_index(lseg, offset);
++	} else	/* otherwise index by stripe slot j, not by ds index */
++		i = _nfs4_fl_calc_j_index(lseg, offset);
++	return &flseg->fh_array[i];	/* NOTE(review): i not checked vs num_fh */
++}
++/* Return ds_list[ds_idx], creating its client on first use; NULL on error. */
++struct nfs4_pnfs_ds *
++nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	struct nfs4_file_layout_dsaddr *dsaddr;
++
++	dsaddr = FILE_DSADDR(lseg);
++	if (dsaddr->ds_list[ds_idx] == NULL) {
++		printk(KERN_ERR "%s: No data server for device id (%s)!!\n",
++			__func__, deviceid_fmt(&flseg->dev_id));
++		return NULL;
++	}
++
++	if (!dsaddr->ds_list[ds_idx]->ds_clp) {	/* no client attached yet */
++		int err;
++
++		err = nfs4_pnfs_ds_create(PNFS_NFS_SERVER(lseg->layout),
++					  dsaddr->ds_list[ds_idx]);
++		if (err) {
++			printk(KERN_ERR "%s nfs4_pnfs_ds_create error %d\n",
++			       __func__, err);
++			return NULL;
++		}
++	}
++	dprintk("%s: dev_id=%s, ds_idx=%u\n",
++		__func__, deviceid_fmt(&flseg->dev_id), ds_idx);
++
++	return dsaddr->ds_list[ds_idx];
++}
++
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-08-23 12:09:03.335501543 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-08-23 12:09:03.335501543 -0400
+@@ -0,0 +1,97 @@
++/*
++ *  pnfs_nfs4filelayout.h
++ *
++ *  NFSv4 file layout driver data structures.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef FS_NFS_NFS4FILELAYOUT_H
++#define FS_NFS_NFS4FILELAYOUT_H
++
++#include <linux/kref.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/pnfs_xdr.h>
++
++#define NFS4_PNFS_DEV_HASH_BITS 5
++#define NFS4_PNFS_DEV_HASH_SIZE (1 << NFS4_PNFS_DEV_HASH_BITS)
++#define NFS4_PNFS_DEV_HASH_MASK (NFS4_PNFS_DEV_HASH_SIZE - 1)
++
++#define NFS4_PNFS_MAX_STRIPE_CNT 4096
++#define NFS4_PNFS_MAX_MULTI_CNT  64 /* 256 fit into a u8 stripe_index */
++#define NFS4_PNFS_MAX_MULTI_DS   2
++
++#define FILE_DSADDR(lseg) (container_of(lseg->deviceid, \
++					struct nfs4_file_layout_dsaddr, \
++					deviceid))
++
++enum stripetype4 {
++	STRIPE_SPARSE = 1,
++	STRIPE_DENSE = 2
++};
++
++/* Individual ip address: one cached data server, shared between dsaddrs */
++struct nfs4_pnfs_ds {
++	struct list_head	ds_node;  /* link in nfs4_data_server_cache */
++	u32 			ds_ip_addr;	/* network byte order */
++	u32 			ds_port;	/* network byte order */
++	struct nfs_client	*ds_clp;	/* NULL until a client is attached */
++	atomic_t		ds_count;	/* refs from dsaddr ds_list[] slots */
++	char r_addr[29];	/* "a.b.c.d.p1.p2" string as sent by the server */
++};
++
++struct nfs4_file_layout_dsaddr {	/* one decoded file-layout device */
++	struct nfs4_deviceid	deviceid;
++	u32 			stripe_count;	/* entries in stripe_indices */
++	u8			*stripe_indices; /* kzalloc'ed; indexes ds_list[] */
++	u32			ds_num;		/* entries in ds_list */
++	struct nfs4_pnfs_ds	*ds_list[1];	/* allocated with ds_num slots */
++};
++
++struct nfs4_pnfs_dev_hlist {
++	rwlock_t		dev_lock;
++	struct hlist_head	dev_list[NFS4_PNFS_DEV_HASH_SIZE];
++};
++
++struct nfs4_filelayout_segment {
++	u32 stripe_type;
++	u32 commit_through_mds;
++	u32 stripe_unit;
++	u32 first_stripe_index;
++	u64 pattern_offset;
++	struct pnfs_deviceid dev_id;
++	unsigned int num_fh;
++	struct nfs_fh *fh_array;
++};
++
++struct nfs4_filelayout {
++	struct pnfs_layout_type fl_layout;
++	u32 stripe_unit;
++};
++
++extern struct nfs_fh *
++nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, loff_t offset);
++
++static inline struct nfs4_filelayout *
++FILE_LO(struct pnfs_layout_type *lo)
++{
++	return container_of(lo, struct nfs4_filelayout, fl_layout);
++}
++
++extern struct pnfs_client_operations *pnfs_callback_ops;
++
++extern void nfs4_fl_free_deviceid_callback(struct kref *);
++extern void print_ds(struct nfs4_pnfs_ds *ds);
++char *deviceid_fmt(const struct pnfs_deviceid *dev_id);
++u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, loff_t offset);
++struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
++					u32 ds_idx);
++extern struct nfs4_file_layout_dsaddr *
++nfs4_pnfs_device_item_find(struct nfs_client *, struct pnfs_deviceid *dev_id);
++struct nfs4_file_layout_dsaddr *
++get_device_info(struct inode *inode, struct pnfs_deviceid *dev_id);
++
++#endif /* FS_NFS_NFS4FILELAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nfs4_fs.h
+--- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-08-23 12:08:29.047512264 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-23 12:09:03.336490079 -0400
+@@ -45,8 +45,28 @@ enum nfs4_client_state {
+ 	NFS4CLNT_RECLAIM_NOGRACE,
+ 	NFS4CLNT_DELEGRETURN,
+ 	NFS4CLNT_SESSION_RESET,
+-	NFS4CLNT_SESSION_DRAINING,
+ 	NFS4CLNT_RECALL_SLOT,
++	NFS4CLNT_LAYOUT_RECALL,
++};
++
++enum nfs4_session_state {
++	NFS4_SESSION_INITING,
++	NFS4_SESSION_DRAINING,
++};
++
++struct nfs4_minor_version_ops {
++	u32	minor_version;
++
++	int	(*call_sync)(struct nfs_server *server,
++			struct rpc_message *msg,
++			struct nfs4_sequence_args *args,
++			struct nfs4_sequence_res *res,
++			int cache_reply);
++	int	(*validate_stateid)(struct nfs_delegation *,
++			const nfs4_stateid *);
++	const struct nfs4_state_recovery_ops *reboot_recovery_ops;
++	const struct nfs4_state_recovery_ops *nograce_recovery_ops;
++	const struct nfs4_state_maintenance_ops *state_renewal_ops;
+ };
+ 
+ /*
+@@ -89,7 +109,6 @@ struct nfs_unique_id {
+  */
+ struct nfs4_state_owner {
+ 	struct nfs_unique_id so_owner_id;
+-	struct nfs_client    *so_client;
+ 	struct nfs_server    *so_server;
+ 	struct rb_node	     so_client_node;
+ 
+@@ -99,7 +118,6 @@ struct nfs4_state_owner {
+ 	atomic_t	     so_count;
+ 	unsigned long	     so_flags;
+ 	struct list_head     so_states;
+-	struct list_head     so_delegations;
+ 	struct nfs_seqid_counter so_seqid;
+ 	struct rpc_sequence  so_sequence;
+ };
+@@ -125,10 +143,20 @@ enum {
+  * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
+  */
+ 
++struct nfs4_lock_owner {
++	unsigned int lo_type;
++#define NFS4_ANY_LOCK_TYPE	(0U)
++#define NFS4_FLOCK_LOCK_TYPE	(1U << 0)
++#define NFS4_POSIX_LOCK_TYPE	(1U << 1)
++	union {
++		fl_owner_t posix_owner;
++		pid_t flock_owner;
++	} lo_u;
++};
++
+ struct nfs4_lock_state {
+ 	struct list_head	ls_locks;	/* Other lock stateids */
+ 	struct nfs4_state *	ls_state;	/* Pointer to open state */
+-	fl_owner_t		ls_owner;	/* POSIX lock owner */
+ #define NFS_LOCK_INITIALIZED 1
+ 	int			ls_flags;
+ 	struct nfs_seqid_counter	ls_seqid;
+@@ -136,6 +164,7 @@ struct nfs4_lock_state {
+ 	struct nfs_unique_id	ls_id;
+ 	nfs4_stateid		ls_stateid;
+ 	atomic_t		ls_count;
++	struct nfs4_lock_owner	ls_owner;
+ };
+ 
+ /* bits for nfs4_state->flags */
+@@ -219,22 +248,34 @@ extern int nfs4_open_revalidate(struct i
+ extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
+ extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
+ 		struct nfs4_fs_locations *fs_locations, struct page *page);
++extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
+ 
+-extern struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[];
+-extern struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[];
+ #if defined(CONFIG_NFS_V4_1)
+-extern int nfs4_setup_sequence(struct nfs_client *clp,
++static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
++{
++	return server->nfs_client->cl_session;
++}
++
++extern int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
+ 		int cache_reply, struct rpc_task *task);
+ extern void nfs4_destroy_session(struct nfs4_session *session);
+ extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
++extern int nfs4_proc_exchange_id(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_proc_create_session(struct nfs_client *);
+ extern int nfs4_proc_destroy_session(struct nfs4_session *);
+ extern int nfs4_init_session(struct nfs_server *server);
+ extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
+ 		struct nfs_fsinfo *fsinfo);
+ #else /* CONFIG_NFS_v4_1 */
+-static inline int nfs4_setup_sequence(struct nfs_client *clp,
++static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
++{
++	return NULL;
++}
++
++static inline int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
+ 		int cache_reply, struct rpc_task *task)
+ {
+@@ -247,12 +288,12 @@ static inline int nfs4_init_session(stru
+ }
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+-extern struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[];
++extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
+ 
+ extern const u32 nfs4_fattr_bitmap[2];
+ extern const u32 nfs4_statfs_bitmap[2];
+ extern const u32 nfs4_pathconf_bitmap[2];
+-extern const u32 nfs4_fsinfo_bitmap[2];
++extern const u32 nfs4_fsinfo_bitmap[3];
+ extern const u32 nfs4_fs_locations_bitmap[2];
+ 
+ /* nfs4renewd.c */
+@@ -284,7 +325,7 @@ extern void nfs41_handle_sequence_flag_e
+ extern void nfs41_handle_recall_slot(struct nfs_client *clp);
+ extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
+ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
+-extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
++extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
+ 
+ extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
+ extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
+@@ -293,6 +334,7 @@ extern void nfs_increment_lock_seqid(int
+ extern void nfs_release_seqid(struct nfs_seqid *seqid);
+ extern void nfs_free_seqid(struct nfs_seqid *seqid);
+ 
++/* write.c */
+ extern const nfs4_stateid zero_stateid;
+ 
+ /* nfs4xdr.c */
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-08-23 12:08:29.050481368 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-23 12:09:03.339481253 -0400
+@@ -49,12 +49,15 @@
+ #include <linux/mount.h>
+ #include <linux/module.h>
+ #include <linux/sunrpc/bc_xprt.h>
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs4_pnfs.h>
+ 
+ #include "nfs4_fs.h"
+ #include "delegation.h"
+ #include "internal.h"
+ #include "iostat.h"
+ #include "callback.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_PROC
+ 
+@@ -67,7 +70,7 @@ struct nfs4_opendata;
+ static int _nfs4_proc_open(struct nfs4_opendata *data);
+ static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
+ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
+-static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
++static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, struct nfs_client *);
+ static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
+ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
+ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
+@@ -125,11 +128,16 @@ const u32 nfs4_pathconf_bitmap[2] = {
+ 	0
+ };
+ 
+-const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
++const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE
+ 			| FATTR4_WORD0_MAXREAD
+ 			| FATTR4_WORD0_MAXWRITE
+ 			| FATTR4_WORD0_LEASE_TIME,
++#ifdef CONFIG_NFS_V4_1
++			FATTR4_WORD1_FS_LAYOUT_TYPES,
++			FATTR4_WORD2_LAYOUT_BLKSIZE
++#else /* CONFIG_NFS_V4_1 */
+ 			0
++#endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+ const u32 nfs4_fs_locations_bitmap[2] = {
+@@ -356,7 +364,7 @@ static void nfs41_check_drain_session_co
+ {
+ 	struct rpc_task *task;
+ 
+-	if (!test_bit(NFS4CLNT_SESSION_DRAINING, &ses->clp->cl_state)) {
++	if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
+ 		task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq);
+ 		if (task)
+ 			rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+@@ -370,12 +378,11 @@ static void nfs41_check_drain_session_co
+ 	complete(&ses->complete);
+ }
+ 
+-static void nfs41_sequence_free_slot(const struct nfs_client *clp,
+-			      struct nfs4_sequence_res *res)
++static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
+ {
+ 	struct nfs4_slot_table *tbl;
+ 
+-	tbl = &clp->cl_session->fc_slot_table;
++	tbl = &res->sr_session->fc_slot_table;
+ 	if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) {
+ 		/* just wake up the next guy waiting since
+ 		 * we may have not consumed a slot after all */
+@@ -385,18 +392,17 @@ static void nfs41_sequence_free_slot(con
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+ 	nfs4_free_slot(tbl, res->sr_slotid);
+-	nfs41_check_drain_session_complete(clp->cl_session);
++	nfs41_check_drain_session_complete(res->sr_session);
+ 	spin_unlock(&tbl->slot_tbl_lock);
+ 	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+ }
+ 
+-static void nfs41_sequence_done(struct nfs_client *clp,
+-				struct nfs4_sequence_res *res,
+-				int rpc_status)
++static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
+ {
+ 	unsigned long timestamp;
+ 	struct nfs4_slot_table *tbl;
+ 	struct nfs4_slot *slot;
++	struct nfs_client *clp;
+ 
+ 	/*
+ 	 * sr_status remains 1 if an RPC level error occurred. The server
+@@ -411,13 +417,16 @@ static void nfs41_sequence_done(struct n
+ 	if (res->sr_slotid == NFS4_MAX_SLOT_TABLE)
+ 		goto out;
+ 
++	tbl = &res->sr_session->fc_slot_table;
++	slot = tbl->slots + res->sr_slotid;
++
+ 	/* Check the SEQUENCE operation status */
+-	if (res->sr_status == 0) {
+-		tbl = &clp->cl_session->fc_slot_table;
+-		slot = tbl->slots + res->sr_slotid;
++	switch (res->sr_status) {
++	case 0:
+ 		/* Update the slot's sequence and clientid lease timer */
+ 		++slot->seq_nr;
+ 		timestamp = res->sr_renewal_time;
++		clp = res->sr_session->clp;
+ 		spin_lock(&clp->cl_lock);
+ 		if (time_before(clp->cl_last_renewal, timestamp))
+ 			clp->cl_last_renewal = timestamp;
+@@ -425,11 +434,39 @@ static void nfs41_sequence_done(struct n
+ 		/* Check sequence flags */
+ 		if (atomic_read(&clp->cl_count) > 1)
+ 			nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
++		break;
++	case -NFS4ERR_DELAY:
++		/* The server detected a resend of the RPC call and
++		 * returned NFS4ERR_DELAY as per Section 2.10.6.2
++		 * of RFC5661.
++		 */
++		dprintk("%s: slot=%d seq=%d: Operation in progress\n",
++				__func__, res->sr_slotid, slot->seq_nr);
++		goto out_retry;
++	default:
++		/* Just update the slot sequence no. */
++		++slot->seq_nr;
+ 	}
+ out:
+ 	/* The session may be reset by one of the error handlers. */
+ 	dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
+-	nfs41_sequence_free_slot(clp, res);
++	nfs41_sequence_free_slot(res);
++	return 1;
++out_retry:
++	rpc_delay(task, NFS4_POLL_RETRY_MAX);
++	rpc_restart_call(task);
++	/* FIXME: rpc_restart_call() should be made to return success/fail */
++	if (RPC_ASSASSINATED(task))
++		goto out;
++	return 0;
++}
++
++static int nfs4_sequence_done(struct rpc_task *task,
++			       struct nfs4_sequence_res *res)
++{
++	if (res->sr_session == NULL)	/* call was set up without a session */
++		return 1;
++	return nfs41_sequence_done(task, res);
+ }
+ 
+ /*
+@@ -480,12 +517,11 @@ static int nfs41_setup_sequence(struct n
+ 	if (res->sr_slotid != NFS4_MAX_SLOT_TABLE)
+ 		return 0;
+ 
+-	memset(res, 0, sizeof(*res));
+ 	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+ 	tbl = &session->fc_slot_table;
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+-	if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state) &&
++	if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
+ 	    !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
+ 		/*
+ 		 * The state manager will wait until the slot table is empty.
+@@ -525,6 +561,7 @@ static int nfs41_setup_sequence(struct n
+ 	res->sr_session = session;
+ 	res->sr_slotid = slotid;
+ 	res->sr_renewal_time = jiffies;
++	res->sr_status_flags = 0;
+ 	/*
+ 	 * sr_status is only set in decode_sequence, and so will remain
+ 	 * set to 1 if an rpc level failure occurs.
+@@ -533,33 +570,36 @@ static int nfs41_setup_sequence(struct n
+ 	return 0;
+ }
+ 
+-int nfs4_setup_sequence(struct nfs_client *clp,
++int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 			struct nfs4_sequence_args *args,
+ 			struct nfs4_sequence_res *res,
+ 			int cache_reply,
+ 			struct rpc_task *task)
+ {
++	struct nfs4_session *session = nfs4_get_session(server);
+ 	int ret = 0;
+ 
++	if (ds_session)
++		session = ds_session;
++	if (session == NULL) {
++		args->sa_session = NULL;
++		res->sr_session = NULL;
++		goto out;
++	}
++
+ 	dprintk("--> %s clp %p session %p sr_slotid %d\n",
+-		__func__, clp, clp->cl_session, res->sr_slotid);
++		__func__, session->clp, session, res->sr_slotid);
+ 
+-	if (!nfs4_has_session(clp))
+-		goto out;
+-	ret = nfs41_setup_sequence(clp->cl_session, args, res, cache_reply,
++	ret = nfs41_setup_sequence(session, args, res, cache_reply,
+ 				   task);
+-	if (ret && ret != -EAGAIN) {
+-		/* terminate rpc task */
+-		task->tk_status = ret;
+-		task->tk_action = NULL;
+-	}
+ out:
+ 	dprintk("<-- %s status=%d\n", __func__, ret);
+ 	return ret;
+ }
+ 
+ struct nfs41_call_sync_data {
+-	struct nfs_client *clp;
++	const struct nfs_server *seq_server;
+ 	struct nfs4_sequence_args *seq_args;
+ 	struct nfs4_sequence_res *seq_res;
+ 	int cache_reply;
+@@ -569,9 +609,9 @@ static void nfs41_call_sync_prepare(stru
+ {
+ 	struct nfs41_call_sync_data *data = calldata;
+ 
+-	dprintk("--> %s data->clp->cl_session %p\n", __func__,
+-		data->clp->cl_session);
+-	if (nfs4_setup_sequence(data->clp, data->seq_args,
++	dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server);
++
++	if (nfs4_setup_sequence(data->seq_server, NULL, data->seq_args,
+ 				data->seq_res, data->cache_reply, task))
+ 		return;
+ 	rpc_call_start(task);
+@@ -587,7 +627,7 @@ static void nfs41_call_sync_done(struct 
+ {
+ 	struct nfs41_call_sync_data *data = calldata;
+ 
+-	nfs41_sequence_done(data->clp, data->seq_res, task->tk_status);
++	nfs41_sequence_done(task, data->seq_res);
+ }
+ 
+ struct rpc_call_ops nfs41_call_sync_ops = {
+@@ -600,8 +640,7 @@ struct rpc_call_ops nfs41_call_priv_sync
+ 	.rpc_call_done = nfs41_call_sync_done,
+ };
+ 
+-static int nfs4_call_sync_sequence(struct nfs_client *clp,
+-				   struct rpc_clnt *clnt,
++static int nfs4_call_sync_sequence(struct nfs_server *server,
+ 				   struct rpc_message *msg,
+ 				   struct nfs4_sequence_args *args,
+ 				   struct nfs4_sequence_res *res,
+@@ -611,13 +650,13 @@ static int nfs4_call_sync_sequence(struc
+ 	int ret;
+ 	struct rpc_task *task;
+ 	struct nfs41_call_sync_data data = {
+-		.clp = clp,
++		.seq_server = server,
+ 		.seq_args = args,
+ 		.seq_res = res,
+ 		.cache_reply = cache_reply,
+ 	};
+ 	struct rpc_task_setup task_setup = {
+-		.rpc_client = clnt,
++		.rpc_client = server->client,
+ 		.rpc_message = msg,
+ 		.callback_ops = &nfs41_call_sync_ops,
+ 		.callback_data = &data
+@@ -642,10 +681,15 @@ int _nfs4_call_sync_session(struct nfs_s
+ 			    struct nfs4_sequence_res *res,
+ 			    int cache_reply)
+ {
+-	return nfs4_call_sync_sequence(server->nfs_client, server->client,
+-				       msg, args, res, cache_reply, 0);
++	return nfs4_call_sync_sequence(server, msg, args, res, cache_reply, 0);
+ }
+ 
++#else
++static int nfs4_sequence_done(struct rpc_task *task,
++			       struct nfs4_sequence_res *res)
++{
++	return 1;
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ int _nfs4_call_sync(struct nfs_server *server,
+@@ -659,18 +703,9 @@ int _nfs4_call_sync(struct nfs_server *s
+ }
+ 
+ #define nfs4_call_sync(server, msg, args, res, cache_reply) \
+-	(server)->nfs_client->cl_call_sync((server), (msg), &(args)->seq_args, \
++	(server)->nfs_client->cl_mvops->call_sync((server), (msg), &(args)->seq_args, \
+ 			&(res)->seq_res, (cache_reply))
+ 
+-static void nfs4_sequence_done(const struct nfs_server *server,
+-			       struct nfs4_sequence_res *res, int rpc_status)
+-{
+-#ifdef CONFIG_NFS_V4_1
+-	if (nfs4_has_session(server->nfs_client))
+-		nfs41_sequence_done(server->nfs_client, res, rpc_status);
+-#endif /* CONFIG_NFS_V4_1 */
+-}
+-
+ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
+ {
+ 	struct nfs_inode *nfsi = NFS_I(dir);
+@@ -745,19 +780,14 @@ static struct nfs4_opendata *nfs4_openda
+ 	p->o_arg.server = server;
+ 	p->o_arg.bitmask = server->attr_bitmask;
+ 	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
+-	if (flags & O_EXCL) {
+-		if (nfs4_has_persistent_session(server->nfs_client)) {
+-			/* GUARDED */
+-			p->o_arg.u.attrs = &p->attrs;
+-			memcpy(&p->attrs, attrs, sizeof(p->attrs));
+-		} else { /* EXCLUSIVE4_1 */
+-			u32 *s = (u32 *) p->o_arg.u.verifier.data;
+-			s[0] = jiffies;
+-			s[1] = current->pid;
+-		}
+-	} else if (flags & O_CREAT) {
++	if (flags & O_CREAT) {
++		u32 *s;
++
+ 		p->o_arg.u.attrs = &p->attrs;
+ 		memcpy(&p->attrs, attrs, sizeof(p->attrs));
++		s = (u32 *) p->o_arg.u.verifier.data;
++		s[0] = jiffies;
++		s[1] = current->pid;
+ 	}
+ 	p->c_arg.fh = &p->o_res.fh;
+ 	p->c_arg.stateid = &p->o_res.stateid;
+@@ -851,8 +881,10 @@ static void update_open_stateflags(struc
+ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
+ {
+ 	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+-		memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
+-	memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
++		memcpy(state->stateid.u.data, stateid->u.data,
++		       sizeof(state->stateid.u.data));
++	memcpy(state->open_stateid.u.data, stateid->u.data,
++	       sizeof(state->open_stateid.u.data));
+ 	switch (fmode) {
+ 		case FMODE_READ:
+ 			set_bit(NFS_O_RDONLY_STATE, &state->flags);
+@@ -880,7 +912,8 @@ static void __update_open_stateid(struct
+ 	 */
+ 	write_seqlock(&state->seqlock);
+ 	if (deleg_stateid != NULL) {
+-		memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
++		memcpy(state->stateid.u.data, deleg_stateid->u.data,
++		       sizeof(state->stateid.u.data));
+ 		set_bit(NFS_DELEGATED_STATE, &state->flags);
+ 	}
+ 	if (open_stateid != NULL)
+@@ -911,7 +944,8 @@ static int update_open_stateid(struct nf
+ 
+ 	if (delegation == NULL)
+ 		delegation = &deleg_cur->stateid;
+-	else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0)
++	else if (memcmp(deleg_cur->stateid.u.data, delegation->u.data,
++			NFS4_STATEID_SIZE) != 0)
+ 		goto no_delegation_unlock;
+ 
+ 	nfs_mark_delegation_referenced(deleg_cur);
+@@ -973,7 +1007,8 @@ static struct nfs4_state *nfs4_try_open_
+ 			break;
+ 		}
+ 		/* Save the delegation */
+-		memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
++		memcpy(stateid.u.data, delegation->stateid.u.data,
++		       sizeof(stateid.u.data));
+ 		rcu_read_unlock();
+ 		ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
+ 		if (ret != 0)
+@@ -1127,10 +1162,13 @@ static int nfs4_open_recover(struct nfs4
+ 	 * Check if we need to update the current stateid.
+ 	 */
+ 	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
+-	    memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
++	    memcmp(state->stateid.u.data, state->open_stateid.u.data,
++		   sizeof(state->stateid.u.data)) != 0) {
+ 		write_seqlock(&state->seqlock);
+ 		if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+-			memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
++			memcpy(state->stateid.u.data,
++			       state->open_stateid.u.data,
++			       sizeof(state->stateid.u.data));
+ 		write_sequnlock(&state->seqlock);
+ 	}
+ 	return 0;
+@@ -1199,8 +1237,8 @@ static int _nfs4_open_delegation_recall(
+ 	if (IS_ERR(opendata))
+ 		return PTR_ERR(opendata);
+ 	opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
+-	memcpy(opendata->o_arg.u.delegation.data, stateid->data,
+-			sizeof(opendata->o_arg.u.delegation.data));
++	memcpy(opendata->o_arg.u.delegation.u.data, stateid->u.data,
++			sizeof(opendata->o_arg.u.delegation.u.data));
+ 	ret = nfs4_open_recover(opendata, state);
+ 	nfs4_opendata_put(opendata);
+ 	return ret;
+@@ -1258,8 +1296,8 @@ static void nfs4_open_confirm_done(struc
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+ 	if (data->rpc_status == 0) {
+-		memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
+-				sizeof(data->o_res.stateid.data));
++		memcpy(data->o_res.stateid.u.data, data->c_res.stateid.u.data,
++				sizeof(data->o_res.stateid.u.data));
+ 		nfs_confirm_seqid(&data->owner->so_seqid, 0);
+ 		renew_lease(data->o_res.server, data->timestamp);
+ 		data->rpc_done = 1;
+@@ -1356,13 +1394,13 @@ static void nfs4_open_prepare(struct rpc
+ 	}
+ 	/* Update sequence id. */
+ 	data->o_arg.id = sp->so_owner_id.id;
+-	data->o_arg.clientid = sp->so_client->cl_clientid;
++	data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid;
+ 	if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
+ 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
+ 		nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
+ 	}
+ 	data->timestamp = jiffies;
+-	if (nfs4_setup_sequence(data->o_arg.server->nfs_client,
++	if (nfs4_setup_sequence(data->o_arg.server, NULL,
+ 				&data->o_arg.seq_args,
+ 				&data->o_res.seq_res, 1, task))
+ 		return;
+@@ -1385,8 +1423,8 @@ static void nfs4_open_done(struct rpc_ta
+ 
+ 	data->rpc_status = task->tk_status;
+ 
+-	nfs4_sequence_done(data->o_arg.server, &data->o_res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->o_res.seq_res))
++		return;
+ 
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+@@ -1539,9 +1577,8 @@ static int _nfs4_proc_open(struct nfs4_o
+ 	return 0;
+ }
+ 
+-static int nfs4_recover_expired_lease(struct nfs_server *server)
++int nfs4_recover_expired_lease(struct nfs_client *clp)
+ {
+-	struct nfs_client *clp = server->nfs_client;
+ 	unsigned int loop;
+ 	int ret;
+ 
+@@ -1557,6 +1594,7 @@ static int nfs4_recover_expired_lease(st
+ 	}
+ 	return ret;
+ }
++EXPORT_SYMBOL(nfs4_recover_expired_lease);
+ 
+ /*
+  * OPEN_EXPIRED:
+@@ -1646,7 +1684,7 @@ static int _nfs4_do_open(struct inode *d
+ 		dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
+ 		goto out_err;
+ 	}
+-	status = nfs4_recover_expired_lease(server);
++	status = nfs4_recover_expired_lease(server->nfs_client);
+ 	if (status != 0)
+ 		goto err_put_state_owner;
+ 	if (path->dentry->d_inode != NULL)
+@@ -1773,7 +1811,7 @@ static int _nfs4_do_setattr(struct inode
+ 	if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
+ 		/* Use that stateid */
+ 	} else if (state != NULL) {
+-		nfs4_copy_stateid(&arg.stateid, state, current->files);
++		nfs4_copy_stateid(&arg.stateid, state, current->files, current->tgid);
+ 	} else
+ 		memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
+ 
+@@ -1838,7 +1876,8 @@ static void nfs4_close_done(struct rpc_t
+ 	struct nfs4_state *state = calldata->state;
+ 	struct nfs_server *server = NFS_SERVER(calldata->inode);
+ 
+-	nfs4_sequence_done(server, &calldata->res.seq_res, task->tk_status);
++	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
++		return;
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+         /* hmm. we are done with the inode, and in the process of freeing
+@@ -1858,7 +1897,7 @@ static void nfs4_close_done(struct rpc_t
+ 			if (calldata->arg.fmode == 0)
+ 				break;
+ 		default:
+-			if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
++			if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
+ 				rpc_restart_call_prepare(task);
+ 	}
+ 	nfs_release_seqid(calldata->arg.seqid);
+@@ -1903,7 +1942,7 @@ static void nfs4_close_prepare(struct rp
+ 
+ 	nfs_fattr_init(calldata->res.fattr);
+ 	calldata->timestamp = jiffies;
+-	if (nfs4_setup_sequence((NFS_SERVER(calldata->inode))->nfs_client,
++	if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), NULL,
+ 				&calldata->arg.seq_args, &calldata->res.seq_res,
+ 				1, task))
+ 		return;
+@@ -2323,6 +2362,9 @@ nfs4_proc_setattr(struct dentry *dentry,
+ 	struct nfs4_state *state = NULL;
+ 	int status;
+ 
++	if (pnfs_ld_layoutret_on_setattr(inode))
++		pnfs_return_layout(inode, NULL, NULL, RETURN_FILE, true);
++
+ 	nfs_fattr_init(fattr);
+ 	
+ 	/* Search for an existing open(O_WRITE) file */
+@@ -2648,8 +2690,9 @@ static int nfs4_proc_unlink_done(struct 
+ {
+ 	struct nfs_removeres *res = task->tk_msg.rpc_resp;
+ 
+-	nfs4_sequence_done(res->server, &res->seq_res, task->tk_status);
+-	if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
++	if (!nfs4_sequence_done(task, &res->seq_res))
++		return 0;
++	if (nfs4_async_handle_error(task, res->server, NULL, NULL) == -EAGAIN)
+ 		return 0;
+ 	update_changeattr(dir, &res->cinfo);
+ 	nfs_post_op_update_inode(dir, res->dir_attr);
+@@ -3090,18 +3133,31 @@ static int nfs4_proc_pathconf(struct nfs
+ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
+ {
+ 	struct nfs_server *server = NFS_SERVER(data->inode);
++	struct nfs_client *client = server->nfs_client;
+ 
+ 	dprintk("--> %s\n", __func__);
+ 
+-	nfs4_sequence_done(server, &data->res.seq_res, task->tk_status);
++#ifdef CONFIG_NFS_V4_1
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
+ 
+-	if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
+-		nfs_restart_rpc(task, server->nfs_client);
++	if (nfs4_async_handle_error(task, server, data->args.context->state, client) == -EAGAIN) {
++		nfs_restart_rpc(task, client);
+ 		return -EAGAIN;
+ 	}
+ 
+ 	nfs_invalidate_atime(data->inode);
+-	if (task->tk_status > 0)
++	if (task->tk_status > 0 && client == server->nfs_client)
+ 		renew_lease(server, data->timestamp);
+ 	return 0;
+ }
+@@ -3112,20 +3168,56 @@ static void nfs4_proc_read_setup(struct 
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+ }
+ 
++static void pnfs4_update_write_done(struct nfs_inode *nfsi, struct nfs_write_data *data)
++{
++#ifdef CONFIG_NFS_V4_1
++	pnfs_update_last_write(nfsi, data->args.offset, data->res.count);
++	pnfs_need_layoutcommit(nfsi, data->args.context);
++#endif /* CONFIG_NFS_V4_1 */
++}
++
+ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+ {
+ 	struct inode *inode = data->inode;
+-	
+-	nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
+-			   task->tk_status);
++	struct nfs_server *server = NFS_SERVER(inode);
++	struct nfs_client *client = server->nfs_client;
+ 
+-	if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
+-		nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
++
++#ifdef CONFIG_NFS_V4_1
++	/* restore original count after retry? */
++	if (data->pdata.orig_count) {
++		dprintk("%s: restoring original count %u\n", __func__,
++			data->pdata.orig_count);
++		data->args.count = data->pdata.orig_count;
++	}
++
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (nfs4_async_handle_error(task, server, data->args.context->state, client) == -EAGAIN) {
++		nfs_restart_rpc(task, client);
+ 		return -EAGAIN;
+ 	}
++
++	/*
++	 * MDS write: renew lease
++	 * DS write: update lastbyte written, mark for layout commit
++	 */
+ 	if (task->tk_status >= 0) {
+-		renew_lease(NFS_SERVER(inode), data->timestamp);
+-		nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
++		if (client == server->nfs_client) {
++			renew_lease(server, data->timestamp);
++			nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
++		} else
++			pnfs4_update_write_done(NFS_I(inode), data);
+ 	}
+ 	return 0;
+ }
+@@ -3138,20 +3230,42 @@ static void nfs4_proc_write_setup(struct
+ 	data->res.server = server;
+ 	data->timestamp   = jiffies;
+ 
++#ifdef CONFIG_NFS_V4_1
++	/* writes to DS use pnfs vector */
++	if (data->fldata.ds_nfs_client) {
++		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_WRITE];
++		return;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
+ }
+ 
+ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+ {
+ 	struct inode *inode = data->inode;
+-	
+-	nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
+-			   task->tk_status);
+-	if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
++	struct nfs_server *server = NFS_SERVER(data->inode);
++	struct nfs_client *client = server->nfs_client;
++
++#ifdef CONFIG_NFS_V4_1
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS commit\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
++
++	if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL, NULL) == -EAGAIN) {
+ 		nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
+ 		return -EAGAIN;
+ 	}
+-	nfs_refresh_inode(inode, data->res.fattr);
++	if (client == server->nfs_client)
++		nfs_refresh_inode(inode, data->res.fattr);
+ 	return 0;
+ }
+ 
+@@ -3161,6 +3275,12 @@ static void nfs4_proc_commit_setup(struc
+ 	
+ 	data->args.bitmask = server->cache_consistency_bitmask;
+ 	data->res.server = server;
++#if defined(CONFIG_NFS_V4_1)
++	if (data->fldata.ds_nfs_client) {
++		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_COMMIT];
++		return;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
+ }
+ 
+@@ -3464,9 +3584,12 @@ static int nfs4_proc_set_acl(struct inod
+ }
+ 
+ static int
+-_nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs_client *clp, struct nfs4_state *state)
++nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state, struct nfs_client *clp)
+ {
+-	if (!clp || task->tk_status >= 0)
++	if (!clp)
++		clp = server->nfs_client;
++
++	if (task->tk_status >= 0)
+ 		return 0;
+ 	switch(task->tk_status) {
+ 		case -NFS4ERR_ADMIN_REVOKED:
+@@ -3491,8 +3614,9 @@ _nfs4_async_handle_error(struct rpc_task
+ 		case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ 		case -NFS4ERR_SEQ_FALSE_RETRY:
+ 		case -NFS4ERR_SEQ_MISORDERED:
+-			dprintk("%s ERROR %d, Reset session\n", __func__,
+-				task->tk_status);
++			dprintk("%s ERROR %d, Reset session. Exchangeid "
++				"flags 0x%x\n", __func__, task->tk_status,
++				clp->cl_exchange_flags);
+ 			nfs4_schedule_state_recovery(clp);
+ 			task->tk_status = 0;
+ 			return -EAGAIN;
+@@ -3512,6 +3636,8 @@ _nfs4_async_handle_error(struct rpc_task
+ 	task->tk_status = nfs4_map_errors(task->tk_status);
+ 	return 0;
+ do_state_recovery:
++	if (is_ds_only_client(clp))
++		return 0;
+ 	rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
+ 	nfs4_schedule_state_recovery(clp);
+ 	if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
+@@ -3520,12 +3646,6 @@ do_state_recovery:
+ 	return -EAGAIN;
+ }
+ 
+-static int
+-nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
+-{
+-	return _nfs4_async_handle_error(task, server, server->nfs_client, state);
+-}
+-
+ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
+ 		unsigned short port, struct rpc_cred *cred,
+ 		struct nfs4_setclientid_res *res)
+@@ -3641,8 +3761,8 @@ static void nfs4_delegreturn_done(struct
+ {
+ 	struct nfs4_delegreturndata *data = calldata;
+ 
+-	nfs4_sequence_done(data->res.server, &data->res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return;
+ 
+ 	switch (task->tk_status) {
+ 	case -NFS4ERR_STALE_STATEID:
+@@ -3651,8 +3771,8 @@ static void nfs4_delegreturn_done(struct
+ 		renew_lease(data->res.server, data->timestamp);
+ 		break;
+ 	default:
+-		if (nfs4_async_handle_error(task, data->res.server, NULL) ==
+-				-EAGAIN) {
++		if (nfs4_async_handle_error(task, data->res.server, NULL, NULL)
++				== -EAGAIN) {
+ 			nfs_restart_rpc(task, data->res.server->nfs_client);
+ 			return;
+ 		}
+@@ -3672,7 +3792,7 @@ static void nfs4_delegreturn_prepare(str
+ 
+ 	d_data = (struct nfs4_delegreturndata *)data;
+ 
+-	if (nfs4_setup_sequence(d_data->res.server->nfs_client,
++	if (nfs4_setup_sequence(d_data->res.server, NULL,
+ 				&d_data->args.seq_args,
+ 				&d_data->res.seq_res, 1, task))
+ 		return;
+@@ -3892,15 +4012,16 @@ static void nfs4_locku_done(struct rpc_t
+ {
+ 	struct nfs4_unlockdata *calldata = data;
+ 
+-	nfs4_sequence_done(calldata->server, &calldata->res.seq_res,
+-			   task->tk_status);
++	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
++		return;
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+ 	switch (task->tk_status) {
+ 		case 0:
+-			memcpy(calldata->lsp->ls_stateid.data,
+-					calldata->res.stateid.data,
+-					sizeof(calldata->lsp->ls_stateid.data));
++			memcpy(calldata->lsp->ls_stateid.u.data,
++					calldata->res.stateid.u.data,
++					sizeof(calldata->lsp->ls_stateid.u.
++					       data));
+ 			renew_lease(calldata->server, calldata->timestamp);
+ 			break;
+ 		case -NFS4ERR_BAD_STATEID:
+@@ -3909,7 +4030,7 @@ static void nfs4_locku_done(struct rpc_t
+ 		case -NFS4ERR_EXPIRED:
+ 			break;
+ 		default:
+-			if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
++			if (nfs4_async_handle_error(task, calldata->server, NULL, NULL) == -EAGAIN)
+ 				nfs_restart_rpc(task,
+ 						 calldata->server->nfs_client);
+ 	}
+@@ -3927,7 +4048,7 @@ static void nfs4_locku_prepare(struct rp
+ 		return;
+ 	}
+ 	calldata->timestamp = jiffies;
+-	if (nfs4_setup_sequence(calldata->server->nfs_client,
++	if (nfs4_setup_sequence(calldata->server, NULL,
+ 				&calldata->arg.seq_args,
+ 				&calldata->res.seq_res, 1, task))
+ 		return;
+@@ -4082,7 +4203,8 @@ static void nfs4_lock_prepare(struct rpc
+ 	} else
+ 		data->arg.new_lock_owner = 0;
+ 	data->timestamp = jiffies;
+-	if (nfs4_setup_sequence(data->server->nfs_client, &data->arg.seq_args,
++	if (nfs4_setup_sequence(data->server, NULL,
++				&data->arg.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+@@ -4101,8 +4223,8 @@ static void nfs4_lock_done(struct rpc_ta
+ 
+ 	dprintk("%s: begin!\n", __func__);
+ 
+-	nfs4_sequence_done(data->server, &data->res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return;
+ 
+ 	data->rpc_status = task->tk_status;
+ 	if (RPC_ASSASSINATED(task))
+@@ -4114,8 +4236,8 @@ static void nfs4_lock_done(struct rpc_ta
+ 			goto out;
+ 	}
+ 	if (data->rpc_status == 0) {
+-		memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
+-					sizeof(data->lsp->ls_stateid.data));
++		memcpy(data->lsp->ls_stateid.u.data, data->res.stateid.u.data,
++					sizeof(data->lsp->ls_stateid.u.data));
+ 		data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+ 		renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
+ 	}
+@@ -4424,6 +4546,34 @@ out:
+ 	return err;
+ }
+ 
++static void nfs4_release_lockowner_release(void *calldata)
++{
++	kfree(calldata);
++}
++
++const struct rpc_call_ops nfs4_release_lockowner_ops = {
++	.rpc_release = nfs4_release_lockowner_release,
++};
++
++void nfs4_release_lockowner(const struct nfs4_lock_state *lsp)
++{
++	struct nfs_server *server = lsp->ls_state->owner->so_server;
++	struct nfs_release_lockowner_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER],
++	};
++
++	if (server->nfs_client->cl_mvops->minor_version != 0)
++		return;
++	args = kmalloc(sizeof(*args), GFP_NOFS);
++	if (!args)
++		return;
++	args->lock_owner.clientid = server->nfs_client->cl_clientid;
++	args->lock_owner.id = lsp->ls_id.id;
++	msg.rpc_argp = args;
++	rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
++}
++
+ #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
+ 
+ int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
+@@ -4526,7 +4676,7 @@ int nfs4_proc_exchange_id(struct nfs_cli
+ 	nfs4_verifier verifier;
+ 	struct nfs41_exchange_id_args args = {
+ 		.client = clp,
+-		.flags = clp->cl_exchange_flags,
++		.flags = clp->cl_exchange_flags & ~EXCHGID4_FLAG_CONFIRMED_R,
+ 	};
+ 	struct nfs41_exchange_id_res res = {
+ 		.client = clp,
+@@ -4574,6 +4724,7 @@ int nfs4_proc_exchange_id(struct nfs_cli
+ 	dprintk("<-- %s status= %d\n", __func__, status);
+ 	return status;
+ }
++EXPORT_SYMBOL(nfs4_proc_exchange_id);
+ 
+ struct nfs4_get_lease_time_data {
+ 	struct nfs4_get_lease_time_args *args;
+@@ -4611,7 +4762,8 @@ static void nfs4_get_lease_time_done(str
+ 			(struct nfs4_get_lease_time_data *)calldata;
+ 
+ 	dprintk("--> %s\n", __func__);
+-	nfs41_sequence_done(data->clp, &data->res->lr_seq_res, task->tk_status);
++	if (!nfs41_sequence_done(task, &data->res->lr_seq_res))
++		return;
+ 	switch (task->tk_status) {
+ 	case -NFS4ERR_DELAY:
+ 	case -NFS4ERR_GRACE:
+@@ -4805,13 +4957,6 @@ struct nfs4_session *nfs4_alloc_session(
+ 	if (!session)
+ 		return NULL;
+ 
+-	/*
+-	 * The create session reply races with the server back
+-	 * channel probe. Mark the client NFS_CS_SESSION_INITING
+-	 * so that the client back channel can find the
+-	 * nfs_client struct
+-	 */
+-	clp->cl_cons_state = NFS_CS_SESSION_INITING;
+ 	init_completion(&session->complete);
+ 
+ 	tbl = &session->fc_slot_table;
+@@ -4824,6 +4969,8 @@ struct nfs4_session *nfs4_alloc_session(
+ 	spin_lock_init(&tbl->slot_tbl_lock);
+ 	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
+ 
++	session->session_state = 1<<NFS4_SESSION_INITING;
++
+ 	session->clp = clp;
+ 	return session;
+ }
+@@ -5040,6 +5187,10 @@ int nfs4_init_session(struct nfs_server 
+ 	if (!nfs4_has_session(clp))
+ 		return 0;
+ 
++	session = clp->cl_session;
++	if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
++		return 0;
++
+ 	rsize = server->rsize;
+ 	if (rsize == 0)
+ 		rsize = NFS_MAX_FILE_IO_SIZE;
+@@ -5047,11 +5198,10 @@ int nfs4_init_session(struct nfs_server 
+ 	if (wsize == 0)
+ 		wsize = NFS_MAX_FILE_IO_SIZE;
+ 
+-	session = clp->cl_session;
+ 	session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
+ 	session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
+ 
+-	ret = nfs4_recover_expired_lease(server);
++	ret = nfs4_recover_expired_lease(server->nfs_client);
+ 	if (!ret)
+ 		ret = nfs4_check_client_ready(clp);
+ 	return ret;
+@@ -5060,69 +5210,70 @@ int nfs4_init_session(struct nfs_server 
+ /*
+  * Renew the cl_session lease.
+  */
+-static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+-{
++struct nfs4_sequence_data {
++	struct nfs_client *clp;
+ 	struct nfs4_sequence_args args;
+ 	struct nfs4_sequence_res res;
+-
+-	struct rpc_message msg = {
+-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
+-		.rpc_argp = &args,
+-		.rpc_resp = &res,
+-		.rpc_cred = cred,
+-	};
+-
+-	args.sa_cache_this = 0;
+-
+-	return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args,
+-				       &res, args.sa_cache_this, 1);
+-}
++};
+ 
+ static void nfs41_sequence_release(void *data)
+ {
+-	struct nfs_client *clp = (struct nfs_client *)data;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 
+ 	if (atomic_read(&clp->cl_count) > 1)
+ 		nfs4_schedule_state_renewal(clp);
+ 	nfs_put_client(clp);
++	kfree(calldata);
++}
++
++static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client *clp)
++{
++	switch(task->tk_status) {
++	case -NFS4ERR_DELAY:
++	case -EKEYEXPIRED:
++		rpc_delay(task, NFS4_POLL_RETRY_MAX);
++		return -EAGAIN;
++	default:
++		nfs4_schedule_state_recovery(clp);
++	}
++	return 0;
+ }
+ 
+ static void nfs41_sequence_call_done(struct rpc_task *task, void *data)
+ {
+-	struct nfs_client *clp = (struct nfs_client *)data;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 
+-	nfs41_sequence_done(clp, task->tk_msg.rpc_resp, task->tk_status);
++	if (!nfs41_sequence_done(task, task->tk_msg.rpc_resp))
++		return;
+ 
+ 	if (task->tk_status < 0) {
+ 		dprintk("%s ERROR %d\n", __func__, task->tk_status);
+ 		if (atomic_read(&clp->cl_count) == 1)
+ 			goto out;
+ 
+-		if (_nfs4_async_handle_error(task, NULL, clp, NULL)
+-								== -EAGAIN) {
+-			nfs_restart_rpc(task, clp);
++		if (nfs41_sequence_handle_errors(task, clp) == -EAGAIN) {
++			rpc_restart_call_prepare(task);
+ 			return;
+ 		}
+ 	}
+ 	dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred);
+ out:
+-	kfree(task->tk_msg.rpc_argp);
+-	kfree(task->tk_msg.rpc_resp);
+-
+ 	dprintk("<-- %s\n", __func__);
+ }
+ 
+ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
+ {
+-	struct nfs_client *clp;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 	struct nfs4_sequence_args *args;
+ 	struct nfs4_sequence_res *res;
+ 
+-	clp = (struct nfs_client *)data;
+ 	args = task->tk_msg.rpc_argp;
+ 	res = task->tk_msg.rpc_resp;
+ 
+-	if (nfs4_setup_sequence(clp, args, res, 0, task))
++	if (nfs41_setup_sequence(clp->cl_session, args, res, 0, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
+@@ -5133,32 +5284,67 @@ static const struct rpc_call_ops nfs41_s
+ 	.rpc_release = nfs41_sequence_release,
+ };
+ 
+-static int nfs41_proc_async_sequence(struct nfs_client *clp,
+-				     struct rpc_cred *cred)
++static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+ {
+-	struct nfs4_sequence_args *args;
+-	struct nfs4_sequence_res *res;
++	struct nfs4_sequence_data *calldata;
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
+ 		.rpc_cred = cred,
+ 	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = clp->cl_rpcclient,
++		.rpc_message = &msg,
++		.callback_ops = &nfs41_sequence_ops,
++		.flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
++	};
+ 
+ 	if (!atomic_inc_not_zero(&clp->cl_count))
+-		return -EIO;
+-	args = kzalloc(sizeof(*args), GFP_NOFS);
+-	res = kzalloc(sizeof(*res), GFP_NOFS);
+-	if (!args || !res) {
+-		kfree(args);
+-		kfree(res);
++		return ERR_PTR(-EIO);
++	calldata = kmalloc(sizeof(*calldata), GFP_NOFS);
++	if (calldata == NULL) {
+ 		nfs_put_client(clp);
+-		return -ENOMEM;
++		return ERR_PTR(-ENOMEM);
+ 	}
+-	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+-	msg.rpc_argp = args;
+-	msg.rpc_resp = res;
++	calldata->res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	msg.rpc_argp = &calldata->args;
++	msg.rpc_resp = &calldata->res;
++	calldata->clp = clp;
++	task_setup_data.callback_data = calldata;
+ 
+-	return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
+-			      &nfs41_sequence_ops, (void *)clp);
++	return rpc_run_task(&task_setup_data);
++}
++
++static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred)
++{
++	struct rpc_task *task;
++	int ret = 0;
++
++	task = _nfs41_proc_sequence(clp, cred);
++	if (IS_ERR(task))
++		ret = PTR_ERR(task);
++	else
++		rpc_put_task(task);
++	dprintk("<-- %s status=%d\n", __func__, ret);
++	return ret;
++}
++
++static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
++{
++	struct rpc_task *task;
++	int ret;
++
++	task = _nfs41_proc_sequence(clp, cred);
++	if (IS_ERR(task)) {
++		ret = PTR_ERR(task);
++		goto out;
++	}
++	ret = rpc_wait_for_completion_task(task);
++	if (!ret)
++		ret = task->tk_status;
++	rpc_put_task(task);
++out:
++	dprintk("<-- %s status=%d\n", __func__, ret);
++	return ret;
+ }
+ 
+ struct nfs4_reclaim_complete_data {
+@@ -5172,13 +5358,31 @@ static void nfs4_reclaim_complete_prepar
+ 	struct nfs4_reclaim_complete_data *calldata = data;
+ 
+ 	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+-	if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args,
++	if (nfs41_setup_sequence(calldata->clp->cl_session,
++				&calldata->arg.seq_args,
+ 				&calldata->res.seq_res, 0, task))
+ 		return;
+ 
+ 	rpc_call_start(task);
+ }
+ 
++static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nfs_client *clp)
++{
++	switch(task->tk_status) {
++	case 0:
++	case -NFS4ERR_COMPLETE_ALREADY:
++	case -NFS4ERR_WRONG_CRED: /* What to do here? */
++		break;
++	case -NFS4ERR_DELAY:
++	case -EKEYEXPIRED:
++		rpc_delay(task, NFS4_POLL_RETRY_MAX);
++		return -EAGAIN;
++	default:
++		nfs4_schedule_state_recovery(clp);
++	}
++	return 0;
++}
++
+ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data)
+ {
+ 	struct nfs4_reclaim_complete_data *calldata = data;
+@@ -5186,32 +5390,13 @@ static void nfs4_reclaim_complete_done(s
+ 	struct nfs4_sequence_res *res = &calldata->res.seq_res;
+ 
+ 	dprintk("--> %s\n", __func__);
+-	nfs41_sequence_done(clp, res, task->tk_status);
+-	switch (task->tk_status) {
+-	case 0:
+-	case -NFS4ERR_COMPLETE_ALREADY:
+-		break;
+-	case -NFS4ERR_BADSESSION:
+-	case -NFS4ERR_DEADSESSION:
+-		/*
+-		 * Handle the session error, but do not retry the operation, as
+-		 * we have no way of telling whether the clientid had to be
+-		 * reset before we got our reply.  If reset, a new wave of
+-		 * reclaim operations will follow, containing their own reclaim
+-		 * complete.  We don't want our retry to get on the way of
+-		 * recovery by incorrectly indicating to the server that we're
+-		 * done reclaiming state since the process had to be restarted.
+-		 */
+-		_nfs4_async_handle_error(task, NULL, clp, NULL);
+-		break;
+-	default:
+-		if (_nfs4_async_handle_error(
+-				task, NULL, clp, NULL) == -EAGAIN) {
+-			rpc_restart_call_prepare(task);
+-			return;
+-		}
+-	}
++	if (!nfs41_sequence_done(task, res))
++		return;
+ 
++	if (nfs41_reclaim_complete_handle_errors(task, clp) == -EAGAIN) {
++		rpc_restart_call_prepare(task);
++		return;
++	}
+ 	dprintk("<-- %s\n", __func__);
+ }
+ 
+@@ -5268,6 +5453,404 @@ out:
+ 	dprintk("<-- %s status=%d\n", __func__, status);
+ 	return status;
+ }
++
++static void
++nfs4_pnfs_layoutget_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_pnfs_layoutget *lgp = calldata;
++	struct inode *ino = lgp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++	if (nfs4_setup_sequence(server, NULL, &lgp->args.seq_args,
++				&lgp->res.seq_res, 0, task))
++		return;
++	rpc_call_start(task);
++}
++
++static void nfs4_pnfs_layoutget_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_pnfs_layoutget *lgp = calldata;
++	struct inode *ino = lgp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++
++	if (!nfs4_sequence_done(task, &lgp->res.seq_res))
++		return;
++
++	if (RPC_ASSASSINATED(task))
++		return;
++
++	pnfs_get_layout_done(lgp, task->tk_status);
++
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client);
++
++	lgp->status = task->tk_status;
++	dprintk("<-- %s\n", __func__);
++}
++
++static void nfs4_pnfs_layoutget_release(void *calldata)
++{
++	struct nfs4_pnfs_layoutget *lgp = calldata;
++
++	dprintk("--> %s\n", __func__);
++	pnfs_layout_release(NFS_I(lgp->args.inode)->layout, NULL);
++	if (lgp->res.layout.buf != NULL)
++		free_page((unsigned long) lgp->res.layout.buf);
++	kfree(calldata);
++	dprintk("<-- %s\n", __func__);
++}
++
++static const struct rpc_call_ops nfs4_pnfs_layoutget_call_ops = {
++	.rpc_call_prepare = nfs4_pnfs_layoutget_prepare,
++	.rpc_call_done = nfs4_pnfs_layoutget_done,
++	.rpc_release = nfs4_pnfs_layoutget_release,
++};
++
++/* FIXME: retrying through nfs4_handle_exception is not safe yet.
++ * The rpc_release callback frees lgp and its contents, so lgp must
++ * not be reused by the caller once this function has returned.
++ */
++static int _pnfs4_proc_layoutget(struct nfs4_pnfs_layoutget *lgp)
++{
++	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
++	struct rpc_task *task;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_LAYOUTGET],
++		.rpc_argp = &lgp->args,
++		.rpc_resp = &lgp->res,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = server->client,
++		.rpc_message = &msg,
++		.callback_ops = &nfs4_pnfs_layoutget_call_ops,
++		.callback_data = lgp,
++		.flags = RPC_TASK_ASYNC,
++	};
++	int status = 0;
++
++	dprintk("--> %s\n", __func__);
++
++	lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS);
++	if (lgp->res.layout.buf == NULL) {
++		nfs4_pnfs_layoutget_release(lgp);
++		return -ENOMEM;
++	}
++
++	lgp->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	status = nfs4_wait_for_completion_rpc_task(task);
++	if (status != 0)
++		goto out;
++	status = lgp->status;
++	if (status != 0)
++		goto out;
++	status = pnfs_layout_process(lgp);
++out:
++	rpc_put_task(task);
++	dprintk("<-- %s status=%d\n", __func__, status);
++	return status;
++}
++
++int pnfs4_proc_layoutget(struct nfs4_pnfs_layoutget *lgp)
++{
++	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
++	struct nfs4_exception exception = { };
++	int err;
++	do {
++		err = nfs4_handle_exception(server, _pnfs4_proc_layoutget(lgp),
++					    &exception);
++	} while (exception.retry); /* NOTE(review): retry reuses released lgp? */
++	return err;
++}
++
++/* rpc_call_prepare: set up the session sequence, then start the call. */
++static void pnfs_layoutcommit_prepare(struct rpc_task *task, void *data)
++{
++	struct pnfs_layoutcommit_data *lcd = data;
++	struct nfs_server *server = NFS_SERVER(lcd->args.inode);
++
++	if (nfs4_setup_sequence(server, NULL, &lcd->args.seq_args,
++				&lcd->res.seq_res, 1, task))
++		return;
++	rpc_call_start(task);
++}
++
++/*
++ * rpc_call_done callback for LAYOUTCOMMIT: once the sequence op is done
++ * and the task was not assassinated, run async error handling (restarting
++ * the RPC on -EAGAIN) and record the task status in the call data.
++ */
++static void
++pnfs_layoutcommit_done(struct rpc_task *task, void *calldata)
++{
++	struct pnfs_layoutcommit_data *lcd = calldata;
++	struct nfs_server *server = NFS_SERVER(lcd->args.inode);
++
++	if (!nfs4_sequence_done(task, &lcd->res.seq_res) ||
++	    RPC_ASSASSINATED(task))
++		return;
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client);
++	lcd->status = task->tk_status;
++}
++
++/* rpc_release callback: drop the cred and layout refs, then free the data. */
++static void pnfs_layoutcommit_release(void *lcdata)
++{
++	struct pnfs_layoutcommit_data *data = lcdata;
++
++	put_rpccred(data->cred);
++	pnfs_cleanup_layoutcommit(lcdata);
++	/* Matched by get_layout in pnfs_layoutcommit_inode; do before free */
++	put_layout(data->args.inode);
++	pnfs_layoutcommit_free(lcdata);
++}
++
++static const struct rpc_call_ops pnfs_layoutcommit_ops = {
++	.rpc_call_prepare = pnfs_layoutcommit_prepare,
++	.rpc_call_done = pnfs_layoutcommit_done,
++	.rpc_release = pnfs_layoutcommit_release,
++};
++
++/* Send LAYOUTCOMMIT as an async RPC; wait for it only when issync is set */
++static int
++_pnfs4_proc_layoutcommit(struct pnfs_layoutcommit_data *data, int issync)
++{
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_LAYOUTCOMMIT],
++		.rpc_argp = &data->args,
++		.rpc_resp = &data->res,
++		.rpc_cred = data->cred,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.task = &data->task,
++		.rpc_client = NFS_CLIENT(data->args.inode),
++		.rpc_message = &msg,
++		.callback_ops = &pnfs_layoutcommit_ops,
++		.callback_data = data,
++		.flags = RPC_TASK_ASYNC,
++	};
++	struct rpc_task *task;
++	int status = 0;
++
++	dprintk("NFS: %4d initiating layoutcommit call. %llu@%llu lbw: %llu "
++		"type: %d issync %d\n",
++		data->task.tk_pid,
++		data->args.lseg.length,
++		data->args.lseg.offset,
++		data->args.lastbytewritten,
++		data->args.layout_type, issync);
++
++	data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	if (!issync)
++		goto out;
++	status = nfs4_wait_for_completion_rpc_task(task);
++	if (status != 0)
++		goto out;
++	status = data->status;
++out:
++	dprintk("%s: status %d\n", __func__, status);
++	rpc_put_task(task);
++	return 0;	/* NOTE(review): status is logged but never returned */
++}
++
++/* Issue LAYOUTCOMMIT, repeating while nfs4_handle_exception asks to retry. */
++int pnfs4_proc_layoutcommit(struct pnfs_layoutcommit_data *data, int issync)
++{
++	struct nfs_server *server = NFS_SERVER(data->args.inode);
++	struct nfs4_exception exception = { };
++	int rc;
++
++	do {
++		rc = _pnfs4_proc_layoutcommit(data, issync);
++		rc = nfs4_handle_exception(server, rc, &exception);
++	} while (exception.retry);
++	return rc;
++}
++
++/* rpc_call_prepare: set up the session sequence before starting the call. */
++static void
++nfs4_pnfs_layoutreturn_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_pnfs_layoutreturn *lrp = calldata;
++	struct nfs_server *server = NFS_SERVER(lrp->args.inode);
++
++	dprintk("--> %s\n", __func__);
++	if (nfs4_setup_sequence(server, NULL, &lrp->args.seq_args,
++				&lrp->res.seq_res, 0, task))
++		return;
++	rpc_call_start(task);
++}
++
++/* rpc_call_done: finish the sequence op and run async error handling. */
++static void nfs4_pnfs_layoutreturn_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_pnfs_layoutreturn *lrp = calldata;
++	struct nfs_server *server = NFS_SERVER(lrp->args.inode);
++
++	dprintk("--> %s\n", __func__);
++
++	if (!nfs4_sequence_done(task, &lrp->res.seq_res))
++		return;
++
++	if (RPC_ASSASSINATED(task))
++		return;
++
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client);
++
++	dprintk("<-- %s\n", __func__);
++}
++
++/* rpc_release: RETURN_FILE cleanup of the layout, then free the call data. */
++static void nfs4_pnfs_layoutreturn_release(void *calldata)
++{
++	struct nfs4_pnfs_layoutreturn *lrp = calldata;
++	struct pnfs_layout_type *lo = NFS_I(lrp->args.inode)->layout;
++
++	dprintk("--> %s return_type %d lo %p\n", __func__,
++		lrp->args.return_type, lo);
++	if (lrp->args.return_type == RETURN_FILE) {
++		if (!lrp->res.lrs_present)
++			pnfs_set_layout_stateid(lo, &zero_stateid);
++		pnfs_layout_release(lo, &lrp->args.lseg);
++	}
++	kfree(lrp);
++	dprintk("<-- %s\n", __func__);
++}
++
++static const struct rpc_call_ops nfs4_pnfs_layoutreturn_call_ops = {
++	.rpc_call_prepare = nfs4_pnfs_layoutreturn_prepare,
++	.rpc_call_done = nfs4_pnfs_layoutreturn_done,
++	.rpc_release = nfs4_pnfs_layoutreturn_release,
++};
++
++/*
++ * Start an async LAYOUTRETURN; wait for its status only when issync is set.
++ */
++int _pnfs4_proc_layoutreturn(struct nfs4_pnfs_layoutreturn *lrp, bool issync)
++{
++	struct nfs_server *server = NFS_SERVER(lrp->args.inode);
++	struct rpc_task *task;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_LAYOUTRETURN],
++		.rpc_argp = &lrp->args,
++		.rpc_resp = &lrp->res,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = server->client,
++		.rpc_message = &msg,
++		.callback_ops = &nfs4_pnfs_layoutreturn_call_ops,
++		.callback_data = lrp,
++		.flags = RPC_TASK_ASYNC,
++	};
++	int status = 0;
++
++	dprintk("--> %s\n", __func__);
++	lrp->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	if (issync) {
++		status = nfs4_wait_for_completion_rpc_task(task);
++		if (status == 0)
++			status = task->tk_status;
++	}
++	dprintk("<-- %s\n", __func__);
++	rpc_put_task(task);
++	return status;
++}
++
++/* Issue LAYOUTRETURN, repeating while nfs4_handle_exception asks to retry. */
++int pnfs4_proc_layoutreturn(struct nfs4_pnfs_layoutreturn *lrp, bool issync)
++{
++	struct nfs_server *server = NFS_SERVER(lrp->args.inode);
++	struct nfs4_exception exception = { };
++	int rc;
++
++	do {
++		rc = _pnfs4_proc_layoutreturn(lrp, issync);
++		rc = nfs4_handle_exception(server, rc, &exception);
++	} while (exception.retry);
++	return rc;
++}
++
++/*
++ * Ask the MDS for the device list of the server's current layout driver.
++ */
++static int _nfs4_pnfs_getdevicelist(struct nfs_server *server,
++				    const struct nfs_fh *fh,
++				    struct pnfs_devicelist *devlist)
++{
++	struct nfs4_pnfs_getdevicelist_arg arg = {
++		.fh = fh,
++		.layoutclass = server->pnfs_curr_ld->id,
++	};
++	struct nfs4_pnfs_getdevicelist_res res = {
++		.devlist = devlist,
++	};
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_GETDEVICELIST],
++		.rpc_argp = &arg,
++		.rpc_resp = &res,
++	};
++	int status;
++
++	dprintk("--> %s\n", __func__);
++	status = nfs4_call_sync(server, &msg, &arg, &res, 0);
++	dprintk("<-- %s status=%d\n", __func__, status);
++	return status;
++}
++
++/* GETDEVICELIST, repeated while nfs4_handle_exception asks for a retry. */
++int nfs4_pnfs_getdevicelist(struct nfs_server *server,
++			    const struct nfs_fh *fh,
++			    struct pnfs_devicelist *devlist)
++{
++	struct nfs4_exception exception = { };
++	int rc;
++
++	do {
++		rc = _nfs4_pnfs_getdevicelist(server, fh, devlist);
++		rc = nfs4_handle_exception(server, rc, &exception);
++	} while (exception.retry);
++
++	dprintk("nfs4_pnfs_getdevlist: err=%d, num_devs=%u\n",
++		rc, devlist->num_devs);
++
++	return rc;
++}
++
++/*
++ * Synchronously issue GETDEVICEINFO to the server. The same pdev is handed
++ * to both the argument and the result structure.
++ */
++int nfs4_pnfs_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
++{
++	struct nfs4_pnfs_getdeviceinfo_arg args = { .pdev = pdev };
++	struct nfs4_pnfs_getdeviceinfo_res res = { .pdev = pdev };
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_GETDEVICEINFO],
++		.rpc_argp = &args,
++		.rpc_resp = &res,
++	};
++	int rc;
++
++	dprintk("--> %s\n", __func__);
++	rc = nfs4_call_sync(server, &msg, &args, &res, 0);
++	dprintk("<-- %s status=%d\n", __func__, rc);
++
++	return rc;
++}
++
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
+@@ -5325,28 +5908,30 @@ struct nfs4_state_maintenance_ops nfs41_
+ };
+ #endif
+ 
+-/*
+- * Per minor version reboot and network partition recovery ops
+- */
+-
+-struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[] = {
+-	&nfs40_reboot_recovery_ops,
+-#if defined(CONFIG_NFS_V4_1)
+-	&nfs41_reboot_recovery_ops,
+-#endif
++static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
++	.minor_version = 0,
++	.call_sync = _nfs4_call_sync,
++	.validate_stateid = nfs4_validate_delegation_stateid,
++	.reboot_recovery_ops = &nfs40_reboot_recovery_ops,
++	.nograce_recovery_ops = &nfs40_nograce_recovery_ops,
++	.state_renewal_ops = &nfs40_state_renewal_ops,
+ };
+ 
+-struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[] = {
+-	&nfs40_nograce_recovery_ops,
+ #if defined(CONFIG_NFS_V4_1)
+-	&nfs41_nograce_recovery_ops,
+-#endif
++static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
++	.minor_version = 1,
++	.call_sync = _nfs4_call_sync_session,
++	.validate_stateid = nfs41_validate_delegation_stateid,
++	.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
++	.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
++	.state_renewal_ops = &nfs41_state_renewal_ops,
+ };
++#endif
+ 
+-struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[] = {
+-	&nfs40_state_renewal_ops,
++const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
++	[0] = &nfs_v4_0_minor_ops,
+ #if defined(CONFIG_NFS_V4_1)
+-	&nfs41_state_renewal_ops,
++	[1] = &nfs_v4_1_minor_ops,
+ #endif
+ };
+ 
+@@ -5364,6 +5949,7 @@ const struct nfs_rpc_ops nfs_v4_clientop
+ 	.dentry_ops	= &nfs4_dentry_operations,
+ 	.dir_inode_ops	= &nfs4_dir_inode_operations,
+ 	.file_inode_ops	= &nfs4_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs4_proc_get_root,
+ 	.getattr	= nfs4_proc_getattr,
+ 	.setattr	= nfs4_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs/nfs4renewd.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-08-23 12:09:03.341491726 -0400
+@@ -54,17 +54,17 @@
+ void
+ nfs4_renew_state(struct work_struct *work)
+ {
+-	struct nfs4_state_maintenance_ops *ops;
++	const struct nfs4_state_maintenance_ops *ops;
+ 	struct nfs_client *clp =
+ 		container_of(work, struct nfs_client, cl_renewd.work);
+ 	struct rpc_cred *cred;
+ 	long lease;
+ 	unsigned long last, now;
+ 
+-	ops = nfs4_state_renewal_ops[clp->cl_minorversion];
++	ops = clp->cl_mvops->state_renewal_ops;
+ 	dprintk("%s: start\n", __func__);
+ 	/* Are there any active superblocks? */
+-	if (list_empty(&clp->cl_superblocks))
++	if (list_empty(&clp->cl_superblocks) && !is_ds_only_client(clp))
+ 		goto out;
+ 	spin_lock(&clp->cl_lock);
+ 	lease = clp->cl_lease_time;
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-08-23 12:08:29.052491341 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-23 12:09:03.342373443 -0400
+@@ -53,6 +53,9 @@
+ #include "callback.h"
+ #include "delegation.h"
+ #include "internal.h"
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs4_pnfs.h>
++#include "pnfs.h"
+ 
+ #define OPENOWNER_POOL_SIZE	8
+ 
+@@ -126,6 +129,11 @@ static int nfs41_setup_state_renewal(str
+ 	int status;
+ 	struct nfs_fsinfo fsinfo;
+ 
++	if (is_ds_only_client(clp)) {
++		nfs4_schedule_state_renewal(clp);
++		return 0;
++	}
++
+ 	status = nfs4_proc_get_lease_time(clp, &fsinfo);
+ 	if (status == 0) {
+ 		/* Update lease time and schedule renewal */
+@@ -145,7 +153,9 @@ static void nfs4_end_drain_session(struc
+ 	struct nfs4_session *ses = clp->cl_session;
+ 	int max_slots;
+ 
+-	if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) {
++	if (ses == NULL)
++		return;
++	if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
+ 		spin_lock(&ses->fc_slot_table.slot_tbl_lock);
+ 		max_slots = ses->fc_slot_table.max_slots;
+ 		while (max_slots--) {
+@@ -167,7 +177,7 @@ static int nfs4_begin_drain_session(stru
+ 	struct nfs4_slot_table *tbl = &ses->fc_slot_table;
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+-	set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state);
++	set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
+ 	if (tbl->highest_used_slotid != -1) {
+ 		INIT_COMPLETION(ses->complete);
+ 		spin_unlock(&tbl->slot_tbl_lock);
+@@ -371,7 +381,6 @@ nfs4_alloc_state_owner(void)
+ 		return NULL;
+ 	spin_lock_init(&sp->so_lock);
+ 	INIT_LIST_HEAD(&sp->so_states);
+-	INIT_LIST_HEAD(&sp->so_delegations);
+ 	rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue");
+ 	sp->so_seqid.sequence = &sp->so_sequence;
+ 	spin_lock_init(&sp->so_sequence.lock);
+@@ -384,7 +393,7 @@ static void
+ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
+ {
+ 	if (!RB_EMPTY_NODE(&sp->so_client_node)) {
+-		struct nfs_client *clp = sp->so_client;
++		struct nfs_client *clp = sp->so_server->nfs_client;
+ 
+ 		spin_lock(&clp->cl_lock);
+ 		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
+@@ -406,7 +415,6 @@ struct nfs4_state_owner *nfs4_get_state_
+ 	new = nfs4_alloc_state_owner();
+ 	if (new == NULL)
+ 		return NULL;
+-	new->so_client = clp;
+ 	new->so_server = server;
+ 	new->so_cred = cred;
+ 	spin_lock(&clp->cl_lock);
+@@ -423,7 +431,7 @@ struct nfs4_state_owner *nfs4_get_state_
+ 
+ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
+ {
+-	struct nfs_client *clp = sp->so_client;
++	struct nfs_client *clp = sp->so_server->nfs_client;
+ 	struct rpc_cred *cred = sp->so_cred;
+ 
+ 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
+@@ -583,8 +591,24 @@ static void __nfs4_close(struct path *pa
+ 	if (!call_close) {
+ 		nfs4_put_open_state(state);
+ 		nfs4_put_state_owner(owner);
+-	} else
++	} else {
++		u32 roc_iomode;
++		struct nfs_inode *nfsi = NFS_I(state->inode);
++
++		if (has_layout(nfsi) &&
++		    (roc_iomode = pnfs_layout_roc_iomode(nfsi)) != 0) {
++			struct nfs4_pnfs_layout_segment range = {
++				.iomode = roc_iomode,
++				.offset = 0,
++				.length = NFS4_MAX_UINT64,
++			};
++
++			pnfs_return_layout(state->inode, &range, NULL,
++					   RETURN_FILE, wait);
++		}
++
+ 		nfs4_do_close(path, state, gfp_mask, wait);
++	}
+ }
+ 
+ void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
+@@ -602,12 +626,21 @@ void nfs4_close_sync(struct path *path, 
+  * that is compatible with current->files
+  */
+ static struct nfs4_lock_state *
+-__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
++__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *pos;
+ 	list_for_each_entry(pos, &state->lock_states, ls_locks) {
+-		if (pos->ls_owner != fl_owner)
++		if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type)
+ 			continue;
++		switch (pos->ls_owner.lo_type) {
++		case NFS4_POSIX_LOCK_TYPE:
++			if (pos->ls_owner.lo_u.posix_owner != fl_owner)
++				continue;
++			break;
++		case NFS4_FLOCK_LOCK_TYPE:
++			if (pos->ls_owner.lo_u.flock_owner != fl_pid)
++				continue;
++		}
+ 		atomic_inc(&pos->ls_count);
+ 		return pos;
+ 	}
+@@ -619,10 +652,10 @@ __nfs4_find_lock_state(struct nfs4_state
+  * exists, return an uninitialized one.
+  *
+  */
+-static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
++static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *lsp;
+-	struct nfs_client *clp = state->owner->so_client;
++	struct nfs_client *clp = state->owner->so_server->nfs_client;
+ 
+ 	lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
+ 	if (lsp == NULL)
+@@ -633,7 +666,18 @@ static struct nfs4_lock_state *nfs4_allo
+ 	lsp->ls_seqid.sequence = &lsp->ls_sequence;
+ 	atomic_set(&lsp->ls_count, 1);
+ 	lsp->ls_state = state;
+-	lsp->ls_owner = fl_owner;
++	lsp->ls_owner.lo_type = type;
++	switch (lsp->ls_owner.lo_type) {
++	case NFS4_FLOCK_LOCK_TYPE:
++		lsp->ls_owner.lo_u.flock_owner = fl_pid;
++		break;
++	case NFS4_POSIX_LOCK_TYPE:
++		lsp->ls_owner.lo_u.posix_owner = fl_owner;
++		break;
++	default:
++		kfree(lsp);
++		return NULL;
++	}
+ 	spin_lock(&clp->cl_lock);
+ 	nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
+ 	spin_unlock(&clp->cl_lock);
+@@ -643,7 +687,7 @@ static struct nfs4_lock_state *nfs4_allo
+ 
+ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
+ {
+-	struct nfs_client *clp = lsp->ls_state->owner->so_client;
++	struct nfs_client *clp = lsp->ls_state->owner->so_server->nfs_client;
+ 
+ 	spin_lock(&clp->cl_lock);
+ 	nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
+@@ -657,13 +701,13 @@ static void nfs4_free_lock_state(struct 
+  * exists, return an uninitialized one.
+  *
+  */
+-static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
++static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *lsp, *new = NULL;
+ 	
+ 	for(;;) {
+ 		spin_lock(&state->state_lock);
+-		lsp = __nfs4_find_lock_state(state, owner);
++		lsp = __nfs4_find_lock_state(state, owner, pid, type);
+ 		if (lsp != NULL)
+ 			break;
+ 		if (new != NULL) {
+@@ -674,7 +718,7 @@ static struct nfs4_lock_state *nfs4_get_
+ 			break;
+ 		}
+ 		spin_unlock(&state->state_lock);
+-		new = nfs4_alloc_lock_state(state, owner);
++		new = nfs4_alloc_lock_state(state, owner, pid, type);
+ 		if (new == NULL)
+ 			return NULL;
+ 	}
+@@ -701,6 +745,8 @@ void nfs4_put_lock_state(struct nfs4_loc
+ 	if (list_empty(&state->lock_states))
+ 		clear_bit(LK_STATE_IN_USE, &state->flags);
+ 	spin_unlock(&state->state_lock);
++	if (lsp->ls_flags & NFS_LOCK_INITIALIZED)
++		nfs4_release_lockowner(lsp);
+ 	nfs4_free_lock_state(lsp);
+ }
+ 
+@@ -728,7 +774,12 @@ int nfs4_set_lock_state(struct nfs4_stat
+ 
+ 	if (fl->fl_ops != NULL)
+ 		return 0;
+-	lsp = nfs4_get_lock_state(state, fl->fl_owner);
++	if (fl->fl_flags & FL_POSIX)
++		lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
++	else if (fl->fl_flags & FL_FLOCK)
++		lsp = nfs4_get_lock_state(state, 0, fl->fl_pid, NFS4_FLOCK_LOCK_TYPE);
++	else
++		return -EINVAL;
+ 	if (lsp == NULL)
+ 		return -ENOMEM;
+ 	fl->fl_u.nfs4_fl.owner = lsp;
+@@ -740,7 +791,7 @@ int nfs4_set_lock_state(struct nfs4_stat
+  * Byte-range lock aware utility to initialize the stateid of read/write
+  * requests.
+  */
+-void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
++void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid)
+ {
+ 	struct nfs4_lock_state *lsp;
+ 	int seq;
+@@ -753,7 +804,7 @@ void nfs4_copy_stateid(nfs4_stateid *dst
+ 		return;
+ 
+ 	spin_lock(&state->state_lock);
+-	lsp = __nfs4_find_lock_state(state, fl_owner);
++	lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
+ 	if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
+ 		memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
+ 	spin_unlock(&state->state_lock);
+@@ -1031,8 +1082,8 @@ restart:
+ 				 * Open state on this file cannot be recovered
+ 				 * All we can do is revert to using the zero stateid.
+ 				 */
+-				memset(state->stateid.data, 0,
+-					sizeof(state->stateid.data));
++				memset(state->stateid.u.data, 0,
++					sizeof(state->stateid.u.data));
+ 				/* Mark the file as being 'closed' */
+ 				state->state = 0;
+ 				break;
+@@ -1041,11 +1092,11 @@ restart:
+ 			case -NFS4ERR_BAD_STATEID:
+ 			case -NFS4ERR_RECLAIM_BAD:
+ 			case -NFS4ERR_RECLAIM_CONFLICT:
+-				nfs4_state_mark_reclaim_nograce(sp->so_client, state);
++				nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ 				break;
+ 			case -NFS4ERR_EXPIRED:
+ 			case -NFS4ERR_NO_GRACE:
+-				nfs4_state_mark_reclaim_nograce(sp->so_client, state);
++				nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ 			case -NFS4ERR_STALE_CLIENTID:
+ 			case -NFS4ERR_BADSESSION:
+ 			case -NFS4ERR_BADSLOT:
+@@ -1120,8 +1171,7 @@ static void nfs4_state_end_reclaim_reboo
+ 	if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+ 		return;
+ 
+-	nfs4_reclaim_complete(clp,
+-		nfs4_reboot_recovery_ops[clp->cl_minorversion]);
++	nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
+ 
+ 	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+ 		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+@@ -1211,8 +1261,8 @@ restart:
+ static int nfs4_check_lease(struct nfs_client *clp)
+ {
+ 	struct rpc_cred *cred;
+-	struct nfs4_state_maintenance_ops *ops =
+-		nfs4_state_renewal_ops[clp->cl_minorversion];
++	const struct nfs4_state_maintenance_ops *ops =
++		clp->cl_mvops->state_renewal_ops;
+ 	int status = -NFS4ERR_EXPIRED;
+ 
+ 	/* Is the client already known to have an expired lease? */
+@@ -1235,8 +1285,8 @@ out:
+ static int nfs4_reclaim_lease(struct nfs_client *clp)
+ {
+ 	struct rpc_cred *cred;
+-	struct nfs4_state_recovery_ops *ops =
+-		nfs4_reboot_recovery_ops[clp->cl_minorversion];
++	const struct nfs4_state_recovery_ops *ops =
++		clp->cl_mvops->reboot_recovery_ops;
+ 	int status = -ENOENT;
+ 
+ 	cred = ops->get_clid_cred(clp);
+@@ -1421,6 +1471,7 @@ static void nfs4_state_manager(struct nf
+ 			}
+ 			clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
+ 			set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
++			pnfs_destroy_all_layouts(clp);
+ 		}
+ 
+ 		if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
+@@ -1444,7 +1495,7 @@ static void nfs4_state_manager(struct nf
+ 		/* First recover reboot state... */
+ 		if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
+ 			status = nfs4_do_reclaim(clp,
+-				nfs4_reboot_recovery_ops[clp->cl_minorversion]);
++				clp->cl_mvops->reboot_recovery_ops);
+ 			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
+ 				continue;
+@@ -1458,7 +1509,7 @@ static void nfs4_state_manager(struct nf
+ 		/* Now recover expired state... */
+ 		if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
+ 			status = nfs4_do_reclaim(clp,
+-				nfs4_nograce_recovery_ops[clp->cl_minorversion]);
++				clp->cl_mvops->nograce_recovery_ops);
+ 			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-23 12:08:29.054481400 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-23 12:09:03.346481283 -0400
+@@ -50,8 +50,11 @@
+ #include <linux/nfs4.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/nfs_idmap.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/pnfs_xdr.h>
+ #include "nfs4_fs.h"
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_XDR
+ 
+@@ -89,7 +92,7 @@ static int nfs4_stat_to_errno(int);
+ #define encode_getfh_maxsz      (op_encode_hdr_maxsz)
+ #define decode_getfh_maxsz      (op_decode_hdr_maxsz + 1 + \
+ 				((3+NFS4_FHSIZE) >> 2))
+-#define nfs4_fattr_bitmap_maxsz 3
++#define nfs4_fattr_bitmap_maxsz 4
+ #define encode_getattr_maxsz    (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
+ #define nfs4_name_maxsz		(1 + ((3 + NFS4_MAXNAMLEN) >> 2))
+ #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
+@@ -111,7 +114,11 @@ static int nfs4_stat_to_errno(int);
+ #define encode_restorefh_maxsz  (op_encode_hdr_maxsz)
+ #define decode_restorefh_maxsz  (op_decode_hdr_maxsz)
+ #define encode_fsinfo_maxsz	(encode_getattr_maxsz)
+-#define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + 11)
++/* The 5 accounts for the PNFS attributes, and assumes that at most three
++ * layout types will be returned.
++ */
++#define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + \
++				 nfs4_fattr_bitmap_maxsz + 8 + 5)
+ #define encode_renew_maxsz	(op_encode_hdr_maxsz + 3)
+ #define decode_renew_maxsz	(op_decode_hdr_maxsz)
+ #define encode_setclientid_maxsz \
+@@ -202,14 +209,17 @@ static int nfs4_stat_to_errno(int);
+ #define encode_link_maxsz	(op_encode_hdr_maxsz + \
+ 				nfs4_name_maxsz)
+ #define decode_link_maxsz	(op_decode_hdr_maxsz + decode_change_info_maxsz)
++#define encode_lockowner_maxsz	(7)
+ #define encode_lock_maxsz	(op_encode_hdr_maxsz + \
+ 				 7 + \
+-				 1 + encode_stateid_maxsz + 8)
++				 1 + encode_stateid_maxsz + 1 + \
++				 encode_lockowner_maxsz)
+ #define decode_lock_denied_maxsz \
+ 				(8 + decode_lockowner_maxsz)
+ #define decode_lock_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_lock_denied_maxsz)
+-#define encode_lockt_maxsz	(op_encode_hdr_maxsz + 12)
++#define encode_lockt_maxsz	(op_encode_hdr_maxsz + 5 + \
++				encode_lockowner_maxsz)
+ #define decode_lockt_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_lock_denied_maxsz)
+ #define encode_locku_maxsz	(op_encode_hdr_maxsz + 3 + \
+@@ -217,6 +227,11 @@ static int nfs4_stat_to_errno(int);
+ 				 4)
+ #define decode_locku_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_stateid_maxsz)
++#define encode_release_lockowner_maxsz \
++				(op_encode_hdr_maxsz + \
++				 encode_lockowner_maxsz)
++#define decode_release_lockowner_maxsz \
++				(op_decode_hdr_maxsz)
+ #define encode_access_maxsz	(op_encode_hdr_maxsz + 1)
+ #define decode_access_maxsz	(op_decode_hdr_maxsz + 2)
+ #define encode_symlink_maxsz	(op_encode_hdr_maxsz + \
+@@ -302,6 +317,35 @@ static int nfs4_stat_to_errno(int);
+ 				XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
+ #define encode_reclaim_complete_maxsz	(op_encode_hdr_maxsz + 4)
+ #define decode_reclaim_complete_maxsz	(op_decode_hdr_maxsz + 4)
++#define encode_getdevicelist_maxsz (op_encode_hdr_maxsz + 4 + \
++				encode_verifier_maxsz)
++#define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 +  \
++				decode_verifier_maxsz +             \
++				XDR_QUADLEN(NFS4_PNFS_GETDEVLIST_MAXNUM *  \
++				NFS4_PNFS_DEVICEID4_SIZE))
++#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \
++				XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE))
++#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \
++				4 /*layout type */ + \
++				4 /* opaque devaddr4 length */ +\
++				4 /* notification bitmap length */ + \
++				4 /* notification bitmap */)
++#define encode_layoutget_sz	(op_encode_hdr_maxsz + 10 + \
++				encode_stateid_maxsz)
++#define decode_layoutget_maxsz	(op_decode_hdr_maxsz + 8 + \
++				decode_stateid_maxsz + \
++				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
++#define encode_layoutcommit_sz	(18 +                           \
++				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + \
++				op_encode_hdr_maxsz +          \
++				encode_stateid_maxsz)
++#define decode_layoutcommit_maxsz (3 + op_decode_hdr_maxsz)
++#define encode_layoutreturn_sz	(8 + op_encode_hdr_maxsz + \
++				encode_stateid_maxsz + \
++				1 /* FIXME: opaque lrf_body always empty at
++				   *the moment */)
++#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
++				1 + decode_stateid_maxsz)
+ #else /* CONFIG_NFS_V4_1 */
+ #define encode_sequence_maxsz	0
+ #define decode_sequence_maxsz	0
+@@ -471,6 +515,12 @@ static int nfs4_stat_to_errno(int);
+ 				decode_sequence_maxsz + \
+ 				decode_putfh_maxsz + \
+ 				decode_locku_maxsz)
++#define NFS4_enc_release_lockowner_sz \
++				(compound_encode_hdr_maxsz + \
++				 encode_release_lockowner_maxsz)
++#define NFS4_dec_release_lockowner_sz \
++				(compound_decode_hdr_maxsz + \
++				 decode_release_lockowner_maxsz)
+ #define NFS4_enc_access_sz	(compound_encode_hdr_maxsz + \
+ 				encode_sequence_maxsz + \
+ 				encode_putfh_maxsz + \
+@@ -685,6 +735,60 @@ static int nfs4_stat_to_errno(int);
+ #define NFS4_dec_reclaim_complete_sz	(compound_decode_hdr_maxsz + \
+ 					 decode_sequence_maxsz + \
+ 					 decode_reclaim_complete_maxsz)
++#define NFS4_enc_getdevicelist_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz + \
++				encode_getdevicelist_maxsz)
++#define NFS4_dec_getdevicelist_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_getdevicelist_maxsz)
++#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz +    \
++				encode_sequence_maxsz +\
++				encode_getdeviceinfo_maxsz)
++#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz +    \
++				decode_sequence_maxsz + \
++				decode_getdeviceinfo_maxsz)
++#define NFS4_enc_layoutget_sz	(compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz +        \
++				encode_layoutget_sz)
++#define NFS4_dec_layoutget_sz	(compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz +        \
++				decode_layoutget_maxsz)
++#define NFS4_enc_layoutcommit_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz +\
++				encode_putfh_maxsz + \
++				encode_layoutcommit_sz + \
++				encode_getattr_maxsz)
++#define NFS4_dec_layoutcommit_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_layoutcommit_maxsz + \
++				decode_getattr_maxsz)
++#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz + \
++				encode_layoutreturn_sz)
++#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_layoutreturn_maxsz)
++#define NFS4_enc_dswrite_sz	(compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz +\
++				encode_putfh_maxsz + \
++				encode_write_maxsz)
++#define NFS4_dec_dswrite_sz	(compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_write_maxsz)
++#define NFS4_enc_dscommit_sz	(compound_encode_hdr_maxsz + \
++				encode_putfh_maxsz + \
++				encode_commit_maxsz)
++#define NFS4_dec_dscommit_sz	(compound_decode_hdr_maxsz + \
++				decode_putfh_maxsz + \
++				decode_commit_maxsz)
+ 
+ const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
+ 				      compound_encode_hdr_maxsz +
+@@ -915,7 +1019,7 @@ static void encode_close(struct xdr_stre
+ 	p = reserve_space(xdr, 8+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_CLOSE);
+ 	*p++ = cpu_to_be32(arg->seqid->sequence->counter);
+-	xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_close_maxsz;
+ }
+@@ -989,6 +1093,35 @@ static void encode_getattr_two(struct xd
+ 	hdr->replen += decode_getattr_maxsz;
+ }
+ 
++/*
++ * Encode a GETATTR op whose attribute bitmap has up to three words.
++ * Trailing zero words are dropped: the bitmap is sent with three words
++ * if bm2 is set, else two if bm1 is set, else one (bm0 is always sent).
++ */
++static void
++encode_getattr_three(struct xdr_stream *xdr,
++		     uint32_t bm0, uint32_t bm1, uint32_t bm2,
++		     struct compound_hdr *hdr)
++{
++	const uint32_t words[3] = { bm0, bm1, bm2 };
++	uint32_t nwords = bm2 ? 3 : (bm1 ? 2 : 1);
++	uint32_t i;
++	__be32 *p;
++
++	p = reserve_space(xdr, 4);
++	*p = cpu_to_be32(OP_GETATTR);
++
++	/* bitmap length word followed by the bitmap words themselves */
++	p = reserve_space(xdr, 4 + 4 * nwords);
++	*p++ = cpu_to_be32(nwords);
++	for (i = 0; i < nwords; i++)
++		*p++ = cpu_to_be32(words[i]);
++
++	/* count the op and add the GETATTR reply size estimate */
++	hdr->nops++;
++	hdr->replen += decode_getattr_maxsz;
++}
++
+ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+ {
+ 	encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0],
+@@ -997,8 +1130,11 @@ static void encode_getfattr(struct xdr_s
+ 
+ static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+ {
+-	encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0],
+-			   bitmask[1] & nfs4_fsinfo_bitmap[1], hdr);
++	encode_getattr_three(xdr,
++			     bitmask[0] & nfs4_fsinfo_bitmap[0],
++			     bitmask[1] & nfs4_fsinfo_bitmap[1],
++			     bitmask[2] & nfs4_fsinfo_bitmap[2],
++			     hdr);
+ }
+ 
+ static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+@@ -1042,6 +1178,17 @@ static inline uint64_t nfs4_lock_length(
+ 	return fl->fl_end - fl->fl_start + 1;
+ }
+ 
++static void encode_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner)
++{
++	/* lock owner: clientid (8) + owner length (4) + 16-byte opaque owner */
++	__be32 *cursor = reserve_space(xdr, 8 + 4 + 16);
++
++	cursor = xdr_encode_hyper(cursor, lowner->clientid);
++	*cursor = cpu_to_be32(16);	/* owner = "lock id:" tag + 64-bit id */
++	cursor = xdr_encode_opaque_fixed(cursor + 1, "lock id:", 8);
++	xdr_encode_hyper(cursor, lowner->id);
++}
++
+ /*
+  * opcode,type,reclaim,offset,length,new_lock_owner = 32
+  * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40
+@@ -1058,18 +1205,16 @@ static void encode_lock(struct xdr_strea
+ 	p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+ 	*p = cpu_to_be32(args->new_lock_owner);
+ 	if (args->new_lock_owner){
+-		p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32);
++		p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 		*p++ = cpu_to_be32(args->open_seqid->sequence->counter);
+-		p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
++		p = xdr_encode_opaque_fixed(p, args->open_stateid->u.data,
++					    NFS4_STATEID_SIZE);
+ 		*p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
+-		p = xdr_encode_hyper(p, args->lock_owner.clientid);
+-		*p++ = cpu_to_be32(16);
+-		p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+-		xdr_encode_hyper(p, args->lock_owner.id);
++		encode_lockowner(xdr, &args->lock_owner);
+ 	}
+ 	else {
+ 		p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
+-		p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE);
++		p = xdr_encode_opaque_fixed(p, args->lock_stateid->u.data, NFS4_STATEID_SIZE);
+ 		*p = cpu_to_be32(args->lock_seqid->sequence->counter);
+ 	}
+ 	hdr->nops++;
+@@ -1080,15 +1225,12 @@ static void encode_lockt(struct xdr_stre
+ {
+ 	__be32 *p;
+ 
+-	p = reserve_space(xdr, 52);
++	p = reserve_space(xdr, 24);
+ 	*p++ = cpu_to_be32(OP_LOCKT);
+ 	*p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
+ 	p = xdr_encode_hyper(p, args->fl->fl_start);
+ 	p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+-	p = xdr_encode_hyper(p, args->lock_owner.clientid);
+-	*p++ = cpu_to_be32(16);
+-	p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+-	xdr_encode_hyper(p, args->lock_owner.id);
++	encode_lockowner(xdr, &args->lock_owner);
+ 	hdr->nops++;
+ 	hdr->replen += decode_lockt_maxsz;
+ }
+@@ -1101,13 +1243,25 @@ static void encode_locku(struct xdr_stre
+ 	*p++ = cpu_to_be32(OP_LOCKU);
+ 	*p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
+ 	*p++ = cpu_to_be32(args->seqid->sequence->counter);
+-	p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, args->stateid->u.data,
++				    NFS4_STATEID_SIZE);
+ 	p = xdr_encode_hyper(p, args->fl->fl_start);
+ 	xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+ 	hdr->nops++;
+ 	hdr->replen += decode_locku_maxsz;
+ }
+ 
++static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr)
++{
++	/* RELEASE_LOCKOWNER arguments: the opcode followed by the lock owner */
++	__be32 *opcode = reserve_space(xdr, 4);
++
++	*opcode = cpu_to_be32(OP_RELEASE_LOCKOWNER);
++	encode_lockowner(xdr, lowner);
++	hdr->replen += decode_release_lockowner_maxsz;
++	hdr->nops++;
++}
++
+ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
+ {
+ 	int len = name->len;
+@@ -1172,7 +1326,7 @@ static inline void encode_createmode(str
+ 		break;
+ 	default:
+ 		clp = arg->server->nfs_client;
+-		if (clp->cl_minorversion > 0) {
++		if (clp->cl_mvops->minor_version > 0) {
+ 			if (nfs4_has_persistent_session(clp)) {
+ 				*p = cpu_to_be32(NFS4_CREATE_GUARDED);
+ 				encode_attrs(xdr, arg->u.attrs, arg->server);
+@@ -1251,7 +1405,7 @@ static inline void encode_claim_delegate
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
+-	xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, stateid->u.data, NFS4_STATEID_SIZE);
+ 	encode_string(xdr, name->len, name->name);
+ }
+ 
+@@ -1282,7 +1436,7 @@ static void encode_open_confirm(struct x
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 	*p++ = cpu_to_be32(OP_OPEN_CONFIRM);
+-	p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	*p = cpu_to_be32(arg->seqid->sequence->counter);
+ 	hdr->nops++;
+ 	hdr->replen += decode_open_confirm_maxsz;
+@@ -1294,7 +1448,7 @@ static void encode_open_downgrade(struct
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 	*p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
+-	p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	*p = cpu_to_be32(arg->seqid->sequence->counter);
+ 	encode_share_access(xdr, arg->fmode);
+ 	hdr->nops++;
+@@ -1324,17 +1478,17 @@ static void encode_putrootfh(struct xdr_
+ 	hdr->replen += decode_putrootfh_maxsz;
+ }
+ 
+-static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
++static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx)
+ {
+ 	nfs4_stateid stateid;
+ 	__be32 *p;
+ 
+ 	p = reserve_space(xdr, NFS4_STATEID_SIZE);
+ 	if (ctx->state != NULL) {
+-		nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
+-		xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
++		nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid);
++		xdr_encode_opaque_fixed(p, stateid.u.data, NFS4_STATEID_SIZE);
+ 	} else
+-		xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
++		xdr_encode_opaque_fixed(p, zero_stateid.u.data, NFS4_STATEID_SIZE);
+ }
+ 
+ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
+@@ -1344,7 +1498,7 @@ static void encode_read(struct xdr_strea
+ 	p = reserve_space(xdr, 4);
+ 	*p = cpu_to_be32(OP_READ);
+ 
+-	encode_stateid(xdr, args->context);
++	encode_stateid(xdr, args->context, args->lock_context);
+ 
+ 	p = reserve_space(xdr, 12);
+ 	p = xdr_encode_hyper(p, args->offset);
+@@ -1448,7 +1602,7 @@ encode_setacl(struct xdr_stream *xdr, st
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_SETATTR);
+-	xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, zero_stateid.u.data, NFS4_STATEID_SIZE);
+ 	p = reserve_space(xdr, 2*4);
+ 	*p++ = cpu_to_be32(1);
+ 	*p = cpu_to_be32(FATTR4_WORD0_ACL);
+@@ -1479,7 +1633,7 @@ static void encode_setattr(struct xdr_st
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_SETATTR);
+-	xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, arg->stateid.u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_setattr_maxsz;
+ 	encode_attrs(xdr, arg->iap, server);
+@@ -1523,7 +1677,7 @@ static void encode_write(struct xdr_stre
+ 	p = reserve_space(xdr, 4);
+ 	*p = cpu_to_be32(OP_WRITE);
+ 
+-	encode_stateid(xdr, args->context);
++	encode_stateid(xdr, args->context, args->lock_context);
+ 
+ 	p = reserve_space(xdr, 16);
+ 	p = xdr_encode_hyper(p, args->offset);
+@@ -1542,7 +1696,7 @@ static void encode_delegreturn(struct xd
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 
+ 	*p++ = cpu_to_be32(OP_DELEGRETURN);
+-	xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, stateid->u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_delegreturn_maxsz;
+ }
+@@ -1696,6 +1850,162 @@ static void encode_sequence(struct xdr_s
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
++#ifdef CONFIG_NFS_V4_1
++static void	/* Encode the GETDEVICELIST op, always starting from cookie 0 */
++encode_getdevicelist(struct xdr_stream *xdr,
++		     const struct nfs4_pnfs_getdevicelist_arg *args,
++		     struct compound_hdr *hdr)
++{
++	__be32 *p;
++	nfs4_verifier dummy = {
++		.data = "dummmmmy",
++	};
++
++	p = reserve_space(xdr, 20);	/* op + layout type + max devices + cookie */
++	*p++ = cpu_to_be32(OP_GETDEVICELIST);
++	*p++ = cpu_to_be32(args->layoutclass);
++	*p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM);
++	xdr_encode_hyper(p, 0ULL);                          /* cookie */
++	encode_nfs4_verifier(xdr, &dummy);	/* placeholder verifier */
++	hdr->nops++;	/* NOTE(review): hdr->replen is not advanced here */
++}
++
++static void	/* Encode the GETDEVICEINFO op for the device in args->pdev */
++encode_getdeviceinfo(struct xdr_stream *xdr,
++		     const struct nfs4_pnfs_getdeviceinfo_arg *args,
++		     struct compound_hdr *hdr)
++{
++	int has_bitmap = (args->pdev->dev_notify_types != 0);
++	int len = 16 + NFS4_PNFS_DEVICEID4_SIZE + (has_bitmap * 4);
++	__be32 *p;
++
++	p = reserve_space(xdr, len);	/* 4 fixed words + deviceid [+ bitmap] */
++	*p++ = cpu_to_be32(OP_GETDEVICEINFO);
++	p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
++				    NFS4_PNFS_DEVICEID4_SIZE);
++	*p++ = cpu_to_be32(args->pdev->layout_type);
++	*p++ = cpu_to_be32(args->pdev->pglen + len);	/* gdia_maxcount */
++	*p++ = cpu_to_be32(has_bitmap);			/* bitmap length [01] */
++	if (has_bitmap)
++		*p = cpu_to_be32(args->pdev->dev_notify_types);
++	hdr->nops++;	/* hdr->replen untouched; nfs4_xdr_enc_getdeviceinfo sizes the reply */
++}
++
++static void	/* Encode the LAYOUTGET op for the range args->lseg of args->inode */
++encode_layoutget(struct xdr_stream *xdr,
++		      const struct nfs4_pnfs_layoutget_arg *args,
++		      struct compound_hdr *hdr)
++{
++	nfs4_stateid stateid;	/* filled in by pnfs_get_layout_stateid() below */
++	__be32 *p;
++
++	p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);	/* 11 words + stateid */
++	*p++ = cpu_to_be32(OP_LAYOUTGET);
++	*p++ = cpu_to_be32(0);     /* Signal layout available: always 0 */
++	*p++ = cpu_to_be32(args->type);
++	*p++ = cpu_to_be32(args->lseg.iomode);
++	p = xdr_encode_hyper(p, args->lseg.offset);
++	p = xdr_encode_hyper(p, args->lseg.length);
++	p = xdr_encode_hyper(p, args->minlength);
++	pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout);
++	p = xdr_encode_opaque_fixed(p, &stateid.u.data, NFS4_STATEID_SIZE);
++	*p = cpu_to_be32(args->maxcount);
++
++	dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
++		__func__,
++		args->type,
++		args->lseg.iomode,
++		(unsigned long)args->lseg.offset,
++		(unsigned long)args->lseg.length,
++		args->maxcount);
++	hdr->nops++;
++	hdr->replen += decode_layoutget_maxsz;
++}
++
++static int	/* Encode the LAYOUTCOMMIT op; always returns 0 */
++encode_layoutcommit(struct xdr_stream *xdr,
++		    const struct pnfs_layoutcommit_arg *args,
++		    struct compound_hdr *hdr)
++{
++	struct layoutdriver_io_operations *ld_io_ops =
++			NFS_SERVER(args->inode)->pnfs_curr_ld->ld_io_ops;
++	__be32 *p;
++
++	dprintk("%s: %llu@%llu lbw: %llu type: %d\n", __func__,
++		args->lseg.length, args->lseg.offset, args->lastbytewritten,
++		args->layout_type);
++
++	p = reserve_space(xdr, 40 + NFS4_STATEID_SIZE);	/* 10 words + stateid */
++	*p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
++	p = xdr_encode_hyper(p, args->lseg.offset);
++	p = xdr_encode_hyper(p, args->lseg.length);
++	*p++ = cpu_to_be32(0);     /* reclaim */
++	p = xdr_encode_opaque_fixed(p, args->stateid.u.data, NFS4_STATEID_SIZE);
++	*p++ = cpu_to_be32(1);     /* newoffset = TRUE */
++	p = xdr_encode_hyper(p, args->lastbytewritten);
++	*p = cpu_to_be32(args->time_modify_changed != 0);
++	if (args->time_modify_changed) {
++		p = reserve_space(xdr, 12);
++		*p++ = cpu_to_be32(0);	/* presumably high word of 64-bit seconds */
++		*p++ = cpu_to_be32(args->time_modify.tv_sec);
++		*p = cpu_to_be32(args->time_modify.tv_nsec);
++	}
++
++	p = reserve_space(xdr, 4);
++	*p = cpu_to_be32(args->layout_type);
++
++	if (ld_io_ops->encode_layoutcommit) {	/* driver encodes its own payload */
++		ld_io_ops->encode_layoutcommit(NFS_I(args->inode)->layout,
++					       xdr, args);
++	} else {
++		p = reserve_space(xdr, 4);
++		xdr_encode_opaque(p, NULL, 0);	/* zero-length opaque instead */
++	}
++
++	hdr->nops++;
++	hdr->replen += decode_layoutcommit_maxsz;
++	return 0;
++}
++
++static void	/* Encode the LAYOUTRETURN op; range and stateid only for RETURN_FILE */
++encode_layoutreturn(struct xdr_stream *xdr,
++		    const struct nfs4_pnfs_layoutreturn_arg *args,
++		    struct compound_hdr *hdr)
++{
++	nfs4_stateid stateid;	/* only filled in and sent for RETURN_FILE */
++	__be32 *p;
++
++	p = reserve_space(xdr, 20);	/* op, reclaim, type, iomode, return type */
++	*p++ = cpu_to_be32(OP_LAYOUTRETURN);
++	*p++ = cpu_to_be32(args->reclaim);
++	*p++ = cpu_to_be32(args->layout_type);
++	*p++ = cpu_to_be32(args->lseg.iomode);
++	*p = cpu_to_be32(args->return_type);
++	if (args->return_type == RETURN_FILE) {
++		struct layoutdriver_io_operations *ld_io_ops =
++			NFS_SERVER(args->inode)->pnfs_curr_ld->ld_io_ops;
++
++		p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);	/* off + len + stateid */
++		p = xdr_encode_hyper(p, args->lseg.offset);
++		p = xdr_encode_hyper(p, args->lseg.length);
++		pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout);
++		p = xdr_encode_opaque_fixed(p, &stateid.u.data,
++					    NFS4_STATEID_SIZE);
++		dprintk("%s: call %pF\n", __func__,
++		ld_io_ops->encode_layoutreturn);
++		if (ld_io_ops->encode_layoutreturn) {	/* driver encodes its own body */
++			ld_io_ops->encode_layoutreturn(
++				NFS_I(args->inode)->layout, xdr, args);
++		} else {
++			p = reserve_space(xdr, 4);
++			*p = cpu_to_be32(0);	/* single zero word when no driver hook */
++		}
++	}
++	hdr->nops++;
++	hdr->replen += decode_layoutreturn_maxsz;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
+ /*
+  * END OF "GENERIC" ENCODE ROUTINES.
+  */
+@@ -1704,7 +2014,7 @@ static u32 nfs4_xdr_minorversion(const s
+ {
+ #if defined(CONFIG_NFS_V4_1)
+ 	if (args->sa_session)
+-		return args->sa_session->clp->cl_minorversion;
++		return args->sa_session->clp->cl_mvops->minor_version;
+ #endif /* CONFIG_NFS_V4_1 */
+ 	return 0;
+ }
+@@ -2048,6 +2358,20 @@ static int nfs4_xdr_enc_locku(struct rpc
+ 	return 0;
+ }
+ 
++static int nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, __be32 *p, struct nfs_release_lockowner_args *args)
++{	/* Encode a compound holding a single RELEASE_LOCKOWNER op; returns 0 */
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = 0,	/* fixed at 0; no SEQUENCE op is encoded */
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_release_lockowner(&xdr, &args->lock_owner, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
+ /*
+  * Encode a READLINK request
+  */
+@@ -2330,7 +2654,7 @@ static int nfs4_xdr_enc_setclientid_conf
+ 	struct compound_hdr hdr = {
+ 		.nops	= 0,
+ 	};
+-	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
++	const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME, 0, 0 };
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ 	encode_compound_hdr(&xdr, req, &hdr);
+@@ -2395,7 +2719,7 @@ static int nfs4_xdr_enc_exchange_id(stru
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = args->client->cl_minorversion,
++		.minorversion = args->client->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2413,7 +2737,7 @@ static int nfs4_xdr_enc_create_session(s
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = args->client->cl_minorversion,
++		.minorversion = args->client->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2431,7 +2755,7 @@ static int nfs4_xdr_enc_destroy_session(
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = session->clp->cl_minorversion,
++		.minorversion = session->clp->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2469,7 +2793,7 @@ static int nfs4_xdr_enc_get_lease_time(s
+ 	struct compound_hdr hdr = {
+ 		.minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
+ 	};
+-	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
++	const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME, 0, 0 };
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ 	encode_compound_hdr(&xdr, req, &hdr);
+@@ -2499,6 +2823,159 @@ static int nfs4_xdr_enc_reclaim_complete
+ 	return 0;
+ }
+ 
++/*
++ * Encode GETDEVICELIST request: SEQUENCE, PUTFH(args->fh), GETDEVICELIST
++ */
++static int
++nfs4_xdr_enc_getdevicelist(struct rpc_rqst *req, uint32_t *p,
++			   struct nfs4_pnfs_getdevicelist_arg *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_getdevicelist(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;	/* this encoder has no failure path */
++}
++
++/*
++ * Encode GETDEVICEINFO request: SEQUENCE, GETDEVICEINFO (no PUTFH)
++ */
++static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
++				      struct nfs4_pnfs_getdeviceinfo_arg *args)
++{
++	struct xdr_stream xdr;
++	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++	int replen;	/* offset in the reply at which the page data is placed */
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_getdeviceinfo(&xdr, args, &hdr);
++
++	/* set up reply kvec. Subtract notification bitmap max size (8)
++	 * so that notification bitmap is put in xdr_buf tail */
++	replen = (RPC_REPHDRSIZE + auth->au_rslack +
++		  NFS4_dec_getdeviceinfo_sz - 8) << 2;	/* presumably words -> bytes */
++	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pdev->pages,
++			 args->pdev->pgbase, args->pdev->pglen);
++	dprintk("%s: inlined page args = (%u, %p, %u, %u)\n",
++		__func__, replen, args->pdev->pages,
++		args->pdev->pgbase, args->pdev->pglen);
++
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ *  Encode LAYOUTGET request: SEQUENCE, PUTFH(args->inode), LAYOUTGET
++ */
++static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
++				  struct nfs4_pnfs_layoutget_arg *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
++	encode_layoutget(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;	/* this encoder has no failure path */
++}
++
++/*
++ *  Encode LAYOUTCOMMIT request: SEQUENCE, PUTFH, LAYOUTCOMMIT, GETATTR
++ */
++static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, uint32_t *p,
++				     struct pnfs_layoutcommit_arg *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_layoutcommit(&xdr, args, &hdr);	/* return value (always 0) ignored */
++	encode_getfattr(&xdr, args->bitmask, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode LAYOUTRETURN request: SEQUENCE, PUTFH(args->inode), LAYOUTRETURN
++ */
++static int nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, uint32_t *p,
++				     struct nfs4_pnfs_layoutreturn_arg *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
++	encode_layoutreturn(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;	/* this encoder has no failure path */
++}
++
++/*
++ * Encode a pNFS File Layout Data Server WRITE request: SEQUENCE, PUTFH, WRITE
++ */
++static int nfs4_xdr_enc_dswrite(struct rpc_rqst *req, uint32_t *p,
++				struct nfs_writeargs *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_write(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;	/* this encoder has no failure path */
++}
++
++/*
++ * Encode a pNFS File Layout Data Server COMMIT request: SEQUENCE, PUTFH, COMMIT
++ */
++static int nfs4_xdr_enc_dscommit(struct rpc_rqst *req, uint32_t *p,
++				 struct nfs_writeargs *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_commit(&xdr, args, &hdr);	/* COMMIT is built from the write args */
++	encode_nops(&hdr);
++	return 0;
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+@@ -2599,14 +3076,17 @@ static int decode_attr_bitmap(struct xdr
+ 		goto out_overflow;
+ 	bmlen = be32_to_cpup(p);
+ 
+-	bitmap[0] = bitmap[1] = 0;
++	bitmap[0] = bitmap[1] = bitmap[2] = 0;
+ 	p = xdr_inline_decode(xdr, (bmlen << 2));
+ 	if (unlikely(!p))
+ 		goto out_overflow;
+ 	if (bmlen > 0) {
+ 		bitmap[0] = be32_to_cpup(p++);
+-		if (bmlen > 1)
+-			bitmap[1] = be32_to_cpup(p);
++		if (bmlen > 1) {
++			bitmap[1] = be32_to_cpup(p++);
++			if (bmlen > 2)
++				bitmap[2] = be32_to_cpup(p);
++		}
+ 	}
+ 	return 0;
+ out_overflow:
+@@ -2635,8 +3115,9 @@ static int decode_attr_supported(struct 
+ 		decode_attr_bitmap(xdr, bitmask);
+ 		bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS;
+ 	} else
+-		bitmask[0] = bitmask[1] = 0;
+-	dprintk("%s: bitmask=%08x:%08x\n", __func__, bitmask[0], bitmask[1]);
++		bitmask[0] = bitmask[1] = bitmask[2] = 0;
++	dprintk("%s: bitmask=%08x:%08x:%08x\n", __func__,
++		bitmask[0], bitmask[1], bitmask[2]);
+ 	return 0;
+ }
+ 
+@@ -3565,7 +4046,7 @@ static int decode_opaque_fixed(struct xd
+ 
+ static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+ {
+-	return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
++	return decode_opaque_fixed(xdr, stateid->u.data, NFS4_STATEID_SIZE);
+ }
+ 
+ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
+@@ -3621,7 +4102,7 @@ out_overflow:
+ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3647,7 +4128,7 @@ xdr_error:
+ static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3679,7 +4160,7 @@ xdr_error:
+ static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3705,7 +4186,7 @@ static int decode_getfattr(struct xdr_st
+ {
+ 	__be32 *savep;
+ 	uint32_t attrlen,
+-		 bitmap[2] = {0},
++		 bitmap[3] = {0},
+ 		 type;
+ 	int status;
+ 	umode_t fmode = 0;
+@@ -3824,24 +4305,101 @@ xdr_error:
+ 	return status;
+ }
+ 
+-
+-static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
++#if defined(CONFIG_NFS_V4_1)
++/*
++ * Decode potentially multiple layout types. Currently we only support
++ * one layout driver per file system.
++ */
++static int decode_pnfs_list(struct xdr_stream *xdr, uint32_t *layoutclass)
+ {
+-	__be32 *savep;
+-	uint32_t attrlen, bitmap[2];
+-	int status;
++	uint32_t *p;
++	int num;
+ 
+-	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+-		goto xdr_error;
+-	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+-		goto xdr_error;
+-	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+-		goto xdr_error;
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	num = be32_to_cpup(p);
+ 
+-	fsinfo->rtmult = fsinfo->wtmult = 512;	/* ??? */
++	/* pNFS is not supported by the underlying file system */
++	if (num == 0) {
++		*layoutclass = 0;
++		return 0;
++	}
+ 
+-	if ((status = decode_attr_lease_time(xdr, bitmap, &fsinfo->lease_time)) != 0)
+-		goto xdr_error;
++	/* TODO: We will eventually support multiple layout drivers ? */
++	if (num > 1)
++		printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers "
++			"per filesystem not supported\n", __func__);
++
++	/* Decode and set first layout type */
++	p = xdr_inline_decode(xdr, num * 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	*layoutclass = be32_to_cpup(p);
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++/*
++ * Decode the layout types attribute (bitmap word 1), if the reply carries it
++ */
++static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
++				uint32_t *layoutclass)
++{
++	int err;
++
++	dprintk("%s: bitmap is %x\n", __func__, bitmap[1]);
++	if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
++		return -EIO;
++	if (unlikely(!(bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES)))
++		return 0;
++	err = decode_pnfs_list(xdr, layoutclass);
++	bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
++	return err;
++}
++
++/*
++ * The preferred block size for layout directed io (0 when not in the reply)
++ */
++static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
++				      uint32_t *res)
++{
++	__be32 *word;
++
++	dprintk("%s: bitmap is %x\n", __func__, bitmap[2]);
++	*res = 0;
++	if (!(bitmap[2] & FATTR4_WORD2_LAYOUT_BLKSIZE))
++		return 0;
++	word = xdr_inline_decode(xdr, 4);
++	if (unlikely(word == NULL)) {
++		print_overflow_msg(__func__, xdr);
++		return -EIO;
++	}
++	bitmap[2] &= ~FATTR4_WORD2_LAYOUT_BLKSIZE;
++	*res = be32_to_cpup(word);
++	return 0;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
++static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
++{
++	__be32 *savep;
++	uint32_t attrlen, bitmap[3];
++	int status;
++
++	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
++		goto xdr_error;
++	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
++		goto xdr_error;
++	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
++		goto xdr_error;
++
++	fsinfo->rtmult = fsinfo->wtmult = 512;	/* ??? */
++
++	if ((status = decode_attr_lease_time(xdr, bitmap, &fsinfo->lease_time)) != 0)
++		goto xdr_error;
+ 	if ((status = decode_attr_maxfilesize(xdr, bitmap, &fsinfo->maxfilesize)) != 0)
+ 		goto xdr_error;
+ 	if ((status = decode_attr_maxread(xdr, bitmap, &fsinfo->rtmax)) != 0)
+@@ -3850,6 +4408,14 @@ static int decode_fsinfo(struct xdr_stre
+ 	if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0)
+ 		goto xdr_error;
+ 	fsinfo->wtpref = fsinfo->wtmax;
++#if defined(CONFIG_NFS_V4_1)
++	status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
++	if (status)
++		goto xdr_error;
++	status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize);
++	if (status)
++		goto xdr_error;
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ 	status = verify_attr_len(xdr, savep, attrlen);
+ xdr_error:
+@@ -3973,6 +4539,11 @@ static int decode_locku(struct xdr_strea
+ 	return status;
+ }
+ 
++static int decode_release_lockowner(struct xdr_stream *xdr)
++{	/* Decode the RELEASE_LOCKOWNER result: just the op header, no body */
++	return decode_op_hdr(xdr, OP_RELEASE_LOCKOWNER);
++}
++
+ static int decode_lookup(struct xdr_stream *xdr)
+ {
+ 	return decode_op_hdr(xdr, OP_LOOKUP);
+@@ -4333,7 +4904,7 @@ static int decode_getacl(struct xdr_stre
+ {
+ 	__be32 *savep;
+ 	uint32_t attrlen,
+-		 bitmap[2] = {0};
++		 bitmap[3] = {0};
+ 	struct kvec *iov = req->rq_rcv_buf.head;
+ 	int status;
+ 
+@@ -4682,6 +5253,226 @@ out_overflow:
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
++#if defined(CONFIG_NFS_V4_1)
++/*
++ * Decode GETDEVICELIST result. TODO: Need to handle case when EOF != true;
++ */
++static int decode_getdevicelist(struct xdr_stream *xdr,
++				struct pnfs_devicelist *res)
++{
++	__be32 *p;
++	int status, i;
++	struct nfs_writeverf verftemp;	/* scratch: verifier is decoded but unused */
++
++	status = decode_op_hdr(xdr, OP_GETDEVICELIST);
++	if (status)
++		return status;
++
++	p = xdr_inline_decode(xdr, 8 + 8 + 4);	/* cookie + verifier + count */
++	if (unlikely(!p))
++		goto out_overflow;
++
++	/* TODO: Skip cookie for now */
++	p += 2;
++
++	/* Read verifier */
++	p = xdr_decode_opaque_fixed(p, verftemp.verifier, 8);
++
++	res->num_devs = be32_to_cpup(p);
++
++	dprintk("%s: num_dev %d\n", __func__, res->num_devs);
++
++	if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM)	/* more than we asked for */
++		return -NFS4ERR_REP_TOO_BIG;
++
++	p = xdr_inline_decode(xdr,
++			      res->num_devs * NFS4_PNFS_DEVICEID4_SIZE + 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	for (i = 0; i < res->num_devs; i++)	/* presumably dev_id[] holds MAXNUM ids - verify */
++		p = xdr_decode_opaque_fixed(p, res->dev_id[i].data,
++					    NFS4_PNFS_DEVICEID4_SIZE);
++	res->eof = be32_to_cpup(p);	/* the trailing word covered by the "+ 4" above */
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_getdeviceinfo(struct xdr_stream *xdr,
++				struct pnfs_device *pdev)
++{	/* Decode GETDEVICEINFO result; returns 0, -EINVAL, -EIO or the op status */
++	__be32 *p;
++	uint32_t len, type;
++	int status;
++
++	status = decode_op_hdr(xdr, OP_GETDEVICEINFO);
++	if (status) {
++		if (status == -ETOOSMALL) {	/* a minimum count follows the status */
++			p = xdr_inline_decode(xdr, 4);
++			if (unlikely(!p))
++				goto out_overflow;
++			pdev->mincount = be32_to_cpup(p);
++			dprintk("%s: Min count too small. mincnt = %u\n",
++				__func__, pdev->mincount);
++		}
++		return status;
++	}
++
++	p = xdr_inline_decode(xdr, 8);	/* layout type + device address length */
++	if (unlikely(!p))
++		goto out_overflow;
++	type = be32_to_cpup(p++);
++	if (type != pdev->layout_type) {
++		dprintk("%s: layout mismatch req: %u pdev: %u\n",
++			__func__, pdev->layout_type, type);
++		return -EINVAL;
++	}
++	/*
++	 * Get the length of the opaque device_addr4. xdr_read_pages places
++	 * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages)
++	 * and places the remaining xdr data in xdr_buf->tail
++	 */
++	pdev->mincount = be32_to_cpup(p);	/* on success, reused for that length */
++	xdr_read_pages(xdr, pdev->mincount); /* include space for the length */
++
++	/* At most one bitmap word */
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	len = be32_to_cpup(p);	/* bitmap word count; any non-zero value reads one word */
++	if (len) {
++		p = xdr_inline_decode(xdr, 4);
++		if (unlikely(!p))
++			goto out_overflow;
++		pdev->dev_notify_types = be32_to_cpup(p);
++	} else
++		pdev->dev_notify_types = 0;
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
++			    struct nfs4_pnfs_layoutget_res *res)
++{	/* Decode LAYOUTGET result; only the first returned layout is kept */
++	__be32 *p;
++	int status;
++	u32 layout_count, dummy;	/* dummy: length of each skipped layout body */
++
++	status = decode_op_hdr(xdr, OP_LAYOUTGET);
++	if (status)
++		return status;
++	p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE);	/* roc + stateid + count */
++	if (unlikely(!p))
++		goto out_overflow;
++	res->return_on_close = be32_to_cpup(p++);
++	p = xdr_decode_opaque_fixed(p, res->stateid.u.data, NFS4_STATEID_SIZE);
++	layout_count = be32_to_cpup(p);
++	if (!layout_count) {
++		dprintk("%s: server responded with empty layout array\n",
++			__func__);
++		return -EINVAL;
++	}
++
++	p = xdr_inline_decode(xdr, 24);	/* offset + length + iomode + type */
++	if (unlikely(!p))
++		goto out_overflow;
++	p = xdr_decode_hyper(p, &res->lseg.offset);
++	p = xdr_decode_hyper(p, &res->lseg.length);
++	res->lseg.iomode = be32_to_cpup(p++);
++	res->type = be32_to_cpup(p++);
++
++	status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
++	if (unlikely(status))
++		return status;
++
++	dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
++		__func__,
++		(unsigned long)res->lseg.offset,
++		(unsigned long)res->lseg.length,
++		res->lseg.iomode,
++		res->type,
++		res->layout.len);
++
++	/* presumably, pnfs4_proc_layoutget allocated a single page */
++	if (res->layout.len > PAGE_SIZE)
++		return -ENOMEM;
++	memcpy(res->layout.buf, p, res->layout.len);	/* p: layout body set above */
++
++	/* FIXME: the whole layout array should be passed up to the pnfs
++	 * client */
++	if (layout_count > 1) {
++		dprintk("%s: server responded with %d layouts, dropping tail\n",
++			__func__, layout_count);
++
++		while (--layout_count) {	/* step over each remaining layout */
++			p = xdr_inline_decode(xdr, 24);
++			if (unlikely(!p))
++				goto out_overflow;
++			status = decode_opaque_inline(xdr, &dummy, (char **)&p);
++			if (unlikely(status))
++				return status;
++		}
++	}
++
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_layoutreturn(struct xdr_stream *xdr,
++			       struct nfs4_pnfs_layoutreturn_res *res)
++{
++	__be32 *flag;
++	int err = decode_op_hdr(xdr, OP_LAYOUTRETURN);
++
++	if (err != 0)
++		return err;
++
++	/* lrs_present says whether a layout stateid follows */
++	flag = xdr_inline_decode(xdr, 4);
++	if (unlikely(flag == NULL)) {
++		print_overflow_msg(__func__, xdr);
++		return -EIO;
++	}
++	res->lrs_present = be32_to_cpup(flag);
++	if (!res->lrs_present)
++		return 0;
++	return decode_stateid(xdr, &res->stateid);
++}
++
++static int decode_layoutcommit(struct xdr_stream *xdr,
++				    struct rpc_rqst *req,	/* unused here */
++				    struct pnfs_layoutcommit_res *res)
++{
++	__be32 *p;
++	int status;
++
++	status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT);
++	if (status)
++		return status;
++
++	p = xdr_inline_decode(xdr, 4);	/* the sizechanged flag */
++	if (unlikely(!p))
++		goto out_overflow;
++	res->sizechanged = be32_to_cpup(p);
++
++	if (res->sizechanged) {	/* 64-bit new size follows only when flagged */
++		p = xdr_inline_decode(xdr, 8);
++		if (unlikely(!p))
++			goto out_overflow;
++		xdr_decode_hyper(p, &res->newsize);
++	}
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
+ /*
+  * END OF "GENERIC" DECODE ROUTINES.
+  */
+@@ -5259,6 +6050,19 @@ out:
+ 	return status;
+ }
+ 
++static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (!status)	/* decode the op only after a good compound header */
++		status = decode_release_lockowner(&xdr);
++	return status;	/* @dummy is never touched */
++}
++
+ /*
+  * Decode READLINK response
+  */
+@@ -5696,6 +6500,186 @@ static int nfs4_xdr_dec_reclaim_complete
+ 		status = decode_reclaim_complete(&xdr, (void *)NULL);
+ 	return status;
+ }
++
++/*
++ * Decode GETDEVICELIST reply (SEQUENCE, PUTFH, GETDEVICELIST); stops at 1st error
++ */
++static int nfs4_xdr_dec_getdevicelist(struct rpc_rqst *rqstp, uint32_t *p,
++				      struct nfs4_pnfs_getdevicelist_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	dprintk("decoding getdevicelist!\n");
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status != 0)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status != 0)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status != 0)
++		goto out;
++	status = decode_getdevicelist(&xdr, res->devlist);
++out:
++	return status;	/* 0, or the status of the op that failed to decode */
++}
++
++/*
++ * Decode GETDEVICEINFO reply (SEQUENCE, GETDEVICEINFO; no PUTFH in this one)
++ */
++static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
++				      struct nfs4_pnfs_getdeviceinfo_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status != 0)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status != 0)
++		goto out;
++	status = decode_getdeviceinfo(&xdr, res->pdev);
++out:
++	return status;	/* 0, or the status of the op that failed to decode */
++}
++
++/*
++ * Decode LAYOUTGET reply (SEQUENCE, PUTFH, LAYOUTGET); stops at first error
++ */
++static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
++				  struct nfs4_pnfs_layoutget_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutget(&xdr, rqstp, res);
++out:
++	return status;	/* 0, or the status of the op that failed to decode */
++}
++
++/*
++ * Decode LAYOUTRETURN reply (SEQUENCE, PUTFH, LAYOUTRETURN); stops at 1st error
++ */
++static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp, uint32_t *p,
++				     struct nfs4_pnfs_layoutreturn_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutreturn(&xdr, res);
++out:
++	return status;	/* 0, or the status of the op that failed to decode */
++}
++
++/*
++ * Decode LAYOUTCOMMIT reply (SEQUENCE, PUTFH, LAYOUTCOMMIT, then attributes)
++ */
++static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, uint32_t *p,
++				     struct pnfs_layoutcommit_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutcommit(&xdr, rqstp, res);
++	if (status)
++		goto out;
++	decode_getfattr(&xdr, res->fattr, res->server,	/* result not checked */
++			!RPC_IS_ASYNC(rqstp->rq_task));
++out:
++	return status;	/* reflects LAYOUTCOMMIT, never the attribute decode */
++}
++
++/*
++ * Decode pNFS File Layout Data Server WRITE response (SEQUENCE, PUTFH, WRITE)
++ */
++static int nfs4_xdr_dec_dswrite(struct rpc_rqst *rqstp, uint32_t *p,
++				struct nfs_writeres *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_write(&xdr, res);
++	if (!status)
++		return res->count;	/* success returns the count, not 0 */
++out:
++	return status;
++}
++
++/*
++ * Decode pNFS File Layout Data Server COMMIT response (SEQUENCE, PUTFH, COMMIT)
++ */
++static int nfs4_xdr_dec_dscommit(struct rpc_rqst *rqstp, uint32_t *p,
++				 struct nfs_writeres *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_commit(&xdr, res);
++out:
++	return status;	/* 0, or the status of the op that failed to decode */
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
+@@ -5866,6 +6850,7 @@ struct rpc_procinfo	nfs4_procedures[] = 
+   PROC(GETACL,		enc_getacl,	dec_getacl),
+   PROC(SETACL,		enc_setacl,	dec_setacl),
+   PROC(FS_LOCATIONS,	enc_fs_locations, dec_fs_locations),
++  PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
+ #if defined(CONFIG_NFS_V4_1)
+   PROC(EXCHANGE_ID,	enc_exchange_id,	dec_exchange_id),
+   PROC(CREATE_SESSION,	enc_create_session,	dec_create_session),
+@@ -5873,6 +6858,13 @@ struct rpc_procinfo	nfs4_procedures[] = 
+   PROC(SEQUENCE,	enc_sequence,	dec_sequence),
+   PROC(GET_LEASE_TIME,	enc_get_lease_time,	dec_get_lease_time),
+   PROC(RECLAIM_COMPLETE, enc_reclaim_complete,  dec_reclaim_complete),
++  PROC(PNFS_GETDEVICELIST, enc_getdevicelist, dec_getdevicelist),
++  PROC(PNFS_GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
++  PROC(PNFS_LAYOUTGET,  enc_layoutget,     dec_layoutget),
++  PROC(PNFS_LAYOUTCOMMIT, enc_layoutcommit,  dec_layoutcommit),
++  PROC(PNFS_LAYOUTRETURN, enc_layoutreturn,  dec_layoutreturn),
++  PROC(PNFS_WRITE, enc_dswrite,  dec_dswrite),
++  PROC(PNFS_COMMIT, enc_dscommit,  dec_dscommit),
+ #endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild
+--- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-08-23 12:09:03.348511665 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-08-23 12:09:03.348511665 -0400
+@@ -0,0 +1,11 @@
++#
++# Makefile for the pNFS Objects Layout Driver kernel module
++#
++objlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o objio_osd.o
++obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o
++
++#
++# Panasas pNFS Layout Driver kernel module
++#
++panlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o panfs_shim.o
++obj-$(CONFIG_PNFS_PANLAYOUT) += panlayoutdriver.o
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-08-23 12:09:03.349501459 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-08-23 12:09:03.349501459 -0400
+@@ -0,0 +1,1087 @@
++/*
++ *  objio_osd.c
++ *
++ *  pNFS Objects layout implementation over open-osd initiator library
++ *
++ *  Copyright (C) 2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bharrosh@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/module.h>
++#include <scsi/scsi_device.h>
++#include <scsi/osd_attributes.h>
++#include <scsi/osd_initiator.h>
++#include <scsi/osd_sec.h>
++#include <scsi/osd_sense.h>
++
++#include "objlayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++#define _LLU(x) ((unsigned long long)(x))	/* for the %llx printks below */
++
++enum { BIO_MAX_PAGES_KMALLOC =	/* bio_vecs fitting in a page next to a bio */
++		(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
++};
++
++/* Per-mountpoint state; currently only the cache of looked-up OSD devices */
++struct objio_mount_type {
++	struct list_head dev_list;	/* of struct _dev_ent, via ->list */
++	spinlock_t dev_list_lock;	/* taken around dev_list accesses */
++};
++
++struct _dev_ent {	/* one cached device of an objio_mount_type */
++	struct list_head list;	/* link in objio_mount_type.dev_list */
++	struct pnfs_deviceid d_id;	/* lookup key, compared with memcmp() */
++	struct osd_dev *od;	/* released by _dev_list_remove_all() */
++};
++
++static void _dev_list_remove_all(struct objio_mount_type *omt)
++{
++	struct _dev_ent *de, *tmp;
++	LIST_HEAD(detached);
++
++	/* Unhook everything under the lock; put devices outside atomic context */
++	spin_lock(&omt->dev_list_lock);
++	list_splice_init(&omt->dev_list, &detached);
++	spin_unlock(&omt->dev_list_lock);
++
++	list_for_each_entry_safe(de, tmp, &detached, list) {
++		osduld_put_device(de->od);
++		kfree(de);
++	}
++}
++
++static struct osd_dev *___dev_list_find(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id)
++{
++	struct _dev_ent *ent;
++
++	/*
++	 * Unlocked scan of the cache: both callers take dev_list_lock.
++	 */
++	list_for_each_entry(ent, &omt->dev_list, list)
++		if (0 == memcmp(&ent->d_id, d_id, sizeof(*d_id)))
++			return ent->od;
++
++	return NULL;
++}
++
++static struct osd_dev *_dev_list_find(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id)
++{
++	struct osd_dev *od;
++
++	spin_lock(&omt->dev_list_lock);
++	od = ___dev_list_find(omt, d_id);	/* NULL when not cached */
++	spin_unlock(&omt->dev_list_lock);
++	return od;
++}
++
++static int _dev_list_add(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id, struct osd_dev *od)
++{
++	struct _dev_ent *de = kzalloc(sizeof(*de), GFP_KERNEL);
++
++	if (!de)
++		return -ENOMEM;
++
++	spin_lock(&omt->dev_list_lock);
++
++	if (___dev_list_find(omt, d_id)) {	/* already cached: keep old one */
++		kfree(de);
++		goto out;
++	}
++
++	de->d_id = *d_id;
++	de->od = od;
++	list_add(&de->list, &omt->dev_list);
++
++out:
++	spin_unlock(&omt->dev_list_lock);
++	return 0;	/* also for a duplicate, where @od was NOT stored */
++}
++
++struct objio_segment {
++	struct pnfs_osd_layout *layout;
++
++	unsigned mirrors_p1;	/* odm_mirror_cnt + 1 */
++	unsigned stripe_unit;	/* odm_stripe_unit */
++	unsigned group_width;	/* Data stripe_units without integrity comps */
++	u64 group_depth;	/* -1 when odm_group_width is 0 */
++	unsigned group_count;	/* 1 when odm_group_width is 0 */
++
++	unsigned num_comps;	/* entries filled in ods[] */
++	/* variable length */
++	struct osd_dev	*ods[1];
++};
++
++struct objio_state;
++typedef ssize_t (*objio_done_fn)(struct objio_state *ios);
++
++struct objio_state {
++	/* Generic layer */
++	struct objlayout_io_state ol_state;
++
++	struct objio_segment *objio_seg;
++
++	struct kref kref;	/* submitter + one ref per request in flight */
++	objio_done_fn done;	/* called when the last ref is dropped */
++	void *private;	/* the on-stack completion, in sync mode */
++
++	unsigned long length;	/* bytes mapped onto per_dev[] so far */
++	unsigned numdevs; /* Actually used devs in this IO */
++	/* A per-device variable array of size numdevs */
++	struct _objio_per_comp {
++		struct bio *bio;
++		struct osd_request *or;
++		unsigned long length;	/* 0 means this slot is unused */
++		u64 offset;	/* byte offset inside the object */
++		unsigned dev;	/* component index into the layout */
++	} per_dev[];
++};
++
++/* Send and wait for a get_device_info of devices in the layout,
++   then look them up with the osd_initiator library */
++static struct osd_dev *_device_lookup(struct pnfs_layout_type *pnfslay,
++			       struct objio_segment *objio_seg, unsigned comp)
++{
++	struct pnfs_osd_layout *layout = objio_seg->layout;
++	struct pnfs_osd_deviceaddr *deviceaddr;
++	struct pnfs_deviceid *d_id;
++	struct osd_dev *od;
++	struct osd_dev_info odi;
++	struct objio_mount_type *omt = PNFS_NFS_SERVER(pnfslay)->pnfs_ld_data;
++	int err;
++
++	d_id = &layout->olo_comps[comp].oc_object_id.oid_device_id;
++
++	od = _dev_list_find(omt, d_id);	/* fast path: the per-mount cache */
++	if (od)
++		return od;
++
++	err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr);
++	if (unlikely(err)) {
++		dprintk("%s: objlayout_get_deviceinfo=>%d\n", __func__, err);
++		return ERR_PTR(err);
++	}
++
++	odi.systemid_len = deviceaddr->oda_systemid.len;
++	if (odi.systemid_len > sizeof(odi.systemid)) {
++		err = -EINVAL;
++		goto out;
++	} else if (odi.systemid_len)
++		memcpy(odi.systemid, deviceaddr->oda_systemid.data,
++		       odi.systemid_len);
++	odi.osdname_len	 = deviceaddr->oda_osdname.len;
++	odi.osdname	 = (u8 *)deviceaddr->oda_osdname.data; /* borrowed */
++
++	if (!odi.osdname_len && !odi.systemid_len) {
++		dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
++			__func__);
++		err = -ENODEV;
++		goto out;
++	}
++
++	od = osduld_info_lookup(&odi);
++	if (unlikely(IS_ERR(od))) {
++		err = PTR_ERR(od);
++		dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
++		goto out;
++	}
++
++	_dev_list_add(omt, d_id, od);	/* NOTE(review): result is ignored */
++
++out:
++	dprintk("%s: return=%d\n", __func__, err);
++	objlayout_put_deviceinfo(deviceaddr);
++	return err ? ERR_PTR(err) : od;	/* a device, or an ERR_PTR() */
++}
++
++static int objio_devices_lookup(struct pnfs_layout_type *pnfslay,
++	struct objio_segment *objio_seg)
++{
++	struct pnfs_osd_layout *layout = objio_seg->layout;
++	unsigned i, num_comps = layout->olo_num_comps;
++	int err;
++
++	/* lookup all devices */
++	for (i = 0; i < num_comps; i++) {
++		struct osd_dev *od;
++
++		od = _device_lookup(pnfslay, objio_seg, i);
++		if (unlikely(IS_ERR(od))) {
++			err = PTR_ERR(od);
++			goto out;	/* objio_seg->num_comps is left unset */
++		}
++		objio_seg->ods[i] = od;
++	}
++	objio_seg->num_comps = num_comps;	/* only once all were found */
++	err = 0;
++
++out:
++	dprintk("%s: return=%d\n", __func__, err);
++	return err;
++}
++
++/* Reject data maps the striping math cannot handle (incl. zero divisors) */
++static int _verify_data_map(struct pnfs_osd_layout *layout)
++{
++	struct pnfs_osd_data_map *data_map = &layout->olo_map;
++	u32 group_width, mirrors_p1 = data_map->odm_mirror_cnt + 1;
++	u64 stripe_length;
++
++/* FIXME: Only raid0 for now. if not go through MDS */
++	if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) {
++		printk(KERN_ERR "Only RAID_0 for now\n");
++		return -ENOTSUPP;
++	}
++	if (!mirrors_p1 || (data_map->odm_num_comps % mirrors_p1)) {
++		printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n",
++			  data_map->odm_num_comps, data_map->odm_mirror_cnt);
++		return -EINVAL;
++	}
++	group_width = data_map->odm_num_comps / mirrors_p1;	/* no groups */
++	if (data_map->odm_group_width) {
++		if (!data_map->odm_group_depth ||
++		    data_map->odm_group_width > group_width)
++			return -EINVAL;	/* group depth or count would be 0 */
++		group_width = data_map->odm_group_width;
++	}
++	stripe_length = (u64)data_map->odm_stripe_unit * group_width;
++	if (!stripe_length || stripe_length >= (1ULL << 32)) {
++		printk(KERN_ERR "Total Stripe length(0x%llx)"
++			  " is 0 or >= 32bit, unsupported\n", _LLU(stripe_length));
++		return -ENOTSUPP;
++	}
++
++	if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) {
++		printk(KERN_ERR "Stripe Unit(0x%llx)"
++			  " must be Multples of PAGE_SIZE(0x%lx)\n",
++			  _LLU(data_map->odm_stripe_unit), PAGE_SIZE);
++		return -ENOTSUPP;
++	}
++	return 0;
++}
++
++int objio_alloc_lseg(void **outp,
++	struct pnfs_layout_type *pnfslay,
++	struct pnfs_layout_segment *lseg,	/* not used in this function */
++	struct pnfs_osd_layout *layout)
++{
++	struct objio_segment *objio_seg;
++	int err;
++
++	err = _verify_data_map(layout);
++	if (unlikely(err))
++		return err;	/* *outp is left untouched on this path */
++
++	objio_seg = kzalloc(sizeof(*objio_seg) +
++			(layout->olo_num_comps - 1) * sizeof(objio_seg->ods[0]),
++			GFP_KERNEL);	/* - 1: ods[1] already holds one slot */
++	if (!objio_seg)
++		return -ENOMEM;
++
++	objio_seg->layout = layout;
++	err = objio_devices_lookup(pnfslay, objio_seg);
++	if (err)
++		goto free_seg;
++
++	objio_seg->mirrors_p1 = layout->olo_map.odm_mirror_cnt + 1;
++	objio_seg->stripe_unit = layout->olo_map.odm_stripe_unit;
++	if (layout->olo_map.odm_group_width) {
++		objio_seg->group_width = layout->olo_map.odm_group_width;
++		objio_seg->group_depth = layout->olo_map.odm_group_depth;
++		objio_seg->group_count = layout->olo_map.odm_num_comps /
++						objio_seg->mirrors_p1 /
++						objio_seg->group_width;
++	} else {	/* no grouping: a single group over all data stripes */
++		objio_seg->group_width = layout->olo_map.odm_num_comps /
++						objio_seg->mirrors_p1;
++		objio_seg->group_depth = -1;	/* all-ones u64 */
++		objio_seg->group_count = 1;
++	}
++
++	*outp = objio_seg;
++	return 0;
++
++free_seg:
++	dprintk("%s: Error: return %d\n", __func__, err);
++	kfree(objio_seg);
++	*outp = NULL;
++	return err;
++}
++
++void objio_free_lseg(void *p)
++{
++	struct objio_segment *objio_seg = p;
++
++	kfree(objio_seg);	/* frees the segment only; ods[] is not put here */
++}
++
++int objio_alloc_io_state(void *seg, struct objlayout_io_state **outp)
++{
++	struct objio_segment *objio_seg = seg;
++	struct objio_state *ios;
++	const unsigned first_size = sizeof(*ios) +	/* ios + per_dev[] */
++				objio_seg->num_comps * sizeof(ios->per_dev[0]);
++	const unsigned sec_size = objio_seg->num_comps *	/* ioerrs[] */
++						sizeof(ios->ol_state.ioerrs[0]);
++
++	dprintk("%s: num_comps=%d\n", __func__, objio_seg->num_comps);
++	ios = kzalloc(first_size + sec_size, GFP_KERNEL); /* one allocation */
++	if (unlikely(!ios))
++		return -ENOMEM;
++
++	ios->objio_seg = objio_seg;
++	ios->ol_state.ioerrs = ((void *)ios) + first_size; /* tail of the block */
++	ios->ol_state.num_comps = objio_seg->num_comps;
++
++	*outp = &ios->ol_state;	/* callers get the embedded generic state */
++	return 0;
++}
++
++void objio_free_io_state(struct objlayout_io_state *ol_state)
++{
++	struct objio_state *ios = container_of(ol_state, struct objio_state,
++					       ol_state);
++
++	kfree(ios);	/* per_dev[] and ioerrs[] live in this same allocation */
++}
++
++enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
++{
++	switch (oep) {	/* map an OSD sense priority to a pNFS-OSD errno */
++	case OSD_ERR_PRI_NO_ERROR:
++		return (enum pnfs_osd_errno)0;
++
++	case OSD_ERR_PRI_CLEAR_PAGES:	/* _io_check() handles it before us */
++		BUG_ON(1);
++		return 0;
++
++	case OSD_ERR_PRI_RESOURCE:
++		return PNFS_OSD_ERR_RESOURCE;
++	case OSD_ERR_PRI_BAD_CRED:
++		return PNFS_OSD_ERR_BAD_CRED;
++	case OSD_ERR_PRI_NO_ACCESS:
++		return PNFS_OSD_ERR_NO_ACCESS;
++	case OSD_ERR_PRI_UNREACHABLE:
++		return PNFS_OSD_ERR_UNREACHABLE;
++	case OSD_ERR_PRI_NOT_FOUND:
++		return PNFS_OSD_ERR_NOT_FOUND;
++	case OSD_ERR_PRI_NO_SPACE:
++		return PNFS_OSD_ERR_NO_SPACE;
++	default:	/* unknown priority: warn, then report it as EIO */
++		WARN_ON(1);
++		/* fallthrough */
++	case OSD_ERR_PRI_EIO:
++		return PNFS_OSD_ERR_EIO;
++	}
++}
++
++static void _clear_bio(struct bio *bio)	/* zero every segment of @bio */
++{
++	struct bio_vec *bv;
++	unsigned i;
++
++	__bio_for_each_segment(bv, bio, i, 0) {
++		unsigned this_count = bv->bv_len;
++
++		if (likely(PAGE_SIZE == this_count))	/* whole page */
++			clear_highpage(bv->bv_page);
++		else	/* partial page: zero only the mapped range */
++			zero_user(bv->bv_page, bv->bv_offset, this_count);
++	}
++}
++
++static int _io_check(struct objio_state *ios, bool is_write)
++{
++	enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR;
++	int lin_ret = 0;	/* status of the highest-priority error seen */
++	int i;
++
++	for (i = 0; i <  ios->numdevs; i++) {
++		struct osd_sense_info osi;
++		struct osd_request *or = ios->per_dev[i].or;
++		int ret;
++
++		if (!or)	/* slot had no request issued */
++			continue;
++
++		ret = osd_req_decode_sense(or, &osi);
++		if (likely(!ret))
++			continue;
++
++		if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
++			/* read started past end of file: hand back zeroes */
++			BUG_ON(is_write);
++			_clear_bio(ios->per_dev[i].bio);
++			dprintk("%s: start read offset passed end of file "
++				"offset=0x%llx, length=0x%lx\n", __func__,
++				_LLU(ios->per_dev[i].offset),
++				ios->per_dev[i].length);
++
++			continue; /* we recovered */
++		}
++		objlayout_io_set_result(&ios->ol_state, ios->per_dev[i].dev,
++					osd_pri_2_pnfs_err(osi.osd_err_pri),
++					ios->per_dev[i].offset,
++					ios->per_dev[i].length,
++					is_write);
++
++		if (osi.osd_err_pri >= oep) {	/* keep the most severe one */
++			oep = osi.osd_err_pri;
++			lin_ret = ret;
++		}
++	}
++
++	return lin_ret;	/* 0 when every request succeeded or was recovered */
++}
++
++/*
++ * Common IO state helpers.
++ */
++static void _io_free(struct objio_state *ios)
++{
++	unsigned i;
++
++	for (i = 0; i < ios->numdevs; i++) {
++		struct _objio_per_comp *per_dev = &ios->per_dev[i];
++
++		if (per_dev->or) {
++			osd_end_request(per_dev->or);
++			per_dev->or = NULL;	/* so a second call is harmless */
++		}
++
++		if (per_dev->bio) {
++			bio_put(per_dev->bio);
++			per_dev->bio = NULL;	/* so a second call is harmless */
++		}
++	}
++}
++
++struct osd_dev * _io_od(struct objio_state *ios, unsigned dev)
++{
++	unsigned min_dev = ios->objio_seg->layout->olo_comps_index;
++	unsigned max_dev = min_dev + ios->ol_state.num_comps;
++
++	BUG_ON(dev < min_dev || max_dev <= dev);	/* outside this layout */
++	return ios->objio_seg->ods[dev - min_dev];	/* ods[] is 0-based */
++}
++
++struct _striping_info {	/* filled in by _calc_stripe_info() */
++	u64 obj_offset;	/* byte offset inside the object */
++	u64 group_length;	/* T - H: bytes left in the current group */
++	u64 total_group_length;	/* T: bytes in one whole group */
++	u64 Major;	/* M: file_offset / S */
++	unsigned dev;	/* first-mirror component index */
++	unsigned unit_off;	/* file_offset % stripe_unit */
++};
++
++static void _calc_stripe_info(struct objio_state *ios, u64 file_offset,
++			      struct _striping_info *si)
++{
++	u32	stripe_unit = ios->objio_seg->stripe_unit;
++	u32	group_width = ios->objio_seg->group_width;
++	u64	group_depth = ios->objio_seg->group_depth;
++	u32	U = stripe_unit * group_width;
++
++	u64	T = U * group_depth;
++	u64	S = T * ios->objio_seg->group_count;
++	u64	M = div64_u64(file_offset, S);
++
++	/* L: file_offset, U: bytes per stripe, T: per group, S: all groups
++	 * G = (L - (M * S)) / T	(which group)
++	 * H = (L - (M * S)) % T	(byte offset inside that group)
++	 */
++	u64	LmodU = file_offset - M * S;
++	u32	G = div64_u64(LmodU, T);
++	u64	H = LmodU - G * T;
++
++	u64	N = div_u64(H, U); /* u64: N * stripe_unit may exceed 32 bits */
++
++	div_u64_rem(file_offset, stripe_unit, &si->unit_off);
++	si->obj_offset = si->unit_off + (N * stripe_unit) +
++				  (M * group_depth * stripe_unit);
++
++	/* "H - (N * U)" is just "H % U" so it's bound to u32 */
++	si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
++	si->dev *= ios->objio_seg->mirrors_p1;
++
++	si->group_length = T - H;
++	si->total_group_length = T;
++	si->Major = M;
++}
++
++static int _add_stripe_unit(struct objio_state *ios,  unsigned *cur_pg,
++		unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len)
++{
++	unsigned pg = *cur_pg;
++	struct request_queue *q =
++			osd_request_queue(_io_od(ios, per_dev->dev));
++
++	per_dev->length += cur_len;
++
++	if (per_dev->bio == NULL) {	/* first unit for this device */
++		unsigned stripes = ios->ol_state.num_comps /
++						     ios->objio_seg->mirrors_p1;
++		unsigned pages_in_stripe = stripes *
++				      (ios->objio_seg->stripe_unit / PAGE_SIZE);
++		unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) /
++				    stripes;
++
++		per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
++		if (unlikely(!per_dev->bio)) {
++			dprintk("Faild to allocate BIO size=%u\n", bio_size);
++			return -ENOMEM;
++		}
++	}
++
++	while (cur_len > 0) {	/* add the unit to the bio page by page */
++		unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
++		unsigned added_len;
++
++		BUG_ON(ios->ol_state.nr_pages <= pg);
++		cur_len -= pglen;
++
++		added_len = bio_add_pc_page(q, per_dev->bio,
++					ios->ol_state.pages[pg], pglen, pgbase);
++		if (unlikely(pglen != added_len))
++			return -ENOMEM;	/* *cur_pg is not advanced here */
++		pgbase = 0;	/* only the first page can start mid-page */
++		++pg;
++	}
++	BUG_ON(cur_len);
++
++	*cur_pg = pg;	/* tell the caller which page comes next */
++	return 0;
++}
++
++static int _prepare_one_group(struct objio_state *ios, u64 length,
++			      struct _striping_info *si, unsigned first_comp,
++			      unsigned *last_pg)
++{
++	unsigned stripe_unit = ios->objio_seg->stripe_unit;
++	unsigned mirrors_p1 = ios->objio_seg->mirrors_p1;
++	unsigned devs_in_group = ios->objio_seg->group_width * mirrors_p1;
++	unsigned dev = si->dev;
++	unsigned first_dev = dev - (dev % devs_in_group);
++	unsigned comp = first_comp + (dev - first_dev);
++	unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
++	unsigned cur_pg = *last_pg;
++	int ret = 0;
++
++	while (length) {	/* one stripe unit (or the remainder) per pass */
++		struct _objio_per_comp *per_dev = &ios->per_dev[comp];
++		unsigned cur_len, page_off = 0;
++
++		if (!per_dev->length) {	/* first use of this slot: set it up */
++			per_dev->dev = dev;
++			if (dev < si->dev) {
++				per_dev->offset = si->obj_offset + stripe_unit -
++								   si->unit_off;
++				cur_len = stripe_unit;
++			} else if (dev == si->dev) {
++				per_dev->offset = si->obj_offset;
++				cur_len = stripe_unit - si->unit_off;
++				page_off = si->unit_off & ~PAGE_MASK;
++				BUG_ON(page_off &&
++				      (page_off != ios->ol_state.pgbase));
++			} else { /* dev > si->dev */
++				per_dev->offset = si->obj_offset - si->unit_off;
++				cur_len = stripe_unit;
++			}
++
++			if (max_comp < comp)
++				max_comp = comp;
++
++			dev += mirrors_p1;	/* next device, wrapping in group */
++			dev = (dev % devs_in_group) + first_dev;
++		} else {
++			cur_len = stripe_unit;
++		}
++		if (cur_len >= length)
++			cur_len = length;
++
++		ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
++				       cur_len);
++		if (unlikely(ret))
++			goto out;
++
++		comp += mirrors_p1;	/* next slot, wrapping in the group */
++		comp = (comp % devs_in_group) + first_comp;
++
++		length -= cur_len;
++		ios->length += cur_len;
++	}
++out:
++	ios->numdevs = max_comp + mirrors_p1;	/* updated on error as well */
++	*last_pg = cur_pg;
++	return ret;
++}
++
++static int _io_rw_pagelist(struct objio_state *ios)
++{
++	u64 length = ios->ol_state.count;
++	struct _striping_info si;
++	unsigned devs_in_group = ios->objio_seg->group_width *
++				 ios->objio_seg->mirrors_p1;
++	unsigned first_comp = 0;
++	unsigned num_comps = ios->objio_seg->layout->olo_map.odm_num_comps;
++	unsigned last_pg = 0;
++	int ret = 0;
++
++	_calc_stripe_info(ios, ios->ol_state.offset, &si);
++	while (length) {	/* one group per pass */
++		if (length < si.group_length)
++			si.group_length = length;
++
++		ret = _prepare_one_group(ios, si.group_length, &si, first_comp,
++					 &last_pg);
++		if (unlikely(ret))
++			goto out;
++
++		length -= si.group_length;
++
++		si.group_length = si.total_group_length; /* next group: whole */
++		si.unit_off = 0;
++		++si.Major;
++		si.obj_offset = si.Major * ios->objio_seg->stripe_unit *
++						ios->objio_seg->group_depth;
++
++		si.dev = (si.dev - (si.dev % devs_in_group)) + devs_in_group;
++		si.dev %= num_comps;	/* wrap to the first group */
++
++		first_comp += devs_in_group;
++		first_comp %= num_comps;
++	}
++
++out:
++	if (!ios->length)	/* nothing was mapped: report the error */
++		return ret;
++
++	return 0;	/* partly mapped: go ahead with the ios->length we have */
++}
++
++static ssize_t _sync_done(struct objio_state *ios)
++{
++	struct completion *waiting = ios->private; /* set by _io_exec() */
++
++	complete(waiting);	/* wakes the waiter in _io_exec() */
++	return 0;
++}
++
++static void _last_io(struct kref *kref)	/* kref release callback */
++{
++	struct objio_state *ios = container_of(kref, struct objio_state, kref);
++
++	ios->done(ios);	/* return value is dropped here */
++}
++
++static void _done_io(struct osd_request *or, void *p)
++{
++	struct objio_state *ios = p;	/* passed in by _io_exec() */
++
++	kref_put(&ios->kref, _last_io);	/* drop this request's reference */
++}
++
++static ssize_t _io_exec(struct objio_state *ios)
++{
++	DECLARE_COMPLETION_ONSTACK(wait);
++	ssize_t status = 0; /* sync status */
++	unsigned i;
++	objio_done_fn saved_done_fn = ios->done;
++	bool sync = ios->ol_state.sync;
++
++	if (sync) {	/* swap in a waker; the real done runs after the wait */
++		ios->done = _sync_done;
++		ios->private = &wait;
++	}
++
++	kref_init(&ios->kref);	/* the submitter's own reference */
++
++	for (i = 0; i < ios->numdevs; i++) {
++		struct osd_request *or = ios->per_dev[i].or;
++
++		if (!or)
++			continue;
++
++		kref_get(&ios->kref);	/* dropped by _done_io() */
++		osd_execute_request_async(or, _done_io, ios);
++	}
++
++	kref_put(&ios->kref, _last_io);	/* done() may already run here */
++
++	if (sync) {
++		wait_for_completion(&wait);
++		status = saved_done_fn(ios);
++	}
++
++	return status;	/* always 0 in async mode */
++}
++
++/*
++ * read
++ */
++static ssize_t _read_done(struct objio_state *ios)
++{
++	ssize_t status;
++	int ret = _io_check(ios, false);	/* before requests are freed */
++
++	_io_free(ios);
++
++	if (likely(!ret))
++		status = ios->length;	/* success: bytes covered by the IO */
++	else
++		status = ret;
++
++	objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync);
++	return status;
++}
++
++static int _read_mirrors(struct objio_state *ios, unsigned cur_comp)
++{
++	struct osd_request *or = NULL;
++	struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
++	unsigned dev = per_dev->dev;	/* only this one device is read */
++	struct pnfs_osd_object_cred *cred =
++			&ios->objio_seg->layout->olo_comps[dev];
++	struct osd_obj_id obj = {
++		.partition = cred->oc_object_id.oid_partition_id,
++		.id = cred->oc_object_id.oid_object_id,
++	};
++	int ret;
++
++	or = osd_start_request(_io_od(ios, dev), GFP_KERNEL);
++	if (unlikely(!or)) {
++		ret = -ENOMEM;
++		goto err;
++	}
++	per_dev->or = or;	/* from here on _io_free() releases it */
++
++	osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length);
++
++	ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
++	if (ret) {
++		dprintk("%s: Faild to osd_finalize_request() => %d\n",
++			__func__, ret);
++		goto err;
++	}
++
++	dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
++		__func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
++		per_dev->length);
++
++err:
++	return ret;	/* 0 on success: the label is shared by both paths */
++}
++
++static ssize_t _read_exec(struct objio_state *ios)
++{
++	unsigned i;
++	int ret;
++
++	for (i = 0; i < ios->numdevs; i += ios->objio_seg->mirrors_p1) {
++		if (!ios->per_dev[i].length)	/* slot got no data mapped */
++			continue;
++		ret = _read_mirrors(ios, i);
++		if (unlikely(ret))
++			goto err;
++	}
++
++	ios->done = _read_done;
++	return _io_exec(ios); /* In sync mode exec returns the io status */
++
++err:
++	_io_free(ios);	/* nothing was submitted yet; drop requests and bios */
++	return ret;
++}
++
++ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state)
++{
++	struct objio_state *ios = container_of(ol_state, struct objio_state,
++					       ol_state);
++	int ret;
++
++	ret = _io_rw_pagelist(ios);	/* map the pages onto per-device bios */
++	if (unlikely(ret))
++		return ret;
++
++	return _read_exec(ios);	/* build and submit the OSD read requests */
++}
++
++/*
++ * write
++ */
++static ssize_t _write_done(struct objio_state *ios)
++{
++	ssize_t status;
++	int ret = _io_check(ios, true);	/* before requests are freed */
++
++	_io_free(ios);
++
++	if (likely(!ret)) {
++		/* FIXME: should be based on the OSD's persistence model
++		 * See OSD2r05 Section 4.13 Data persistence model */
++		ios->ol_state.committed = NFS_UNSTABLE; //NFS_FILE_SYNC;
++		status = ios->length;	/* success: bytes covered by the IO */
++	} else {
++		status = ret;
++	}
++
++	objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync);
++	return status;
++}
++
++static int _write_mirrors(struct objio_state *ios, unsigned cur_comp)
++{
++	struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp];
++	unsigned dev = ios->per_dev[cur_comp].dev;
++	unsigned last_comp = cur_comp + ios->objio_seg->mirrors_p1;
++	int ret;
++
++	for (; cur_comp < last_comp; ++cur_comp, ++dev) { /* every mirror */
++		struct osd_request *or = NULL;
++		struct pnfs_osd_object_cred *cred =
++					&ios->objio_seg->layout->olo_comps[dev];
++		struct osd_obj_id obj = {
++			.partition = cred->oc_object_id.oid_partition_id,
++			.id = cred->oc_object_id.oid_object_id,
++		};
++		struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
++		struct bio *bio;
++
++		or = osd_start_request(_io_od(ios, dev), GFP_KERNEL);
++		if (unlikely(!or)) {
++			ret = -ENOMEM;
++			goto err;
++		}
++		per_dev->or = or;	/* from here on _io_free() releases it */
++
++		if (per_dev != master_dev) {	/* mirror: clone master's bio */
++			bio = bio_kmalloc(GFP_KERNEL,
++					  master_dev->bio->bi_max_vecs);
++			if (unlikely(!bio)) {
++				dprintk("Faild to allocate BIO size=%u\n",
++					master_dev->bio->bi_max_vecs);
++				ret = -ENOMEM;
++				goto err;
++			}
++
++			__bio_clone(bio, master_dev->bio);
++			bio->bi_bdev = NULL;
++			bio->bi_next = NULL;
++			per_dev->bio = bio;
++			per_dev->dev = dev;
++			per_dev->length = master_dev->length;
++			per_dev->offset =  master_dev->offset;
++		} else {
++			bio = master_dev->bio;
++			/* FIXME: bio_set_dir() */
++			bio->bi_rw |= (1 << BIO_RW);
++		}
++
++		osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length);
++
++		ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
++		if (ret) {
++			dprintk("%s: Faild to osd_finalize_request() => %d\n",
++				__func__, ret);
++			goto err;
++		}
++
++		dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
++			__func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
++			per_dev->length);
++	}
++
++err:
++	return ret;	/* 0 on success: the label is shared by both paths */
++}
++
++static ssize_t _write_exec(struct objio_state *ios)
++{
++	unsigned i;
++	int ret;
++
++	for (i = 0; i < ios->numdevs; i += ios->objio_seg->mirrors_p1) {
++		if (!ios->per_dev[i].length)
++			continue;
++		ret = _write_mirrors(ios, i);
++		if (unlikely(ret))
++			goto err;
++	}
++
++	ios->done = _write_done;
++	return _io_exec(ios); /* In sync mode exec returns the io->status */
++
++err:
++	_io_free(ios);
++	return ret;
++}
++
++ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable)
++{
++	struct objio_state *ios = container_of(ol_state, struct objio_state,
++					       ol_state);
++	int ret;
++
++	/* TODO: honor @stable (ios->stable = stable); it is ignored for now */
++	ret = _io_rw_pagelist(ios);
++	if (unlikely(ret))
++		return ret;
++
++	return _write_exec(ios);
++}
++
++/*
++ * Policy Operations
++ */
++
++/*
++ * Return the stripe size for the specified file
++ */
++ssize_t
++objlayout_get_stripesize(struct pnfs_layout_type *pnfslay)
++{
++	ssize_t sz, maxsz = -1;
++	struct pnfs_layout_segment *lseg;
++
++	list_for_each_entry(lseg, &pnfslay->segs, fi_list) {
++		int n;
++		struct objlayout_segment *objlseg = LSEG_LD_DATA(lseg);
++		struct pnfs_osd_layout *lo =
++			(struct pnfs_osd_layout *)objlseg->pnfs_osd_layout;
++		struct pnfs_osd_data_map *map = &lo->olo_map;
++
++		n = map->odm_group_width;
++		if (n == 0)
++			n = map->odm_num_comps / (map->odm_mirror_cnt + 1);
++
++		switch (map->odm_raid_algorithm) {
++		case PNFS_OSD_RAID_0:
++			break;
++
++		case PNFS_OSD_RAID_4:
++		case PNFS_OSD_RAID_5:
++			n -= 1;
++			break;
++
++		case PNFS_OSD_RAID_PQ:
++			n -= 2;
++			break;
++
++		default:
++			BUG_ON(1);
++		}
++		sz = map->odm_stripe_unit * n;
++		if (sz > maxsz)
++			maxsz = sz;
++	}
++	dprintk("%s: Return %Zx\n", __func__, maxsz);
++	return maxsz;
++}
++
++/*
++ * Get the max [rw]size
++ */
++static ssize_t
++objlayout_get_blocksize(void)
++{
++	ssize_t sz = BIO_MAX_PAGES_KMALLOC * PAGE_SIZE;
++
++	return sz;
++}
++
++static struct layoutdriver_policy_operations objlayout_policy_operations = {
++/*
++ * Don't gather across stripes, but rather gather (coalesce) up to
++ * the stripe size.
++ *
++ * FIXME: change interface to use merge_align, merge_count
++ */
++	.flags                 = PNFS_LAYOUTRET_ON_SETATTR,
++	.get_stripesize        = objlayout_get_stripesize,
++	.get_blocksize         = objlayout_get_blocksize,
++};
++
++static struct pnfs_layoutdriver_type objlayout_type = {
++	.id = LAYOUT_OSD2_OBJECTS,
++	.name = "LAYOUT_OSD2_OBJECTS",
++	.ld_io_ops = &objlayout_io_operations,
++	.ld_policy_ops = &objlayout_policy_operations,
++};
++
++void *objio_init_mt(void)
++{
++	struct objio_mount_type *omt = kzalloc(sizeof(*omt), GFP_KERNEL);
++
++	if (!omt)
++		return ERR_PTR(-ENOMEM);
++
++	INIT_LIST_HEAD(&omt->dev_list);
++	spin_lock_init(&omt->dev_list_lock);
++	return omt;
++}
++
++void objio_fini_mt(void *mountid)
++{
++	_dev_list_remove_all(mountid);
++	kfree(mountid);
++}
++
++MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
++MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
++MODULE_LICENSE("GPL");
++
++static int __init
++objlayout_init(void)
++{
++	pnfs_client_ops = pnfs_register_layoutdriver(&objlayout_type);
++	printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n",
++	       __func__);
++	return 0;
++}
++
++static void __exit
++objlayout_exit(void)
++{
++	pnfs_unregister_layoutdriver(&objlayout_type);
++	printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n",
++	       __func__);
++}
++
++module_init(objlayout_init);
++module_exit(objlayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-08-23 12:09:03.350491564 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-08-23 12:09:03.350491564 -0400
+@@ -0,0 +1,790 @@
++/*
++ *  objlayout.c
++ *
++ *  pNFS layout driver for Panasas OSDs
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <scsi/osd_initiator.h>
++#include "objlayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++struct pnfs_client_operations *pnfs_client_ops;
++
++/*
++ * Allocate a zeroed objlayout for @inode; NULL if the allocation fails.
++ */
++static struct pnfs_layout_type *
++objlayout_alloc_layout(struct inode *inode)
++{
++	struct objlayout *objlay;
++
++	objlay = kzalloc(sizeof(struct objlayout), GFP_KERNEL);
++	if (objlay) {
++		spin_lock_init(&objlay->lock);
++		INIT_LIST_HEAD(&objlay->err_list);
++	}
++	dprintk("%s: Return %p\n", __func__, objlay);
++	return objlay ? &objlay->pnfs_layout : NULL; /* no NULL-base math */
++}
++
++/*
++ * Free an objlayout layout structure
++ */
++static void
++objlayout_free_layout(struct pnfs_layout_type *lo)
++{
++	struct objlayout *objlay = OBJLAYOUT(lo);
++
++	dprintk("%s: objlay %p\n", __func__, objlay);
++
++	WARN_ON(!list_empty(&objlay->err_list));
++	kfree(objlay);
++}
++
++/*
++ * Unmarshall layout and store it in pnfslay.
++ */
++static struct pnfs_layout_segment *
++objlayout_alloc_lseg(struct pnfs_layout_type *pnfslay,
++		     struct nfs4_pnfs_layoutget_res *lgr)
++{
++	int status;
++	void *layout = lgr->layout.buf;
++	struct pnfs_layout_segment *lseg;
++	struct objlayout_segment *objlseg;
++	struct pnfs_osd_layout *pnfs_osd_layout;
++
++	dprintk("%s: Begin pnfslay %p layout %p\n", __func__, pnfslay, layout);
++
++	BUG_ON(!layout);
++
++	status = -ENOMEM;
++	lseg = kzalloc(sizeof(*lseg) + sizeof(*objlseg) +
++		       pnfs_osd_layout_incore_sz(layout), GFP_KERNEL);
++	if (!lseg)
++		goto err;
++
++	objlseg = LSEG_LD_DATA(lseg);
++	pnfs_osd_layout = (struct pnfs_osd_layout *)objlseg->pnfs_osd_layout;
++	pnfs_osd_xdr_decode_layout(pnfs_osd_layout, layout);
++
++	status = objio_alloc_lseg(&objlseg->internal, pnfslay, lseg,
++				  pnfs_osd_layout);
++	if (status)
++		goto err;
++
++	dprintk("%s: Return %p\n", __func__, lseg);
++	return lseg;
++
++ err:
++	kfree(lseg);
++	return ERR_PTR(status);
++}
++
++/*
++ * Free a layout segment
++ */
++static void
++objlayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	struct objlayout_segment *objlseg;
++
++	dprintk("%s: freeing layout segment %p\n", __func__, lseg);
++
++	if (unlikely(!lseg))
++		return;
++
++	objlseg = LSEG_LD_DATA(lseg);
++	objio_free_lseg(objlseg->internal);
++	kfree(lseg);
++}
++
++/*
++ * I/O Operations
++ */
++static inline u64
++end_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	end = start + len;
++	return end >= start ? end : NFS4_MAX_UINT64;
++}
++
++/* last octet in a range */
++static inline u64
++last_byte_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	BUG_ON(!len);
++	end = start + len;
++	return end > start ? end - 1 : NFS4_MAX_UINT64;
++}
++
++static struct objlayout_io_state *
++objlayout_alloc_io_state(struct pnfs_layout_type *pnfs_layout_type,
++			struct page **pages,
++			unsigned pgbase,
++			unsigned nr_pages,
++			loff_t offset,
++			size_t count,
++			struct pnfs_layout_segment *lseg,
++			void *rpcdata)
++{
++	struct objlayout_segment *objlseg = LSEG_LD_DATA(lseg);
++	struct objlayout_io_state *state;
++	u64 lseg_end_offset;
++	size_t size_nr_pages;
++
++	dprintk("%s: allocating io_state\n", __func__);
++	if (objio_alloc_io_state(objlseg->internal, &state))
++		return NULL;
++
++	BUG_ON(offset < lseg->range.offset);
++	lseg_end_offset = end_offset(lseg->range.offset, lseg->range.length);
++	BUG_ON(offset >= lseg_end_offset);
++	if (offset + count > lseg_end_offset) {
++		count = lseg->range.length - (offset - lseg->range.offset);
++		dprintk("%s: truncated count %Zd\n", __func__, count);
++	}
++
++	if (pgbase >= PAGE_SIZE) { /* a full page offset also skips a page */
++		unsigned n = pgbase >> PAGE_SHIFT;
++
++		pgbase &= ~PAGE_MASK;
++		pages += n;
++		nr_pages -= n;
++	}
++
++	size_nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
++	BUG_ON(nr_pages < size_nr_pages);
++	if (nr_pages > size_nr_pages)
++		nr_pages = size_nr_pages;
++
++	INIT_LIST_HEAD(&state->err_list);
++	state->lseg = lseg;
++	state->rpcdata = rpcdata;
++	state->pages = pages;
++	state->pgbase = pgbase;
++	state->nr_pages = nr_pages;
++	state->offset = offset;
++	state->count = count;
++	state->sync = 0;
++
++	return state;
++}
++
++static void
++objlayout_free_io_state(struct objlayout_io_state *state)
++{
++	dprintk("%s: freeing io_state\n", __func__);
++	if (unlikely(!state))
++		return;
++
++	objio_free_io_state(state);
++}
++
++/*
++ * I/O done common code: free @state, or on error keep it for layoutreturn
++ */
++static void
++objlayout_iodone(struct objlayout_io_state *state)
++{
++	dprintk("%s: state %p status %d\n", __func__, state, state->status);
++
++	if (likely(state->status >= 0)) {
++		objlayout_free_io_state(state);
++	} else {
++		struct objlayout *objlay = OBJLAYOUT(state->lseg->layout);
++
++		spin_lock(&objlay->lock);
++		objlay->delta_space_valid = OBJ_DSU_INVALID;
++		list_add(&state->err_list, &objlay->err_list);
++		spin_unlock(&objlay->lock);
++	}
++}
++
++/*
++ * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
++ *
++ * The @index component IO failed (error returned from target). Register
++ * the error for later reporting at layout-return.
++ */
++void
++objlayout_io_set_result(struct objlayout_io_state *state, unsigned index,
++			int osd_error, u64 offset, u64 length, bool is_write)
++{
++	struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index];
++
++	BUG_ON(index >= state->num_comps);
++	if (osd_error) {
++		struct objlayout_segment *objlseg = LSEG_LD_DATA(state->lseg);
++		struct pnfs_osd_layout *layout =
++				(typeof(layout))objlseg->pnfs_osd_layout;
++
++		ioerr->oer_component = layout->olo_comps[index].oc_object_id;
++		ioerr->oer_comp_offset = offset;
++		ioerr->oer_comp_length = length;
++		ioerr->oer_iswrite = is_write;
++		ioerr->oer_errno = osd_error;
++
++		dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
++			"par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
++			__func__, index, ioerr->oer_errno,
++			ioerr->oer_iswrite,
++			_DEVID_LO(&ioerr->oer_component.oid_device_id),
++			_DEVID_HI(&ioerr->oer_component.oid_device_id),
++			ioerr->oer_component.oid_partition_id,
++			ioerr->oer_component.oid_object_id,
++			ioerr->oer_comp_offset,
++			ioerr->oer_comp_length);
++	} else {
++		/* User need not call if no error is reported */
++		ioerr->oer_errno = 0;
++	}
++}
++
++static void _rpc_commit_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++
++	pnfs_client_ops->nfs_commit_complete(wdata);
++}
++
++/*
++ * Commit: no OSD request is issued; only the completion work is scheduled.
++ */
++enum pnfs_try_status
++objlayout_commit(struct nfs_write_data *wdata, int how)
++{
++	int status = PNFS_ATTEMPTED;
++
++	INIT_WORK(&wdata->task.u.tk_work, _rpc_commit_complete);
++	schedule_work(&wdata->task.u.tk_work);
++	dprintk("%s: Return %d\n", __func__, status);
++	return status;
++}
++
++/* Work item queued with schedule_work() to call ->nfs_readlist_complete().
++ * This is because the osd completion is called with ints-off from
++ * the block layer
++ */
++static void _rpc_read_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_read_data *rdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	rdata = container_of(task, struct nfs_read_data, task);
++
++	pnfs_client_ops->nfs_readlist_complete(rdata);
++}
++
++void
++objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
++{
++	int eof = state->eof;
++	struct nfs_read_data *rdata;
++
++	state->status = status;
++	dprintk("%s: Begin status=%Zd eof=%d\n", __func__, status, eof);
++	rdata = state->rpcdata;
++	rdata->task.tk_status = status;
++	if (status >= 0) {
++		rdata->res.count = status;
++		rdata->res.eof = eof;
++	}
++	objlayout_iodone(state);
++	/* must not use state after this point: iodone may have freed it */
++
++	if (sync)
++		pnfs_client_ops->nfs_readlist_complete(rdata);
++	else {
++		INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
++		schedule_work(&rdata->task.u.tk_work);
++	}
++}
++
++/*
++ * Perform sync or async reads.
++ */
++enum pnfs_try_status
++objlayout_read_pagelist(struct nfs_read_data *rdata, unsigned nr_pages)
++{
++	loff_t offset = rdata->args.offset;
++	size_t count = rdata->args.count;
++	struct objlayout_io_state *state;
++	ssize_t status = 0;
++	loff_t eof;
++
++	dprintk("%s: Begin inode %p offset %llu count %d\n",
++		__func__, rdata->inode, offset, (int)count);
++
++	eof = i_size_read(rdata->inode);
++	if (unlikely(offset + count > eof)) {
++		if (offset >= eof) {
++			status = 0;
++			rdata->res.count = 0;
++			rdata->res.eof = 1;
++			goto out;
++		}
++		count = eof - offset;
++	}
++
++	state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout,
++					 rdata->args.pages, rdata->args.pgbase,
++					 nr_pages, offset, count,
++					 rdata->pdata.lseg, rdata);
++	if (unlikely(!state)) {
++		status = -ENOMEM;
++		goto out;
++	}
++
++	state->eof = state->offset + state->count >= eof;
++
++	status = objio_read_pagelist(state);
++ out:
++	dprintk("%s: Return status %Zd\n", __func__, status);
++	rdata->pdata.pnfs_error = status;
++	return PNFS_ATTEMPTED;
++}
++
++/* Work item queued with schedule_work() to call ->nfs_writelist_complete().
++ * This is because the osd completion is called with ints-off from
++ * the block layer
++ */
++static void _rpc_write_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++
++	pnfs_client_ops->nfs_writelist_complete(wdata);
++}
++
++void
++objlayout_write_done(struct objlayout_io_state *state, ssize_t status,
++		     bool sync)
++{
++	struct nfs_write_data *wdata;
++
++	dprintk("%s: Begin\n", __func__);
++	wdata = state->rpcdata;
++	state->status = status;
++	wdata->task.tk_status = status;
++	if (status >= 0) {
++		wdata->res.count = status;
++		wdata->verf.committed = state->committed;
++		dprintk("%s: Return status %d committed %d\n",
++			__func__, wdata->task.tk_status,
++			wdata->verf.committed);
++	} else
++		dprintk("%s: Return status %d\n",
++			__func__, wdata->task.tk_status);
++	objlayout_iodone(state);
++	/* must not use state after this point */
++
++	if (sync)
++		pnfs_client_ops->nfs_writelist_complete(wdata);
++	else {
++		INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
++		schedule_work(&wdata->task.u.tk_work);
++	}
++}
++
++/*
++ * Perform sync or async writes.
++ */
++enum pnfs_try_status
++objlayout_write_pagelist(struct nfs_write_data *wdata,
++			 unsigned nr_pages,
++			 int how)
++{
++	struct objlayout_io_state *state;
++	ssize_t status;
++
++	dprintk("%s: Begin inode %p offset %llu count %u\n",
++		__func__, wdata->inode, wdata->args.offset, wdata->args.count);
++
++	state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout,
++					 wdata->args.pages,
++					 wdata->args.pgbase,
++					 nr_pages,
++					 wdata->args.offset,
++					 wdata->args.count,
++					 wdata->pdata.lseg, wdata);
++	if (unlikely(!state)) {
++		status = -ENOMEM;
++		goto out;
++	}
++
++	state->sync = how & FLUSH_SYNC;
++
++	status = objio_write_pagelist(state, how & FLUSH_STABLE);
++ out:
++	dprintk("%s: Return status %Zd\n", __func__, status);
++	wdata->pdata.pnfs_error = status;
++	return PNFS_ATTEMPTED;
++}
++
++void
++objlayout_encode_layoutcommit(struct pnfs_layout_type *pnfslay,
++			      struct xdr_stream *xdr,
++			      const struct pnfs_layoutcommit_arg *args)
++{
++	struct objlayout *objlay = OBJLAYOUT(pnfslay);
++	struct pnfs_osd_layoutupdate lou;
++	__be32 *start;
++
++	dprintk("%s: Begin\n", __func__);
++
++	spin_lock(&objlay->lock);
++	lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
++	lou.dsu_delta = objlay->delta_space_used;
++	objlay->delta_space_used = 0;
++	objlay->delta_space_valid = OBJ_DSU_INIT;
++	lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
++	spin_unlock(&objlay->lock);
++
++	start = xdr_reserve_space(xdr, 4);
++	BUG_ON(!start);	/* length word is written through start below */
++	BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));
++
++	*start = cpu_to_be32((xdr->p - start - 1) * 4);
++
++	dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
++		lou.dsu_delta, lou.olu_ioerr_flag);
++}
++
++static int
++err_prio(u32 oer_errno)
++{
++	switch (oer_errno) {
++	case 0:
++		return 0;
++
++	case PNFS_OSD_ERR_RESOURCE:
++		return OSD_ERR_PRI_RESOURCE;
++	case PNFS_OSD_ERR_BAD_CRED:
++		return OSD_ERR_PRI_BAD_CRED;
++	case PNFS_OSD_ERR_NO_ACCESS:
++		return OSD_ERR_PRI_NO_ACCESS;
++	case PNFS_OSD_ERR_UNREACHABLE:
++		return OSD_ERR_PRI_UNREACHABLE;
++	case PNFS_OSD_ERR_NOT_FOUND:
++		return OSD_ERR_PRI_NOT_FOUND;
++	case PNFS_OSD_ERR_NO_SPACE:
++		return OSD_ERR_PRI_NO_SPACE;
++	default:
++		WARN_ON(1);
++		/* fallthrough */
++	case PNFS_OSD_ERR_EIO:
++		return OSD_ERR_PRI_EIO;
++	}
++}
++
++static void
++merge_ioerr(struct pnfs_osd_ioerr *dest_err,
++	    const struct pnfs_osd_ioerr *src_err)
++{
++	u64 dest_end, src_end;
++
++	if (!dest_err->oer_errno) {
++		*dest_err = *src_err;
++		/* accumulated device must be blank */
++		memset(&dest_err->oer_component.oid_device_id, 0,
++			sizeof(dest_err->oer_component.oid_device_id));
++
++		return;
++	}
++
++	if (dest_err->oer_component.oid_partition_id !=
++				src_err->oer_component.oid_partition_id)
++		dest_err->oer_component.oid_partition_id = 0;
++
++	if (dest_err->oer_component.oid_object_id !=
++				src_err->oer_component.oid_object_id)
++		dest_err->oer_component.oid_object_id = 0;
++
++	/* take both ends before the start offset is lowered */
++	dest_end = end_offset(dest_err->oer_comp_offset,
++			      dest_err->oer_comp_length);
++	src_end =  end_offset(src_err->oer_comp_offset,
++			      src_err->oer_comp_length);
++	if (dest_end < src_end)
++		dest_end = src_end;
++
++	if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
++		dest_err->oer_comp_offset = src_err->oer_comp_offset;
++	dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;
++	/* a write error outranks a read error; else higher priority wins */
++	if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
++	    (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
++		dest_err->oer_errno = src_err->oer_errno;
++	} else if (src_err->oer_iswrite && !dest_err->oer_iswrite) {
++		dest_err->oer_iswrite = true;
++		dest_err->oer_errno = src_err->oer_errno;
++	}
++}
++
++static void
++encode_accumulated_error(struct objlayout *objlay, struct xdr_stream *xdr)
++{
++	struct objlayout_io_state *state, *tmp;
++	struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
++
++	list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
++		unsigned i;
++
++		for (i = 0; i < state->num_comps; i++) {
++			struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
++
++			if (!ioerr->oer_errno)
++				continue;
++
++			printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d "
++				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
++				"offset=0x%llx length=0x%llx\n",
++				__func__, i, ioerr->oer_errno,
++				ioerr->oer_iswrite,
++				_DEVID_LO(&ioerr->oer_component.oid_device_id),
++				_DEVID_HI(&ioerr->oer_component.oid_device_id),
++				ioerr->oer_component.oid_partition_id,
++				ioerr->oer_component.oid_object_id,
++				ioerr->oer_comp_offset,
++				ioerr->oer_comp_length);
++
++			merge_ioerr(&accumulated_err, ioerr);
++		}
++		list_del(&state->err_list);
++		objlayout_free_io_state(state);
++	}
++
++	BUG_ON(pnfs_osd_xdr_encode_ioerr(xdr, &accumulated_err));
++}
++
++void
++objlayout_encode_layoutreturn(struct pnfs_layout_type *pnfslay,
++			      struct xdr_stream *xdr,
++			      const struct nfs4_pnfs_layoutreturn_arg *args)
++{
++	struct objlayout *objlay = OBJLAYOUT(pnfslay);
++	struct objlayout_io_state *state, *tmp;
++	__be32 *start, *uninitialized_var(last_xdr);
++
++	dprintk("%s: Begin\n", __func__);
++	start = xdr_reserve_space(xdr, 4);
++	BUG_ON(!start);
++
++	spin_lock(&objlay->lock);
++
++	list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
++		unsigned i;
++		int res = 0;
++
++		for (i = 0; i < state->num_comps && !res; i++) {
++			struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
++
++			if (!ioerr->oer_errno)
++				continue;
++
++			dprintk("%s: err[%d]: errno=%d is_write=%d "
++				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
++				"offset=0x%llx length=0x%llx\n",
++				__func__, i, ioerr->oer_errno,
++				ioerr->oer_iswrite,
++				_DEVID_LO(&ioerr->oer_component.oid_device_id),
++				_DEVID_HI(&ioerr->oer_component.oid_device_id),
++				ioerr->oer_component.oid_partition_id,
++				ioerr->oer_component.oid_object_id,
++				ioerr->oer_comp_offset,
++				ioerr->oer_comp_length);
++
++			last_xdr = xdr->p;
++			res = pnfs_osd_xdr_encode_ioerr(xdr, &state->ioerrs[i]);
++		}
++		if (unlikely(res)) {
++			/* no space for even one error descriptor */
++			BUG_ON(last_xdr == start + 1);
++
++			/* we've encountered a situation with lots and lots of
++			 * errors and no space to encode them all. Use the last
++			 * available slot to report the union of all the
++			 * remaining errors.
++			 */
++			xdr_rewind_stream(xdr, last_xdr -
++					       pnfs_osd_ioerr_xdr_sz() / 4);
++			encode_accumulated_error(objlay, xdr);
++			goto loop_done;
++		}
++		list_del(&state->err_list);
++		objlayout_free_io_state(state);
++	}
++loop_done:
++	spin_unlock(&objlay->lock);
++
++	*start = cpu_to_be32((xdr->p - start - 1) * 4);
++	dprintk("%s: Return\n", __func__);
++}
++
++struct objlayout_deviceinfo {
++	struct page *page;
++	struct pnfs_osd_deviceaddr da; /* This must be last */
++};
++
++/* Initialize and call nfs_getdeviceinfo, then decode and return a
++ * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo()
++ * should be called. Returns 0, -ENOMEM or the nfs_getdeviceinfo() error;
++ * on failure the page allocated here is freed and *deviceaddr is untouched.
++ */
++int objlayout_get_deviceinfo(struct pnfs_layout_type *pnfslay,
++	struct pnfs_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr)
++{
++	struct objlayout_deviceinfo *odi;
++	struct pnfs_device pd;
++	struct page *page;
++	size_t sz;
++	u32 *p;
++	int err;
++
++	page = alloc_page(GFP_KERNEL);
++	if (!page)
++		return -ENOMEM;
++
++	pd.area = page_address(page);
++
++	memcpy(&pd.dev_id, d_id, sizeof(*d_id));
++	pd.layout_type = LAYOUT_OSD2_OBJECTS;
++	pd.dev_notify_types = 0;
++	pd.pages = &page;
++	pd.pgbase = 0;
++	pd.pglen = PAGE_SIZE;
++	pd.mincount = 0;
++
++	/* the reply is decoded below from the page behind pd.area */
++	err = pnfs_client_ops->nfs_getdeviceinfo(PNFS_NFS_SERVER(pnfslay), &pd);
++	dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
++	if (err)
++		goto err_out;
++
++	p = pd.area;
++	sz = pnfs_osd_xdr_deviceaddr_incore_sz(p);
++	odi = kzalloc(sz + (sizeof(*odi) - sizeof(odi->da)), GFP_KERNEL);
++	if (!odi) {
++		err = -ENOMEM;
++		goto err_out;
++	}
++	pnfs_osd_xdr_decode_deviceaddr(&odi->da, p);
++	odi->page = page;
++	*deviceaddr = &odi->da;
++	return 0;
++
++err_out:
++	__free_page(page);
++	return err;
++}
++
++void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
++{
++	struct objlayout_deviceinfo *odi = container_of(deviceaddr,
++						struct objlayout_deviceinfo,
++						da);
++
++	__free_page(odi->page);
++	kfree(odi);
++}
++
++/*
++ * Initialize a mountpoint by retrieving the list of
++ * available devices for it.
++ * Return the pnfs_mount_type structure so the
++ * pNFS_client can refer to the mount point later on.
++ */
++static int
++objlayout_initialize_mountpoint(struct nfs_server *server,
++				const struct nfs_fh *mntfh)
++{
++	void *data;
++
++	data = objio_init_mt();
++	if (IS_ERR(data)) {
++		printk(KERN_INFO "%s: objlayout lib not ready err=%ld\n",
++		       __func__, PTR_ERR(data));
++		return PTR_ERR(data);
++	}
++	server->pnfs_ld_data = data;
++
++	dprintk("%s: Return data=%p\n", __func__, data);
++	return 0;
++}
++
++/*
++ * Uninitialize a mountpoint
++ */
++static int
++objlayout_uninitialize_mountpoint(struct nfs_server *server)
++{
++	dprintk("%s: Begin %p\n", __func__, server->pnfs_ld_data);
++	objio_fini_mt(server->pnfs_ld_data);
++	return 0;
++}
++
++struct layoutdriver_io_operations objlayout_io_operations = {
++	.commit                  = objlayout_commit,
++	.read_pagelist           = objlayout_read_pagelist,
++	.write_pagelist          = objlayout_write_pagelist,
++	.alloc_layout            = objlayout_alloc_layout,
++	.free_layout             = objlayout_free_layout,
++	.alloc_lseg              = objlayout_alloc_lseg,
++	.free_lseg               = objlayout_free_lseg,
++	.encode_layoutcommit	 = objlayout_encode_layoutcommit,
++	.encode_layoutreturn     = objlayout_encode_layoutreturn,
++	.initialize_mountpoint   = objlayout_initialize_mountpoint,
++	.uninitialize_mountpoint = objlayout_uninitialize_mountpoint,
++};
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-08-23 12:09:03.351434439 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-08-23 12:09:03.351434439 -0400
+@@ -0,0 +1,171 @@
++/*
++ *  objlayout.h
++ *
++ *  Data types and function declarations for interfacing with the
++ *  pNFS standard object layout driver.
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#ifndef _OBJLAYOUT_H
++#define _OBJLAYOUT_H
++
++#include <linux/nfs_fs.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/pnfs_osd_xdr.h>
++
++/*
++ * in-core layout segment
++ */
++struct objlayout_segment {
++	void *internal;    /* for provider internal use */
++	u8 pnfs_osd_layout[];
++};
++
++/*
++ * per-inode layout
++ */
++struct objlayout {
++	struct pnfs_layout_type pnfs_layout;
++
++	 /* for layout_commit */
++	enum osd_delta_space_valid_enum {
++		OBJ_DSU_INIT = 0,
++		OBJ_DSU_VALID,
++		OBJ_DSU_INVALID,
++	} delta_space_valid;
++	s64 delta_space_used;  /* consumed by write ops */
++
++	 /* for layout_return */
++	spinlock_t lock;
++	struct list_head err_list;
++};
++
++static inline struct objlayout *
++OBJLAYOUT(struct pnfs_layout_type *lo)
++{
++	return container_of(lo, struct objlayout, pnfs_layout);
++}
++
++/*
++ * per-I/O operation state
++ * embedded in objects provider io_state data structure
++ */
++struct objlayout_io_state {
++	struct pnfs_layout_segment *lseg;
++
++	struct page **pages;
++	unsigned pgbase;
++	unsigned nr_pages;
++	unsigned long count;
++	loff_t offset;
++	bool sync;
++
++	void *rpcdata;
++	int status;             /* res */
++	int eof;                /* res */
++	int committed;          /* res */
++
++	/* Error reporting (layout_return) */
++	struct list_head err_list;
++	unsigned num_comps;
++	/* Pointer to array of error descriptors of size num_comps.
++	 * It should contain as many entries as devices in the osd_layout
++	 * that participate in the I/O. It is up to the io_engine to allocate
++	 * needed space and set num_comps.
++	 */
++	struct pnfs_osd_ioerr *ioerrs;
++};
++
++/*
++ * Raid engine I/O API (implemented by the I/O provider, e.g. panfs_shim.c)
++ */
++extern void *objio_init_mt(void);
++extern void objio_fini_mt(void *mt);
++
++extern int objio_alloc_lseg(void **outp,	/* *outp: provider's segment data */
++	struct pnfs_layout_type *pnfslay,
++	struct pnfs_layout_segment *lseg,
++	struct pnfs_osd_layout *layout);
++extern void objio_free_lseg(void *p);
++
++extern int objio_alloc_io_state(void *seg, struct objlayout_io_state **outp);
++extern void objio_free_io_state(struct objlayout_io_state *state);
++
++extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state);
++extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state,
++				    bool stable);
++
++/*
++ * callback API (entry points the I/O provider calls back into)
++ */
++extern void objlayout_io_set_result(struct objlayout_io_state *state,
++				    unsigned index, int osd_error,
++				    u64 offset, u64 length, bool is_write);
++
++static inline void
++objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used)
++{
++	struct objlayout *lo = OBJLAYOUT(state->lseg->layout);
++
++	/* The protocol wants the delta-space-used (DSU) report accurate or
++	 * absent: once the DSU was marked invalid it stays invalid and this
++	 * delta is dropped; otherwise it is accumulated under the lock. */
++	spin_lock(&lo->lock);
++	if (lo->delta_space_valid == OBJ_DSU_INVALID)
++		goto out;
++	lo->delta_space_valid = OBJ_DSU_VALID;
++	lo->delta_space_used += space_used;
++out:
++	spin_unlock(&lo->lock);
++}
++
++extern void objlayout_read_done(struct objlayout_io_state *state,
++				ssize_t status, bool sync);	/* read completion */
++extern void objlayout_write_done(struct objlayout_io_state *state,
++				 ssize_t status, bool sync);	/* write completion */
++
++extern int objlayout_get_deviceinfo(struct pnfs_layout_type *pnfslay,
++	struct pnfs_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr);
++extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr);
++
++/*
++ * exported generic objects function vectors
++ */
++extern struct layoutdriver_io_operations objlayout_io_operations;
++extern struct pnfs_client_operations *pnfs_client_ops;	/* set at driver registration */
++
++#endif /* _OBJLAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-08-23 12:09:03.352501716 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-08-23 12:09:03.352501716 -0400
+@@ -0,0 +1,734 @@
++/*
++ *  panfs_shim.c
++ *
++ *  Shim layer for interfacing with the Panasas DirectFlow module I/O stack
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ */
++
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <asm/byteorder.h>
++
++#include "objlayout.h"
++#include "panfs_shim.h"
++
++#include <linux/panfs_shim_api.h>
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++struct panfs_export_operations *panfs_export_ops;	/* NULL unless panfs_shim_register()ed */
++
++void *objio_init_mt(void)
++{
++	/* no per-mount state here; only report whether panfs has registered */
++	return panfs_export_ops ? NULL : ERR_PTR(-EAGAIN);
++}
++
++void objio_fini_mt(void *mountid)	/* no-op: objio_init_mt() allocates nothing */
++{
++}
++
++static int panfs_shim_conv_raid01(struct pnfs_osd_layout *layout,
++				  struct pnfs_osd_data_map *lo_map,
++				  pan_agg_layout_hdr_t *hdr)
++{
++	/* mirrored -> RAID1, several components -> RAID0, otherwise simple */
++	hdr->type = PAN_AGG_SIMPLE;
++	if (lo_map->odm_mirror_cnt) {
++		hdr->type = PAN_AGG_RAID1;
++		hdr->hdr.raid1.num_comps = lo_map->odm_mirror_cnt + 1;
++	} else if (layout->olo_num_comps > 1) {
++		hdr->type = PAN_AGG_RAID0;
++		hdr->hdr.raid0.num_comps = layout->olo_num_comps;
++		hdr->hdr.raid0.stripe_unit = lo_map->odm_stripe_unit;
++	}
++	return 0;
++}
++
++static int panfs_shim_conv_raid5(struct pnfs_osd_layout *layout,
++				 struct pnfs_osd_data_map *lo_map,
++				 pan_agg_layout_hdr_t *hdr)
++{
++	const bool grouped = lo_map->odm_group_width || lo_map->odm_group_depth;
++
++	/* mirrored RAID5 maps are rejected */
++	if (lo_map->odm_mirror_cnt)
++		return -EINVAL;
++
++	if (!grouped) {
++		pan_agg_raid5_left_header_t *r5 = &hdr->hdr.raid5_left;
++		hdr->type = PAN_AGG_RAID5_LEFT;
++		r5->num_comps = lo_map->odm_num_comps;
++		/* num_comps is 16 bits wide: fail if the count got truncated */
++		if (r5->num_comps != lo_map->odm_num_comps)
++			return -EINVAL;
++		r5->stripe_unit0 = lo_map->odm_stripe_unit;
++		r5->stripe_unit1 = r5->stripe_unit0;
++		r5->stripe_unit2 = r5->stripe_unit1;
++		return 0;
++	}
++	/* grouped RAID5 needs both a group width and a group depth */
++	if (!lo_map->odm_group_width || !lo_map->odm_group_depth)
++		return -EINVAL;
++	hdr->type = PAN_AGG_GRP_RAID5_LEFT;
++	hdr->hdr.grp_raid5_left.num_comps = lo_map->odm_num_comps;
++	if (hdr->hdr.grp_raid5_left.num_comps != lo_map->odm_num_comps)
++		return -EINVAL;
++	hdr->hdr.grp_raid5_left.stripe_unit = lo_map->odm_stripe_unit;
++	hdr->hdr.grp_raid5_left.rg_width = lo_map->odm_group_width;
++	hdr->hdr.grp_raid5_left.rg_depth = lo_map->odm_group_depth;
++	/* a guess: the panasas server should not hand out any other layout */
++	hdr->hdr.grp_raid5_left.group_layout_policy =
++		PAN_AGG_GRP_RAID5_LEFT_POLICY_ROUND_ROBIN;
++	return 0;
++}
++
++/*
++ * Convert a pnfs_osd data map into *hdr; 0 on success, -EINVAL otherwise
++ */
++static int
++panfs_shim_conv_pnfs_osd_data_map(
++	struct pnfs_osd_layout *layout,
++	pan_agg_layout_hdr_t *hdr)
++{
++	int status = -EINVAL;
++	struct pnfs_osd_data_map *lo_map = &layout->olo_map;
++
++	if (!layout->olo_num_comps) {
++		dprintk("%s: !!layout.n_comps(%u)\n", __func__,
++			layout->olo_num_comps);
++		goto err;
++	}
++
++	switch (lo_map->odm_raid_algorithm) {
++	case PNFS_OSD_RAID_0:
++		if (layout->olo_num_comps != lo_map->odm_num_comps ||
++		    layout->olo_comps_index) {
++			dprintk("%s: !!PNFS_OSD_RAID_0 "
++				"layout.n_comps(%u) map.n_comps(%u) "
++				"comps_index(%u)\n", __func__,
++				layout->olo_num_comps,
++				lo_map->odm_num_comps,
++				layout->olo_comps_index);
++			goto err;
++		}
++		status = panfs_shim_conv_raid01(layout, lo_map, hdr);
++		break;
++
++	case PNFS_OSD_RAID_5:
++		if (!lo_map->odm_group_width) {
++			if (layout->olo_num_comps != lo_map->odm_num_comps ||
++			    layout->olo_comps_index) {
++				dprintk("%s: !!PNFS_OSD_RAID_5 !group_width "
++					"layout.n_comps(%u)!=map.n_comps(%u) "
++					"|| comps_index(%u)\n", __func__,
++					layout->olo_num_comps,
++					lo_map->odm_num_comps,
++					layout->olo_comps_index);
++				goto err;
++			}
++		} else if ((layout->olo_num_comps != lo_map->odm_num_comps &&
++			    layout->olo_num_comps > lo_map->odm_group_width) ||
++			   (layout->olo_comps_index % lo_map->odm_group_width)){
++				dprintk("%s: !!PNFS_OSD_RAID_5 group_width(%u) "
++					"layout.n_comps(%u) map.n_comps(%u) "
++					"comps_index(%u)\n", __func__,
++					lo_map->odm_group_width,
++					layout->olo_num_comps,
++					lo_map->odm_num_comps,
++					layout->olo_comps_index);
++				goto err;
++			}
++		status = panfs_shim_conv_raid5(layout, lo_map, hdr);
++		break;
++
++	case PNFS_OSD_RAID_4:
++	case PNFS_OSD_RAID_PQ:
++	default:
++		dprintk("%s: !!PNFS_OSD_RAID_(%d)\n", __func__,
++			lo_map->odm_raid_algorithm);
++		goto err;
++	}
++
++	/* fall through: hand back the converter's own result, so that a map
++	 * rejected by panfs_shim_conv_raid5() is not reported as success */
++err:
++	return status;
++}
++
++/*
++ * Convert pnfs_osd layout into a single-allocation Panasas map and caps
++ * (pan_sm_map_cap_t) stored in *outp; returns 0 or a negative errno
++ */
++int
++objio_alloc_lseg(void **outp,
++	struct pnfs_layout_type *pnfslay,
++	struct pnfs_layout_segment *lseg,
++	struct pnfs_osd_layout *layout)
++{
++	int i, total_comps;
++	int status;
++	struct pnfs_osd_object_cred *lo_comp;
++	pan_size_t alloc_sz, local_sz;
++	pan_sm_map_cap_t *mcs = NULL;
++	u8 *buf;
++	pan_agg_comp_obj_t *pan_comp;
++	pan_sm_sec_t *pan_sec;
++
++	status = -EINVAL;
++	if (layout->olo_num_comps < layout->olo_map.odm_group_width) {
++		total_comps = layout->olo_comps_index + layout->olo_num_comps;
++	} else {
++		/* allocate full map, otherwise SAM gets confused */
++		total_comps = layout->olo_map.odm_num_comps;
++	}
++	alloc_sz = total_comps *
++		   (sizeof(pan_agg_comp_obj_t) + sizeof(pan_sm_sec_t));
++	for (i = 0; i < layout->olo_num_comps; i++) {	/* add each cred's size */
++		void *p = layout->olo_comps[i].oc_cap.cred;
++		if (panfs_export_ops->sm_sec_t_get_size_otw(
++			(pan_sm_sec_otw_t *)&p, &local_sz, NULL, NULL))
++			goto err;
++		alloc_sz += local_sz;
++	}
++
++	status = -ENOMEM;
++	mcs = kzalloc(sizeof(*mcs) + alloc_sz, GFP_KERNEL);
++	if (!mcs)
++		goto err;
++	buf = (u8 *)&mcs[1];	/* arrays are carved out right after the header */
++
++	mcs->offset = lseg->range.offset;
++	mcs->length = lseg->range.length;
++#if 0
++	/* FIXME: for now */
++	mcs->expiration_time.ts_sec  = 0;
++	mcs->expiration_time.ts_nsec = 0;
++#endif
++	mcs->full_map.map_hdr.avail_state = PAN_AGG_OBJ_STATE_NORMAL;
++	status = panfs_shim_conv_pnfs_osd_data_map(layout,
++						   &mcs->full_map.layout_hdr);
++	if (status)
++		goto err;
++
++	mcs->full_map.components.size = total_comps;
++	mcs->full_map.components.data = (pan_agg_comp_obj_t *)buf;
++	buf += total_comps * sizeof(pan_agg_comp_obj_t);
++
++	mcs->secs.size = total_comps;
++	mcs->secs.data = (pan_sm_sec_t *)buf;
++	buf += total_comps * sizeof(pan_sm_sec_t);
++
++	lo_comp = layout->olo_comps;
++	pan_comp = mcs->full_map.components.data + layout->olo_comps_index;
++	pan_sec = mcs->secs.data + layout->olo_comps_index;
++	for (i = 0; i < layout->olo_num_comps; i++) {
++		void *p;
++		pan_stor_obj_id_t *obj_id = &mcs->full_map.map_hdr.obj_id;
++		struct pnfs_osd_objid *oc_obj_id = &lo_comp->oc_object_id;
++		u64 dev_id = __be64_to_cpup(
++			(__be64 *)oc_obj_id->oid_device_id.data + 1);
++
++		dprintk("%s: i=%d deviceid=%Lx:%Lx partition=%Lx object=%Lx\n",
++			__func__, i,
++			__be64_to_cpup((__be64 *)oc_obj_id->oid_device_id.data),
++			__be64_to_cpup((__be64 *)oc_obj_id->oid_device_id.data + 1),
++			oc_obj_id->oid_partition_id, oc_obj_id->oid_object_id);
++
++		if (i == 0) {
++			/* make up mgr_id to calm sam down */
++			pan_mgr_id_construct_artificial(PAN_MGR_SM, 0,
++							&obj_id->dev_id);
++			obj_id->grp_id = oc_obj_id->oid_partition_id;
++			obj_id->obj_id = oc_obj_id->oid_object_id;
++		}
++
++		if (obj_id->grp_id != lo_comp->oc_object_id.oid_partition_id) {
++			dprintk("%s: i=%d grp_id=0x%Lx oid_partition_id=0x%Lx\n",
++				__func__, i, (u64)obj_id->grp_id,
++				lo_comp->oc_object_id.oid_partition_id);
++			status = -EINVAL;
++			goto err;
++		}
++
++		if (obj_id->obj_id != lo_comp->oc_object_id.oid_object_id) {
++			dprintk("%s: i=%d obj_id=0x%Lx oid_object_id=0x%Lx\n",
++				__func__, i, obj_id->obj_id,
++				lo_comp->oc_object_id.oid_object_id);
++			status = -EINVAL;
++			goto err;
++		}
++
++		pan_comp->dev_id = dev_id;
++		if (!pan_stor_is_device_id_an_obsd_id(pan_comp->dev_id)) {
++			dprintk("%s: i=%d dev_id=0x%Lx not an obsd_id\n",
++				__func__, i, pan_comp->dev_id);	/* the id just tested */
++			status = -EINVAL;
++			goto err;
++		}
++		if (lo_comp->oc_osd_version == PNFS_OSD_MISSING) {
++			dprintk("%s: degraded maps not supported yet\n",
++				__func__);
++			status = -ENOTSUPP;
++			goto err;
++		}
++		pan_comp->avail_state = PAN_AGG_COMP_STATE_NORMAL;
++		if (lo_comp->oc_cap_key_sec != PNFS_OSD_CAP_KEY_SEC_NONE) {
++			dprintk("%s: cap key security not supported yet\n",
++				__func__);
++			status = -ENOTSUPP;
++			goto err;
++		}
++
++		p = lo_comp->oc_cap.cred;
++		panfs_export_ops->sm_sec_t_unmarshall(
++			(pan_sm_sec_otw_t *)&p,
++			pan_sec,
++			buf,
++			alloc_sz,
++			NULL,
++			&local_sz);	/* NOTE(review): result is not checked */
++		buf += local_sz;
++		alloc_sz -= local_sz;
++
++		lo_comp++;
++		pan_comp++;
++		pan_sec++;
++	}
++
++	*outp = mcs;
++	dprintk("%s:Return mcs=%p\n", __func__, mcs);
++	return 0;
++
++err:
++	objio_free_lseg(mcs);	/* mcs may still be NULL here; kfree(NULL) is fine */
++	dprintk("%s:Error %d\n", __func__, status);
++	return status;
++}
++
++/*
++ * Release the Panasas map and caps built by objio_alloc_lseg()
++ */
++void objio_free_lseg(void *p)
++{
++	/* header and trailing arrays come from one kzalloc(): one kfree() */
++	kfree(p);
++}
++
++/*
++ * I/O routines: allocate a zeroed shim io_state, hand back its ol_state
++ */
++int objio_alloc_io_state(void *seg, struct objlayout_io_state **outp)
++{
++	struct panfs_shim_io_state *shim_state;
++
++	dprintk("%s: allocating io_state\n", __func__);
++	shim_state = kzalloc(sizeof(*shim_state), GFP_KERNEL);
++	if (shim_state == NULL)
++		return -ENOMEM;
++
++	/* callers only see the embedded generic part (seg is unused here) */
++	*outp = &shim_state->ol_state;
++	return 0;
++}
++
++/*
++ * Free an I/O state (pages are unmapped only if they were ever kmap'ed)
++ */
++void objio_free_io_state(struct objlayout_io_state *ol_state)
++{
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	int i;
++
++	dprintk("%s: freeing io_state\n", __func__);
++	/* sg_list is set only after panfs_shim_pages_to_sg() kmap'ed the pages */
++	for (i = 0; state->sg_list && i < state->ol_state.nr_pages; i++)
++		kunmap(state->ol_state.pages[i]);
++
++	if (state->ucreds)
++		panfs_export_ops->ucreds_put(state->ucreds);
++	kfree(state->sg_list);
++	kfree(state);
++}
++
++static int	/* 0 on success, -ENOMEM if the sg array cannot be allocated */
++panfs_shim_pages_to_sg(
++	struct panfs_shim_io_state *state,
++	struct page **pages,
++	unsigned int pgbase,
++	unsigned nr_pages,
++	size_t count)
++{
++	unsigned i, n;
++	pan_sg_entry_t *sg;
++
++	dprintk("%s pgbase %u nr_pages %u count %d "
++		"pg0 %p flags 0x%x index %llu\n",
++		__func__, pgbase, nr_pages, (int)count, pages[0],
++		(unsigned)pages[0]->flags, (unsigned long long)pages[0]->index);
++
++	sg = kmalloc(nr_pages * sizeof(*sg), GFP_KERNEL);	/* one entry per page */
++	if (sg == NULL)
++		return -ENOMEM;
++
++	dprintk("%s sg_list %p pages %p pgbase %u nr_pages %u\n",
++		__func__, sg, pages, pgbase, nr_pages);
++
++	for (i = 0; i < nr_pages; i++) {
++		sg[i].buffer = (char *)kmap(pages[i]) + pgbase;	/* kunmap: objio_free_io_state() */
++		n = PAGE_SIZE - pgbase;
++		pgbase = 0;	/* only the first page starts at an offset */
++		if (n > count)
++			n = count;
++		sg[i].chunk_size = n;
++		count -= n;
++		if (likely(count)) {
++			sg[i].next = &sg[i+1];
++		} else {
++			/* we're done */
++			sg[i].next = NULL;
++			break;
++		}
++	}
++	BUG_ON(count);	/* the pages could not hold all of count */
++
++	state->sg_list = sg;	/* kfree'd by objio_free_io_state() */
++	return 0;
++}
++
++/*
++ * Read completion: the async callback, also called directly by
++ * objio_read_pagelist() when sam_read() did not return "in progress"
++ */
++static void panfs_shim_read_done(void *arg1, void *arg2,
++				 pan_sam_read_res_t *res_p, pan_status_t rc)
++{
++	struct panfs_shim_io_state *io = arg1;
++	pan_sam_read_res_t *res = res_p ? res_p : &io->u.read.res;
++	ssize_t nbytes;
++
++	dprintk("%s: Begin\n", __func__);
++	/* a successful call status defers to the status in the result */
++	if (rc == PAN_SUCCESS)
++		rc = res->result;
++
++	if (rc != PAN_SUCCESS) {
++		/* the convert_rc() value is negated to form the status */
++		nbytes = -panfs_export_ops->convert_rc(rc);
++		dprintk("%s: pan_sam_read rc %d: status %Zd\n",
++			__func__, rc, nbytes);
++	} else {
++		nbytes = res->length;
++		WARN_ON(nbytes < 0);
++	}
++
++	dprintk("%s: Return status %Zd rc %d\n", __func__, nbytes, rc);
++	objlayout_read_done(&io->ol_state, nbytes, true);
++}
++
++ssize_t	/* 0 once sam_read() was issued, negative if the setup failed */
++objio_read_pagelist(struct objlayout_io_state *ol_state)
++{
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	struct objlayout_segment *lseg = LSEG_LD_DATA(ol_state->lseg);
++	pan_sm_map_cap_t *mcs = (pan_sm_map_cap_t *)lseg->internal;
++	ssize_t status = 0;
++	pan_status_t rc = PAN_SUCCESS;
++
++	dprintk("%s: Begin\n", __func__);
++
++	status = panfs_shim_pages_to_sg(state, ol_state->pages,
++					ol_state->pgbase, ol_state->nr_pages,
++					ol_state->count);
++	if (unlikely(status))
++		goto err;
++
++	state->obj_sec.min_security = 0;
++	state->obj_sec.map_ccaps = mcs;
++
++	rc = panfs_export_ops->ucreds_get(&state->ucreds);	/* put: objio_free_io_state() */
++	if (unlikely(rc)) {
++		status = -EACCES;
++		goto err;
++	}
++
++	state->u.read.args.obj_id = mcs->full_map.map_hdr.obj_id;
++	state->u.read.args.offset = ol_state->offset;
++	rc = panfs_export_ops->sam_read(PAN_SAM_ACCESS_BYPASS_TIMESTAMP,
++					&state->u.read.args,
++					&state->obj_sec,
++					state->sg_list,
++					state->ucreds,
++					ol_state->sync ?
++						NULL : panfs_shim_read_done,	/* no callback if sync */
++					state, NULL,
++					&state->u.read.res);
++	if (rc != PAN_ERR_IN_PROGRESS)	/* not pending: run the completion here */
++		panfs_shim_read_done(state, NULL, &state->u.read.res, rc);
++ err:
++	dprintk("%s: Return %Zd\n", __func__, status);
++	return status;
++}
++
++/*
++ * Write completion: the async callback, also called directly by
++ * objio_write_pagelist() when sam_write() did not return "in progress"
++ */
++static void panfs_shim_write_done(void *arg1, void *arg2,
++				  pan_sam_write_res_t *res_p, pan_status_t rc)
++{
++	struct panfs_shim_io_state *io = arg1;
++	pan_sam_write_res_t *res = res_p ? res_p : &io->u.write.res;
++	ssize_t nbytes;
++
++	dprintk("%s: Begin\n", __func__);
++	/* a successful call status defers to the status in the result */
++	if (rc == PAN_SUCCESS)
++		rc = res->result;
++
++	if (rc != PAN_SUCCESS) {
++		nbytes = -panfs_export_ops->convert_rc(rc);
++		dprintk("%s: pan_sam_write rc %u: status %Zd\n",
++			__func__, rc, nbytes);
++	} else {
++		/* reported as NFS_UNSTABLE, not NFS_FILE_SYNC */
++		io->ol_state.committed = NFS_UNSTABLE;
++		nbytes = res->length;
++		WARN_ON(nbytes < 0);
++
++		/* fold this write's capacity delta into the layout's DSU */
++		objlayout_add_delta_space_used(&io->ol_state,
++					       res->delta_capacity_used);
++	}
++
++	dprintk("%s: Return status %Zd rc %d\n", __func__, nbytes, rc);
++	objlayout_write_done(&io->ol_state, nbytes, true);
++}
++
++ssize_t	/* 0 once sam_write() was issued, negative if the setup failed */
++objio_write_pagelist(struct objlayout_io_state *ol_state,
++		     bool stable /* unused, PanOSD writes are stable */)
++{	/* NOTE(review): panfs_shim_write_done() still reports NFS_UNSTABLE */
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	struct objlayout_segment *lseg = LSEG_LD_DATA(ol_state->lseg);
++	pan_sm_map_cap_t *mcs = (pan_sm_map_cap_t *)lseg->internal;
++	ssize_t status = 0;
++	pan_status_t rc = PAN_SUCCESS;
++
++	dprintk("%s: Begin\n", __func__);
++
++	status = panfs_shim_pages_to_sg(state, ol_state->pages,
++					ol_state->pgbase, ol_state->nr_pages,
++					ol_state->count);
++	if (unlikely(status))
++		goto err;
++
++	state->obj_sec.min_security = 0;
++	state->obj_sec.map_ccaps = mcs;
++
++	rc = panfs_export_ops->ucreds_get(&state->ucreds);	/* put: objio_free_io_state() */
++	if (unlikely(rc)) {
++		status = -EACCES;
++		goto err;
++	}
++
++	state->u.write.args.obj_id = mcs->full_map.map_hdr.obj_id;
++	state->u.write.args.offset = ol_state->offset;
++	rc = panfs_export_ops->sam_write(PAN_SAM_ACCESS_NONE,
++					 &state->u.write.args,
++					 &state->obj_sec,
++					 state->sg_list,
++					 state->ucreds,
++					 ol_state->sync ?
++						NULL : panfs_shim_write_done,	/* no callback if sync */
++					 state,
++					 NULL,
++					 &state->u.write.res);
++	if (rc != PAN_ERR_IN_PROGRESS)	/* not pending: run the completion here */
++		panfs_shim_write_done(state, NULL, &state->u.write.res, rc);
++ err:
++	dprintk("%s: Return %Zd\n", __func__, status);
++	return status;
++}
++
++/* Install the panfs operations vector; -EINVAL if one is already set */
++int panfs_shim_register(struct panfs_export_operations *ops)
++{
++	int ret = -EINVAL;
++
++	if (panfs_export_ops != NULL) {
++		printk(KERN_INFO "%s: panfs already registered (panfs ops %p)\n",
++		       __func__, panfs_export_ops);
++	} else {
++		printk(KERN_INFO "%s: registering panfs ops %p\n",
++		       __func__, ops);
++		panfs_export_ops = ops;
++		ret = 0;
++	}
++	return ret;
++}
++EXPORT_SYMBOL(panfs_shim_register);
++
++/* Drop the registered panfs operations vector; -EINVAL if none is set */
++int panfs_shim_unregister(void)
++{
++	int ret = panfs_export_ops ? 0 : -EINVAL;
++
++	if (ret) {
++		printk(KERN_INFO "%s: panfs is not registered\n", __func__);
++		return ret;
++	}
++	printk(KERN_INFO "%s: unregistering panfs ops %p\n",
++	       __func__, panfs_export_ops);
++	panfs_export_ops = NULL;
++	return ret;
++}
++EXPORT_SYMBOL(panfs_shim_unregister);
++
++/*
++ * Policy Operations
++ */
++
++/*
++ * Return the largest stripe size over the layout's segments (-1 if none)
++ */
++ssize_t
++panlayout_get_stripesize(struct pnfs_layout_type *pnfslay)
++{
++	ssize_t sz, maxsz = -1;
++	struct pnfs_layout_segment *lseg;
++
++	dprintk("%s: Begin\n", __func__);
++
++	list_for_each_entry(lseg, &pnfslay->segs, fi_list) {
++		int n;	/* stripe units counted towards this segment's size */
++		struct objlayout_segment *panlseg = LSEG_LD_DATA(lseg);
++		struct pnfs_osd_layout *lo =
++			(struct pnfs_osd_layout *)panlseg->pnfs_osd_layout;
++		struct pnfs_osd_data_map *map = &lo->olo_map;
++
++		n = map->odm_group_width;
++		if (n == 0)	/* not grouped: all components, mirrors factored out */
++			n = map->odm_num_comps / (map->odm_mirror_cnt + 1);
++
++		switch (map->odm_raid_algorithm) {
++		case PNFS_OSD_RAID_0:
++			break;
++
++		case PNFS_OSD_RAID_4:
++		case PNFS_OSD_RAID_5:
++			n -= 1;	/* presumably the parity unit - TODO confirm */
++			n *= 8;	/* FIXME: until we have 2-D coalescing */
++			break;
++
++		case PNFS_OSD_RAID_PQ:
++			n -= 2;	/* presumably the P and Q units - TODO confirm */
++			break;
++
++		default:
++			BUG_ON(1);	/* unknown raid algorithm */
++		}
++		sz = map->odm_stripe_unit * n;
++		if (sz > maxsz)
++			maxsz = sz;
++	}
++	dprintk("%s: Return %Zd\n", __func__, maxsz);
++	return maxsz;
++}
++
++#define PANLAYOUT_DEF_STRIPE_UNIT    (64*1024)
++#define PANLAYOUT_DEF_STRIPE_WIDTH   9
++#define PANLAYOUT_MAX_STRIPE_WIDTH   11
++#define PANLAYOUT_MAX_GATHER_STRIPES 8
++
++/*
++ * Get the max [rw]size: (max width - 1) * default unit * gathered stripes
++ */
++static ssize_t panlayout_get_blocksize(void)
++{
++	const ssize_t data_units = PANLAYOUT_MAX_STRIPE_WIDTH - 1;
++	const ssize_t blocksize = data_units * PANLAYOUT_DEF_STRIPE_UNIT *
++				  PANLAYOUT_MAX_GATHER_STRIPES;
++
++	dprintk("%s: Return %Zd\n", __func__, blocksize);
++	return blocksize;
++}
++
++static struct layoutdriver_policy_operations panlayout_policy_operations = {
++/*
++ * Don't gather across stripes, but rather gather (coalesce) up to
++ * the stripe size.
++ *
++ * FIXME: change interface to use merge_align, merge_count
++ */
++	.flags                 = PNFS_LAYOUTRET_ON_SETATTR,
++	.get_stripesize        = panlayout_get_stripesize,	/* max over the segments */
++	.get_blocksize         = panlayout_get_blocksize,	/* fixed PANLAYOUT_* bound */
++};
++
++#define PNFS_LAYOUT_PANOSD (NFS4_PNFS_PRIVATE_LAYOUT | LAYOUT_OSD2_OBJECTS)
++
++static struct pnfs_layoutdriver_type panlayout_type = {	/* registered in panlayout_init() */
++	.id = PNFS_LAYOUT_PANOSD,
++	.name = "PNFS_LAYOUT_PANOSD",
++	.ld_io_ops = &objlayout_io_operations,	/* generic objects-layout I/O vector */
++	.ld_policy_ops = &panlayout_policy_operations,
++};
++
++MODULE_DESCRIPTION("pNFS Layout Driver for Panasas OSDs");
++MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
++MODULE_LICENSE("GPL");
++
++static int __init
++panlayout_init(void)
++{
++	pnfs_client_ops = pnfs_register_layoutdriver(&panlayout_type);
++	printk(KERN_INFO "%s: Registered Panasas OSD pNFS Layout Driver\n",
++	       __func__);
++	return 0;	/* NOTE(review): the registration result is never checked */
++}
++
++static void __exit
++panlayout_exit(void)
++{
++	pnfs_unregister_layoutdriver(&panlayout_type);	/* undoes panlayout_init() */
++	printk(KERN_INFO "%s: Unregistered Panasas OSD pNFS Layout Driver\n",
++	       __func__);
++}
++
++module_init(panlayout_init);
++module_exit(panlayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-08-23 12:09:03.353501685 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-08-23 12:09:03.353501685 -0400
+@@ -0,0 +1,482 @@
++/*
++ *  panfs_shim.h
++ *
++ *  Data types and external function declarations for interfacing with
++ *  panfs (Panasas DirectFlow) I/O stack
++ *
++ *  Copyright (C) 2007 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ */
++
++#ifndef _PANLAYOUT_PANFS_SHIM_H
++#define _PANLAYOUT_PANFS_SHIM_H
++
++typedef s8 pan_int8_t;	/* panfs-style names for the kernel's fixed-width ints */
++typedef u8 pan_uint8_t;
++typedef s16 pan_int16_t;
++typedef u16 pan_uint16_t;
++typedef s32 pan_int32_t;
++typedef u32 pan_uint32_t;
++typedef s64 pan_int64_t;
++typedef u64 pan_uint64_t;
++
++/*
++ * from pan_base_types.h
++ */
++typedef  pan_uint64_t pan_rpc_none_t;
++typedef pan_uint32_t  pan_rpc_arrdim_t;	/* "size" of the {size, data} array types */
++typedef pan_uint32_t  pan_status_t;	/* PAN_SUCCESS, PAN_ERR_IN_PROGRESS, ... */
++typedef pan_uint8_t   pan_otw_t;	/* presumably "over the wire" bytes - TODO confirm */
++typedef pan_uint8_t   pan_pad_t;	/* used for pad[] members */
++
++typedef pan_uint32_t  pan_timespec_sec_t;
++typedef pan_uint32_t  pan_timespec_nsec_t;
++
++typedef  struct pan_timespec_s  pan_timespec_t;
++struct pan_timespec_s {
++  pan_timespec_sec_t   ts_sec;
++  pan_timespec_nsec_t  ts_nsec;
++};
++
++/*
++ * from pan_std_types.h
++ */
++typedef pan_uint32_t pan_size_t;	/* 32-bit size type */
++typedef  int  pan_bool_t;
++
++/*
++ * from pan_common_error.h (only the status codes the shim tests for)
++ */
++#define PAN_SUCCESS                                         ((pan_status_t)0)
++#define PAN_ERR_IN_PROGRESS                                 ((pan_status_t)55)
++
++/*
++ * from pan_sg.h (singly linked scatter/gather list)
++ */
++typedef struct pan_sg_entry_s pan_sg_entry_t;
++struct pan_sg_entry_s {
++  void                  *buffer;       /* pointer to memory */
++  pan_uint32_t           chunk_size;   /* size of each chunk (bytes) */
++  pan_sg_entry_t        *next;         /* NULL ends the list */
++};
++
++/*
++ * from pan_storage.h
++ */
++typedef pan_uint64_t pan_stor_dev_id_t;
++typedef pan_uint32_t pan_stor_obj_grp_id_t;
++typedef pan_uint64_t pan_stor_obj_uniq_t;
++typedef pan_uint32_t pan_stor_action_t;
++typedef pan_uint8_t pan_stor_cap_key_t[20];
++
++typedef pan_uint8_t pan_stor_key_type_t;
++typedef pan_uint64_t pan_stor_len_t;
++typedef pan_int64_t pan_stor_delta_len_t;
++typedef pan_uint64_t pan_stor_offset_t;
++typedef pan_uint16_t pan_stor_op_t;	/* one of PAN_STOR_OP_* */
++
++typedef pan_uint16_t pan_stor_sec_level_t;
++
++struct pan_stor_obj_id_s {
++  pan_stor_dev_id_t      dev_id;
++  pan_stor_obj_uniq_t    obj_id;  /* the shim stores the pNFS object id here */
++  pan_stor_obj_grp_id_t  grp_id;  /* the shim stores the pNFS partition id here */
++};
++
++typedef struct pan_stor_obj_id_s pan_stor_obj_id_t;
++
++#define PAN_STOR_OP_NONE ((pan_stor_op_t) 0U)
++#define PAN_STOR_OP_READ ((pan_stor_op_t) 8U)
++#define PAN_STOR_OP_WRITE ((pan_stor_op_t) 9U)
++#define PAN_STOR_OP_APPEND ((pan_stor_op_t) 10U)
++#define PAN_STOR_OP_GETATTR ((pan_stor_op_t) 11U)
++#define PAN_STOR_OP_SETATTR ((pan_stor_op_t) 12U)
++#define PAN_STOR_OP_FLUSH ((pan_stor_op_t) 13U)
++#define PAN_STOR_OP_CLEAR ((pan_stor_op_t) 14U)
++
++/*
++ * from pan_aggregation_map.h
++ */
++typedef pan_uint8_t pan_agg_type_t;	/* one of PAN_AGG_* (map type) */
++typedef pan_uint64_t pan_agg_map_version_t;
++typedef pan_uint8_t pan_agg_obj_state_t;	/* one of PAN_AGG_OBJ_STATE_* */
++typedef pan_uint8_t pan_agg_comp_state_t;	/* one of PAN_AGG_COMP_STATE_* */
++typedef pan_uint8_t pan_agg_comp_flag_t;	/* PAN_AGG_COMP_F_* values */
++
++#define PAN_AGG_OBJ_STATE_INVALID ((pan_agg_obj_state_t) 0x00)
++#define PAN_AGG_OBJ_STATE_NORMAL ((pan_agg_obj_state_t) 0x01)
++#define PAN_AGG_OBJ_STATE_DEGRADED ((pan_agg_obj_state_t) 0x02)
++#define PAN_AGG_OBJ_STATE_RECONSTRUCT ((pan_agg_obj_state_t) 0x03)
++#define PAN_AGG_OBJ_STATE_COPYBACK ((pan_agg_obj_state_t) 0x04)
++#define PAN_AGG_OBJ_STATE_UNAVAILABLE ((pan_agg_obj_state_t) 0x05)
++#define PAN_AGG_OBJ_STATE_CREATING ((pan_agg_obj_state_t) 0x06)
++#define PAN_AGG_OBJ_STATE_DELETED ((pan_agg_obj_state_t) 0x07)
++#define PAN_AGG_COMP_STATE_INVALID ((pan_agg_comp_state_t) 0x00)
++#define PAN_AGG_COMP_STATE_NORMAL ((pan_agg_comp_state_t) 0x01)
++#define PAN_AGG_COMP_STATE_UNAVAILABLE ((pan_agg_comp_state_t) 0x02)
++#define PAN_AGG_COMP_STATE_COPYBACK ((pan_agg_comp_state_t) 0x03)
++#define PAN_AGG_COMP_F_NONE ((pan_agg_comp_flag_t) 0x00)
++#define PAN_AGG_COMP_F_ATTR_STORING ((pan_agg_comp_flag_t) 0x01)
++#define PAN_AGG_COMP_F_OBJ_CORRUPT_OBS ((pan_agg_comp_flag_t) 0x02)
++#define PAN_AGG_COMP_F_TEMP ((pan_agg_comp_flag_t) 0x04)
++
++struct pan_aggregation_map_s {
++  pan_agg_map_version_t  version;
++  pan_agg_obj_state_t    avail_state;  /* the shim sets PAN_AGG_OBJ_STATE_NORMAL */
++  pan_stor_obj_id_t      obj_id;
++};
++
++typedef struct pan_aggregation_map_s pan_aggregation_map_t;
++
++struct pan_agg_comp_obj_s {
++  pan_stor_dev_id_t     dev_id;
++  pan_agg_comp_state_t  avail_state;  /* the shim sets PAN_AGG_COMP_STATE_NORMAL */
++  pan_agg_comp_flag_t   comp_flags;
++};
++
++typedef struct pan_agg_comp_obj_s pan_agg_comp_obj_t;
++
++struct pan_agg_simple_header_s {
++  pan_uint8_t  unused;
++};
++
++typedef struct pan_agg_simple_header_s pan_agg_simple_header_t;
++
++struct pan_agg_raid1_header_s {
++  pan_uint16_t  num_comps;  /* the shim stores mirror count + 1 */
++};
++
++typedef struct pan_agg_raid1_header_s pan_agg_raid1_header_t;
++
++struct pan_agg_raid0_header_s {
++  pan_uint16_t  num_comps;
++  pan_uint32_t  stripe_unit;
++};
++
++typedef struct pan_agg_raid0_header_s pan_agg_raid0_header_t;
++
++struct pan_agg_raid5_left_header_s {
++  pan_uint16_t  num_comps;
++  pan_uint32_t  stripe_unit0;  /* the shim sets all three units alike */
++  pan_uint32_t  stripe_unit1;
++  pan_uint32_t  stripe_unit2;
++};
++
++typedef struct pan_agg_raid5_left_header_s pan_agg_raid5_left_header_t;
++
++typedef struct pan_agg_grp_raid5_left_header_s pan_agg_grp_raid5_left_header_t;
++
++struct pan_agg_grp_raid5_left_header_s {
++  pan_uint16_t  num_comps;
++  pan_uint32_t  stripe_unit;
++  pan_uint16_t  rg_width;  /* from the pNFS map's group width */
++  pan_uint16_t  rg_depth;  /* from the pNFS map's group depth */
++  pan_uint8_t   group_layout_policy;  /* PAN_AGG_GRP_RAID5_LEFT_POLICY_* */
++};
++
++#define PAN_AGG_GRP_RAID5_LEFT_POLICY_INVALID ((pan_uint8_t) 0x00)
++#define PAN_AGG_GRP_RAID5_LEFT_POLICY_ROUND_ROBIN ((pan_uint8_t) 0x01)
++
++#define PAN_AGG_NULL_MAP ((pan_agg_type_t) 0x00)
++#define PAN_AGG_SIMPLE ((pan_agg_type_t) 0x01)
++#define PAN_AGG_RAID1 ((pan_agg_type_t) 0x02)
++#define PAN_AGG_RAID0 ((pan_agg_type_t) 0x03)
++#define PAN_AGG_RAID5_LEFT ((pan_agg_type_t) 0x04)
++#define PAN_AGG_GRP_RAID5_LEFT ((pan_agg_type_t) 0x06)
++#define PAN_AGG_MINTYPE ((pan_agg_type_t) 0x01)
++#define PAN_AGG_MAXTYPE ((pan_agg_type_t) 0x06)
++
++struct pan_agg_layout_hdr_s {
++  pan_agg_type_t type;  /* PAN_AGG_*: tells which member of hdr is in use */
++  pan_pad_t pad[3];
++  union {
++    pan_uint64_t                        null;
++    pan_agg_simple_header_t             simple;
++    pan_agg_raid1_header_t              raid1;
++    pan_agg_raid0_header_t              raid0;
++    pan_agg_raid5_left_header_t         raid5_left;
++    pan_agg_grp_raid5_left_header_t     grp_raid5_left;
++  } hdr;
++};
++
++typedef struct pan_agg_layout_hdr_s pan_agg_layout_hdr_t;
++
++struct pan_agg_comp_obj_a_s {
++  pan_rpc_arrdim_t size;  /* number of entries in data[] */
++  pan_agg_comp_obj_t *data;
++};
++typedef struct pan_agg_comp_obj_a_s pan_agg_comp_obj_a;
++
++struct pan_agg_full_map_s {
++  pan_aggregation_map_t  map_hdr;
++  pan_agg_layout_hdr_t   layout_hdr;
++  pan_agg_comp_obj_a     components;
++};
++
++typedef struct pan_agg_full_map_s pan_agg_full_map_t;
++
++/*
++ * from pan_obsd_rpc_types.h
++ */
++typedef pan_uint8_t pan_obsd_security_key_a[16];
++
++typedef pan_uint8_t pan_obsd_capability_key_a[20];
++
++typedef pan_uint8_t pan_obsd_key_holder_id_t;	/* PAN_OBSD_KEY_HOLDER_* */
++
++#define PAN_OBSD_KEY_HOLDER_BASIS_KEY ((pan_obsd_key_holder_id_t) 0x01)
++#define PAN_OBSD_KEY_HOLDER_CAP_KEY ((pan_obsd_key_holder_id_t) 0x02)
++
++struct pan_obsd_key_holder_s {
++  pan_obsd_key_holder_id_t select;  /* presumably picks the key member - TODO confirm */
++  pan_pad_t pad[3];
++  union {
++    pan_obsd_security_key_a    basis_key;
++    pan_obsd_capability_key_a  cap_key;
++  } key;
++};
++
++typedef struct pan_obsd_key_holder_s pan_obsd_key_holder_t;
++
++/*
++ * from pan_sm_sec.h
++ */
++typedef pan_uint8_t pan_sm_sec_type_t;
++typedef pan_uint8_t pan_sm_sec_otw_allo_mode_t;
++
++struct pan_obsd_capability_generic_otw_t_s {
++  pan_rpc_arrdim_t size;  /* presumably the length of data[] - TODO confirm */
++  pan_uint8_t *data;
++};
++typedef struct pan_obsd_capability_generic_otw_t_s
++				pan_obsd_capability_generic_otw_t;
++
++struct pan_sm_sec_obsd_s {
++  pan_obsd_key_holder_t              key;
++  pan_obsd_capability_generic_otw_t  cap_otw;
++  pan_sm_sec_otw_allo_mode_t         allo_mode;
++};
++
++typedef struct pan_sm_sec_obsd_s pan_sm_sec_obsd_t;
++
++struct pan_sm_sec_s {
++  pan_sm_sec_type_t type;  /* presumably selects the variant - TODO confirm */
++  pan_pad_t pad[3];
++  union {
++    pan_rpc_none_t     none;
++    pan_sm_sec_obsd_t  obsd;
++  } variant;
++};
++
++typedef struct pan_sm_sec_s pan_sm_sec_t;
++
++struct pan_sm_sec_a_s {
++  pan_rpc_arrdim_t size;  /* number of entries in data[] */
++  pan_sm_sec_t *data;
++};
++typedef struct pan_sm_sec_a_s pan_sm_sec_a;
++typedef pan_otw_t *pan_sm_sec_otw_t;
++
++/*
++ * from pan_sm_types.h
++ */
++typedef pan_uint64_t pan_sm_cap_handle_t;
++
++struct pan_sm_map_cap_s {
++  pan_agg_full_map_t   full_map;
++  pan_stor_offset_t    offset;
++  pan_stor_len_t       length;
++  pan_sm_sec_a         secs;
++  pan_sm_cap_handle_t  handle;
++  pan_timespec_t       expiration_time;
++  pan_stor_action_t    action_mask;
++  pan_uint32_t         flags;
++};
++
++typedef struct pan_sm_map_cap_s pan_sm_map_cap_t;
++
++/*
++ * from pan_sm_ops.h
++ */
++typedef pan_rpc_none_t pan_sm_cache_ptr_t;
++
++/*
++ * from pan_sam_api.h
++ */
++typedef pan_uint32_t    pan_sam_access_flags_t;
++
++typedef struct pan_sam_dev_error_s  pan_sam_dev_error_t;
++struct pan_sam_dev_error_s {
++    pan_stor_dev_id_t       dev_id;
++    pan_stor_op_t           stor_op;
++    pan_status_t            error;
++};
++
++typedef struct pan_sam_ext_status_s pan_sam_ext_status_t;
++struct pan_sam_ext_status_s {
++    pan_uint32_t        available;
++    pan_uint32_t        size;
++    pan_sam_dev_error_t *errors;
++};
++
++enum pan_sam_rpc_sec_sel_e {
++    PAN_SAM_RPC_SEC_DEFAULT,
++    PAN_SAM_RPC_SEC_ATLEAST,
++    PAN_SAM_RPC_SEC_EXACTLY
++};
++typedef enum pan_sam_rpc_sec_sel_e pan_sam_rpc_sec_sel_t;
++
++typedef struct pan_sam_obj_sec_s pan_sam_obj_sec_t;
++struct pan_sam_obj_sec_s {
++    pan_stor_sec_level_t    min_security;
++    pan_sm_map_cap_t        *map_ccaps;
++};
++
++typedef struct  pan_sam_rpc_sec_s   pan_sam_rpc_sec_t;
++struct pan_sam_rpc_sec_s {
++    pan_sam_rpc_sec_sel_t   selector;
++};
++
++typedef struct pan_sam_read_args_s pan_sam_read_args_t;
++struct pan_sam_read_args_s {
++    pan_stor_obj_id_t                obj_id;
++    pan_sm_cache_ptr_t               obj_ent;
++    void                            *return_attr;
++    void                            *checksum;
++    pan_stor_offset_t                offset;
++    pan_uint16_t                     sm_options;
++    void                            *callout;
++    void                            *callout_arg;
++};
++
++typedef struct pan_sam_read_res_s pan_sam_read_res_t;
++struct pan_sam_read_res_s {
++    pan_status_t             result;
++    pan_sam_ext_status_t     ext_status;
++    pan_stor_len_t           length;
++    void                    *attr;
++    void                    *checksum;
++};
++
++typedef void (*pan_sam_read_cb_t)(
++    void                *user_arg1,
++    void                *user_arg2,
++    pan_sam_read_res_t  *res_p,
++    pan_status_t        status);
++
++#define PAN_SAM_ACCESS_NONE                             0x0000
++#define PAN_SAM_ACCESS_BYPASS_TIMESTAMP                 0x0020
++
++typedef struct pan_sam_write_args_s pan_sam_write_args_t;
++struct pan_sam_write_args_s {
++    pan_stor_obj_id_t   obj_id;
++    pan_sm_cache_ptr_t  obj_ent;
++    pan_stor_offset_t   offset;
++    void                *attr;
++    void                *return_attr;
++};
++
++typedef struct pan_sam_write_res_s pan_sam_write_res_t;
++struct pan_sam_write_res_s {
++    pan_status_t            result;
++    pan_sam_ext_status_t    ext_status;
++    pan_stor_len_t          length;
++    pan_stor_delta_len_t    delta_capacity_used;
++    pan_bool_t              parity_dirty;
++    void                   *attr;
++};
++
++typedef void (*pan_sam_write_cb_t)(
++    void                *user_arg1,
++    void                *user_arg2,
++    pan_sam_write_res_t *res_p,
++    pan_status_t        status);
++
++/*
++ * from pan_mgr_types.h
++ */
++#define PAN_MGR_ID_TYPE_SHIFT 56
++#define PAN_MGR_ID_TYPE_MASK ((pan_mgr_id_t)18374686479671623680ULL)
++#define PAN_MGR_ID_UNIQ_MASK ((pan_mgr_id_t)72057594037927935ULL)
++
++typedef pan_uint16_t pan_mgr_type_t;
++typedef pan_uint64_t pan_mgr_id_t;
++
++#define PAN_MGR_SM ((pan_mgr_type_t) 2U)
++#define PAN_MGR_OBSD ((pan_mgr_type_t) 6U)
++
++/*
++ * from pan_mgr_types_c.h
++ */
++#define pan_mgr_id_construct_artificial(_mgr_type_, _mgr_uniq_, _mgr_id_p_) { \
++  pan_mgr_id_t  _id1, _id2; \
++\
++  _id1 = (_mgr_type_); \
++  _id1 <<= PAN_MGR_ID_TYPE_SHIFT; \
++  _id1 &= PAN_MGR_ID_TYPE_MASK; \
++  _id2 = (_mgr_uniq_); \
++  _id2 &= PAN_MGR_ID_UNIQ_MASK; \
++  _id1 |= _id2; \
++  *(_mgr_id_p_) = _id1; \
++}
++
++/*
++ * from pan_storage_c.h
++ */
++#define pan_stor_is_device_id_an_obsd_id(_device_id_) \
++    ((((_device_id_) & PAN_MGR_ID_TYPE_MASK) >> PAN_MGR_ID_TYPE_SHIFT) \
++	== PAN_MGR_OBSD)
++
++/*
++ * pnfs_shim internal definitions
++ */
++
++struct panfs_shim_io_state {
++	struct objlayout_io_state ol_state;
++
++	pan_sg_entry_t *sg_list;
++	pan_sam_obj_sec_t obj_sec;
++	void *ucreds;
++	union {
++		struct {
++			pan_sam_read_args_t args;
++			pan_sam_read_res_t res;
++		} read;
++		struct {
++			pan_sam_write_args_t args;
++			pan_sam_write_res_t res;
++		} write;
++	} u;
++};
++
++#endif /* _PANLAYOUT_PANFS_SHIM_H */
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-08-23 12:09:03.354501721 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-08-23 12:09:03.354501721 -0400
+@@ -0,0 +1,435 @@
++/*
++ *  pnfs_osd_xdr_cli.c
++ *
++ *  Object-Based pNFS Layout XDR layer
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/pnfs_osd_xdr.h>
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/*
++ * The following implementation is based on these Internet Drafts:
++ *
++ * draft-ietf-nfsv4-minorversion-21
++ * draft-ietf-nfsv4-pnfs-obj-12
++ */
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_objid(u32 *p, struct pnfs_osd_objid *objid)
++{
++	COPYMEM(objid->oid_device_id.data, sizeof(objid->oid_device_id.data));
++	READ64(objid->oid_partition_id);
++	READ64(objid->oid_object_id);
++	return p;
++}
++
++static inline u32 *
++pnfs_osd_xdr_decode_opaque_cred(u32 *p,
++				struct pnfs_osd_opaque_cred *opaque_cred)
++{
++	READ32(opaque_cred->cred_len);
++	COPYMEM(opaque_cred->cred, opaque_cred->cred_len);
++	return p;
++}
++
++/*
++ * struct pnfs_osd_object_cred {
++ * 	struct pnfs_osd_objid		oc_object_id;
++ * 	u32				oc_osd_version;
++ * 	u32				oc_cap_key_sec;
++ * 	struct pnfs_osd_opaque_cred	oc_cap_key;
++ * 	struct pnfs_osd_opaque_cred	oc_cap;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_object_cred(u32 *p, struct pnfs_osd_object_cred *comp,
++				u8 **credp)
++{
++	u8 *cred;
++
++	p = pnfs_osd_xdr_decode_objid(p, &comp->oc_object_id);
++	READ32(comp->oc_osd_version);
++	READ32(comp->oc_cap_key_sec);
++
++	cred = *credp;
++	comp->oc_cap_key.cred = cred;
++	p = pnfs_osd_xdr_decode_opaque_cred(p, &comp->oc_cap_key);
++	cred = (u8 *)((u32 *)cred + XDR_QUADLEN(comp->oc_cap_key.cred_len));
++	comp->oc_cap.cred = cred;
++	p = pnfs_osd_xdr_decode_opaque_cred(p, &comp->oc_cap);
++	cred = (u8 *)((u32 *)cred + XDR_QUADLEN(comp->oc_cap.cred_len));
++	*credp = cred;
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_data_map {
++ * 	u32	odm_num_comps;
++ * 	u64	odm_stripe_unit;
++ * 	u32	odm_group_width;
++ * 	u32	odm_group_depth;
++ * 	u32	odm_mirror_cnt;
++ * 	u32	odm_raid_algorithm;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_data_map(u32 *p, struct pnfs_osd_data_map *data_map)
++{
++	READ32(data_map->odm_num_comps);
++	READ64(data_map->odm_stripe_unit);
++	READ32(data_map->odm_group_width);
++	READ32(data_map->odm_group_depth);
++	READ32(data_map->odm_mirror_cnt);
++	READ32(data_map->odm_raid_algorithm);
++	dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u "
++		"odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n",
++		__func__,
++		data_map->odm_num_comps,
++		(unsigned long long)data_map->odm_stripe_unit,
++		data_map->odm_group_width,
++		data_map->odm_group_depth,
++		data_map->odm_mirror_cnt,
++		data_map->odm_raid_algorithm);
++	return p;
++}
++
++struct pnfs_osd_layout *
++pnfs_osd_xdr_decode_layout(struct pnfs_osd_layout *layout, u32 *p)
++{
++	int i;
++	u32 *start = p;
++	struct pnfs_osd_object_cred *comp;
++	u8 *cred;
++
++	p = pnfs_osd_xdr_decode_data_map(p, &layout->olo_map);
++	READ32(layout->olo_comps_index);
++	READ32(layout->olo_num_comps);
++	layout->olo_comps = (struct pnfs_osd_object_cred *)(layout + 1);
++	comp = layout->olo_comps;
++	cred = (u8 *)(comp + layout->olo_num_comps);
++	dprintk("%s: comps_index=%u num_comps=%u\n",
++		__func__, layout->olo_comps_index, layout->olo_num_comps);
++	for (i = 0; i < layout->olo_num_comps; i++) {
++		p = pnfs_osd_xdr_decode_object_cred(p, comp, &cred);
++		dprintk("%s: comp[%d]=dev(%llx:%llx) par=0x%llx obj=0x%llx "
++			"key_len=%u cap_len=%u\n",
++			__func__, i,
++			_DEVID_LO(&comp->oc_object_id.oid_device_id),
++			_DEVID_HI(&comp->oc_object_id.oid_device_id),
++			comp->oc_object_id.oid_partition_id,
++			comp->oc_object_id.oid_object_id,
++			comp->oc_cap_key.cred_len, comp->oc_cap.cred_len);
++		comp++;
++	}
++	dprintk("%s: xdr_size=%Zd end=%p in_core_size=%Zd\n", __func__,
++	       (char *)p - (char *)start, cred, (char *)cred - (char *)layout);
++	return layout;
++}
++
++/*
++ * Get Device Information Decoding
++ *
++ * Note: since Device Information is currently done synchronously, most
++ *       of the actual fields are left inside the rpc buffer and are only
++ *       pointed to by the pnfs_osd_deviceaddr members. So the read buffer
++ *       should not be freed while the returned information is in use.
++ */
++
++u32 *__xdr_read_calc_nfs4_string(
++	u32 *p, struct nfs4_string *str, u8 **freespace)
++{
++	u32 len;
++	char *data;
++	bool need_copy;
++
++	READ32(len);
++	data = (char *)p;
++
++	if (data[len]) { /* Not null terminated we'll need extra space */
++		data = *freespace;
++		*freespace += len + 1;
++		need_copy = true;
++	} else {
++		need_copy = false;
++	}
++
++	if (str) {
++		str->len = len;
++		str->data = data;
++		if (need_copy) {
++			memcpy(data, p, len);
++			data[len] = 0;
++		}
++	}
++
++	p += XDR_QUADLEN(len);
++	return p;
++}
++
++u32 *__xdr_read_calc_u8_opaque(
++	u32 *p, struct nfs4_string *str)
++{
++	u32 len;
++
++	READ32(len);
++
++	if (str) {
++		str->len = len;
++		str->data = (char *)p;
++	}
++
++	p += XDR_QUADLEN(len);
++	return p;
++}
++
++/*
++ * struct pnfs_osd_targetid {
++ * 	u32			oti_type;
++ * 	struct nfs4_string	oti_scsi_device_id;
++ * };
++ */
++u32 *__xdr_read_calc_targetid(
++	u32 *p, struct pnfs_osd_targetid* targetid, u8 **freespace)
++{
++	u32 oti_type;
++
++	READ32(oti_type);
++	if (targetid)
++		targetid->oti_type = oti_type;
++
++	switch (oti_type) {
++	case OBJ_TARGET_SCSI_NAME:
++	case OBJ_TARGET_SCSI_DEVICE_ID:
++		p = __xdr_read_calc_u8_opaque(p,
++			targetid ? &targetid->oti_scsi_device_id : NULL);
++	}
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_net_addr {
++ * 	struct nfs4_string	r_netid;
++ * 	struct nfs4_string	r_addr;
++ * };
++ */
++u32 *__xdr_read_calc_net_addr(
++	u32 *p, struct pnfs_osd_net_addr* netaddr, u8 **freespace)
++{
++
++	p = __xdr_read_calc_nfs4_string(p,
++			netaddr ? &netaddr->r_netid : NULL,
++			freespace);
++
++	p = __xdr_read_calc_nfs4_string(p,
++			netaddr ? &netaddr->r_addr : NULL,
++			freespace);
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_targetaddr {
++ * 	u32				ota_available;
++ * 	struct pnfs_osd_net_addr	ota_netaddr;
++ * };
++ */
++u32 *__xdr_read_calc_targetaddr(
++	u32 *p, struct pnfs_osd_targetaddr *targetaddr, u8 **freespace)
++{
++	u32 ota_available;
++
++	READ32(ota_available);
++	if (targetaddr)
++		targetaddr->ota_available = ota_available;
++
++	if (ota_available) {
++		p = __xdr_read_calc_net_addr(p,
++				targetaddr ? &targetaddr->ota_netaddr : NULL,
++				freespace);
++	}
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_deviceaddr {
++ * 	struct pnfs_osd_targetid	oda_targetid;
++ * 	struct pnfs_osd_targetaddr	oda_targetaddr;
++ * 	u8				oda_lun[8];
++ * 	struct nfs4_string		oda_systemid;
++ * 	struct pnfs_osd_object_cred	oda_root_obj_cred;
++ * 	struct nfs4_string		oda_osdname;
++ * };
++ */
++u32 *__xdr_read_calc_deviceaddr(
++	u32 *p, struct pnfs_osd_deviceaddr *deviceaddr, u8 **freespace)
++{
++	p = __xdr_read_calc_targetid(p,
++			deviceaddr ? &deviceaddr->oda_targetid : NULL,
++			freespace);
++
++	p = __xdr_read_calc_targetaddr(p,
++			deviceaddr ? &deviceaddr->oda_targetaddr : NULL,
++			freespace);
++
++	if (deviceaddr)
++		COPYMEM(deviceaddr->oda_lun, sizeof(deviceaddr->oda_lun));
++	else
++		p += XDR_QUADLEN(sizeof(deviceaddr->oda_lun));
++
++	p = __xdr_read_calc_u8_opaque(p,
++			deviceaddr ? &deviceaddr->oda_systemid : NULL);
++
++	if (deviceaddr) {
++		p = pnfs_osd_xdr_decode_object_cred(p,
++				&deviceaddr->oda_root_obj_cred, freespace);
++	} else {
++		*freespace += pnfs_osd_object_cred_incore_sz(p);
++		p += pnfs_osd_object_cred_xdr_sz(p);
++	}
++
++	p = __xdr_read_calc_u8_opaque(p,
++			deviceaddr ? &deviceaddr->oda_osdname : NULL);
++
++	return p;
++}
++
++size_t pnfs_osd_xdr_deviceaddr_incore_sz(u32 *p)
++{
++	u8 *null_freespace = NULL;
++	size_t sz;
++
++	__xdr_read_calc_deviceaddr(p, NULL, &null_freespace);
++	sz = sizeof(struct pnfs_osd_deviceaddr) + (size_t)null_freespace;
++
++	return sz;
++}
++
++void pnfs_osd_xdr_decode_deviceaddr(
++	struct pnfs_osd_deviceaddr *deviceaddr, u32 *p)
++{
++	u8 *freespace = (u8 *)(deviceaddr + 1);
++
++	__xdr_read_calc_deviceaddr(p, deviceaddr, &freespace);
++}
++
++/*
++ * struct pnfs_osd_layoutupdate {
++ * 	u32	dsu_valid;
++ * 	s64	dsu_delta;		(encoded only when dsu_valid != 0)
++ * 	u32	olu_ioerr_flag;
++ * };
++ */
++int
++pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
++				 struct pnfs_osd_layoutupdate *lou)
++{
++	/* Reserve only what is written: 4 + 8 + 4 with a delta, else 4 + 4 */
++	__be32 *p = xdr_reserve_space(xdr, lou->dsu_valid ? 16 : 8);
++
++	if (!p)
++		return -E2BIG;
++	*p++ = cpu_to_be32(lou->dsu_valid);
++	if (lou->dsu_valid)
++		p = xdr_encode_hyper(p, lou->dsu_delta);
++	*p++ = cpu_to_be32(lou->olu_ioerr_flag);
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ */
++static inline int pnfs_osd_xdr_encode_objid(struct xdr_stream *xdr,
++					    struct pnfs_osd_objid *object_id)
++{
++	__be32 *p;
++
++	p = xdr_reserve_space(xdr, 32);
++	if (!p)
++		return -E2BIG;
++
++	p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data,
++				    sizeof(object_id->oid_device_id.data));
++	p = xdr_encode_hyper(p, object_id->oid_partition_id);
++	p = xdr_encode_hyper(p, object_id->oid_object_id);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_ioerr {
++ * 	struct pnfs_osd_objid	oer_component;
++ * 	u64			oer_comp_offset;
++ * 	u64			oer_comp_length;
++ * 	u32			oer_iswrite;
++ * 	u32			oer_errno;
++ * };
++ */
++int pnfs_osd_xdr_encode_ioerr(struct xdr_stream *xdr,
++			      struct pnfs_osd_ioerr *ioerr)
++{
++	__be32 *p;
++	int ret;
++
++	ret = pnfs_osd_xdr_encode_objid(xdr, &ioerr->oer_component);
++	if (ret)
++		return ret;
++
++	p = xdr_reserve_space(xdr, 24);
++	if (!p)
++		return -E2BIG;
++
++	p = xdr_encode_hyper(p, ioerr->oer_comp_offset);
++	p = xdr_encode_hyper(p, ioerr->oer_comp_length);
++	*p++ = cpu_to_be32(ioerr->oer_iswrite);
++	*p   = cpu_to_be32(ioerr->oer_errno);
++
++	return 0;
++}
+diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/pagelist.c
+--- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-08-23 12:08:29.056411363 -0400
++++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-23 12:09:03.355511659 -0400
+@@ -20,6 +20,7 @@
+ #include <linux/nfs_mount.h>
+ 
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ static struct kmem_cache *nfs_page_cachep;
+ 
+@@ -56,7 +57,8 @@ nfs_page_free(struct nfs_page *p)
+ struct nfs_page *
+ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
+ 		   struct page *page,
+-		   unsigned int offset, unsigned int count)
++		   unsigned int offset, unsigned int count,
++		   struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_page		*req;
+ 
+@@ -79,7 +81,11 @@ nfs_create_request(struct nfs_open_conte
+ 	req->wb_pgbase	= offset;
+ 	req->wb_bytes   = count;
+ 	req->wb_context = get_nfs_open_context(ctx);
++	req->wb_lock_context = nfs_get_lock_context(ctx);
+ 	kref_init(&req->wb_kref);
++	req->wb_lseg    = lseg;
++	if (lseg)
++		get_lseg(lseg);
+ 	return req;
+ }
+ 
+@@ -141,18 +147,26 @@ void nfs_clear_request(struct nfs_page *
+ {
+ 	struct page *page = req->wb_page;
+ 	struct nfs_open_context *ctx = req->wb_context;
++	struct nfs_lock_context *l_ctx = req->wb_lock_context;
+ 
+ 	if (page != NULL) {
+ 		page_cache_release(page);
+ 		req->wb_page = NULL;
+ 	}
++	if (l_ctx != NULL) {
++		nfs_put_lock_context(l_ctx);
++		req->wb_lock_context = NULL;
++	}
+ 	if (ctx != NULL) {
+ 		put_nfs_open_context(ctx);
+ 		req->wb_context = NULL;
+ 	}
++	if (req->wb_lseg != NULL) {
++		put_lseg(req->wb_lseg);
++		req->wb_lseg = NULL;
++	}
+ }
+ 
+-
+ /**
+  * nfs_release_request - Release the count on an NFS read/write request
+  * @req: request to release
+@@ -231,11 +245,12 @@ void nfs_pageio_init(struct nfs_pageio_d
+  * Return 'true' if this is the case, else return 'false'.
+  */
+ static int nfs_can_coalesce_requests(struct nfs_page *prev,
+-				     struct nfs_page *req)
++				     struct nfs_page *req,
++				     struct nfs_pageio_descriptor *pgio)
+ {
+ 	if (req->wb_context->cred != prev->wb_context->cred)
+ 		return 0;
+-	if (req->wb_context->lockowner != prev->wb_context->lockowner)
++	if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
+ 		return 0;
+ 	if (req->wb_context->state != prev->wb_context->state)
+ 		return 0;
+@@ -245,6 +260,12 @@ static int nfs_can_coalesce_requests(str
+ 		return 0;
+ 	if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
+ 		return 0;
++	if (req->wb_lseg != prev->wb_lseg)
++		return 0;
++#ifdef CONFIG_NFS_V4_1
++	if (pgio->pg_test && !pgio->pg_test(pgio, prev, req))
++		return 0;
++#endif /* CONFIG_NFS_V4_1 */
+ 	return 1;
+ }
+ 
+@@ -277,7 +298,7 @@ static int nfs_pageio_do_add_request(str
+ 		if (newlen > desc->pg_bsize)
+ 			return 0;
+ 		prev = nfs_list_entry(desc->pg_list.prev);
+-		if (!nfs_can_coalesce_requests(prev, req))
++		if (!nfs_can_coalesce_requests(prev, req, desc))
+ 			return 0;
+ 	} else
+ 		desc->pg_base = req->wb_pgbase;
+@@ -366,6 +387,7 @@ void nfs_pageio_cond_complete(struct nfs
+  * @idx_start: lower bound of page->index to scan
+  * @npages: idx_start + npages sets the upper bound to scan.
+  * @tag: tag to scan for
++ * @use_pnfs: will be set TRUE if commit needs to be handled by layout driver
+  *
+  * Moves elements from one of the inode request lists.
+  * If the number of requests is set to 0, the entire address_space
+@@ -375,7 +397,7 @@ void nfs_pageio_cond_complete(struct nfs
+  */
+ int nfs_scan_list(struct nfs_inode *nfsi,
+ 		struct list_head *dst, pgoff_t idx_start,
+-		unsigned int npages, int tag)
++		  unsigned int npages, int tag, int *use_pnfs)
+ {
+ 	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
+ 	struct nfs_page *req;
+@@ -406,6 +428,8 @@ int nfs_scan_list(struct nfs_inode *nfsi
+ 				radix_tree_tag_clear(&nfsi->nfs_page_tree,
+ 						req->wb_index, tag);
+ 				nfs_list_add_request(req, dst);
++				if (req->wb_lseg)
++					*use_pnfs = 1;
+ 				res++;
+ 				if (res == INT_MAX)
+ 					goto out;
+diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.c
+--- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-08-23 12:09:03.356501413 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-08-23 12:09:03.357481204 -0400
+@@ -0,0 +1,2027 @@
++/*
++ *  linux/fs/nfs/pnfs.c
++ *
++ *  pNFS functions to call and manage layout drivers.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/kernel.h>
++#include <linux/errno.h>
++#include <linux/fs.h>
++#include <linux/module.h>
++#include <linux/smp_lock.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_mount.h>
++#include <linux/nfs_page.h>
++#include <linux/nfs4.h>
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/rculist.h>
++
++#include "internal.h"
++#include "nfs4_fs.h"
++#include "pnfs.h"
++
++#define NFSDBG_FACILITY		NFSDBG_PNFS
++
++#define MIN_POOL_LC		(4)
++
++static int pnfs_initialized;
++
++static void pnfs_free_layout(struct pnfs_layout_type *lo,
++			     struct nfs4_pnfs_layout_segment *range);
++static inline void get_layout(struct pnfs_layout_type *lo);
++
++/* Locking:
++ *
++ * pnfs_spinlock:
++ * 	protects pnfs_modules_tbl.
++ */
++static spinlock_t pnfs_spinlock = __SPIN_LOCK_UNLOCKED(pnfs_spinlock);
++
++/*
++ * pnfs_modules_tbl holds all pnfs modules
++ */
++static struct list_head	pnfs_modules_tbl;
++static struct kmem_cache *pnfs_cachep;
++static mempool_t *pnfs_layoutcommit_mempool;
++
++static inline struct pnfs_layoutcommit_data *pnfs_layoutcommit_alloc(void)
++{
++	struct pnfs_layoutcommit_data *p =
++			mempool_alloc(pnfs_layoutcommit_mempool, GFP_NOFS);
++	if (p)
++		memset(p, 0, sizeof(*p));
++
++	return p;
++}
++
++void pnfs_layoutcommit_free(struct pnfs_layoutcommit_data *p)
++{
++	mempool_free(p, pnfs_layoutcommit_mempool);
++}
++
++/*
++ * struct pnfs_module - One per pNFS device module.
++ */
++struct pnfs_module {
++	struct pnfs_layoutdriver_type *pnfs_ld_type;
++	struct list_head        pnfs_tblid;
++};
++
++int
++pnfs_initialize(void)
++{
++	INIT_LIST_HEAD(&pnfs_modules_tbl);
++
++	pnfs_cachep = kmem_cache_create("pnfs_layoutcommit_data",
++					sizeof(struct pnfs_layoutcommit_data),
++					0, SLAB_HWCACHE_ALIGN, NULL);
++	if (pnfs_cachep == NULL)
++		return -ENOMEM;
++
++	pnfs_layoutcommit_mempool = mempool_create(MIN_POOL_LC,
++						   mempool_alloc_slab,
++						   mempool_free_slab,
++						   pnfs_cachep);
++	if (pnfs_layoutcommit_mempool == NULL) {
++		kmem_cache_destroy(pnfs_cachep);
++		return -ENOMEM;
++	}
++
++	pnfs_initialized = 1;
++	return 0;
++}
++
++void pnfs_uninitialize(void)
++{
++	mempool_destroy(pnfs_layoutcommit_mempool);
++	kmem_cache_destroy(pnfs_cachep);
++}
++
++/* search pnfs_modules_tbl for right pnfs module */
++static int
++find_pnfs(u32 id, struct pnfs_module **module) {
++	struct  pnfs_module *local = NULL;
++
++	dprintk("PNFS: %s: Searching for %u\n", __func__, id);
++	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid) {
++		if (local->pnfs_ld_type->id == id) {
++			*module = local;
++			return(1);
++		}
++	}
++	return 0;
++}
++
++/* Set lo_cred to indicate we require a layoutcommit
++ * If we don't even have a layout, we don't need to commit it.
++ */
++void
++pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx)
++{
++	dprintk("%s: has_layout=%d ctx=%p\n", __func__, has_layout(nfsi), ctx);
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	if (has_layout(nfsi) &&
++	    !test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->pnfs_layout_state)) {
++		nfsi->layout->lo_cred = get_rpccred(ctx->state->owner->so_cred);
++		__set_bit(NFS_INO_LAYOUTCOMMIT,
++			  &nfsi->layout->pnfs_layout_state);
++		nfsi->change_attr++;
++		spin_unlock(&nfsi->vfs_inode.i_lock);
++		dprintk("%s: Set layoutcommit\n", __func__);
++		return;
++	}
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++}
++
++/* Update last_write_offset for layoutcommit.
++ * TODO: We should only use committed extents, but the current nfs
++ * implementation does not calculate the written range in nfs_commit_done.
++ * We therefore update this field in writeback_done.
++ */
++void
++pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
++{
++	loff_t end_pos;
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	if (offset < nfsi->layout->pnfs_write_begin_pos)
++		nfsi->layout->pnfs_write_begin_pos = offset;
++	end_pos = offset + extent - 1; /* I'm being inclusive */
++	if (end_pos > nfsi->layout->pnfs_write_end_pos)
++		nfsi->layout->pnfs_write_end_pos = end_pos;
++	dprintk("%s: Wrote %lu@%lu bpos %lu, epos: %lu\n",
++		__func__,
++		(unsigned long) extent,
++		(unsigned long) offset ,
++		(unsigned long) nfsi->layout->pnfs_write_begin_pos,
++		(unsigned long) nfsi->layout->pnfs_write_end_pos);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++}
++
++/* Uninitialize a mountpoint in a layout driver */
++void
++unmount_pnfs_layoutdriver(struct nfs_server *nfss)
++{
++	if (PNFS_EXISTS_LDIO_OP(nfss, uninitialize_mountpoint))
++		nfss->pnfs_curr_ld->ld_io_ops->uninitialize_mountpoint(nfss);
++}
++
++/*
++ * Set the server pnfs module to the registered layout driver matching id.
++ * Only one pNFS layout driver is supported.
++ */
++void
++set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
++		      u32 id)
++{
++	struct pnfs_module *mod = NULL;
++
++	if (server->pnfs_curr_ld)
++		return;
++
++	if (!find_pnfs(id, &mod)) {
++		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
++		find_pnfs(id, &mod);
++	}
++
++	if (!mod) {
++		dprintk("%s: No pNFS module found for %u. ", __func__, id);
++		goto out_err;
++	}
++
++	server->pnfs_curr_ld = mod->pnfs_ld_type;
++	if (mod->pnfs_ld_type->ld_io_ops->initialize_mountpoint(
++							server, mntfh)) {
++		printk(KERN_ERR "%s: Error initializing mount point "
++		       "for layout driver %u. ", __func__, id);
++		goto out_err;
++	}
++
++	dprintk("%s: pNFS module for %u set\n", __func__, id);
++	return;
++
++out_err:
++	dprintk("Using NFSv4 I/O\n");
++	server->pnfs_curr_ld = NULL;
++}
++
++/* Register a layout driver; returns &pnfs_ops, or NULL on any failure */
++struct pnfs_client_operations*
++pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
++{
++	struct pnfs_module *pnfs_mod;
++	struct layoutdriver_io_operations *io_ops = ld_type->ld_io_ops;
++
++	if (!pnfs_initialized) {
++		printk(KERN_ERR "%s Registration failure. "
++		       "pNFS not initialized.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops || !io_ops->alloc_layout || !io_ops->free_layout) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "alloc_layout and free_layout.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops->alloc_lseg || !io_ops->free_lseg) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "alloc_lseg and free_lseg.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops->read_pagelist || !io_ops->write_pagelist ||
++	    !io_ops->commit) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "read_pagelist, write_pagelist, and commit.\n",
++		       __func__);
++		return NULL;
++	}
++
++	pnfs_mod = kmalloc(sizeof(struct pnfs_module), GFP_KERNEL);
++	if (pnfs_mod == NULL)	/* do not report success if nothing was added */
++		return NULL;
++
++	dprintk("%s Registering id:%u name:%s\n",
++		__func__, ld_type->id, ld_type->name);
++	pnfs_mod->pnfs_ld_type = ld_type;
++	INIT_LIST_HEAD(&pnfs_mod->pnfs_tblid);
++
++	/* pnfs_spinlock protects pnfs_modules_tbl */
++	spin_lock(&pnfs_spinlock);
++	list_add(&pnfs_mod->pnfs_tblid, &pnfs_modules_tbl);
++	spin_unlock(&pnfs_spinlock);
++
++	return &pnfs_ops;
++}
++
++/* Remove a layout driver from pnfs_modules_tbl (lookup + unlink under lock) */
++void
++pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
++{
++	struct pnfs_module *pnfs_mod = NULL;
++
++	spin_lock(&pnfs_spinlock);
++	if (find_pnfs(ld_type->id, &pnfs_mod)) {
++		dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
++		list_del(&pnfs_mod->pnfs_tblid);
++	}
++	spin_unlock(&pnfs_spinlock);
++	kfree(pnfs_mod);	/* kfree(NULL) is a no-op when nothing matched */
++}
++
++/*
++ * pNFS client layout cache
++ */
++#if defined(CONFIG_SMP)
++#define BUG_ON_UNLOCKED_INO(ino) \
++	BUG_ON(!spin_is_locked(&ino->i_lock))
++#define BUG_ON_UNLOCKED_LO(lo) \
++	BUG_ON_UNLOCKED_INO(PNFS_INODE(lo))
++#else /* CONFIG_SMP */
++#define BUG_ON_UNLOCKED_INO(lo) do {} while (0)
++#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
++#endif /* CONFIG_SMP */
++
++static inline void
++get_layout(struct pnfs_layout_type *lo)
++{
++	BUG_ON_UNLOCKED_LO(lo);
++	lo->refcount++;
++}
++
++static inline void
++put_layout_locked(struct pnfs_layout_type *lo)
++{
++	BUG_ON_UNLOCKED_LO(lo);
++	BUG_ON(lo->refcount <= 0);
++
++	lo->refcount--;
++	if (!lo->refcount) {
++		struct layoutdriver_io_operations *io_ops = PNFS_LD_IO_OPS(lo);
++		struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
++
++		dprintk("%s: freeing layout cache %p\n", __func__, lo);
++		WARN_ON(!list_empty(&lo->lo_layouts));
++		io_ops->free_layout(lo);
++		nfsi->layout = NULL;
++	}
++}
++
++void
++put_layout(struct inode *inode)
++{
++	spin_lock(&inode->i_lock);
++	put_layout_locked(NFS_I(inode)->layout);
++	spin_unlock(&inode->i_lock);
++
++}
++
++void
++pnfs_layout_release(struct pnfs_layout_type *lo,
++		    struct nfs4_pnfs_layout_segment *range)
++{
++	struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	if (range)
++		pnfs_free_layout(lo, range);
++	/*
++	 * Matched in _pnfs_update_layout for layoutget
++	 * and by get_layout in _pnfs_return_layout for layoutreturn
++	 */
++	put_layout_locked(lo);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	wake_up_all(&nfsi->lo_waitq);
++}
++
++void
++pnfs_destroy_layout(struct nfs_inode *nfsi)
++{
++	struct pnfs_layout_type *lo;
++	struct nfs4_pnfs_layout_segment range = {
++		.iomode = IOMODE_ANY,
++		.offset = 0,
++		.length = NFS4_MAX_UINT64,
++	};
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	lo = nfsi->layout;
++	if (lo) {
++		pnfs_free_layout(lo, &range);
++		WARN_ON(!list_empty(&nfsi->layout->segs));
++		WARN_ON(!list_empty(&nfsi->layout->lo_layouts));
++
++		if (nfsi->layout->refcount != 1)
++			printk(KERN_WARNING "%s: layout refcount not=1 %d\n",
++				__func__, nfsi->layout->refcount);
++		WARN_ON(nfsi->layout->refcount != 1);
++
++		/* Matched by refcount set to 1 in alloc_init_layout */
++		put_layout_locked(lo);
++	}
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++}
++
++/*
++ * Called by the state manager to remove all layouts established under an
++ * expired lease.
++ */
++void
++pnfs_destroy_all_layouts(struct nfs_client *clp)
++{
++	struct pnfs_layout_type *lo;
++
++	while (!list_empty(&clp->cl_layouts)) {
++		lo = list_entry(clp->cl_layouts.next, struct pnfs_layout_type,
++				lo_layouts);
++		dprintk("%s freeing layout for inode %lu\n", __func__,
++			lo->lo_inode->i_ino);
++		pnfs_destroy_layout(NFS_I(lo->lo_inode));
++	}
++}
++
++static inline void
++init_lseg(struct pnfs_layout_type *lo, struct pnfs_layout_segment *lseg)
++{
++	INIT_LIST_HEAD(&lseg->fi_list);
++	kref_init(&lseg->kref);
++	lseg->valid = true;
++	lseg->layout = lo;
++}
++
++static void
++destroy_lseg(struct kref *kref)
++{
++	struct pnfs_layout_segment *lseg =
++		container_of(kref, struct pnfs_layout_segment, kref);
++	struct pnfs_layout_type *lo = lseg->layout;
++
++	dprintk("--> %s\n", __func__);
++	PNFS_LD_IO_OPS(lo)->free_lseg(lseg); /* before lo can be freed below */
++	put_layout_locked(lo); /* Matched by get_layout in pnfs_insert_layout */
++}
++
++static void
++put_lseg_locked(struct pnfs_layout_segment *lseg)
++{
++	bool do_wake_up;
++	struct nfs_inode *nfsi;
++
++	if (!lseg)
++		return;
++
++	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
++		atomic_read(&lseg->kref.refcount), lseg->valid);
++	do_wake_up = !lseg->valid;
++	nfsi = PNFS_NFS_INODE(lseg->layout);
++	kref_put(&lseg->kref, destroy_lseg);
++	if (do_wake_up)
++		wake_up(&nfsi->lo_waitq);
++}
++
++void
++put_lseg(struct pnfs_layout_segment *lseg)
++{
++	bool do_wake_up;
++	struct nfs_inode *nfsi;
++
++	if (!lseg)
++		return;
++
++	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
++		atomic_read(&lseg->kref.refcount), lseg->valid);
++	do_wake_up = !lseg->valid;
++	nfsi = PNFS_NFS_INODE(lseg->layout);
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	kref_put(&lseg->kref, destroy_lseg);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	if (do_wake_up)
++		wake_up(&nfsi->lo_waitq);
++}
++EXPORT_SYMBOL(put_lseg);
++
++void get_lseg(struct pnfs_layout_segment *lseg)
++{
++	kref_get(&lseg->kref);
++}
++EXPORT_SYMBOL(get_lseg);
++
++static inline u64
++end_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	end = start + len;
++	return end >= start ? end: NFS4_MAX_UINT64;
++}
++
++/* last octet in a range */
++static inline u64
++last_byte_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	BUG_ON(!len);
++	end = start + len;
++	return end > start ? end - 1: NFS4_MAX_UINT64;
++}
++
++/*
++ * is l2 fully contained in l1?
++ *   start1                             end1
++ *   [----------------------------------)
++ *           start2           end2
++ *           [----------------)
++ */
++static inline int
++lo_seg_contained(struct nfs4_pnfs_layout_segment *l1,
++		 struct nfs4_pnfs_layout_segment *l2)
++{
++	u64 start1 = l1->offset;
++	u64 end1 = end_offset(start1, l1->length);
++	u64 start2 = l2->offset;
++	u64 end2 = end_offset(start2, l2->length);
++
++	return (start1 <= start2) && (end1 >= end2);
++}
++
++/*
++ * is l1 and l2 intersecting?
++ *   start1                             end1
++ *   [----------------------------------)
++ *                              start2           end2
++ *                              [----------------)
++ */
++static inline int
++lo_seg_intersecting(struct nfs4_pnfs_layout_segment *l1,
++		    struct nfs4_pnfs_layout_segment *l2)
++{
++	u64 start1 = l1->offset;
++	u64 end1 = end_offset(start1, l1->length);
++	u64 start2 = l2->offset;
++	u64 end2 = end_offset(start2, l2->length);
++
++	return (end1 == NFS4_MAX_UINT64 || end1 > start2) &&
++	       (end2 == NFS4_MAX_UINT64 || end2 > start1);
++}
++
++void
++pnfs_set_layout_stateid(struct pnfs_layout_type *lo,
++			const nfs4_stateid *stateid)
++{
++	write_seqlock(&lo->seqlock);
++	memcpy(lo->stateid.u.data, stateid->u.data, sizeof(lo->stateid.u.data));
++	write_sequnlock(&lo->seqlock);
++}
++
++void
++pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_type *lo)
++{
++	int seq;
++
++	dprintk("--> %s\n", __func__);
++
++	do {
++		seq = read_seqbegin(&lo->seqlock);
++		memcpy(dst->u.data, lo->stateid.u.data,
++		       sizeof(lo->stateid.u.data));
++	} while (read_seqretry(&lo->seqlock, seq));
++
++	dprintk("<-- %s\n", __func__);
++}
++
++static void
++pnfs_layout_from_open_stateid(struct pnfs_layout_type *lo,
++			      struct nfs4_state *state)
++{
++	int seq;
++
++	dprintk("--> %s\n", __func__);
++
++	write_seqlock(&lo->seqlock);
++	if (!memcmp(lo->stateid.u.data, &zero_stateid, NFS4_STATEID_SIZE))
++		do {
++			seq = read_seqbegin(&state->seqlock);
++			memcpy(lo->stateid.u.data, state->stateid.u.data,
++					sizeof(state->stateid.u.data));
++		} while (read_seqretry(&state->seqlock, seq));
++	write_sequnlock(&lo->seqlock);
++	dprintk("<-- %s\n", __func__);
++}
++
++/*
++* Get layout from server.
++*    for now, assume that whole file layouts are requested.
++*    arg->offset: 0
++*    arg->length: all ones
++*/
++static int
++send_layoutget(struct inode *ino,
++	   struct nfs_open_context *ctx,
++	   struct nfs4_pnfs_layout_segment *range,
++	   struct pnfs_layout_segment **lsegpp,
++	   struct pnfs_layout_type *lo)
++{
++	int status;
++	struct nfs_server *server = NFS_SERVER(ino);
++	struct nfs4_pnfs_layoutget *lgp;
++
++	dprintk("--> %s\n", __func__);
++
++	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
++	if (lgp == NULL) {
++		pnfs_layout_release(lo, NULL);
++		return -ENOMEM;
++	}
++	lgp->args.minlength = NFS4_MAX_UINT64;
++	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
++	lgp->args.lseg.iomode = range->iomode;
++	lgp->args.lseg.offset = 0;
++	lgp->args.lseg.length = NFS4_MAX_UINT64;
++	lgp->args.type = server->pnfs_curr_ld->id;
++	lgp->args.inode = ino;
++	lgp->lsegpp = lsegpp;
++
++	if (!memcmp(lo->stateid.u.data, &zero_stateid, NFS4_STATEID_SIZE)) {
++		struct nfs_open_context *oldctx = ctx;
++
++		if (!oldctx) {
++			ctx = nfs_find_open_context(ino, NULL,
++					(range->iomode == IOMODE_READ) ?
++					FMODE_READ: FMODE_WRITE);
++			BUG_ON(!ctx);
++		}
++		/* Set the layout stateid from the open stateid */
++		pnfs_layout_from_open_stateid(NFS_I(ino)->layout, ctx->state);
++		if (!oldctx)
++			put_nfs_open_context(ctx);
++	}
++
++	/* Retrieve layout information from server */
++	status = pnfs4_proc_layoutget(lgp);
++
++	dprintk("<-- %s status %d\n", __func__, status);
++	return status;
++}
++
++/*
++ * iomode matching rules:
++ * range	lseg	match
++ * -----	-----	-----
++ * ANY		READ	true
++ * ANY		RW	true
++ * RW		READ	false
++ * RW		RW	true
++ * READ		READ	true
++ * READ		RW	false
++ */
++static inline int
++should_free_lseg(struct pnfs_layout_segment *lseg,
++		   struct nfs4_pnfs_layout_segment *range)
++{
++	return (range->iomode == IOMODE_ANY ||
++		lseg->range.iomode == range->iomode) &&
++	       lo_seg_intersecting(&lseg->range, range);
++}
++
++static struct pnfs_layout_segment *
++has_layout_to_return(struct pnfs_layout_type *lo,
++		     struct nfs4_pnfs_layout_segment *range)
++{
++	struct pnfs_layout_segment *out = NULL, *lseg;
++	dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
++		__func__, lo, range->offset, range->length, range->iomode);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry (lseg, &lo->segs, fi_list)
++		if (should_free_lseg(lseg, range)) {
++			out = lseg;
++			break;
++		}
++
++	dprintk("%s:Return lseg=%p\n", __func__, out);
++	return out;
++}
++
++static inline bool
++_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
++{
++	return atomic_read(&lseg->kref.refcount) == 1;
++}
++
++
++static void
++pnfs_free_layout(struct pnfs_layout_type *lo,
++		 struct nfs4_pnfs_layout_segment *range)
++{
++	struct pnfs_layout_segment *lseg, *next;
++	dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
++		__func__, lo, range->offset, range->length, range->iomode);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry_safe (lseg, next, &lo->segs, fi_list) {
++		if (!should_free_lseg(lseg, range) ||
++		    !_pnfs_can_return_lseg(lseg))
++			continue;
++		dprintk("%s: freeing lseg %p iomode %d "
++			"offset %llu length %llu\n", __func__,
++			lseg, lseg->range.iomode, lseg->range.offset,
++			lseg->range.length);
++		list_del(&lseg->fi_list);
++		put_lseg_locked(lseg);
++	}
++	if (list_empty(&lo->segs)) {
++		struct nfs_client *clp;
++
++		clp = PNFS_NFS_SERVER(lo)->nfs_client;
++		spin_lock(&clp->cl_lock);
++		list_del_init(&lo->lo_layouts);
++		spin_unlock(&clp->cl_lock);
++		pnfs_set_layout_stateid(lo, &zero_stateid);
++	}
++
++	dprintk("%s:Return\n", __func__);
++}
++
++static bool
++pnfs_return_layout_barrier(struct nfs_inode *nfsi,
++			   struct nfs4_pnfs_layout_segment *range)
++{
++	struct pnfs_layout_segment *lseg;
++	bool ret = false;
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	list_for_each_entry(lseg, &nfsi->layout->segs, fi_list) {
++		if (!should_free_lseg(lseg, range))
++			continue;
++		lseg->valid = false;
++		if (!_pnfs_can_return_lseg(lseg)) {
++			dprintk("%s: wait on lseg %p refcount %d\n",
++				__func__, lseg,
++				atomic_read(&lseg->kref.refcount));
++			ret = true;
++		}
++	}
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	dprintk("%s:Return %d\n", __func__, ret);
++	return ret;
++}
++
++static int
++return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
++	      enum pnfs_layoutreturn_type type, struct pnfs_layout_type *lo,
++	      bool wait)
++{
++	struct nfs4_pnfs_layoutreturn *lrp;
++	struct nfs_server *server = NFS_SERVER(ino);
++	int status = -ENOMEM;
++
++	dprintk("--> %s\n", __func__);
++
++	BUG_ON(type != RETURN_FILE);
++
++	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
++	if (lrp == NULL) {
++		if (lo && (type == RETURN_FILE))
++			pnfs_layout_release(lo, NULL);
++		goto out;
++	}
++	lrp->args.reclaim = 0;
++	lrp->args.layout_type = server->pnfs_curr_ld->id;
++	lrp->args.return_type = type;
++	lrp->args.lseg = *range;
++	lrp->args.inode = ino;
++
++	status = pnfs4_proc_layoutreturn(lrp, wait);
++out:
++	dprintk("<-- %s status: %d\n", __func__, status);
++	return status;
++}
++
++int
++_pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
++		    const nfs4_stateid *stateid, /* optional */
++		    enum pnfs_layoutreturn_type type,
++		    bool wait)
++{
++	struct pnfs_layout_type *lo = NULL;
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct nfs4_pnfs_layout_segment arg;
++	int status = 0;
++
++	dprintk("--> %s type %d\n", __func__, type);
++
++
++	arg.iomode = range ? range->iomode : IOMODE_ANY;
++	arg.offset = 0;
++	arg.length = NFS4_MAX_UINT64;
++
++	if (type == RETURN_FILE) {
++		spin_lock(&ino->i_lock);
++		lo = nfsi->layout;
++		if (lo && !has_layout_to_return(lo, &arg)) {
++			lo = NULL;
++		}
++		if (!lo) {
++			spin_unlock(&ino->i_lock);
++			dprintk("%s: no layout segments to return\n", __func__);
++			goto out;
++		}
++
++		/* Reference for layoutreturn matched in pnfs_layout_release */
++		get_layout(lo);
++
++		spin_unlock(&ino->i_lock);
++
++		if (pnfs_return_layout_barrier(nfsi, &arg)) {
++			if (stateid) { /* callback */
++				status = -EAGAIN;
++				goto out_put;
++			}
++			dprintk("%s: waiting\n", __func__);
++			wait_event(nfsi->lo_waitq,
++				   !pnfs_return_layout_barrier(nfsi, &arg));
++		}
++
++		if (layoutcommit_needed(nfsi)) {
++			if (stateid && !wait) { /* callback */
++				dprintk("%s: layoutcommit pending\n", __func__);
++				status = -EAGAIN;
++				goto out_put;
++			}
++			status = pnfs_layoutcommit_inode(ino, wait);
++			if (status) {
++				/* Return layout even if layoutcommit fails */
++				dprintk("%s: layoutcommit failed, status=%d. "
++					"Returning layout anyway\n",
++					__func__, status);
++			}
++		}
++
++		if (!stateid)
++			status = return_layout(ino, &arg, type, lo, wait);
++		else
++			pnfs_layout_release(lo, &arg);
++	}
++out:
++	dprintk("<-- %s status: %d\n", __func__, status);
++	return status;
++out_put:
++	put_layout(ino);
++	goto out;
++}
++
++/*
++ * cmp two layout segments for sorting into layout cache
++ *
++ * Returns > 0 if l1 sorts after l2, < 0 if before, 0 if equal.  The u64
++ * fields are compared rather than subtracted, since a difference >= 2^63
++ * (e.g. length NFS4_MAX_UINT64 vs. 1) would flip sign in an s64.
++ */
++static inline s64
++cmp_layout(struct nfs4_pnfs_layout_segment *l1,
++	   struct nfs4_pnfs_layout_segment *l2)
++{
++	/* higher offset > lower offset */
++	if (l1->offset != l2->offset)
++		return l1->offset > l2->offset ? 1 : -1;
++
++	/* longer length > shorter length */
++	if (l1->length != l2->length)
++		return l1->length > l2->length ? 1 : -1;
++
++	/* read > read/write */
++	return (int)(l1->iomode == IOMODE_READ) -
++	       (int)(l2->iomode == IOMODE_READ);
++}
++
++static void
++pnfs_insert_layout(struct pnfs_layout_type *lo,
++		   struct pnfs_layout_segment *lseg)
++{
++	struct pnfs_layout_segment *lp;
++	int found = 0;
++
++	dprintk("%s:Begin\n", __func__);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	if (list_empty(&lo->segs)) {
++		struct nfs_client *clp = PNFS_NFS_SERVER(lo)->nfs_client;
++
++		spin_lock(&clp->cl_lock);
++		BUG_ON(!list_empty(&lo->lo_layouts));
++		list_add_tail(&lo->lo_layouts, &clp->cl_layouts);
++		spin_unlock(&clp->cl_lock);
++	}
++	list_for_each_entry (lp, &lo->segs, fi_list) {
++		if (cmp_layout(&lp->range, &lseg->range) > 0)
++			continue;
++		list_add_tail(&lseg->fi_list, &lp->fi_list);
++		dprintk("%s: inserted lseg %p "
++			"iomode %d offset %llu length %llu before "
++			"lp %p iomode %d offset %llu length %llu\n",
++			__func__, lseg, lseg->range.iomode,
++			lseg->range.offset, lseg->range.length,
++			lp, lp->range.iomode, lp->range.offset,
++			lp->range.length);
++		found = 1;
++		break;
++	}
++	if (!found) {
++		list_add_tail(&lseg->fi_list, &lo->segs);
++		dprintk("%s: inserted lseg %p "
++			"iomode %d offset %llu length %llu at tail\n",
++			__func__, lseg, lseg->range.iomode,
++			lseg->range.offset, lseg->range.length);
++	}
++	get_layout(lo);
++
++	dprintk("%s:Return\n", __func__);
++}
++
++/*
++ * Each layoutdriver embeds pnfs_layout_type as the first field in its
++ * per-layout type layout cache structure and returns it ZEROed
++ * from layoutdriver_io_ops->alloc_layout
++ */
++static struct pnfs_layout_type *
++alloc_init_layout(struct inode *ino)
++{
++	struct pnfs_layout_type *lo;
++	struct layoutdriver_io_operations *io_ops;
++
++	io_ops = NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops;
++	lo = io_ops->alloc_layout(ino);
++	if (!lo) {
++		printk(KERN_ERR
++			"%s: out of memory: io_ops->alloc_layout failed\n",
++			__func__);
++		return NULL;
++	}
++	lo->refcount = 1;
++	INIT_LIST_HEAD(&lo->lo_layouts);
++	INIT_LIST_HEAD(&lo->segs);
++	seqlock_init(&lo->seqlock);
++	lo->lo_inode = ino;
++	return lo;
++}
++
++/*
++ * Retrieve and possibly allocate the inode layout (NULL if that fails).
++ * ino->i_lock must be taken by the caller; it is dropped and re-taken
++ * around the allocation, and is held again on return.
++ */
++static struct pnfs_layout_type *
++pnfs_alloc_layout(struct inode *ino)
++{
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct pnfs_layout_type *new = NULL;
++
++	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
++
++	BUG_ON_UNLOCKED_INO(ino);
++	if (likely(nfsi->layout))
++		return nfsi->layout;
++
++	spin_unlock(&ino->i_lock);
++	new = alloc_init_layout(ino);
++	spin_lock(&ino->i_lock);
++
++	if (likely(nfsi->layout == NULL)) {	/* Won the race? */
++		nfsi->layout = new;
++	} else if (new) {
++		/* Reference the layout across i_lock release and grab */
++		get_layout(nfsi->layout);
++		spin_unlock(&ino->i_lock);
++		NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops->free_layout(new);
++		spin_lock(&ino->i_lock);
++		put_layout_locked(nfsi->layout);
++	}
++	return nfsi->layout;
++}
++
++/*
++ * iomode matching rules:
++ * range	lseg	match
++ * -----	-----	-----
++ * ANY		READ	true
++ * ANY		RW	true
++ * RW		READ	false
++ * RW		RW	true
++ * READ		READ	true
++ * READ		RW	true
++ */
++static inline int
++has_matching_lseg(struct pnfs_layout_segment *lseg,
++		  struct nfs4_pnfs_layout_segment *range)
++{
++	struct nfs4_pnfs_layout_segment range1;
++
++	if ((range->iomode == IOMODE_RW && lseg->range.iomode != IOMODE_RW) ||
++	    !lo_seg_intersecting(&lseg->range, range))
++		return 0;
++
++	/* range1 covers only the first byte in the range */
++	range1 = *range;
++	range1.length = 1;
++	return lo_seg_contained(&lseg->range, &range1);
++}
++
++/*
++ * lookup range in layout
++ */
++static struct pnfs_layout_segment *
++pnfs_has_layout(struct pnfs_layout_type *lo,
++		struct nfs4_pnfs_layout_segment *range,
++		bool take_ref,
++		bool only_valid)
++{
++	struct pnfs_layout_segment *lseg, *ret = NULL;
++
++	dprintk("%s:Begin\n", __func__);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry (lseg, &lo->segs, fi_list) {
++		if (has_matching_lseg(lseg, range) &&
++		    (lseg->valid || !only_valid)) {
++			ret = lseg;
++			if (take_ref)
++				get_lseg(ret);
++			break;
++		}
++		if (cmp_layout(range, &lseg->range) > 0)
++			break;
++	}
++
++	dprintk("%s:Return lseg %p take_ref %d ref %d valid %d\n",
++		__func__, ret, take_ref,
++		ret ? atomic_read(&ret->kref.refcount) : 0,
++		ret ? ret->valid : 0);
++	return ret;
++}
++
++/* Update the file's layout for the given iomode; the whole file is always
++ * requested, pos and count are currently unused.  Layout is retrieved from
++ * the server if needed.  If lsegpp is given, the appropriate layout segment
++ * is referenced and returned to the caller.
++ */
++void
++_pnfs_update_layout(struct inode *ino,
++		   struct nfs_open_context *ctx,
++		   loff_t pos,
++		   u64 count,
++		   enum pnfs_iomode iomode,
++		   struct pnfs_layout_segment **lsegpp)
++{
++	struct nfs4_pnfs_layout_segment arg = {
++		.iomode = iomode,
++		.offset = 0,
++		.length = NFS4_MAX_UINT64,
++	};
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct pnfs_layout_type *lo;
++	struct pnfs_layout_segment *lseg = NULL;
++	bool take_ref = (lsegpp != NULL);
++
++	if (take_ref)
++		*lsegpp = NULL;
++	spin_lock(&ino->i_lock);
++	lo = pnfs_alloc_layout(ino);
++	if (lo == NULL) {
++		dprintk("%s ERROR: can't get pnfs_layout_type\n", __func__);
++		goto out_unlock;
++	}
++
++	/* Check to see if the layout for the given range already exists */
++	lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
++	if (lseg && !lseg->valid) {
++		if (take_ref)
++			put_lseg_locked(lseg);
++		/* someone is cleaning the layout */
++		lseg = NULL;
++		goto out_unlock;
++	}
++
++	if (lseg) {
++		dprintk("%s: Using cached lseg %p for %llu@%llu iomode %d)\n",
++			__func__,
++			lseg,
++			arg.length,
++			arg.offset,
++			arg.iomode);
++		goto out_unlock;
++	}
++
++	/* if get layout already failed once goto out */
++	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->pnfs_layout_state)) {
++		if (unlikely(nfsi->pnfs_layout_suspend &&
++		    get_seconds() >= nfsi->pnfs_layout_suspend)) {
++			dprintk("%s: layout_get resumed\n", __func__);
++			clear_bit(lo_fail_bit(iomode),
++				  &nfsi->layout->pnfs_layout_state);
++			nfsi->pnfs_layout_suspend = 0;
++		} else
++			goto out_unlock;
++	}
++
++	/* Reference the layout for layoutget matched in pnfs_layout_release */
++	get_layout(lo);
++	spin_unlock(&ino->i_lock);
++
++	send_layoutget(ino, ctx, &arg, lsegpp, lo);
++out:
++	/* nfsi->layout is still NULL if pnfs_alloc_layout() failed above */
++	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
++		nfsi->layout ? nfsi->layout->pnfs_layout_state : 0UL, lseg);
++	return;
++out_unlock:
++	if (lsegpp)
++		*lsegpp = lseg;
++	spin_unlock(&ino->i_lock);
++	goto out;
++}
++
++void
++pnfs_get_layout_done(struct nfs4_pnfs_layoutget *lgp, int rpc_status)
++{
++	struct pnfs_layout_segment *lseg = NULL;
++	struct nfs_inode *nfsi = NFS_I(lgp->args.inode);
++	time_t suspend = 0;
++
++	dprintk("-->%s\n", __func__);
++
++	lgp->status = rpc_status;
++	if (likely(!rpc_status)) {
++		if (unlikely(lgp->res.layout.len < 0)) {
++			printk(KERN_ERR
++			       "%s: ERROR Returned layout size is ZERO\n", __func__);
++			lgp->status = -EIO;
++		}
++		goto out;
++	}
++
++	dprintk("%s: ERROR retrieving layout %d\n", __func__, rpc_status);
++	switch (rpc_status) {
++	case -NFS4ERR_BADLAYOUT:
++		lgp->status = -ENOENT;
++		/* FALLTHROUGH */
++	case -EACCES:	/* NFS4ERR_ACCESS */
++		/* transient error, don't mark with NFS_INO_LAYOUT_FAILED */
++		goto out;
++
++	case -NFS4ERR_LAYOUTTRYLATER:
++	case -NFS4ERR_RECALLCONFLICT:
++	case -NFS4ERR_OLD_STATEID:
++	case -EAGAIN:	/* NFS4ERR_LOCKED */
++		lgp->status = -NFS4ERR_DELAY;	/* for nfs4_handle_exception */
++		/* FALLTHROUGH */
++	case -NFS4ERR_GRACE:
++	case -NFS4ERR_DELAY:
++		goto out;
++
++	case -NFS4ERR_ADMIN_REVOKED:
++	case -NFS4ERR_DELEG_REVOKED:
++		/* The layout is expected to be returned at this point.
++		 * This should clear the layout stateid as well */
++		suspend = get_seconds() + 1;
++		break;
++
++	case -NFS4ERR_LAYOUTUNAVAILABLE:
++		lgp->status = -ENOTSUPP;
++		break;
++
++	case -NFS4ERR_REP_TOO_BIG:
++	case -NFS4ERR_REP_TOO_BIG_TO_CACHE:
++		lgp->status = -E2BIG;
++		break;
++
++	/* Leave the following errors untranslated */
++	case -NFS4ERR_DEADSESSION:
++	case -NFS4ERR_DQUOT:
++	case -EINVAL:		/* NFS4ERR_INVAL */
++	case -EIO:		/* NFS4ERR_IO */
++	case -NFS4ERR_FHEXPIRED:
++	case -NFS4ERR_MOVED:
++	case -NFS4ERR_NOSPC:
++	case -ESERVERFAULT:	/* NFS4ERR_SERVERFAULT */
++	case -ESTALE:		/* NFS4ERR_STALE */
++	case -ETOOSMALL:	/* NFS4ERR_TOOSMALL */
++		break;
++
++	/* The following errors are our fault and should never happen */
++	case -NFS4ERR_BADIOMODE:
++	case -NFS4ERR_BADXDR:
++	case -NFS4ERR_REQ_TOO_BIG:
++	case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
++	case -NFS4ERR_WRONG_TYPE:
++		lgp->status = -EINVAL;
++		/* FALLTHROUGH */
++	case -NFS4ERR_BAD_STATEID:
++	case -NFS4ERR_NOFILEHANDLE:
++	case -ENOTSUPP:	/* NFS4ERR_NOTSUPP */
++	case -NFS4ERR_OPENMODE:
++	case -NFS4ERR_OP_NOT_IN_SESSION:
++	case -NFS4ERR_TOO_MANY_OPS:
++		dprintk("%s: error %d: should never happen\n", __func__,
++			rpc_status);
++		break;
++
++	/* The following errors are the server's fault */
++	default:
++		dprintk("%s: illegal error %d\n", __func__, rpc_status);
++		lgp->status = -EIO;
++		break;
++	}
++
++	/* remember that get layout failed and suspend trying */
++	nfsi->pnfs_layout_suspend = suspend;
++	set_bit(lo_fail_bit(lgp->args.lseg.iomode),
++		&nfsi->layout->pnfs_layout_state);
++	dprintk("%s: layout_get suspended until %ld\n",
++		__func__, suspend);
++out:
++	dprintk("%s end (err:%d) state 0x%lx lseg %p\n",
++		__func__, lgp->status, nfsi->layout->pnfs_layout_state, lseg);
++	return;
++}
++
++int
++pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp)
++{
++	struct pnfs_layout_type *lo = NFS_I(lgp->args.inode)->layout;
++	struct nfs4_pnfs_layoutget_res *res = &lgp->res;
++	struct pnfs_layout_segment *lseg;
++	struct inode *ino = PNFS_INODE(lo);
++	int status = 0;
++
++	/* Inject layout blob into I/O device driver */
++	lseg = PNFS_LD_IO_OPS(lo)->alloc_lseg(lo, res);
++	if (!lseg || IS_ERR(lseg)) {
++		if (!lseg)
++			status = -ENOMEM;
++		else
++			status = PTR_ERR(lseg);
++		dprintk("%s: Could not allocate layout: error %d\n",
++		       __func__, status);
++		goto out;
++	}
++
++	spin_lock(&ino->i_lock);
++	init_lseg(lo, lseg);
++	lseg->range = res->lseg;
++	if (lgp->lsegpp) {
++		get_lseg(lseg);
++		*lgp->lsegpp = lseg;
++	}
++	pnfs_insert_layout(lo, lseg);
++
++	if (res->return_on_close) {
++		lo->roc_iomode |= res->lseg.iomode;
++		if (!lo->roc_iomode)
++			lo->roc_iomode = IOMODE_ANY;
++	}
++
++	/* Done processing layoutget. Set the layout stateid */
++	pnfs_set_layout_stateid(lo, &res->stateid);
++	spin_unlock(&ino->i_lock);
++out:
++	return status;
++}
++
++void
++readahead_range(struct inode *inode, struct list_head *pages, loff_t *offset,
++		size_t *count)
++{
++	struct page *first, *last;
++	loff_t foff, i_size = i_size_read(inode);
++	pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
++	size_t range;
++
++
++	first = list_entry((pages)->prev, struct page, lru);
++	last = list_entry((pages)->next, struct page, lru);
++
++	foff = (loff_t)first->index << PAGE_CACHE_SHIFT;
++
++	range = (last->index - first->index) * PAGE_CACHE_SIZE;
++	if (last->index == end_index)
++		range += ((i_size - 1) & ~PAGE_CACHE_MASK) + 1;
++	else
++		range += PAGE_CACHE_SIZE;
++	dprintk("%s foff %lu, range %Zu\n", __func__, (unsigned long)foff,
++		range);
++	*offset = foff;
++	*count = range;
++}
++
++void
++pnfs_set_pg_test(struct inode *inode, struct nfs_pageio_descriptor *pgio)
++{
++	struct pnfs_layout_type *laytype;
++	struct pnfs_layoutdriver_type *ld;
++
++	pgio->pg_test = NULL;
++
++	laytype = NFS_I(inode)->layout;
++	ld = NFS_SERVER(inode)->pnfs_curr_ld;
++	if (!pnfs_enabled_sb(NFS_SERVER(inode)) || !laytype)
++		return;
++
++	if (ld->ld_policy_ops)
++		pgio->pg_test = ld->ld_policy_ops->pg_test;
++}
++
++static u32
++pnfs_getboundary(struct inode *inode)
++{
++	u32 stripe_size = 0;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct layoutdriver_policy_operations *policy_ops;
++
++	if (!nfss->pnfs_curr_ld)
++		goto out;
++
++	policy_ops = nfss->pnfs_curr_ld->ld_policy_ops;
++	if (!policy_ops || !policy_ops->get_stripesize)
++		goto out;
++
++	/* The default is to not gather across stripes */
++	if (pnfs_ld_gather_across_stripes(nfss->pnfs_curr_ld))
++		goto out;
++
++	spin_lock(&inode->i_lock);
++	if (NFS_I(inode)->layout)
++		stripe_size = policy_ops->get_stripesize(NFS_I(inode)->layout);
++	spin_unlock(&inode->i_lock);
++out:
++	return stripe_size;
++}
++
++/*
++ * rsize is already set by caller to MDS rsize.
++ */
++void
++pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
++		  struct inode *inode,
++		  struct nfs_open_context *ctx,
++		  struct list_head *pages,
++		  size_t *rsize)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	size_t count = 0;
++	loff_t loff;
++
++	pgio->pg_iswrite = 0;
++	pgio->pg_boundary = 0;
++	pgio->pg_test = NULL;
++	pgio->pg_lseg = NULL;
++
++	if (!pnfs_enabled_sb(nfss))
++		return;
++
++	/* Calculate the total read-ahead count */
++	readahead_range(inode, pages, &loff, &count);
++
++	if (count > 0) {
++		_pnfs_update_layout(inode, ctx, loff, count, IOMODE_READ,
++				    &pgio->pg_lseg);
++		if (!pgio->pg_lseg)
++			return;
++
++		*rsize = NFS_SERVER(inode)->ds_rsize;
++		pgio->pg_boundary = pnfs_getboundary(inode);
++		if (pgio->pg_boundary)
++			pnfs_set_pg_test(inode, pgio);
++	}
++}
++
++void
++pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
++		       size_t *wsize)
++{
++	struct nfs_server *server = NFS_SERVER(inode);
++
++	pgio->pg_iswrite = 1;
++	if (!pnfs_enabled_sb(server)) {
++		pgio->pg_boundary = 0;
++		pgio->pg_test = NULL;
++		return;
++	}
++	pgio->pg_boundary = pnfs_getboundary(inode);
++	pnfs_set_pg_test(inode, pgio);
++	*wsize = server->ds_wsize;
++}
++
++/* Return I/O buffer size for a layout driver
++ * This value will determine what size reads and writes
++ * will be gathered into and sent to the data servers.
++ * blocksize must be a multiple of the page cache size.
++ */
++unsigned int
++pnfs_getiosize(struct nfs_server *server)
++{
++	if (!PNFS_EXISTS_LDPOLICY_OP(server, get_blocksize))
++		return 0;
++	return server->pnfs_curr_ld->ld_policy_ops->get_blocksize();
++}
++
++void
++pnfs_set_ds_iosize(struct nfs_server *server)
++{
++	unsigned dssize = pnfs_getiosize(server);
++
++	/* Set buffer size for data servers */
++	if (dssize > 0) {
++		server->ds_rsize = server->ds_wsize =
++			nfs_block_size(dssize, NULL);
++	} else {
++		server->ds_wsize = server->wsize;
++		server->ds_rsize = server->rsize;
++	}
++}
++
++static int
++pnfs_call_done(struct pnfs_call_data *pdata, struct rpc_task *task, void *data)
++{
++	put_lseg(pdata->lseg);
++	pdata->lseg = NULL;
++	pdata->call_ops->rpc_call_done(task, data);
++	if (pdata->pnfs_error == -EAGAIN || task->tk_status == -EAGAIN)
++		return -EAGAIN;
++	if (pdata->pnfsflags & PNFS_NO_RPC) {
++		pdata->call_ops->rpc_release(data);
++	} else {
++		/*
++		 * just restore original rpc call ops
++		 * rpc_release will be called later by the rpc scheduling layer.
++		 */
++		task->tk_ops = pdata->call_ops;
++	}
++	return 0;
++}
++
++/* Post-write completion function
++ * Invoked by all layout drivers when write_pagelist is done.
++ *
++ * NOTE: callers set data->pnfsflags PNFS_NO_RPC
++ * so that the NFS cleanup routines perform only the page cache
++ * cleanup.
++ */
++static void
++pnfs_write_retry(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++	struct nfs4_pnfs_layout_segment range;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++	range.iomode = IOMODE_RW;
++	range.offset = wdata->args.offset;
++	range.length = wdata->args.count;
++	_pnfs_return_layout(wdata->inode, &range, NULL, RETURN_FILE, true);
++	pnfs_initiate_write(wdata, NFS_CLIENT(wdata->inode),
++			    wdata->pdata.call_ops, wdata->pdata.how);
++}
++
++static void
++pnfs_writeback_done(struct nfs_write_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	/* update last write offset and need layout commit
++	 * for non-files layout types (files layout calls
++	 * pnfs4_write_done for this)
++	 */
++	if ((pdata->pnfsflags & PNFS_NO_RPC) &&
++	    data->task.tk_status >= 0 && data->res.count > 0) {
++		struct nfs_inode *nfsi = NFS_I(data->inode);
++
++		pnfs_update_last_write(nfsi, data->args.offset, data->res.count);
++		pnfs_need_layoutcommit(nfsi, data->args.context);
++	}
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		INIT_WORK(&data->task.u.tk_work, pnfs_write_retry);
++		queue_work(nfsiod_workqueue, &data->task.u.tk_work);
++	}
++}
++
++static void _pnfs_clear_lseg_from_pages(struct list_head *head)
++{
++	struct nfs_page *req;
++
++	list_for_each_entry(req, head, wb_list) {
++		put_lseg(req->wb_lseg);
++		req->wb_lseg = NULL;
++	}
++}
++
++/*
++ * Call the appropriate parallel I/O subsystem write function.
++ * If no I/O device driver exists, or one does match the returned
++ * fstype, then return a positive status for regular NFS processing.
++ *
++ * TODO: Is wdata->how and wdata->args.stable always the same value?
++ * TODO: It seems in NFS, the server may not do a stable write even
++ * though it was requested (and vice-versa?).  To check, it looks
++ * in data->res.verf->committed.  Do we need this ability
++ * for non-file layout drivers?
++ */
++enum pnfs_try_status
++pnfs_try_to_write_data(struct nfs_write_data *wdata,
++			const struct rpc_call_ops *call_ops, int how)
++{
++	struct inode *inode = wdata->inode;
++	enum pnfs_try_status trypnfs;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct pnfs_layout_segment *lseg = wdata->req->wb_lseg;
++
++	wdata->pdata.call_ops = call_ops;
++	wdata->pdata.pnfs_error = 0;
++	wdata->pdata.how = how;
++
++	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
++		inode->i_ino, wdata->args.count, wdata->args.offset, how);
++
++	get_lseg(lseg);
++
++	if (!pnfs_use_rpc(nfss))
++		wdata->pdata.pnfsflags |= PNFS_NO_RPC;
++	wdata->pdata.lseg = lseg;
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->write_pagelist(wdata,
++		nfs_page_array_len(wdata->args.pgbase, wdata->args.count),
++								how);
++
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {
++		wdata->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		wdata->pdata.lseg = NULL;
++		put_lseg(lseg);
++		_pnfs_clear_lseg_from_pages(&wdata->pages);
++	} else {
++		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
++	}
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++
++/* Post-read completion function.  Invoked by all layout drivers when
++ * read_pagelist is done
++ */
++static void
++pnfs_read_retry(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_read_data *rdata;
++	struct nfs4_pnfs_layout_segment range;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	rdata = container_of(task, struct nfs_read_data, task);
++	range.iomode = IOMODE_RW;
++	range.offset = rdata->args.offset;
++	range.length = rdata->args.count;
++	_pnfs_return_layout(rdata->inode, &range, NULL, RETURN_FILE, true);
++	pnfs_initiate_read(rdata, NFS_CLIENT(rdata->inode),
++			   rdata->pdata.call_ops);
++}
++
++static void
++pnfs_read_done(struct nfs_read_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		INIT_WORK(&data->task.u.tk_work, pnfs_read_retry);
++		queue_work(nfsiod_workqueue, &data->task.u.tk_work);
++	}
++}
++
++/*
++ * Call the appropriate parallel I/O subsystem read function.
++ * If no I/O device driver exists, or one does not match the returned
++ * fstype, then return a positive status for regular NFS processing.
++ */
++enum pnfs_try_status
++pnfs_try_to_read_data(struct nfs_read_data *rdata,
++		       const struct rpc_call_ops *call_ops)
++{
++	struct inode *inode = rdata->inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct pnfs_layout_segment *lseg = rdata->req->wb_lseg;
++	enum pnfs_try_status trypnfs;
++
++	rdata->pdata.call_ops = call_ops;
++	rdata->pdata.pnfs_error = 0;
++
++	dprintk("%s: Reading ino:%lu %u@%llu\n",
++		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
++
++	get_lseg(lseg);
++
++	if (!pnfs_use_rpc(nfss))
++		rdata->pdata.pnfsflags |= PNFS_NO_RPC;
++	rdata->pdata.lseg = lseg;
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->read_pagelist(rdata,
++		nfs_page_array_len(rdata->args.pgbase, rdata->args.count));
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {
++		rdata->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		rdata->pdata.lseg = NULL;
++		put_lseg(lseg);
++		_pnfs_clear_lseg_from_pages(&rdata->pages);
++	} else {
++		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
++	}
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++
++/*
++ * This gives the layout driver an opportunity to read in page "around"
++ * the data to be written.  It returns 0 on success, otherwise an error code
++ * which will either be passed up to user, or ignored if
++ * some previous part of write succeeded.
++ * Note the range [pos, pos+len-1] is entirely within the page.
++ */
++int _pnfs_write_begin(struct inode *inode, struct page *page,
++		      loff_t pos, unsigned len,
++		      struct pnfs_layout_segment *lseg,
++		      struct pnfs_fsdata **fsdata)
++{
++	struct pnfs_fsdata *data;
++	int status = 0;
++
++	dprintk("--> %s: pos=%llu len=%u\n",
++		__func__, (unsigned long long)pos, len);
++	data = kzalloc(sizeof(struct pnfs_fsdata), GFP_KERNEL);
++	if (!data) {
++		status = -ENOMEM;
++		goto out;
++	}
++	data->lseg = lseg; /* refcount passed into data to be managed there */
++	status = NFS_SERVER(inode)->pnfs_curr_ld->ld_io_ops->write_begin(
++						lseg, page, pos, len, data);
++	if (status) {
++		kfree(data);
++		data = NULL;
++	}
++out:
++	*fsdata = data;
++	dprintk("<-- %s: status=%d\n", __func__, status);
++	return status;
++}
++
++/* Return 0 on success, negative on failure */
++/* CAREFUL - what happens if copied < len??? */
++int _pnfs_write_end(struct inode *inode, struct page *page,
++		    loff_t pos, unsigned len, unsigned copied,
++		    struct pnfs_layout_segment *lseg)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int status;
++
++	status = nfss->pnfs_curr_ld->ld_io_ops->write_end(inode, page,
++						pos, len, copied, lseg);
++	return status;
++}
++
++/* pNFS Commit callback function for all layout drivers */
++static void
++pnfs_commit_done(struct nfs_write_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		struct nfs4_pnfs_layout_segment range = {
++			.iomode = IOMODE_RW,
++			.offset = data->args.offset,
++			.length = data->args.count,
++		};
++		dprintk("%s: retrying\n", __func__);
++		_pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE,
++				    true);
++		pnfs_initiate_commit(data, NFS_CLIENT(data->inode),
++				     pdata->call_ops, pdata->how, 1);
++	}
++}
++
++enum pnfs_try_status
++pnfs_try_to_commit(struct nfs_write_data *data,
++		    const struct rpc_call_ops *call_ops, int sync)
++{
++	struct inode *inode = data->inode;
++	struct nfs_server *nfss = NFS_SERVER(data->inode);
++	enum pnfs_try_status trypnfs;
++
++	dprintk("%s: Begin\n", __func__);
++
++	/* We need to account for possibility that
++	 * each nfs_page can point to a different lseg (or be NULL).
++	 * For the immediate case of whole-file-only layouts, we at
++	 * least know there can be only a single lseg.
++	 * We still have to account for the possibility of some being NULL.
++	 * This will be done by passing the buck to the layout driver.
++	 */
++	data->pdata.call_ops = call_ops;
++	data->pdata.pnfs_error = 0;
++	data->pdata.how = sync;
++	data->pdata.lseg = NULL;
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->commit(data, sync);
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {
++		data->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		_pnfs_clear_lseg_from_pages(&data->pages);
++	} else
++		nfs_inc_stats(inode, NFSIOS_PNFS_COMMIT);
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++
++void pnfs_cleanup_layoutcommit(struct pnfs_layoutcommit_data *data)
++{
++	struct nfs_server *nfss = NFS_SERVER(data->args.inode);
++
++	/* TODO: Maybe we should avoid this by allowing the layout driver
++	* to directly xdr its layout on the wire.
++	*/
++	if (nfss->pnfs_curr_ld->ld_io_ops->cleanup_layoutcommit)
++		nfss->pnfs_curr_ld->ld_io_ops->cleanup_layoutcommit(
++					NFS_I(data->args.inode)->layout,
++					&data->args, data->status);
++}
++
++/*
++ * Set up the argument/result storage required for the RPC call.
++ */
++static int
++pnfs_layoutcommit_setup(struct inode *inode,
++			struct pnfs_layoutcommit_data *data,
++			loff_t write_begin_pos, loff_t write_end_pos)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int result = 0;
++
++	dprintk("--> %s\n", __func__);
++
++	data->args.inode = inode;
++	data->args.fh = NFS_FH(inode);
++	data->args.layout_type = nfss->pnfs_curr_ld->id;
++	data->res.fattr = &data->fattr;
++	nfs_fattr_init(&data->fattr);
++
++	/* TODO: Need to determine the correct values */
++	data->args.time_modify_changed = 0;
++
++	/* Set values from inode so it can be reset
++	 */
++	data->args.lseg.iomode = IOMODE_RW;
++	data->args.lseg.offset = write_begin_pos;
++	data->args.lseg.length = write_end_pos - write_begin_pos + 1;
++	data->args.lastbytewritten =  min(write_end_pos,
++					  i_size_read(inode) - 1);
++	data->args.bitmask = nfss->attr_bitmask;
++	data->res.server = nfss;
++
++	/* Call layout driver to set the arguments */
++	if (nfss->pnfs_curr_ld->ld_io_ops->setup_layoutcommit)
++		result = nfss->pnfs_curr_ld->ld_io_ops->setup_layoutcommit(
++				NFS_I(inode)->layout, &data->args);
++
++	dprintk("<-- %s Status %d\n", __func__, result);
++	return result;
++}
++
++/* Issue a layoutcommit for an inode; sync is passed to pnfs4_proc_layoutcommit().
++ */
++int
++pnfs_layoutcommit_inode(struct inode *inode, int sync)
++{
++	struct pnfs_layoutcommit_data *data;
++	struct nfs_inode *nfsi = NFS_I(inode);
++	loff_t write_begin_pos;
++	loff_t write_end_pos;
++
++	int status = 0;
++
++	dprintk("%s Begin (sync:%d)\n", __func__, sync);
++
++	BUG_ON(!has_layout(nfsi));
++
++	data = pnfs_layoutcommit_alloc();
++	if (!data)
++		return -ENOMEM;
++
++	spin_lock(&inode->i_lock);
++	if (!layoutcommit_needed(nfsi)) {
++		spin_unlock(&inode->i_lock);
++		goto out_free;
++	}
++
++	/* Clear layoutcommit properties in the inode so
++	 * new lc info can be generated
++	 */
++	write_begin_pos = nfsi->layout->pnfs_write_begin_pos;
++	write_end_pos = nfsi->layout->pnfs_write_end_pos;
++	data->cred = nfsi->layout->lo_cred;
++	nfsi->layout->pnfs_write_begin_pos = 0;
++	nfsi->layout->pnfs_write_end_pos = 0;
++	nfsi->layout->lo_cred = NULL;
++	__clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->pnfs_layout_state);
++	pnfs_get_layout_stateid(&data->args.stateid, nfsi->layout);
++
++	/* Reference for layoutcommit matched in pnfs_layoutcommit_release */
++	get_layout(NFS_I(inode)->layout);
++
++	spin_unlock(&inode->i_lock);
++
++	/* Set up layout commit args */
++	status = pnfs_layoutcommit_setup(inode, data, write_begin_pos,
++					 write_end_pos);
++	if (status) {
++		/* The layout driver failed to setup the layoutcommit */
++		put_rpccred(data->cred);
++		put_layout(inode);
++		goto out_free;
++	}
++	status = pnfs4_proc_layoutcommit(data, sync);
++out:
++	dprintk("%s end (err:%d)\n", __func__, status);
++	return status;
++out_free:
++	pnfs_layoutcommit_free(data);
++	goto out;
++}
++
++void pnfs_free_fsdata(struct pnfs_fsdata *fsdata)
++{
++	if (fsdata) {
++		/* lseg refcounting handled directly in nfs_write_end */
++		kfree(fsdata);
++	}
++}
++
++/* Callback operations for layout drivers.
++ */
++struct pnfs_client_operations pnfs_ops = {
++	.nfs_getdevicelist = nfs4_pnfs_getdevicelist,
++	.nfs_getdeviceinfo = nfs4_pnfs_getdeviceinfo,
++	.nfs_readlist_complete = pnfs_read_done,
++	.nfs_writelist_complete = pnfs_writeback_done,
++	.nfs_commit_complete = pnfs_commit_done,
++};
++
++EXPORT_SYMBOL(pnfs_unregister_layoutdriver);
++EXPORT_SYMBOL(pnfs_register_layoutdriver);
++
++
++/* Device ID cache. Supports one layout type per struct nfs_client */
++int
++nfs4_alloc_init_deviceid_cache(struct nfs_client *clp,
++			 void (*free_callback)(struct kref *))
++{
++	struct nfs4_deviceid_cache *c;
++
++	c = kzalloc(sizeof(struct nfs4_deviceid_cache), GFP_KERNEL);
++	if (!c)
++		return -ENOMEM;
++	spin_lock(&clp->cl_lock);
++	if (clp->cl_devid_cache != NULL) {
++		kref_get(&clp->cl_devid_cache->dc_kref);
++		spin_unlock(&clp->cl_lock);
++		dprintk("%s [kref [%d]]\n", __func__,
++			atomic_read(&clp->cl_devid_cache->dc_kref.refcount));
++		kfree(c);
++	} else {
++		int i;
++
++		spin_lock_init(&c->dc_lock);
++		for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE ; i++)
++			INIT_HLIST_HEAD(&c->dc_deviceids[i]);
++		kref_init(&c->dc_kref);
++		c->dc_free_callback = free_callback;
++		clp->cl_devid_cache = c;
++		spin_unlock(&clp->cl_lock);
++		dprintk("%s [new]\n", __func__);
++	}
++	return 0;
++}
++EXPORT_SYMBOL(nfs4_alloc_init_deviceid_cache);
++
++void
++nfs4_init_deviceid_node(struct nfs4_deviceid *d)
++{
++	INIT_HLIST_NODE(&d->de_node);
++	kref_init(&d->de_kref);
++}
++EXPORT_SYMBOL(nfs4_init_deviceid_node);
++
++/* Called from layoutdriver_io_operations->alloc_lseg */
++void
++nfs4_set_layout_deviceid(struct pnfs_layout_segment *l, struct nfs4_deviceid *d)
++{
++	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
++	l->deviceid = d;
++	kref_get(&d->de_kref);
++}
++EXPORT_SYMBOL(nfs4_set_layout_deviceid);
++
++/* Called from layoutdriver_io_operations->free_lseg */
++void
++nfs4_unset_layout_deviceid(struct pnfs_layout_segment *l,
++			   struct nfs4_deviceid *d,
++			   void (*free_callback)(struct kref *))
++{
++	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
++	l->deviceid = NULL;
++	kref_put(&d->de_kref, free_callback);
++}
++EXPORT_SYMBOL(nfs4_unset_layout_deviceid);
++
++struct nfs4_deviceid *
++nfs4_find_deviceid(struct nfs4_deviceid_cache *c, struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++	long hash = nfs4_deviceid_hash(id);
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	rcu_read_lock();
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (!memcmp(&d->de_id, id, NFS4_PNFS_DEVICEID4_SIZE)) {
++			rcu_read_unlock();
++			return d;
++		}
++	}
++	rcu_read_unlock();
++	return NULL;
++}
++EXPORT_SYMBOL(nfs4_find_deviceid);
++
++/*
++ * Add or kref_get a deviceid.
++ * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
++ */
++struct nfs4_deviceid *
++nfs4_add_deviceid(struct nfs4_deviceid_cache *c, struct nfs4_deviceid *new)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++	long hash = nfs4_deviceid_hash(&new->de_id);
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	spin_lock(&c->dc_lock);
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (!memcmp(&d->de_id, &new->de_id, NFS4_PNFS_DEVICEID4_SIZE)) {
++			spin_unlock(&c->dc_lock);
++			dprintk("%s [discard]\n", __func__);
++			c->dc_free_callback(&new->de_kref);
++			return d;
++		}
++	}
++	hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
++	spin_unlock(&c->dc_lock);
++	dprintk("%s [new]\n", __func__);
++	return new;
++}
++EXPORT_SYMBOL(nfs4_add_deviceid);
++
++static int
++nfs4_remove_deviceid(struct nfs4_deviceid_cache *c, long hash,
++		     struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	spin_lock(&c->dc_lock);
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (id && memcmp(id, &d->de_id, NFS4_PNFS_DEVICEID4_SIZE))
++			continue;
++		hlist_del_rcu(&d->de_node);
++		spin_unlock(&c->dc_lock);
++		synchronize_rcu();
++		dprintk("%s [%d]\n", __func__,
++			atomic_read(&d->de_kref.refcount));
++		kref_put(&d->de_kref, c->dc_free_callback);
++		return 1;
++	}
++	spin_unlock(&c->dc_lock);
++	return 0;
++}
++
++void
++nfs4_delete_device(struct nfs4_deviceid_cache *c, struct pnfs_deviceid *id)
++{
++	long hash = nfs4_deviceid_hash(id);
++
++	nfs4_remove_deviceid(c, hash, id);
++}
++EXPORT_SYMBOL(nfs4_delete_device);
++
++static void
++nfs4_free_deviceid_cache(struct kref *kref)
++{
++	struct nfs4_deviceid_cache *cache =
++		container_of(kref, struct nfs4_deviceid_cache, dc_kref);
++	long i;
++
++	for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
++		while (nfs4_remove_deviceid(cache, i, NULL))
++			;
++	kfree(cache);
++}
++
++void
++nfs4_put_deviceid_cache(struct nfs_client *clp)
++{
++	struct nfs4_deviceid_cache *tmp = clp->cl_devid_cache;
++	int refcount;
++
++	dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
++	spin_lock(&clp->cl_lock);
++	refcount = atomic_read(&clp->cl_devid_cache->dc_kref.refcount);
++	if (refcount == 1)
++		clp->cl_devid_cache = NULL;
++	spin_unlock(&clp->cl_lock);
++	dprintk("%s [%d]\n", __func__, refcount);
++	kref_put(&tmp->dc_kref, nfs4_free_deviceid_cache);
++}
++EXPORT_SYMBOL(nfs4_put_deviceid_cache);
+diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.h
+--- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-08-23 12:09:03.358501440 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-08-23 12:09:03.358501440 -0400
+@@ -0,0 +1,355 @@
++/*
++ *  fs/nfs/pnfs.h
++ *
++ *  pNFS client data structures.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef FS_NFS_PNFS_H
++#define FS_NFS_PNFS_H
++
++#include <linux/nfs4_pnfs.h>
++
++#ifdef CONFIG_NFS_V4_1
++
++#include <linux/nfs_page.h>
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs_iostat.h>
++#include "iostat.h"
++
++/* nfs4proc.c */
++extern int nfs4_pnfs_getdevicelist(struct nfs_server *server,
++				   const struct nfs_fh *fh,
++				   struct pnfs_devicelist *devlist);
++extern int nfs4_pnfs_getdeviceinfo(struct nfs_server *server,
++				   struct pnfs_device *dev);
++extern int pnfs4_proc_layoutget(struct nfs4_pnfs_layoutget *lgp);
++extern int pnfs4_proc_layoutcommit(struct pnfs_layoutcommit_data *data,
++				   int issync);
++extern int pnfs4_proc_layoutreturn(struct nfs4_pnfs_layoutreturn *lrp, bool wait);
++
++/* pnfs.c */
++extern const nfs4_stateid zero_stateid;
++
++void _pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp);
++
++int _pnfs_return_layout(struct inode *, struct nfs4_pnfs_layout_segment *,
++			const nfs4_stateid *stateid, /* optional */
++			enum pnfs_layoutreturn_type, bool wait);
++void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *mntfh, u32 id);
++void unmount_pnfs_layoutdriver(struct nfs_server *);
++enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
++					     const struct rpc_call_ops *, int);
++enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
++					    const struct rpc_call_ops *);
++int pnfs_initialize(void);
++void pnfs_uninitialize(void);
++void pnfs_layoutcommit_free(struct pnfs_layoutcommit_data *data);
++void pnfs_cleanup_layoutcommit(struct pnfs_layoutcommit_data *data);
++int pnfs_layoutcommit_inode(struct inode *inode, int sync);
++void pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent);
++void pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx);
++unsigned int pnfs_getiosize(struct nfs_server *server);
++void pnfs_set_ds_iosize(struct nfs_server *server);
++enum pnfs_try_status pnfs_try_to_commit(struct nfs_write_data *,
++					 const struct rpc_call_ops *, int);
++void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
++			   struct nfs_open_context *, struct list_head *,
++			   size_t *);
++void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
++			    size_t *);
++void pnfs_free_fsdata(struct pnfs_fsdata *fsdata);
++void pnfs_get_layout_done(struct nfs4_pnfs_layoutget *, int rpc_status);
++int pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp);
++void pnfs_layout_release(struct pnfs_layout_type *, struct nfs4_pnfs_layout_segment *range);
++void pnfs_set_layout_stateid(struct pnfs_layout_type *lo,
++			     const nfs4_stateid *stateid);
++void pnfs_destroy_layout(struct nfs_inode *);
++void pnfs_destroy_all_layouts(struct nfs_client *);
++void put_layout(struct inode *inode);
++void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_type *lo);
++int _pnfs_write_begin(struct inode *inode, struct page *page,
++		      loff_t pos, unsigned len,
++		      struct pnfs_layout_segment *lseg,
++		      struct pnfs_fsdata **fsdata);
++int _pnfs_write_end(struct inode *inode, struct page *page,
++		    loff_t pos, unsigned len, unsigned copied,
++		    struct pnfs_layout_segment *lseg);
++
++#define PNFS_EXISTS_LDIO_OP(srv, opname) ((srv)->pnfs_curr_ld &&	\
++				     (srv)->pnfs_curr_ld->ld_io_ops &&	\
++				     (srv)->pnfs_curr_ld->ld_io_ops->opname)
++#define PNFS_EXISTS_LDPOLICY_OP(srv, opname) ((srv)->pnfs_curr_ld &&	\
++				     (srv)->pnfs_curr_ld->ld_policy_ops && \
++				     (srv)->pnfs_curr_ld->ld_policy_ops->opname)
++
++#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
++
++static inline int lo_fail_bit(u32 iomode)
++{
++	return iomode == IOMODE_RW ?
++			 NFS_INO_RW_LAYOUT_FAILED : NFS_INO_RO_LAYOUT_FAILED;
++}
++
++/* Return true if a layout driver is being used for this mountpoint */
++static inline int pnfs_enabled_sb(struct nfs_server *nfss)
++{
++	return nfss->pnfs_curr_ld != NULL;
++}
++
++static inline int pnfs_grow_ok(struct pnfs_layout_segment *lseg,
++			       struct pnfs_fsdata *fsdata)
++{
++	return !fsdata  || ((struct pnfs_layout_segment *)fsdata == lseg) ||
++		!fsdata->bypass_eof;
++}
++
++/* Should the pNFS client commit and return the layout upon a setattr */
++static inline bool
++pnfs_ld_layoutret_on_setattr(struct inode *inode)
++{
++	if (!pnfs_enabled_sb(NFS_SERVER(inode)))
++		return false;
++	return NFS_SERVER(inode)->pnfs_curr_ld->ld_policy_ops->flags &
++		PNFS_LAYOUTRET_ON_SETATTR;
++}
++
++/* Should the pNFS client commit and return the layout on close
++ */
++static inline int
++pnfs_layout_roc_iomode(struct nfs_inode *nfsi)
++{
++	return nfsi->layout->roc_iomode;
++}
++
++static inline int pnfs_write_begin(struct file *filp, struct page *page,
++				   loff_t pos, unsigned len,
++				   struct pnfs_layout_segment *lseg,
++				   void **fsdata)
++{
++	struct inode *inode = filp->f_dentry->d_inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int status = 0;
++
++	*fsdata = lseg;
++	if (lseg && PNFS_EXISTS_LDIO_OP(nfss, write_begin))
++		status = _pnfs_write_begin(inode, page, pos, len, lseg,
++					   (struct pnfs_fsdata **) fsdata);
++	return status;
++}
++
++static inline int pnfs_write_end(struct file *filp, struct page *page,
++				 loff_t pos, unsigned len, unsigned copied,
++				 struct pnfs_layout_segment *lseg)
++{
++	struct inode *inode = filp->f_dentry->d_inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++
++	if (PNFS_EXISTS_LDIO_OP(nfss, write_end))
++		return _pnfs_write_end(inode, page, pos, len, copied, lseg);
++	else
++		return 0;
++}
++
++static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
++{
++	if (fsdata) {
++		struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode);
++
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_end_cleanup))
++			nfss->pnfs_curr_ld->ld_io_ops->write_end_cleanup(filp, fsdata);
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_begin))
++			pnfs_free_fsdata(fsdata);
++	}
++}
++
++static inline int pnfs_return_layout(struct inode *ino,
++				     struct nfs4_pnfs_layout_segment *lseg,
++				     const nfs4_stateid *stateid, /* optional */
++				     enum pnfs_layoutreturn_type type,
++				     bool wait)
++{
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct nfs_server *nfss = NFS_SERVER(ino);
++
++	if (pnfs_enabled_sb(nfss) &&
++	    (type != RETURN_FILE || has_layout(nfsi)))
++		return _pnfs_return_layout(ino, lseg, stateid, type, wait);
++
++	return 0;
++}
++
++static inline void pnfs_update_layout(struct inode *ino,
++	struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp)
++{
++	struct nfs_server *nfss = NFS_SERVER(ino);
++
++	if (pnfs_enabled_sb(nfss))
++		_pnfs_update_layout(ino, ctx, pos, count, access_type, lsegpp);
++	else {
++		if (lsegpp)
++			*lsegpp = NULL;
++	}
++}
++
++static inline int pnfs_get_write_status(struct nfs_write_data *data)
++{
++	return data->pdata.pnfs_error;
++}
++
++static inline int pnfs_get_read_status(struct nfs_read_data *data)
++{
++	return data->pdata.pnfs_error;
++}
++
++static inline int pnfs_use_rpc(struct nfs_server *nfss)
++{
++	if (pnfs_enabled_sb(nfss))
++		return pnfs_ld_use_rpc_code(nfss->pnfs_curr_ld);
++
++	return 1;
++}
++
++static inline struct pnfs_layout_segment *
++nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata)
++{
++	if (fsdata) {
++		struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode);
++
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_begin))
++			return ((struct pnfs_fsdata *) fsdata)->lseg;
++	}
++	return fsdata;
++}
++#else  /* CONFIG_NFS_V4_1 */
++
++static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
++{
++}
++
++static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
++{
++}
++
++static inline void get_lseg(struct pnfs_layout_segment *lseg)
++{
++}
++
++static inline void put_lseg(struct pnfs_layout_segment *lseg)
++{
++}
++
++static inline void
++pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp)
++{
++	if (lsegpp)
++		*lsegpp = NULL;
++}
++
++static inline int pnfs_grow_ok(struct pnfs_layout_segment *lseg,
++			       struct pnfs_fsdata *fsdata)
++{
++	return 1;
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_read_data(struct nfs_read_data *data,
++		      const struct rpc_call_ops *call_ops)
++{
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_write_data(struct nfs_write_data *data,
++		       const struct rpc_call_ops *call_ops, int how)
++{
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_commit(struct nfs_write_data *data,
++		   const struct rpc_call_ops *call_ops, int how)
++{
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static inline int pnfs_write_begin(struct file *filp, struct page *page,
++				   loff_t pos, unsigned len,
++				   struct pnfs_layout_segment *lseg,
++				   void **fsdata)
++{
++	*fsdata = NULL;
++	return 0;
++}
++
++static inline int pnfs_write_end(struct file *filp, struct page *page,
++				 loff_t pos, unsigned len, unsigned copied,
++				 struct pnfs_layout_segment *lseg)
++{
++	return 0;
++}
++
++static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
++{
++}
++
++static inline int pnfs_get_write_status(struct nfs_write_data *data)
++{
++	return 0;
++}
++
++static inline int pnfs_get_read_status(struct nfs_read_data *data)
++{
++	return 0;
++}
++
++static inline int pnfs_use_rpc(struct nfs_server *nfss)
++{
++	return 1;
++}
++
++static inline int pnfs_layoutcommit_inode(struct inode *inode, int sync)
++{
++	return 0;
++}
++
++static inline bool
++pnfs_ld_layoutret_on_setattr(struct inode *inode)
++{
++	return false;
++}
++
++static inline int
++pnfs_layout_roc_iomode(struct nfs_inode *nfsi)
++{
++	return 0;
++}
++
++static inline int pnfs_return_layout(struct inode *ino,
++				     struct nfs4_pnfs_layout_segment *lseg,
++				     const nfs4_stateid *stateid, /* optional */
++				     enum pnfs_layoutreturn_type type,
++				     bool wait)
++{
++	return 0;
++}
++
++static inline struct pnfs_layout_segment *
++nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata)
++{
++	return NULL;
++}
++
++#endif /* CONFIG_NFS_V4_1 */
++
++#endif /* FS_NFS_PNFS_H */
+diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.c
+--- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-08-23 12:08:29.057511533 -0400
++++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-23 12:09:03.359501471 -0400
+@@ -443,7 +443,7 @@ nfs_proc_symlink(struct inode *dir, stru
+ 	fattr = nfs_alloc_fattr();
+ 	status = -ENOMEM;
+ 	if (fh == NULL || fattr == NULL)
+-		goto out;
++		goto out_free;
+ 
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+@@ -455,7 +455,7 @@ nfs_proc_symlink(struct inode *dir, stru
+ 	 */
+ 	if (status == 0)
+ 		status = nfs_instantiate(dentry, fh, fattr);
+-
++out_free:
+ 	nfs_free_fattr(fattr);
+ 	nfs_free_fhandle(fh);
+ out:
+@@ -694,6 +694,7 @@ const struct nfs_rpc_ops nfs_v2_clientop
+ 	.dentry_ops	= &nfs_dentry_operations,
+ 	.dir_inode_ops	= &nfs_dir_inode_operations,
+ 	.file_inode_ops	= &nfs_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs_proc_get_root,
+ 	.getattr	= nfs_proc_getattr,
+ 	.setattr	= nfs_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.c
+--- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-08-23 12:08:29.057511533 -0400
++++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-23 12:09:03.359501471 -0400
+@@ -18,8 +18,12 @@
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/nfs_page.h>
++#include <linux/smp_lock.h>
++#include <linux/module.h>
+ 
+ #include <asm/system.h>
++#include <linux/module.h>
++#include "pnfs.h"
+ 
+ #include "nfs4_fs.h"
+ #include "internal.h"
+@@ -117,11 +121,14 @@ int nfs_readpage_async(struct nfs_open_c
+ 	LIST_HEAD(one_request);
+ 	struct nfs_page	*new;
+ 	unsigned int len;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	len = nfs_page_length(page);
+ 	if (len == 0)
+ 		return nfs_return_empty_page(page);
+-	new = nfs_create_request(ctx, inode, page, 0, len);
++	pnfs_update_layout(inode, ctx, 0, NFS4_MAX_UINT64, IOMODE_READ, &lseg);
++	new = nfs_create_request(ctx, inode, page, 0, len, lseg);
++	put_lseg(lseg);
+ 	if (IS_ERR(new)) {
+ 		unlock_page(page);
+ 		return PTR_ERR(new);
+@@ -155,24 +162,20 @@ static void nfs_readpage_release(struct 
+ 	nfs_release_request(req);
+ }
+ 
+-/*
+- * Set up the NFS read request struct
+- */
+-static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+-		const struct rpc_call_ops *call_ops,
+-		unsigned int count, unsigned int offset)
++int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++		      const struct rpc_call_ops *call_ops)
+ {
+-	struct inode *inode = req->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = req->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+ 		.task = &data->task,
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.rpc_message = &msg,
+ 		.callback_ops = call_ops,
+ 		.callback_data = data,
+@@ -180,9 +183,46 @@ static int nfs_read_rpcsetup(struct nfs_
+ 		.flags = RPC_TASK_ASYNC | swap_flags,
+ 	};
+ 
++	/* Set up the initial task struct. */
++	NFS_PROTO(inode)->read_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
++			data->task.tk_pid,
++			inode->i_sb->s_id,
++			(long long)NFS_FILEID(inode),
++			data->args.count,
++			(unsigned long long)data->args.offset);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	rpc_put_task(task);
++	return 0;
++}
++EXPORT_SYMBOL(nfs_initiate_read);
++
++int pnfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++		       const struct rpc_call_ops *call_ops)
++{
++	if (data->req->wb_lseg &&
++	    (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
++		return pnfs_get_read_status(data);
++
++	return nfs_initiate_read(data, clnt, call_ops);
++}
++
++/*
++ * Set up the NFS read request struct
++ */
++static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
++		const struct rpc_call_ops *call_ops,
++		unsigned int count, unsigned int offset)
++{
++	struct inode *inode = req->wb_context->path.dentry->d_inode;
++
+ 	data->req	  = req;
+ 	data->inode	  = inode;
+-	data->cred	  = msg.rpc_cred;
++	data->cred	  = req->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(inode);
+ 	data->args.offset = req_offset(req) + offset;
+@@ -190,27 +230,14 @@ static int nfs_read_rpcsetup(struct nfs_
+ 	data->args.pages  = data->pagevec;
+ 	data->args.count  = count;
+ 	data->args.context = get_nfs_open_context(req->wb_context);
++	data->args.lock_context = req->wb_lock_context;
+ 
+ 	data->res.fattr   = &data->fattr;
+ 	data->res.count   = count;
+ 	data->res.eof     = 0;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	/* Set up the initial task struct. */
+-	NFS_PROTO(inode)->read_setup(data, &msg);
+-
+-	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+-			data->task.tk_pid,
+-			inode->i_sb->s_id,
+-			(long long)NFS_FILEID(inode),
+-			count,
+-			(unsigned long long)data->args.offset);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task))
+-		return PTR_ERR(task);
+-	rpc_put_task(task);
+-	return 0;
++	return pnfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
+ }
+ 
+ static void
+@@ -354,7 +381,14 @@ static void nfs_readpage_retry(struct rp
+ {
+ 	struct nfs_readargs *argp = &data->args;
+ 	struct nfs_readres *resp = &data->res;
++	struct nfs_client *clp = NFS_SERVER(data->inode)->nfs_client;
+ 
++#ifdef CONFIG_NFS_V4_1
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		clp = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	if (resp->eof || resp->count == argp->count)
+ 		return;
+ 
+@@ -368,7 +402,10 @@ static void nfs_readpage_retry(struct rp
+ 	argp->offset += resp->count;
+ 	argp->pgbase += resp->count;
+ 	argp->count -= resp->count;
+-	nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client);
++#ifdef CONFIG_NFS_V4_1
++	data->pdata.pnfs_error = -EAGAIN;
++#endif /* CONFIG_NFS_V4_1 */
++	nfs_restart_rpc(task, clp);
+ }
+ 
+ /*
+@@ -409,13 +446,19 @@ static void nfs_readpage_release_partial
+ void nfs_read_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs_read_data *data = calldata;
++	struct nfs4_session *ds_session = NULL;
+ 
+-	if (nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client,
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		ds_session = data->fldata.ds_nfs_client->cl_session;
++	}
++	if (nfs4_setup_sequence(NFS_SERVER(data->inode), ds_session,
+ 				&data->args.seq_args, &data->res.seq_res,
+ 				0, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
++EXPORT_SYMBOL(nfs_read_prepare);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static const struct rpc_call_ops nfs_read_partial_ops = {
+@@ -568,7 +611,8 @@ readpage_async_filler(void *data, struct
+ 	if (len == 0)
+ 		return nfs_return_empty_page(page);
+ 
+-	new = nfs_create_request(desc->ctx, inode, page, 0, len);
++	new = nfs_create_request(desc->ctx, inode, page, 0, len,
++				 desc->pgio->pg_lseg);
+ 	if (IS_ERR(new))
+ 		goto out_error;
+ 
+@@ -624,6 +668,9 @@ int nfs_readpages(struct file *filp, str
+ 	if (ret == 0)
+ 		goto read_complete; /* all pages were read */
+ 
++#ifdef CONFIG_NFS_V4_1
++	pnfs_pageio_init_read(&pgio, inode, desc.ctx, pages, &rsize);
++#endif /* CONFIG_NFS_V4_1 */
+ 	if (rsize < PAGE_CACHE_SIZE)
+ 		nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
+ 	else
+@@ -632,6 +679,7 @@ int nfs_readpages(struct file *filp, str
+ 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
+ 
+ 	nfs_pageio_complete(&pgio);
++	put_lseg(pgio.pg_lseg);
+ 	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ 	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
+ read_complete:
+diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/super.c
+--- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-23 12:08:29.059491391 -0400
++++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-23 12:09:03.361501458 -0400
+@@ -64,6 +64,7 @@
+ #include "iostat.h"
+ #include "internal.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_VFS
+ 
+@@ -669,6 +670,28 @@ static int nfs_show_options(struct seq_f
+ 
+ 	return 0;
+ }
++#ifdef CONFIG_NFS_V4_1
++void show_sessions(struct seq_file *m, struct nfs_server *server)
++{
++	if (nfs4_has_session(server->nfs_client))
++		seq_printf(m, ",sessions");
++}
++#else
++void show_sessions(struct seq_file *m, struct nfs_server *server) {}
++#endif
++
++#ifdef CONFIG_NFS_V4_1
++void show_pnfs(struct seq_file *m, struct nfs_server *server)
++{
++	seq_printf(m, ",pnfs=");
++	if (server->pnfs_curr_ld)
++		seq_printf(m, "%s", server->pnfs_curr_ld->name);
++	else
++		seq_printf(m, "not configured");
++}
++#else  /* CONFIG_NFS_V4_1 */
++void show_pnfs(struct seq_file *m, struct nfs_server *server) {}
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ /*
+  * Present statistical information for this VFS mountpoint
+@@ -707,6 +730,8 @@ static int nfs_show_stats(struct seq_fil
+ 		seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
+ 		seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
+ 		seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
++		show_sessions(m, nfss);
++		show_pnfs(m, nfss);
+ 	}
+ #endif
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unlink.c
+--- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-08-23 12:08:29.060501485 -0400
++++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-23 12:09:03.362419975 -0400
+@@ -110,7 +110,7 @@ void nfs_unlink_prepare(struct rpc_task 
+ 	struct nfs_unlinkdata *data = calldata;
+ 	struct nfs_server *server = NFS_SERVER(data->dir);
+ 
+-	if (nfs4_setup_sequence(server->nfs_client, &data->args.seq_args,
++	if (nfs4_setup_sequence(server, NULL, &data->args.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/write.c
+--- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-08-23 12:08:27.630563929 -0400
++++ linux-2.6.34.noarch/fs/nfs/write.c	2010-08-23 12:09:03.364491337 -0400
+@@ -20,6 +20,7 @@
+ #include <linux/nfs_mount.h>
+ #include <linux/nfs_page.h>
+ #include <linux/backing-dev.h>
++#include <linux/module.h>
+ 
+ #include <asm/uaccess.h>
+ 
+@@ -28,6 +29,7 @@
+ #include "iostat.h"
+ #include "nfs4_fs.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
+ 
+@@ -59,6 +61,7 @@ struct nfs_write_data *nfs_commitdata_al
+ 	}
+ 	return p;
+ }
++EXPORT_SYMBOL(nfs_commitdata_alloc);
+ 
+ void nfs_commit_free(struct nfs_write_data *p)
+ {
+@@ -66,6 +69,7 @@ void nfs_commit_free(struct nfs_write_da
+ 		kfree(p->pagevec);
+ 	mempool_free(p, nfs_commit_mempool);
+ }
++EXPORT_SYMBOL(nfs_commit_free);
+ 
+ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+ {
+@@ -418,6 +422,17 @@ static void nfs_inode_remove_request(str
+ 	nfs_clear_request(req);
+ 	nfs_release_request(req);
+ }
++static void
++nfs_mark_request_nopnfs(struct nfs_page *req)
++{
++	struct pnfs_layout_segment *lseg = req->wb_lseg;
++
++	if (req->wb_lseg == NULL)
++		return;
++	req->wb_lseg = NULL;
++	put_lseg(lseg);
++	dprintk(" retry through MDS\n");
++}
+ 
+ static void
+ nfs_mark_request_dirty(struct nfs_page *req)
+@@ -523,7 +538,7 @@ nfs_need_commit(struct nfs_inode *nfsi)
+  * The requests are *not* checked to ensure that they form a contiguous set.
+  */
+ static int
+-nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
++nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages, int *use_pnfs)
+ {
+ 	struct nfs_inode *nfsi = NFS_I(inode);
+ 	int ret;
+@@ -531,7 +546,8 @@ nfs_scan_commit(struct inode *inode, str
+ 	if (!nfs_need_commit(nfsi))
+ 		return 0;
+ 
+-	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
++	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT,
++			    use_pnfs);
+ 	if (ret > 0)
+ 		nfsi->ncommit -= ret;
+ 	if (nfs_need_commit(NFS_I(inode)))
+@@ -560,7 +576,8 @@ static inline int nfs_scan_commit(struct
+ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
+ 		struct page *page,
+ 		unsigned int offset,
+-		unsigned int bytes)
++		unsigned int bytes,
++		struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_page *req;
+ 	unsigned int rqend;
+@@ -585,8 +602,8 @@ static struct nfs_page *nfs_try_to_updat
+ 		 * Note: nfs_flush_incompatible() will already
+ 		 * have flushed out requests having wrong owners.
+ 		 */
+-		if (offset > rqend
+-		    || end < req->wb_offset)
++		if (offset > rqend || end < req->wb_offset ||
++		    req->wb_lseg != lseg)
+ 			goto out_flushme;
+ 
+ 		if (nfs_set_page_tag_locked(req))
+@@ -634,16 +651,17 @@ out_err:
+  * already called nfs_flush_incompatible() if necessary.
+  */
+ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
+-		struct page *page, unsigned int offset, unsigned int bytes)
++		struct page *page, unsigned int offset, unsigned int bytes,
++		struct pnfs_layout_segment *lseg)
+ {
+ 	struct inode *inode = page->mapping->host;
+ 	struct nfs_page	*req;
+ 	int error;
+ 
+-	req = nfs_try_to_update_request(inode, page, offset, bytes);
++	req = nfs_try_to_update_request(inode, page, offset, bytes, lseg);
+ 	if (req != NULL)
+ 		goto out;
+-	req = nfs_create_request(ctx, inode, page, offset, bytes);
++	req = nfs_create_request(ctx, inode, page, offset, bytes, lseg);
+ 	if (IS_ERR(req))
+ 		goto out;
+ 	error = nfs_inode_add_request(inode, req);
+@@ -656,23 +674,27 @@ out:
+ }
+ 
+ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
+-		unsigned int offset, unsigned int count)
++			       unsigned int offset, unsigned int count,
++			       struct pnfs_layout_segment *lseg,
++			       void *fsdata)
+ {
+ 	struct nfs_page	*req;
+ 
+-	req = nfs_setup_write_request(ctx, page, offset, count);
++	req = nfs_setup_write_request(ctx, page, offset, count, lseg);
+ 	if (IS_ERR(req))
+ 		return PTR_ERR(req);
+ 	nfs_mark_request_dirty(req);
+ 	/* Update file length */
+-	nfs_grow_file(page, offset, count);
++	if (pnfs_grow_ok(lseg, fsdata))
++		nfs_grow_file(page, offset, count);
+ 	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+ 	nfs_mark_request_dirty(req);
+ 	nfs_clear_page_tag_locked(req);
+ 	return 0;
+ }
+ 
+-int nfs_flush_incompatible(struct file *file, struct page *page)
++int nfs_flush_incompatible(struct file *file, struct page *page,
++			   struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_open_context *ctx = nfs_file_open_context(file);
+ 	struct nfs_page	*req;
+@@ -689,7 +711,10 @@ int nfs_flush_incompatible(struct file *
+ 		req = nfs_page_find_request(page);
+ 		if (req == NULL)
+ 			return 0;
+-		do_flush = req->wb_page != page || req->wb_context != ctx;
++		do_flush = req->wb_page != page || req->wb_context != ctx ||
++			req->wb_lock_context->lockowner != current->files ||
++			req->wb_lock_context->pid != current->tgid ||
++			req->wb_lseg != lseg;
+ 		nfs_release_request(req);
+ 		if (!do_flush)
+ 			return 0;
+@@ -716,7 +741,8 @@ static int nfs_write_pageuptodate(struct
+  * things with a page scheduled for an RPC call (e.g. invalidate it).
+  */
+ int nfs_updatepage(struct file *file, struct page *page,
+-		unsigned int offset, unsigned int count)
++		   unsigned int offset, unsigned int count,
++		   struct pnfs_layout_segment *lseg, void *fsdata)
+ {
+ 	struct nfs_open_context *ctx = nfs_file_open_context(file);
+ 	struct inode	*inode = page->mapping->host;
+@@ -741,7 +767,7 @@ int nfs_updatepage(struct file *file, st
+ 		offset = 0;
+ 	}
+ 
+-	status = nfs_writepage_setup(ctx, page, offset, count);
++	status = nfs_writepage_setup(ctx, page, offset, count, lseg, fsdata);
+ 	if (status < 0)
+ 		nfs_set_pageerror(page);
+ 
+@@ -771,25 +797,21 @@ static int flush_task_priority(int how)
+ 	return RPC_PRIORITY_NORMAL;
+ }
+ 
+-/*
+- * Set up the argument/result storage required for the RPC call.
+- */
+-static int nfs_write_rpcsetup(struct nfs_page *req,
+-		struct nfs_write_data *data,
+-		const struct rpc_call_ops *call_ops,
+-		unsigned int count, unsigned int offset,
+-		int how)
++int nfs_initiate_write(struct nfs_write_data *data,
++		       struct rpc_clnt *clnt,
++		       const struct rpc_call_ops *call_ops,
++		       int how)
+ {
+-	struct inode *inode = req->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int priority = flush_task_priority(how);
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = req->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.task = &data->task,
+ 		.rpc_message = &msg,
+ 		.callback_ops = call_ops,
+@@ -800,12 +822,62 @@ static int nfs_write_rpcsetup(struct nfs
+ 	};
+ 	int ret = 0;
+ 
++	/* Set up the initial task struct.  */
++	NFS_PROTO(inode)->write_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated write call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task)) {
++		ret = PTR_ERR(task);
++		goto out;
++	}
++	if (how & FLUSH_SYNC) {
++		ret = rpc_wait_for_completion_task(task);
++		if (ret == 0)
++			ret = task->tk_status;
++	}
++	rpc_put_task(task);
++out:
++	return ret;
++}
++EXPORT_SYMBOL(nfs_initiate_write);
++
++int pnfs_initiate_write(struct nfs_write_data *data,
++			struct rpc_clnt *clnt,
++			const struct rpc_call_ops *call_ops,
++			int how)
++{
++	if (data->req->wb_lseg &&
++	    (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
++		return pnfs_get_write_status(data);
++
++	return nfs_initiate_write(data, clnt, call_ops, how);
++}
++
++/*
++ * Set up the argument/result storage required for the RPC call.
++ */
++static int nfs_write_rpcsetup(struct nfs_page *req,
++		struct nfs_write_data *data,
++		const struct rpc_call_ops *call_ops,
++		unsigned int count, unsigned int offset,
++		int how)
++{
++	struct inode *inode = req->wb_context->path.dentry->d_inode;
++
+ 	/* Set up the RPC argument and reply structs
+ 	 * NB: take care not to mess about with data->commit et al. */
+ 
+ 	data->req = req;
+ 	data->inode = inode = req->wb_context->path.dentry->d_inode;
+-	data->cred = msg.rpc_cred;
++	data->cred = req->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(inode);
+ 	data->args.offset = req_offset(req) + offset;
+@@ -813,6 +885,7 @@ static int nfs_write_rpcsetup(struct nfs
+ 	data->args.pages  = data->pagevec;
+ 	data->args.count  = count;
+ 	data->args.context = get_nfs_open_context(req->wb_context);
++	data->args.lock_context = req->wb_lock_context;
+ 	data->args.stable  = NFS_UNSTABLE;
+ 	if (how & FLUSH_STABLE) {
+ 		data->args.stable = NFS_DATA_SYNC;
+@@ -825,30 +898,7 @@ static int nfs_write_rpcsetup(struct nfs
+ 	data->res.verf    = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	/* Set up the initial task struct.  */
+-	NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-	dprintk("NFS: %5u initiated write call "
+-		"(req %s/%lld, %u bytes @ offset %llu)\n",
+-		data->task.tk_pid,
+-		inode->i_sb->s_id,
+-		(long long)NFS_FILEID(inode),
+-		count,
+-		(unsigned long long)data->args.offset);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task)) {
+-		ret = PTR_ERR(task);
+-		goto out;
+-	}
+-	if (how & FLUSH_SYNC) {
+-		ret = rpc_wait_for_completion_task(task);
+-		if (ret == 0)
+-			ret = task->tk_status;
+-	}
+-	rpc_put_task(task);
+-out:
+-	return ret;
++	return pnfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
+ }
+ 
+ /* If a nfs_flush_* function fails, it should remove reqs from @head and
+@@ -859,6 +909,7 @@ static void nfs_redirty_request(struct n
+ {
+ 	struct page *page = req->wb_page;
+ 
++	nfs_mark_request_nopnfs(req);
+ 	nfs_mark_request_dirty(req);
+ 	nfs_clear_page_tag_locked(req);
+ 	nfs_end_page_writeback(page);
+@@ -971,6 +1022,10 @@ static void nfs_pageio_init_write(struct
+ {
+ 	size_t wsize = NFS_SERVER(inode)->wsize;
+ 
++#ifdef CONFIG_NFS_V4_1
++	pnfs_pageio_init_write(pgio, inode, &wsize);
++#endif /* CONFIG_NFS_V4_1 */
++
+ 	if (wsize < PAGE_CACHE_SIZE)
+ 		nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
+ 	else
+@@ -1036,13 +1091,27 @@ out:
+ void nfs_write_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs_write_data *data = calldata;
+-	struct nfs_client *clp = (NFS_SERVER(data->inode))->nfs_client;
++	struct nfs4_session *ds_session = NULL;
+ 
+-	if (nfs4_setup_sequence(clp, &data->args.seq_args,
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		ds_session = data->fldata.ds_nfs_client->cl_session;
++	} else if (data->args.count > NFS_SERVER(data->inode)->wsize) {
++		/* retrying via MDS? */
++		data->pdata.orig_count = data->args.count;
++		data->args.count = NFS_SERVER(data->inode)->wsize;
++		dprintk("%s: trimmed count %u to wsize %u\n", __func__,
++		data->pdata.orig_count, data->args.count);
++	} else
++		data->pdata.orig_count = 0;
++
++	if (nfs4_setup_sequence(NFS_SERVER(data->inode), ds_session,
++				&data->args.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
++EXPORT_SYMBOL(nfs_write_prepare);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static const struct rpc_call_ops nfs_write_partial_ops = {
+@@ -1126,10 +1195,11 @@ int nfs_writeback_done(struct rpc_task *
+ 	struct nfs_writeargs	*argp = &data->args;
+ 	struct nfs_writeres	*resp = &data->res;
+ 	struct nfs_server	*server = NFS_SERVER(data->inode);
++	struct nfs_client	*clp = server->nfs_client;
+ 	int status;
+ 
+-	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
+-		task->tk_pid, task->tk_status);
++	dprintk("NFS: %5u nfs_writeback_done (status %d count %u)\n",
++		task->tk_pid, task->tk_status, resp->count);
+ 
+ 	/*
+ 	 * ->write_done will attempt to use post-op attributes to detect
+@@ -1142,6 +1212,13 @@ int nfs_writeback_done(struct rpc_task *
+ 	if (status != 0)
+ 		return status;
+ 	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
++#ifdef CONFIG_NFS_V4_1
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		clp = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+ 	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
+@@ -1158,7 +1235,7 @@ int nfs_writeback_done(struct rpc_task *
+ 		if (time_before(complain, jiffies)) {
+ 			dprintk("NFS:       faulty NFS server %s:"
+ 				" (committed = %d) != (stable = %d)\n",
+-				server->nfs_client->cl_hostname,
++				clp->cl_hostname,
+ 				resp->verf->committed, argp->stable);
+ 			complain = jiffies + 300 * HZ;
+ 		}
+@@ -1168,6 +1245,9 @@ int nfs_writeback_done(struct rpc_task *
+ 	if (task->tk_status >= 0 && resp->count < argp->count) {
+ 		static unsigned long    complain;
+ 
++		dprintk("NFS:       short write:"
++			" (resp->count %u) < (argp->count = %u)\n",
++			resp->count, argp->count);
+ 		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+ 
+ 		/* Has the server at least made some progress? */
+@@ -1184,7 +1264,10 @@ int nfs_writeback_done(struct rpc_task *
+ 				 */
+ 				argp->stable = NFS_FILE_SYNC;
+ 			}
+-			nfs_restart_rpc(task, server->nfs_client);
++#ifdef CONFIG_NFS_V4_1
++			data->pdata.pnfs_error = -EAGAIN;
++#endif /* CONFIG_NFS_V4_1 */
++			nfs_restart_rpc(task, clp);
+ 			return -EAGAIN;
+ 		}
+ 		if (time_before(complain, jiffies)) {
+@@ -1228,40 +1311,73 @@ static void nfs_commitdata_release(void 
+ 	nfs_commit_free(wdata);
+ }
+ 
+-/*
+- * Set up the argument/result storage required for the RPC call.
+- */
+-static int nfs_commit_rpcsetup(struct list_head *head,
+-		struct nfs_write_data *data,
+-		int how)
++int nfs_initiate_commit(struct nfs_write_data *data,
++			struct rpc_clnt *clnt,
++			const struct rpc_call_ops *call_ops,
++			int how)
+ {
+-	struct nfs_page *first = nfs_list_entry(head->next);
+-	struct inode *inode = first->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int priority = flush_task_priority(how);
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = first->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+ 		.task = &data->task,
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.rpc_message = &msg,
+-		.callback_ops = &nfs_commit_ops,
++		.callback_ops = call_ops,
+ 		.callback_data = data,
+ 		.workqueue = nfsiod_workqueue,
+ 		.flags = RPC_TASK_ASYNC,
+ 		.priority = priority,
+ 	};
+ 
++	/* Set up the initial task struct.  */
++	NFS_PROTO(inode)->commit_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	rpc_put_task(task);
++	return 0;
++}
++EXPORT_SYMBOL(nfs_initiate_commit);
++
++
++int pnfs_initiate_commit(struct nfs_write_data *data,
++			 struct rpc_clnt *clnt,
++			 const struct rpc_call_ops *call_ops,
++			 int how, int pnfs)
++{
++	if (pnfs &&
++	    (pnfs_try_to_commit(data, call_ops, how) == PNFS_ATTEMPTED))
++		return pnfs_get_write_status(data);
++
++	return nfs_initiate_commit(data, clnt, call_ops, how);
++}
++
++/*
++ * Set up the argument/result storage required for the RPC call.
++ */
++static int nfs_commit_rpcsetup(struct list_head *head,
++		struct nfs_write_data *data,
++		int how, int pnfs)
++{
++	struct nfs_page *first = nfs_list_entry(head->next);
++	struct inode *inode = first->wb_context->path.dentry->d_inode;
++
+ 	/* Set up the RPC argument and reply structs
+ 	 * NB: take care not to mess about with data->commit et al. */
+ 
+ 	list_splice_init(head, &data->pages);
+ 
+ 	data->inode	  = inode;
+-	data->cred	  = msg.rpc_cred;
++	data->cred	  = first->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(data->inode);
+ 	/* Note: we always request a commit of the entire inode */
+@@ -1272,45 +1388,47 @@ static int nfs_commit_rpcsetup(struct li
+ 	data->res.fattr   = &data->fattr;
+ 	data->res.verf    = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
++	kref_init(&data->refcount);
++	data->parent      = NULL;
++	data->args.context = first->wb_context;  /* used by commit done */
+ 
+-	/* Set up the initial task struct.  */
+-	NFS_PROTO(inode)->commit_setup(data, &msg);
++	return pnfs_initiate_commit(data, NFS_CLIENT(inode), &nfs_commit_ops,
++				    how, pnfs);
++}
+ 
+-	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
++/* Handle memory error during commit */
++void nfs_mark_list_commit(struct list_head *head)
++{
++	struct nfs_page         *req;
+ 
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task))
+-		return PTR_ERR(task);
+-	rpc_put_task(task);
+-	return 0;
++	while (!list_empty(head)) {
++		req = nfs_list_entry(head->next);
++		nfs_list_remove_request(req);
++		nfs_mark_request_commit(req);
++		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
++		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
++				BDI_RECLAIMABLE);
++		nfs_clear_page_tag_locked(req);
++	}
+ }
++EXPORT_SYMBOL(nfs_mark_list_commit);
+ 
+ /*
+  * Commit dirty pages
+  */
+ static int
+-nfs_commit_list(struct inode *inode, struct list_head *head, int how)
++nfs_commit_list(struct inode *inode, struct list_head *head, int how, int pnfs)
+ {
+ 	struct nfs_write_data	*data;
+-	struct nfs_page         *req;
+ 
+ 	data = nfs_commitdata_alloc();
+-
+ 	if (!data)
+ 		goto out_bad;
+ 
+ 	/* Set up the argument struct */
+-	return nfs_commit_rpcsetup(head, data, how);
++	return nfs_commit_rpcsetup(head, data, how, pnfs);
+  out_bad:
+-	while (!list_empty(head)) {
+-		req = nfs_list_entry(head->next);
+-		nfs_list_remove_request(req);
+-		nfs_mark_request_commit(req);
+-		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+-		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
+-				BDI_RECLAIMABLE);
+-		nfs_clear_page_tag_locked(req);
+-	}
++	nfs_mark_list_commit(head);
+ 	nfs_commit_clear_lock(NFS_I(inode));
+ 	return -ENOMEM;
+ }
+@@ -1330,6 +1448,19 @@ static void nfs_commit_done(struct rpc_t
+ 		return;
+ }
+ 
++static inline void nfs_commit_cleanup(struct kref *kref)
++{
++	struct nfs_write_data *data;
++
++	data = container_of(kref, struct nfs_write_data, refcount);
++	/* Clear lock only when all cloned commits are finished */
++	if (data->parent)
++		kref_put(&data->parent->refcount, nfs_commit_cleanup);
++	else
++		nfs_commit_clear_lock(NFS_I(data->inode));
++	nfs_commitdata_release(data);
++}
++
+ static void nfs_commit_release(void *calldata)
+ {
+ 	struct nfs_write_data	*data = calldata;
+@@ -1347,6 +1478,11 @@ static void nfs_commit_release(void *cal
+ 			req->wb_bytes,
+ 			(long long)req_offset(req));
+ 		if (status < 0) {
++			if (req->wb_lseg) {
++				nfs_mark_request_nopnfs(req);
++				nfs_mark_request_dirty(req);
++				goto next;
++			}
+ 			nfs_context_set_write_error(req->wb_context, status);
+ 			nfs_inode_remove_request(req);
+ 			dprintk(", error = %d\n", status);
+@@ -1363,12 +1499,12 @@ static void nfs_commit_release(void *cal
+ 		}
+ 		/* We have a mismatch. Write the page again */
+ 		dprintk(" mismatch\n");
++		nfs_mark_request_nopnfs(req);
+ 		nfs_mark_request_dirty(req);
+ 	next:
+ 		nfs_clear_page_tag_locked(req);
+ 	}
+-	nfs_commit_clear_lock(NFS_I(data->inode));
+-	nfs_commitdata_release(calldata);
++	kref_put(&data->refcount, nfs_commit_cleanup);
+ }
+ 
+ static const struct rpc_call_ops nfs_commit_ops = {
+@@ -1384,21 +1520,22 @@ int nfs_commit_inode(struct inode *inode
+ 	LIST_HEAD(head);
+ 	int may_wait = how & FLUSH_SYNC;
+ 	int res = 0;
++	int use_pnfs = 0;
+ 
+ 	if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
+ 		goto out_mark_dirty;
+ 	spin_lock(&inode->i_lock);
+-	res = nfs_scan_commit(inode, &head, 0, 0);
++	res = nfs_scan_commit(inode, &head, 0, 0, &use_pnfs);
+ 	spin_unlock(&inode->i_lock);
+ 	if (res) {
+-		int error = nfs_commit_list(inode, &head, how);
++		int error = nfs_commit_list(inode, &head, how, use_pnfs);
+ 		if (error < 0)
+ 			return error;
+-		if (may_wait)
++		if (may_wait) {
+ 			wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
+ 					nfs_wait_bit_killable,
+ 					TASK_KILLABLE);
+-		else
++		} else
+ 			goto out_mark_dirty;
+ 	} else
+ 		nfs_commit_clear_lock(NFS_I(inode));
+@@ -1451,7 +1588,18 @@ static int nfs_commit_unstable_pages(str
+ 
+ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+ {
+-	return nfs_commit_unstable_pages(inode, wbc);
++	int ret;
++	ret = nfs_commit_unstable_pages(inode, wbc);
++	if (ret >= 0 && layoutcommit_needed(NFS_I(inode))) {
++		int err, sync = wbc->sync_mode;
++
++		if (wbc->nonblocking || wbc->for_background)
++			sync = 0;
++		err = pnfs_layoutcommit_inode(inode, sync);
++		if (err < 0)
++			ret = err;
++	}
++	return ret;
+ }
+ 
+ /*
+@@ -1459,6 +1607,7 @@ int nfs_write_inode(struct inode *inode,
+  */
+ int nfs_wb_all(struct inode *inode)
+ {
++	int ret;
+ 	struct writeback_control wbc = {
+ 		.sync_mode = WB_SYNC_ALL,
+ 		.nr_to_write = LONG_MAX,
+@@ -1466,7 +1615,8 @@ int nfs_wb_all(struct inode *inode)
+ 		.range_end = LLONG_MAX,
+ 	};
+ 
+-	return sync_inode(inode, &wbc);
++	ret = sync_inode(inode, &wbc);
++	return ret;
+ }
+ 
+ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
+diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/include/linux/exportfs.h
+--- linux-2.6.34.noarch/include/linux/exportfs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-08-23 12:09:03.365501459 -0400
+@@ -2,6 +2,7 @@
+ #define LINUX_EXPORTFS_H 1
+ 
+ #include <linux/types.h>
++#include <linux/exp_xdr.h>
+ 
+ struct dentry;
+ struct inode;
+@@ -175,4 +176,62 @@ extern struct dentry *generic_fh_to_pare
+ 	struct fid *fid, int fh_len, int fh_type,
+ 	struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen));
+ 
++#if defined(CONFIG_EXPORTFS_FILE_LAYOUT)
++struct pnfs_filelayout_device;
++struct pnfs_filelayout_layout;
++
++extern int filelayout_encode_devinfo(struct exp_xdr_stream *xdr,
++				     const struct pnfs_filelayout_device *fdev);
++extern enum nfsstat4 filelayout_encode_layout(struct exp_xdr_stream *xdr,
++				      const struct pnfs_filelayout_layout *flp);
++#endif /* defined(CONFIG_EXPORTFS_FILE_LAYOUT) */
++
++#if defined(CONFIG_EXPORTFS_FILE_LAYOUT)
++struct list_head;
++
++extern int blocklayout_encode_devinfo(struct exp_xdr_stream *xdr,
++				      const struct list_head *volumes);
++
++extern enum nfsstat4 blocklayout_encode_layout(struct exp_xdr_stream *xdr,
++					       const struct list_head *layouts);
++#endif /* defined(CONFIG_EXPORTFS_FILE_LAYOUT) */
++
++#if defined(CONFIG_PNFSD)
++#include <linux/module.h>
++
++struct pnfsd_cb_operations;
++
++struct pnfsd_cb_ctl {
++	spinlock_t lock;
++	struct module *module;
++	const struct pnfsd_cb_operations *cb_op;
++};
++
++/* in expfs.c so that file systems can depend on it */
++extern struct pnfsd_cb_ctl pnfsd_cb_ctl;
++
++static inline int
++pnfsd_get_cb_op(struct pnfsd_cb_ctl *ctl)
++{
++	int ret = -ENOENT;
++
++	spin_lock(&pnfsd_cb_ctl.lock);
++	if (!pnfsd_cb_ctl.cb_op)
++		goto out;
++	if (!try_module_get(pnfsd_cb_ctl.module))
++		goto out;
++	ctl->cb_op = pnfsd_cb_ctl.cb_op;
++	ctl->module = pnfsd_cb_ctl.module;
++	ret = 0;
++out:
++	spin_unlock(&pnfsd_cb_ctl.lock);
++	return ret;
++}
++
++static inline void
++pnfsd_put_cb_op(struct pnfsd_cb_ctl *ctl)
++{
++	module_put(ctl->module);
++}
++#endif /* CONFIG_PNFSD */
+ #endif /* LINUX_EXPORTFS_H */
+diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/include/linux/exp_xdr.h
+--- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-08-23 12:09:03.367491365 -0400
++++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-08-23 12:09:03.367491365 -0400
+@@ -0,0 +1,141 @@
++#ifndef _LINUX_EXP_XDR_H
++#define _LINUX_EXP_XDR_H
++
++#include <asm/byteorder.h>
++#include <asm/unaligned.h>
++#include <linux/string.h>
++
++struct exp_xdr_stream {
++	__be32 *p;
++	__be32 *end;
++};
++
++/**
++ * exp_xdr_qwords - Calculate the number of quad-words holding nbytes
++ * @nbytes: number of bytes to encode
++ */
++static inline size_t
++exp_xdr_qwords(__u32 nbytes)
++{
++	return DIV_ROUND_UP(nbytes, 4);
++}
++
++/**
++ * exp_xdr_qbytes - Calculate the number of bytes holding qwords
++ * @qwords: number of quad-words to encode
++ */
++static inline size_t
++exp_xdr_qbytes(size_t qwords)
++{
++	return qwords << 2;
++}
++
++/**
++ * exp_xdr_reserve_space - Reserve buffer space for sending
++ * @xdr: pointer to exp_xdr_stream
++ * @nbytes: number of bytes to reserve
++ *
++ * Checks that we have enough buffer space to encode 'nbytes' more
++ * bytes of data. If so, update the xdr stream.
++ */
++static inline __be32 *
++exp_xdr_reserve_space(struct exp_xdr_stream *xdr, size_t nbytes)
++{
++	__be32 *p = xdr->p;
++	__be32 *q;
++
++	/* align nbytes on the next 32-bit boundary */
++	q = p + exp_xdr_qwords(nbytes);
++	if (unlikely(q > xdr->end || q < p))
++		return NULL;
++	xdr->p = q;
++	return p;
++}
++
++/**
++ * exp_xdr_reserve_qwords - Reserve buffer space for sending
++ * @xdr: pointer to exp_xdr_stream
++ * @qwords: number of quad words (u32's) to reserve
++ */
++static inline __be32 *
++exp_xdr_reserve_qwords(struct exp_xdr_stream *xdr, size_t qwords)
++{
++	return exp_xdr_reserve_space(xdr, exp_xdr_qbytes(qwords));
++}
++
++/**
++ * exp_xdr_encode_u32 - Encode an unsigned 32-bit value onto a xdr stream
++ * @p: pointer to encoding destination
++ * @val: value to encode
++ */
++static inline __be32 *
++exp_xdr_encode_u32(__be32 *p, __u32 val)
++{
++	*p = cpu_to_be32(val);
++	return p + 1;
++}
++
++/**
++ * exp_xdr_encode_u64 - Encode an unsigned 64-bit value onto a xdr stream
++ * @p: pointer to encoding destination
++ * @val: value to encode
++ */
++static inline __be32 *
++exp_xdr_encode_u64(__be32 *p, __u64 val)
++{
++	put_unaligned_be64(val, p);
++	return p + 2;
++}
++
++/**
++ * exp_xdr_encode_bytes - Encode an array of bytes onto a xdr stream
++ * @p: pointer to encoding destination
++ * @ptr: pointer to the array of bytes
++ * @nbytes: number of bytes to encode
++ */
++static inline __be32 *
++exp_xdr_encode_bytes(__be32 *p, const void *ptr, __u32 nbytes)
++{
++	if (likely(nbytes != 0)) {
++		unsigned int qwords = exp_xdr_qwords(nbytes);
++		unsigned int padding = exp_xdr_qbytes(qwords) - nbytes;
++
++		memcpy(p, ptr, nbytes);
++		if (padding != 0)
++			memset((char *)p + nbytes, 0, padding);
++		p += qwords;
++	}
++	return p;
++}
++
++/**
++ * exp_xdr_encode_opaque - Encode an opaque type onto a xdr stream
++ * @p: pointer to encoding destination
++ * @ptr: pointer to the opaque array
++ * @nbytes: number of bytes to encode
++ *
++ * Encodes the 32-bit opaque size in bytes followed by the opaque value.
++ */
++static inline __be32 *
++exp_xdr_encode_opaque(__be32 *p, const void *ptr, __u32 nbytes)
++{
++	p = exp_xdr_encode_u32(p, nbytes);
++	return exp_xdr_encode_bytes(p, ptr, nbytes);
++}
++
++/**
++ * exp_xdr_encode_opaque_len - Encode the opaque length onto a xdr stream
++ * @lenp: pointer to the opaque length destination
++ * @endp: pointer to the end of the opaque array
++ *
++ * Encodes the 32-bit opaque size in bytes given the start and end pointers
++ */
++static inline __be32 *
++exp_xdr_encode_opaque_len(__be32 *lenp, const void *endp)
++{
++	size_t nbytes = (char *)endp - (char *)(lenp + 1);
++
++	exp_xdr_encode_u32(lenp, nbytes);
++	return lenp + 1 + exp_xdr_qwords(nbytes);
++}
++#endif /* _LINUX_EXP_XDR_H */
+diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include/linux/fs.h
+--- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-08-23 12:08:29.021511898 -0400
++++ linux-2.6.34.noarch/include/linux/fs.h	2010-08-23 12:09:03.369481147 -0400
+@@ -387,6 +387,7 @@ struct inodes_stat_t {
+ #include <asm/byteorder.h>
+ 
+ struct export_operations;
++struct pnfs_export_operations;
+ struct hd_geometry;
+ struct iovec;
+ struct nameidata;
+@@ -1329,6 +1330,7 @@ struct super_block {
+ 	const struct dquot_operations	*dq_op;
+ 	const struct quotactl_ops	*s_qcop;
+ 	const struct export_operations *s_export_op;
++	const struct pnfs_export_operations *s_pnfs_op;
+ 	unsigned long		s_flags;
+ 	unsigned long		s_magic;
+ 	struct dentry		*s_root;
+diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/include/linux/nfs4.h
+--- linux-2.6.34.noarch/include/linux/nfs4.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-08-23 12:09:03.371491472 -0400
+@@ -17,7 +17,10 @@
+ 
+ #define NFS4_BITMAP_SIZE	2
+ #define NFS4_VERIFIER_SIZE	8
+-#define NFS4_STATEID_SIZE	16
++#define NFS4_CLIENTID_SIZE	8
++#define NFS4_STATEID_SEQID_SIZE 4
++#define NFS4_STATEID_OTHER_SIZE 12
++#define NFS4_STATEID_SIZE	(NFS4_STATEID_SEQID_SIZE + NFS4_STATEID_OTHER_SIZE)
+ #define NFS4_FHSIZE		128
+ #define NFS4_MAXPATHLEN		PATH_MAX
+ #define NFS4_MAXNAMLEN		NAME_MAX
+@@ -119,6 +122,13 @@
+ #define EXCHGID4_FLAG_MASK_A			0x40070003
+ #define EXCHGID4_FLAG_MASK_R			0x80070003
+ 
++static inline bool
++is_ds_only_session(u32 exchange_flags)
++{
++	u32 mask = EXCHGID4_FLAG_USE_PNFS_DS | EXCHGID4_FLAG_USE_PNFS_MDS;
++	return (exchange_flags & mask) == EXCHGID4_FLAG_USE_PNFS_DS;
++}
++
+ #define SEQ4_STATUS_CB_PATH_DOWN		0x00000001
+ #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING	0x00000002
+ #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED	0x00000004
+@@ -166,8 +176,25 @@ struct nfs4_acl {
+ 	struct nfs4_ace	aces[0];
+ };
+ 
++struct nfs4_fsid {
++	u64	major;
++	u64	minor;
++};
++
+ typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier;
+-typedef struct { char data[NFS4_STATEID_SIZE]; } nfs4_stateid;
++typedef struct { char data[NFS4_CLIENTID_SIZE]; } nfs4_clientid;
++
++struct nfs41_stateid {
++	__be32 seqid;
++	char other[NFS4_STATEID_OTHER_SIZE];
++} __attribute__ ((packed));
++
++typedef struct {
++	union {
++		char data[NFS4_STATEID_SIZE];
++		struct nfs41_stateid stateid;
++	} u;
++} nfs4_stateid;
+ 
+ enum nfs_opnum4 {
+ 	OP_ACCESS = 3,
+@@ -471,6 +498,8 @@ enum lock_type4 {
+ #define FATTR4_WORD1_TIME_MODIFY        (1UL << 21)
+ #define FATTR4_WORD1_TIME_MODIFY_SET    (1UL << 22)
+ #define FATTR4_WORD1_MOUNTED_ON_FILEID  (1UL << 23)
++#define FATTR4_WORD1_FS_LAYOUT_TYPES    (1UL << 30)
++#define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
+ 
+ #define NFSPROC4_NULL 0
+ #define NFSPROC4_COMPOUND 1
+@@ -523,6 +552,7 @@ enum {
+ 	NFSPROC4_CLNT_GETACL,
+ 	NFSPROC4_CLNT_SETACL,
+ 	NFSPROC4_CLNT_FS_LOCATIONS,
++	NFSPROC4_CLNT_RELEASE_LOCKOWNER,
+ 
+ 	/* nfs41 */
+ 	NFSPROC4_CLNT_EXCHANGE_ID,
+@@ -531,6 +561,13 @@ enum {
+ 	NFSPROC4_CLNT_SEQUENCE,
+ 	NFSPROC4_CLNT_GET_LEASE_TIME,
+ 	NFSPROC4_CLNT_RECLAIM_COMPLETE,
++	NFSPROC4_CLNT_PNFS_LAYOUTGET,
++	NFSPROC4_CLNT_PNFS_LAYOUTCOMMIT,
++	NFSPROC4_CLNT_PNFS_LAYOUTRETURN,
++	NFSPROC4_CLNT_PNFS_GETDEVICELIST,
++	NFSPROC4_CLNT_PNFS_GETDEVICEINFO,
++	NFSPROC4_CLNT_PNFS_WRITE,
++	NFSPROC4_CLNT_PNFS_COMMIT,
+ };
+ 
+ /* nfs41 types */
+@@ -549,6 +586,43 @@ enum state_protect_how4 {
+ 	SP4_SSV		= 2
+ };
+ 
++enum pnfs_layouttype {
++	LAYOUT_NFSV4_1_FILES  = 1,
++	LAYOUT_OSD2_OBJECTS = 2,
++	LAYOUT_BLOCK_VOLUME = 3,
++};
++
++/* used for both layout return and recall */
++enum pnfs_layoutreturn_type {
++	RETURN_FILE = 1,
++	RETURN_FSID = 2,
++	RETURN_ALL  = 3
++};
++
++enum pnfs_iomode {
++	IOMODE_READ = 1,
++	IOMODE_RW = 2,
++	IOMODE_ANY = 3,
++};
++
++enum pnfs_notify_deviceid_type4 {
++	NOTIFY_DEVICEID4_CHANGE = 1 << 1,
++	NOTIFY_DEVICEID4_DELETE = 1 << 2,
++};
++
++#define NFL4_UFLG_MASK			0x0000003F
++#define NFL4_UFLG_DENSE			0x00000001
++#define NFL4_UFLG_COMMIT_THRU_MDS	0x00000002
++#define NFL4_UFLG_STRIPE_UNIT_SIZE_MASK	0xFFFFFFC0
++
++/* Encoded in the loh_body field of type layouthint4 */
++enum filelayout_hint_care4 {
++	NFLH4_CARE_DENSE		= NFL4_UFLG_DENSE,
++	NFLH4_CARE_COMMIT_THRU_MDS	= NFL4_UFLG_COMMIT_THRU_MDS,
++	NFLH4_CARE_STRIPE_UNIT_SIZE	= 0x00000040,
++	NFLH4_CARE_STRIPE_COUNT		= 0x00000080
++};
++
+ #endif
+ #endif
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfs4_pnfs.h
+--- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-08-23 12:09:03.372501550 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-08-23 12:09:03.372501550 -0400
+@@ -0,0 +1,330 @@
++/*
++ *  include/linux/nfs4_pnfs.h
++ *
++ *  Common data structures needed by the pnfs client and pnfs layout driver.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef LINUX_NFS4_PNFS_H
++#define LINUX_NFS4_PNFS_H
++
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs_page.h>
++
++enum pnfs_try_status {
++	PNFS_ATTEMPTED     = 0,
++	PNFS_NOT_ATTEMPTED = 1,
++};
++
++#define NFS4_PNFS_GETDEVLIST_MAXNUM 16
++
++/* Per-layout driver specific registration structure */
++struct pnfs_layoutdriver_type {
++	const u32 id;
++	const char *name;
++	struct layoutdriver_io_operations *ld_io_ops;
++	struct layoutdriver_policy_operations *ld_policy_ops;
++};
++
++struct pnfs_fsdata {
++	int bypass_eof;
++	struct pnfs_layout_segment *lseg;
++	void *private;
++};
++
++#if defined(CONFIG_NFS_V4_1)
++
++static inline struct nfs_inode *
++PNFS_NFS_INODE(struct pnfs_layout_type *lo)
++{
++	return NFS_I(lo->lo_inode);
++}
++
++static inline struct inode *
++PNFS_INODE(struct pnfs_layout_type *lo)
++{
++	return lo->lo_inode;
++}
++
++static inline struct nfs_server *
++PNFS_NFS_SERVER(struct pnfs_layout_type *lo)
++{
++	return NFS_SERVER(PNFS_INODE(lo));
++}
++
++static inline struct pnfs_layoutdriver_type *
++PNFS_LD(struct pnfs_layout_type *lo)
++{
++	return NFS_SERVER(PNFS_INODE(lo))->pnfs_curr_ld;
++}
++
++static inline struct layoutdriver_io_operations *
++PNFS_LD_IO_OPS(struct pnfs_layout_type *lo)
++{
++	return PNFS_LD(lo)->ld_io_ops;
++}
++
++static inline struct layoutdriver_policy_operations *
++PNFS_LD_POLICY_OPS(struct pnfs_layout_type *lo)
++{
++	return PNFS_LD(lo)->ld_policy_ops;
++}
++
++static inline bool
++has_layout(struct nfs_inode *nfsi)
++{
++	return nfsi->layout != NULL;
++}
++
++static inline bool
++layoutcommit_needed(struct nfs_inode *nfsi)
++{
++	return has_layout(nfsi) &&
++	       test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->pnfs_layout_state);
++}
++
++extern void put_lseg(struct pnfs_layout_segment *lseg);
++extern void get_lseg(struct pnfs_layout_segment *lseg);
++
++#else /* CONFIG_NFS_V4_1 */
++
++static inline bool
++has_layout(struct nfs_inode *nfsi)
++{
++	return false;
++}
++
++static inline bool
++layoutcommit_needed(struct nfs_inode *nfsi)
++{
++	return false;	/* !CONFIG_NFS_V4_1 stub: has_layout() is always false */
++}
++
++#endif /* CONFIG_NFS_V4_1 */
++
++struct pnfs_layout_segment {
++	struct list_head fi_list;
++	struct nfs4_pnfs_layout_segment range;
++	struct kref kref;
++	bool valid;
++	struct pnfs_layout_type *layout;
++	struct nfs4_deviceid *deviceid;
++	u8 ld_data[];			/* layout driver private data */
++};
++
++static inline void *
++LSEG_LD_DATA(struct pnfs_layout_segment *lseg)
++{
++	return lseg->ld_data;
++}
++
++/* Layout driver I/O operations.
++ * Either the pagecache or non-pagecache read/write operations must be implemented
++ */
++struct layoutdriver_io_operations {
++	/* Functions that use the pagecache.
++	 * If use_pagecache == 1, then these functions must be implemented.
++	 */
++	/* read and write pagelist should return just 0 (to indicate that
++	 * the layout code has taken control) or 1 (to indicate that the
++	 * layout code wishes to fall back to normal nfs.)  If 0 is returned,
++	 * information can be passed back through nfs_data->res and
++	 * nfs_data->task.tk_status, and the appropriate pnfs done function
++	 * MUST be called.
++	 */
++	enum pnfs_try_status
++	(*read_pagelist) (struct nfs_read_data *nfs_data, unsigned nr_pages);
++	enum pnfs_try_status
++	(*write_pagelist) (struct nfs_write_data *nfs_data, unsigned nr_pages, int how);
++	int (*write_begin) (struct pnfs_layout_segment *lseg, struct page *page,
++			    loff_t pos, unsigned count,
++			    struct pnfs_fsdata *fsdata);
++	int (*write_end)(struct inode *inode, struct page *page, loff_t pos,
++			 unsigned count, unsigned copied,
++			 struct pnfs_layout_segment *lseg);
++	void (*write_end_cleanup)(struct file *filp,
++				  struct pnfs_fsdata *fsdata);
++
++	/* Consistency ops */
++	/* 2 problems:
++	 * 1) the page list contains nfs_pages, NOT pages
++	 * 2) currently the NFS code doesn't create a page array (as it does with read/write)
++	 */
++	enum pnfs_try_status
++	(*commit) (struct nfs_write_data *nfs_data, int how);
++
++	/* Layout information. For each inode, alloc_layout is executed once to retrieve an
++	 * inode specific layout structure.  Each subsequent layoutget operation results in
++	 * a set_layout call to set the opaque layout in the layout driver.*/
++	struct pnfs_layout_type * (*alloc_layout) (struct inode *inode);
++	void (*free_layout) (struct pnfs_layout_type *);
++	struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_type *layoutid, struct nfs4_pnfs_layoutget_res *lgr);
++	void (*free_lseg) (struct pnfs_layout_segment *lseg);
++
++	int (*setup_layoutcommit) (struct pnfs_layout_type *layoutid,
++				struct pnfs_layoutcommit_arg *args);
++
++	void (*encode_layoutcommit) (struct pnfs_layout_type *layoutid,
++				struct xdr_stream *xdr,
++				const struct pnfs_layoutcommit_arg *args);
++	void (*cleanup_layoutcommit) (struct pnfs_layout_type *layoutid,
++				      struct pnfs_layoutcommit_arg *args,
++				      int status);
++	void (*encode_layoutreturn) (struct pnfs_layout_type *layoutid,
++				struct xdr_stream *xdr,
++				const struct nfs4_pnfs_layoutreturn_arg *args);
++
++	/* Registration information for a new mounted file system
++	 */
++	int (*initialize_mountpoint) (struct nfs_server *,
++				      const struct nfs_fh * mntfh);
++	int (*uninitialize_mountpoint) (struct nfs_server *server);
++};
++
++enum layoutdriver_policy_flags {
++	/* Should the full nfs rpc cleanup code be used after io */
++	PNFS_USE_RPC_CODE		= 1 << 0,
++
++	/* Should the NFS req. gather algorithm cross stripe boundaries? */
++	PNFS_GATHER_ACROSS_STRIPES	= 1 << 1,
++
++	/* Should the pNFS client commit and return the layout upon a setattr */
++	PNFS_LAYOUTRET_ON_SETATTR	= 1 << 3,
++};
++
++struct layoutdriver_policy_operations {
++	unsigned flags;
++
++	/* The stripe size of the file system */
++	ssize_t (*get_stripesize) (struct pnfs_layout_type *layoutid);
++
++	/* test for nfs page cache coalescing */
++	int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
++
++	/* Retrieve the block size of the file system.
++	 * If gather_across_stripes == 1, then the file system will gather
++	 * requests into the block size.
++	 * TODO: Where will the layout driver get this info?  It is hard
++	 * coded in PVFS2.
++	 */
++	ssize_t (*get_blocksize) (void);
++};
++
++/* Should the full nfs rpc cleanup code be used after io */
++static inline int
++pnfs_ld_use_rpc_code(struct pnfs_layoutdriver_type *ld)
++{
++	return ld->ld_policy_ops->flags & PNFS_USE_RPC_CODE;
++}
++
++/* Should the NFS req. gather algorithm cross stripe boundaries? */
++static inline int
++pnfs_ld_gather_across_stripes(struct pnfs_layoutdriver_type *ld)
++{
++	return ld->ld_policy_ops->flags & PNFS_GATHER_ACROSS_STRIPES;
++}
++
++struct pnfs_device {
++	struct pnfs_deviceid dev_id;
++	unsigned int  layout_type;
++	unsigned int  mincount;
++	struct page **pages;
++	void          *area;
++	unsigned int  pgbase;
++	unsigned int  pglen;
++	unsigned int  dev_notify_types;
++};
++
++struct pnfs_devicelist {
++	unsigned int		eof;
++	unsigned int		num_devs;
++	struct pnfs_deviceid	dev_id[NFS4_PNFS_GETDEVLIST_MAXNUM];
++};
++
++/*
++ * Device ID RCU cache. A device ID is unique per client ID and layout type.
++ */
++#define NFS4_DEVICE_ID_HASH_BITS	5
++#define NFS4_DEVICE_ID_HASH_SIZE	(1 << NFS4_DEVICE_ID_HASH_BITS)
++#define NFS4_DEVICE_ID_HASH_MASK	(NFS4_DEVICE_ID_HASH_SIZE - 1)
++
++static inline u32
++nfs4_deviceid_hash(struct pnfs_deviceid *id)
++{
++	unsigned char *cptr = (unsigned char *)id->data;
++	unsigned int nbytes = NFS4_PNFS_DEVICEID4_SIZE;
++	u32 x = 0;
++
++	while (nbytes--) {
++		x *= 37;
++		x += *cptr++;
++	}
++	return x & NFS4_DEVICE_ID_HASH_MASK;
++}
++
++struct nfs4_deviceid_cache {
++	spinlock_t		dc_lock;
++	struct kref		dc_kref;
++	void			(*dc_free_callback)(struct kref *);
++	struct hlist_head	dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
++};
++
++/* Device ID cache node */
++struct nfs4_deviceid {
++	struct hlist_node	de_node;
++	struct pnfs_deviceid	de_id;
++	struct kref		de_kref;
++};
++
++extern int nfs4_alloc_init_deviceid_cache(struct nfs_client *,
++				void (*free_callback)(struct kref *));
++extern void nfs4_put_deviceid_cache(struct nfs_client *);
++extern void nfs4_init_deviceid_node(struct nfs4_deviceid *);
++extern struct nfs4_deviceid *nfs4_find_deviceid(struct nfs4_deviceid_cache *,
++				struct pnfs_deviceid *);
++extern struct nfs4_deviceid *nfs4_add_deviceid(struct nfs4_deviceid_cache *,
++				struct nfs4_deviceid *);
++extern void nfs4_set_layout_deviceid(struct pnfs_layout_segment *,
++				struct nfs4_deviceid *);
++extern void nfs4_unset_layout_deviceid(struct pnfs_layout_segment *,
++				struct nfs4_deviceid *,
++				void (*free_callback)(struct kref *));
++extern void nfs4_delete_device(struct nfs4_deviceid_cache *,
++				struct pnfs_deviceid *);
++
++/* pNFS client callback functions.
++ * These operations allow the layout driver to access pNFS client
++ * specific information or call pNFS client->server operations.
++ * E.g., getdeviceinfo, I/O callbacks, etc
++ */
++struct pnfs_client_operations {
++	int (*nfs_getdevicelist) (struct nfs_server *,
++				  const struct nfs_fh *fh,
++				  struct pnfs_devicelist *devlist);
++	int (*nfs_getdeviceinfo) (struct nfs_server *,
++				  struct pnfs_device *dev);
++
++	/* Post read callback. */
++	void (*nfs_readlist_complete) (struct nfs_read_data *nfs_data);
++
++	/* Post write callback. */
++	void (*nfs_writelist_complete) (struct nfs_write_data *nfs_data);
++
++	/* Post commit callback. */
++	void (*nfs_commit_complete) (struct nfs_write_data *nfs_data);
++	void (*nfs_return_layout) (struct inode *);
++};
++
++extern struct pnfs_client_operations pnfs_ops;
++
++extern struct pnfs_client_operations *pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
++extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
++
++#define NFS4_PNFS_MAX_LAYOUTS 4
++#define NFS4_PNFS_PRIVATE_LAYOUT 0x80000000
++
++#endif /* LINUX_NFS4_PNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarch/include/linux/nfsd4_block.h
+--- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-08-23 12:09:03.373491892 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-08-23 12:09:03.374491393 -0400
+@@ -0,0 +1,101 @@
++#ifndef NFSD4_BLOCK
++#define NFSD4_BLOCK
++
++#include <linux/sunrpc/svc.h>
++#include <linux/sunrpc/svcauth.h>
++#include <linux/nfsd/nfsfh.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++#define PNFS_BLOCK_SUCCESS		1
++#define PNFS_BLOCK_FAILURE		0
++
++#define PNFS_BLOCK_CTL_START		1
++#define PNFS_BLOCK_CTL_STOP		2
++#define PNFS_BLOCK_CTL_VERS		3 /* Allows daemon to request current
++					   * version from kernel via an upcall.
++					   */
++
++#define PNFS_UPCALL_MSG_STOP	0
++#define PNFS_UPCALL_MSG_GETSIG	1
++#define PNFS_UPCALL_MSG_GETSLICE	2
++#define PNFS_UPCALL_MSG_DMCHK	3	// See if dev_t is a DM volume
++#define PNFS_UPCALL_MSG_DMGET	4
++#define PNFS_UPCALL_MSG_VERS	5
++
++#define PNFS_UPCALL_VERS		8
++
++typedef struct stripe_dev {
++	int	major,
++		minor,
++		offset;
++} stripe_dev_t;
++
++typedef struct bl_comm_res {
++	int				res_status;
++	union {
++		struct {
++			long long	start,
++					length;
++		} slice;
++		struct {
++			int		num_stripes,
++					stripe_size;
++			stripe_dev_t	devs[];
++		} stripe;
++		struct {
++			long long	sector;
++			int		offset,
++					len;
++			char		sig[];
++		} sig;
++		int			vers,
++					dm_vol;
++	} u;
++} bl_comm_res_t;
++
++typedef struct bl_comm_msg {
++	int		msg_type,
++			msg_status;
++	union {
++		dev_t	msg_dev;
++		int	msg_vers;
++	} u;
++	bl_comm_res_t	*msg_res;
++} bl_comm_msg_t;
++
++#ifdef __KERNEL__
++
++typedef struct bl_comm {
++	/* ---- protects access to this structure ---- */
++	struct mutex		lock;
++	/* ---- protects access to rpc pipe ---- */
++	struct mutex		pipe_lock;
++	struct dentry		*pipe_dentry;
++	wait_queue_head_t	pipe_wq;
++	bl_comm_msg_t		msg;
++} bl_comm_t;
++
++int pnfs_block_enabled(struct inode *, int);
++int bl_layout_type(struct super_block *sb);
++int bl_getdeviceiter(struct super_block *, u32 layout_type,
++		     struct nfsd4_pnfs_dev_iter_res *);
++int bl_getdeviceinfo(struct super_block *, struct exp_xdr_stream *,
++		     u32 layout_type,
++		     const struct nfsd4_pnfs_deviceid *);
++enum nfsstat4 bl_layoutget(struct inode *, struct exp_xdr_stream *,
++			   const struct nfsd4_pnfs_layoutget_arg *,
++			   struct nfsd4_pnfs_layoutget_res *);
++int bl_layoutcommit(struct inode *,
++		    const struct nfsd4_pnfs_layoutcommit_arg *,
++		    struct nfsd4_pnfs_layoutcommit_res *);
++int bl_layoutreturn(struct inode *,
++		    const struct nfsd4_pnfs_layoutreturn_arg *);
++int bl_layoutrecall(struct inode *inode, int type, u64 offset, u64 len);
++int bl_init_proc(void);
++int bl_upcall(bl_comm_t *, bl_comm_msg_t *, bl_comm_res_t **);
++
++extern bl_comm_t	*bl_comm_global;	// Ugly...
++#endif /* __KERNEL__ */
++
++#endif /* NFSD4_BLOCK */
++
+diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h
+--- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-08-23 12:09:03.375501481 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-08-23 12:09:03.375501481 -0400
+@@ -0,0 +1,345 @@
++/*
++ * include/linux/nfsd4_spnfs.h
++ *
++ * spNFS - simple pNFS implementation with userspace daemon
++ *
++ */
++
++/******************************************************************************
++
++(c) 2007 Network Appliance, Inc.  All Rights Reserved.
++
++Network Appliance provides this source code under the GPL v2 License.
++The GPL v2 license is available at
++http://opensource.org/licenses/gpl-license.php.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++******************************************************************************/
++
++#ifndef NFS_SPNFS_H
++#define NFS_SPNFS_H
++
++
++#ifdef __KERNEL__
++#include "exportfs.h"
++#include "sunrpc/svc.h"
++#include "nfsd/nfsfh.h"
++#else
++#include <sys/types.h>
++#endif /* __KERNEL__ */
++
++#define SPNFS_STATUS_INVALIDMSG		0x01
++#define SPNFS_STATUS_AGAIN		0x02
++#define SPNFS_STATUS_FAIL		0x04
++#define SPNFS_STATUS_SUCCESS		0x08
++
++#define SPNFS_TYPE_LAYOUTGET		0x01
++#define SPNFS_TYPE_LAYOUTCOMMIT		0x02
++#define SPNFS_TYPE_LAYOUTRETURN		0x03
++#define SPNFS_TYPE_GETDEVICEITER	0x04
++#define SPNFS_TYPE_GETDEVICEINFO	0x05
++#define SPNFS_TYPE_SETATTR		0x06
++#define SPNFS_TYPE_OPEN			0x07
++#define	SPNFS_TYPE_CLOSE		0x08
++#define SPNFS_TYPE_CREATE		0x09
++#define SPNFS_TYPE_REMOVE		0x0a
++#define SPNFS_TYPE_COMMIT		0x0b
++#define SPNFS_TYPE_READ			0x0c
++#define SPNFS_TYPE_WRITE		0x0d
++
++#define	SPNFS_MAX_DEVICES		1
++#define	SPNFS_MAX_DATA_SERVERS		16
++#define SPNFS_MAX_IO			512
++
++/* layout */
++struct spnfs_msg_layoutget_args {
++	unsigned long inode;
++	unsigned long generation;
++};
++
++struct spnfs_filelayout_list {
++	u_int32_t       fh_len;
++	unsigned char   fh_val[128]; /* DMXXX fix this const */
++};
++
++struct spnfs_msg_layoutget_res {
++	int status;
++	u_int64_t devid;
++	u_int64_t stripe_size;
++	u_int32_t stripe_type;
++	u_int32_t stripe_count;
++	struct spnfs_filelayout_list flist[SPNFS_MAX_DATA_SERVERS];
++};
++
++/* layoutcommit */
++struct spnfs_msg_layoutcommit_args {
++	unsigned long inode;
++	unsigned long generation;
++	u_int64_t file_size;
++};
++
++struct spnfs_msg_layoutcommit_res {
++	int status;
++};
++
++/* layoutreturn */
++/* No op for the daemon */
++/*
++struct spnfs_msg_layoutreturn_args {
++};
++
++struct spnfs_msg_layoutreturn_res {
++};
++*/
++
++/* getdeviceiter */
++struct spnfs_msg_getdeviceiter_args {
++	unsigned long inode;
++	u_int64_t cookie;
++	u_int64_t verf;
++};
++
++struct spnfs_msg_getdeviceiter_res {
++	int status;
++	u_int64_t devid;
++	u_int64_t cookie;
++	u_int64_t verf;
++	u_int32_t eof;
++};
++
++/* getdeviceinfo */
++struct spnfs_data_server {
++	u_int32_t dsid;
++	char netid[5];
++	char addr[29];
++};
++
++struct spnfs_device {
++	u_int64_t devid;
++	int dscount;
++	struct spnfs_data_server dslist[SPNFS_MAX_DATA_SERVERS];
++};
++
++struct spnfs_msg_getdeviceinfo_args {
++	u_int64_t devid;
++};
++
++struct spnfs_msg_getdeviceinfo_res {
++	int status;
++	struct spnfs_device devinfo;
++};
++
++/* setattr */
++struct spnfs_msg_setattr_args {
++	unsigned long inode;
++	unsigned long generation;
++	int file_size;
++};
++
++struct spnfs_msg_setattr_res {
++	int status;
++};
++
++/* open */
++struct spnfs_msg_open_args {
++	unsigned long inode;
++	unsigned long generation;
++	int create;
++	int createmode;
++	int truncate;
++};
++
++struct spnfs_msg_open_res {
++	int status;
++};
++
++/* close */
++/* No op for daemon */
++struct spnfs_msg_close_args {
++	int x;
++};
++
++struct spnfs_msg_close_res {
++	int y;
++};
++
++/* create */
++/*
++struct spnfs_msg_create_args {
++	int x;
++};
++
++struct spnfs_msg_create_res {
++	int y;
++};
++*/
++
++/* remove */
++struct spnfs_msg_remove_args {
++	unsigned long inode;
++	unsigned long generation;
++};
++
++struct spnfs_msg_remove_res {
++	int status;
++};
++
++/* commit */
++/*
++struct spnfs_msg_commit_args {
++	int x;
++};
++
++struct spnfs_msg_commit_res {
++	int y;
++};
++*/
++
++/* read */
++struct spnfs_msg_read_args {
++	unsigned long inode;
++	unsigned long generation;
++	loff_t offset;
++	unsigned long len;
++};
++
++struct spnfs_msg_read_res {
++	int status;
++	char data[SPNFS_MAX_IO];
++};
++
++/* write */
++struct spnfs_msg_write_args {
++	unsigned long inode;
++	unsigned long generation;
++	loff_t offset;
++	unsigned long len;
++	char data[SPNFS_MAX_IO];
++};
++
++struct spnfs_msg_write_res {
++	int status;
++};
++
++/* bundle args and responses */
++union spnfs_msg_args {
++	struct spnfs_msg_layoutget_args		layoutget_args;
++	struct spnfs_msg_layoutcommit_args	layoutcommit_args;
++/*
++	struct spnfs_msg_layoutreturn_args	layoutreturn_args;
++*/
++	struct spnfs_msg_getdeviceiter_args     getdeviceiter_args;
++	struct spnfs_msg_getdeviceinfo_args     getdeviceinfo_args;
++	struct spnfs_msg_setattr_args		setattr_args;
++	struct spnfs_msg_open_args		open_args;
++	struct spnfs_msg_close_args		close_args;
++/*
++	struct spnfs_msg_create_args		create_args;
++*/
++	struct spnfs_msg_remove_args		remove_args;
++/*
++	struct spnfs_msg_commit_args		commit_args;
++*/
++	struct spnfs_msg_read_args		read_args;
++	struct spnfs_msg_write_args		write_args;
++};
++
++union spnfs_msg_res {
++	struct spnfs_msg_layoutget_res		layoutget_res;
++	struct spnfs_msg_layoutcommit_res	layoutcommit_res;
++/*
++	struct spnfs_msg_layoutreturn_res	layoutreturn_res;
++*/
++	struct spnfs_msg_getdeviceiter_res      getdeviceiter_res;
++	struct spnfs_msg_getdeviceinfo_res      getdeviceinfo_res;
++	struct spnfs_msg_setattr_res		setattr_res;
++	struct spnfs_msg_open_res		open_res;
++	struct spnfs_msg_close_res		close_res;
++/*
++	struct spnfs_msg_create_res		create_res;
++*/
++	struct spnfs_msg_remove_res		remove_res;
++/*
++	struct spnfs_msg_commit_res		commit_res;
++*/
++	struct spnfs_msg_read_res		read_res;
++	struct spnfs_msg_write_res		write_res;
++};
++
++/* a spnfs message, args and response */
++struct spnfs_msg {
++	unsigned char		im_type;
++	unsigned char		im_status;
++	union spnfs_msg_args	im_args;
++	union spnfs_msg_res	im_res;
++};
++
++/* spnfs configuration info */
++struct spnfs_config {
++	unsigned char		dense_striping;
++	int			stripe_size;
++	int			num_ds;
++	char			ds_dir[SPNFS_MAX_DATA_SERVERS][80];  /* XXX */
++};
++
++#if defined(__KERNEL__) && defined(CONFIG_SPNFS)
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++/* pipe mgmt structure.  messages flow through here */
++struct spnfs {
++	struct dentry		*spnfs_dentry;    /* dentry for pipe */
++	wait_queue_head_t	spnfs_wq;
++	struct spnfs_msg	spnfs_im;         /* spnfs message */
++	struct mutex		spnfs_lock;       /* Serializes upcalls */
++	struct mutex		spnfs_plock;
++};
++
++struct nfsd4_open;
++
++int spnfs_layout_type(struct super_block *);
++enum nfsstat4 spnfs_layoutget(struct inode *, struct exp_xdr_stream *xdr,
++			      const struct nfsd4_pnfs_layoutget_arg *,
++			      struct nfsd4_pnfs_layoutget_res *);
++int spnfs_layoutcommit(void);
++int spnfs_layoutreturn(struct inode *,
++		       const struct nfsd4_pnfs_layoutreturn_arg *);
++int spnfs_getdeviceiter(struct super_block *,
++			u32 layout_type,
++			struct nfsd4_pnfs_dev_iter_res *);
++int spnfs_getdeviceinfo(struct super_block *, struct exp_xdr_stream *,
++			u32 layout_type,
++			const struct nfsd4_pnfs_deviceid *);
++int spnfs_setattr(void);
++int spnfs_open(struct inode *, struct nfsd4_open *);
++int spnfs_get_state(struct inode *, struct knfsd_fh *, struct pnfs_get_state *);
++int spnfs_remove(unsigned long, unsigned long);
++__be32 spnfs_read(struct inode *, loff_t, unsigned long *,
++		  int, struct svc_rqst *);
++__be32 spnfs_write(struct inode *, loff_t, size_t, int, struct svc_rqst *);
++int spnfs_getfh(int, struct nfs_fh *);
++int spnfs_test_layoutrecall(char *, u64, u64);
++int spnfs_layoutrecall(struct inode *, int, u64, u64);
++
++int nfsd_spnfs_new(void);
++void nfsd_spnfs_delete(void);
++int spnfs_upcall(struct spnfs *, struct spnfs_msg *, union spnfs_msg_res *);
++int spnfs_enabled(void);
++int spnfs_init_proc(void);
++
++extern struct spnfs_config *spnfs_config;
++
++#endif /* __KERNEL__ && CONFIG_SPNFS */
++
++#endif /* NFS_SPNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch/include/linux/nfsd/const.h
+--- linux-2.6.34.noarch/include/linux/nfsd/const.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-08-23 12:09:03.376401789 -0400
+@@ -29,6 +29,7 @@
+ #ifdef __KERNEL__
+ 
+ #include <linux/sunrpc/msg_prot.h>
++#include <linux/sunrpc/svc.h>
+ 
+ /*
+  * Largest number of bytes we need to allocate for an NFS
+diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch/include/linux/nfsd/debug.h
+--- linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-08-23 12:09:03.376401789 -0400
+@@ -32,6 +32,8 @@
+ #define NFSDDBG_REPCACHE	0x0080
+ #define NFSDDBG_XDR		0x0100
+ #define NFSDDBG_LOCKD		0x0200
++#define NFSDDBG_PNFS		0x0400
++#define NFSDDBG_FILELAYOUT	0x0800
+ #define NFSDDBG_ALL		0x7FFF
+ #define NFSDDBG_NOCHANGE	0xFFFF
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarch/include/linux/nfsd/export.h
+--- linux-2.6.34.noarch/include/linux/nfsd/export.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-08-23 12:09:03.377481954 -0400
+@@ -100,6 +100,7 @@ struct svc_export {
+ 	uid_t			ex_anon_uid;
+ 	gid_t			ex_anon_gid;
+ 	int			ex_fsid;
++	int			ex_pnfs;
+ 	unsigned char *		ex_uuid; /* 16 byte fsid */
+ 	struct nfsd4_fs_locations ex_fslocs;
+ 	int			ex_nflavors;
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-08-23 12:09:03.377481954 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-08-23 12:09:03.378501747 -0400
+@@ -0,0 +1,132 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef NFSD_NFS4LAYOUTXDR_H
++#define NFSD_NFS4LAYOUTXDR_H
++
++#include <linux/sunrpc/xdr.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++/* the nfsd4_pnfs_devlist dev_addr for the file layout type */
++struct pnfs_filelayout_devaddr {
++	struct xdr_netobj	r_netid;
++	struct xdr_netobj	r_addr;
++};
++
++/* list of multipath servers */
++struct pnfs_filelayout_multipath {
++	u32				fl_multipath_length;
++	struct pnfs_filelayout_devaddr 	*fl_multipath_list;
++};
++
++struct pnfs_filelayout_device {
++	u32					fl_stripeindices_length;
++	u32       		 		*fl_stripeindices_list;
++	u32					fl_device_length;
++	struct pnfs_filelayout_multipath 	*fl_device_list;
++};
++
++struct pnfs_filelayout_layout {
++	u32                             lg_layout_type; /* response */
++	u32                             lg_stripe_type; /* response */
++	u32                             lg_commit_through_mds; /* response */
++	u64                             lg_stripe_unit; /* response */
++	u64                             lg_pattern_offset; /* response */
++	u32                             lg_first_stripe_index;	/* response */
++	struct nfsd4_pnfs_deviceid	device_id;		/* response */
++	u32                             lg_fh_length;		/* response */
++	struct knfsd_fh                 *lg_fh_list;		/* response */
++};
++
++enum stripetype4 {
++	STRIPE_SPARSE = 1,
++	STRIPE_DENSE = 2
++};
++
++enum pnfs_block_extent_state4 {
++        PNFS_BLOCK_READWRITE_DATA       = 0,
++        PNFS_BLOCK_READ_DATA            = 1,
++        PNFS_BLOCK_INVALID_DATA         = 2,
++        PNFS_BLOCK_NONE_DATA            = 3
++};
++
++enum pnfs_block_volume_type4 {
++        PNFS_BLOCK_VOLUME_SIMPLE = 0,
++        PNFS_BLOCK_VOLUME_SLICE = 1,
++        PNFS_BLOCK_VOLUME_CONCAT = 2,
++        PNFS_BLOCK_VOLUME_STRIPE = 3,
++};
++typedef enum pnfs_block_volume_type4 pnfs_block_volume_type4;
++
++enum bl_cache_state {
++	BLOCK_LAYOUT_NEW	= 0,
++	BLOCK_LAYOUT_CACHE	= 1,
++	BLOCK_LAYOUT_UPDATE	= 2,
++};
++
++typedef struct pnfs_blocklayout_layout {
++        struct list_head                bll_list;
++        struct nfsd4_pnfs_deviceid      bll_vol_id;
++        u64                             bll_foff;	// file offset
++        u64                             bll_len;
++        u64                             bll_soff;	// storage offset
++	int				bll_recalled;
++        enum pnfs_block_extent_state4   bll_es;
++	enum bl_cache_state		bll_cache_state;
++} pnfs_blocklayout_layout_t;
++
++typedef struct pnfs_blocklayout_devinfo {
++        struct list_head                bld_list;
++        pnfs_block_volume_type4         bld_type;
++        struct nfsd4_pnfs_deviceid      bld_devid;
++        int                             bld_index_loc;
++        union {
++                struct {
++                        u64             bld_offset;
++                        u32             bld_sig_len,
++                                        *bld_sig;
++                } simple;
++                struct {
++                        u64             bld_start,
++                                        bld_len;
++                        u32             bld_index;      /* Index of Simple Volume */
++                } slice;
++                struct {
++                        u32             bld_stripes;
++                        u64             bld_chunk_size;
++                        u32             *bld_stripe_indexs;
++                } stripe;
++        } u;
++} pnfs_blocklayout_devinfo_t;
++
++#endif /* NFSD_NFS4LAYOUTXDR_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-08-23 12:09:03.378501747 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-08-23 12:09:03.378501747 -0400
+@@ -0,0 +1,54 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ ******************************************************************************/
++#include <linux/genhd.h>
++
++/*
++ * Length of comma separated pnfs data server IPv4 addresses. Enough room for
++ * 32 addresses.
++ */
++#define NFSD_DLM_DS_LIST_MAX   512
++/*
++ * Length of colon separated pnfs dlm device of the form
++ * disk_name:comma separated data server IPv4 address
++ */
++#define NFSD_PNFS_DLM_DEVICE_MAX (NFSD_DLM_DS_LIST_MAX + DISK_NAME_LEN + 1)
++
++#ifdef CONFIG_PNFSD
++
++/* For use by DLM cluster file systems exported by pNFSD */
++extern const struct pnfs_export_operations pnfs_dlm_export_ops;
++
++int nfsd4_set_pnfs_dlm_device(char *pnfs_dlm_device, int len);
++
++void nfsd4_pnfs_dlm_shutdown(void);
++
++ssize_t nfsd4_get_pnfs_dlm_device_list(char *buf, ssize_t buflen);
++
++#else /* CONFIG_PNFSD */
++
++static inline void nfsd4_pnfs_dlm_shutdown(void)
++{
++	return;
++}
++
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-08-23 12:09:03.379487099 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-08-23 12:09:03.379487099 -0400
+@@ -0,0 +1,271 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef _LINUX_NFSD_NFSD4_PNFS_H
++#define _LINUX_NFSD_NFSD4_PNFS_H
++
++#include <linux/exportfs.h>
++#include <linux/exp_xdr.h>
++#include <linux/nfs_xdr.h>
++
++struct nfsd4_pnfs_deviceid {
++	u64	sbid;			/* per-superblock unique ID */
++	u64	devid;			/* filesystem-wide unique device ID */
++};
++
++struct nfsd4_pnfs_dev_iter_res {
++	u64		gd_cookie;	/* request/response */
++	u64		gd_verf;	/* request/response */
++	u64		gd_devid;	/* response */
++	u32		gd_eof;		/* response */
++};
++
++/* Arguments for set_device_notify */
++struct pnfs_devnotify_arg {
++	struct nfsd4_pnfs_deviceid dn_devid;	/* request */
++	u32 dn_layout_type;			/* request */
++	u32 dn_notify_types;			/* request/response */
++};
++
++struct nfsd4_layout_seg {
++	u64	clientid;
++	u32	layout_type;
++	u32	iomode;
++	u64	offset;
++	u64	length;
++};
++
++/* Used by layout_get to encode layout (loc_body var in spec)
++ * Args:
++ * minlength - min number of accessible bytes given by layout
++ * fsid - Major part of struct pnfs_deviceid.  File system uses this
++ * to build the deviceid returned in the layout.
++ * fh - fs can modify the file handle for use on data servers
++ * seg - layout info requested and layout info returned
++ * xdr - xdr info
++ * return_on_close - true if layout to be returned on file close
++ */
++
++struct nfsd4_pnfs_layoutget_arg {
++	u64			lg_minlength;
++	u64			lg_sbid;
++	const struct knfsd_fh	*lg_fh;
++};
++
++struct nfsd4_pnfs_layoutget_res {
++	struct nfsd4_layout_seg	lg_seg;	/* request/response */
++	u32			lg_return_on_close;
++};
++
++struct nfsd4_pnfs_layoutcommit_arg {
++	struct nfsd4_layout_seg	lc_seg;		/* request */
++	u32			lc_reclaim;	/* request */
++	u32			lc_newoffset;	/* request */
++	u64			lc_last_wr;	/* request */
++	struct nfstime4		lc_mtime;	/* request */
++	u32			lc_up_len;	/* layout length */
++	void			*lc_up_layout;	/* decoded by callback */
++};
++
++struct nfsd4_pnfs_layoutcommit_res {
++	u32			lc_size_chg;	/* boolean for response */
++	u64			lc_newsize;	/* response */
++};
++
++#define PNFS_LAST_LAYOUT_NO_RECALLS ((void *)-1) /* used with lr_cookie below */
++
++struct nfsd4_pnfs_layoutreturn_arg {
++	u32			lr_return_type;	/* request */
++	struct nfsd4_layout_seg	lr_seg;		/* request */
++	u32			lr_reclaim;	/* request */
++	u32			lrf_body_len;	/* request */
++	void			*lrf_body;	/* request */
++	void			*lr_cookie;	/* fs private */
++};
++
++/* pNFS Metadata to Data server state communication */
++struct pnfs_get_state {
++	u32			dsid;    /* request */
++	u64			ino;      /* request */
++	nfs4_stateid		stid;     /* request;response */
++	nfs4_clientid		clid;     /* response */
++	u32			access;    /* response */
++	u32			stid_gen;    /* response */
++	u32			verifier[2]; /* response */
++};
++
++/*
++ * pNFS export operations vector.
++ *
++ * The filesystem must implement the following methods:
++ *   layout_type
++ *   get_device_info
++ *   layout_get
++ *
++ * All other methods are optional and can be set to NULL if not implemented.
++ */
++struct pnfs_export_operations {
++	/* Returns the supported pnfs_layouttype4. */
++	int (*layout_type) (struct super_block *);
++
++	/* Encode device info onto the xdr stream. */
++	int (*get_device_info) (struct super_block *,
++				struct exp_xdr_stream *,
++				u32 layout_type,
++				const struct nfsd4_pnfs_deviceid *);
++
++	/* Retrieve all available devices via an iterator.
++	 * arg->cookie == 0 indicates the beginning of the list,
++	 * otherwise arg->verf is used to verify that the list hasn't changed
++	 * while retrieved.
++	 *
++	 * On output, the filesystem sets the devid based on the current cookie
++	 * and sets res->cookie and res->verf corresponding to the next entry.
++	 * When the last entry in the list is retrieved, res->eof is set to 1.
++	 */
++	int (*get_device_iter) (struct super_block *,
++				u32 layout_type,
++				struct nfsd4_pnfs_dev_iter_res *);
++
++	int (*set_device_notify) (struct super_block *,
++				  struct pnfs_devnotify_arg *);
++
++	/* Retrieve and encode a layout for inode onto the xdr stream.
++	 * arg->minlength is the minimum number of accessible bytes required
++	 *   by the client.
++	 * The maximum number of bytes to encode the layout is given by
++	 *   the xdr stream end pointer.
++	 * arg->fsid contains the major part of struct pnfs_deviceid.
++	 *   The file system uses this to build the deviceid returned
++	 *   in the layout.
++	 * res->seg - layout segment requested and layout info returned.
++	 * res->fh - fs can modify the file handle for use on data servers
++	 * res->return_on_close - true if layout to be returned on file close
++	 *
++	 * return one of the following nfs errors:
++	 * NFS_OK			Success
++	 * NFS4ERR_ACCESS		Permission error
++	 * NFS4ERR_BADIOMODE		Server does not support requested iomode
++	 * NFS4ERR_BADLAYOUT		No layout matching loga_minlength rules
++	 * NFS4ERR_INVAL		Parameter other than layout is invalid
++	 * NFS4ERR_IO			I/O error
++	 * NFS4ERR_LAYOUTTRYLATER	Layout may be retrieved later
++	 * NFS4ERR_LAYOUTUNAVAILABLE	Layout unavailable for this file
++	 * NFS4ERR_LOCKED		Lock conflict
++	 * NFS4ERR_NOSPC		Out-of-space error occurred
++	 * NFS4ERR_RECALLCONFLICT	Layout currently unavailable due to
++	 *				a conflicting CB_LAYOUTRECALL
++	 * NFS4ERR_SERVERFAULT		Server went berserk
++	 * NFS4ERR_TOOSMALL		loga_maxcount too small to fit layout
++	 * NFS4ERR_WRONG_TYPE		Wrong file type (not a regular file)
++	 */
++	enum nfsstat4 (*layout_get) (struct inode *,
++				     struct exp_xdr_stream *xdr,
++				     const struct nfsd4_pnfs_layoutget_arg *,
++				     struct nfsd4_pnfs_layoutget_res *);
++
++	/* Commit changes to layout */
++	int (*layout_commit) (struct inode *,
++			      const struct nfsd4_pnfs_layoutcommit_arg *,
++			      struct nfsd4_pnfs_layoutcommit_res *);
++
++	/* Returns the layout */
++	int (*layout_return) (struct inode *,
++			      const struct nfsd4_pnfs_layoutreturn_arg *);
++
++	/* Can layout segments be merged for this layout type? */
++	int (*can_merge_layouts) (u32 layout_type);
++
++	/* pNFS Files layout specific operations */
++
++	/* Get the write verifier for DS (called on MDS only) */
++	void (*get_verifier) (struct super_block *, u32 *p);
++	/* Call fs on DS only */
++	int (*get_state) (struct inode *, struct knfsd_fh *,
++			  struct pnfs_get_state *);
++};
++
++struct nfsd4_pnfs_cb_layout {
++	u32			cbl_recall_type;	/* request */
++	struct nfsd4_layout_seg cbl_seg;		/* request */
++	u32			cbl_layoutchanged;	/* request */
++	nfs4_stateid		cbl_sid;		/* request */
++	struct nfs4_fsid	cbl_fsid;
++	void			*cbl_cookie;		/* fs private */
++};
++
++/* layoutrecall request (from exported filesystem) */
++struct nfs4_layoutrecall {
++	struct kref			clr_ref;
++	struct nfsd4_pnfs_cb_layout	cb;	/* request */
++	struct list_head		clr_perclnt; /* on cl_layoutrecalls */
++	struct nfs4_client	       *clr_client;
++	struct nfs4_file	       *clr_file;
++	struct timespec			clr_time;	/* last activity */
++	struct super_block 		*clr_sb; /* We might not have a file */
++	struct nfs4_layoutrecall	*parent; /* The initiating recall */
++
++	void				*clr_args;	/* nfsd internal */
++};
++
++struct nfsd4_pnfs_cb_dev_item {
++	u32			cbd_notify_type;	/* request */
++	u32			cbd_layout_type;	/* request */
++	struct nfsd4_pnfs_deviceid cbd_devid;		/* request */
++	u32			cbd_immediate;		/* request */
++};
++
++struct nfsd4_pnfs_cb_dev_list {
++	u32				cbd_len;  /* request */
++	struct nfsd4_pnfs_cb_dev_item  *cbd_list; /* request */
++};
++
++/*
++ * callbacks provided by the nfsd
++ */
++struct pnfsd_cb_operations {
++	/* Generic callbacks */
++	int (*cb_layout_recall) (struct super_block *, struct inode *,
++				 struct nfsd4_pnfs_cb_layout *);
++	int (*cb_device_notify) (struct super_block *,
++				 struct nfsd4_pnfs_cb_dev_list *);
++
++	/* pNFS Files layout specific callbacks */
++
++	/* Callback from fs on MDS only */
++	int (*cb_get_state) (struct super_block *, struct pnfs_get_state *);
++	/* Callback from fs on DS only */
++	int (*cb_change_state) (struct pnfs_get_state *);
++};
++
++#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noarch/include/linux/nfsd/syscall.h
+--- linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-08-23 12:09:03.380502500 -0400
+@@ -29,6 +29,7 @@
+ /*#define NFSCTL_GETFH		6	/ * get an fh by ino DISCARDED */
+ #define NFSCTL_GETFD		7	/* get an fh by path (used by mountd) */
+ #define	NFSCTL_GETFS		8	/* get an fh by path with max FH len */
++#define	NFSCTL_FD2FH		9	/* get a fh from a fd */
+ 
+ /* SVC */
+ struct nfsctl_svc {
+@@ -71,6 +72,11 @@ struct nfsctl_fsparm {
+ 	int			gd_maxlen;
+ };
+ 
++/* FD2FH */
++struct nfsctl_fd2fh {
++	int			fd;
++};
++
+ /*
+  * This is the argument union.
+  */
+@@ -82,6 +88,7 @@ struct nfsctl_arg {
+ 		struct nfsctl_export	u_export;
+ 		struct nfsctl_fdparm	u_getfd;
+ 		struct nfsctl_fsparm	u_getfs;
++		struct nfsctl_fd2fh	u_fd2fh;
+ 		/*
+ 		 * The following dummy member is needed to preserve binary compatibility
+ 		 * on platforms where alignof(void*)>alignof(int).  It's needed because
+@@ -95,6 +102,7 @@ struct nfsctl_arg {
+ #define ca_export	u.u_export
+ #define ca_getfd	u.u_getfd
+ #define	ca_getfs	u.u_getfs
++#define	ca_fd2fh	u.u_fd2fh
+ };
+ 
+ union nfsctl_res {
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/include/linux/nfs_fs.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-23 12:08:29.061494081 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-23 12:09:03.381511751 -0400
+@@ -72,13 +72,20 @@ struct nfs_access_entry {
+ 	int			mask;
+ };
+ 
++struct nfs_lock_context {
++	atomic_t count;
++	struct list_head list;
++	struct nfs_open_context *open_context;
++	fl_owner_t lockowner;
++	pid_t pid;
++};
++
+ struct nfs4_state;
+ struct nfs_open_context {
+-	atomic_t count;
++	struct nfs_lock_context lock_context;
+ 	struct path path;
+ 	struct rpc_cred *cred;
+ 	struct nfs4_state *state;
+-	fl_owner_t lockowner;
+ 	fmode_t mode;
+ 
+ 	unsigned long flags;
+@@ -97,6 +104,26 @@ struct nfs_delegation;
+ 
+ struct posix_acl;
+ 
++struct pnfs_layout_type {
++	int refcount;
++	struct list_head lo_layouts;	/* other client layouts */
++	struct list_head segs;		/* layout segments list */
++	int roc_iomode;			/* iomode to return on close, 0=none */
++	seqlock_t seqlock;		/* Protects the stateid */
++	nfs4_stateid stateid;
++	unsigned long pnfs_layout_state;
++	#define NFS_INO_RO_LAYOUT_FAILED 0      /* get ro layout failed stop trying */
++	#define NFS_INO_RW_LAYOUT_FAILED 1      /* get rw layout failed stop trying */
++	#define NFS_INO_LAYOUTCOMMIT     3      /* LAYOUTCOMMIT needed */
++	struct rpc_cred         *lo_cred; /* layoutcommit credential */
++	/* DH: These vars keep track of the maximum write range
++	 * so the values can be used for layoutcommit.
++	 */
++	loff_t                  pnfs_write_begin_pos;
++	loff_t                  pnfs_write_end_pos;
++	struct inode		*lo_inode;
++};
++
+ /*
+  * nfs fs inode data in memory
+  */
+@@ -181,6 +208,13 @@ struct nfs_inode {
+ 	struct nfs_delegation	*delegation;
+ 	fmode_t			 delegation_state;
+ 	struct rw_semaphore	rwsem;
++
++	/* pNFS layout information */
++#if defined(CONFIG_NFS_V4_1)
++	wait_queue_head_t lo_waitq;
++	struct pnfs_layout_type *layout;
++	time_t pnfs_layout_suspend;
++#endif /* CONFIG_NFS_V4_1 */
+ #endif /* CONFIG_NFS_V4*/
+ #ifdef CONFIG_NFS_FSCACHE
+ 	struct fscache_cookie	*fscache;
+@@ -353,6 +387,8 @@ extern void nfs_setattr_update_inode(str
+ extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
+ extern void put_nfs_open_context(struct nfs_open_context *ctx);
+ extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode);
++extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx);
++extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx);
+ extern u64 nfs_compat_user_ino64(u64 fileid);
+ extern void nfs_fattr_init(struct nfs_fattr *fattr);
+ 
+@@ -481,8 +517,12 @@ extern void nfs_unblock_sillyrename(stru
+ extern int  nfs_congestion_kb;
+ extern int  nfs_writepage(struct page *page, struct writeback_control *wbc);
+ extern int  nfs_writepages(struct address_space *, struct writeback_control *);
+-extern int  nfs_flush_incompatible(struct file *file, struct page *page);
+-extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
++struct pnfs_layout_segment;
++extern int  nfs_flush_incompatible(struct file *file, struct page *page,
++				   struct pnfs_layout_segment *lseg);
++extern int  nfs_updatepage(struct file *, struct page *,
++			   unsigned int offset, unsigned int count,
++			   struct pnfs_layout_segment *lseg, void *fsdata);
+ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
+ 
+ /*
+@@ -604,6 +644,8 @@ extern void * nfs_root_data(void);
+ #define NFSDBG_CLIENT		0x0200
+ #define NFSDBG_MOUNT		0x0400
+ #define NFSDBG_FSCACHE		0x0800
++#define NFSDBG_PNFS		0x1000
++#define NFSDBG_PNFS_LD		0x2000
+ #define NFSDBG_ALL		0xFFFF
+ 
+ #ifdef __KERNEL__
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/include/linux/nfs_fs_sb.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-08-23 12:08:29.062501618 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-23 12:09:03.383491395 -0400
+@@ -15,6 +15,7 @@ struct nlm_host;
+ struct nfs4_sequence_args;
+ struct nfs4_sequence_res;
+ struct nfs_server;
++struct nfs4_minor_version_ops;
+ 
+ /*
+  * The nfs_client identifies our client state to the server.
+@@ -70,11 +71,7 @@ struct nfs_client {
+ 	 */
+ 	char			cl_ipaddr[48];
+ 	unsigned char		cl_id_uniquifier;
+-	int		     (* cl_call_sync)(struct nfs_server *server,
+-					      struct rpc_message *msg,
+-					      struct nfs4_sequence_args *args,
+-					      struct nfs4_sequence_res *res,
+-					      int cache_reply);
++	const struct nfs4_minor_version_ops *cl_mvops;
+ #endif /* CONFIG_NFS_V4 */
+ 
+ #ifdef CONFIG_NFS_V4_1
+@@ -85,6 +82,8 @@ struct nfs_client {
+ 	/* The flags used for obtaining the clientid during EXCHANGE_ID */
+ 	u32			cl_exchange_flags;
+ 	struct nfs4_session	*cl_session; 	/* sharred session */
++	struct list_head	cl_layouts;
++	struct nfs4_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ #ifdef CONFIG_NFS_FSCACHE
+@@ -92,6 +91,16 @@ struct nfs_client {
+ #endif
+ };
+ 
++static inline bool
++is_ds_only_client(struct nfs_client *clp)
++{
++#ifdef CONFIG_NFS_V4_1
++	return is_ds_only_session(clp->cl_exchange_flags);
++#else
++	return false;
++#endif
++}
++
+ /*
+  * NFS client parameters stored in the superblock.
+  */
+@@ -136,7 +145,7 @@ struct nfs_server {
+ #endif
+ 
+ #ifdef CONFIG_NFS_V4
+-	u32			attr_bitmask[2];/* V4 bitmask representing the set
++	u32			attr_bitmask[3];/* V4 bitmask representing the set
+ 						   of attributes supported on this
+ 						   filesystem */
+ 	u32			cache_consistency_bitmask[2];
+@@ -148,6 +157,15 @@ struct nfs_server {
+ 						   that are supported on this
+ 						   filesystem */
+ #endif
++
++#ifdef CONFIG_NFS_V4_1
++	u32				pnfs_blksize; /* layout_blksize attr */
++	struct pnfs_layoutdriver_type  *pnfs_curr_ld; /* Active layout driver */
++	void			       *pnfs_ld_data; /* Per-mount data */
++	unsigned int			ds_rsize;  /* Data server read size */
++	unsigned int			ds_wsize;  /* Data server write size */
++#endif /* CONFIG_NFS_V4_1 */
++
+ 	void (*destroy)(struct nfs_server *);
+ 
+ 	atomic_t active; /* Keep trace of any activity to this server */
+diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch/include/linux/nfs_iostat.h
+--- linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-08-23 12:09:03.384501540 -0400
+@@ -113,6 +113,9 @@ enum nfs_stat_eventcounters {
+ 	NFSIOS_SHORTREAD,
+ 	NFSIOS_SHORTWRITE,
+ 	NFSIOS_DELAY,
++	NFSIOS_PNFS_READ,
++	NFSIOS_PNFS_WRITE,
++	NFSIOS_PNFS_COMMIT,
+ 	__NFSIOS_COUNTSMAX,
+ };
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/include/linux/nfs_page.h
+--- linux-2.6.34.noarch/include/linux/nfs_page.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-08-23 12:09:03.385491518 -0400
+@@ -39,6 +39,7 @@ struct nfs_page {
+ 	struct list_head	wb_list;	/* Defines state of page: */
+ 	struct page		*wb_page;	/* page to read in/write out */
+ 	struct nfs_open_context	*wb_context;	/* File state context info */
++	struct nfs_lock_context	*wb_lock_context;	/* lock context info */
+ 	atomic_t		wb_complete;	/* i/os we're waiting for */
+ 	pgoff_t			wb_index;	/* Offset >> PAGE_CACHE_SHIFT */
+ 	unsigned int		wb_offset,	/* Offset & ~PAGE_CACHE_MASK */
+@@ -47,6 +48,7 @@ struct nfs_page {
+ 	struct kref		wb_kref;	/* reference count */
+ 	unsigned long		wb_flags;
+ 	struct nfs_writeverf	wb_verf;	/* Commit cookie */
++	struct pnfs_layout_segment *wb_lseg;	/* Pnfs layout info */
+ };
+ 
+ struct nfs_pageio_descriptor {
+@@ -60,6 +62,12 @@ struct nfs_pageio_descriptor {
+ 	int			(*pg_doio)(struct inode *, struct list_head *, unsigned int, size_t, int);
+ 	int 			pg_ioflags;
+ 	int			pg_error;
++	struct pnfs_layout_segment *pg_lseg;
++#ifdef CONFIG_NFS_V4_1
++	int			pg_iswrite;
++	int			pg_boundary;
++	int			(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
++#endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+ #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
+@@ -68,13 +76,15 @@ extern	struct nfs_page *nfs_create_reque
+ 					    struct inode *inode,
+ 					    struct page *page,
+ 					    unsigned int offset,
+-					    unsigned int count);
++					    unsigned int count,
++					    struct pnfs_layout_segment *lseg);
+ extern	void nfs_clear_request(struct nfs_page *req);
+ extern	void nfs_release_request(struct nfs_page *req);
+ 
+ 
+ extern	int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
+-			  pgoff_t idx_start, unsigned int npages, int tag);
++			  pgoff_t idx_start, unsigned int npages, int tag,
++			  int *use_pnfs);
+ extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
+ 			     struct inode *inode,
+ 			     int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
+diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/include/linux/nfs_xdr.h
+--- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-08-23 12:08:29.062501618 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-23 12:09:03.387491422 -0400
+@@ -3,6 +3,8 @@
+ 
+ #include <linux/nfsacl.h>
+ #include <linux/nfs3.h>
++#include <linux/nfs4.h>
++#include <linux/sunrpc/sched.h>
+ 
+ /*
+  * To change the maximum rsize and wsize supported by the NFS client, adjust
+@@ -10,7 +12,7 @@
+  * support a megabyte or more.  The default is left at 4096 bytes, which is
+  * reasonable for NFS over UDP.
+  */
+-#define NFS_MAX_FILE_IO_SIZE	(1048576U)
++#define NFS_MAX_FILE_IO_SIZE	(4U * 1048576U)
+ #define NFS_DEF_FILE_IO_SIZE	(4096U)
+ #define NFS_MIN_FILE_IO_SIZE	(1024U)
+ 
+@@ -113,6 +115,10 @@ struct nfs_fsinfo {
+ 	__u32			dtpref;	/* pref. readdir transfer size */
+ 	__u64			maxfilesize;
+ 	__u32			lease_time; /* in seconds */
++#if defined(CONFIG_NFS_V4_1)
++	__u32			layouttype; /* supported pnfs layout driver */
++	__u32			blksize; /* preferred pnfs io block size */
++#endif
+ };
+ 
+ struct nfs_fsstat {
+@@ -196,8 +202,10 @@ struct nfs_openargs {
+ 	__u64                   clientid;
+ 	__u64                   id;
+ 	union {
+-		struct iattr *  attrs;    /* UNCHECKED, GUARDED */
+-		nfs4_verifier   verifier; /* EXCLUSIVE */
++		struct {
++			struct iattr *  attrs;    /* UNCHECKED, GUARDED */
++			nfs4_verifier   verifier; /* EXCLUSIVE */
++		};
+ 		nfs4_stateid	delegation;		/* CLAIM_DELEGATE_CUR */
+ 		fmode_t		delegation_type;	/* CLAIM_PREVIOUS */
+ 	} u;
+@@ -313,6 +321,10 @@ struct nfs_lockt_res {
+ 	struct nfs4_sequence_res	seq_res;
+ };
+ 
++struct nfs_release_lockowner_args {
++	struct nfs_lowner	lock_owner;
++};
++
+ struct nfs4_delegreturnargs {
+ 	const struct nfs_fh *fhandle;
+ 	const nfs4_stateid *stateid;
+@@ -332,6 +344,7 @@ struct nfs4_delegreturnres {
+ struct nfs_readargs {
+ 	struct nfs_fh *		fh;
+ 	struct nfs_open_context *context;
++	struct nfs_lock_context *lock_context;
+ 	__u64			offset;
+ 	__u32			count;
+ 	unsigned int		pgbase;
+@@ -352,6 +365,7 @@ struct nfs_readres {
+ struct nfs_writeargs {
+ 	struct nfs_fh *		fh;
+ 	struct nfs_open_context *context;
++	struct nfs_lock_context *lock_context;
+ 	__u64			offset;
+ 	__u32			count;
+ 	enum nfs3_stable_how	stable;
+@@ -846,7 +860,7 @@ struct nfs4_server_caps_arg {
+ };
+ 
+ struct nfs4_server_caps_res {
+-	u32				attr_bitmask[2];
++	u32				attr_bitmask[3];
+ 	u32				acl_bitmask;
+ 	u32				has_links;
+ 	u32				has_symlinks;
+@@ -961,6 +975,27 @@ struct nfs_page;
+ 
+ #define NFS_PAGEVEC_SIZE	(8U)
+ 
++#if defined(CONFIG_NFS_V4_1)
++/* pnfsflag values */
++#define PNFS_NO_RPC		0x0001   /* non rpc result callback switch */
++
++/* pnfs-specific data needed for read, write, and commit calls */
++struct pnfs_call_data {
++	struct pnfs_layout_segment *lseg;
++	const struct rpc_call_ops *call_ops;
++	u32			orig_count;	/* for retry via MDS */
++	int			pnfs_error;
++	u8			pnfsflags;
++	u8			how;		/* for FLUSH_STABLE */
++};
++
++/* files layout-type specific data for read, write, and commit */
++struct pnfs_fl_call_data {
++	struct nfs_client	*ds_nfs_client;
++	__u64			orig_offset;
++};
++#endif /* CONFIG_NFS_V4_1 */
++
+ struct nfs_read_data {
+ 	int			flags;
+ 	struct rpc_task		task;
+@@ -976,10 +1011,16 @@ struct nfs_read_data {
+ #ifdef CONFIG_NFS_V4
+ 	unsigned long		timestamp;	/* For lease renewal */
+ #endif
++#if defined(CONFIG_NFS_V4_1)
++	struct pnfs_call_data	pdata;
++	struct pnfs_fl_call_data fldata;
++#endif /* CONFIG_NFS_V4_1 */
+ 	struct page		*page_array[NFS_PAGEVEC_SIZE];
+ };
+ 
+ struct nfs_write_data {
++	struct kref		refcount;	/* For pnfs commit splitting */
++	struct nfs_write_data	*parent;	/* For pnfs commit splitting */
+ 	int			flags;
+ 	struct rpc_task		task;
+ 	struct inode		*inode;
+@@ -995,6 +1036,10 @@ struct nfs_write_data {
+ #ifdef CONFIG_NFS_V4
+ 	unsigned long		timestamp;	/* For lease renewal */
+ #endif
++#if defined(CONFIG_NFS_V4_1)
++	struct pnfs_call_data	pdata;
++	struct pnfs_fl_call_data fldata;
++#endif /* CONFIG_NFS_V4_1 */
+ 	struct page		*page_array[NFS_PAGEVEC_SIZE];
+ };
+ 
+@@ -1008,6 +1053,7 @@ struct nfs_rpc_ops {
+ 	const struct dentry_operations *dentry_ops;
+ 	const struct inode_operations *dir_inode_ops;
+ 	const struct inode_operations *file_inode_ops;
++	const struct file_operations *file_ops;
+ 
+ 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
+ 			    struct nfs_fsinfo *);
+@@ -1072,6 +1118,7 @@ struct nfs_rpc_ops {
+ extern const struct nfs_rpc_ops	nfs_v2_clientops;
+ extern const struct nfs_rpc_ops	nfs_v3_clientops;
+ extern const struct nfs_rpc_ops	nfs_v4_clientops;
++extern const struct nfs_rpc_ops	pnfs_v4_clientops;
+ extern struct rpc_version	nfs_version2;
+ extern struct rpc_version	nfs_version3;
+ extern struct rpc_version	nfs_version4;
+diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.noarch/include/linux/panfs_shim_api.h
+--- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-08-23 12:09:03.388491527 -0400
++++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-08-23 12:09:03.388491527 -0400
+@@ -0,0 +1,57 @@
++#ifndef _PANFS_SHIM_API_H
++#define _PANFS_SHIM_API_H
++
++/*
++ * imported panfs functions
++ */
++struct panfs_export_operations {
++	int (*convert_rc)(pan_status_t rc);
++
++	int (*sm_sec_t_get_size_otw)(
++		pan_sm_sec_otw_t *var,
++		pan_size_t *core_sizep,
++		pan_size_t *wire_size,
++		void *buf_end);
++
++	int (*sm_sec_t_unmarshall)(
++		pan_sm_sec_otw_t *in,
++		pan_sm_sec_t *out,
++		void *buf,
++		pan_size_t size,
++		pan_size_t *otw_consumed,
++		pan_size_t *in_core_consumed);
++
++	int (*ucreds_get)(void **ucreds_pp);
++
++	void (*ucreds_put)(void *ucreds);
++
++	int (*sam_read)(
++		pan_sam_access_flags_t  flags,
++		pan_sam_read_args_t    *args_p,
++		pan_sam_obj_sec_t      *obj_sec_p,
++		pan_sg_entry_t         *data_p,
++		void                   *ucreds,
++		pan_sam_read_cb_t       closure,
++		void                   *user_arg1,
++		void                   *user_arg2,
++		pan_sam_read_res_t     *res_p);
++
++	int (*sam_write)(
++		pan_sam_access_flags_t  flags,
++		pan_sam_write_args_t   *args_p,
++		pan_sam_obj_sec_t      *obj_sec_p,
++		pan_sg_entry_t         *data_p,
++		void                   *ucreds,
++		pan_sam_write_cb_t      closure,
++		void                   *user_arg1,
++		void                   *user_arg2,
++		pan_sam_write_res_t    *res_p);
++};
++
++extern int
++panfs_shim_register(struct panfs_export_operations *ops);
++
++extern int
++panfs_shim_unregister(void);
++
++#endif /* _PANFS_SHIM_API_H */
+diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h
+--- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-08-23 12:09:03.390501461 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-08-23 12:09:03.390501461 -0400
+@@ -0,0 +1,440 @@
++/*
++ *  pnfs_osd_xdr.h
++ *
++ *  pNFS-osd on-the-wire data structures
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#ifndef __PNFS_OSD_XDR_H__
++#define __PNFS_OSD_XDR_H__
++
++#include <linux/nfs_fs.h>
++#include <linux/nfs_page.h>
++#include <linux/exp_xdr.h>
++#include <linux/pnfs_xdr.h>
++#include <scsi/osd_protocol.h>
++
++#define PNFS_OSD_OSDNAME_MAXSIZE 256
++
++/*
++ * START OF "GENERIC" DECODE ROUTINES.
++ *   These may look a little ugly since they are imported from a "generic"
++ * set of XDR encode/decode routines which are intended to be shared by
++ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
++ *
++ * If the pain of reading these is too great, it should be a straightforward
++ * task to translate them into Linux-specific versions which are more
++ * consistent with the style used in NFSv2/v3...
++ */
++#define READ32(x)         (x) = ntohl(*p++)
++#define READ64(x)         do {			\
++	(x) = (u64)ntohl(*p++) << 32;		\
++	(x) |= ntohl(*p++);			\
++} while (0)
++#define COPYMEM(x, nbytes) do {			\
++	memcpy((x), p, nbytes);			\
++	p += XDR_QUADLEN(nbytes);		\
++} while (0)
++
++/*
++ * draft-ietf-nfsv4-minorversion-22
++ * draft-ietf-nfsv4-pnfs-obj-12
++ */
++
++/* Layout Structure */
++
++enum pnfs_osd_raid_algorithm4 {
++	PNFS_OSD_RAID_0		= 1,
++	PNFS_OSD_RAID_4		= 2,
++	PNFS_OSD_RAID_5		= 3,
++	PNFS_OSD_RAID_PQ	= 4     /* Reed-Solomon P+Q */
++};
++
++/*   struct pnfs_osd_data_map4 {
++ *       uint32_t                    odm_num_comps;
++ *       length4                     odm_stripe_unit;
++ *       uint32_t                    odm_group_width;
++ *       uint32_t                    odm_group_depth;
++ *       uint32_t                    odm_mirror_cnt;
++ *       pnfs_osd_raid_algorithm4    odm_raid_algorithm;
++ *   };
++ */
++struct pnfs_osd_data_map {
++	u32	odm_num_comps;
++	u64	odm_stripe_unit;
++	u32	odm_group_width;
++	u32	odm_group_depth;
++	u32	odm_mirror_cnt;
++	u32	odm_raid_algorithm;
++};
++
++static inline int
++pnfs_osd_data_map_xdr_sz(void)
++{
++	return 1 + 2 + 1 + 1 + 1 + 1;
++}
++
++static inline size_t
++pnfs_osd_data_map_incore_sz(void)
++{
++	return sizeof(struct pnfs_osd_data_map);
++}
++
++/*   struct pnfs_osd_objid4 {
++ *       deviceid4       oid_device_id;
++ *       uint64_t        oid_partition_id;
++ *       uint64_t        oid_object_id;
++ *   };
++ */
++struct pnfs_osd_objid {
++	struct pnfs_deviceid	oid_device_id;
++	u64			oid_partition_id;
++	u64			oid_object_id;
++};
++
++/* For printout. I use "dev(%llx:%llx)", _DEVID_LO(), _DEVID_HI BE style */
++#define _DEVID_LO(oid_device_id) \
++	(unsigned long long)be64_to_cpup((__be64 *)oid_device_id.data)
++
++#define _DEVID_HI(oid_device_id) \
++	(unsigned long long)be64_to_cpup(((__be64 *)oid_device_id.data) + 1)
++
++static inline int
++pnfs_osd_objid_xdr_sz(void)
++{
++	return (NFS4_PNFS_DEVICEID4_SIZE / 4) + 2 + 2;
++}
++
++static inline size_t
++pnfs_osd_objid_incore_sz(void)
++{
++	return sizeof(struct pnfs_osd_objid);
++}
++
++enum pnfs_osd_version {
++	PNFS_OSD_MISSING              = 0,
++	PNFS_OSD_VERSION_1            = 1,
++	PNFS_OSD_VERSION_2            = 2
++};
++
++struct pnfs_osd_opaque_cred {
++	u32 cred_len;
++	u8 *cred;
++};
++
++static inline int
++pnfs_osd_opaque_cred_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++	u32 n;
++
++	READ32(n);
++	p += XDR_QUADLEN(n);
++	return p - start;
++}
++
++static inline size_t
++pnfs_osd_opaque_cred_incore_sz(u32 *p)
++{
++	u32 n;
++
++	READ32(n);
++	return XDR_QUADLEN(n) * 4;
++}
++
++enum pnfs_osd_cap_key_sec {
++	PNFS_OSD_CAP_KEY_SEC_NONE     = 0,
++	PNFS_OSD_CAP_KEY_SEC_SSV      = 1,
++};
++
++/*   struct pnfs_osd_object_cred4 {
++ *       pnfs_osd_objid4         oc_object_id;
++ *       pnfs_osd_version4       oc_osd_version;
++ *       pnfs_osd_cap_key_sec4   oc_cap_key_sec;
++ *       opaque                  oc_capability_key<>;
++ *       opaque                  oc_capability<>;
++ *   };
++ */
++struct pnfs_osd_object_cred {
++	struct pnfs_osd_objid		oc_object_id;
++	u32				oc_osd_version;
++	u32				oc_cap_key_sec;
++	struct pnfs_osd_opaque_cred	oc_cap_key;
++	struct pnfs_osd_opaque_cred	oc_cap;
++};
++
++static inline int
++pnfs_osd_object_cred_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++
++	p += pnfs_osd_objid_xdr_sz() + 2;
++	p += pnfs_osd_opaque_cred_xdr_sz(p);
++	p += pnfs_osd_opaque_cred_xdr_sz(p);
++	return p - start;
++}
++
++static inline size_t
++pnfs_osd_object_cred_incore_sz(u32 *p)
++{
++	size_t sz = sizeof(struct pnfs_osd_object_cred);
++
++	p += pnfs_osd_objid_xdr_sz() + 2;
++	sz += pnfs_osd_opaque_cred_incore_sz(p);
++	p += pnfs_osd_opaque_cred_xdr_sz(p);
++	sz += pnfs_osd_opaque_cred_incore_sz(p);
++	return sz;
++}
++
++/*   struct pnfs_osd_layout4 {
++ *       pnfs_osd_data_map4      olo_map;
++ *       uint32_t                olo_comps_index;
++ *       pnfs_osd_object_cred4   olo_components<>;
++ *   };
++ */
++struct pnfs_osd_layout {
++	struct pnfs_osd_data_map	olo_map;
++	u32				olo_comps_index;
++	u32				olo_num_comps;
++	struct pnfs_osd_object_cred	*olo_comps;
++};
++
++static inline int
++pnfs_osd_layout_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++	u32 n;
++
++	p += pnfs_osd_data_map_xdr_sz() + 1;
++	READ32(n);
++	while ((int)(n--) > 0)
++		p += pnfs_osd_object_cred_xdr_sz(p);
++	return p - start;
++}
++
++static inline size_t
++pnfs_osd_layout_incore_sz(u32 *p)
++{
++	u32 n;
++	size_t sz;
++
++	p += pnfs_osd_data_map_xdr_sz() + 1;
++	READ32(n);
++	sz = sizeof(struct pnfs_osd_layout);
++	while ((int)(n--) > 0) {
++		sz += pnfs_osd_object_cred_incore_sz(p);
++		p += pnfs_osd_object_cred_xdr_sz(p);
++	}
++	return sz;
++}
++
++/* Device Address */
++
++enum pnfs_osd_targetid_type {
++	OBJ_TARGET_ANON = 1,
++	OBJ_TARGET_SCSI_NAME = 2,
++	OBJ_TARGET_SCSI_DEVICE_ID = 3,
++};
++
++/*   union pnfs_osd_targetid4 switch (pnfs_osd_targetid_type4 oti_type) {
++ *       case OBJ_TARGET_SCSI_NAME:
++ *           string              oti_scsi_name<>;
++ *
++ *       case OBJ_TARGET_SCSI_DEVICE_ID:
++ *           opaque              oti_scsi_device_id<>;
++ *
++ *       default:
++ *           void;
++ *   };
++ *
++ *   union pnfs_osd_targetaddr4 switch (bool ota_available) {
++ *       case TRUE:
++ *           netaddr4            ota_netaddr;
++ *       case FALSE:
++ *           void;
++ *   };
++ *
++ *   struct pnfs_osd_deviceaddr4 {
++ *       pnfs_osd_targetid4      oda_targetid;
++ *       pnfs_osd_targetaddr4    oda_targetaddr;
++ *       uint64_t                oda_lun;
++ *       opaque                  oda_systemid<>;
++ *       pnfs_osd_object_cred4   oda_root_obj_cred;
++ *       opaque                  oda_osdname<>;
++ *   };
++ */
++struct pnfs_osd_targetid {
++	u32				oti_type;
++	struct nfs4_string		oti_scsi_device_id;
++};
++
++enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 };
++
++/*   struct netaddr4 {
++ *       // see struct rpcb in RFC1833
++ *       string r_netid<>;    // network id
++ *       string r_addr<>;     // universal address
++ *   };
++ */
++struct pnfs_osd_net_addr {
++	struct nfs4_string	r_netid;
++	struct nfs4_string	r_addr;
++};
++
++struct pnfs_osd_targetaddr {
++	u32				ota_available;
++	struct pnfs_osd_net_addr	ota_netaddr;
++};
++
++enum {
++	NETWORK_ID_MAX = 16 / 4,
++	UNIVERSAL_ADDRESS_MAX = 64 / 4,
++	PNFS_OSD_TARGETADDR_MAX = 3 +  NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX,
++};
++
++struct pnfs_osd_deviceaddr {
++	struct pnfs_osd_targetid	oda_targetid;
++	struct pnfs_osd_targetaddr	oda_targetaddr;
++	u8				oda_lun[8];
++	struct nfs4_string		oda_systemid;
++	struct pnfs_osd_object_cred	oda_root_obj_cred;
++	struct nfs4_string		oda_osdname;
++};
++
++enum {
++	ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4,
++	PNFS_OSD_DEVICEADDR_MAX =
++		PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX +
++		2 /*oda_lun*/ +
++		1 + OSD_SYSTEMID_LEN +
++		1 + ODA_OSDNAME_MAX,
++};
++
++/* LAYOUTCOMMIT: layoutupdate */
++
++/*   union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) {
++ *       case TRUE:
++ *           int64_t     dsu_delta;
++ *       case FALSE:
++ *           void;
++ *   };
++ *
++ *   struct pnfs_osd_layoutupdate4 {
++ *       pnfs_osd_deltaspaceused4    olu_delta_space_used;
++ *       bool                        olu_ioerr_flag;
++ *   };
++ */
++struct pnfs_osd_layoutupdate {
++	u32	dsu_valid;
++	s64	dsu_delta;
++	u32	olu_ioerr_flag;
++};
++
++/* LAYOUTRETURN: I/O Error Report */
++
++enum pnfs_osd_errno {
++	PNFS_OSD_ERR_EIO		= 1,
++	PNFS_OSD_ERR_NOT_FOUND		= 2,
++	PNFS_OSD_ERR_NO_SPACE		= 3,
++	PNFS_OSD_ERR_BAD_CRED		= 4,
++	PNFS_OSD_ERR_NO_ACCESS		= 5,
++	PNFS_OSD_ERR_UNREACHABLE	= 6,
++	PNFS_OSD_ERR_RESOURCE		= 7
++};
++
++/*   struct pnfs_osd_ioerr4 {
++ *       pnfs_osd_objid4     oer_component;
++ *       length4             oer_comp_offset;
++ *       length4             oer_comp_length;
++ *       bool                oer_iswrite;
++ *       pnfs_osd_errno4     oer_errno;
++ *   };
++ */
++struct pnfs_osd_ioerr {
++	struct pnfs_osd_objid	oer_component;
++	u64			oer_comp_offset;
++	u64			oer_comp_length;
++	u32			oer_iswrite;
++	u32			oer_errno;
++};
++
++static inline unsigned
++pnfs_osd_ioerr_xdr_sz(void)
++{
++	return pnfs_osd_objid_xdr_sz() + 2 + 2 + 1 + 1;
++}
++
++/* OSD XDR API */
++
++/* Layout helpers */
++extern struct pnfs_osd_layout *pnfs_osd_xdr_decode_layout(
++	struct pnfs_osd_layout *layout, u32 *p);
++
++extern int pnfs_osd_xdr_encode_layout(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_layout *layout);
++
++/* Device Info helpers */
++
++/* First pass calculate total size for space needed */
++extern size_t pnfs_osd_xdr_deviceaddr_incore_sz(u32 *p);
++
++/* Note: some strings pointed to inside @deviceaddr might point
++ * to space inside @p. @p should stay valid while @deviceaddr
++ * is in use.
++ * It is assumed that @deviceaddr points to bigger memory of size
++ * calculated in first pass by pnfs_osd_xdr_deviceaddr_incore_sz()
++ */
++extern void pnfs_osd_xdr_decode_deviceaddr(
++	struct pnfs_osd_deviceaddr *deviceaddr, u32 *p);
++
++/* For Servers */
++extern int pnfs_osd_xdr_encode_deviceaddr(
++	struct exp_xdr_stream *xdr, struct pnfs_osd_deviceaddr *devaddr);
++
++/* layoutupdate (layout_commit) xdr helpers */
++extern int
++pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
++				 struct pnfs_osd_layoutupdate *lou);
++extern __be32 *
++pnfs_osd_xdr_decode_layoutupdate(struct pnfs_osd_layoutupdate *lou, __be32 *p);
++
++/* osd_ioerror encoding/decoding (layout_return) */
++extern int
++pnfs_osd_xdr_encode_ioerr(struct xdr_stream *xdr, struct pnfs_osd_ioerr *ioerr);
++extern __be32 *
++pnfs_osd_xdr_decode_ioerr(struct pnfs_osd_ioerr *ioerr, __be32 *p);
++
++#endif /* __PNFS_OSD_XDR_H__ */
+diff -up linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_xdr.h
+--- linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig	2010-08-23 12:09:03.391491550 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_xdr.h	2010-08-23 12:09:03.391491550 -0400
+@@ -0,0 +1,134 @@
++/*
++ *  include/linux/pnfs_xdr.h
++ *
++ *  Common xdr data structures needed by pnfs client.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ * Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef LINUX_PNFS_XDR_H
++#define LINUX_PNFS_XDR_H
++
++#define PNFS_LAYOUT_MAXSIZE 4096
++#define NFS4_PNFS_DEVICEID4_SIZE 16
++
++struct pnfs_deviceid {
++	char data[NFS4_PNFS_DEVICEID4_SIZE];
++};
++
++struct nfs4_pnfs_layout {
++	__u32 len;
++	void *buf;
++};
++
++struct nfs4_pnfs_layout_segment {
++	u32 iomode;
++	u64 offset;
++	u64 length;
++};
++
++struct nfs4_pnfs_layoutget_arg {
++	__u32 type;
++	struct nfs4_pnfs_layout_segment lseg;
++	__u64 minlength;
++	__u32 maxcount;
++	struct inode *inode;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_pnfs_layoutget_res {
++	__u32 return_on_close;
++	struct nfs4_pnfs_layout_segment lseg;
++	__u32 type;
++	nfs4_stateid stateid;
++	struct nfs4_pnfs_layout layout;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct nfs4_pnfs_layoutget {
++	struct nfs4_pnfs_layoutget_arg args;
++	struct nfs4_pnfs_layoutget_res res;
++	struct pnfs_layout_segment **lsegpp;
++	int status;
++};
++
++struct pnfs_layoutcommit_arg {
++	nfs4_stateid stateid;
++	__u64 lastbytewritten;
++	__u32 time_modify_changed;
++	struct timespec time_modify;
++	const u32 *bitmask;
++	struct nfs_fh *fh;
++	struct inode *inode;
++
++	/* Values set by layout driver */
++	struct nfs4_pnfs_layout_segment lseg;
++	__u32 layout_type;
++	void *layoutdriver_data;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct pnfs_layoutcommit_res {
++	__u32 sizechanged;
++	__u64 newsize;
++	struct nfs_fattr *fattr;
++	const struct nfs_server *server;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct pnfs_layoutcommit_data {
++	struct rpc_task task;
++	struct rpc_cred *cred;
++	struct nfs_fattr fattr;
++	struct pnfs_layoutcommit_arg args;
++	struct pnfs_layoutcommit_res res;
++	int status;
++};
++
++struct nfs4_pnfs_layoutreturn_arg {
++	__u32	reclaim;
++	__u32	layout_type;
++	__u32	return_type;
++	struct nfs4_pnfs_layout_segment lseg;
++	struct inode *inode;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_pnfs_layoutreturn_res {
++	struct nfs4_sequence_res seq_res;
++	u32 lrs_present;
++	nfs4_stateid stateid;
++};
++
++struct nfs4_pnfs_layoutreturn {
++	struct nfs4_pnfs_layoutreturn_arg args;
++	struct nfs4_pnfs_layoutreturn_res res;
++	struct rpc_cred *cred;
++	int rpc_status;
++};
++
++struct nfs4_pnfs_getdevicelist_arg {
++	const struct nfs_fh *fh;
++	u32 layoutclass;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_pnfs_getdevicelist_res {
++	struct pnfs_devicelist *devlist;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct nfs4_pnfs_getdeviceinfo_arg {
++	struct pnfs_device *pdev;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_pnfs_getdeviceinfo_res {
++	struct pnfs_device *pdev;
++	struct nfs4_sequence_res seq_res;
++};
++
++#endif /* LINUX_PNFS_XDR_H */
+diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/include/linux/posix_acl.h
+--- linux-2.6.34.noarch/include/linux/posix_acl.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-08-23 12:09:03.393501437 -0400
+@@ -8,6 +8,7 @@
+ #ifndef __LINUX_POSIX_ACL_H
+ #define __LINUX_POSIX_ACL_H
+ 
++#include <linux/fs.h>
+ #include <linux/slab.h>
+ 
+ #define ACL_UNDEFINED_ID	(-1)
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-08-23 12:09:03.393501437 -0400
+@@ -14,6 +14,8 @@
+ /* size of an XDR encoding unit in bytes, i.e. 32bit */
+ #define XDR_UNIT	(4)
+ 
++#include <linux/types.h>
++
+ /* spec defines authentication flavor as an unsigned 32 bit integer */
+ typedef u32	rpc_authflavor_t;
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-08-23 12:09:03.394512138 -0400
+@@ -3,6 +3,7 @@
+ 
+ #ifdef __KERNEL__
+ 
++#include <linux/fs.h>
+ #include <linux/workqueue.h>
+ 
+ struct rpc_pipe_msg {
+@@ -11,6 +12,10 @@ struct rpc_pipe_msg {
+ 	size_t len;
+ 	size_t copied;
+ 	int errno;
++#define PIPEFS_AUTOFREE_RPCMSG       0x01 /* frees rpc_pipe_msg */
++#define PIPEFS_AUTOFREE_RPCMSG_DATA  0x02 /* frees rpc_pipe_msg->data */
++#define PIPEFS_AUTOFREE_UPCALL_MSG   PIPEFS_AUTOFREE_RPCMSG_DATA
++	u8 flags;
+ };
+ 
+ struct rpc_pipe_ops {
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-08-23 12:09:03.394512138 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-08-23 12:09:03.395501822 -0400
+@@ -0,0 +1,111 @@
++/*
++ *  Copyright (c) 2008 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  David M. Richter <richterd@citi.umich.edu>
++ *
++ *  Drawing on work done by Andy Adamson <andros@citi.umich.edu> and
++ *  Marius Eriksen <marius@monkey.org>.  Thanks for the help over the
++ *  years, guys.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *  With thanks to CITI's project sponsor and partner, IBM.
++ */
++
++#ifndef _SIMPLE_RPC_PIPEFS_H_
++#define _SIMPLE_RPC_PIPEFS_H_
++
++#include <linux/fs.h>
++#include <linux/list.h>
++#include <linux/mount.h>
++#include <linux/sched.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++
++
++#define payload_of(headerp)  ((void *)(headerp + 1))
++
++/*
++ * struct pipefs_hdr -- the generic message format for simple_rpc_pipefs.
++ * Messages may simply be the header itself, although having an optional
++ * data payload follow the header allows much more flexibility.
++ *
++ * Messages are created using pipefs_alloc_init_msg() and
++ * pipefs_alloc_init_msg_padded(), both of which accept a pointer to an
++ * (optional) data payload.
++ *
++ * Given a struct pipefs_hdr *msg that has a struct foo payload, the data
++ * can be accessed using: struct foo *foop = payload_of(msg)
++ */
++struct pipefs_hdr {
++	u32 msgid;
++	u8  type;
++	u8  flags;
++	u16 totallen; /* length of entire message, including hdr itself */
++	u32 status;
++};
++
++/*
++ * struct pipefs_list -- a type of list used for tracking callers who've made an
++ * upcall and are blocked waiting for a reply.
++ *
++ * See pipefs_queue_upcall_waitreply() and pipefs_assign_upcall_reply().
++ */
++struct pipefs_list {
++	struct list_head list;
++	spinlock_t list_lock;
++};
++
++
++/* See net/sunrpc/simple_rpc_pipefs.c for more info on using these functions. */
++extern struct dentry *pipefs_mkpipe(const char *name,
++				    const struct rpc_pipe_ops *ops,
++				    int wait_for_open);
++extern void pipefs_closepipe(struct dentry *pipe);
++extern void pipefs_init_list(struct pipefs_list *list);
++extern struct pipefs_hdr *pipefs_alloc_init_msg(u32 msgid, u8 type, u8 flags,
++						void *data, u16 datalen);
++extern struct pipefs_hdr *pipefs_alloc_init_msg_padded(u32 msgid, u8 type,
++						       u8 flags, void *data,
++						       u16 datalen, u16 padlen);
++extern struct pipefs_hdr *pipefs_queue_upcall_waitreply(struct dentry *pipe,
++							struct pipefs_hdr *msg,
++							struct pipefs_list
++							*uplist, u8 upflags,
++							u32 timeout);
++extern int pipefs_queue_upcall_noreply(struct dentry *pipe,
++				       struct pipefs_hdr *msg, u8 upflags);
++extern int pipefs_assign_upcall_reply(struct pipefs_hdr *reply,
++				      struct pipefs_list *uplist);
++extern struct pipefs_hdr *pipefs_readmsg(struct file *filp,
++					 const char __user *src, size_t len);
++extern ssize_t pipefs_generic_upcall(struct file *filp,
++				     struct rpc_pipe_msg *rpcmsg,
++				     char __user *dst, size_t buflen);
++extern void pipefs_generic_destroy_msg(struct rpc_pipe_msg *rpcmsg);
++
++#endif /* _SIMPLE_RPC_PIPEFS_H_ */
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-08-23 12:09:03.395501822 -0400
+@@ -166,4 +166,41 @@ static inline char *__svc_print_addr(con
+ 
+ 	return buf;
+ }
++
++/*
++ * Print a network address in a universal format (see rfc1833 and nfsv4.1)
++ */
++static inline int __svc_print_netaddr(struct sockaddr *addr,
++				      struct xdr_netobj *na)
++{
++	u16 port;
++	ssize_t len;
++
++	switch (addr->sa_family) {
++	case AF_INET: {
++		struct sockaddr_in *sin = (struct sockaddr_in *)addr;
++		port = ntohs(sin->sin_port);
++
++		len = snprintf(na->data, na->len, "%pI4.%u.%u",
++				&sin->sin_addr,
++				port >> 8, port & 0xff);
++		break;
++	}
++	case AF_INET6: {
++		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
++		port = ntohs(sin6->sin6_port);
++
++		len = snprintf(na->data, na->len, "%pI6.%u.%u",
++				&sin6->sin6_addr,
++				port >> 8, port & 0xff);
++		break;
++	}
++	default:
++		snprintf(na->data, na->len, "unknown address type: %d",
++			 addr->sa_family);
++		len = -EINVAL;
++		break;
++	}
++	return len;
++}
+ #endif /* SUNRPC_SVC_XPRT_H */
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xdr.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-08-23 12:08:29.066475323 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-23 12:09:03.396464612 -0400
+@@ -131,6 +131,13 @@ xdr_decode_hyper(__be32 *p, __u64 *valp)
+ 	return p + 2;
+ }
+ 
++static inline __be32 *
++xdr_decode_opaque_fixed(__be32 *p, void *ptr, unsigned int len)
++{
++	memcpy(ptr, p, len);
++	return p + XDR_QUADLEN(len);
++}
++
+ /*
+  * Adjust kvec to reflect end of xdr'ed data (RPC client XDR)
+  */
+@@ -197,6 +204,7 @@ struct xdr_stream {
+ 
+ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
++extern __be32 *xdr_rewind_stream(struct xdr_stream *xdr, __be32 *q);
+ extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
+ 		unsigned int base, unsigned int len);
+ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+diff -up linux-2.6.34.noarch/localversion-pnfs.orig linux-2.6.34.noarch/localversion-pnfs
+--- linux-2.6.34.noarch/localversion-pnfs.orig	2010-08-23 12:09:03.396464612 -0400
++++ linux-2.6.34.noarch/localversion-pnfs	2010-08-23 12:09:03.396464612 -0400
+@@ -0,0 +1 @@
++-pnfs
+diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/sunrpc/Makefile
+--- linux-2.6.34.noarch/net/sunrpc/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-08-23 12:09:03.397501662 -0400
+@@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprt
+ 	    svc.o svcsock.o svcauth.o svcauth_unix.o \
+ 	    addr.o rpcb_clnt.o timer.o xdr.o \
+ 	    sunrpc_syms.o cache.o rpc_pipe.o \
+-	    svc_xprt.o
++	    svc_xprt.o simple_rpc_pipefs.o
+ sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o
+ sunrpc-$(CONFIG_PROC_FS) += stats.o
+ sunrpc-$(CONFIG_SYSCTL) += sysctl.o
+diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c
+--- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-08-23 12:09:03.398522348 -0400
++++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-08-23 12:09:03.398522348 -0400
+@@ -0,0 +1,424 @@
++/*
++ *  net/sunrpc/simple_rpc_pipefs.c
++ *
++ *  Copyright (c) 2008 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  David M. Richter <richterd@citi.umich.edu>
++ *
++ *  Drawing on work done by Andy Adamson <andros@citi.umich.edu> and
++ *  Marius Eriksen <marius@monkey.org>.  Thanks for the help over the
++ *  years, guys.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *  With thanks to CITI's project sponsor and partner, IBM.
++ */
++
++#include <linux/completion.h>
++#include <linux/uaccess.h>
++#include <linux/module.h>
++#include <linux/sunrpc/simple_rpc_pipefs.h>
++
++
++/*
++ * Make an rpc_pipefs pipe named @name at the root of the mounted rpc_pipefs
++ * filesystem.
++ *
++ * If @wait_for_open is non-zero and an upcall is later queued but the userland
++ * end of the pipe has not yet been opened, the upcall will remain queued until
++ * the pipe is opened; otherwise, the upcall queueing will return with -EPIPE.
++ */
++struct dentry *pipefs_mkpipe(const char *name, const struct rpc_pipe_ops *ops,
++			     int wait_for_open)
++{
++	struct dentry *dir, *pipe;
++	struct vfsmount *mnt;
++
++	mnt = rpc_get_mount();
++	if (IS_ERR(mnt))
++		return ERR_CAST(mnt);
++
++	dir = mnt->mnt_root;
++	if (dir)
++		pipe = rpc_mkpipe(dir, name, NULL, ops,
++				  wait_for_open ? RPC_PIPE_WAIT_FOR_OPEN : 0);
++	else
++		pipe = ERR_PTR(-ENOENT);
++	/* no pipe for pipefs_closepipe() to release: drop the mount ref now */
++	if (IS_ERR(pipe))
++		rpc_put_mount();
++	return pipe;
++}
++EXPORT_SYMBOL(pipefs_mkpipe);
++
++/*
++ * Shutdown a pipe made by pipefs_mkpipe().
++ * XXX: do we need to retain an extra reference on the mount?
++ */
++void pipefs_closepipe(struct dentry *pipe)
++{
++	rpc_unlink(pipe);
++	rpc_put_mount();
++}
++EXPORT_SYMBOL(pipefs_closepipe);
++
++/*
++ * Initialize a struct pipefs_list -- which is a way to keep track of callers
++ * who're blocked having made an upcall and are awaiting a reply.
++ *
++ * See pipefs_queue_upcall_waitreply() and pipefs_find_upcall_msgid() for how
++ * to use them.
++ */
++inline void pipefs_init_list(struct pipefs_list *list)
++{
++	INIT_LIST_HEAD(&list->list);
++	spin_lock_init(&list->list_lock);
++}
++EXPORT_SYMBOL(pipefs_init_list);
++
++/*
++ * Alloc/init a generic pipefs message header and copy into its message body
++ * an arbitrary data payload.
++ *
++ * struct pipefs_hdr's are meant to serve as generic, general-purpose message
++ * headers for easy rpc_pipefs I/O.  When an upcall is made, the
++ * struct pipefs_hdr is assigned to a struct rpc_pipe_msg and delivered
++ * therein.  --And yes, the naming can seem a little confusing at first:
++ *
++ * When one thinks of an upcall "message", in simple_rpc_pipefs that's a
++ * struct pipefs_hdr (possibly with an attached message body).  A
++ * struct rpc_pipe_msg is actually only the -vehicle- by which the "real"
++ * message is delivered and processed.
++ */
++struct pipefs_hdr *pipefs_alloc_init_msg_padded(u32 msgid, u8 type, u8 flags,
++					   void *data, u16 datalen, u16 padlen)
++{
++	/* size_t so hdr + datalen + padlen cannot wrap before the size check */
++	size_t totallen = sizeof(struct pipefs_hdr) + datalen + padlen;
++	struct pipefs_hdr *msg;
++
++	if (totallen > PAGE_SIZE || totallen > (u16)~0U) {
++		msg = ERR_PTR(-E2BIG);
++		goto out;
++	}
++
++	msg = kzalloc(totallen, GFP_KERNEL);
++	if (!msg) {
++		msg = ERR_PTR(-ENOMEM);
++		goto out;
++	}
++
++	msg->msgid = msgid;
++	msg->type = type;
++	msg->flags = flags;
++	msg->totallen = totallen;	/* fits: bounded by the u16 max above */
++	memcpy(payload_of(msg), data, datalen);
++out:
++	return msg;
++}
++EXPORT_SYMBOL(pipefs_alloc_init_msg_padded);
++
++/*
++ * Same as pipefs_alloc_init_msg_padded(), with no padding (@padlen == 0).
++ */
++struct pipefs_hdr *pipefs_alloc_init_msg(u32 msgid, u8 type, u8 flags,
++				    void *data, u16 datalen)
++{
++	return pipefs_alloc_init_msg_padded(msgid, type, flags, data,
++					    datalen, 0);
++}
++EXPORT_SYMBOL(pipefs_alloc_init_msg);
++
++/* Zero @rpcmsg, then attach @msg (length msg->totallen) and @upflags to it. */
++static void pipefs_init_rpcmsg(struct rpc_pipe_msg *rpcmsg,
++			       struct pipefs_hdr *msg, u8 upflags)
++{
++	memset(rpcmsg, 0, sizeof(*rpcmsg));
++	rpcmsg->data = msg;
++	rpcmsg->len = msg->totallen;
++	rpcmsg->flags = upflags;
++}
++
++/* kmalloc an rpc_pipe_msg wrapping @msg, or return ERR_PTR(-ENOMEM). */
++static struct rpc_pipe_msg *pipefs_alloc_init_rpcmsg(struct pipefs_hdr *msg,
++						     u8 upflags)
++{
++	struct rpc_pipe_msg *new = kmalloc(sizeof(*new), GFP_KERNEL);
++
++	if (new == NULL)
++		return ERR_PTR(-ENOMEM);
++
++	pipefs_init_rpcmsg(new, msg, upflags);
++	return new;
++}
++
++
++/* represents an upcall that'll block and wait for a reply */
++struct pipefs_upcall {
++	u32 msgid;			/* copied from the upcall message's msgid */
++	struct rpc_pipe_msg rpcmsg;	/* what is handed to rpc_queue_upcall() */
++	struct list_head list;		/* entry on a struct pipefs_list */
++	wait_queue_head_t waitq;	/* the blocked caller waits here */
++	struct pipefs_hdr *reply;	/* set by pipefs_assign_upcall_reply() */
++};
++
++/* Set up @upcall for @msg: clear the reply, init its list, waitq and rpcmsg. */
++static void pipefs_init_upcall_waitreply(struct pipefs_upcall *upcall,
++					 struct pipefs_hdr *msg, u8 upflags)
++{
++	upcall->reply = NULL;
++	upcall->msgid = msg->msgid;
++	INIT_LIST_HEAD(&upcall->list);
++	init_waitqueue_head(&upcall->waitq);
++	pipefs_init_rpcmsg(&upcall->rpcmsg, msg, upflags);
++}
++
++/* Queue @upcall and sleep until woken by a reply or @timeout jiffies pass. */
++static int __pipefs_queue_upcall_waitreply(struct dentry *pipe,
++					   struct pipefs_upcall *upcall,
++					   struct pipefs_list *uplist,
++					   u32 timeout)
++{
++	int err = 0;
++	DECLARE_WAITQUEUE(wq, current);
++
++	add_wait_queue(&upcall->waitq, &wq);
++	spin_lock(&uplist->list_lock);
++	list_add(&upcall->list, &uplist->list);
++	spin_unlock(&uplist->list_lock);
++
++	/* mark us sleeping first, so a reply right after queueing isn't lost */
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	err = rpc_queue_upcall(pipe->d_inode, &upcall->rpcmsg);
++	if (err < 0)
++		goto out;
++	if (timeout) {
++		/* retval of 0 means timer expired */
++		err = schedule_timeout(timeout);
++		if (err == 0 && upcall->reply == NULL)
++			err = -ETIMEDOUT;
++	} else {
++		schedule();
++	}
++out:
++	__set_current_state(TASK_RUNNING);
++	spin_lock(&uplist->list_lock);
++	list_del_init(&upcall->list);
++	spin_unlock(&uplist->list_lock);
++	remove_wait_queue(&upcall->waitq, &wq);
++	return err;
++}
++
++/*
++ * Queue a pipefs msg for an upcall to userspace, place the calling thread
++ * on @uplist, and block the thread to wait for a reply.  If @timeout is
++ * nonzero, the thread will be blocked for at most @timeout jiffies.
++ *
++ * (To convert time units into jiffies, consider the functions
++ *  msecs_to_jiffies(), usecs_to_jiffies(), timeval_to_jiffies(), and
++ *  timespec_to_jiffies().)
++ *
++ * Once a reply is received by your downcall handler, call
++ * pipefs_assign_upcall_reply() with @uplist to find the corresponding upcall,
++ * assign the reply, and wake the waiting thread.
++ *
++ * This function's return value pointer may be an error and should be checked
++ * with IS_ERR() before attempting to access the reply message.
++ *
++ * Callers are responsible for freeing @msg, unless pipefs_generic_destroy_msg()
++ * is used as the ->destroy_msg() callback and the PIPEFS_AUTOFREE_UPCALL_MSG
++ * flag is set in @upflags.  See also rpc_pipe_fs.h.
++ */
++struct pipefs_hdr *pipefs_queue_upcall_waitreply(struct dentry *pipe,
++					    struct pipefs_hdr *msg,
++					    struct pipefs_list *uplist,
++					    u8 upflags, u32 timeout)
++{
++	struct pipefs_upcall waiter;
++	int rc;
++
++	pipefs_init_upcall_waitreply(&waiter, msg, upflags);
++	rc = __pipefs_queue_upcall_waitreply(pipe, &waiter, uplist, timeout);
++	if (rc >= 0)
++		return waiter.reply;
++
++	/* failed or timed out: drop whatever reply may have been assigned */
++	kfree(waiter.reply);
++	return ERR_PTR(rc);
++}
++EXPORT_SYMBOL(pipefs_queue_upcall_waitreply);
++
++/*
++ * Queue a pipefs msg for an upcall to userspace and immediately return (i.e.,
++ * no reply is expected).  On failure the internal rpc_pipe_msg is freed here.
++ *
++ * Callers are responsible for freeing @msg, unless pipefs_generic_destroy_msg()
++ * is used as the ->destroy_msg() callback and the PIPEFS_AUTOFREE_UPCALL_MSG
++ * flag is set in @upflags.  See also rpc_pipe_fs.h.
++ */
++int pipefs_queue_upcall_noreply(struct dentry *pipe, struct pipefs_hdr *msg,
++				u8 upflags)
++{
++	int err;
++	struct rpc_pipe_msg *rpcmsg;
++
++	upflags |= PIPEFS_AUTOFREE_RPCMSG;
++	rpcmsg = pipefs_alloc_init_rpcmsg(msg, upflags);
++	if (IS_ERR(rpcmsg))
++		return PTR_ERR(rpcmsg);
++
++	err = rpc_queue_upcall(pipe->d_inode, rpcmsg);
++	if (err < 0)
++		kfree(rpcmsg);	/* never queued, so ->destroy_msg() won't run */
++	return err;
++}
++EXPORT_SYMBOL(pipefs_queue_upcall_noreply);
++
++
++/* Find the upcall waiting on @msgid; caller must hold uplist->list_lock. */
++static struct pipefs_upcall *pipefs_find_upcall_msgid(u32 msgid,
++						 struct pipefs_list *uplist)
++{
++	struct pipefs_upcall *upcall;
++
++	list_for_each_entry(upcall, &uplist->list, list)
++		if (upcall->msgid == msgid)
++			return upcall;
++	return NULL;
++}
++
++/*
++ * In your rpc_pipe_ops->downcall() handler, once you've read in a downcall
++ * message and have determined that it is a reply to a waiting upcall,
++ * you can use this function to find the appropriate upcall, assign the result,
++ * and wake the upcall thread.  Returns 0, or -ENOENT if nobody is waiting.
++ *
++ * The reply message must have the same msgid as the original upcall message's.
++ *
++ * See also pipefs_queue_upcall_waitreply() and pipefs_readmsg().
++ */
++int pipefs_assign_upcall_reply(struct pipefs_hdr *reply,
++			       struct pipefs_list *uplist)
++{
++	int err = 0;
++	struct pipefs_upcall *upcall;
++
++	/* the waiter's upcall lives on its stack: only touch it under the lock */
++	spin_lock(&uplist->list_lock);
++	upcall = pipefs_find_upcall_msgid(reply->msgid, uplist);
++	if (upcall) {
++		upcall->reply = reply;
++		wake_up(&upcall->waitq);
++	} else {
++		err = -ENOENT;
++	}
++	spin_unlock(&uplist->list_lock);
++	if (err)
++		printk(KERN_ERR "%s: ERROR: have reply but no matching upcall "
++			"for msgid %d\n", __func__, reply->msgid);
++	return err;
++}
++EXPORT_SYMBOL(pipefs_assign_upcall_reply);
++
++/*
++ * Generic method to read-in and return a newly-allocated message which begins
++ * with a struct pipefs_hdr.
++ *
++ * Copies @len bytes from the userspace buffer @src into a kzalloc()ed buffer.
++ * Returns that buffer, or an ERR_PTR(): -EINVAL if @len is smaller than the
++ * header, -ENOMEM, or -EFAULT if the copy faults.  @filp is not used.
++ */
++struct pipefs_hdr *pipefs_readmsg(struct file *filp, const char __user *src,
++			     size_t len)
++{
++	struct pipefs_hdr *msg;
++	int hdrsize = sizeof(*msg);
++
++	/* a message must at least hold a complete header */
++	if (len < hdrsize) {
++		printk(KERN_ERR "%s: ERROR: header is too short (%d vs %d)\n",
++		       __func__, (int) len, hdrsize);
++		return ERR_PTR(-EINVAL);
++	}
++
++	msg = kzalloc(len, GFP_KERNEL);
++	if (msg == NULL)
++		return ERR_PTR(-ENOMEM);
++
++	/* pull the whole message (header and body) in from userspace */
++	if (copy_from_user(msg, src, len)) {
++		kfree(msg);
++		return ERR_PTR(-EFAULT);
++	}
++	return msg;
++}
++EXPORT_SYMBOL(pipefs_readmsg);
++
++/*
++ * Generic rpc_pipe_ops->upcall() handler: copies what fits of the message to
++ * @dst and returns the byte count, or -EFAULT if nothing could be copied.
++ * Don't call this directly: to make an upcall, use
++ * pipefs_queue_upcall_waitreply() or pipefs_queue_upcall_noreply().
++ */
++ssize_t pipefs_generic_upcall(struct file *filp, struct rpc_pipe_msg *rpcmsg,
++			      char __user *dst, size_t buflen)
++{
++	char *data;
++	ssize_t len, left;
++
++	data = (char *)rpcmsg->data + rpcmsg->copied;
++	len = rpcmsg->len - rpcmsg->copied;
++	if (len > buflen)
++		len = buflen;
++
++	left = copy_to_user(dst, data, len);	/* bytes NOT copied, never < 0 */
++	if (len && left == len) {
++		rpcmsg->errno = -EFAULT;
++		return -EFAULT;
++	}
++
++	len -= left;
++	rpcmsg->copied += len;
++	rpcmsg->errno = 0;
++	return len;
++}
++EXPORT_SYMBOL(pipefs_generic_upcall);
++
++/*
++ * Generic rpc_pipe_ops->destroy_msg() handler implementation.
++ * kfree()s @rpcmsg->data if PIPEFS_AUTOFREE_UPCALL_MSG is set in its flags,
++ * and @rpcmsg itself if PIPEFS_AUTOFREE_RPCMSG is set.
++ * See pipefs_queue_upcall_noreply() and rpc_pipe_fs.h.
++ */
++void pipefs_generic_destroy_msg(struct rpc_pipe_msg *rpcmsg)
++{
++	if (rpcmsg->flags & PIPEFS_AUTOFREE_UPCALL_MSG)
++		kfree(rpcmsg->data);
++	if (rpcmsg->flags & PIPEFS_AUTOFREE_RPCMSG)
++		kfree(rpcmsg);
++}
++EXPORT_SYMBOL(pipefs_generic_destroy_msg);
+diff -up linux-2.6.34.noarch/net/sunrpc/xdr.c.orig linux-2.6.34.noarch/net/sunrpc/xdr.c
+--- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-08-23 12:08:29.081501640 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-23 12:09:03.399443371 -0400
+@@ -403,16 +403,14 @@ xdr_shrink_pagelen(struct xdr_buf *buf, 
+ 
+ 	/* Shift the tail first */
+ 	if (tail->iov_len != 0) {
+-		p = (char *)tail->iov_base + len;
+-		if (tail->iov_len > len) {
+-			copy = tail->iov_len - len;
+-			memmove(p, tail->iov_base, copy);
+-		} else
+-			buf->buflen -= len;
+-		/* Copy from the inlined pages into the tail */
+ 		copy = len;
+-		if (copy > tail->iov_len)
++		if (tail->iov_len > len) {
++			p = (char *)tail->iov_base + len;
++			memmove(p, tail->iov_base, tail->iov_len - len);
++		} else {
+ 			copy = tail->iov_len;
++		}
++		/* Copy from the inlined pages into the tail */
+ 		_copy_from_pages((char *)tail->iov_base,
+ 				buf->pages, buf->page_base + pglen - len,
+ 				copy);
+@@ -496,6 +494,27 @@ __be32 * xdr_reserve_space(struct xdr_st
+ EXPORT_SYMBOL_GPL(xdr_reserve_space);
+ 
+ /**
++ * xdr_rewind_stream - rewind a stream back to some checkpoint
++ * @xdr: pointer to xdr_stream
++ * @q: some checkpoint at historical place of @xdr
++ *
++ * Restores an xdr stream to some historical point. @q must be
++ * a logical xdr point in the past that was sampled by @q = @xdr->p.
++ */
++__be32 *xdr_rewind_stream(struct xdr_stream *xdr, __be32 *q)
++{
++	size_t nbytes = (xdr->p - q) << 2;	/* 32-bit words -> bytes */
++
++	BUG_ON(xdr->p < q);	/* @q must not lie ahead of the stream */
++	BUG_ON(nbytes > xdr->iov->iov_len || nbytes > xdr->buf->len);
++	xdr->p = q;
++	xdr->iov->iov_len -= nbytes;	/* shrink both lengths by the rewind */
++	xdr->buf->len -= nbytes;
++	return q;
++}
++EXPORT_SYMBOL_GPL(xdr_rewind_stream);
++
++/**
+  * xdr_write_pages - Insert a list of pages into an XDR buffer for sending
+  * @xdr: pointer to xdr_stream
+  * @pages: list of pages

From b07c836a880dbed3d7509ad31b42a05c8270cac3 Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Mon, 23 Aug 2010 14:15:46 -0400
Subject: [PATCH 06/20] Fixed a couple compile errors in the server code.

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 nfsd-35-fc.patch | 42 +++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/nfsd-35-fc.patch b/nfsd-35-fc.patch
index ef99b4995..2825464af 100644
--- a/nfsd-35-fc.patch
+++ b/nfsd-35-fc.patch
@@ -1,6 +1,6 @@
 diff -up linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt
 --- linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt	2010-08-23 09:57:18.233564439 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt	2010-08-23 14:12:24.165356789 -0400
 @@ -137,7 +137,7 @@ NS*| OPENATTR             | OPT        |
     | READ                 | REQ        |              | Section 18.22  |
     | READDIR              | REQ        |              | Section 18.23  |
@@ -12,7 +12,7 @@ diff -up linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig
     | RENAME               | REQ        |              | Section 18.26  |
 diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
 --- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 09:57:18.234564075 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 14:12:24.519356675 -0400
 @@ -259,10 +259,9 @@ static struct cache_detail svc_expkey_ca
  	.alloc		= expkey_alloc,
  };
@@ -108,7 +108,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/e
  out_put_clp:
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
 --- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 10:00:37.257414684 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 14:12:52.625429773 -0400
 @@ -79,11 +79,6 @@ enum nfs_cb_opnum4 {
  					cb_sequence_dec_sz +            \
  					op_dec_sz)
@@ -211,7 +211,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/
  	int status;
  
 -	status = rpc_call_async(cb->cb_client, &msg,
-+	status = rpc_call_async(cb->cl_cb_client, &msg,
++	status = rpc_call_async(clp->cl_cb_client, &msg,
  				RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
  				&nfsd4_cb_probe_ops, (void *)clp);
 -	if (status) {
@@ -402,7 +402,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/
  }
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
 --- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 09:57:18.237376763 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 14:12:25.698356909 -0400
 @@ -969,20 +969,36 @@ static struct nfsd4_operation nfsd4_ops[
  static const char *nfsd4_op_name(unsigned opnum);
  
@@ -490,7 +490,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd
  static const char *nfsd4_op_name(unsigned opnum)
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
 --- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 09:57:18.240356512 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 14:12:25.700356284 -0400
 @@ -45,8 +45,8 @@
  #define NFSDDBG_FACILITY                NFSDDBG_PROC
  
@@ -1280,9 +1280,21 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs
 -{
 -	user_lease_time = leasetime;
 -}
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-08-23 14:14:22.882428704 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-23 14:14:33.418376589 -0400
+@@ -1900,7 +1900,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s
+ 	if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
+ 		if ((buflen -= 4) < 0)
+ 			goto out_resource;
+-		WRITE32(NFSD_LEASE_TIME);
++		WRITE32(nfsd4_lease);
+ 	}
+ 	if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
+ 		if ((buflen -= 4) < 0)
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
 --- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 09:57:20.629370282 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 14:12:25.821359224 -0400
 @@ -46,6 +46,7 @@ enum {
  	 */
  #ifdef CONFIG_NFSD_V4
@@ -1403,7 +1415,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/n
  		/* last one */ {""}
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
 --- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 09:57:20.629370282 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 14:12:25.835418441 -0400
 @@ -82,7 +82,6 @@ int nfs4_state_init(void);
  void nfsd4_free_slabs(void);
  int nfs4_state_start(void);
@@ -1440,7 +1452,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfs
  /*
 diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
 --- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 09:57:21.807501619 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 14:12:25.836366516 -0400
 @@ -70,6 +70,16 @@ struct nfsd4_cb_sequence {
  	struct nfs4_client	*cbs_clp;
  };
@@ -1558,7 +1570,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/st
  nfs4_put_stateowner(struct nfs4_stateowner *so)
 diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
 --- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 09:57:23.994379831 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 14:12:25.837387292 -0400
 @@ -381,6 +381,10 @@ struct nfsd4_destroy_session {
  	struct nfs4_sessionid	sessionid;
  };
@@ -1600,7 +1612,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr
  extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h
 --- linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h	2010-08-23 09:57:23.994379831 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h	2010-08-23 14:12:25.838377224 -0400
 @@ -40,12 +40,12 @@ struct nfs_fhbase_old {
   * This is the new flexible, extensible style NFSv2/v3 file handle.
   * by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
@@ -1619,7 +1631,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig linux-2.6.34.noarch
   * This might allow a file to be confirmed to be in a writable part of a
 diff -up linux-2.6.34.noarch/net/sunrpc/cache.c.orig linux-2.6.34.noarch/net/sunrpc/cache.c
 --- linux-2.6.34.noarch/net/sunrpc/cache.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/cache.c	2010-08-23 09:57:23.995376793 -0400
++++ linux-2.6.34.noarch/net/sunrpc/cache.c	2010-08-23 14:12:25.839376838 -0400
 @@ -49,11 +49,17 @@ static void cache_init(struct cache_head
  	h->last_refresh = now;
  }
@@ -1686,7 +1698,7 @@ diff -up linux-2.6.34.noarch/net/sunrpc/cache.c.orig linux-2.6.34.noarch/net/sun
  		/* entry is valid */
 diff -up linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig linux-2.6.34.noarch/net/sunrpc/svcsock.c
 --- linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/svcsock.c	2010-08-23 09:57:23.997368707 -0400
++++ linux-2.6.34.noarch/net/sunrpc/svcsock.c	2010-08-23 14:12:25.840384371 -0400
 @@ -547,7 +547,6 @@ static int svc_udp_recvfrom(struct svc_r
  			dprintk("svc: recvfrom returned error %d\n", -err);
  			set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
@@ -1753,7 +1765,7 @@ diff -up linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig linux-2.6.34.noarch/net/s
  error:
 diff -up linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig linux-2.6.34.noarch/net/sunrpc/svc_xprt.c
 --- linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/svc_xprt.c	2010-08-23 09:57:23.996377209 -0400
++++ linux-2.6.34.noarch/net/sunrpc/svc_xprt.c	2010-08-23 14:12:25.841371223 -0400
 @@ -744,8 +744,10 @@ int svc_recv(struct svc_rqst *rqstp, lon
  		if (rqstp->rq_deferred) {
  			svc_xprt_received(xprt);
@@ -1782,7 +1794,7 @@ diff -up linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig linux-2.6.34.noarch/net/
  void svc_close_xprt(struct svc_xprt *xprt)
 diff -up linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c.orig linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
 --- linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	2010-08-23 09:57:23.998377481 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	2010-08-23 14:12:25.842376584 -0400
 @@ -566,7 +566,6 @@ static int rdma_read_complete(struct svc
  		ret, rqstp->rq_arg.len,	rqstp->rq_arg.head[0].iov_base,
  		rqstp->rq_arg.head[0].iov_len);

From 2121c4cc7dfeed915de92d51ac364c76bf4b5b6e Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Tue, 24 Aug 2010 14:49:23 -0400
Subject: [PATCH 07/20] Removed the localversion-pnfs file from the pnfs patch

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 kernel.spec                          |   2 +-
 pnfs-all-2.6.35-2010-08-19-f13.patch | 395 +++++++++++++--------------
 2 files changed, 196 insertions(+), 201 deletions(-)

diff --git a/kernel.spec b/kernel.spec
index 14956777b..4fb3481f3 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -23,7 +23,7 @@ Summary: The Linux kernel
 #
 # (Uncomment the '#' and both spaces below to set the buildid.)
 #
-%define buildid .pnfs_all_2.6.35_2010_08_19
+%define buildid .pnfs34.2010.08.19
 ###################################################################
 
 # The buildid can also be specified on the rpmbuild command line
diff --git a/pnfs-all-2.6.35-2010-08-19-f13.patch b/pnfs-all-2.6.35-2010-08-19-f13.patch
index a9d78ba0e..10df9b15c 100644
--- a/pnfs-all-2.6.35-2010-08-19-f13.patch
+++ b/pnfs-all-2.6.35-2010-08-19-f13.patch
@@ -1,6 +1,6 @@
 diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arch/um/os-Linux/mem.c
---- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-08-23 12:08:27.310584826 -0400
-+++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-08-23 12:09:03.273553977 -0400
+--- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-08-24 14:14:03.643355000 -0400
++++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-08-24 14:17:48.415730000 -0400
 @@ -13,6 +13,7 @@
  #include <sys/stat.h>
  #include <sys/mman.h>
@@ -11,7 +11,7 @@ diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arc
  #include "os.h"
 diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.c
 --- linux-2.6.34.noarch/block/genhd.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/block/genhd.c	2010-08-23 12:09:03.273553977 -0400
++++ linux-2.6.34.noarch/block/genhd.c	2010-08-24 14:17:48.421730000 -0400
 @@ -1009,6 +1009,7 @@ static void disk_release(struct device *
  struct class block_class = {
  	.name		= "block",
@@ -21,8 +21,8 @@ diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.
  static char *block_devnode(struct device *dev, mode_t *mode)
  {
 diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt
---- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-08-23 12:09:03.274563927 -0400
-+++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-08-23 12:09:03.274563927 -0400
+--- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-08-24 14:17:48.423729000 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-08-24 14:17:48.425730000 -0400
 @@ -0,0 +1,211 @@
 +(c) 2007 Network Appliance Inc.
 +
@@ -237,7 +237,7 @@ diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.
 +
 diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/drivers/md/dm-ioctl.c
 --- linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-08-23 12:09:03.275584050 -0400
++++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-08-24 14:17:48.430730000 -0400
 @@ -651,6 +651,12 @@ static int dev_create(struct dm_ioctl *p
  	return r;
  }
@@ -292,7 +292,7 @@ diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/driv
  	int r;
 diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drivers/scsi/hosts.c
 --- linux-2.6.34.noarch/drivers/scsi/hosts.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-08-23 12:09:03.276563906 -0400
++++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-08-24 14:17:48.435733000 -0400
 @@ -49,7 +49,7 @@ static void scsi_host_cls_release(struct
  	put_device(&class_to_shost(dev)->shost_gendev);
  }
@@ -304,7 +304,7 @@ diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drive
  };
 diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/exofs.h
 --- linux-2.6.34.noarch/fs/exofs/exofs.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-08-23 12:09:03.277563890 -0400
++++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-08-24 14:17:48.440733000 -0400
 @@ -36,13 +36,9 @@
  #include <linux/fs.h>
  #include <linux/time.h>
@@ -360,8 +360,8 @@ diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/
 +
  #endif
 diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs/export.c
---- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-08-23 12:09:03.278386746 -0400
-+++ linux-2.6.34.noarch/fs/exofs/export.c	2010-08-23 12:09:03.278386746 -0400
+--- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-08-24 14:17:48.444731000 -0400
++++ linux-2.6.34.noarch/fs/exofs/export.c	2010-08-24 14:17:48.446730000 -0400
 @@ -0,0 +1,396 @@
 +/*
 + * export.c - Implementation of the pnfs_export_operations
@@ -761,7 +761,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs
 +}
 diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/inode.c
 --- linux-2.6.34.noarch/fs/exofs/inode.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-08-23 12:09:03.279502002 -0400
++++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-08-24 14:17:48.452730000 -0400
 @@ -833,7 +833,7 @@ void exofs_truncate(struct inode *inode)
  	if (unlikely(wait_obj_created(oi)))
  		goto fail;
@@ -781,7 +781,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/
   * Fill in an inode read from the OSD and set it up for use
 diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/Kbuild
 --- linux-2.6.34.noarch/fs/exofs/Kbuild.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-08-23 12:09:03.279502002 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-08-24 14:17:48.457733000 -0400
 @@ -13,4 +13,5 @@
  #
  
@@ -790,7 +790,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/K
  obj-$(CONFIG_EXOFS_FS) += exofs.o
 diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/Kconfig
 --- linux-2.6.34.noarch/fs/exofs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-08-23 12:09:03.280553663 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-08-24 14:17:48.462739000 -0400
 @@ -1,6 +1,7 @@
  config EXOFS_FS
  	tristate "exofs: OSD based file system support"
@@ -801,7 +801,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/
  	  as its backing storage.
 diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/super.c
 --- linux-2.6.34.noarch/fs/exofs/super.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/super.c	2010-08-23 12:09:03.281511951 -0400
++++ linux-2.6.34.noarch/fs/exofs/super.c	2010-08-24 14:17:48.468730000 -0400
 @@ -621,6 +621,7 @@ static int exofs_fill_super(struct super
  	sb->s_fs_info = sbi;
  	sb->s_op = &exofs_sops;
@@ -812,7 +812,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/
  		EXOFS_ERR("ERROR: exofs_iget failed\n");
 diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exportfs/expfs.c
 --- linux-2.6.34.noarch/fs/exportfs/expfs.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-08-23 12:09:03.282511528 -0400
++++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-08-24 14:17:48.473730000 -0400
 @@ -16,6 +16,13 @@
  #include <linux/namei.h>
  #include <linux/sched.h>
@@ -829,7 +829,7 @@ diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exp
  
 diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/exportfs/Makefile
 --- linux-2.6.34.noarch/fs/exportfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-08-23 12:09:03.282511528 -0400
++++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-08-24 14:17:48.478733000 -0400
 @@ -3,4 +3,7 @@
  
  obj-$(CONFIG_EXPORTFS) += exportfs.o
@@ -840,8 +840,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/ex
 +exportfs-$(CONFIG_EXPORTFS_OSD_LAYOUT)	+= pnfs_osd_xdr_srv.o
 +exportfs-$(CONFIG_EXPORTFS_BLOCK_LAYOUT) += nfs4blocklayoutxdr.o
 diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c
---- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-08-23 12:09:03.283511561 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-08-23 12:09:03.283511561 -0400
+--- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-08-24 14:17:48.482731000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-08-24 14:17:48.484734000 -0400
 @@ -0,0 +1,158 @@
 +/*
 + *  linux/fs/nfsd/nfs4blocklayoutxdr.c
@@ -1002,8 +1002,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.
 +}
 +EXPORT_SYMBOL_GPL(blocklayout_encode_layout);
 diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c
---- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-08-23 12:09:03.283511561 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-08-23 12:09:03.283511561 -0400
+--- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-08-24 14:17:48.487733000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-08-24 14:17:48.489734000 -0400
 @@ -0,0 +1,218 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -1224,8 +1224,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.n
 +}
 +EXPORT_SYMBOL(filelayout_encode_layout);
 diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c
---- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-08-23 12:09:03.284511493 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-08-23 12:09:03.284511493 -0400
+--- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-08-24 14:17:48.493729000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-08-24 14:17:48.494735000 -0400
 @@ -0,0 +1,289 @@
 +/*
 + *  pnfs_osd_xdr_enc.c
@@ -1518,7 +1518,7 @@ diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.no
 +EXPORT_SYMBOL(pnfs_osd_xdr_decode_ioerr);
 diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gfs2/ops_fstype.c
 --- linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-08-23 12:09:03.285539075 -0400
++++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-08-24 14:17:48.499730000 -0400
 @@ -19,6 +19,7 @@
  #include <linux/gfs2_ondisk.h>
  #include <linux/slow-work.h>
@@ -1539,7 +1539,7 @@ diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gf
  	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
 diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
 --- linux-2.6.34.noarch/fs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/Kconfig	2010-08-23 12:09:03.286512316 -0400
++++ linux-2.6.34.noarch/fs/Kconfig	2010-08-24 14:17:48.505733000 -0400
 @@ -224,6 +224,31 @@ config LOCKD_V4
  config EXPORTFS
  	tristate
@@ -1573,8 +1573,8 @@ diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
  	tristate
  	select FS_POSIX_ACL
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-08-23 12:09:03.287381619 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-08-23 12:09:03.287381619 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-08-24 14:17:48.509734000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-08-24 14:17:48.511732000 -0400
 @@ -0,0 +1,66 @@
 +#include <linux/module.h>
 +#include <linux/uaccess.h>
@@ -1643,8 +1643,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.or
 +	return;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-08-23 12:09:03.288501648 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-08-23 12:09:03.288501648 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-08-24 14:17:48.514733000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-08-24 14:17:48.516731000 -0400
 @@ -0,0 +1,1160 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.c
@@ -2807,8 +2807,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.
 +module_init(nfs4blocklayout_init);
 +module_exit(nfs4blocklayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-08-23 12:09:03.289501933 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-08-23 12:09:03.289501933 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-08-24 14:17:48.519731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-08-24 14:17:48.521730000 -0400
 @@ -0,0 +1,335 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayoutdev.c
@@ -3146,8 +3146,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.
 +	goto out;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-08-23 12:09:03.290395707 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-08-23 12:09:03.290395707 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-08-24 14:17:48.523733000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-08-24 14:17:48.525730000 -0400
 @@ -0,0 +1,120 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayoutdm.c
@@ -3270,8 +3270,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.3
 +	}
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-08-23 12:09:03.290395707 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-08-23 12:09:03.291501560 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-08-24 14:17:48.528729000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-08-24 14:17:48.529735000 -0400
 @@ -0,0 +1,303 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.h
@@ -3577,8 +3577,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.
 +
 +#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-08-23 12:09:03.292511531 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-08-23 12:09:03.292511531 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-08-24 14:17:48.532731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-08-24 14:17:48.534734000 -0400
 @@ -0,0 +1,948 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.h
@@ -4529,8 +4529,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noar
 +	}
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile
---- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-08-23 12:09:03.292511531 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-08-23 12:09:03.293491476 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-08-24 14:17:48.537729000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-08-24 14:17:48.538739000 -0400
 @@ -0,0 +1,6 @@
 +#
 +# Makefile for the pNFS block layout driver kernel module
@@ -4540,7 +4540,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarc
 +			extents.o block-device-discovery-pipe.o
 diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/callback.h
 --- linux-2.6.34.noarch/fs/nfs/callback.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-08-23 12:09:03.293491476 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-08-24 14:17:48.544730000 -0400
 @@ -8,6 +8,8 @@
  #ifndef __LINUX_FS_NFS_CALLBACK_H
  #define __LINUX_FS_NFS_CALLBACK_H
@@ -4613,7 +4613,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/c
  extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
 diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/nfs/callback_proc.c
 --- linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-08-23 12:09:03.294522414 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-08-24 14:17:48.562731000 -0400
 @@ -8,10 +8,15 @@
  #include <linux/nfs4.h>
  #include <linux/nfs_fs.h>
@@ -5096,7 +5096,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/
  	return status;
 diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/nfs/callback_xdr.c
 --- linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-08-23 12:09:03.295502055 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-08-24 14:17:48.568730000 -0400
 @@ -22,6 +22,8 @@
  #define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
  
@@ -5298,8 +5298,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/n
  		.process_op = (callback_process_op_t)nfs4_callback_sequence,
  		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
 diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/client.c
---- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-08-23 12:08:29.037481540 -0400
-+++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-23 12:09:03.297501650 -0400
+--- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-08-24 14:14:13.062705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-24 14:17:48.575730000 -0400
 @@ -39,6 +39,7 @@
  #include <net/ipv6.h>
  #include <linux/nfs_xdr.h>
@@ -5508,8 +5508,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/cli
  		goto error;
  
 diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/bl_com.c
---- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-08-23 12:09:03.297501650 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-08-23 12:09:03.298501447 -0400
+--- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-08-24 14:17:48.578729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-08-24 14:17:48.579735000 -0400
 @@ -0,0 +1,292 @@
 +#if defined(CONFIG_SPNFS_BLOCK)
 +
@@ -5804,8 +5804,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/b
 +}
 +#endif /* CONFIG_SPNFS_BLOCK */
 diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/bl_ops.c
---- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-08-23 12:09:03.299501445 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-08-23 12:09:03.299501445 -0400
+--- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-08-24 14:17:48.584729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-08-24 14:17:48.586730000 -0400
 @@ -0,0 +1,1672 @@
 +/*
 + *  bl_ops.c
@@ -7480,8 +7480,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/b
 +
 +#endif /* CONFIG_SPNFS_BLOCK */
 diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs/delegation.c
---- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-08-23 12:08:29.037481540 -0400
-+++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-23 12:09:03.300491952 -0400
+--- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-08-24 14:14:13.068705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-24 14:17:48.592730000 -0400
 @@ -104,7 +104,8 @@ again:
  			continue;
  		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
@@ -7558,7 +7558,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs
  	rcu_read_unlock();
 diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs/delegation.h
 --- linux-2.6.34.noarch/fs/nfs/delegation.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-08-23 12:09:03.301431797 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-08-24 14:17:48.597733000 -0400
 @@ -34,9 +34,7 @@ enum {
  int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
  void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
@@ -7571,8 +7571,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs
  
  struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
 diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
---- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-08-23 12:08:29.089481525 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 12:09:03.302511603 -0400
+--- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-08-24 14:14:13.612707000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-24 14:17:48.604730000 -0400
 @@ -17,11 +17,19 @@
  #include <linux/module.h>
  #include <linux/exportfs.h>
@@ -7750,7 +7750,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/e
  	svcauth_unix_purge();
 diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/direct.c
 --- linux-2.6.34.noarch/fs/nfs/direct.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-08-23 12:09:03.303491500 -0400
++++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-08-24 14:17:48.610730000 -0400
 @@ -267,6 +267,38 @@ static const struct rpc_call_ops nfs_rea
  	.rpc_release = nfs_direct_read_release,
  };
@@ -7996,7 +7996,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/dir
  		user_addr += bytes;
 diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kconfig
 --- linux-2.6.34.noarch/fs/nfsd/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-08-23 12:09:03.304505472 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-08-24 14:17:48.616730000 -0400
 @@ -79,3 +79,52 @@ config NFSD_V4
  	  available from http://linux-nfs.org/.
  
@@ -8052,7 +8052,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kc
 +	  If unsure, say N.
 diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/Makefile
 --- linux-2.6.34.noarch/fs/nfsd/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-08-23 12:09:03.304505472 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-08-24 14:17:48.621733000 -0400
 @@ -11,3 +11,7 @@ nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs
  nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
  nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
@@ -8062,8 +8062,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/M
 +nfsd-$(CONFIG_SPNFS)	+= spnfs_com.o spnfs_ops.o
 +nfsd-$(CONFIG_SPNFS_BLOCK) += bl_com.o bl_ops.o
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-08-23 12:08:29.090501507 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 12:09:03.306491345 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-08-24 14:14:13.618705000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-24 14:17:48.628730000 -0400
 @@ -40,7 +40,6 @@
  
  #define NFSPROC4_CB_NULL 0
@@ -8603,8 +8603,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/
 +}
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-08-23 12:09:03.307491492 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-08-23 12:09:03.308491262 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-08-24 14:17:48.633729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-08-24 14:17:48.641730000 -0400
 @@ -0,0 +1,1679 @@
 +/******************************************************************************
 + *
@@ -10286,8 +10286,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfs
 +	return status;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-08-23 12:09:03.309501439 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-08-23 12:09:03.309501439 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-08-24 14:17:48.645731000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-08-24 14:17:48.647730000 -0400
 @@ -0,0 +1,461 @@
 +/******************************************************************************
 + *
@@ -10751,8 +10751,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/n
 +};
 +EXPORT_SYMBOL(pnfs_dlm_export_ops);
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-08-23 12:09:03.310501390 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-08-23 12:09:03.310501390 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-08-24 14:17:48.651729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-08-24 14:17:48.652735000 -0400
 @@ -0,0 +1,620 @@
 +/*
 +*  linux/fs/nfsd/nfs4pnfsds.c
@@ -11375,8 +11375,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nf
 +
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-08-23 12:08:29.091491685 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 12:09:03.311501496 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-08-24 14:14:13.623707000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-24 14:17:48.658733000 -0400
 @@ -34,10 +34,14 @@
   */
  #include <linux/file.h>
@@ -11851,8 +11851,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd
  
  static const char *nfsd4_op_name(unsigned opnum)
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-08-23 12:08:29.093491375 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 12:09:03.313491310 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-08-24 14:14:13.632707000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-24 14:17:48.667732000 -0400
 @@ -42,6 +42,8 @@
  #include "xdr4.h"
  #include "vfs.h"
@@ -12368,8 +12368,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs
  }
  
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-23 12:09:03.315491356 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-08-24 14:14:13.639707000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-24 14:17:48.675730000 -0400
 @@ -47,9 +47,14 @@
  #include <linux/nfsd_idmap.h>
  #include <linux/nfs4_acl.h>
@@ -12988,8 +12988,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/
  	[OP_SEQUENCE]		= (nfsd4_enc)nfsd4_encode_sequence,
  	[OP_SET_SSV]		= (nfsd4_enc)nfsd4_encode_noop,
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
---- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-08-23 12:08:29.094491943 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 12:09:03.317501495 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-08-24 14:14:13.645705000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-24 14:17:48.681730000 -0400
 @@ -13,10 +13,15 @@
  #include <linux/nfsd/syscall.h>
  #include <linux/lockd/lockd.h>
@@ -13166,8 +13166,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/n
  	remove_proc_entry("fs/nfs/exports", NULL);
  	remove_proc_entry("fs/nfs", NULL);
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
---- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-08-23 12:08:29.095491390 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 12:09:03.318355741 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-08-24 14:14:13.651705000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-24 14:17:48.687730000 -0400
 @@ -285,11 +285,17 @@ extern time_t nfsd4_grace;
  #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
  	NFSD4_SUPPORTED_ATTRS_WORD0
@@ -13189,7 +13189,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfs
  {
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.c
 --- linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-08-23 12:09:03.319511586 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-08-24 14:17:48.693730000 -0400
 @@ -10,6 +10,7 @@
  #include <linux/exportfs.h>
  
@@ -13227,7 +13227,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nf
  		__u32 tfh[2];
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.h
 --- linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-08-23 12:09:03.319511586 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-08-24 14:17:48.698733000 -0400
 @@ -14,6 +14,7 @@ enum nfsd_fsid {
  	FSID_UUID8,
  	FSID_UUID16,
@@ -13280,8 +13280,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nf
 +
  #endif /* _LINUX_NFSD_FH_INT_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/nfssvc.c
---- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-08-23 12:08:27.631563969 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-08-23 12:09:03.320416974 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-08-24 14:14:06.365163000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-08-24 14:17:48.704731000 -0400
 @@ -115,7 +115,7 @@ struct svc_program		nfsd_program = {
  
  };
@@ -13292,8 +13292,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/n
  int nfsd_vers(int vers, enum vers_op change)
  {
 diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pnfsd.h
---- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-08-23 12:09:03.321376171 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-08-23 12:09:03.321376171 -0400
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-08-24 14:17:48.708729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-08-24 14:17:48.710730000 -0400
 @@ -0,0 +1,143 @@
 +/*
 + *  Copyright (c) 2005 The Regents of the University of Michigan.
@@ -13439,8 +13439,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pn
 +
 +#endif /* LINUX_NFSD_PNFSD_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c
---- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-08-23 12:09:03.321376171 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-08-23 12:09:03.322501672 -0400
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-08-24 14:17:48.713731000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-08-24 14:17:48.715730000 -0400
 @@ -0,0 +1,225 @@
 +/*
 + * linux/fs/nfsd/pnfs_lexp.c
@@ -13668,8 +13668,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nf
 +	inode->i_sb->s_pnfs_op = &pnfsd_lexp_ops;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_com.c
---- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-08-23 12:09:03.322501672 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-08-23 12:09:03.323511608 -0400
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-08-24 14:17:48.719729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-08-24 14:17:48.720735000 -0400
 @@ -0,0 +1,535 @@
 +/*
 + * fs/nfsd/spnfs_com.c
@@ -14207,8 +14207,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfs
 +}
 +#endif /* CONFIG_PROC_FS */
 diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c
---- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-08-23 12:09:03.324501390 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-08-23 12:09:03.324501390 -0400
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-08-24 14:17:48.724733000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-08-24 14:17:48.726730000 -0400
 @@ -0,0 +1,878 @@
 +/*
 + * fs/nfsd/spnfs_ops.c
@@ -15089,8 +15089,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfs
 +	return 0;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
---- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-08-23 12:08:29.096512142 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 12:09:03.325501424 -0400
+--- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-08-24 14:14:13.656705000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-24 14:17:48.731738000 -0400
 @@ -242,6 +242,12 @@ struct nfs4_client {
  	u32			cl_cb_seq_nr;
  	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
@@ -15207,8 +15207,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/st
 +
  #endif   /* NFSD4_STATE_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.c
---- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-08-23 12:08:27.632564132 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-08-23 12:09:03.326501490 -0400
+--- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-08-24 14:14:06.371160000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-08-24 14:17:48.737742000 -0400
 @@ -37,7 +37,12 @@
  #ifdef CONFIG_NFSD_V4
  #include <linux/nfs4_acl.h>
@@ -15335,8 +15335,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.
  out_nfserr:
  	err = nfserrno(host_err);
 diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
---- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-08-23 12:08:29.097425997 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 12:09:03.327451643 -0400
+--- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-08-24 14:14:13.661705000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-24 14:17:48.743747000 -0400
 @@ -37,6 +37,8 @@
  #ifndef _LINUX_NFSD_XDR4_H
  #define _LINUX_NFSD_XDR4_H
@@ -15413,8 +15413,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr
  	struct nfs4_replay *			replay;
  };
 diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.c
---- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-23 12:08:29.039491912 -0400
-+++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-23 12:09:03.328501680 -0400
+--- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-24 14:14:13.079708000 -0400
++++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-24 14:17:48.749746000 -0400
 @@ -28,6 +28,7 @@
  #include <linux/aio.h>
  #include <linux/gfp.h>
@@ -15540,8 +15540,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.
  	if (!ret)
  		return VM_FAULT_LOCKED;
 diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inode.c
---- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-08-23 12:08:29.042511552 -0400
-+++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-23 12:09:03.329501644 -0400
+--- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-08-24 14:14:13.095705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-24 14:17:48.757730000 -0400
 @@ -48,6 +48,7 @@
  #include "internal.h"
  #include "fscache.h"
@@ -15755,8 +15755,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inod
  	nfs_fs_proc_exit();
  	nfsiod_stop();
 diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/internal.h
---- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-08-23 12:08:29.042511552 -0400
-+++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-23 12:09:03.330502148 -0400
+--- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-08-24 14:14:13.100708000 -0400
++++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-24 14:17:48.763734000 -0400
 @@ -139,6 +139,16 @@ extern struct nfs_server *nfs_clone_serv
  					   struct nfs_fattr *);
  extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
@@ -15817,7 +15817,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/i
  		struct page *, struct page *);
 diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kconfig
 --- linux-2.6.34.noarch/fs/nfs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-08-23 12:09:03.331395814 -0400
++++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-08-24 14:17:48.769730000 -0400
 @@ -79,10 +79,48 @@ config NFS_V4_1
  	depends on NFS_V4 && EXPERIMENTAL
  	help
@@ -15870,7 +15870,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kcon
  	depends on NFS_FS=y && IP_PNP
 diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Makefile
 --- linux-2.6.34.noarch/fs/nfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-08-23 12:09:03.331395814 -0400
++++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-08-24 14:17:48.774730000 -0400
 @@ -15,5 +15,12 @@ nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4x
  			   delegation.o idmap.o \
  			   callback.o callback_xdr.o callback_proc.o \
@@ -15885,8 +15885,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Mak
 +obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
 +obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
 diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs3proc.c
---- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-08-23 12:08:29.045525837 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-23 12:09:03.332511640 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-08-24 14:14:13.119708000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-24 14:17:48.780730000 -0400
 @@ -833,6 +833,7 @@ const struct nfs_rpc_ops nfs_v3_clientop
  	.dentry_ops	= &nfs_dentry_operations,
  	.dir_inode_ops	= &nfs3_dir_inode_operations,
@@ -15896,8 +15896,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/n
  	.getattr	= nfs3_proc_getattr,
  	.setattr	= nfs3_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-08-23 12:09:03.333512111 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-08-23 12:09:03.334491472 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-08-24 14:17:48.784731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-08-24 14:17:48.786730000 -0400
 @@ -0,0 +1,765 @@
 +/*
 + *  linux/fs/nfs/nfs4filelayout.c
@@ -16665,8 +16665,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs
 +module_init(nfs4filelayout_init);
 +module_exit(nfs4filelayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-08-23 12:09:03.334491472 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-08-23 12:09:03.335501543 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-08-24 14:17:48.790731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-08-24 14:17:48.792730000 -0400
 @@ -0,0 +1,636 @@
 +/*
 + *  linux/fs/nfs/nfs4filelayoutdev.c
@@ -17305,8 +17305,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch
 +}
 +
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-08-23 12:09:03.335501543 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-08-23 12:09:03.335501543 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-08-24 14:17:48.795731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-08-24 14:17:48.796742000 -0400
 @@ -0,0 +1,97 @@
 +/*
 + *  pnfs_nfs4filelayout.h
@@ -17406,8 +17406,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs
 +
 +#endif /* FS_NFS_NFS4FILELAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nfs4_fs.h
---- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-08-23 12:08:29.047512264 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-23 12:09:03.336490079 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-08-24 14:14:13.130705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-24 14:17:48.802730000 -0400
 @@ -45,8 +45,28 @@ enum nfs4_client_state {
  	NFS4CLNT_RECLAIM_NOGRACE,
  	NFS4CLNT_DELEGRETURN,
@@ -17556,8 +17556,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nf
  
  /* nfs4xdr.c */
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs4proc.c
---- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-08-23 12:08:29.050481368 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-23 12:09:03.339481253 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-08-24 14:14:13.143709000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-24 14:17:48.811734000 -0400
 @@ -49,12 +49,15 @@
  #include <linux/mount.h>
  #include <linux/module.h>
@@ -19223,7 +19223,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/n
  	.setattr	= nfs4_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs/nfs4renewd.c
 --- linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-08-23 12:09:03.341491726 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-08-24 14:17:48.818733000 -0400
 @@ -54,17 +54,17 @@
  void
  nfs4_renew_state(struct work_struct *work)
@@ -19246,8 +19246,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs
  	spin_lock(&clp->cl_lock);
  	lease = clp->cl_lease_time;
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/nfs4state.c
---- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-08-23 12:08:29.052491341 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-23 12:09:03.342373443 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-08-24 14:14:13.150705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-24 14:17:48.825730000 -0400
 @@ -53,6 +53,9 @@
  #include "callback.h"
  #include "delegation.h"
@@ -19566,8 +19566,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/
  			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
  			    test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs4xdr.c
---- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-23 12:08:29.054481400 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-23 12:09:03.346481283 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-24 14:14:13.159705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-24 14:17:48.834738000 -0400
 @@ -50,8 +50,11 @@
  #include <linux/nfs4.h>
  #include <linux/nfs_fs.h>
@@ -21078,8 +21078,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nf
  };
  
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild
---- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-08-23 12:09:03.348511665 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-08-23 12:09:03.348511665 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-08-24 14:17:48.839734000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-08-24 14:17:48.840742000 -0400
 @@ -0,0 +1,11 @@
 +#
 +# Makefile for the pNFS Objects Layout Driver kernel module
@@ -21093,8 +21093,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs
 +panlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o panfs_shim.o
 +obj-$(CONFIG_PNFS_PANLAYOUT) += panlayoutdriver.o
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-08-23 12:09:03.349501459 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-08-23 12:09:03.349501459 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-08-24 14:17:48.843735000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-08-24 14:17:48.845739000 -0400
 @@ -0,0 +1,1087 @@
 +/*
 + *  objio_osd.c
@@ -22184,8 +22184,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noar
 +module_init(objlayout_init);
 +module_exit(objlayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-08-23 12:09:03.350491564 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-08-23 12:09:03.350491564 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-08-24 14:17:48.848735000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-08-24 14:17:48.851730000 -0400
 @@ -0,0 +1,790 @@
 +/*
 + *  objlayout.c
@@ -22978,8 +22978,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noar
 +	.uninitialize_mountpoint = objlayout_uninitialize_mountpoint,
 +};
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h
---- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-08-23 12:09:03.351434439 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-08-23 12:09:03.351434439 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-08-24 14:17:48.852735000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-08-24 14:17:48.854746000 -0400
 @@ -0,0 +1,171 @@
 +/*
 + *  objlayout.h
@@ -23153,8 +23153,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noar
 +
 +#endif /* _OBJLAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-08-23 12:09:03.352501716 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-08-23 12:09:03.352501716 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-08-24 14:17:48.857735000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-08-24 14:17:48.860740000 -0400
 @@ -0,0 +1,734 @@
 +/*
 + *  panfs_shim.c
@@ -23891,8 +23891,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noa
 +module_init(panlayout_init);
 +module_exit(panlayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h
---- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-08-23 12:09:03.353501685 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-08-23 12:09:03.353501685 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-08-24 14:17:48.863734000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-08-24 14:17:48.864730000 -0400
 @@ -0,0 +1,482 @@
 +/*
 + *  panfs_shim.h
@@ -24377,8 +24377,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noa
 +
 +#endif /* _PANLAYOUT_PANFS_SHIM_H */
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-08-23 12:09:03.354501721 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-08-23 12:09:03.354501721 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-08-24 14:17:48.868731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-08-24 14:17:48.869739000 -0400
 @@ -0,0 +1,435 @@
 +/*
 + *  pnfs_osd_xdr.c
@@ -24816,8 +24816,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.
 +	return 0;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/pagelist.c
---- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-08-23 12:08:29.056411363 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-23 12:09:03.355511659 -0400
+--- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-08-24 14:14:13.169705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-24 14:17:48.875733000 -0400
 @@ -20,6 +20,7 @@
  #include <linux/nfs_mount.h>
  
@@ -24940,8 +24940,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/p
  				if (res == INT_MAX)
  					goto out;
 diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.c
---- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-08-23 12:09:03.356501413 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-08-23 12:09:03.357481204 -0400
+--- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-08-24 14:17:48.880733000 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-08-24 14:17:48.883730000 -0400
 @@ -0,0 +1,2027 @@
 +/*
 + *  linux/fs/nfs/pnfs.c
@@ -26971,8 +26971,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.
 +}
 +EXPORT_SYMBOL(nfs4_put_deviceid_cache);
 diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.h
---- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-08-23 12:09:03.358501440 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-08-23 12:09:03.358501440 -0400
+--- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-08-24 14:17:48.886733000 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-08-24 14:17:48.887735000 -0400
 @@ -0,0 +1,355 @@
 +/*
 + *  fs/nfs/pnfs.h
@@ -27330,8 +27330,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.
 +
 +#endif /* FS_NFS_PNFS_H */
 diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.c
---- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-08-23 12:08:29.057511533 -0400
-+++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-23 12:09:03.359501471 -0400
+--- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-08-24 14:14:13.174707000 -0400
++++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-24 14:17:48.893730000 -0400
 @@ -443,7 +443,7 @@ nfs_proc_symlink(struct inode *dir, stru
  	fattr = nfs_alloc_fattr();
  	status = -ENOMEM;
@@ -27359,8 +27359,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.
  	.getattr	= nfs_proc_getattr,
  	.setattr	= nfs_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.c
---- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-08-23 12:08:29.057511533 -0400
-+++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-23 12:09:03.359501471 -0400
+--- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-08-24 14:14:13.179708000 -0400
++++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-24 14:17:48.899733000 -0400
 @@ -18,8 +18,12 @@
  #include <linux/sunrpc/clnt.h>
  #include <linux/nfs_fs.h>
@@ -27575,8 +27575,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.
  	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
  read_complete:
 diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/super.c
---- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-23 12:08:29.059491391 -0400
-+++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-23 12:09:03.361501458 -0400
+--- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-24 14:14:13.186707000 -0400
++++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-24 14:17:48.907729000 -0400
 @@ -64,6 +64,7 @@
  #include "iostat.h"
  #include "internal.h"
@@ -27624,8 +27624,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/supe
  #endif
  
 diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unlink.c
---- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-08-23 12:08:29.060501485 -0400
-+++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-23 12:09:03.362419975 -0400
+--- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-08-24 14:14:13.192705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-24 14:17:48.913730000 -0400
 @@ -110,7 +110,7 @@ void nfs_unlink_prepare(struct rpc_task 
  	struct nfs_unlinkdata *data = calldata;
  	struct nfs_server *server = NFS_SERVER(data->dir);
@@ -27636,8 +27636,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unl
  		return;
  	rpc_call_start(task);
 diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/write.c
---- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-08-23 12:08:27.630563929 -0400
-+++ linux-2.6.34.noarch/fs/nfs/write.c	2010-08-23 12:09:03.364491337 -0400
+--- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-08-24 14:14:06.360160000 -0400
++++ linux-2.6.34.noarch/fs/nfs/write.c	2010-08-24 14:17:48.921712000 -0400
 @@ -20,6 +20,7 @@
  #include <linux/nfs_mount.h>
  #include <linux/nfs_page.h>
@@ -28326,7 +28326,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/writ
  int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/include/linux/exportfs.h
 --- linux-2.6.34.noarch/include/linux/exportfs.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-08-23 12:09:03.365501459 -0400
++++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-08-24 14:17:48.933713000 -0400
 @@ -2,6 +2,7 @@
  #define LINUX_EXPORTFS_H 1
  
@@ -28399,8 +28399,8 @@ diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/i
 +#endif /* CONFIG_PNFSD */
  #endif /* LINUX_EXPORTFS_H */
 diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/include/linux/exp_xdr.h
---- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-08-23 12:09:03.367491365 -0400
-+++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-08-23 12:09:03.367491365 -0400
+--- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-08-24 14:17:48.945690000 -0400
++++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-08-24 14:17:48.946693000 -0400
 @@ -0,0 +1,141 @@
 +#ifndef _LINUX_EXP_XDR_H
 +#define _LINUX_EXP_XDR_H
@@ -28544,8 +28544,8 @@ diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/in
 +}
 +#endif /* _LINUX_EXP_XDR_H */
 diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include/linux/fs.h
---- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-08-23 12:08:29.021511898 -0400
-+++ linux-2.6.34.noarch/include/linux/fs.h	2010-08-23 12:09:03.369481147 -0400
+--- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-08-24 14:14:13.014707000 -0400
++++ linux-2.6.34.noarch/include/linux/fs.h	2010-08-24 14:17:48.961675000 -0400
 @@ -387,6 +387,7 @@ struct inodes_stat_t {
  #include <asm/byteorder.h>
  
@@ -28564,7 +28564,7 @@ diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include
  	struct dentry		*s_root;
 diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/include/linux/nfs4.h
 --- linux-2.6.34.noarch/include/linux/nfs4.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-08-23 12:09:03.371491472 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-08-24 14:17:48.974681000 -0400
 @@ -17,7 +17,10 @@
  
  #define NFS4_BITMAP_SIZE	2
@@ -28694,8 +28694,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/inclu
  #endif
  
 diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfs4_pnfs.h
---- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-08-23 12:09:03.372501550 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-08-23 12:09:03.372501550 -0400
+--- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-08-24 14:17:48.986670000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-08-24 14:17:48.989666000 -0400
 @@ -0,0 +1,330 @@
 +/*
 + *  include/linux/nfs4_pnfs.h
@@ -29028,8 +29028,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/
 +
 +#endif /* LINUX_NFS4_PNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarch/include/linux/nfsd4_block.h
---- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-08-23 12:09:03.373491892 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-08-23 12:09:03.374491393 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-08-24 14:17:48.998668000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-08-24 14:17:49.000665000 -0400
 @@ -0,0 +1,101 @@
 +#ifndef NFSD4_BLOCK
 +#define NFSD4_BLOCK
@@ -29133,8 +29133,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarc
 +#endif /* NFSD4_BLOCK */
 +
 diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h
---- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-08-23 12:09:03.375501481 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-08-23 12:09:03.375501481 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-08-24 14:17:49.012664000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-08-24 14:17:49.013671000 -0400
 @@ -0,0 +1,345 @@
 +/*
 + * include/linux/nfsd4_spnfs.h
@@ -29483,7 +29483,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarc
 +#endif /* NFS_SPNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch/include/linux/nfsd/const.h
 --- linux-2.6.34.noarch/include/linux/nfsd/const.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-08-23 12:09:03.376401789 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-08-24 14:17:49.018668000 -0400
 @@ -29,6 +29,7 @@
  #ifdef __KERNEL__
  
@@ -29494,7 +29494,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch
   * Largest number of bytes we need to allocate for an NFS
 diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch/include/linux/nfsd/debug.h
 --- linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-08-23 12:09:03.376401789 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-08-24 14:17:49.024673000 -0400
 @@ -32,6 +32,8 @@
  #define NFSDDBG_REPCACHE	0x0080
  #define NFSDDBG_XDR		0x0100
@@ -29506,7 +29506,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch
  
 diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarch/include/linux/nfsd/export.h
 --- linux-2.6.34.noarch/include/linux/nfsd/export.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-08-23 12:09:03.377481954 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-08-24 14:17:49.030665000 -0400
 @@ -100,6 +100,7 @@ struct svc_export {
  	uid_t			ex_anon_uid;
  	gid_t			ex_anon_gid;
@@ -29516,8 +29516,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarc
  	struct nfsd4_fs_locations ex_fslocs;
  	int			ex_nflavors;
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h
---- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-08-23 12:09:03.377481954 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-08-23 12:09:03.378501747 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-08-24 14:17:49.033666000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-08-24 14:17:49.034665000 -0400
 @@ -0,0 +1,132 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -29652,8 +29652,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.3
 +
 +#endif /* NFSD_NFS4LAYOUTXDR_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h
---- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-08-23 12:09:03.378501747 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-08-23 12:09:03.378501747 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-08-24 14:17:49.037666000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-08-24 14:17:49.039665000 -0400
 @@ -0,0 +1,54 @@
 +/******************************************************************************
 + *
@@ -29710,8 +29710,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.
 +
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h
---- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-08-23 12:09:03.379487099 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-08-23 12:09:03.379487099 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-08-24 14:17:49.042666000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-08-24 14:17:49.044665000 -0400
 @@ -0,0 +1,271 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -29986,7 +29986,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.n
 +#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noarch/include/linux/nfsd/syscall.h
 --- linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-08-23 12:09:03.380502500 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-08-24 14:17:49.049665000 -0400
 @@ -29,6 +29,7 @@
  /*#define NFSCTL_GETFH		6	/ * get an fh by ino DISCARDED */
  #define NFSCTL_GETFD		7	/* get an fh by path (used by mountd) */
@@ -30024,8 +30024,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noar
  
  union nfsctl_res {
 diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/include/linux/nfs_fs.h
---- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-23 12:08:29.061494081 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-23 12:09:03.381511751 -0400
+--- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-24 14:14:13.201710000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-24 14:17:49.063666000 -0400
 @@ -72,13 +72,20 @@ struct nfs_access_entry {
  	int			mask;
  };
@@ -30124,8 +30124,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/inc
  
  #ifdef __KERNEL__
 diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/include/linux/nfs_fs_sb.h
---- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-08-23 12:08:29.062501618 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-23 12:09:03.383491395 -0400
+--- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-08-24 14:14:13.206708000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-24 14:17:49.077665000 -0400
 @@ -15,6 +15,7 @@ struct nlm_host;
  struct nfs4_sequence_args;
  struct nfs4_sequence_res;
@@ -30200,7 +30200,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/
  	atomic_t active; /* Keep trace of any activity to this server */
 diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch/include/linux/nfs_iostat.h
 --- linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-08-23 12:09:03.384501540 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-08-24 14:17:49.089668000 -0400
 @@ -113,6 +113,9 @@ enum nfs_stat_eventcounters {
  	NFSIOS_SHORTREAD,
  	NFSIOS_SHORTWRITE,
@@ -30213,7 +30213,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch
  
 diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/include/linux/nfs_page.h
 --- linux-2.6.34.noarch/include/linux/nfs_page.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-08-23 12:09:03.385491518 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-08-24 14:17:49.103665000 -0400
 @@ -39,6 +39,7 @@ struct nfs_page {
  	struct list_head	wb_list;	/* Defines state of page: */
  	struct page		*wb_page;	/* page to read in/write out */
@@ -30262,8 +30262,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/i
  			     struct inode *inode,
  			     int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
 diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/include/linux/nfs_xdr.h
---- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-08-23 12:08:29.062501618 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-23 12:09:03.387491422 -0400
+--- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-08-24 14:14:13.211708000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-24 14:17:49.116665000 -0400
 @@ -3,6 +3,8 @@
  
  #include <linux/nfsacl.h>
@@ -30415,8 +30415,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/in
  extern struct rpc_version	nfs_version3;
  extern struct rpc_version	nfs_version4;
 diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.noarch/include/linux/panfs_shim_api.h
---- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-08-23 12:09:03.388491527 -0400
-+++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-08-23 12:09:03.388491527 -0400
+--- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-08-24 14:17:49.128664000 -0400
++++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-08-24 14:17:49.129670000 -0400
 @@ -0,0 +1,57 @@
 +#ifndef _PANFS_SHIM_API_H
 +#define _PANFS_SHIM_API_H
@@ -30476,8 +30476,8 @@ diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.no
 +
 +#endif /* _PANFS_SHIM_API_H */
 diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h
---- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-08-23 12:09:03.390501461 -0400
-+++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-08-23 12:09:03.390501461 -0400
+--- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-08-24 14:17:49.141664000 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-08-24 14:17:49.142670000 -0400
 @@ -0,0 +1,440 @@
 +/*
 + *  pnfs_osd_xdr.h
@@ -30920,8 +30920,8 @@ diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noar
 +
 +#endif /* __PNFS_OSD_XDR_H__ */
 diff -up linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_xdr.h
---- linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig	2010-08-23 12:09:03.391491550 -0400
-+++ linux-2.6.34.noarch/include/linux/pnfs_xdr.h	2010-08-23 12:09:03.391491550 -0400
+--- linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig	2010-08-24 14:17:49.153666000 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_xdr.h	2010-08-24 14:17:49.155665000 -0400
 @@ -0,0 +1,134 @@
 +/*
 + *  include/linux/pnfs_xdr.h
@@ -31059,7 +31059,7 @@ diff -up linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig linux-2.6.34.noarch/i
 +#endif /* LINUX_PNFS_XDR_H */
 diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/include/linux/posix_acl.h
 --- linux-2.6.34.noarch/include/linux/posix_acl.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-08-23 12:09:03.393501437 -0400
++++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-08-24 14:17:49.168668000 -0400
 @@ -8,6 +8,7 @@
  #ifndef __LINUX_POSIX_ACL_H
  #define __LINUX_POSIX_ACL_H
@@ -31070,7 +31070,7 @@ diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/
  #define ACL_UNDEFINED_ID	(-1)
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-08-23 12:09:03.393501437 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-08-24 14:17:49.174665000 -0400
 @@ -14,6 +14,8 @@
  /* size of an XDR encoding unit in bytes, i.e. 32bit */
  #define XDR_UNIT	(4)
@@ -31082,7 +31082,7 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.n
  
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-08-23 12:09:03.394512138 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-08-24 14:17:49.179667000 -0400
 @@ -3,6 +3,7 @@
  
  #ifdef __KERNEL__
@@ -31103,8 +31103,8 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.3
  
  struct rpc_pipe_ops {
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h
---- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-08-23 12:09:03.394512138 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-08-23 12:09:03.395501822 -0400
+--- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-08-24 14:17:49.183664000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-08-24 14:17:49.184674000 -0400
 @@ -0,0 +1,111 @@
 +/*
 + *  Copyright (c) 2008 The Regents of the University of Michigan.
@@ -31219,7 +31219,7 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux
 +#endif /* _SIMPLE_RPC_PIPEFS_H_ */
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-08-23 12:09:03.395501822 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-08-24 14:17:49.190665000 -0400
 @@ -166,4 +166,41 @@ static inline char *__svc_print_addr(con
  
  	return buf;
@@ -31263,8 +31263,8 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.n
 +}
  #endif /* SUNRPC_SVC_XPRT_H */
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xdr.h
---- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-08-23 12:08:29.066475323 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-23 12:09:03.396464612 -0400
+--- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-08-24 14:14:13.258707000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-24 14:17:49.195672000 -0400
 @@ -131,6 +131,13 @@ xdr_decode_hyper(__be32 *p, __u64 *valp)
  	return p + 2;
  }
@@ -31287,14 +31287,9 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch
  extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
  		unsigned int base, unsigned int len);
  extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
-diff -up linux-2.6.34.noarch/localversion-pnfs.orig linux-2.6.34.noarch/localversion-pnfs
---- linux-2.6.34.noarch/localversion-pnfs.orig	2010-08-23 12:09:03.396464612 -0400
-+++ linux-2.6.34.noarch/localversion-pnfs	2010-08-23 12:09:03.396464612 -0400
-@@ -0,0 +1 @@
-+-pnfs
 diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/sunrpc/Makefile
 --- linux-2.6.34.noarch/net/sunrpc/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-08-23 12:09:03.397501662 -0400
++++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-08-24 14:17:49.204668000 -0400
 @@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprt
  	    svc.o svcsock.o svcauth.o svcauth_unix.o \
  	    addr.o rpcb_clnt.o timer.o xdr.o \
@@ -31305,8 +31300,8 @@ diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/su
  sunrpc-$(CONFIG_PROC_FS) += stats.o
  sunrpc-$(CONFIG_SYSCTL) += sysctl.o
 diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c
---- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-08-23 12:09:03.398522348 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-08-23 12:09:03.398522348 -0400
+--- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-08-24 14:17:49.208664000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-08-24 14:17:49.209670000 -0400
 @@ -0,0 +1,424 @@
 +/*
 + *  net/sunrpc/simple_rpc_pipefs.c
@@ -31733,8 +31728,8 @@ diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.no
 +}
 +EXPORT_SYMBOL(pipefs_generic_destroy_msg);
 diff -up linux-2.6.34.noarch/net/sunrpc/xdr.c.orig linux-2.6.34.noarch/net/sunrpc/xdr.c
---- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-08-23 12:08:29.081501640 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-23 12:09:03.399443371 -0400
+--- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-08-24 14:14:13.447705000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-24 14:17:49.215665000 -0400
 @@ -403,16 +403,14 @@ xdr_shrink_pagelen(struct xdr_buf *buf, 
  
  	/* Shift the tail first */

From f578792412bcedf3ba25d53774b683fc57dfbcdb Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Tue, 24 Aug 2010 15:13:05 -0400
Subject: [PATCH 08/20] set the kernel flags

--with firmware
--with debuginfo
--without vdso_install
--without debug
--without headers

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 kernel.spec | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/kernel.spec b/kernel.spec
index 4fb3481f3..48f821659 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -101,23 +101,23 @@ Summary: The Linux kernel
 # kernel-smp (only valid for ppc 32-bit)
 %define with_smp       %{?_without_smp:       0} %{?!_without_smp:       1}
 # kernel-debug
-%define with_debug     %{?_without_debug:     0} %{?!_without_debug:     1}
+%define with_debug     %{?_without_debug:     0} %{?!_without_debug:     0}
 # kernel-doc
-%define with_doc       %{?_without_doc:       0} %{?!_without_doc:       1}
+%define with_doc       %{?_without_doc:       0} %{?!_without_doc:       0}
 # kernel-headers
-%define with_headers   %{?_without_headers:   0} %{?!_without_headers:   1}
+%define with_headers   %{?_without_headers:   0} %{?!_without_headers:   0}
 # kernel-firmware
 %define with_firmware  %{?_with_firmware:     1} %{?!_with_firmware:     1}
 # tools/perf
-%define with_perftool  %{?_without_perftool:  0} %{?!_without_perftool:  1}
+%define with_perftool  %{?_without_perftool:  0} %{?!_without_perftool:  0}
 # perf noarch subpkg
-%define with_perf      %{?_without_perf:      0} %{?!_without_perf:      1}
+%define with_perf      %{?_without_perf:      0} %{?!_without_perf:      0}
 # kernel-debuginfo
-%define with_debuginfo %{?_without_debuginfo: 0} %{?!_without_debuginfo: 1}
+%define with_debuginfo %{?_without_debuginfo: 1} %{?!_without_debuginfo: 1}
 # kernel-bootwrapper (for creating zImages from kernel + initrd)
 %define with_bootwrapper %{?_without_bootwrapper: 0} %{?!_without_bootwrapper: 1}
 # Want to build a the vsdo directories installed
-%define with_vdso_install %{?_without_vdso_install: 0} %{?!_without_vdso_install: 1}
+%define with_vdso_install %{?_without_vdso_install: 0} %{?!_without_vdso_install: 0}
 
 # Build the kernel-doc package, but don't fail the build if it botches.
 # Here "true" means "continue" and "false" means "fail the build".

From dcf28a529829080f523c5c4b5b01b25720a928ea Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Tue, 31 Aug 2010 20:57:01 -0400
Subject: [PATCH 09/20] - Updated to the latest pNFS tag:
 pnfs-all-2.6.35-2010-08-24

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 kernel.spec                          |     9 +-
 pnfs-all-2.6.35-2010-08-24-f13.patch | 31778 +++++++++++++++++++++++++
 2 files changed, 31784 insertions(+), 3 deletions(-)
 create mode 100644 pnfs-all-2.6.35-2010-08-24-f13.patch

diff --git a/kernel.spec b/kernel.spec
index 48f821659..094922072 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -23,7 +23,7 @@ Summary: The Linux kernel
 #
 # (Uncomment the '#' and both spaces below to set the buildid.)
 #
-%define buildid .pnfs34.2010.08.19
+%define buildid .pnfs34.2010.08.24
 ###################################################################
 
 # The buildid can also be specified on the rpmbuild command line
@@ -768,7 +768,7 @@ Patch12480: kprobes-x86-fix-kprobes-to-skip-prefixes-correctly.patch
 
 Patch30000: nfs-35-fc.patch
 Patch30001: nfsd-35-fc.patch
-Patch30002: pnfs-all-2.6.35-2010-08-19-f13.patch
+Patch30002: pnfs-all-2.6.35-2010-08-24-f13.patch
 Patch30003: linux-2.6-pnfs-compile.patch
 Patch30004: linux-2.6.35-inline.patch
 
@@ -1432,7 +1432,7 @@ ApplyPatch kprobes-x86-fix-kprobes-to-skip-prefixes-correctly.patch
 
 ApplyPatch nfs-35-fc.patch  
 ApplyPatch nfsd-35-fc.patch  
-ApplyPatch pnfs-all-2.6.35-2010-08-19-f13.patch
+ApplyPatch pnfs-all-2.6.35-2010-08-24-f13.patch
 ApplyPatch linux-2.6-pnfs-compile.patch
 ApplyPatch linux-2.6.35-inline.patch
 # END OF PATCH APPLICATIONS
@@ -2056,6 +2056,9 @@ fi
 
 
 %changelog
+* Tue Aug 31 2010 Steve Dickson <steved@redhat.com>
+- Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-24
+
 * Fri Aug 27 2010 Chuck Ebbert <cebbert@redhat.com>  2.6.34.6-47
 - Linux 2.6.34.6
 - drivers-hwmon-coretemp-c-detect-the-thermal-sensors-by-cpuid.patch (#625734)
diff --git a/pnfs-all-2.6.35-2010-08-24-f13.patch b/pnfs-all-2.6.35-2010-08-24-f13.patch
new file mode 100644
index 000000000..17d1c844d
--- /dev/null
+++ b/pnfs-all-2.6.35-2010-08-24-f13.patch
@@ -0,0 +1,31778 @@
+diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arch/um/os-Linux/mem.c
+--- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-08-31 20:41:16.924243041 -0400
++++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-08-31 20:42:05.486160576 -0400
+@@ -13,6 +13,7 @@
+ #include <sys/stat.h>
+ #include <sys/mman.h>
+ #include <sys/param.h>
++#include <sys/stat.h>
+ #include "init.h"
+ #include "kern_constants.h"
+ #include "os.h"
+diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.c
+--- linux-2.6.34.noarch/block/genhd.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/block/genhd.c	2010-08-31 20:42:05.487160201 -0400
+@@ -1009,6 +1009,7 @@ static void disk_release(struct device *
+ struct class block_class = {
+ 	.name		= "block",
+ };
++EXPORT_SYMBOL(block_class);
+ 
+ static char *block_devnode(struct device *dev, mode_t *mode)
+ {
+diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt
+--- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-08-31 20:42:05.486160576 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-08-31 20:42:05.486160576 -0400
+@@ -0,0 +1,211 @@
++(c) 2007 Network Appliance Inc.
++
++spNFS
++-----
++
++An spNFS system consists of a Meta Data Server (MDS), a number of Client machines (C) and a number of Data Servers (DS).
++
++A file system is mounted by the clients from the MDS, and all file data
++is striped across the DSs.
++
++Identify the machines that will be filling each of these roles.
++
++The spnfs kernel will be installed on all machines: clients, the MDS and DSs.
++
++
++Building and installing the spNFS kernel
++----------------------------------------
++
++Get the spNFS kernel from:
++
++	git://linux-nfs.org/~bhalevy/linux-pnfs.git
++
++Use the pnfs-all-latest branch and add these options to your .config file
++
++	CONFIG_NETWORK_FILESYSTEMS=y
++	CONFIG_NFS_FS=m
++	CONFIG_NFS_V4=y
++	CONFIG_NFS_V4_1=y
++	CONFIG_PNFS=y
++	CONFIG_NFSD=m
++	CONFIG_PNFSD=y
++	# CONFIG_PNFSD_LOCAL_EXPORT is not set
++	CONFIG_SPNFS=y
++
++By default, spNFS uses whole-file layouts.  Layout segments can be enabled
++by adding:
++
++	CONFIG_SPNFS_LAYOUTSEGMENTS=y
++
++to your .config file.
++
++Building and installation of kernel+modules is as usual.
++This kernel should be installed and booted on the client, MDS and DSs.
++
++Note that CONFIG_PNFSD_LOCAL_EXPORT must be disabled for spnfs as it
++takes over the pnfs export interface.
++
++Building nfs-utils
++------------------
++
++Get the nfs-utils package containing spnfsd from:
++
++	git://linux-nfs.org/~bhalevy/pnfs-nfs-utils.git
++
++Follow the standard instructions for building nfs-utils.
++
++After building, the spnfsd daemon will be located in utils/spnfsd.  The spnfsd
++daemon will only be needed on the MDS.
++
++
++Installation
++------------
++
++The nfs-utils package contains a default spnfsd.conf file in
++utils/spnfsd/spnfsd.conf.  Copy this file to /etc/spnfsd.conf.
++
++By default, the DS-Mount-Directory is set to /spnfs (see spnfsd.conf).  Under
++this directory, mount points must be created for each DS to
++be used for pNFS data stripes.  These mount points are named by the ip address
++of the corresponding DS.  In the sample spnfsd.conf, there are two
++DSs defined (172.16.28.134 and 172.16.28.141).
++
++Following the sample spnfsd.conf,
++
++	mkdir /spnfs
++
++on the MDS (corresponding to DS-Mount-Directory).  Then
++
++	mkdir /spnfs/172.16.28.134
++	mkdir /spnfs/172.16.28.141
++
++to create the mount points for the DSs.
++
++On the DSs, choose a directory where data stripes will be created by the MDS.
++For the sample file, this directory is /pnfs, so on each DS execute:
++
++	mkdir /pnfs
++
++This directory is specified in the spnfsd.conf file by the DS*_ROOT option
++(where * is replaced by the DS number).  DS_ROOT is specified relative to
++the directory being exported by the DSs.  In our example, our DSs are exporting
++the root directory (/) and therefore our DS_ROOT is /pnfs.  On the DSs, we have
++the following entry in /etc/exports:
++
++	/ *(rw,fsid=0,insecure,no_root_squash,sync,no_subtree_check)
++
++N.B. If we had created a /exports directory and a /pnfs directory under
++/exports, and if we were exporting /exports, then DS_ROOT would still be /pnfs
++(not /exports/pnfs).
++
++It may be useful to add entries to /etc/fstab on the MDS to automatically
++mount the DS_ROOT file systems.  For this example, our MDS fstab would
++contain:
++
++	172.16.28.134:/pnfs /spnfs/172.16.28.134 nfs    defaults        1 2
++	172.16.28.141:/pnfs /spnfs/172.16.28.141 nfs    defaults        1 2
++
++The DS mounts must be performed manually or via fstab at this time (automatic
++mounting, directory creation, etc. are on the todo list).  To perform I/O
++through the MDS, the DS mounts MUST use NFSv3 at this time (this restriction
++will eventually be removed).
++
++
++On the MDS, choose a file system to use with spNFS and export it, e.g.:
++
++	/ *(rw,fsid=0,insecure,no_root_squash,sync,no_subtree_check,pnfs)
++
++Make sure nfsd and all supporting processes are running on the MDS and DSs.
++
++
++Running
++-------
++
++If rpc_pipefs is not already mounted (if you're running idmapd it probably is),
++you may want to add the following line to /etc/fstab:
++
++	rpc_pipefs    /var/lib/nfs/rpc_pipefs rpc_pipefs defaults     0 0
++
++to automatically mount rpc_pipefs.
++
++With spnfsd.conf configured for your environment and the mounts mounted as
++described above, spnfsd can now be started.
++
++On the MDS, execute spnfsd:
++
++	spnfsd
++
++The executable is located in the directory where it was built, and
++may also have been installed elsewhere depending on how you built nfs-utils.
++It will run in the foreground by default, and in fact will do so despite
++any options suggesting the contrary (it's still a debugging build).
++
++On the client, make sure the nfslayoutdriver module is loaded:
++
++	modprobe nfslayoutdriver
++
++Then mount the file system from the MDS:
++
++	mount -t nfs4 -o minorversion=1 mds:/ /mnt
++
++I/O through the MDS is now supported.  To use it, do not load the
++nfslayoutdriver on the client, and mount the MDS using NFSv4 or 4.1
++(NFSv2 and v3 are not yet supported).
++
++You may now use spNFS by performing file system activities in /mnt.
++If you create files in /mnt, you should see stripe files corresponding to
++new files being created on the DSs.  The current implementation names the
++stripe files based on the inode number of the file on the MDS.  For example,
++if you create a file foo in /mnt and do an 'ls -li /mnt/foo':
++
++	# ls -li foo
++	1233 -rw-r--r-- 1 root root 0 Nov 29 15:54 foo
++
++You should see stripe files on each DS under /pnfs (per the sample) named
++1233.  The file /pnfs/1233 on DS1 will contain the first <stripe size> bytes
++of data written to foo, DS2 will contain the next <stripe size> bytes, etc.
++Removing /mnt/foo will remove the corresponding stripe files on the DSs.
++Other file system operations should behave (mostly :-) as expected.
++
++
++Layout Segments
++---------------
++
++If the kernel is compiled to support layout segments, there will
++be two files created under /proc/fs/spnfs for controlling layout
++segment functionality.
++
++To enable layout segments, write a '1' to /proc/fs/spnfs/layoutseg, e.g.:
++
++	echo 1 > /proc/fs/spnfs/layoutseg
++
++Layout segments can be disabled (returning to whole-file layouts) by
++writing a '0' to /proc/fs/spnfs/layoutseg:
++
++	echo 0 > /proc/fs/spnfs/layoutseg
++
++When layout segments are enabled, the size of the layouts returned can
++be specified by writing a decimal number (ascii representation) to
++/proc/fs/spnfs/layoutsegsize:
++
++	echo 1024 > /proc/fs/spnfs/layoutsegsize
++
++The value '0' has a special meaning--it causes the server to return a
++layout that is exactly the size requested by the client:
++
++	echo 0 > /proc/fs/spnfs/layoutsegsize
++
++
++Troubleshooting
++---------------
++
++If you see data being written to the files on the MDS rather than
++the stripe files, make sure the nfslayoutdriver is loaded on the client
++(see above).
++
++If you get a "permission denied" error, make sure mountd is running on the MDS
++(it occasionally fails to start).
++
++Bugs, enhancements, compliments, complaints to: dmuntz@netapp.com
++
++
+diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/drivers/md/dm-ioctl.c
+--- linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig	2010-08-31 20:41:17.063232968 -0400
++++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-08-31 20:42:05.488160560 -0400
+@@ -657,6 +657,12 @@ static int dev_create(struct dm_ioctl *p
+ 	return r;
+ }
+ 
++int dm_dev_create(struct dm_ioctl *param)
++{
++	return dev_create(param, sizeof(*param));
++}
++EXPORT_SYMBOL(dm_dev_create);
++
+ /*
+  * Always use UUID for lookups if it's present, otherwise use name or dev.
+  */
+@@ -751,6 +757,12 @@ static int dev_remove(struct dm_ioctl *p
+ 	return 0;
+ }
+ 
++int dm_dev_remove(struct dm_ioctl *param)
++{
++	return dev_remove(param, sizeof(*param));
++}
++EXPORT_SYMBOL(dm_dev_remove);
++
+ /*
+  * Check a string doesn't overrun the chunk of
+  * memory we copied from userland.
+@@ -923,6 +935,12 @@ static int do_resume(struct dm_ioctl *pa
+ 	return r;
+ }
+ 
++int dm_do_resume(struct dm_ioctl *param)
++{
++	return do_resume(param);
++}
++EXPORT_SYMBOL(dm_do_resume);
++
+ /*
+  * Set or unset the suspension state of a device.
+  * If the device already is in the requested state we just return its status.
+@@ -1200,6 +1218,12 @@ out:
+ 	return r;
+ }
+ 
++int dm_table_load(struct dm_ioctl *param, size_t param_size)
++{
++	return table_load(param, param_size);
++}
++EXPORT_SYMBOL(dm_table_load);
++
+ static int table_clear(struct dm_ioctl *param, size_t param_size)
+ {
+ 	int r;
+diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drivers/scsi/hosts.c
+--- linux-2.6.34.noarch/drivers/scsi/hosts.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-08-31 20:42:05.489160594 -0400
+@@ -49,7 +49,7 @@ static void scsi_host_cls_release(struct
+ 	put_device(&class_to_shost(dev)->shost_gendev);
+ }
+ 
+-static struct class shost_class = {
++struct class shost_class = {
+ 	.name		= "scsi_host",
+ 	.dev_release	= scsi_host_cls_release,
+ };
+diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/exofs.h
+--- linux-2.6.34.noarch/fs/exofs/exofs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-08-31 20:42:05.492243039 -0400
+@@ -36,13 +36,9 @@
+ #include <linux/fs.h>
+ #include <linux/time.h>
+ #include <linux/backing-dev.h>
++#include <linux/pnfs_osd_xdr.h>
+ #include "common.h"
+ 
+-/* FIXME: Remove once pnfs hits mainline
+- * #include <linux/exportfs/pnfs_osd_xdr.h>
+- */
+-#include "pnfs.h"
+-
+ #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
+ 
+ #ifdef CONFIG_EXOFS_DEBUG
+@@ -103,6 +99,7 @@ struct exofs_sb_info {
+ struct exofs_i_info {
+ 	struct inode   vfs_inode;          /* normal in-memory inode          */
+ 	wait_queue_head_t i_wq;            /* wait queue for inode            */
++	spinlock_t     i_layout_lock;      /* lock for layout/return/recall   */
+ 	unsigned long  i_flags;            /* various atomic flags            */
+ 	uint32_t       i_data[EXOFS_IDATA];/*short symlink names and device #s*/
+ 	uint32_t       i_dir_start_lookup; /* which page to start lookup      */
+@@ -166,6 +163,9 @@ static inline unsigned exofs_io_state_si
+  */
+ #define OBJ_2BCREATED	0	/* object will be created soon*/
+ #define OBJ_CREATED	1	/* object has been created on the osd*/
++/* Below are not used atomic but reuse the same i_flags */
++#define OBJ_LAYOUT_IS_GIVEN  2  /* inode has given layouts to clients*/
++#define OBJ_IN_LAYOUT_RECALL 3  /* inode is in the middle of a layout recall*/
+ 
+ static inline int obj_2bcreated(struct exofs_i_info *oi)
+ {
+@@ -304,4 +304,20 @@ extern const struct inode_operations exo
+ extern const struct inode_operations exofs_symlink_inode_operations;
+ extern const struct inode_operations exofs_fast_symlink_inode_operations;
+ 
++/* export.c */
++typedef int (exofs_recall_fn)(struct inode *inode);
++#ifdef CONFIG_PNFSD
++int exofs_inode_recall_layout(struct inode *inode, enum pnfs_iomode iomode,
++			      exofs_recall_fn todo);
++void exofs_init_export(struct super_block *sb);
++#else
++static inline int exofs_inode_recall_layout(struct inode *inode,
++				enum pnfs_iomode iomode, exofs_recall_fn todo)
++{
++	return todo(inode);
++}
++
++static inline void exofs_init_export(struct super_block *sb) {}
++#endif
++
+ #endif
+diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs/export.c
+--- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-08-31 20:42:05.493222759 -0400
++++ linux-2.6.34.noarch/fs/exofs/export.c	2010-08-31 20:42:05.493222759 -0400
+@@ -0,0 +1,396 @@
++/*
++ * export.c - Implementation of the pnfs_export_operations
++ *
++ * Copyright (C) 2009 Panasas Inc.
++ * All rights reserved.
++ *
++ * Boaz Harrosh <bharrosh@panasas.com>
++ *
++ * This file is part of exofs.
++ *
++ * exofs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation.  Since it is based on ext2, and the only
++ * valid version of GPL for the Linux kernel is version 2, the only valid
++ * version of GPL for exofs is version 2.
++ *
++ * exofs is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with exofs; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
++ */
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include "exofs.h"
++
++static int exofs_layout_type(struct super_block *sb)
++{
++	return LAYOUT_OSD2_OBJECTS;
++}
++
++static void set_dev_id(struct pnfs_deviceid *pnfs_devid, u64 sbid, u64 devid)
++{
++	struct nfsd4_pnfs_deviceid *dev_id =
++		(struct nfsd4_pnfs_deviceid *)pnfs_devid;
++
++	dev_id->sbid  = sbid;
++	dev_id->devid = devid;
++}
++
++static int cb_layout_recall(struct inode *inode, enum pnfs_iomode iomode,
++			    u64 offset, u64 length, void *cookie)
++{
++	struct nfsd4_pnfs_cb_layout cbl;
++	struct pnfsd_cb_ctl cb_ctl;
++	int status;
++
++	memset(&cb_ctl, 0, sizeof(cb_ctl));
++	status = pnfsd_get_cb_op(&cb_ctl);
++	if (unlikely(status)) {
++		EXOFS_ERR("%s: nfsd unloaded!! inode (0x%lx) status=%d\n",
++			  __func__, inode->i_ino, status);
++		goto err;
++	}
++
++	memset(&cbl, 0, sizeof(cbl));
++	cbl.cbl_recall_type = RETURN_FILE;
++	cbl.cbl_seg.layout_type = LAYOUT_OSD2_OBJECTS;
++	cbl.cbl_seg.iomode = iomode;
++	cbl.cbl_seg.offset = offset;
++	cbl.cbl_seg.length = length;
++	cbl.cbl_cookie = cookie;
++
++	status = cb_ctl.cb_op->cb_layout_recall(inode->i_sb, inode, &cbl);
++	pnfsd_put_cb_op(&cb_ctl);
++
++err:
++	return status;
++}
++
++static enum nfsstat4 exofs_layout_get(
++	struct inode *inode,
++	struct exp_xdr_stream *xdr,
++	const struct nfsd4_pnfs_layoutget_arg *args,
++	struct nfsd4_pnfs_layoutget_res *res)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
++	struct exofs_layout *el = &sbi->layout;
++	struct pnfs_osd_object_cred *creds = NULL;
++	struct pnfs_osd_layout layout;
++	__be32 *start;
++	bool in_recall;
++	int i, err;
++	enum nfsstat4 nfserr;
++
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;
++	res->lg_seg.iomode = IOMODE_RW;
++	res->lg_return_on_close = true; /* TODO: unused but will be soon */
++
++	/* skip opaque size, will be filled-in later */
++	start = exp_xdr_reserve_qwords(xdr, 1);
++	if (!start) {
++		/* nothing to free yet; "out" would compute xdr->p - NULL */
++		return NFS4ERR_TOOSMALL;
++	}
++
++	creds = kcalloc(el->s_numdevs, sizeof(*creds), GFP_KERNEL);
++	if (!creds) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto out;
++	}
++
++	/* Fill in a pnfs_osd_layout struct */
++	layout.olo_map = sbi->data_map;
++
++	for (i = 0; i < el->s_numdevs; i++) {
++		struct pnfs_osd_object_cred *cred = &creds[i];
++		osd_id id = exofs_oi_objno(oi);
++		unsigned dev = exofs_layout_od_id(el, id, i);
++
++		set_dev_id(&cred->oc_object_id.oid_device_id, args->lg_sbid,
++			   dev);
++		cred->oc_object_id.oid_partition_id = el->s_pid;
++		cred->oc_object_id.oid_object_id = id;
++		cred->oc_osd_version = osd_dev_is_ver1(el->s_ods[dev]) ?
++						PNFS_OSD_VERSION_1 :
++						PNFS_OSD_VERSION_2;
++		cred->oc_cap_key_sec = PNFS_OSD_CAP_KEY_SEC_NONE;
++
++		cred->oc_cap_key.cred_len	= 0;
++		cred->oc_cap_key.cred		= NULL;
++
++		cred->oc_cap.cred_len	= OSD_CAP_LEN;
++		cred->oc_cap.cred	= oi->i_cred;
++	}
++
++	layout.olo_comps_index = 0;
++	layout.olo_num_comps = el->s_numdevs;
++	layout.olo_comps = creds;
++
++	err = pnfs_osd_xdr_encode_layout(xdr, &layout);
++	if (err) {
++		nfserr = NFS4ERR_TOOSMALL; /* FIXME: Change osd_xdr error codes */
++		goto out;
++	}
++
++	exp_xdr_encode_opaque_len(start, xdr->p);
++
++	spin_lock(&oi->i_layout_lock);
++	in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	if (!in_recall) {
++		set_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++		nfserr = NFS4_OK;
++	} else {
++		nfserr = NFS4ERR_RECALLCONFLICT;
++	}
++	spin_unlock(&oi->i_layout_lock);
++
++out:
++	kfree(creds);
++	EXOFS_DBGMSG("(0x%lx) nfserr=%u xdr_bytes=%zu\n",
++		     inode->i_ino, nfserr, exp_xdr_qbytes(xdr->p - start));
++	return nfserr;
++}
++
++/* NOTE: inode mutex must NOT be held */
++static int exofs_layout_commit(
++	struct inode *inode,
++	const struct nfsd4_pnfs_layoutcommit_arg *args,
++	struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	struct timespec mtime;
++	loff_t i_size;
++	int in_recall;
++
++	/* In case of a recall we ignore the new size and mtime since they
++	 * are going to be changed again by truncate, and since we cannot take
++	 * the inode lock in that case.
++	 */
++	spin_lock(&oi->i_layout_lock);
++	in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++	if (in_recall) {
++		EXOFS_DBGMSG("(0x%lx) commit was called during recall\n",
++			     inode->i_ino);
++		return 0;
++	}
++
++	/* NOTE: I would love to call inode_setattr here
++	 *	 but i cannot since this will cause an eventual vmtruncate,
++	 *	 which will cause a layout_recall. So open code the i_size
++	 *	 and mtime/atime changes under i_mutex.
++	 */
++	mutex_lock_nested(&inode->i_mutex, I_MUTEX_NORMAL);
++
++	if (args->lc_mtime.seconds) {
++		mtime.tv_sec = args->lc_mtime.seconds;
++		mtime.tv_nsec = args->lc_mtime.nseconds;
++
++		/* layout commit may only make time bigger, since there might
++		 * be reordering of the notifications and it might arrive after
++		 * a local change: keep i_mtime when the client's is older.
++		 * TODO: if mtime > ctime then we know set_attr did an mtime
++		 * in the future. and we can let this update through
++		 */
++		if (timespec_compare(&mtime, &inode->i_mtime) < 0)
++			mtime = inode->i_mtime;
++	} else {
++		mtime = current_fs_time(inode->i_sb);
++	}
++
++	/* TODO: Will below work? since mark_inode_dirty has its own
++	 *       Time handling
++	 */
++	inode->i_atime = inode->i_mtime = mtime;
++
++	i_size = i_size_read(inode);
++	if (args->lc_newoffset) {
++		loff_t new_size = args->lc_last_wr + 1;
++
++		if (i_size < new_size) {
++			i_size_write(inode, i_size = new_size);
++			res->lc_size_chg = 1;
++			res->lc_newsize = new_size;
++		}
++	}
++	/* TODO: else { i_size = osd_get_object_length() } */
++
++/* TODO: exofs does not currently use the osd_xdr part of the layout_commit */
++
++	mark_inode_dirty_sync(inode);
++
++	mutex_unlock(&inode->i_mutex);
++	EXOFS_DBGMSG("(0x%lx) i_size=0x%llx lcp->off=0x%llx\n",
++		     inode->i_ino, i_size, args->lc_last_wr);
++	return 0;
++}
++
++static void exofs_handle_error(struct pnfs_osd_ioerr *ioerr)
++{
++	EXOFS_ERR("exofs_handle_error: errno=%d is_write=%d obj=0x%llx "
++		  "offset=0x%llx length=0x%llx\n",
++		  ioerr->oer_errno, ioerr->oer_iswrite,
++		  _LLU(ioerr->oer_component.oid_object_id),
++		  _LLU(ioerr->oer_comp_offset),
++		  _LLU(ioerr->oer_comp_length));
++}
++
++static int exofs_layout_return(
++	struct inode *inode,
++	const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	__be32 *p = args->lrf_body;
++	unsigned len = exp_xdr_qwords(args->lrf_body_len);
++
++	EXOFS_DBGMSG("(0x%lx) cookie %p xdr_len %u\n",
++		     inode->i_ino, args->lr_cookie, len);
++
++	while (len >= pnfs_osd_ioerr_xdr_sz()) {
++		struct pnfs_osd_ioerr ioerr;
++
++		p = pnfs_osd_xdr_decode_ioerr(&ioerr, p);
++		len -= pnfs_osd_ioerr_xdr_sz();
++		exofs_handle_error(&ioerr);
++	}
++
++	if (args->lr_cookie) {
++		struct exofs_i_info *oi = exofs_i(inode);
++		bool in_recall;
++
++		spin_lock(&oi->i_layout_lock);
++		in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++		clear_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++		spin_unlock(&oi->i_layout_lock);
++
++		/* TODO: how to communicate cookie with the waiter */
++		if (in_recall)
++			wake_up(&oi->i_wq); /* wakeup any recalls */
++	}
++
++	return 0;
++}
++
++int exofs_get_device_info(struct super_block *sb, struct exp_xdr_stream *xdr,
++			  u32 layout_type,
++			  const struct nfsd4_pnfs_deviceid *devid)
++{
++	struct exofs_sb_info *sbi = sb->s_fs_info;
++	struct pnfs_osd_deviceaddr devaddr;
++	const struct osd_dev_info *odi;
++	u64 devno = devid->devid;
++	__be32 *start;
++	int err;
++
++	memset(&devaddr, 0, sizeof(devaddr));
++
++	if (unlikely(devno >= sbi->layout.s_numdevs))
++		return -ENODEV;
++
++	odi = osduld_device_info(sbi->layout.s_ods[devno]);
++
++	devaddr.oda_systemid.len = odi->systemid_len;
++	devaddr.oda_systemid.data = (void *)odi->systemid; /* !const cast */
++
++	devaddr.oda_osdname.len = odi->osdname_len ;
++	devaddr.oda_osdname.data = (void *)odi->osdname;/* !const cast */
++
++	/* skip opaque size, will be filled-in later */
++	start = exp_xdr_reserve_qwords(xdr, 1);
++	if (!start) {
++		/* "err" prints xdr->p - start, which is meaningless here */
++		return -E2BIG;
++	}
++
++	err = pnfs_osd_xdr_encode_deviceaddr(xdr, &devaddr);
++	if (err)
++		goto err;
++
++	exp_xdr_encode_opaque_len(start, xdr->p);
++
++	EXOFS_DBGMSG("xdr_bytes=%Zu devno=%lld osdname-%s\n",
++		     exp_xdr_qbytes(xdr->p - start), devno, odi->osdname);
++	return 0;
++
++err:
++	EXOFS_DBGMSG("Error: err=%d at_byte=%zu\n",
++		     err, exp_xdr_qbytes(xdr->p - start));
++	return err;
++}
++
++struct pnfs_export_operations exofs_pnfs_ops = {
++	.layout_type	= exofs_layout_type,
++	.layout_get	= exofs_layout_get,
++	.layout_commit	= exofs_layout_commit,
++	.layout_return	= exofs_layout_return,
++	.get_device_info = exofs_get_device_info,
++};
++
++static bool is_layout_returned(struct exofs_i_info *oi)
++{
++	bool layout_given;
++
++	spin_lock(&oi->i_layout_lock);
++	layout_given = test_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++
++	return !layout_given;
++}
++
++int exofs_inode_recall_layout(struct inode *inode, enum pnfs_iomode iomode,
++			      exofs_recall_fn todo)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	int layout_given;
++	int error = 0;
++
++	spin_lock(&oi->i_layout_lock);
++	layout_given = test_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++	set_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++
++	if (!layout_given)
++		goto exec;
++
++	for (;;) {
++		EXOFS_DBGMSG("(0x%lx) has_layout issue a recall\n",
++			     inode->i_ino);
++		error = cb_layout_recall(inode, iomode, 0, NFS4_MAX_UINT64,
++					 &oi->i_wq);
++		switch (error) {
++		case 0:
++		case -EAGAIN:
++			break;
++		case -ENOENT:
++			goto exec;
++		default:
++			goto err;
++		}
++
++		error = wait_event_interruptible(oi->i_wq,
++						 is_layout_returned(oi));
++		if (error)
++			goto err;
++	}
++
++exec:
++	error = todo(inode);
++
++err:
++	spin_lock(&oi->i_layout_lock);
++	clear_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++	EXOFS_DBGMSG("(0x%lx) return=>%d\n", inode->i_ino, error);
++	return error;
++}
++
++void exofs_init_export(struct super_block *sb)
++{
++	sb->s_pnfs_op = &exofs_pnfs_ops;
++}
+diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/inode.c
+--- linux-2.6.34.noarch/fs/exofs/inode.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-08-31 20:42:05.494222756 -0400
+@@ -833,7 +833,7 @@ void exofs_truncate(struct inode *inode)
+ 	if (unlikely(wait_obj_created(oi)))
+ 		goto fail;
+ 
+-	ret = _do_truncate(inode);
++	ret = exofs_inode_recall_layout(inode, IOMODE_ANY, _do_truncate);
+ 	if (ret)
+ 		goto fail;
+ 
+@@ -964,6 +964,7 @@ static void __oi_init(struct exofs_i_inf
+ {
+ 	init_waitqueue_head(&oi->i_wq);
+ 	oi->i_flags = 0;
++	spin_lock_init(&oi->i_layout_lock);
+ }
+ /*
+  * Fill in an inode read from the OSD and set it up for use
+diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/Kbuild
+--- linux-2.6.34.noarch/fs/exofs/Kbuild.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-08-31 20:42:05.490222933 -0400
+@@ -13,4 +13,5 @@
+ #
+ 
+ exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o
++exofs-$(CONFIG_PNFSD) +=  export.o
+ obj-$(CONFIG_EXOFS_FS) += exofs.o
+diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/Kconfig
+--- linux-2.6.34.noarch/fs/exofs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-08-31 20:42:05.491232880 -0400
+@@ -1,6 +1,7 @@
+ config EXOFS_FS
+ 	tristate "exofs: OSD based file system support"
+ 	depends on SCSI_OSD_ULD
++	select EXPORTFS_OSD_LAYOUT if PNFSD
+ 	help
+ 	  EXOFS is a file system that uses an OSD storage device,
+ 	  as its backing storage.
+diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/super.c
+--- linux-2.6.34.noarch/fs/exofs/super.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/super.c	2010-08-31 20:42:05.496073173 -0400
+@@ -621,6 +621,7 @@ static int exofs_fill_super(struct super
+ 	sb->s_fs_info = sbi;
+ 	sb->s_op = &exofs_sops;
+ 	sb->s_export_op = &exofs_export_ops;
++	exofs_init_export(sb);
+ 	root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF);
+ 	if (IS_ERR(root)) {
+ 		EXOFS_ERR("ERROR: exofs_iget failed\n");
+diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exportfs/expfs.c
+--- linux-2.6.34.noarch/fs/exportfs/expfs.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-08-31 20:42:05.497212975 -0400
+@@ -16,6 +16,13 @@
+ #include <linux/namei.h>
+ #include <linux/sched.h>
+ 
++#if defined(CONFIG_PNFSD)
++struct pnfsd_cb_ctl pnfsd_cb_ctl = {
++	.lock = __SPIN_LOCK_UNLOCKED(pnfsd_cb_ctl.lock)
++};
++EXPORT_SYMBOL(pnfsd_cb_ctl);
++#endif /* CONFIG_PNFSD */
++
+ #define dprintk(fmt, args...) do{}while(0)
+ 
+ 
+diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/exportfs/Makefile
+--- linux-2.6.34.noarch/fs/exportfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-08-31 20:42:05.496073173 -0400
+@@ -3,4 +3,7 @@
+ 
+ obj-$(CONFIG_EXPORTFS) += exportfs.o
+ 
+-exportfs-objs := expfs.o
++exportfs-y				:= expfs.o
++exportfs-$(CONFIG_EXPORTFS_FILE_LAYOUT)	+= nfs4filelayoutxdr.o
++exportfs-$(CONFIG_EXPORTFS_OSD_LAYOUT)	+= pnfs_osd_xdr_srv.o
++exportfs-$(CONFIG_EXPORTFS_BLOCK_LAYOUT) += nfs4blocklayoutxdr.o
+diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c
+--- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-08-31 20:42:05.497212975 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-08-31 20:42:05.498113655 -0400
+@@ -0,0 +1,158 @@
++/*
++ *  linux/fs/exportfs/nfs4blocklayoutxdr.c
++ *
++ *
++ *  Created by Rick McNeal on 3/31/08.
++ *  Copyright 2008 __MyCompanyName__. All rights reserved.
++ *
++ */
++#include <linux/module.h>
++#include <linux/sunrpc/svc.h>
++#include <linux/nfs4.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++static int
++bl_encode_simple(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	__be32 *p = exp_xdr_reserve_space(xdr,
++					  12 + 4 + bld->u.simple.bld_sig_len);
++
++	if (!p)
++		return -ETOOSMALL;
++
++	p = exp_xdr_encode_u32(p, 1);
++	p = exp_xdr_encode_u64(p, bld->u.simple.bld_offset);
++	exp_xdr_encode_opaque(p, bld->u.simple.bld_sig,
++			      bld->u.simple.bld_sig_len);
++
++	return 0;
++}
++
++static int
++bl_encode_slice(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 2 + 2 + 1);
++
++	if (!p)
++		return -ETOOSMALL;
++
++	p = exp_xdr_encode_u64(p, bld->u.slice.bld_start);
++	p = exp_xdr_encode_u64(p, bld->u.slice.bld_len);
++	exp_xdr_encode_u32(p, bld->u.slice.bld_index);
++
++	return 0;
++}
++
++static int
++bl_encode_concat(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	return -ENOTSUPP;
++}
++
++static int
++bl_encode_stripe(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	int i, nwords = 2 + 1 + bld->u.stripe.bld_stripes; /* 32-bit words */
++	__be32 *p = exp_xdr_reserve_qwords(xdr, nwords);
++
++	if (!p)
++		return -ETOOSMALL;
++	p = exp_xdr_encode_u64(p, bld->u.stripe.bld_chunk_size);
++	p = exp_xdr_encode_u32(p, bld->u.stripe.bld_stripes);
++	for (i = 0; i < bld->u.stripe.bld_stripes; i++)
++		p = exp_xdr_encode_u32(p, bld->u.stripe.bld_stripe_indexs[i]);
++	return 0;
++}
++
++int
++blocklayout_encode_devinfo(struct exp_xdr_stream *xdr,
++			   const struct list_head *volumes)
++{
++	u32				num_vols	= 0,
++					*layoutlen_p	= xdr->p;
++	pnfs_blocklayout_devinfo_t	*bld;
++	int				status		= 0;
++	__be32 *p;
++
++	p = exp_xdr_reserve_qwords(xdr, 2);
++	if (!p)
++		return -ETOOSMALL;
++	p += 2;
++
++	/*
++	 * All simple volumes with their signature are required to be listed
++	 * first.
++	 */
++	list_for_each_entry(bld, volumes, bld_list) {
++		num_vols++;
++		p = exp_xdr_reserve_qwords(xdr, 1);
++		if (!p)
++			return -ETOOSMALL;
++		p = exp_xdr_encode_u32(p, bld->bld_type);
++		switch (bld->bld_type) {
++			case PNFS_BLOCK_VOLUME_SIMPLE:
++				status = bl_encode_simple(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_SLICE:
++				status = bl_encode_slice(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_CONCAT:
++				status = bl_encode_concat(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_STRIPE:
++				status = bl_encode_stripe(xdr, bld);
++				break;
++			default:
++				BUG();
++		}
++		if (status)
++			goto error;
++	}
++
++	/* ---- Fill in the overall length and number of volumes ---- */
++	p = exp_xdr_encode_u32(layoutlen_p, (xdr->p - layoutlen_p - 1) * 4);
++	exp_xdr_encode_u32(p, num_vols);
++
++error:
++	return status;
++}
++EXPORT_SYMBOL_GPL(blocklayout_encode_devinfo);
++
++enum nfsstat4
++blocklayout_encode_layout(struct exp_xdr_stream *xdr,
++			  const struct list_head *bl_head)
++{
++	struct pnfs_blocklayout_layout	*b;
++	u32				*layoutlen_p	= xdr->p,
++					extents		= 0;
++	__be32 *p;
++
++	/*
++	 * Save spot for opaque block layout length and number of extents,
++	 * fill-in later.
++	 */
++	p = exp_xdr_reserve_qwords(xdr, 2);
++	if (!p)
++		return NFS4ERR_TOOSMALL;
++	p += 2;
++
++	list_for_each_entry(b, bl_head, bll_list) {
++		extents++;
++		p = exp_xdr_reserve_qwords(xdr, 5 * 2 + 1);
++		if (!p)
++			return NFS4ERR_TOOSMALL;
++		p = exp_xdr_encode_u64(p, b->bll_vol_id.sbid);
++		p = exp_xdr_encode_u64(p, b->bll_vol_id.devid);
++		p = exp_xdr_encode_u64(p, b->bll_foff);
++		p = exp_xdr_encode_u64(p, b->bll_len);
++		p = exp_xdr_encode_u64(p, b->bll_soff);
++		p = exp_xdr_encode_u32(p, b->bll_es);
++	}
++
++	/* ---- Fill in the overall length and number of extents ---- */
++	p = exp_xdr_encode_u32(layoutlen_p, (p - layoutlen_p - 1) * 4);
++	exp_xdr_encode_u32(p, extents);
++
++	return NFS4_OK;
++}
++EXPORT_SYMBOL_GPL(blocklayout_encode_layout);
+diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c
+--- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-08-31 20:42:05.498113655 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-08-31 20:42:05.498113655 -0400
+@@ -0,0 +1,218 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#include <linux/exp_xdr.h>
++#include <linux/module.h>
++#include <linux/nfs4.h>
++#include <linux/nfsd/nfsfh.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++/* We do our-own dprintk so filesystems are not dependent on sunrpc */
++#ifdef dprintk
++#undef dprintk
++#endif
++#define dprintk(fmt, args, ...)	do { } while (0)
++
++/* Calculate the XDR length, in words, of the GETDEVICEINFO4resok structure
++ * excluding the gdir_notification and the gdir_device_addr da_layout_type.
++ */
++static int fl_devinfo_xdr_words(const struct pnfs_filelayout_device *fdev)
++{
++	struct pnfs_filelayout_devaddr *fl_addr;
++	struct pnfs_filelayout_multipath *mp;
++	int i, j, nwords;
++
++	/* da_addr_body length, index count, indices,
++	 * multipath_list4 length */
++	nwords = 1 + 1 + fdev->fl_stripeindices_length + 1;
++	for (i = 0; i < fdev->fl_device_length; i++) {
++		mp = &fdev->fl_device_list[i];
++		nwords++; /* multipath list length */
++		for (j = 0; j < mp->fl_multipath_length; j++) {
++			fl_addr = &mp->fl_multipath_list[j]; /* size each entry */
++			nwords += 1 + exp_xdr_qwords(fl_addr->r_netid.len);
++			nwords += 1 + exp_xdr_qwords(fl_addr->r_addr.len);
++		}
++	}
++	dprintk("<-- %s nwords %d\n", __func__, nwords);
++	return nwords;
++}
++
++/* Encodes the nfsv4_1_file_layout_ds_addr4 structure from draft 13
++ * on the response stream.
++ * Use linux error codes (not nfs) since these values are being
++ * returned to the file system.
++ */
++int
++filelayout_encode_devinfo(struct exp_xdr_stream *xdr,
++			  const struct pnfs_filelayout_device *fdev)
++{
++	unsigned int i, j, len = 0, opaque_words;
++	u32 *p_in;
++	u32 index_count = fdev->fl_stripeindices_length;
++	u32 dev_count = fdev->fl_device_length;
++	int error = 0;
++	__be32 *p;
++
++	opaque_words = fl_devinfo_xdr_words(fdev);
++	dprintk("%s: Begin indx_cnt: %u dev_cnt: %u total size %u\n",
++		__func__,
++		index_count,
++		dev_count,
++		opaque_words*4);
++
++	/* check space for opaque length */
++	p = p_in = exp_xdr_reserve_qwords(xdr, opaque_words);
++	if (!p) {
++		error =  -ETOOSMALL;
++		goto out;
++	}
++
++	/* Fill in length later */
++	p++;
++
++	/* encode device list indices */
++	p = exp_xdr_encode_u32(p, index_count);
++	for (i = 0; i < index_count; i++)
++		p = exp_xdr_encode_u32(p, fdev->fl_stripeindices_list[i]);
++
++	/* encode device list */
++	p = exp_xdr_encode_u32(p, dev_count);
++	for (i = 0; i < dev_count; i++) {
++		struct pnfs_filelayout_multipath *mp = &fdev->fl_device_list[i];
++
++		p = exp_xdr_encode_u32(p, mp->fl_multipath_length);
++		for (j = 0; j < mp->fl_multipath_length; j++) {
++			struct pnfs_filelayout_devaddr *da =
++						&mp->fl_multipath_list[j];
++
++			/* Encode device info */
++			p = exp_xdr_encode_opaque(p, da->r_netid.data,
++						     da->r_netid.len);
++			p = exp_xdr_encode_opaque(p, da->r_addr.data,
++						     da->r_addr.len);
++		}
++	}
++
++	/* backfill in length. Subtract 4 for da_addr_body size */
++	len = (char *)p - (char *)p_in;
++	exp_xdr_encode_u32(p_in, len - 4);
++
++	error = 0;
++out:
++	dprintk("%s: End err %d xdrlen %d\n",
++		__func__, error, len);
++	return error;
++}
++EXPORT_SYMBOL(filelayout_encode_devinfo);
++
++/* Encodes the loc_body structure from draft 13
++ * on the response stream.
++ * Returns an NFSv4 status code (enum nfsstat4): NFS4_OK on success,
++ * NFS4ERR_LAYOUTUNAVAILABLE or NFS4ERR_TOOSMALL on failure.
++ */
++enum nfsstat4
++filelayout_encode_layout(struct exp_xdr_stream *xdr,
++			 const struct pnfs_filelayout_layout *flp)
++{
++	u32 len = 0, nfl_util, fhlen, i;
++	u32 *layoutlen_p;
++	enum nfsstat4 nfserr;
++	__be32 *p;
++
++	dprintk("%s: device_id %llx:%llx fsi %u, numfh %u\n",
++		__func__,
++		flp->device_id.sbid,
++		flp->device_id.devid,
++		flp->lg_first_stripe_index,
++		flp->lg_fh_length);
++
++	/* Ensure file system added at least one file handle */
++	if (flp->lg_fh_length <= 0) {
++		dprintk("%s: File Layout has no file handles!!\n", __func__);
++		nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		goto out;
++	}
++
++	/* Ensure room for len, devid, util, first_stripe_index,
++	 * pattern_offset, number of filehandles */
++	p = layoutlen_p = exp_xdr_reserve_qwords(xdr, 1+2+2+1+1+2+1);
++	if (!p) {
++		nfserr = NFS4ERR_TOOSMALL;
++		goto out;
++	}
++
++	/* save spot for opaque file layout length, fill-in later*/
++	p++;
++
++	/* encode device id */
++	p = exp_xdr_encode_u64(p, flp->device_id.sbid);
++	p = exp_xdr_encode_u64(p, flp->device_id.devid);
++
++	/* set and encode flags */
++	nfl_util = flp->lg_stripe_unit;
++	if (flp->lg_commit_through_mds)
++		nfl_util |= NFL4_UFLG_COMMIT_THRU_MDS;
++	if (flp->lg_stripe_type == STRIPE_DENSE)
++		nfl_util |= NFL4_UFLG_DENSE;
++	p = exp_xdr_encode_u32(p, nfl_util);
++
++	/* encode first stripe index */
++	p = exp_xdr_encode_u32(p, flp->lg_first_stripe_index);
++
++	/* encode striping pattern start */
++	p = exp_xdr_encode_u64(p, flp->lg_pattern_offset);
++
++	/* encode number of file handles */
++	p = exp_xdr_encode_u32(p, flp->lg_fh_length);
++
++	/* encode file handles */
++	for (i = 0; i < flp->lg_fh_length; i++) {
++		fhlen = flp->lg_fh_list[i].fh_size;
++		p = exp_xdr_reserve_space(xdr, 4 + fhlen);
++		if (!p) {
++			nfserr = NFS4ERR_TOOSMALL;
++			goto out;
++		}
++		p = exp_xdr_encode_opaque(p, &flp->lg_fh_list[i].fh_base, fhlen);
++	}
++
++	/* Set number of bytes encoded =  total_bytes_encoded - length var */
++	len = (char *)p - (char *)layoutlen_p;
++	exp_xdr_encode_u32(layoutlen_p, len - 4);
++
++	nfserr = NFS4_OK;
++out:
++	dprintk("%s: End err %u xdrlen %d\n",
++		__func__, nfserr, len);
++	return nfserr;
++}
++EXPORT_SYMBOL(filelayout_encode_layout);
+diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c
+--- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-08-31 20:42:05.499125509 -0400
++++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-08-31 20:42:05.499125509 -0400
+@@ -0,0 +1,289 @@
++/*
++ *  pnfs_osd_xdr_srv.c
++ *
++ *  Object-Based pNFS Layout XDR layer
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/pnfs_osd_xdr.h>
++
++/*
++ * struct pnfs_osd_data_map {
++ * 	u32	odm_num_comps;
++ * 	u64	odm_stripe_unit;
++ * 	u32	odm_group_width;
++ * 	u32	odm_group_depth;
++ * 	u32	odm_mirror_cnt;
++ * 	u32	odm_raid_algorithm;
++ * };
++ */
++static int pnfs_osd_xdr_encode_data_map(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_data_map *data_map)
++{
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 1+2+1+1+1+1);
++
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u32(p, data_map->odm_num_comps);
++	p = exp_xdr_encode_u64(p, data_map->odm_stripe_unit);
++	p = exp_xdr_encode_u32(p, data_map->odm_group_width);
++	p = exp_xdr_encode_u32(p, data_map->odm_group_depth);
++	p = exp_xdr_encode_u32(p, data_map->odm_mirror_cnt);
++	p = exp_xdr_encode_u32(p, data_map->odm_raid_algorithm);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline int pnfs_osd_xdr_encode_objid(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_objid *object_id)
++{
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 2+2+2+2);
++	struct nfsd4_pnfs_deviceid *dev_id =
++		(struct nfsd4_pnfs_deviceid *)&object_id->oid_device_id;
++
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u64(p, dev_id->sbid);
++	p = exp_xdr_encode_u64(p, dev_id->devid);
++	p = exp_xdr_encode_u64(p, object_id->oid_partition_id);
++	p = exp_xdr_encode_u64(p, object_id->oid_object_id);
++
++	return 0;
++}
++
++/*
++ * enum pnfs_osd_cap_key_sec4 {
++ * 	PNFS_OSD_CAP_KEY_SEC_NONE = 0,
++ * 	PNFS_OSD_CAP_KEY_SEC_SSV  = 1
++ * };
++ *
++ * struct pnfs_osd_object_cred {
++ * 	struct pnfs_osd_objid		oc_object_id;
++ * 	u32				oc_osd_version;
++ * 	u32				oc_cap_key_sec;
++ * 	struct pnfs_osd_opaque_cred	oc_cap_key
++ * 	struct pnfs_osd_opaque_cred	oc_cap;
++ * };
++ */
++static int pnfs_osd_xdr_encode_object_cred(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_object_cred *olo_comp)
++{
++	__be32 *p;
++	int err;
++
++	err = pnfs_osd_xdr_encode_objid(xdr, &olo_comp->oc_object_id);
++	if (err)
++		return err;
++
++	p = exp_xdr_reserve_space(xdr, 3*4 + 4+olo_comp->oc_cap.cred_len);
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u32(p, olo_comp->oc_osd_version);
++
++	/* No sec for now */
++	p = exp_xdr_encode_u32(p, PNFS_OSD_CAP_KEY_SEC_NONE);
++	p = exp_xdr_encode_u32(p, 0); /* opaque oc_capability_key<> */
++
++	exp_xdr_encode_opaque(p, olo_comp->oc_cap.cred,
++			      olo_comp->oc_cap.cred_len);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_layout {
++ * 	struct pnfs_osd_data_map	olo_map;
++ * 	u32				olo_comps_index;
++ * 	u32				olo_num_comps;
++ * 	struct pnfs_osd_object_cred	*olo_comps;
++ * };
++ */
++int pnfs_osd_xdr_encode_layout(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_layout *pol)
++{
++	__be32 *p;
++	u32 i;
++	int err;
++
++	err = pnfs_osd_xdr_encode_data_map(xdr, &pol->olo_map);
++	if (err)
++		return err;
++
++	p = exp_xdr_reserve_qwords(xdr, 2);
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u32(p, pol->olo_comps_index);
++	p = exp_xdr_encode_u32(p, pol->olo_num_comps);
++
++	for (i = 0; i < pol->olo_num_comps; i++) {
++		err = pnfs_osd_xdr_encode_object_cred(xdr, &pol->olo_comps[i]);
++		if (err)
++			return err;
++	}
++
++	return 0;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_encode_layout);
++
++static int _encode_string(struct exp_xdr_stream *xdr,
++			  const struct nfs4_string *str)
++{
++	__be32 *p = exp_xdr_reserve_space(xdr, 4 + str->len);
++
++	if (!p)
++		return -E2BIG;
++	exp_xdr_encode_opaque(p, str->data, str->len);
++	return 0;
++}
++
++/* struct pnfs_osd_deviceaddr {
++ * 	struct pnfs_osd_targetid	oda_targetid;
++ * 	struct pnfs_osd_targetaddr	oda_targetaddr;
++ * 	u8				oda_lun[8];
++ * 	struct nfs4_string		oda_systemid;
++ * 	struct pnfs_osd_object_cred	oda_root_obj_cred;
++ * 	struct nfs4_string		oda_osdname;
++ * };
++ */
++int pnfs_osd_xdr_encode_deviceaddr(
++	struct exp_xdr_stream *xdr, struct pnfs_osd_deviceaddr *devaddr)
++{
++	__be32 *p;
++	int err;
++
++	p = exp_xdr_reserve_space(xdr, 4 + 4 + sizeof(devaddr->oda_lun));
++	if (!p)
++		return -E2BIG;
++
++	/* Empty oda_targetid */
++	p = exp_xdr_encode_u32(p, OBJ_TARGET_ANON);
++
++	/* Empty oda_targetaddr for now */
++	p = exp_xdr_encode_u32(p, 0);
++
++	/* oda_lun */
++	exp_xdr_encode_bytes(p, devaddr->oda_lun, sizeof(devaddr->oda_lun));
++
++	err = _encode_string(xdr, &devaddr->oda_systemid);
++	if (err)
++		return err;
++
++	err = pnfs_osd_xdr_encode_object_cred(xdr,
++					      &devaddr->oda_root_obj_cred);
++	if (err)
++		return err;
++
++	err = _encode_string(xdr, &devaddr->oda_osdname);
++	if (err)
++		return err;
++
++	return 0;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_encode_deviceaddr);
++
++/*
++ * struct pnfs_osd_layoutupdate {
++ * 	u32	dsu_valid;
++ * 	s64	dsu_delta;
++ * 	u32	olu_ioerr_flag;
++ * };
++ */
++__be32 *
++pnfs_osd_xdr_decode_layoutupdate(struct pnfs_osd_layoutupdate *lou, __be32 *p)
++{
++	lou->dsu_valid = be32_to_cpu(*p++);
++	if (lou->dsu_valid)
++		p = xdr_decode_hyper(p, &lou->dsu_delta);
++	lou->olu_ioerr_flag = be32_to_cpu(*p++);
++	return p;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_decode_layoutupdate);
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline __be32 *
++pnfs_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid)
++{
++	/* FIXME: p = xdr_decode_fixed(...) */
++	memcpy(objid->oid_device_id.data, p, sizeof(objid->oid_device_id.data));
++	p += XDR_QUADLEN(sizeof(objid->oid_device_id.data));
++
++	p = xdr_decode_hyper(p, &objid->oid_partition_id);
++	p = xdr_decode_hyper(p, &objid->oid_object_id);
++	return p;
++}
++
++/*
++ * struct pnfs_osd_ioerr {
++ * 	struct pnfs_osd_objid	oer_component;
++ * 	u64			oer_comp_offset;
++ * 	u64			oer_comp_length;
++ * 	u32			oer_iswrite;
++ * 	u32			oer_errno;
++ * };
++ */
++__be32 *
++pnfs_osd_xdr_decode_ioerr(struct pnfs_osd_ioerr *ioerr, __be32 *p)
++{
++	p = pnfs_osd_xdr_decode_objid(p, &ioerr->oer_component);
++	p = xdr_decode_hyper(p, &ioerr->oer_comp_offset);
++	p = xdr_decode_hyper(p, &ioerr->oer_comp_length);
++	ioerr->oer_iswrite = be32_to_cpu(*p++);
++	ioerr->oer_errno = be32_to_cpu(*p++);
++	return p;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_decode_ioerr);
+diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gfs2/ops_fstype.c
+--- linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-08-31 20:42:05.500123860 -0400
+@@ -19,6 +19,7 @@
+ #include <linux/gfs2_ondisk.h>
+ #include <linux/slow-work.h>
+ #include <linux/quotaops.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
+ 
+ #include "gfs2.h"
+ #include "incore.h"
+@@ -1146,6 +1147,9 @@ static int fill_super(struct super_block
+ 	sb->s_magic = GFS2_MAGIC;
+ 	sb->s_op = &gfs2_super_ops;
+ 	sb->s_export_op = &gfs2_export_ops;
++#if defined(CONFIG_PNFSD)
++	sb->s_pnfs_op = &pnfs_dlm_export_ops;
++#endif /* CONFIG_PNFSD */
+ 	sb->s_xattr = gfs2_xattr_handlers;
+ 	sb->s_qcop = &gfs2_quotactl_ops;
+ 	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
+--- linux-2.6.34.noarch/fs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/Kconfig	2010-08-31 20:42:05.490222933 -0400
+@@ -224,6 +224,31 @@ config LOCKD_V4
+ config EXPORTFS
+ 	tristate
+ 
++config EXPORTFS_FILE_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 files layout type.
++	  Must be automatically selected by supporting filesystems.
++
++config EXPORTFS_OSD_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 objects layout type.
++	  Must be automatically selected by supporting osd
++	  filesystems.
++
++	  If unsure, say N.
++
++config EXPORTFS_BLOCK_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 blocks layout type.
++	  Must be automatically selected by supporting filesystems.
++
++
+ config NFS_ACL_SUPPORT
+ 	tristate
+ 	select FS_POSIX_ACL
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-08-31 20:42:05.503222878 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-08-31 20:42:05.503222878 -0400
+@@ -0,0 +1,66 @@
++#include <linux/module.h>
++#include <linux/uaccess.h>
++#include <linux/proc_fs.h>
++#include <linux/string.h>
++#include <linux/slab.h>
++#include <linux/ctype.h>
++#include <linux/sched.h>
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY NFSDBG_PNFS_LD
++
++struct pipefs_list bl_device_list;
++struct dentry *bl_device_pipe;
++
++ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, size_t len)
++{
++	int err;
++	struct pipefs_hdr *msg;
++
++	dprintk("Entering %s...\n", __func__);
++
++	msg = pipefs_readmsg(filp, src, len);
++	if (IS_ERR(msg)) {
++		dprintk("ERROR: unable to read pipefs message.\n");
++		return PTR_ERR(msg);
++	}
++
++	/* now assign the result, which wakes the blocked thread */
++	err = pipefs_assign_upcall_reply(msg, &bl_device_list);
++	if (err) {
++		dprintk("ERROR: failed to assign upcall with id %u\n",
++			msg->msgid);
++		kfree(msg);
++	}
++	return len;
++}
++
++static const struct rpc_pipe_ops bl_pipe_ops = {
++	.upcall         = pipefs_generic_upcall,
++	.downcall       = bl_pipe_downcall,
++	.destroy_msg    = pipefs_generic_destroy_msg,
++};
++
++int bl_pipe_init(void)
++{
++	dprintk("%s: block_device pipefs registering...\n", __func__);
++	bl_device_pipe = pipefs_mkpipe("bl_device_pipe", &bl_pipe_ops, 1);
++	/* Failures are only logged; init still reports success (returns 0) */
++	if (IS_ERR(bl_device_pipe))
++		dprintk("ERROR, unable to make block_device pipe\n");
++	else if (!bl_device_pipe)
++		dprintk("bl_device_pipe is NULL!\n");
++	else
++		dprintk("bl_device_pipe created!\n");
++	pipefs_init_list(&bl_device_list);
++	return 0;
++}
++
++void bl_pipe_exit(void)
++{
++	dprintk("%s: block_device pipefs unregistering...\n", __func__);
++	if (IS_ERR(bl_device_pipe))
++		return ;
++	pipefs_closepipe(bl_device_pipe);
++	return;
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-08-31 20:42:05.504232855 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-08-31 20:42:05.504232855 -0400
+@@ -0,0 +1,1160 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayout.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++
++#include <linux/buffer_head.h> /* various write calls */
++#include <linux/bio.h> /* struct bio */
++#include <linux/vmalloc.h>
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
++MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
++
++/* Callback operations to the pNFS client */
++static struct pnfs_client_operations *pnfs_block_callback_ops;
++
++static void print_page(struct page *page)
++{
++	dprintk("PRINTPAGE page %p\n", page);
++	dprintk("        PagePrivate %d\n", PagePrivate(page));
++	dprintk("        PageUptodate %d\n", PageUptodate(page));
++	dprintk("        PageError %d\n", PageError(page));
++	dprintk("        PageDirty %d\n", PageDirty(page));
++	dprintk("        PageReferenced %d\n", PageReferenced(page));
++	dprintk("        PageLocked %d\n", PageLocked(page));
++	dprintk("        PageWriteback %d\n", PageWriteback(page));
++	dprintk("        PageMappedToDisk %d\n", PageMappedToDisk(page));
++	dprintk("\n");
++}
++
++/* Tell whether the page data at isect, which lies in extent be, still has
++ * to be initialized.
++ */
++static int is_hole(struct pnfs_block_extent *be, sector_t isect)
++{
++	/* NONE_DATA: always a hole.  INVALID_DATA: a hole until initialized. */
++	if (be->be_state == PNFS_BLOCK_NONE_DATA)
++		return 1;
++	if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++		return !is_sector_initialized(be->be_inval, isect);
++	return 0;
++}
++
++/* Tell whether the page data at isect, which lies in extent be, may be
++ * written to disk.
++ */
++static int is_writable(struct pnfs_block_extent *be, sector_t isect)
++{
++	/* READWRITE_DATA: always.  INVALID_DATA: only once initialized. */
++	if (be->be_state == PNFS_BLOCK_READWRITE_DATA)
++		return 1;
++	if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++		return is_sector_initialized(be->be_inval, isect);
++	return 0;
++}
++
++static int
++dont_like_caller(struct nfs_page *req)
++{
++	int via_multi;
++
++	/* wb_complete is non-zero when called by _multi, zero for _one */
++	via_multi = atomic_read(&req->wb_complete) != 0;
++
++	/* Only requests coming from _multi are rejected */
++	return via_multi;
++}
++
++static enum pnfs_try_status
++bl_commit(struct nfs_write_data *nfs_data,
++	  int sync)
++{
++	dprintk("%s enter\n", __func__);
++	return PNFS_NOT_ATTEMPTED;
++}
++
++/* The data we are handed might be spread across several bios.  We need
++ * to track when the last one is finished.
++ */
++struct parallel_io {
++	struct kref refcnt;
++	struct rpc_call_ops call_ops;
++	void (*pnfs_callback) (void *data);
++	void *data;
++};
++
++static inline struct parallel_io *alloc_parallel(void *data)
++{
++	struct parallel_io *rv;
++
++	rv  = kmalloc(sizeof(*rv), GFP_KERNEL);
++	if (rv) {
++		rv->data = data;
++		kref_init(&rv->refcnt);
++	}
++	return rv;
++}
++
++static inline void get_parallel(struct parallel_io *p)
++{
++	kref_get(&p->refcnt);
++}
++
++static void destroy_parallel(struct kref *kref)
++{
++	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
++
++	dprintk("%s enter\n", __func__);
++	p->pnfs_callback(p->data);
++	kfree(p);
++}
++
++static inline void put_parallel(struct parallel_io *p)
++{
++	kref_put(&p->refcnt, destroy_parallel);
++}
++
++static struct bio *
++bl_submit_bio(int rw, struct bio *bio)
++{
++	if (bio) {
++		get_parallel(bio->bi_private);
++		dprintk("%s submitting %s bio %u@%llu\n", __func__,
++			rw == READ ? "read" : "write",
++			bio->bi_size, (u64)bio->bi_sector);
++		submit_bio(rw, bio);
++	}
++	return NULL;
++}
++
++static inline void
++bl_done_with_rpage(struct page *page, const int ok)
++{
++	if (ok) {
++		ClearPagePnfsErr(page);
++		SetPageUptodate(page);
++	} else {
++		ClearPageUptodate(page);
++		SetPageError(page);
++		SetPagePnfsErr(page);
++	}
++	/* Page is unlocked via rpc_release.  Should really be done here. */
++}
++
++/* This is basically copied from mpage_end_io_read */
++static void bl_end_io_read(struct bio *bio, int err)
++{
++	void *data = bio->bi_private;
++	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
++	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
++
++	do {
++		struct page *page = bvec->bv_page;
++
++		if (--bvec >= bio->bi_io_vec)
++			prefetchw(&bvec->bv_page->flags);
++		bl_done_with_rpage(page, uptodate);
++	} while (bvec >= bio->bi_io_vec);
++	bio_put(bio);
++	put_parallel(data);
++}
++
++static void bl_read_cleanup(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_read_data *rdata;
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	rdata = container_of(task, struct nfs_read_data, task);
++	pnfs_block_callback_ops->nfs_readlist_complete(rdata);
++}
++
++static void
++bl_end_par_io_read(void *data)
++{
++	struct nfs_read_data *rdata = data;
++
++	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
++	schedule_work(&rdata->task.u.tk_work);
++}
++
++/* We don't want normal .rpc_call_done callback used, so we replace it
++ * with this stub.
++ */
++static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata)
++{
++	return;
++}
++
++static enum pnfs_try_status
++bl_read_pagelist(struct nfs_read_data *rdata,
++		 unsigned nr_pages)
++{
++	int i, hole;
++	struct bio *bio = NULL;
++	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
++	sector_t isect, extent_length = 0;
++	struct parallel_io *par;
++	loff_t f_offset = rdata->args.offset;
++	size_t count = rdata->args.count;
++	struct page **pages = rdata->args.pages;
++	int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
++
++	dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__,
++	       nr_pages, f_offset, count);
++
++	if (dont_like_caller(rdata->req)) {
++		dprintk("%s dont_like_caller failed\n", __func__);
++		goto use_mds;
++	}
++	if ((nr_pages == 1) && PagePnfsErr(rdata->req->wb_page)) {
++		/* We want to fall back to mds in case of read_page
++		 * after error on read_pages.
++		 */
++		dprintk("%s PG_pnfserr set\n", __func__);
++		goto use_mds;
++	}
++	par = alloc_parallel(rdata);
++	if (!par)
++		goto use_mds;
++	par->call_ops = *rdata->pdata.call_ops;
++	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
++	par->pnfs_callback = bl_end_par_io_read;
++	/* At this point, we can no longer jump to use_mds */
++
++	isect = (sector_t) (f_offset >> 9);
++	/* Code assumes extents are page-aligned */
++	for (i = pg_index; i < nr_pages; i++) {
++		if (!extent_length) {
++			/* We've used up the previous extent */
++			put_extent(be);
++			put_extent(cow_read);
++			bio = bl_submit_bio(READ, bio);
++			/* Get the next one */
++			be = find_get_extent(BLK_LSEG2EXT(rdata->pdata.lseg),
++					     isect, &cow_read);
++			if (!be) {
++				/* Error out this page */
++				bl_done_with_rpage(pages[i], 0);
++				break;
++			}
++			extent_length = be->be_length -
++				(isect - be->be_f_offset);
++			if (cow_read) {
++				sector_t cow_length = cow_read->be_length -
++					(isect - cow_read->be_f_offset);
++				extent_length = min(extent_length, cow_length);
++			}
++		}
++		hole = is_hole(be, isect);
++		if (hole && !cow_read) {
++			bio = bl_submit_bio(READ, bio);
++			/* Fill hole w/ zeroes w/o accessing device */
++			dprintk("%s Zeroing page for hole\n", __func__);
++			zero_user(pages[i], 0,
++				  min_t(int, PAGE_CACHE_SIZE, count));
++			print_page(pages[i]);
++			bl_done_with_rpage(pages[i], 1);
++		} else {
++			struct pnfs_block_extent *be_read;
++
++			be_read = (hole && cow_read) ? cow_read : be;
++			for (;;) {
++				if (!bio) {
++					bio = bio_alloc(GFP_NOIO, nr_pages - i);
++					if (!bio) {
++						/* Error out this page */
++						bl_done_with_rpage(pages[i], 0);
++						break;
++					}
++					bio->bi_sector = isect -
++						be_read->be_f_offset +
++						be_read->be_v_offset;
++					bio->bi_bdev = be_read->be_mdev;
++					bio->bi_end_io = bl_end_io_read;
++					bio->bi_private = par;
++				}
++				if (bio_add_page(bio, pages[i], PAGE_SIZE, 0))
++					break;
++				bio = bl_submit_bio(READ, bio);
++			}
++		}
++		isect += PAGE_CACHE_SIZE >> 9;
++		extent_length -= PAGE_CACHE_SIZE >> 9;
++	}
++	if ((isect << 9) >= rdata->inode->i_size) {
++		rdata->res.eof = 1;
++		rdata->res.count = rdata->inode->i_size - f_offset;
++	} else {
++		rdata->res.count = (isect << 9) - f_offset;
++	}
++	put_extent(be);
++	put_extent(cow_read);
++	bl_submit_bio(READ, bio);
++	put_parallel(par);
++	return PNFS_ATTEMPTED;
++
++ use_mds:
++	dprintk("Giving up and using normal NFS\n");
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static void mark_extents_written(struct pnfs_block_layout *bl,
++				 __u64 offset, __u32 count)
++{
++	sector_t isect, end;
++	struct pnfs_block_extent *be;
++
++	dprintk("%s(%llu, %u)\n", __func__, offset, count);
++	if (count == 0)
++		return;
++	isect = (offset & (long)(PAGE_CACHE_MASK)) >> 9;
++	end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK);
++	end >>= 9;
++	while (isect < end) {
++		sector_t len;
++		be = find_get_extent(bl, isect, NULL);
++		BUG_ON(!be); /* FIXME */
++		len = min(end, be->be_f_offset + be->be_length) - isect;
++		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++			mark_for_commit(be, isect, len); /* What if fails? */
++		isect += len;
++		put_extent(be);
++	}
++}
++
++/* STUB - this needs thought */
++static inline void
++bl_done_with_wpage(struct page *page, const int ok)
++{
++	if (!ok) {
++		SetPageError(page);
++		SetPagePnfsErr(page);
++		/* This is an inline copy of nfs_zap_mapping */
++		/* This is oh so fishy, and needs deep thought */
++		if (page->mapping->nrpages != 0) {
++			struct inode *inode = page->mapping->host;
++			spin_lock(&inode->i_lock);
++			NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
++			spin_unlock(&inode->i_lock);
++		}
++	}
++	/* end_page_writeback called in rpc_release.  Should be done here. */
++}
++
++/* This is basically copied from mpage_end_io_read */
++static void bl_end_io_write(struct bio *bio, int err)
++{
++	void *data = bio->bi_private;
++	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
++	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
++
++	do {
++		struct page *page = bvec->bv_page;
++
++		if (--bvec >= bio->bi_io_vec)
++			prefetchw(&bvec->bv_page->flags);
++		bl_done_with_wpage(page, uptodate);
++	} while (bvec >= bio->bi_io_vec);
++	bio_put(bio);
++	put_parallel(data);
++}
++
++/* Function scheduled for call during bl_end_par_io_write,
++ * it marks sectors as written and extends the commitlist.
++ */
++static void bl_write_cleanup(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++	if (!wdata->task.tk_status) {
++		/* Marks for LAYOUTCOMMIT */
++		/* BUG - this should be called after each bio, not after
++		 * all finish, unless have some way of storing success/failure
++		 */
++		mark_extents_written(BLK_LSEG2EXT(wdata->pdata.lseg),
++				     wdata->args.offset, wdata->args.count);
++	}
++	pnfs_block_callback_ops->nfs_writelist_complete(wdata);
++}
++
++/* Called when last of bios associated with a bl_write_pagelist call finishes */
++static void
++bl_end_par_io_write(void *data)
++{
++	struct nfs_write_data *wdata = data;
++
++	/* STUB - ignoring error handling */
++	wdata->task.tk_status = 0;
++	wdata->verf.committed = NFS_FILE_SYNC;
++	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
++	schedule_work(&wdata->task.u.tk_work);
++}
++
++static enum pnfs_try_status
++bl_write_pagelist(struct nfs_write_data *wdata,
++		  unsigned nr_pages,
++		  int sync)
++{
++	int i;
++	struct bio *bio = NULL;
++	struct pnfs_block_extent *be = NULL;
++	sector_t isect, extent_length = 0;
++	struct parallel_io *par;
++	loff_t offset = wdata->args.offset;
++	size_t count = wdata->args.count;
++	struct page **pages = wdata->args.pages;
++	int pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
++
++	dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
++	if (!wdata->req->wb_lseg) {
++		dprintk("%s no lseg, falling back to MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	if (dont_like_caller(wdata->req)) {
++		dprintk("%s dont_like_caller failed\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	/* At this point, wdata->pages is a (sequential) list of nfs_pages.
++	 * We want to write each, and if there is an error remove it from
++	 * list and call
++	 * nfs_retry_request(req) to have it redone using nfs.
++	 * QUEST? Do as block or per req?  Think have to do per block
++	 * as part of end_bio
++	 */
++	par = alloc_parallel(wdata);
++	if (!par)
++		return PNFS_NOT_ATTEMPTED;
++	par->call_ops = *wdata->pdata.call_ops;
++	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
++	par->pnfs_callback = bl_end_par_io_write;
++	/* At this point, have to be more careful with error handling */
++
++	isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> 9);
++	for (i = pg_index; i < nr_pages; i++) {
++		if (!extent_length) {
++			/* We've used up the previous extent */
++			put_extent(be);
++			bio = bl_submit_bio(WRITE, bio);
++			/* Get the next one */
++			be = find_get_extent(BLK_LSEG2EXT(wdata->pdata.lseg),
++					     isect, NULL);
++			if (!be || !is_writable(be, isect)) {
++				/* FIXME */
++				bl_done_with_wpage(pages[i], 0);
++				break;
++			}
++			extent_length = be->be_length -
++				(isect - be->be_f_offset);
++		}
++		for (;;) {
++			if (!bio) {
++				bio = bio_alloc(GFP_NOIO, nr_pages - i);
++				if (!bio) {
++					/* Error out this page */
++					/* FIXME */
++					bl_done_with_wpage(pages[i], 0);
++					break;
++				}
++				bio->bi_sector = isect - be->be_f_offset +
++					be->be_v_offset;
++				bio->bi_bdev = be->be_mdev;
++				bio->bi_end_io = bl_end_io_write;
++				bio->bi_private = par;
++			}
++			if (bio_add_page(bio, pages[i], PAGE_SIZE, 0))
++				break;
++			bio = bl_submit_bio(WRITE, bio);
++		}
++		isect += PAGE_CACHE_SIZE >> 9;
++		extent_length -= PAGE_CACHE_SIZE >> 9;
++	}
++	wdata->res.count = (isect << 9) - (offset & (long)PAGE_CACHE_MASK);
++	put_extent(be);
++	bl_submit_bio(WRITE, bio);
++	put_parallel(par);
++	return PNFS_ATTEMPTED;
++}
++
++/* FIXME - range ignored */
++static void
++release_extents(struct pnfs_block_layout *bl,
++		struct pnfs_layout_range *range)
++{
++	int i;
++	struct pnfs_block_extent *be;
++
++	spin_lock(&bl->bl_ext_lock);
++	for (i = 0; i < EXTENT_LISTS; i++) {
++		while (!list_empty(&bl->bl_extents[i])) {
++			be = list_first_entry(&bl->bl_extents[i],
++					      struct pnfs_block_extent,
++					      be_node);
++			list_del(&be->be_node);
++			put_extent(be);
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++}
++
++static void
++release_inval_marks(struct pnfs_inval_markings *marks)
++{
++	struct pnfs_inval_tracking *pos, *temp;
++
++	list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
++		list_del(&pos->it_link);
++		kfree(pos);
++	}
++	return;
++}
++
++/* Note we are relying on caller locking to prevent nasty races. */
++static void
++bl_free_layout(struct pnfs_layout_hdr *lo)
++{
++	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
++
++	dprintk("%s enter\n", __func__);
++	release_extents(bl, NULL);
++	release_inval_marks(&bl->bl_inval);
++	kfree(bl);
++}
++
++static struct pnfs_layout_hdr *
++bl_alloc_layout(struct inode *inode)
++{
++	struct pnfs_block_layout	*bl;
++	int i;
++
++	dprintk("%s enter\n", __func__);
++	bl = kzalloc(sizeof(*bl), GFP_KERNEL);	/* bl_count starts at 0 */
++	if (!bl)
++		return NULL;
++	spin_lock_init(&bl->bl_ext_lock);
++	for (i = 0; i < EXTENT_LISTS; i++)	/* same set release_extents walks */
++		INIT_LIST_HEAD(&bl->bl_extents[i]);
++	INIT_LIST_HEAD(&bl->bl_commit);
++	bl->bl_blocksize = NFS_SERVER(inode)->pnfs_blksize >> 9;
++	INIT_INVAL_MARKS(&bl->bl_inval, bl->bl_blocksize);
++	return &bl->bl_layout;
++}
++
++static void
++bl_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	dprintk("%s enter\n", __func__);
++	kfree(lseg);
++}
++
++/* Because the generic infrastructure does not correctly merge layouts,
++ * we pretty much ignore lseg, and store all data layout wide, so we
++ * can correctly merge.  Eventually we should push some correct merge
++ * behavior up to the generic code, as the current behavior tends to
++ * cause lots of unnecessary overlapping LAYOUTGET requests.
++ */
++static struct pnfs_layout_segment *
++bl_alloc_lseg(struct pnfs_layout_hdr *lo,
++	      struct nfs4_layoutget_res *lgr)
++{
++	struct pnfs_layout_segment *lseg;
++	int status;
++
++	dprintk("%s enter\n", __func__);
++	lseg = kzalloc(sizeof(*lseg) + 0, GFP_KERNEL);
++	if (!lseg)
++		return NULL;
++	status = nfs4_blk_process_layoutget(lo, lgr);
++	if (status) {
++		/* We don't want to call the full-blown bl_free_lseg,
++		 * since on error extents were not touched.
++		 */
++		/* STUB - we really want to distinguish between 2 error
++		 * conditions here.  This lseg failed, but lo data structures
++		 * are OK, or we hosed the lo data structures.  The calling
++		 * code probably needs to distinguish this too.
++		 */
++		kfree(lseg);
++		return ERR_PTR(status);
++	}
++	return lseg;
++}
++
++static int
++bl_setup_layoutcommit(struct pnfs_layout_hdr *lo,
++		      struct nfs4_layoutcommit_args *arg)
++{
++	struct nfs_server *nfss = PNFS_NFS_SERVER(lo);
++	struct bl_layoutupdate_data *layoutupdate_data;
++
++	dprintk("%s enter\n", __func__);
++	/* Need to ensure commit is block-size aligned */
++	if (nfss->pnfs_blksize) {
++		u64 mask = nfss->pnfs_blksize - 1;
++		u64 offset = arg->range.offset & mask;
++
++		arg->range.offset -= offset;
++		arg->range.length += offset + mask;
++		arg->range.length &= ~mask;
++	}
++
++	layoutupdate_data = kmalloc(sizeof(struct bl_layoutupdate_data),
++					 GFP_KERNEL);
++	if (unlikely(!layoutupdate_data))
++		return -ENOMEM;
++	INIT_LIST_HEAD(&layoutupdate_data->ranges);
++	arg->layoutdriver_data = layoutupdate_data;
++
++	return 0;
++}
++
++static void
++bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr,
++		       const struct nfs4_layoutcommit_args *arg)
++{
++	dprintk("%s enter\n", __func__);
++	encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg);
++}
++
++static void
++bl_cleanup_layoutcommit(struct pnfs_layout_hdr *lo,
++			struct nfs4_layoutcommit_args *arg, int status)
++{
++	dprintk("%s enter\n", __func__);
++	clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), arg, status);
++	kfree(arg->layoutdriver_data);
++}
++
++static void free_blk_mountid(struct block_mount_id *mid)
++{
++	if (mid) {
++		struct pnfs_block_dev *dev;
++		spin_lock(&mid->bm_lock);
++		while (!list_empty(&mid->bm_devlist)) {
++			dev = list_first_entry(&mid->bm_devlist,
++					       struct pnfs_block_dev,
++					       bm_node);
++			list_del(&dev->bm_node);
++			free_block_dev(dev);
++		}
++		spin_unlock(&mid->bm_lock);
++		kfree(mid);
++	}
++}
++
++/* This is mostly copied from the filelayout's get_device_info function.
++ * It seems much of this should be at the generic pnfs level.
++ */
++static struct pnfs_block_dev *
++nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
++			struct pnfs_deviceid *d_id,
++			struct list_head *sdlist)
++{
++	struct pnfs_device *dev;
++	struct pnfs_block_dev *rv = NULL;
++	u32 max_resp_sz;
++	int max_pages;
++	struct page **pages = NULL;
++	int i, rc;
++
++	/*
++	 * Use the session max response size as the basis for setting
++	 * GETDEVICEINFO's maxcount
++	 */
++	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
++	max_pages = max_resp_sz >> PAGE_SHIFT;
++	dprintk("%s max_resp_sz %u max_pages %d\n",
++		__func__, max_resp_sz, max_pages);
++
++	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
++	if (!dev) {
++		dprintk("%s kmalloc failed\n", __func__);
++		return NULL;
++	}
++	/* Zeroed: out_free stops freeing at the first NULL slot */
++	pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
++	if (pages == NULL) {
++		kfree(dev);
++		return NULL;
++	}
++	for (i = 0; i < max_pages; i++) {
++		pages[i] = alloc_page(GFP_KERNEL);
++		if (!pages[i])
++			goto out_free;
++	}
++
++	/* dev->area stays NULL (dev is zeroed) until this vmap succeeds */
++	dev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
++	if (!dev->area)
++		goto out_free;
++
++	memcpy(&dev->dev_id, d_id, sizeof(*d_id));
++	dev->layout_type = LAYOUT_BLOCK_VOLUME;
++	dev->dev_notify_types = 0;
++	dev->pages = pages;
++	dev->pgbase = 0;
++	dev->pglen = PAGE_SIZE * max_pages;
++	dev->mincount = 0;
++
++	dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
++	rc = pnfs_block_callback_ops->nfs_getdeviceinfo(server, dev);
++	dprintk("%s getdevice info returns %d\n", __func__, rc);
++	if (rc)
++		goto out_free;
++
++	rv = nfs4_blk_decode_device(server, dev, sdlist);
++ out_free:
++	if (dev->area != NULL)
++		vunmap(dev->area);
++	for (i = 0; i < max_pages && pages[i]; i++)
++		__free_page(pages[i]);
++	kfree(pages);
++	kfree(dev);
++	return rv;
++}
++
++
++/*
++ * Retrieve the mountpoint's device list; returns 0 or a negative errno.
++ */
++static int
++bl_initialize_mountpoint(struct nfs_server *server, const struct nfs_fh *fh)
++{
++	struct block_mount_id *b_mt_id = NULL;
++	struct pnfs_devicelist *dlist = NULL;
++	struct pnfs_block_dev *bdev;
++	LIST_HEAD(block_disklist);
++	int status = 0, i;
++
++	dprintk("%s enter\n", __func__);
++
++	if (server->pnfs_blksize == 0) {
++		dprintk("%s Server did not return blksize\n", __func__);
++		return -EINVAL;
++	}
++	b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_KERNEL);
++	if (!b_mt_id) {
++		status = -ENOMEM;
++		goto out_error;
++	}
++	/* Initialize nfs4 block layout mount id */
++	spin_lock_init(&b_mt_id->bm_lock);
++	INIT_LIST_HEAD(&b_mt_id->bm_devlist);
++
++	dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_KERNEL);
++	if (!dlist)
++		goto out_error;
++	dlist->eof = 0;
++	while (!dlist->eof) {
++		status = pnfs_block_callback_ops->nfs_getdevicelist(
++							server, fh, dlist);
++		if (status)
++			goto out_error;
++		dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n",
++			__func__, dlist->num_devs, dlist->eof);
++		/* For each device returned in dlist, call GETDEVICEINFO, and
++		 * decode the opaque topology encoding to create a flat
++		 * volume topology, matching VOLUME_SIMPLE disk signatures
++		 * to disks in the visible block disk list.
++		 * Construct an LVM meta device from the flat volume topology.
++		 */
++		for (i = 0; i < dlist->num_devs; i++) {
++			bdev = nfs4_blk_get_deviceinfo(server, fh,
++						     &dlist->dev_id[i],
++						     &block_disklist);
++			if (!bdev)
++				goto out_error;
++			spin_lock(&b_mt_id->bm_lock);
++			list_add(&bdev->bm_node, &b_mt_id->bm_devlist);
++			spin_unlock(&b_mt_id->bm_lock);
++		}
++	}
++	dprintk("%s SUCCESS\n", __func__);
++	server->pnfs_ld_data = b_mt_id;
++
++ out_return:
++	kfree(dlist);
++	return status;
++
++ out_error:
++	free_blk_mountid(b_mt_id);
++	if (!status)	/* dlist/bdev failures arrive here with status 0 */
++		status = -ENOMEM;
++	goto out_return;
++}
++
++static int
++bl_uninitialize_mountpoint(struct nfs_server *server)
++{
++	struct block_mount_id *b_mt_id = server->pnfs_ld_data;
++
++	dprintk("%s enter\n", __func__);
++	free_blk_mountid(b_mt_id);
++	dprintk("%s RETURNS\n", __func__);
++	return 0;
++}
++
++/* STUB - mark intersection of layout and page as bad, so is not
++ * used again.
++ */
++static void mark_bad_read(void)
++{
++	return;
++}
++
++/* Copied from buffer.c */
++static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
++{
++	if (uptodate) {
++		set_buffer_uptodate(bh);
++	} else {
++		/* This happens, due to failed READA attempts. */
++		clear_buffer_uptodate(bh);
++	}
++	unlock_buffer(bh);
++}
++
++/* Copied from buffer.c */
++static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
++{
++	__end_buffer_read_notouch(bh, uptodate);
++}
++
++/*
++ * map_block:  map a requested I/O block (isect) into an offset in the LVM
++ * meta block_device
++ */
++static void
++map_block(sector_t isect, struct pnfs_block_extent *be, struct buffer_head *bh)
++{
++	dprintk("%s enter be=%p\n", __func__, be);
++
++	set_buffer_mapped(bh);
++	bh->b_bdev = be->be_mdev;
++	bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >>
++		(be->be_mdev->bd_inode->i_blkbits - 9);
++
++	dprintk("%s isect %ld, bh->b_blocknr %ld, using bsize %Zd\n",
++				__func__, (long)isect,
++				(long)bh->b_blocknr,
++				bh->b_size);
++	return;
++}
++
++/* Given an unmapped page, zero it (or read in page for COW),
++ * and set appropriate flags/markings, but it is safe to not initialize
++ * the range given in [from, to).
++ */
++/* This is loosely based on nobh_write_begin */
++static int
++init_page_for_write(struct pnfs_block_layout *bl, struct page *page,
++		    unsigned from, unsigned to, sector_t **pages_to_mark)
++{
++	struct buffer_head *bh;
++	int inval, ret = -EIO;
++	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
++	sector_t isect;
++
++	dprintk("%s enter, %p\n", __func__, page);
++	bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0);
++	if (!bh) {
++		mark_bad_read();	/* cleanup: must not see a NULL bh */
++		return -ENOMEM;
++	}
++
++	isect = (sector_t)page->index << (PAGE_CACHE_SHIFT - 9);
++	be = find_get_extent(bl, isect, &cow_read);
++	if (!be)
++		goto cleanup;
++	inval = is_hole(be, isect);
++	dprintk("%s inval=%i, from=%u, to=%u\n", __func__, inval, from, to);
++	if (inval) {
++		if (be->be_state == PNFS_BLOCK_NONE_DATA) {
++			dprintk("%s PANIC - got NONE_DATA extent %p\n",
++				__func__, be);
++			goto cleanup;
++		}
++		map_block(isect, be, bh);
++		unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
++	}
++	if (PageUptodate(page)) {
++		/* Do nothing */
++	} else if (inval && !cow_read) {
++		zero_user_segments(page, 0, from, to, PAGE_CACHE_SIZE);
++	} else if (0 < from || PAGE_CACHE_SIZE > to) {
++		struct pnfs_block_extent *read_extent;
++
++		read_extent = (inval && cow_read) ? cow_read : be;
++		map_block(isect, read_extent, bh);
++		lock_buffer(bh);
++		bh->b_end_io = end_buffer_read_nobh;
++		submit_bh(READ, bh);
++		dprintk("%s: Waiting for buffer read\n", __func__);
++		/* XXX Don't really want to hold layout lock here */
++		wait_on_buffer(bh);
++		if (!buffer_uptodate(bh))
++			goto cleanup;
++	}
++	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
++		/* There is a BUG here if is a short copy after write_begin,
++		 * but I think this is a generic fs bug.  The problem is that
++		 * we have marked the page as initialized, but it is possible
++		 * that the section not copied may never get copied.
++		 */
++		ret = mark_initialized_sectors(be->be_inval, isect,
++					       PAGE_CACHE_SECTORS,
++					       pages_to_mark);
++		/* Want to preallocate mem so above can't fail */
++		if (ret)
++			goto cleanup;
++	}
++	SetPageMappedToDisk(page);
++	ret = 0;
++
++cleanup:
++	free_buffer_head(bh);
++	put_extent(be);
++	put_extent(cow_read);
++	if (ret) {
++		/* Need to mark layout with bad read...should now
++		 * just use nfs4 for reads and writes.
++		 */
++		mark_bad_read();
++	}
++	return ret;
++}
++
++static int
++bl_write_begin(struct pnfs_layout_segment *lseg, struct page *page, loff_t pos,
++	       unsigned count, struct pnfs_fsdata *fsdata)
++{
++	unsigned from, to;
++	int ret;
++	sector_t *pages_to_mark = NULL;
++	struct pnfs_block_layout *bl = BLK_LSEG2EXT(lseg);
++
++	dprintk("%s enter, %u@%lld\n", __func__, count, pos);
++	print_page(page);
++	/* The following code assumes blocksize >= PAGE_CACHE_SIZE */
++	if (bl->bl_blocksize < (PAGE_CACHE_SIZE >> 9)) {
++		dprintk("%s Can't handle blocksize %llu\n", __func__,
++			(u64)bl->bl_blocksize);
++		put_lseg(fsdata->lseg);
++		fsdata->lseg = NULL;
++		return 0;
++	}
++	if (PageMappedToDisk(page)) {
++		/* Basically, this is a flag that says we have
++		 * successfully called write_begin already on this page.
++		 */
++		/* NOTE - there are cache consistency issues here.
++		 * For example, what if the layout is recalled, then regained?
++		 * If the file is closed and reopened, will the page flags
++		 * be reset?  If not, we'll have to use layout info instead of
++		 * the page flag.
++		 */
++		return 0;
++	}
++	from = pos & (PAGE_CACHE_SIZE - 1);
++	to = from + count;
++	ret = init_page_for_write(bl, page, from, to, &pages_to_mark);
++	if (ret) {
++		dprintk("%s init page failed with %i\n", __func__, ret);
++		/* Revert back to plain NFS and just continue on with
++		 * write.  This assumes there is no request attached, which
++		 * should be true if we get here.
++		 */
++		BUG_ON(PagePrivate(page));
++		put_lseg(fsdata->lseg);
++		fsdata->lseg = NULL;
++		kfree(pages_to_mark);
++		ret = 0;
++	} else {
++		fsdata->private = pages_to_mark;
++	}
++	return ret;
++}
++
++/* CAREFUL - what happens if copied < count??? */
++static int
++bl_write_end(struct inode *inode, struct page *page, loff_t pos,
++	     unsigned count, unsigned copied, struct pnfs_layout_segment *lseg)
++{
++	dprintk("%s enter, %u@%lld, lseg=%p\n", __func__, count, pos, lseg);
++	print_page(page);
++	if (lseg)
++		SetPageUptodate(page);
++	return 0;
++}
++
++/* Return any memory allocated to fsdata->private, and take advantage
++ * of no page locks to mark pages noted in write_begin as needing
++ * initialization.
++ */
++static void
++bl_write_end_cleanup(struct file *filp, struct pnfs_fsdata *fsdata)
++{
++	struct page *page;
++	pgoff_t index;
++	sector_t *pos;
++	struct address_space *mapping = filp->f_mapping;
++	struct pnfs_fsdata *fake_data;
++	struct pnfs_layout_segment *lseg;
++
++	if (!fsdata || !fsdata->lseg || !fsdata->private)
++		return;
++	lseg = fsdata->lseg;
++	pos = fsdata->private;
++	dprintk("%s enter with pos=%llu\n", __func__, (u64)(*pos));
++	for (; *pos != ~0; pos++) {
++		index = *pos >> (PAGE_CACHE_SHIFT - 9);
++		/* XXX How do we properly deal with failures here??? */
++		page = grab_cache_page_write_begin(mapping, index, 0);
++		if (!page) {
++			printk(KERN_ERR "%s BUG BUG BUG NoMem\n", __func__);
++			continue;
++		}
++		dprintk("%s: Examining block page\n", __func__);
++		print_page(page);
++		if (!PageMappedToDisk(page)) {
++			/* XXX How do we properly deal with failures here??? */
++			dprintk("%s Marking block page\n", __func__);
++			init_page_for_write(BLK_LSEG2EXT(fsdata->lseg), page,
++					    PAGE_CACHE_SIZE, PAGE_CACHE_SIZE,
++					    NULL);
++			print_page(page);
++			fake_data = kzalloc(sizeof(*fake_data), GFP_KERNEL);
++			if (!fake_data) {
++				printk(KERN_ERR "%s BUG BUG BUG NoMem\n",
++				       __func__);
++				/* undo grab_cache_page_write_begin() */
++				unlock_page(page);
++				page_cache_release(page);
++				continue;
++			}
++			get_lseg(lseg);
++			fake_data->lseg = lseg;
++			fake_data->bypass_eof = 1;
++			mapping->a_ops->write_end(filp, mapping,
++					(loff_t)index << PAGE_CACHE_SHIFT,
++					PAGE_CACHE_SIZE, PAGE_CACHE_SIZE,
++					page, fake_data);
++			/* Note fake_data is freed by nfs_write_end */
++		} else {
++			/* No write_end call here: drop the lock and page ref */
++			unlock_page(page);
++			page_cache_release(page);
++		}
++	}
++	kfree(fsdata->private);
++	fsdata->private = NULL;
++}
++
++static ssize_t
++bl_get_stripesize(struct pnfs_layout_hdr *lo)
++{
++	dprintk("%s enter\n", __func__);
++	return 0;
++}
++
++/* This is called by nfs_can_coalesce_requests via nfs_pageio_do_add_request.
++ * Should return False if there is a reason requests can not be coalesced,
++ * otherwise, should default to returning True.
++ */
++static int
++bl_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
++	   struct nfs_page *req)
++{
++	dprintk("%s enter\n", __func__);
++	/* Reads always coalesce; writes only when both share one lseg */
++	if (!pgio->pg_iswrite)
++		return 1;
++	return prev->wb_lseg == req->wb_lseg;
++}
++
++static struct layoutdriver_io_operations blocklayout_io_operations = {
++	.commit				= bl_commit,
++	.read_pagelist			= bl_read_pagelist,
++	.write_pagelist			= bl_write_pagelist,
++	.write_begin			= bl_write_begin,
++	.write_end			= bl_write_end,
++	.write_end_cleanup		= bl_write_end_cleanup,
++	.alloc_layout			= bl_alloc_layout,
++	.free_layout			= bl_free_layout,
++	.alloc_lseg			= bl_alloc_lseg,
++	.free_lseg			= bl_free_lseg,
++	.setup_layoutcommit		= bl_setup_layoutcommit,
++	.encode_layoutcommit		= bl_encode_layoutcommit,
++	.cleanup_layoutcommit		= bl_cleanup_layoutcommit,
++	.initialize_mountpoint		= bl_initialize_mountpoint,
++	.uninitialize_mountpoint	= bl_uninitialize_mountpoint,
++};
++
++static struct layoutdriver_policy_operations blocklayout_policy_operations = {
++	.get_stripesize			= bl_get_stripesize,
++	.pg_test			= bl_pg_test,
++};
++
++static struct pnfs_layoutdriver_type blocklayout_type = {
++	.id = LAYOUT_BLOCK_VOLUME,
++	.name = "LAYOUT_BLOCK_VOLUME",
++	.ld_io_ops = &blocklayout_io_operations,
++	.ld_policy_ops = &blocklayout_policy_operations,
++};
++
++static int __init nfs4blocklayout_init(void)
++{
++	dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
++
++	pnfs_block_callback_ops = pnfs_register_layoutdriver(&blocklayout_type);
++	bl_pipe_init();
++	return 0;
++}
++
++static void __exit nfs4blocklayout_exit(void)
++{
++	dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
++	       __func__);
++
++	pnfs_unregister_layoutdriver(&blocklayout_type);
++	bl_pipe_exit();
++}
++
++module_init(nfs4blocklayout_init);
++module_exit(nfs4blocklayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-08-31 20:42:05.506119071 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-08-31 20:42:05.506119071 -0400
+@@ -0,0 +1,335 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayoutdev.c
++ *
++ *  Device operations for the pnfs nfs4 block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#include <linux/module.h>
++#include <linux/buffer_head.h> /* __bread */
++
++#include <linux/genhd.h>
++#include <linux/blkdev.h>
++#include <linux/hash.h>
++
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++uint32_t *blk_overflow(uint32_t *p, uint32_t *end, size_t nbytes)
++{
++	uint32_t *next = p + XDR_QUADLEN(nbytes);
++
++	/* NULL when nbytes would run past end or wrap; otherwise p itself */
++	return likely(next <= end && next >= p) ? p : NULL;
++}
++EXPORT_SYMBOL(blk_overflow);
++
++/* Open a block_device by device number. */
++struct block_device *nfs4_blkdev_get(dev_t dev)
++{
++	struct block_device *bdev;
++
++	dprintk("%s enter\n", __func__);
++	bdev = open_by_devnum(dev, FMODE_READ);
++	if (!IS_ERR(bdev))
++		return bdev;
++
++	/* Collapse the ERR_PTR to NULL; the errno is only logged */
++	dprintk("%s failed to open device : %ld\n",
++			__func__, PTR_ERR(bdev));
++	return NULL;
++}
++
++/*
++ * Release the block device
++ */
++int nfs4_blkdev_put(struct block_device *bdev)
++{
++	dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev),
++			MINOR(bdev->bd_dev));
++	bd_release(bdev);
++	return blkdev_put(bdev, FMODE_READ);
++}
++
++/* Decodes pnfs_block_deviceaddr4 (draft-8) which is XDR encoded
++ * in dev->dev_addr_buf.
++ */
++struct pnfs_block_dev *
++nfs4_blk_decode_device(struct nfs_server *server,
++		       struct pnfs_device *dev,
++		       struct list_head *sdlist)
++{
++	struct pnfs_block_dev *rv = NULL;
++	struct block_device *bd = NULL;
++	struct pipefs_hdr *msg = NULL, *reply = NULL;
++	uint32_t major, minor;
++
++	dprintk("%s enter\n", __func__);
++
++	if (IS_ERR(bl_device_pipe))
++		return NULL;
++	dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
++	dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
++		dev->mincount);
++	msg = pipefs_alloc_init_msg(0, BL_DEVICE_MOUNT, 0, dev->area,
++				    dev->mincount);
++	if (IS_ERR(msg)) {
++		dprintk("ERROR: couldn't make pipefs message.\n");
++		goto out_err;
++	}
++	msg->msgid = hash_ptr(&msg, sizeof(msg->msgid) * 8);
++	msg->status = BL_DEVICE_REQUEST_INIT;
++
++	dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
++	reply = pipefs_queue_upcall_waitreply(bl_device_pipe, msg,
++					      &bl_device_list, 0, 0);
++
++	if (IS_ERR(reply)) {
++		dprintk("ERROR: upcall_waitreply failed\n");
++		goto out_err;
++	}
++	if (reply->status != BL_DEVICE_REQUEST_PROC) {
++		dprintk("%s failed to open device: %ld\n",
++			__func__, PTR_ERR(bd));
++		goto out_err;
++	}
++	memcpy(&major, (uint32_t *)(payload_of(reply)), sizeof(uint32_t));
++	memcpy(&minor, (uint32_t *)(payload_of(reply) + sizeof(uint32_t)),
++		sizeof(uint32_t));
++	bd = nfs4_blkdev_get(MKDEV(major, minor));
++	if (!bd) {	/* nfs4_blkdev_get() returns NULL, never ERR_PTR */
++		dprintk("%s failed to open device %u:%u\n",
++			__func__, major, minor);
++		goto out_err;
++	}
++
++	rv = kzalloc(sizeof(*rv), GFP_KERNEL);
++	if (!rv) {
++		blkdev_put(bd, FMODE_READ);	/* undo nfs4_blkdev_get() */
++		goto out_err;
++	}
++
++	rv->bm_mdev = bd;
++	memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct pnfs_deviceid));
++	dprintk("%s Created device %s with bd_block_size %u\n",
++		__func__, bd->bd_disk->disk_name, bd->bd_block_size);
++	kfree(reply);
++	kfree(msg);
++	return rv;
++
++out_err:
++	kfree(rv);
++	if (!IS_ERR(reply))
++		kfree(reply);
++	if (!IS_ERR(msg))
++		kfree(msg);
++	return NULL;
++}
++
++/* Look up the block_device registered for a server-supplied deviceid.
++ * Returns NULL when no device on the mount's list matches.
++ */
++static struct block_device *translate_devid(struct pnfs_layout_hdr *lo,
++					    struct pnfs_deviceid *id)
++{
++	struct block_mount_id *mount_id;
++	struct pnfs_block_dev *cur;
++	struct block_device *found = NULL;
++
++	dprintk("%s enter, lo=%p, id=%p\n", __func__, lo, id);
++	mount_id = BLK_ID(lo);
++	spin_lock(&mount_id->bm_lock);
++	list_for_each_entry(cur, &mount_id->bm_devlist, bm_node) {
++		if (memcmp(id->data, cur->bm_mdevid.data,
++			   NFS4_PNFS_DEVICEID4_SIZE))
++			continue;
++		found = cur->bm_mdev;
++		break;
++	}
++	spin_unlock(&mount_id->bm_lock);
++	dprintk("%s returning %p\n", __func__, found);
++	return found;
++}
++
++/* Tracks info needed to ensure extents in layout obey constraints of spec */
++struct layout_verification {
++	u32 mode;	/* R or RW */
++	u64 start;	/* Expected start of next non-COW extent */
++	u64 inval;	/* Start of INVAL coverage */
++	u64 cowread;	/* End of COW read coverage */
++};
++
++/* Verify the extent meets the layout requirements of the pnfs-block draft,
++ * section 2.3.1.  Returns 0 and advances the cursors in lv, or -EIO.
++ */
++static int verify_extent(struct pnfs_block_extent *be,
++			 struct layout_verification *lv)
++{
++	if (lv->mode == IOMODE_READ) {
++		if (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
++		    be->be_state == PNFS_BLOCK_INVALID_DATA)
++			return -EIO;	/* writable states rejected for READ */
++		if (be->be_f_offset != lv->start)
++			return -EIO;	/* must begin exactly at lv->start */
++		lv->start += be->be_length;
++		return 0;
++	}
++	/* lv->mode == IOMODE_RW */
++	if (be->be_state == PNFS_BLOCK_READWRITE_DATA) {
++		if (be->be_f_offset != lv->start)
++			return -EIO;
++		if (lv->cowread > lv->start)
++			return -EIO;
++		lv->start += be->be_length;
++		lv->inval = lv->start;	/* INVAL coverage restarts after RW */
++		return 0;
++	} else if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
++		if (be->be_f_offset != lv->start)
++			return -EIO;
++		lv->start += be->be_length;	/* lv->inval is left behind */
++		return 0;
++	} else if (be->be_state == PNFS_BLOCK_READ_DATA) {
++		if (be->be_f_offset > lv->start)
++			return -EIO;
++		if (be->be_f_offset < lv->inval)
++			return -EIO;
++		if (be->be_f_offset < lv->cowread)
++			return -EIO;
++		/* It looks like you might want to min this with lv->start,
++		 * but you really don't.
++		 */
++		lv->inval = lv->inval + be->be_length;
++		lv->cowread = be->be_f_offset + be->be_length;
++		return 0;	/* lv->start is not advanced by READ extents */
++	} else
++		return -EIO;	/* any other state is rejected in RW mode */
++}
++
++/* XDR decode a pnfs_block_layout4 and merge its extents into the layout.
++ * Returns 0, or a negative errno (-EIO for a malformed/inconsistent reply).
++ */
++int nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
++			       struct nfs4_layoutget_res *lgr)
++{
++	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
++	uint32_t *p = (uint32_t *)lgr->layout.buf;
++	uint32_t *end = (uint32_t *)((char *)lgr->layout.buf + lgr->layout.len);
++	int i, status = -EIO;
++	uint32_t count;
++	struct pnfs_block_extent *be = NULL, *save;
++	uint64_t tmp; /* Used by READSECTOR */
++	struct layout_verification lv = {
++		.mode = lgr->range.iomode,
++		.start = lgr->range.offset >> 9,
++		.inval = lgr->range.offset >> 9,
++		.cowread = lgr->range.offset >> 9,
++	};
++	LIST_HEAD(extents);
++
++	BLK_READBUF(p, end, 4);
++	READ32(count);
++	dprintk("%s enter, number of extents %i\n", __func__, count);
++	/* Divide instead of multiplying: bytes * count can wrap in 32 bits */
++	if (count > (end - p) / XDR_QUADLEN(28 + NFS4_PNFS_DEVICEID4_SIZE))
++		goto out_err;
++
++	/* Decode extents into a temporary staging list until the whole
++	 * layout is decoded, to make error recovery easier.
++	 */
++	for (i = 0; i < count; i++) {
++		be = alloc_extent();
++		if (!be) {
++			status = -ENOMEM;
++			goto out_err;
++		}
++		READ_DEVID(&be->be_devid);
++		be->be_mdev = translate_devid(lo, &be->be_devid);
++		if (!be->be_mdev)
++			goto out_err;
++		/* The next three values are read in as bytes,
++		 * but stored as 512-byte sector lengths
++		 */
++		READ_SECTOR(be->be_f_offset);
++		READ_SECTOR(be->be_length);
++		READ_SECTOR(be->be_v_offset);
++		READ32(be->be_state);
++		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++			be->be_inval = &bl->bl_inval;
++		if (verify_extent(be, &lv)) {
++			dprintk("%s verify failed\n", __func__);
++			goto out_err;
++		}
++		list_add_tail(&be->be_node, &extents);
++	}
++	if (p != end) {
++		dprintk("%s Undecoded cruft at end of opaque\n", __func__);
++		be = NULL;
++		goto out_err;
++	}
++	if (lgr->range.offset + lgr->range.length != lv.start << 9) {
++		dprintk("%s Final length mismatch\n", __func__);
++		be = NULL;
++		goto out_err;
++	}
++	if (lv.start < lv.cowread) {
++		dprintk("%s Final uncovered COW extent\n", __func__);
++		be = NULL;
++		goto out_err;
++	}
++	/* Extents decoded properly, now try to merge them in to
++	 * existing layout extents.
++	 */
++	spin_lock(&bl->bl_ext_lock);
++	list_for_each_entry_safe(be, save, &extents, be_node) {
++		list_del(&be->be_node);
++		status = add_and_merge_extent(bl, be);
++		if (status) {
++			spin_unlock(&bl->bl_ext_lock);
++			/* This is a fairly catastrophic error, as the
++			 * entire layout extent lists are now corrupted.
++			 * We should have some way to distinguish this.
++			 */
++			be = NULL;
++			goto out_err;
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++	status = 0;
++ out:
++	dprintk("%s returns %i\n", __func__, status);
++	return status;
++
++ out_err:
++	put_extent(be);
++	while (!list_empty(&extents)) {
++		be = list_first_entry(&extents, struct pnfs_block_extent,
++				      be_node);
++		list_del(&be->be_node);
++		put_extent(be);
++	}
++	goto out;
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-08-31 20:42:05.506119071 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-08-31 20:42:05.506119071 -0400
+@@ -0,0 +1,120 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayoutdm.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2007 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Fred Isaman <iisaman@umich.edu>
++ *  Andy Adamson <andros@citi.umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++
++#include <linux/genhd.h> /* gendisk - used in a dprintk*/
++#include <linux/sched.h>
++#include <linux/hash.h>
++
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/* Defines used for calculating memory usage in nfs4_blk_flatten() */
++#define ARGSIZE   24    /* Max bytes needed for linear target arg string */
++#define SPECSIZE (sizeof8(struct dm_target_spec) + ARGSIZE)
++#define SPECS_PER_PAGE (PAGE_SIZE / SPECSIZE)
++#define SPEC_HEADER_ADJUST (SPECS_PER_PAGE - \
++			    (PAGE_SIZE - sizeof8(struct dm_ioctl)) / SPECSIZE)
++#define roundup8(x) (((x)+7) & ~7)
++#define sizeof8(x) roundup8(sizeof(x))
++
++static int dev_remove(dev_t dev)
++{
++	int ret = 1;	/* returned as-is on every failure path */
++	struct pipefs_hdr *msg = NULL, *reply = NULL;
++	uint64_t bl_dev;	/* upcall payload: major then minor, 4 bytes each */
++	uint32_t major = MAJOR(dev), minor = MINOR(dev);
++
++	dprintk("Entering %s\n", __func__);
++
++	if (IS_ERR(bl_device_pipe))
++		return ret;
++
++	memcpy((void *)&bl_dev, &major, sizeof(uint32_t));
++	memcpy((void *)&bl_dev + sizeof(uint32_t), &minor, sizeof(uint32_t));
++	msg = pipefs_alloc_init_msg(0, BL_DEVICE_UMOUNT, 0, (void *)&bl_dev,
++				    sizeof(uint64_t));
++	if (IS_ERR(msg)) {
++		dprintk("ERROR: couldn't make pipefs message.\n");
++		goto out;
++	}
++	msg->msgid = hash_ptr(&msg, sizeof(msg->msgid) * 8);
++	msg->status = BL_DEVICE_REQUEST_INIT;
++
++	reply = pipefs_queue_upcall_waitreply(bl_device_pipe, msg,
++					      &bl_device_list, 0, 0);
++	if (IS_ERR(reply)) {
++		dprintk("ERROR: upcall_waitreply failed\n");
++		goto out;
++	}
++
++	if (reply->status == BL_DEVICE_REQUEST_PROC)
++		ret = 0; /*TODO: what to return*/
++out:
++	if (!IS_ERR(reply))	/* skip ERR_PTR values; NULL is fine for kfree */
++		kfree(reply);
++	if (!IS_ERR(msg))
++		kfree(msg);
++	return ret;	/* 0 only when the daemon answered REQUEST_PROC */
++}
++
++/*
++ * Release meta device: put the block device, then ask userspace to remove it.
++ * Returns dev_remove()'s result; the nfs4_blkdev_put() result is only logged.
++ */
++static int nfs4_blk_metadev_release(struct pnfs_block_dev *bdev)
++{
++	int rv;
++	dev_t dev = bdev->bm_mdev->bd_dev; /* read now: not safe after put */
++
++	dprintk("%s Releasing\n", __func__);
++	rv = nfs4_blkdev_put(bdev->bm_mdev);	/* XXX Check return? */
++	dprintk("%s nfs4_blkdev_put returns %d\n", __func__, rv);
++	rv = dev_remove(dev);
++	dprintk("%s Returns %d\n", __func__, rv);
++	return rv;
++}
++
++/* Release the meta device held by bdev (if any), then free bdev itself. */
++void free_block_dev(struct pnfs_block_dev *bdev)
++{
++	if (!bdev)
++		return;
++	if (bdev->bm_mdev) {
++		dprintk("%s Removing DM device: %d:%d\n", __func__,
++			MAJOR(bdev->bm_mdev->bd_dev),
++			MINOR(bdev->bm_mdev->bd_dev));
++		/* XXX Check status ?? */
++		nfs4_blk_metadev_release(bdev);
++	}
++	kfree(bdev);
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-08-31 20:42:05.505169618 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-08-31 20:42:05.505169618 -0400
+@@ -0,0 +1,302 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayout.h
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#ifndef FS_NFS_NFS4BLOCKLAYOUT_H
++#define FS_NFS_NFS4BLOCKLAYOUT_H
++
++#include <linux/nfs_fs.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/dm-ioctl.h> /* Needed for struct dm_ioctl*/
++
++#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> 9)
++
++#define PG_pnfserr PG_owner_priv_1
++#define PagePnfsErr(page)	test_bit(PG_pnfserr, &(page)->flags)
++#define SetPagePnfsErr(page)	set_bit(PG_pnfserr, &(page)->flags)
++#define ClearPagePnfsErr(page)	clear_bit(PG_pnfserr, &(page)->flags)
++
++extern int dm_dev_create(struct dm_ioctl *param); /* from dm-ioctl.c */
++extern int dm_dev_remove(struct dm_ioctl *param); /* from dm-ioctl.c */
++extern int dm_do_resume(struct dm_ioctl *param);
++extern int dm_table_load(struct dm_ioctl *param, size_t param_size);
++
++struct block_mount_id {
++	spinlock_t			bm_lock;    /* protects list */
++	struct list_head		bm_devlist; /* holds pnfs_block_dev */
++};
++
++struct pnfs_block_dev {
++	struct list_head		bm_node;
++	struct pnfs_deviceid		bm_mdevid;    /* associated devid */
++	struct block_device		*bm_mdev;     /* meta device itself */
++};
++
++/* holds visible disks that can be matched against VOLUME_SIMPLE signatures */
++struct visible_block_device {
++	struct list_head	vi_node;
++	struct block_device	*vi_bdev;
++	int			vi_mapped;
++	int			vi_put_done;
++};
++
++enum blk_vol_type {
++	PNFS_BLOCK_VOLUME_SIMPLE   = 0,	/* maps to a single LU */
++	PNFS_BLOCK_VOLUME_SLICE    = 1,	/* slice of another volume */
++	PNFS_BLOCK_VOLUME_CONCAT   = 2,	/* concatenation of multiple volumes */
++	PNFS_BLOCK_VOLUME_STRIPE   = 3	/* striped across multiple volumes */
++};
++
++/* All disk offset/lengths are stored in 512-byte sectors */
++struct pnfs_blk_volume {
++	uint32_t		bv_type;
++	sector_t 		bv_size;
++	struct pnfs_blk_volume 	**bv_vols;
++	int 			bv_vol_n;
++	union {
++		dev_t			bv_dev;
++		sector_t		bv_stripe_unit;
++		sector_t 		bv_offset;
++	};
++};
++
++/* Since components need not be aligned, cannot use sector_t */
++struct pnfs_blk_sig_comp {
++	int64_t 	bs_offset;  /* In bytes */
++	uint32_t   	bs_length;  /* In bytes */
++	char 		*bs_string;
++};
++
++/* Maximum number of signatures components in a simple volume */
++# define PNFS_BLOCK_MAX_SIG_COMP 16
++
++struct pnfs_blk_sig {
++	int 				si_num_comps;
++	struct pnfs_blk_sig_comp	si_comps[PNFS_BLOCK_MAX_SIG_COMP];
++};
++
++enum exstate4 {
++	PNFS_BLOCK_READWRITE_DATA	= 0,
++	PNFS_BLOCK_READ_DATA		= 1,
++	PNFS_BLOCK_INVALID_DATA		= 2, /* mapped, but data is invalid */
++	PNFS_BLOCK_NONE_DATA		= 3  /* unmapped, it's a hole */
++};
++
++#define MY_MAX_TAGS (15) /* tag bitnums used must be less than this */
++
++struct my_tree_t {
++	sector_t		mtt_step_size;	/* Internal sector alignment */
++	struct list_head	mtt_stub; /* Should be a radix tree */
++};
++
++struct pnfs_inval_markings {
++	spinlock_t	im_lock;
++	struct my_tree_t im_tree;	/* Sectors that need LAYOUTCOMMIT */
++	sector_t	im_block_size;	/* Server blocksize in sectors */
++};
++
++struct pnfs_inval_tracking {
++	struct list_head it_link;	/* entry in my_tree_t.mtt_stub */
++	int		 it_sector;	/* NOTE(review): int, yet set from u64 */
++	int		 it_tags;	/* bitmask, one bit per tag number */
++};
++
++/* sector_t fields are all in 512-byte sectors */
++struct pnfs_block_extent {
++	struct kref	be_refcnt;
++	struct list_head be_node;	/* link into lseg list */
++	struct pnfs_deviceid be_devid;  /* STUB - removable??? */
++	struct block_device *be_mdev;
++	sector_t	be_f_offset;	/* the starting offset in the file */
++	sector_t	be_length;	/* the size of the extent */
++	sector_t	be_v_offset;	/* the starting offset in the volume */
++	enum exstate4	be_state;	/* the state of this extent */
++	struct pnfs_inval_markings *be_inval; /* tracks INVAL->RW transition */
++};
++
++/* Shortened extent used by LAYOUTCOMMIT */
++struct pnfs_block_short_extent {
++	struct list_head bse_node;
++	struct pnfs_deviceid bse_devid;	/* STUB - removable??? */
++	struct block_device *bse_mdev;
++	sector_t	bse_f_offset;	/* the starting offset in the file */
++	sector_t	bse_length;	/* the size of the extent */
++};
++
++static inline void
++INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
++{
++	spin_lock_init(&marks->im_lock);
++	INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
++	marks->im_block_size = blocksize;
++	marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
++					   blocksize);
++}
++
++enum extentclass4 {
++	RW_EXTENT	= 0, /* READWRITE and INVAL */
++	RO_EXTENT	= 1, /* READ and NONE */
++	EXTENT_LISTS	= 2, /* number of classes; sizes bl_extents[] */
++};
++
++static inline int choose_list(enum exstate4 state)
++{
++	if (state == PNFS_BLOCK_READ_DATA || state == PNFS_BLOCK_NONE_DATA)
++		return RO_EXTENT;
++	else
++		return RW_EXTENT;
++}
++
++struct pnfs_block_layout {
++	struct pnfs_layout_hdr bl_layout;
++	struct pnfs_inval_markings bl_inval; /* tracks INVAL->RW transition */
++	spinlock_t		bl_ext_lock;   /* Protects list manipulation */
++	struct list_head	bl_extents[EXTENT_LISTS]; /* R and RW extents */
++	struct list_head	bl_commit;	/* Needs layout commit */
++	unsigned int		bl_count;	/* entries in bl_commit */
++	sector_t		bl_blocksize;  /* Server blocksize in sectors */
++};
++
++/* this struct is communicated between:
++ * bl_setup_layoutcommit && bl_encode_layoutcommit && bl_cleanup_layoutcommit
++ */
++struct bl_layoutupdate_data {
++	struct list_head ranges;
++};
++
++#define BLK_ID(lo) ((struct block_mount_id *)(PNFS_NFS_SERVER(lo)->pnfs_ld_data))
++
++static inline struct pnfs_block_layout *
++BLK_LO2EXT(struct pnfs_layout_hdr *lo)
++{
++	return container_of(lo, struct pnfs_block_layout, bl_layout);
++}
++
++static inline struct pnfs_block_layout *
++BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
++{
++	return BLK_LO2EXT(lseg->layout);
++}
++
++uint32_t *blk_overflow(uint32_t *p, uint32_t *end, size_t nbytes);
++
++#define BLK_READBUF(p, e, nbytes)  do { \
++	p = blk_overflow(p, e, nbytes); \
++	if (!p) { \
++		printk(KERN_WARNING \
++			"%s: reply buffer overflowed in line %d.\n", \
++			__func__, __LINE__); \
++		goto out_err; \
++	} \
++} while (0)
++
++#define READ32(x)         (x) = ntohl(*p++)
++#define READ64(x)         do {                  \
++	(x) = (uint64_t)ntohl(*p++) << 32;           \
++	(x) |= ntohl(*p++);                     \
++} while (0)
++#define COPYMEM(x, nbytes) do {                 \
++	memcpy((x), p, nbytes);                 \
++	p += XDR_QUADLEN(nbytes);               \
++} while (0)
++#define READ_DEVID(x)	COPYMEM((x)->data, NFS4_PNFS_DEVICEID4_SIZE)
++#define READ_SECTOR(x)     do { \
++	READ64(tmp); \
++	if (tmp & 0x1ff) { \
++		printk(KERN_WARNING \
++		       "%s Value not 512-byte aligned at line %d\n", \
++		       __func__, __LINE__);			     \
++		goto out_err; \
++	} \
++	(x) = tmp >> 9; \
++} while (0)
++
++#define WRITE32(n)               do { \
++	*p++ = htonl(n); \
++	} while (0)
++#define WRITE64(n)               do {                           \
++	*p++ = htonl((uint32_t)((n) >> 32));			\
++	*p++ = htonl((uint32_t)(n));				\
++} while (0)
++#define WRITEMEM(ptr, nbytes)     do {                          \
++	p = xdr_encode_opaque_fixed(p, ptr, nbytes);	\
++} while (0)
++#define WRITE_DEVID(x)  WRITEMEM((x)->data, NFS4_PNFS_DEVICEID4_SIZE)
++
++/* blocklayoutdev.c */
++struct block_device *nfs4_blkdev_get(dev_t dev);
++int nfs4_blkdev_put(struct block_device *bdev);
++struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server,
++					      struct pnfs_device *dev,
++					      struct list_head *sdlist);
++int nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
++			       struct nfs4_layoutget_res *lgr);
++int nfs4_blk_create_block_disk_list(struct list_head *);
++void nfs4_blk_destroy_disk_list(struct list_head *);
++/* blocklayoutdm.c */
++int nfs4_blk_flatten(struct pnfs_blk_volume *, int, struct pnfs_block_dev *);
++void free_block_dev(struct pnfs_block_dev *bdev);
++/* extents.c */
++struct pnfs_block_extent *
++find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
++		struct pnfs_block_extent **cow_read);
++int mark_initialized_sectors(struct pnfs_inval_markings *marks,
++			     sector_t offset, sector_t length,
++			     sector_t **pages);
++void put_extent(struct pnfs_block_extent *be);
++struct pnfs_block_extent *alloc_extent(void);
++struct pnfs_block_extent *get_extent(struct pnfs_block_extent *be);
++int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect);
++int encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++				   struct xdr_stream *xdr,
++				   const struct nfs4_layoutcommit_args *arg);
++void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++				   const struct nfs4_layoutcommit_args *arg,
++				   int status);
++int add_and_merge_extent(struct pnfs_block_layout *bl,
++			 struct pnfs_block_extent *new);
++int mark_for_commit(struct pnfs_block_extent *be,
++		    sector_t offset, sector_t length);
++
++#include <linux/sunrpc/simple_rpc_pipefs.h>
++
++extern struct pipefs_list bl_device_list;
++extern struct dentry *bl_device_pipe;
++
++int bl_pipe_init(void);
++void bl_pipe_exit(void);
++
++#define BL_DEVICE_UMOUNT               0x0 /* Umount--delete devices */
++#define BL_DEVICE_MOUNT                0x1 /* Mount--create devices*/
++#define BL_DEVICE_REQUEST_INIT         0x0 /* Start request */
++#define BL_DEVICE_REQUEST_PROC         0x1 /* User level process succeeds */
++#define BL_DEVICE_REQUEST_ERR          0x2 /* User level process fails */
++
++#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-08-31 20:42:05.507113260 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-08-31 20:42:05.508119925 -0400
+@@ -0,0 +1,948 @@
++/*
++ *  linux/fs/nfs/blocklayout/extents.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++
++#include "blocklayout.h"
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/* Bit numbers */
++#define EXTENT_INITIALIZED 0
++#define EXTENT_WRITTEN     1
++#define EXTENT_IN_COMMIT   2
++#define INTERNAL_EXISTS    MY_MAX_TAGS
++#define INTERNAL_MASK      ((1 << INTERNAL_EXISTS) - 1)
++
++/* Returns largest t<=s s.t. t%base==0 */
++static inline sector_t normalize(sector_t s, int base)
++{
++	sector_t tmp = s; /* Since do_div modifies its argument */
++	return s - do_div(tmp, base);
++}
++
++static inline sector_t normalize_up(sector_t s, int base)
++{
++	return normalize(s + base - 1, base);
++}
++
++/* Complete stub using list while determine API wanted */
++
++/* Returns tags, or negative */
++static int32_t _find_entry(struct my_tree_t *tree, u64 s)
++{
++	struct pnfs_inval_tracking *pos;
++
++	dprintk("%s(%llu) enter\n", __func__, s);
++	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
++		if (pos->it_sector > s)
++			continue;
++		else if (pos->it_sector == s)
++			return pos->it_tags & INTERNAL_MASK;
++		else
++			break;
++	}
++	return -ENOENT;
++}
++
++static inline
++int _has_tag(struct my_tree_t *tree, u64 s, int32_t tag)
++{
++	int32_t tags;
++
++	dprintk("%s(%llu, %i) enter\n", __func__, s, tag);
++	s = normalize(s, tree->mtt_step_size);
++	tags = _find_entry(tree, s);
++	if ((tags < 0) || !(tags & (1 << tag)))
++		return 0;
++	else
++		return 1;
++}
++
++/* Creates entry with tag, or if entry already exists, unions tag to it.
++ * If storage is not NULL, newly created entry will use it.
++ * Returns number of entries added, or negative on error.
++ */
++static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag,
++		      struct pnfs_inval_tracking *storage)
++{
++	int found = 0;
++	struct pnfs_inval_tracking *pos;
++
++	dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage);
++	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
++		if (pos->it_sector > s)
++			continue;
++		else if (pos->it_sector == s) {
++			found = 1;
++			break;
++		} else
++			break;
++	}
++	if (found) {
++		pos->it_tags |= (1 << tag);
++		return 0;
++	} else {
++		struct pnfs_inval_tracking *new;
++		if (storage)
++			new = storage;
++		else {
++			new = kmalloc(sizeof(*new), GFP_KERNEL);
++			if (!new)
++				return -ENOMEM;
++		}
++		new->it_sector = s;
++		new->it_tags = (1 << tag);
++		list_add(&new->it_link, &pos->it_link);
++		return 1;
++	}
++}
++
++/* XXXX Really want option to not create */
++/* Over range, unions tag with existing entries, else creates entry with tag */
++static int _set_range(struct my_tree_t *tree, int32_t tag, u64 s, u64 length)
++{
++	u64 i;
++
++	dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length);
++	for (i = normalize(s, tree->mtt_step_size); i < s + length;
++	     i += tree->mtt_step_size)
++		if (_add_entry(tree, i, tag, NULL) < 0)	/* 1 means "created" */
++			return -ENOMEM;
++	return 0;
++}
++
++/* Ensure that future operations on given range of tree will not malloc */
++static int _preload_range(struct my_tree_t *tree, u64 offset, u64 length)
++{
++	u64 start, end, s;
++	int count, i, used = 0, status = -ENOMEM;
++	struct pnfs_inval_tracking **storage;
++
++	dprintk("%s(%llu, %llu) enter\n", __func__, offset, length);
++	start = normalize(offset, tree->mtt_step_size);
++	end = normalize_up(offset + length, tree->mtt_step_size);
++	count = (int)(end - start) / (int)tree->mtt_step_size;
++
++	/* Pre-malloc what memory we might need */
++	storage = kmalloc(sizeof(*storage) * count, GFP_KERNEL);
++	if (!storage)
++		return -ENOMEM;
++	for (i = 0; i < count; i++) {
++		storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking),
++				     GFP_KERNEL);
++		if (!storage[i])
++			goto out_cleanup;
++	}
++
++	/* Now need lock - HOW??? */
++
++	for (s = start; s < end; s += tree->mtt_step_size)
++		used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);
++
++	/* Unlock - HOW??? */
++	status = 0;
++
++ out_cleanup:
++	for (i = used; i < count; i++) {
++		if (!storage[i])
++			break;
++		kfree(storage[i]);
++	}
++	kfree(storage);
++	return status;
++}
++
++static void set_needs_init(sector_t *array, sector_t offset)
++{
++	sector_t *p = array;	/* NOTE(review): array capacity is not checked */
++
++	dprintk("%s enter\n", __func__);
++	if (!p)			/* no array to record into */
++		return;
++	while (*p < offset)	/* skip smaller entries; ~0 ends the scan */
++		p++;
++	if (*p == offset)	/* already recorded */
++		return;
++	else if (*p == ~0) {	/* at the terminator: append, re-terminate */
++		*p++ = offset;
++		*p = ~0;
++		return;
++	} else {		/* insert before *p, shifting the tail up */
++		sector_t *save = p;
++		dprintk("%s Adding %llu\n", __func__, (u64)offset);
++		while (*p != ~0)
++			p++;
++		p++;		/* include the ~0 terminator in the move */
++		memmove(save + 1, save, (char *)p - (char *)save);
++		*save = offset;
++		return;
++	}
++}
++
++/* We are relying on page lock to serialize this */
++int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect)
++{
++	int rv;
++
++	spin_lock(&marks->im_lock);
++	rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
++	spin_unlock(&marks->im_lock);
++	return rv;
++}
++
++/* Returns 1 if every step-sized entry covering [start, end) exists and has
++ * tag set, else 0.  Assume start, end already sector aligned.
++ * The list is walked from the highest sector down, requiring each entry to
++ * sit exactly one step below the previous one.
++ */
++static int
++_range_has_tag(struct my_tree_t *tree, u64 start, u64 end, int32_t tag)
++{
++	struct pnfs_inval_tracking *pos;
++	u64 step = tree->mtt_step_size;
++	u64 expect;	/* sector the next (lower) entry must have */
++
++	dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag);
++	if (end < step)
++		return 0;
++	expect = end - step;
++	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
++		if (pos->it_sector >= end)
++			continue;
++		if (pos->it_sector != expect || !(pos->it_tags & (1 << tag)))
++			return 0;
++		/* Done once the step below this entry would start before
++		 * start; test before subtracting so sector 0 cannot wrap.
++		 */
++		if (expect < step || expect - step < start)
++			return 1;
++		expect -= step;
++	}
++	return 0;
++}
++
++static int is_range_written(struct pnfs_inval_markings *marks,
++			    sector_t start, sector_t end)
++{
++	int rv;
++
++	spin_lock(&marks->im_lock);
++	rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN);
++	spin_unlock(&marks->im_lock);
++	return rv;
++}
++
++/* Marks sectors in [offset, offset+length) as having been initialized.
++ * All lengths are step-aligned, where step is min(pagesize, blocksize).
++ * Notes where partial block is initialized, and helps prepare it for
++ * complete initialization later.
++ */
++/* Currently assumes offset is page-aligned */
++int mark_initialized_sectors(struct pnfs_inval_markings *marks,
++			     sector_t offset, sector_t length,
++			     sector_t **pages)
++{
++	sector_t s, start, end;
++	sector_t *array = NULL; /* Pages to mark */
++
++	dprintk("%s(offset=%llu,len=%llu) enter\n",
++		__func__, (u64)offset, (u64)length);
++	s = max((sector_t) 3,
++		2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
++	dprintk("%s set max=%llu\n", __func__, (u64)s);
++	if (pages) {
++		array = kmalloc(s * sizeof(sector_t), GFP_KERNEL);
++		if (!array)
++			goto outerr;
++		array[0] = ~0;
++	}
++
++	start = normalize(offset, marks->im_block_size);
++	end = normalize_up(offset + length, marks->im_block_size);
++	if (_preload_range(&marks->im_tree, start, end - start))
++		goto outerr;
++
++	spin_lock(&marks->im_lock);
++
++	for (s = normalize_up(start, PAGE_CACHE_SECTORS);
++	     s < offset; s += PAGE_CACHE_SECTORS) {
++		dprintk("%s pre-area pages\n", __func__);
++		/* Portion of used block is not initialized */
++		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
++			set_needs_init(array, s);
++	}
++	if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
++		goto out_unlock;
++	for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS);
++	     s < end; s += PAGE_CACHE_SECTORS) {
++		dprintk("%s post-area pages\n", __func__);
++		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
++			set_needs_init(array, s);
++	}
++
++	spin_unlock(&marks->im_lock);
++
++	if (pages) {
++		if (array[0] == ~0) {
++			kfree(array);
++			*pages = NULL;
++		} else
++			*pages = array;
++	}
++	return 0;
++
++ out_unlock:
++	spin_unlock(&marks->im_lock);
++ outerr:
++	if (pages) {
++		kfree(array);
++		*pages = NULL;
++	}
++	return -ENOMEM;
++}
++
++/* Marks sectors in [offset, offset+length) as having been written to disk.
++ * All lengths should be block aligned.
++ */
++int mark_written_sectors(struct pnfs_inval_markings *marks,
++			 sector_t offset, sector_t length)
++{
++	int status;
++
++	dprintk("%s(offset=%llu,len=%llu) enter\n", __func__,
++		(u64)offset, (u64)length);
++	spin_lock(&marks->im_lock);
++	status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length);
++	spin_unlock(&marks->im_lock);
++	return status;
++}
++
++static void print_short_extent(struct pnfs_block_short_extent *be)
++{
++	dprintk("PRINT SHORT EXTENT extent %p\n", be);
++	if (be) {
++		dprintk("        be_f_offset %llu\n", (u64)be->bse_f_offset);
++		dprintk("        be_length   %llu\n", (u64)be->bse_length);
++	}
++}
++
++void print_clist(struct list_head *list, unsigned int count)
++{
++	struct pnfs_block_short_extent *be;
++	unsigned int i = 0;
++
++	dprintk("****************\n");
++	dprintk("Extent list looks like:\n");
++	list_for_each_entry(be, list, bse_node) {
++		i++;
++		print_short_extent(be);
++	}
++	if (i != count)
++		dprintk("\n\nExpected %u entries\n\n\n", count);
++	dprintk("****************\n");
++}
++
++/* Note: In theory, we should do more checking that devid's match between
++ * old and new, but if they don't, the lists are too corrupt to salvage anyway.
++ */
++/* Note this is very similar to add_and_merge_extent */
++static void add_to_commitlist(struct pnfs_block_layout *bl,
++			      struct pnfs_block_short_extent *new)
++{
++	struct list_head *clist = &bl->bl_commit;
++	struct pnfs_block_short_extent *old, *save;
++	sector_t end = new->bse_f_offset + new->bse_length;
++
++	dprintk("%s enter\n", __func__);
++	print_short_extent(new);
++	print_clist(clist, bl->bl_count);
++	bl->bl_count++;
++	/* Scan for proper place to insert, extending new to the left
++	 * as much as possible.
++	 */
++	list_for_each_entry_safe(old, save, clist, bse_node) {
++		if (new->bse_f_offset < old->bse_f_offset)
++			break;
++		if (end <= old->bse_f_offset + old->bse_length) {
++			/* Range is already in list */
++			bl->bl_count--;
++			kfree(new);
++			return;
++		} else if (new->bse_f_offset <=
++				old->bse_f_offset + old->bse_length) {
++			/* new overlaps or abuts existing be */
++			if (new->bse_mdev == old->bse_mdev) {
++				/* extend new to fully replace old */
++				new->bse_length += new->bse_f_offset -
++						old->bse_f_offset;
++				new->bse_f_offset = old->bse_f_offset;
++				list_del(&old->bse_node);
++				bl->bl_count--;
++				kfree(old);
++			}
++		}
++	}
++	/* Note that if we never hit the above break, old will not point to a
++	 * valid extent.  However, in that case &old->bse_node==list.
++	 */
++	list_add_tail(&new->bse_node, &old->bse_node);
++	/* Scan forward for overlaps.  If we find any, extend new and
++	 * remove the overlapped extent.
++	 */
++	old = list_prepare_entry(new, clist, bse_node);
++	list_for_each_entry_safe_continue(old, save, clist, bse_node) {
++		if (end < old->bse_f_offset)
++			break;
++		/* new overlaps or abuts old */
++		if (new->bse_mdev == old->bse_mdev) {
++			if (end < old->bse_f_offset + old->bse_length) {
++				/* extend new to fully cover old */
++				end = old->bse_f_offset + old->bse_length;
++				new->bse_length = end - new->bse_f_offset;
++			}
++			list_del(&old->bse_node);
++			bl->bl_count--;
++			kfree(old);
++		}
++	}
++	dprintk("%s: after merging\n", __func__);
++	print_clist(clist, bl->bl_count);
++}
++
++/* Note the range described by offset, length is guaranteed to be contained
++ * within be.
++ */
++int mark_for_commit(struct pnfs_block_extent *be,
++		    sector_t offset, sector_t length)
++{
++	sector_t new_end, end = offset + length;
++	struct pnfs_block_short_extent *new;
++	struct pnfs_block_layout *bl = container_of(be->be_inval,
++						    struct pnfs_block_layout,
++						    bl_inval);
++
++	new = kmalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return -ENOMEM;
++
++	mark_written_sectors(be->be_inval, offset, length);
++	/* We want to add the range to commit list, but it must be
++	 * block-normalized, and verified that the normalized range has
++	 * been entirely written to disk.
++	 */
++	new->bse_f_offset = offset;
++	offset = normalize(offset, bl->bl_blocksize);
++	if (offset < new->bse_f_offset) {
++		if (is_range_written(be->be_inval, offset, new->bse_f_offset))
++			new->bse_f_offset = offset;
++		else
++			new->bse_f_offset = offset + bl->bl_blocksize;
++	}
++	new_end = normalize_up(end, bl->bl_blocksize);
++	if (end < new_end) {
++		if (is_range_written(be->be_inval, end, new_end))
++			end = new_end;
++		else
++			end = new_end - bl->bl_blocksize;
++	}
++	if (end <= new->bse_f_offset) {
++		kfree(new);
++		return 0;
++	}
++	new->bse_length = end - new->bse_f_offset;
++	new->bse_devid = be->be_devid;
++	new->bse_mdev = be->be_mdev;
++
++	spin_lock(&bl->bl_ext_lock);
++	/* new will be freed, either by add_to_commitlist if it decides not
++	 * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
++	 */
++	add_to_commitlist(bl, new);
++	spin_unlock(&bl->bl_ext_lock);
++	return 0;
++}
++
++static void print_bl_extent(struct pnfs_block_extent *be)
++{
++	dprintk("PRINT EXTENT extent %p\n", be);
++	if (be) {
++		dprintk("        be_f_offset %llu\n", (u64)be->be_f_offset);
++		dprintk("        be_length   %llu\n", (u64)be->be_length);
++		dprintk("        be_v_offset %llu\n", (u64)be->be_v_offset);
++		dprintk("        be_state    %d\n", be->be_state);
++	}
++}
++
++static void
++destroy_extent(struct kref *kref)
++{
++	struct pnfs_block_extent *be;
++
++	be = container_of(kref, struct pnfs_block_extent, be_refcnt);
++	dprintk("%s be=%p\n", __func__, be);
++	kfree(be);
++}
++
++void
++put_extent(struct pnfs_block_extent *be)
++{
++	if (be) {
++		dprintk("%s enter %p (%i)\n", __func__, be,
++			atomic_read(&be->be_refcnt.refcount));
++		kref_put(&be->be_refcnt, destroy_extent);
++	}
++}
++
++struct pnfs_block_extent *alloc_extent(void)
++{
++	struct pnfs_block_extent *be;
++
++	be = kmalloc(sizeof(struct pnfs_block_extent), GFP_KERNEL);
++	if (!be)
++		return NULL;
++	INIT_LIST_HEAD(&be->be_node);
++	kref_init(&be->be_refcnt);
++	be->be_inval = NULL;
++	return be;
++}
++
++struct pnfs_block_extent *
++get_extent(struct pnfs_block_extent *be)
++{
++	if (be)
++		kref_get(&be->be_refcnt);
++	return be;
++}
++
++void print_elist(struct list_head *list)
++{
++	struct pnfs_block_extent *be;
++	dprintk("****************\n");
++	dprintk("Extent list looks like:\n");
++	list_for_each_entry(be, list, be_node) {
++		print_bl_extent(be);
++	}
++	dprintk("****************\n");
++}
++
++static inline int
++extents_consistent(struct pnfs_block_extent *old, struct pnfs_block_extent *new)
++{
++	/* Note this assumes new->be_f_offset >= old->be_f_offset */
++	return (new->be_state == old->be_state) &&
++		((new->be_state == PNFS_BLOCK_NONE_DATA) ||
++		 ((new->be_v_offset - old->be_v_offset ==
++		   new->be_f_offset - old->be_f_offset) &&
++		  new->be_mdev == old->be_mdev));
++}
++
++/* Adds new to appropriate list in bl, modifying new and removing existing
++ * extents as appropriate to deal with overlaps.
++ *
++ * See find_get_extent for list constraints.
++ *
++ * Refcount on new is already set.  If end up not using it, or error out,
++ * need to put the reference.
++ *
++ * Lock is held by caller.
++ */
++int
++add_and_merge_extent(struct pnfs_block_layout *bl,
++		     struct pnfs_block_extent *new)
++{
++	struct pnfs_block_extent *be, *tmp;
++	sector_t end = new->be_f_offset + new->be_length;
++	struct list_head *list;
++
++	dprintk("%s enter with be=%p\n", __func__, new);
++	print_bl_extent(new);
++	list = &bl->bl_extents[choose_list(new->be_state)];
++	print_elist(list);
++
++	/* Scan for proper place to insert, extending new to the left
++	 * as much as possible.
++	 */
++	list_for_each_entry_safe_reverse(be, tmp, list, be_node) {
++		if (new->be_f_offset >= be->be_f_offset + be->be_length)
++			break;
++		if (new->be_f_offset >= be->be_f_offset) {
++			if (end <= be->be_f_offset + be->be_length) {
++				/* new is a subset of existing be*/
++				if (extents_consistent(be, new)) {
++					dprintk("%s: new is subset, ignoring\n",
++						__func__);
++					put_extent(new);
++					return 0;
++				} else {
++					goto out_err;
++				}
++			} else {
++				/* |<--   be   -->|
++				 *          |<--   new   -->| */
++				if (extents_consistent(be, new)) {
++					/* extend new to fully replace be */
++					new->be_length += new->be_f_offset -
++						be->be_f_offset;
++					new->be_f_offset = be->be_f_offset;
++					new->be_v_offset = be->be_v_offset;
++					dprintk("%s: removing %p\n", __func__, be);
++					list_del(&be->be_node);
++					put_extent(be);
++				} else {
++					goto out_err;
++				}
++			}
++		} else if (end >= be->be_f_offset + be->be_length) {
++			/* new extent overlap existing be */
++			if (extents_consistent(be, new)) {
++				/* extend new to fully replace be */
++				dprintk("%s: removing %p\n", __func__, be);
++				list_del(&be->be_node);
++				put_extent(be);
++			} else {
++				goto out_err;
++			}
++		} else if (end > be->be_f_offset) {
++			/*           |<--   be   -->|
++			 *|<--   new   -->| */
++			if (extents_consistent(new, be)) {
++				/* extend new to fully replace be */
++				new->be_length += be->be_f_offset + be->be_length -
++					new->be_f_offset - new->be_length;
++				dprintk("%s: removing %p\n", __func__, be);
++				list_del(&be->be_node);
++				put_extent(be);
++			} else {
++				goto out_err;
++			}
++		}
++	}
++	/* Note that if we never hit the above break, be will not point to a
++	 * valid extent.  However, in that case &be->be_node==list.
++	 */
++	list_add(&new->be_node, &be->be_node);
++	dprintk("%s: inserting new\n", __func__);
++	print_elist(list);
++	/* STUB - The per-list consistency checks have all been done,
++	 * should now check cross-list consistency.
++	 */
++	return 0;
++
++ out_err:
++	put_extent(new);
++	return -EIO;
++}
++
++/* Returns extent, or NULL.  If a second READ extent exists, it is returned
++ * in cow_read, if given.
++ *
++ * The extents are kept in two separate ordered lists, one for READ and NONE,
++ * one for READWRITE and INVALID.  Within each list, we assume:
++ * 1. Extents are ordered by file offset.
++ * 2. For any given isect, there is at most one extent that matches.
++ */
++struct pnfs_block_extent *
++find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
++	    struct pnfs_block_extent **cow_read)
++{
++	struct pnfs_block_extent *be, *cow, *ret;
++	int i;
++
++	dprintk("%s enter with isect %llu\n", __func__, (u64)isect);
++	cow = ret = NULL;
++	spin_lock(&bl->bl_ext_lock);
++	for (i = 0; i < EXTENT_LISTS; i++) {
++		if (ret &&
++		    (!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA))
++			break;
++		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
++			if (isect >= be->be_f_offset + be->be_length)
++				break;
++			if (isect >= be->be_f_offset) {
++				/* We have found an extent */
++				dprintk("%s Get %p (%i)\n", __func__, be,
++					atomic_read(&be->be_refcnt.refcount));
++				kref_get(&be->be_refcnt);
++				if (!ret)
++					ret = be;
++				else if (be->be_state != PNFS_BLOCK_READ_DATA)
++					put_extent(be);
++				else
++					cow = be;
++				break;
++			}
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++	if (cow_read)
++		*cow_read = cow;
++	print_bl_extent(ret);
++	return ret;
++}
++
++/* Similar to find_get_extent, but called with lock held, and ignores cow */
++static struct pnfs_block_extent *
++find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect)
++{
++	struct pnfs_block_extent *be, *ret = NULL;
++	int i;
++
++	dprintk("%s enter with isect %llu\n", __func__, (u64)isect);
++	for (i = 0; i < EXTENT_LISTS; i++) {
++		if (ret)
++			break;
++		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
++			if (isect >= be->be_f_offset + be->be_length)
++				break;
++			if (isect >= be->be_f_offset) {
++				/* We have found an extent */
++				dprintk("%s Get %p (%i)\n", __func__, be,
++					atomic_read(&be->be_refcnt.refcount));
++				kref_get(&be->be_refcnt);
++				ret = be;
++				break;
++			}
++		}
++	}
++	print_bl_extent(ret);
++	return ret;
++}
++
++int
++encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++			       struct xdr_stream *xdr,
++			       const struct nfs4_layoutcommit_args *arg)
++{
++	sector_t start, end;
++	struct pnfs_block_short_extent *lce, *save;
++	unsigned int count = 0;
++	struct bl_layoutupdate_data *bld = arg->layoutdriver_data;
++	struct list_head *ranges = &bld->ranges;
++	__be32 *p, *xdr_start;
++
++	dprintk("%s enter\n", __func__);
++	start = arg->range.offset >> 9;
++	end = start + (arg->range.length >> 9);
++	dprintk("%s set start=%llu, end=%llu\n",
++		__func__, (u64)start, (u64)end);
++
++	/* BUG - creation of bl_commit is buggy - need to wait for
++	 * entire block to be marked WRITTEN before it can be added.
++	 */
++	spin_lock(&bl->bl_ext_lock);
++	/* Want to adjust for possible truncate */
++	/* We now want to adjust argument range */
++
++	/* XDR encode the ranges found */
++	xdr_start = xdr_reserve_space(xdr, 8);
++	if (!xdr_start)
++		goto out;
++	list_for_each_entry_safe(lce, save, &bl->bl_commit, bse_node) {
++		p = xdr_reserve_space(xdr, 7 * 4 + sizeof(lce->bse_devid.data));
++		if (!p)
++			break;
++		WRITE_DEVID(&lce->bse_devid);
++		WRITE64(lce->bse_f_offset << 9);
++		WRITE64(lce->bse_length << 9);
++		WRITE64(0LL);
++		WRITE32(PNFS_BLOCK_READWRITE_DATA);
++		list_del(&lce->bse_node);
++		list_add_tail(&lce->bse_node, ranges);
++		bl->bl_count--;
++		count++;
++	}
++	xdr_start[0] = cpu_to_be32((xdr->p - xdr_start - 1) * 4);
++	xdr_start[1] = cpu_to_be32(count);
++out:
++	spin_unlock(&bl->bl_ext_lock);
++	dprintk("%s found %i ranges\n", __func__, count);
++	return 0;
++}
++
++/* Helper function to set_to_rw that initializes a new extent */
++static void
++_prep_new_extent(struct pnfs_block_extent *new,
++		 struct pnfs_block_extent *orig,
++		 sector_t offset, sector_t length, int state)
++{
++	kref_init(&new->be_refcnt);
++	/* don't need to INIT_LIST_HEAD(&new->be_node) */
++	memcpy(&new->be_devid, &orig->be_devid, sizeof(struct pnfs_deviceid));
++	new->be_mdev = orig->be_mdev;
++	new->be_f_offset = offset;
++	new->be_length = length;
++	new->be_v_offset = orig->be_v_offset - orig->be_f_offset + offset;
++	new->be_state = state;
++	new->be_inval = orig->be_inval;
++}
++
++/* Tries to merge be with extent in front of it in list.
++ * Frees storage if not used.
++ */
++static struct pnfs_block_extent *
++_front_merge(struct pnfs_block_extent *be, struct list_head *head,
++	     struct pnfs_block_extent *storage)
++{
++	struct pnfs_block_extent *prev;
++
++	if (!storage)
++		goto no_merge;
++	if (&be->be_node == head || be->be_node.prev == head)
++		goto no_merge;
++	prev = list_entry(be->be_node.prev, struct pnfs_block_extent, be_node);
++	if ((prev->be_f_offset + prev->be_length != be->be_f_offset) ||
++	    !extents_consistent(prev, be))
++		goto no_merge;
++	_prep_new_extent(storage, prev, prev->be_f_offset,
++			 prev->be_length + be->be_length, prev->be_state);
++	list_replace(&prev->be_node, &storage->be_node);
++	put_extent(prev);
++	list_del(&be->be_node);
++	put_extent(be);
++	return storage;
++
++ no_merge:
++	kfree(storage);
++	return be;
++}
++
++/* Splits an INVALID_DATA extent containing offset so that its overlap with
++ * [offset, offset+length) becomes READWRITE_DATA (other states: no change).
++ * Returns that extent's end, or offset+length if none found or out of memory.
++ */
++static u64
++set_to_rw(struct pnfs_block_layout *bl, u64 offset, u64 length)
++{
++	u64 rv = offset + length;
++	struct pnfs_block_extent *be, *e1, *e2, *e3, *new, *old;
++	struct pnfs_block_extent *children[3];
++	struct pnfs_block_extent *merge1 = NULL, *merge2 = NULL;
++	int i = 0, j;
++
++	dprintk("%s(%llu, %llu)\n", __func__, offset, length);
++	/* Storage for up to three new extents; BUG - failure is ignored */
++	e1 = kmalloc(sizeof(*e1), GFP_KERNEL);
++	e2 = kmalloc(sizeof(*e2), GFP_KERNEL);
++	e3 = kmalloc(sizeof(*e3), GFP_KERNEL);
++	if (!e1 || !e2 || !e3)
++		goto out_nosplit;
++
++	spin_lock(&bl->bl_ext_lock);
++	be = find_get_extent_locked(bl, offset);
++	if (be)
++		rv = be->be_f_offset + be->be_length;
++	if (!be || be->be_state != PNFS_BLOCK_INVALID_DATA) {
++		spin_unlock(&bl->bl_ext_lock);
++		put_extent(be);	/* NULL-safe; drops the lookup's reference */
++		goto out_nosplit;
++	}
++	/* Add e* to children, bumping e*'s krefs */
++	if (be->be_f_offset != offset) {
++		_prep_new_extent(e1, be, be->be_f_offset,
++				 offset - be->be_f_offset,
++				 PNFS_BLOCK_INVALID_DATA);
++		children[i++] = e1;
++		print_bl_extent(e1);
++	} else
++		merge1 = e1;
++	_prep_new_extent(e2, be, offset,
++			 min(length, be->be_f_offset + be->be_length - offset),
++			 PNFS_BLOCK_READWRITE_DATA);
++	children[i++] = e2;
++	print_bl_extent(e2);
++	if (offset + length < be->be_f_offset + be->be_length) {
++		_prep_new_extent(e3, be, e2->be_f_offset + e2->be_length,
++				 be->be_f_offset + be->be_length -
++				 offset - length,
++				 PNFS_BLOCK_INVALID_DATA);
++		children[i++] = e3;
++		print_bl_extent(e3);
++	} else
++		merge2 = e3;
++	/* Remove be from list, and insert the e*.  We don't get refs on e*,
++	 * since this list is the base reference set when init'ed.
++	 */
++	if (i < 3)
++		children[i] = NULL;
++	new = children[0];
++	list_replace(&be->be_node, &new->be_node);
++	put_extent(be);
++	new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge1);
++	for (j = 1; j < i; j++) {
++		old = new;
++		new = children[j];
++		list_add(&new->be_node, &old->be_node);
++	}
++	if (merge2) {
++		/* This is a HACK, should just create a _back_merge function */
++		new = list_entry(new->be_node.next,
++				 struct pnfs_block_extent, be_node);
++		new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge2);
++	}
++	spin_unlock(&bl->bl_ext_lock);
++	/* Base reference was removed above, so be is now due for destruction */
++	put_extent(be);
++	dprintk("%s returns %llu after split\n", __func__, rv);
++	return rv;
++
++ out_nosplit:
++	kfree(e1);
++	kfree(e2);
++	kfree(e3);
++	dprintk("%s returns %llu without splitting\n", __func__, rv);
++	return rv;
++}
++
++/* Disposes of arg's encoded ranges: set to RW if !status, else re-queue */
++void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++				   const struct nfs4_layoutcommit_args *arg,
++				   int status)
++{
++	struct bl_layoutupdate_data *bld = arg->layoutdriver_data;
++	struct pnfs_block_short_extent *lce, *save;
++
++	dprintk("%s status %d\n", __func__, status);
++	list_for_each_entry_safe_reverse(lce, save, &bld->ranges, bse_node) {
++		list_del(&lce->bse_node);	/* freed or re-queued below */
++		if (likely(!status)) {
++			u64 offset = lce->bse_f_offset;
++			u64 end = offset + lce->bse_length;
++
++			do {
++				offset = set_to_rw(bl, offset, end - offset);
++			} while (offset < end);
++			kfree(lce);
++		} else {
++			spin_lock(&bl->bl_ext_lock);
++			add_to_commitlist(bl, lce);
++			spin_unlock(&bl->bl_ext_lock);
++		}
++	}
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-08-31 20:42:05.502212803 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-08-31 20:42:05.502212803 -0400
+@@ -0,0 +1,6 @@
++#
++# Makefile for the pNFS block layout driver kernel module
++#
++obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o
++blocklayoutdriver-objs := blocklayout.o blocklayoutdev.o blocklayoutdm.o \
++			extents.o block-device-discovery-pipe.o
+diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/callback.h
+--- linux-2.6.34.noarch/fs/nfs/callback.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-08-31 20:42:05.508119925 -0400
+@@ -111,6 +111,13 @@ extern int nfs41_validate_delegation_sta
+ 
+ #define RCA4_TYPE_MASK_RDATA_DLG	0
+ #define RCA4_TYPE_MASK_WDATA_DLG	1
++#define RCA4_TYPE_MASK_DIR_DLG         2
++#define RCA4_TYPE_MASK_FILE_LAYOUT     3
++#define RCA4_TYPE_MASK_BLK_LAYOUT      4
++#define RCA4_TYPE_MASK_OBJ_LAYOUT_MIN  8
++#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX  9
++#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12
++#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15
+ 
+ struct cb_recallanyargs {
+ 	struct sockaddr	*craa_addr;
+@@ -127,6 +134,39 @@ struct cb_recallslotargs {
+ extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args,
+ 					  void *dummy);
+ 
++struct cb_layoutrecallargs {
++	struct sockaddr		*cbl_addr;
++	struct nfs_fh		cbl_fh;
++	struct pnfs_layout_range cbl_seg;
++	struct nfs_fsid		cbl_fsid;
++	uint32_t		cbl_recall_type;
++	uint32_t		cbl_layout_type;
++	uint32_t		cbl_layoutchanged;
++	nfs4_stateid		cbl_stateid;
++};
++
++extern unsigned nfs4_callback_layoutrecall(
++	struct cb_layoutrecallargs *args,
++	void *dummy);
++
++struct cb_devicenotifyitem {
++	uint32_t		cbd_notify_type;
++	uint32_t		cbd_layout_type;
++	struct pnfs_deviceid	cbd_dev_id;
++	uint32_t		cbd_immediate;
++};
++
++/* XXX: Should be dynamic up to max compound size */
++#define NFS4_DEV_NOTIFY_MAXENTRIES 10
++struct cb_devicenotifyargs {
++	struct sockaddr			*addr;
++	int				 ndevs;
++	struct cb_devicenotifyitem	 devs[NFS4_DEV_NOTIFY_MAXENTRIES];
++};
++
++extern unsigned nfs4_callback_devicenotify(
++	struct cb_devicenotifyargs *args,
++	void *dummy);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
+diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/nfs/callback_proc.c
+--- linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-08-31 20:42:05.509093330 -0400
+@@ -8,10 +8,15 @@
+ #include <linux/nfs4.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/slab.h>
++#include <linux/kthread.h>
++#include <linux/module.h>
++#include <linux/writeback.h>
++#include <linux/nfs4_pnfs.h>
+ #include "nfs4_fs.h"
+ #include "callback.h"
+ #include "delegation.h"
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ #ifdef NFS_DEBUG
+ #define NFSDBG_FACILITY NFSDBG_CALLBACK
+@@ -62,16 +67,6 @@ out:
+ 	return res->status;
+ }
+ 
+-static int (*nfs_validate_delegation_stateid(struct nfs_client *clp))(struct nfs_delegation *, const nfs4_stateid *)
+-{
+-#if defined(CONFIG_NFS_V4_1)
+-	if (clp->cl_minorversion > 0)
+-		return nfs41_validate_delegation_stateid;
+-#endif
+-	return nfs4_validate_delegation_stateid;
+-}
+-
+-
+ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
+ {
+ 	struct nfs_client *clp;
+@@ -92,8 +87,7 @@ __be32 nfs4_callback_recall(struct cb_re
+ 		inode = nfs_delegation_find_inode(clp, &args->fh);
+ 		if (inode != NULL) {
+ 			/* Set up a helper thread to actually return the delegation */
+-			switch (nfs_async_inode_return_delegation(inode, &args->stateid,
+-								  nfs_validate_delegation_stateid(clp))) {
++			switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
+ 				case 0:
+ 					res = 0;
+ 					break;
+@@ -116,24 +110,364 @@ out:
+ 
+ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
+ {
+-	if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data,
+-					 sizeof(delegation->stateid.data)) != 0)
++	if (delegation == NULL || memcmp(delegation->stateid.u.data,
++					 stateid->u.data,
++					 sizeof(delegation->stateid.u.data)))
+ 		return 0;
+ 	return 1;
+ }
+ 
+ #if defined(CONFIG_NFS_V4_1)
+ 
++static bool
++pnfs_is_next_layout_stateid(const struct pnfs_layout_hdr *lo,
++			    const nfs4_stateid stateid)
++{
++	int seqlock;
++	bool res;
++	u32 oldseqid, newseqid;
++
++	do {
++		seqlock = read_seqbegin(&lo->seqlock);
++		oldseqid = be32_to_cpu(lo->stateid.u.stateid.seqid);
++		newseqid = be32_to_cpu(stateid.u.stateid.seqid);
++		res = !memcmp(lo->stateid.u.stateid.other,
++			      stateid.u.stateid.other,
++			      NFS4_STATEID_OTHER_SIZE);
++		if (res) { /* comparing layout stateids */
++			if (oldseqid == ~0)
++				res = (newseqid == 1);
++			else
++				res = (newseqid == oldseqid + 1);
++		} else { /* open stateid */
++			res = !memcmp(lo->stateid.u.data,
++				      &zero_stateid,
++				      NFS4_STATEID_SIZE);
++			if (res)
++				res = (newseqid == 1);
++		}
++	} while (read_seqretry(&lo->seqlock, seqlock));
++
++	return res;
++}
++
++/*
++ * Return the first inode on clp->cl_layouts that matches the recall args
++ * (same fh for RETURN_FILE, same fsid for RETURN_FSID, anything otherwise),
++ * still has a layout and can be grabbed; NULL if none.  Caller must iput() it.
++ */
++static struct inode *
++nfs_layoutrecall_find_inode(struct nfs_client *clp,
++			    const struct cb_layoutrecallargs *args)
++{
++	struct nfs_inode *nfsi;
++	struct pnfs_layout_hdr *lo;
++	struct nfs_server *server;
++	struct inode *ino = NULL;
++
++	dprintk("%s: Begin recall_type=%d clp %p\n",
++		__func__, args->cbl_recall_type, clp);
++
++	spin_lock(&clp->cl_lock);
++	list_for_each_entry(lo, &clp->cl_layouts, layouts) {
++		nfsi = PNFS_NFS_INODE(lo);
++		if (!nfsi)
++			continue;
++
++		dprintk("%s: Searching inode=%lu\n",
++			__func__, nfsi->vfs_inode.i_ino);
++
++		if (args->cbl_recall_type == RETURN_FILE) {
++			if (nfs_compare_fh(&args->cbl_fh, &nfsi->fh))
++				continue;
++		} else if (args->cbl_recall_type == RETURN_FSID) {
++			server = NFS_SERVER(&nfsi->vfs_inode);
++			if (server->fsid.major != args->cbl_fsid.major ||
++			    server->fsid.minor != args->cbl_fsid.minor)
++				continue;
++		}
++
++		/* Make sure client didn't clean up layout without
++		 * telling the server */
++		if (!has_layout(nfsi))
++			continue;
++		ino = igrab(&nfsi->vfs_inode);
++		dprintk("%s: Found inode=%p\n", __func__, ino);
++		if (ino)	/* else inode is going away; try the next */
++			break;
++	}
++	spin_unlock(&clp->cl_lock);
++	return ino;
++}
++
++struct recall_layout_threadargs {
++	struct inode *inode;
++	struct nfs_client *clp;
++	struct completion started;
++	struct cb_layoutrecallargs *rl;
++	int result;
++};
++
++/* Recall thread body; sets ->result and completes ->started in *data */
++static int pnfs_recall_layout(void *data)
++{
++	struct inode *inode, *ino;
++	struct nfs_client *clp;
++	struct cb_layoutrecallargs rl;
++	struct nfs4_layoutreturn *lrp;
++	struct recall_layout_threadargs *args = data;
++	int status = 0;
++
++	daemonize("nfsv4-layoutreturn");
++
++	dprintk("%s: recall_type=%d fsid 0x%llx-0x%llx start\n",
++		__func__, args->rl->cbl_recall_type,
++		args->rl->cbl_fsid.major, args->rl->cbl_fsid.minor);
++
++	clp = args->clp;
++	inode = args->inode;
++	rl = *args->rl;
++
++	/* support whole file layouts only */
++	rl.cbl_seg.offset = 0;
++	rl.cbl_seg.length = NFS4_MAX_UINT64;
++
++	if (rl.cbl_recall_type == RETURN_FILE) {
++		if (pnfs_is_next_layout_stateid(NFS_I(inode)->layout,
++						rl.cbl_stateid))
++			status = pnfs_return_layout(inode, &rl.cbl_seg,
++						    &rl.cbl_stateid, RETURN_FILE,
++						    false);
++		else
++			status = cpu_to_be32(NFS4ERR_DELAY);
++		if (status)
++			dprintk("%s RETURN_FILE error: %d\n", __func__, status);
++		else
++			status = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
++		args->result = status;
++		complete(&args->started);
++		goto out;
++	}
++
++	status = cpu_to_be32(NFS4_OK);
++	args->result = status;
++	complete(&args->started);
++	args = NULL;	/* on the starter's stack, gone once it is woken */
++
++	/* IMPROVEME: This loop is inefficient, running in O(|s_inodes|^2) */
++	while ((ino = nfs_layoutrecall_find_inode(clp, &rl)) != NULL) {
++		/* FIXME: need to check status on pnfs_return_layout */
++		pnfs_return_layout(ino, &rl.cbl_seg, NULL, RETURN_FILE, false);
++		iput(ino);
++	}
++
++	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
++	if (!lrp) {
++		dprintk("%s: allocation failed. Cannot send last LAYOUTRETURN\n",
++			__func__);
++		goto out;
++	}
++
++	/* send final layoutreturn */
++	lrp->args.reclaim = 0;
++	lrp->args.layout_type = rl.cbl_layout_type;
++	lrp->args.return_type = rl.cbl_recall_type;
++	lrp->args.range = rl.cbl_seg;
++	lrp->args.inode = inode;
++	nfs4_proc_layoutreturn(lrp, true);
++
++out:
++	clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
++	nfs_put_client(clp);
++	dprintk("%s: exit status %d\n", __func__, 0);
++	module_put_and_exit(0);	/* does not return */
++	return 0;
++}
++
++/*
++ * Asynchronous layout recall!
++ */
++static int pnfs_async_return_layout(struct nfs_client *clp, struct inode *inode,
++				    struct cb_layoutrecallargs *rl)
++{
++	struct recall_layout_threadargs data = {
++		.clp = clp,
++		.inode = inode,
++		.rl = rl,
++	};
++	struct task_struct *t;
++	int status = -EAGAIN;
++
++	dprintk("%s: -->\n", __func__);
++
++	/* FIXME: do not allow two concurrent layout recalls */
++	if (test_and_set_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state))
++		return status;
++
++	init_completion(&data.started);
++	__module_get(THIS_MODULE);
++	if (!atomic_inc_not_zero(&clp->cl_count))
++		goto out_put_no_client;
++
++	t = kthread_run(pnfs_recall_layout, &data, "%s", "pnfs_recall_layout");
++	if (IS_ERR(t)) {
++		printk(KERN_INFO "NFS: Layout recall callback thread failed "
++			"for client (clientid %08x/%08x)\n",
++			(unsigned)(clp->cl_clientid >> 32),
++			(unsigned)(clp->cl_clientid));
++		status = PTR_ERR(t);
++		goto out_module_put;
++	}
++	wait_for_completion(&data.started);
++	return data.result;
++out_module_put:
++	nfs_put_client(clp);
++out_put_no_client:
++	clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
++	module_put(THIS_MODULE);
++	return status;
++}
++
++/* Start a RETURN_ALL layout recall on clp; 0 if no layout inode is found */
++static int pnfs_recall_all_layouts(struct nfs_client *clp)
++{
++	/* Zero the rest: the recall thread copies and uses the whole struct */
++	struct cb_layoutrecallargs rl = { .cbl_recall_type = RETURN_ALL };
++	struct inode *inode;
++	int status = 0;
++
++	rl.cbl_seg.iomode = IOMODE_ANY;
++	rl.cbl_seg.offset = 0;
++	rl.cbl_seg.length = NFS4_MAX_UINT64;
++
++	/* we need the inode to get the nfs_server struct */
++	inode = nfs_layoutrecall_find_inode(clp, &rl);
++	if (!inode)
++		return status;
++	status = pnfs_async_return_layout(clp, inode, &rl);
++	iput(inode);
++	return status;
++}
++
++__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
++				  void *dummy)
++{
++	struct nfs_client *clp;
++	struct inode *inode = NULL;
++	__be32 res;
++	int status;
++	unsigned int num_client = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
++	clp  = nfs_find_client(args->cbl_addr, 4);
++	if (clp == NULL) {
++		dprintk("%s: no client for addr %u.%u.%u.%u\n",
++			__func__, NIPQUAD(args->cbl_addr));
++		goto out;
++	}
++
++	res = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
++	do {
++		struct nfs_client *prev = clp;
++		num_client++;
++		/* the callback must come from the MDS personality */
++		if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
++			goto loop;
++		if (args->cbl_recall_type == RETURN_FILE) {
++			inode = nfs_layoutrecall_find_inode(clp, args);
++			if (inode != NULL) {
++				status = pnfs_async_return_layout(clp, inode,
++								  args);
++				if (status)
++					res = cpu_to_be32(NFS4ERR_DELAY);
++				iput(inode);
++			}
++		} else { /* _ALL or _FSID */
++			/* we need the inode to get the nfs_server struct */
++			inode = nfs_layoutrecall_find_inode(clp, args);
++			if (!inode)
++				goto loop;
++			status = pnfs_async_return_layout(clp, inode, args);
++			if (status)
++				res = cpu_to_be32(NFS4ERR_DELAY);
++			iput(inode);
++		}
++loop:
++		clp = nfs_find_client_next(prev);
++		nfs_put_client(prev);
++	} while (clp != NULL);
++
++out:
++	dprintk("%s: exit with status = %d numclient %u\n",
++		__func__, ntohl(res), num_client);
++	return res;
++}
++
++/* Apply each decoded device notification to clp's deviceid cache */
++static __be32 pnfs_devicenotify_client(struct nfs_client *clp,
++				       struct cb_devicenotifyargs *args)
++{
++	struct cb_devicenotifyitem *item;
++	int idx;
++
++	dprintk("%s: --> clp %p\n", __func__, clp);
++
++	for (idx = 0; idx < args->ndevs; idx++) {
++		item = &args->devs[idx];
++		if (item->cbd_notify_type == NOTIFY_DEVICEID4_DELETE &&
++		    clp->cl_devid_cache)
++			nfs4_delete_device(clp->cl_devid_cache,
++					   &item->cbd_dev_id);
++		else if (item->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE)
++			printk(KERN_ERR "%s: NOTIFY_DEVICEID4_CHANGE "
++					"not supported\n", __func__);
++	}
++	return 0;	/* an unsupported CHANGE is only logged, never failed */
++}
++
++/* CB_NOTIFY_DEVICEID: run the notifications against every matching client */
++__be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
++				  void *dummy)
++{
++	struct nfs_client *cur, *next;
++	__be32 res = __constant_htonl(NFS4ERR_INVAL);
++	unsigned int num_client = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	cur = nfs_find_client(args->addr, 4);
++	if (!cur) {
++		dprintk("%s: no client for addr %u.%u.%u.%u\n",
++			__func__, NIPQUAD(args->addr));
++		goto out;
++	}
++
++	/* res ends up holding the status of the last client visited */
++	for (; cur != NULL; cur = next) {
++		num_client++;
++		res = pnfs_devicenotify_client(cur, args);
++		next = nfs_find_client_next(cur);
++		nfs_put_client(cur);
++	}
++
++out:
++	dprintk("%s: exit with status = %d numclient %u\n",
++		__func__, ntohl(res), num_client);
++	return res;
++}
++
+ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
+ {
+ 	if (delegation == NULL)
+ 		return 0;
+ 
+-	/* seqid is 4-bytes long */
+-	if (((u32 *) &stateid->data)[0] != 0)
++	if (stateid->u.stateid.seqid != 0)
+ 		return 0;
+-	if (memcmp(&delegation->stateid.data[4], &stateid->data[4],
+-		   sizeof(stateid->data)-4))
++	if (memcmp(&delegation->stateid.u.stateid.other,
++		   &stateid->u.stateid.other,
++		   NFS4_STATEID_OTHER_SIZE))
+ 		return 0;
+ 
+ 	return 1;
+@@ -335,13 +669,37 @@ out:
+ 	return status;
+ }
+ 
++/* An empty RECALL_ANY type mask, or one with any known bit set, is valid */
++static inline bool validate_bitmap_values(const unsigned long *mask)
++{
++	int bit;
++
++	if (*mask == 0 ||
++	    test_bit(RCA4_TYPE_MASK_RDATA_DLG, mask) ||
++	    test_bit(RCA4_TYPE_MASK_WDATA_DLG, mask) ||
++	    test_bit(RCA4_TYPE_MASK_DIR_DLG, mask) ||
++	    test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, mask) ||
++	    test_bit(RCA4_TYPE_MASK_BLK_LAYOUT, mask))
++		return true;
++	/* no individually named bit matched: scan the two layout bit ranges */
++	for (bit = RCA4_TYPE_MASK_OBJ_LAYOUT_MIN;
++	     bit <= RCA4_TYPE_MASK_OBJ_LAYOUT_MAX; bit++)
++		if (test_bit(bit, mask))
++			return true;
++	for (bit = RCA4_TYPE_MASK_OTHER_LAYOUT_MIN;
++	     bit <= RCA4_TYPE_MASK_OTHER_LAYOUT_MAX; bit++)
++		if (test_bit(bit, mask))
++			return true;
++	return false;
++}
++
+ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
+ {
+ 	struct nfs_client *clp;
+ 	__be32 status;
+ 	fmode_t flags = 0;
+ 
+-	status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
++	status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
+ 	clp = nfs_find_client(args->craa_addr, 4);
+ 	if (clp == NULL)
+ 		goto out;
+@@ -349,16 +707,25 @@ __be32 nfs4_callback_recallany(struct cb
+ 	dprintk("NFS: RECALL_ANY callback request from %s\n",
+ 		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+ 
++	status = cpu_to_be32(NFS4ERR_INVAL);
++	if (!validate_bitmap_values((const unsigned long *)
++				    &args->craa_type_mask))
++		return status;
++
++	status = cpu_to_be32(NFS4_OK);
+ 	if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
+ 		     &args->craa_type_mask))
+ 		flags = FMODE_READ;
+ 	if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
+ 		     &args->craa_type_mask))
+ 		flags |= FMODE_WRITE;
++	if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
++		     &args->craa_type_mask))
++		if (pnfs_recall_all_layouts(clp) == -EAGAIN)
++			status = cpu_to_be32(NFS4ERR_DELAY);
+ 
+ 	if (flags)
+ 		nfs_expire_all_delegation_types(clp, flags);
+-	status = htonl(NFS4_OK);
+ out:
+ 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
+ 	return status;
+diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/nfs/callback_xdr.c
+--- linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-08-31 20:42:05.510143651 -0400
+@@ -22,6 +22,8 @@
+ #define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+ 
+ #if defined(CONFIG_NFS_V4_1)
++#define CB_OP_LAYOUTRECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
++#define CB_OP_DEVICENOTIFY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+ #define CB_OP_SEQUENCE_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ + \
+ 					4 + 1 + 3)
+ #define CB_OP_RECALLANY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+@@ -136,7 +138,7 @@ static __be32 decode_stateid(struct xdr_
+ 	p = read_buf(xdr, 16);
+ 	if (unlikely(p == NULL))
+ 		return htonl(NFS4ERR_RESOURCE);
+-	memcpy(stateid->data, p, 16);
++	memcpy(stateid->u.data, p, 16);
+ 	return 0;
+ }
+ 
+@@ -220,6 +222,148 @@ out:
+ 
+ #if defined(CONFIG_NFS_V4_1)
+ 
++/* Decode CB_LAYOUTRECALL: four fixed words, then a body per recall type */
++static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
++		struct xdr_stream *xdr, struct cb_layoutrecallargs *args)
++{
++	__be32 *p;
++	__be32 status = 0;
++
++	args->cbl_addr = svc_addr(rqstp);
++	p = read_buf(xdr, 4 * sizeof(uint32_t));
++	if (unlikely(p == NULL)) {
++		status = htonl(NFS4ERR_BADXDR);
++		goto out;
++	}
++	args->cbl_layout_type = ntohl(*p++);
++	args->cbl_seg.iomode = ntohl(*p++);
++	args->cbl_layoutchanged = ntohl(*p++);
++	args->cbl_recall_type = ntohl(*p++);
++
++	if (likely(args->cbl_recall_type == RETURN_FILE)) {
++		status = decode_fh(xdr, &args->cbl_fh);
++		if (unlikely(status != 0))
++			goto out;
++		p = read_buf(xdr, 2 * sizeof(uint64_t));
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_BADXDR);
++			goto out;
++		}
++		p = xdr_decode_hyper(p, &args->cbl_seg.offset);
++		p = xdr_decode_hyper(p, &args->cbl_seg.length);
++		status = decode_stateid(xdr, &args->cbl_stateid);
++		if (unlikely(status != 0))
++			goto out;
++	} else if (args->cbl_recall_type == RETURN_FSID) {
++		p = read_buf(xdr, 2 * sizeof(uint64_t));
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_BADXDR);
++			goto out;
++		}
++		p = xdr_decode_hyper(p, &args->cbl_fsid.major);
++		p = xdr_decode_hyper(p, &args->cbl_fsid.minor);
++	} else if (args->cbl_recall_type != RETURN_ALL) { /* unknown type */
++		status = htonl(NFS4ERR_BADXDR);
++		goto out;
++	}
++	dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d "
++		"fsid %llx-%llx fhsize %d\n", __func__,
++		args->cbl_layout_type, args->cbl_seg.iomode,
++		args->cbl_layoutchanged, args->cbl_recall_type,
++		args->cbl_fsid.major, args->cbl_fsid.minor, args->cbl_fh.size);
++out:
++	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
++	return status;
++}
++/* Decode CB_NOTIFY_DEVICEID args into args->devs[], counted in args->ndevs */
++static
++__be32 decode_devicenotify_args(struct svc_rqst *rqstp,
++				struct xdr_stream *xdr,
++				struct cb_devicenotifyargs *args)
++{
++	__be32 *p;
++	__be32 status = 0;
++	u32 tmp;
++	int n, i;
++	args->ndevs = 0;
++
++	args->addr = svc_addr(rqstp);
++
++	/* Num of device notifications */
++	p = read_buf(xdr, sizeof(uint32_t));
++	if (unlikely(p == NULL)) {
++		status = htonl(NFS4ERR_RESOURCE);
++		goto out;
++	}
++	n = ntohl(*p++);
++	if (n <= 0)
++		goto out;
++
++	/* XXX: need to possibly return error in this case */
++	if (n > NFS4_DEV_NOTIFY_MAXENTRIES) {
++		dprintk("%s: Processing (%d) notifications out of (%d)\n",
++			__func__, NFS4_DEV_NOTIFY_MAXENTRIES, n);
++		n = NFS4_DEV_NOTIFY_MAXENTRIES;
++	}
++
++	/* Decode each dev notification */
++	for (i = 0; i < n; i++) {
++		struct cb_devicenotifyitem *dev = &args->devs[i];
++
++		p = read_buf(xdr, (4 * sizeof(uint32_t))
++			     + NFS4_PNFS_DEVICEID4_SIZE);
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_RESOURCE);
++			goto out;
++		}
++
++		tmp = ntohl(*p++);	/* bitmap size */
++		if (tmp != 1) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++		dev->cbd_notify_type = ntohl(*p++);
++		if (dev->cbd_notify_type != NOTIFY_DEVICEID4_CHANGE &&
++		    dev->cbd_notify_type != NOTIFY_DEVICEID4_DELETE) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++
++		tmp = ntohl(*p++);	/* opaque size */
++		if (((dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) &&
++		     (tmp != NFS4_PNFS_DEVICEID4_SIZE + 8)) ||
++		    ((dev->cbd_notify_type == NOTIFY_DEVICEID4_DELETE) &&
++		     (tmp != NFS4_PNFS_DEVICEID4_SIZE + 4))) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++		dev->cbd_layout_type = ntohl(*p++);
++		memcpy(dev->cbd_dev_id.data, p, NFS4_PNFS_DEVICEID4_SIZE);
++		p += XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE);
++		/* the extra word depends on the notify type, not the layout type */
++		if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) {
++			p = read_buf(xdr, sizeof(uint32_t));
++			if (unlikely(p == NULL)) {
++				status = htonl(NFS4ERR_DELAY);
++				goto out;
++			}
++			dev->cbd_immediate = ntohl(*p++);
++		} else {
++			dev->cbd_immediate = 0;
++		}
++
++		args->ndevs++;
++
++		dprintk("%s: type %d layout 0x%x immediate %d\n",
++			__func__, dev->cbd_notify_type, dev->cbd_layout_type,
++			dev->cbd_immediate);
++	}
++out:
++	dprintk("%s: status %d ndevs %d\n",
++		__func__, ntohl(status), args->ndevs);
++	return status;
++}
++
+ static __be32 decode_sessionid(struct xdr_stream *xdr,
+ 				 struct nfs4_sessionid *sid)
+ {
+@@ -574,11 +718,11 @@ preprocess_nfs41_op(int nop, unsigned in
+ 	case OP_CB_SEQUENCE:
+ 	case OP_CB_RECALL_ANY:
+ 	case OP_CB_RECALL_SLOT:
++	case OP_CB_LAYOUTRECALL:
++	case OP_CB_NOTIFY_DEVICEID:
+ 		*op = &callback_ops[op_nr];
+ 		break;
+ 
+-	case OP_CB_LAYOUTRECALL:
+-	case OP_CB_NOTIFY_DEVICEID:
+ 	case OP_CB_NOTIFY:
+ 	case OP_CB_PUSH_DELEG:
+ 	case OP_CB_RECALLABLE_OBJ_AVAIL:
+@@ -739,6 +883,18 @@ static struct callback_op callback_ops[]
+ 		.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
+ 	},
+ #if defined(CONFIG_NFS_V4_1)
++	[OP_CB_LAYOUTRECALL] = {
++		.process_op = (callback_process_op_t)nfs4_callback_layoutrecall,
++		.decode_args =
++			(callback_decode_arg_t)decode_layoutrecall_args,
++		.res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
++	},
++	[OP_CB_NOTIFY_DEVICEID] = {
++		.process_op = (callback_process_op_t)nfs4_callback_devicenotify,
++		.decode_args =
++			(callback_decode_arg_t)decode_devicenotify_args,
++		.res_maxsize = CB_OP_DEVICENOTIFY_RES_MAXSZ,
++	},
+ 	[OP_CB_SEQUENCE] = {
+ 		.process_op = (callback_process_op_t)nfs4_callback_sequence,
+ 		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
+diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/client.c
+--- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-08-31 20:41:19.144140225 -0400
++++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-31 20:42:05.511222861 -0400
+@@ -39,6 +39,7 @@
+ #include <net/ipv6.h>
+ #include <linux/nfs_xdr.h>
+ #include <linux/sunrpc/bc_xprt.h>
++#include <linux/nfs4_pnfs.h>
+ 
+ #include <asm/system.h>
+ 
+@@ -48,6 +49,7 @@
+ #include "iostat.h"
+ #include "internal.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_CLIENT
+ 
+@@ -150,11 +152,14 @@ static struct nfs_client *nfs_alloc_clie
+ 	clp->cl_boot_time = CURRENT_TIME;
+ 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
+ 	clp->cl_minorversion = cl_init->minorversion;
++	clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
+ #endif
+ 	cred = rpc_lookup_machine_cred();
+ 	if (!IS_ERR(cred))
+ 		clp->cl_machine_cred = cred;
+-
++#if defined(CONFIG_NFS_V4_1)
++	INIT_LIST_HEAD(&clp->cl_layouts);
++#endif
+ 	nfs_fscache_get_client_cookie(clp);
+ 
+ 	return clp;
+@@ -178,7 +183,7 @@ static void nfs4_clear_client_minor_vers
+ 		clp->cl_session = NULL;
+ 	}
+ 
+-	clp->cl_call_sync = _nfs4_call_sync;
++	clp->cl_mvops = nfs_v4_minor_ops[0];
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
+@@ -188,7 +193,7 @@ static void nfs4_clear_client_minor_vers
+ static void nfs4_destroy_callback(struct nfs_client *clp)
+ {
+ 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+-		nfs_callback_down(clp->cl_minorversion);
++		nfs_callback_down(clp->cl_mvops->minor_version);
+ }
+ 
+ static void nfs4_shutdown_client(struct nfs_client *clp)
+@@ -251,6 +256,7 @@ void nfs_put_client(struct nfs_client *c
+ 		nfs_free_client(clp);
+ 	}
+ }
++EXPORT_SYMBOL(nfs_put_client);
+ 
+ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ /*
+@@ -343,7 +349,7 @@ static int nfs_sockaddr_match_ipaddr(con
+  * Test if two socket addresses represent the same actual socket,
+  * by comparing (only) relevant fields, including the port number.
+  */
+-static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
++int nfs_sockaddr_cmp(const struct sockaddr *sa1,
+ 			    const struct sockaddr *sa2)
+ {
+ 	if (sa1->sa_family != sa2->sa_family)
+@@ -357,6 +363,7 @@ static int nfs_sockaddr_cmp(const struct
+ 	}
+ 	return 0;
+ }
++EXPORT_SYMBOL(nfs_sockaddr_cmp);
+ 
+ /*
+  * Find a client by IP address and protocol version
+@@ -548,6 +555,7 @@ int nfs4_check_client_ready(struct nfs_c
+ 		return -EPROTONOSUPPORT;
+ 	return 0;
+ }
++EXPORT_SYMBOL(nfs4_check_client_ready);
+ 
+ /*
+  * Initialise the timeout values for a connection
+@@ -865,9 +873,34 @@ error:
+ }
+ 
+ /*
++ * Initialize the pNFS layout driver and setup pNFS related parameters
++ */
++static void nfs4_init_pnfs(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fsinfo *fsinfo)
++{
++#if defined(CONFIG_NFS_V4_1)
++	struct nfs_client *clp = server->nfs_client;
++	/* only with a session and the USE_PNFS_MDS exchange flag; else a no-op */
++	if (nfs4_has_session(clp) &&
++	    (clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS)) {
++		server->pnfs_blksize = fsinfo->blksize;
++		set_pnfs_layoutdriver(server, mntfh, fsinfo->layouttype);
++		pnfs_set_ds_iosize(server);
++	}
++#endif /* CONFIG_NFS_V4_1 */
++}
++/* Call unmount_pnfs_layoutdriver() when the server's client has a session */
++static void nfs4_uninit_pnfs(struct nfs_server *server)
++{
++#if defined(CONFIG_NFS_V4_1)
++	if (server->nfs_client && nfs4_has_session(server->nfs_client))
++		unmount_pnfs_layoutdriver(server);
++#endif /* CONFIG_NFS_V4_1 */
++}
++
++/*
+  * Load up the server record from information gained in an fsinfo record
+  */
+-static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo)
++static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fsinfo *fsinfo)
+ {
+ 	unsigned long max_rpc_payload;
+ 
+@@ -897,6 +930,8 @@ static void nfs_server_set_fsinfo(struct
+ 	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
+ 		server->wsize = NFS_MAX_FILE_IO_SIZE;
+ 	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
++	nfs4_init_pnfs(server, mntfh, fsinfo);
++
+ 	server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
+ 
+ 	server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
+@@ -938,7 +973,7 @@ static int nfs_probe_fsinfo(struct nfs_s
+ 	if (error < 0)
+ 		goto out_error;
+ 
+-	nfs_server_set_fsinfo(server, &fsinfo);
++	nfs_server_set_fsinfo(server, mntfh, &fsinfo);
+ 
+ 	/* Get some general file system info */
+ 	if (server->namelen == 0) {
+@@ -1016,6 +1051,7 @@ void nfs_free_server(struct nfs_server *
+ {
+ 	dprintk("--> nfs_free_server()\n");
+ 
++	nfs4_uninit_pnfs(server);
+ 	spin_lock(&nfs_client_lock);
+ 	list_del(&server->client_link);
+ 	list_del(&server->master_link);
+@@ -1126,7 +1162,7 @@ static int nfs4_init_callback(struct nfs
+ 				return error;
+ 		}
+ 
+-		error = nfs_callback_up(clp->cl_minorversion,
++		error = nfs_callback_up(clp->cl_mvops->minor_version,
+ 					clp->cl_rpcclient->cl_xprt);
+ 		if (error < 0) {
+ 			dprintk("%s: failed to start callback. Error = %d\n",
+@@ -1143,10 +1179,8 @@ static int nfs4_init_callback(struct nfs
+  */
+ static int nfs4_init_client_minor_version(struct nfs_client *clp)
+ {
+-	clp->cl_call_sync = _nfs4_call_sync;
+-
+ #if defined(CONFIG_NFS_V4_1)
+-	if (clp->cl_minorversion) {
++	if (clp->cl_mvops->minor_version) {
+ 		struct nfs4_session *session = NULL;
+ 		/*
+ 		 * Create the session and mark it expired.
+@@ -1158,7 +1192,13 @@ static int nfs4_init_client_minor_versio
+ 			return -ENOMEM;
+ 
+ 		clp->cl_session = session;
+-		clp->cl_call_sync = _nfs4_call_sync_session;
++		/*
++		 * The create session reply races with the server back
++		 * channel probe. Mark the client NFS_CS_SESSION_INITING
++		 * so that the client back channel can find the
++		 * nfs_client struct
++		 */
++		clp->cl_cons_state = NFS_CS_SESSION_INITING;
+ 	}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+@@ -1216,7 +1256,7 @@ error:
+ /*
+  * Set up an NFS4 client
+  */
+-static int nfs4_set_client(struct nfs_server *server,
++int nfs4_set_client(struct nfs_server *server,
+ 		const char *hostname,
+ 		const struct sockaddr *addr,
+ 		const size_t addrlen,
+@@ -1259,6 +1299,7 @@ error:
+ 	dprintk("<-- nfs4_set_client() = xerror %d\n", error);
+ 	return error;
+ }
++EXPORT_SYMBOL(nfs4_set_client);
+ 
+ 
+ /*
+@@ -1448,7 +1489,7 @@ struct nfs_server *nfs4_create_referral_
+ 				data->authflavor,
+ 				parent_server->client->cl_xprt->prot,
+ 				parent_server->client->cl_timeout,
+-				parent_client->cl_minorversion);
++				parent_client->cl_mvops->minor_version);
+ 	if (error < 0)
+ 		goto error;
+ 
+diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/bl_com.c
+--- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-08-31 20:42:05.550110844 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-08-31 20:42:05.550110844 -0400
+@@ -0,0 +1,292 @@
++#if defined(CONFIG_SPNFS_BLOCK)
++
++#include <linux/module.h>
++#include <linux/mutex.h>
++#include <linux/init.h>
++#include <linux/types.h>
++#include <linux/slab.h>
++#include <linux/socket.h>
++#include <linux/in.h>
++#include <linux/sched.h>
++#include <linux/exportfs.h>
++#include <linux/namei.h>
++#include <linux/mount.h>
++#include <linux/path.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/workqueue.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++#include <linux/proc_fs.h>
++#include <linux/nfs_fs.h>
++
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd4_block.h>
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++static ssize_t bl_pipe_upcall(struct file *, struct rpc_pipe_msg *,
++    char __user *, size_t);
++static ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t);
++static void bl_pipe_destroy_msg(struct rpc_pipe_msg *);
++
++static struct rpc_pipe_ops bl_upcall_ops = {
++	.upcall		= bl_pipe_upcall,
++	.downcall	= bl_pipe_downcall,
++	.destroy_msg	= bl_pipe_destroy_msg,
++};
++
++bl_comm_t	*bl_comm_global;
++/* Create the rpc_pipefs "pnfs_block" pipe and publish it in bl_comm_global. */
++int
++nfsd_bl_start(void)
++{
++	bl_comm_t	*bl_comm;
++	struct vfsmount	*mnt;
++	struct nameidata nd;
++	int rc;
++
++	dprintk("%s: starting pipe\n", __func__);
++	if (bl_comm_global)
++		return -EEXIST;
++
++	mnt = rpc_get_mount();
++	if (IS_ERR(mnt))
++		return PTR_ERR(mnt);
++
++	rc = -ENOMEM;
++	bl_comm = kzalloc(sizeof (*bl_comm), GFP_KERNEL);
++	if (!bl_comm)
++		goto err;
++	/* Initialise locking before the pipe becomes visible to userspace. */
++	mutex_init(&bl_comm->lock);
++	mutex_init(&bl_comm->pipe_lock);
++	init_waitqueue_head(&bl_comm->pipe_wq);
++
++	/* FIXME: do not abuse rpc_pipefs/nfs */
++	rc = vfs_path_lookup(mnt->mnt_root, mnt, "/nfs", 0, &nd);
++	if (rc)
++		goto err;
++
++	/* FIXME: rename to "spnfs_block" */
++	bl_comm->pipe_dentry = rpc_mkpipe(nd.path.dentry, "pnfs_block", bl_comm,
++					 &bl_upcall_ops, 0);
++	path_put(&nd.path);	/* drop the reference taken by the lookup */
++	if (IS_ERR(bl_comm->pipe_dentry)) {
++		rc = -EPIPE;
++		goto err;
++	}
++	bl_comm_global = bl_comm;
++	return 0;
++err:
++	rpc_put_mount();
++	kfree(bl_comm);
++	return rc;
++}
++/* Unlink the pipe of the global bl_comm, clear the global and free it. */
++void
++nfsd_bl_stop(void)
++{
++	bl_comm_t	*c = bl_comm_global;
++
++	dprintk("%s: stopping pipe\n", __func__);
++	if (!c)
++		return;
++	rpc_unlink(c->pipe_dentry);
++	rpc_put_mount();
++	bl_comm_global = NULL;
++	kfree(c);
++}
++/* rpc_pipefs ->upcall: copy the unread remainder of msg to the reader. */
++static ssize_t
++bl_pipe_upcall(struct file *file, struct rpc_pipe_msg *msg, char __user *dst,
++    size_t buflen)
++{
++	char	*data	= (char *)msg->data + msg->copied;
++	ssize_t	mlen	= msg->len - msg->copied,
++		left;
++
++	if (mlen > buflen)
++		mlen = buflen;
++
++	left = copy_to_user(dst, data, mlen);
++	/* copy_to_user() returns the byte count NOT copied, never an errno */
++	if (mlen != 0 && left == mlen) {
++		msg->errno = -EFAULT;
++		return -EFAULT;
++	}
++	mlen		-= left;
++	msg->copied	+= mlen;
++	msg->errno	= 0;
++	return mlen;
++}
++/* ->downcall: pass the daemon's reply to the bl_upcall() waiter via bc->msg. */
++static ssize_t
++bl_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
++{
++	struct rpc_inode	*rpci	= RPC_I(filp->f_dentry->d_inode);
++	bl_comm_t		*bc	= (bl_comm_t *)rpci->private;
++	bl_comm_msg_t		*im	= &bc->msg;
++	bl_comm_res_t		*res;
++
++	/* A reply too short to hold res_status is reported as a failure. */
++	if (mlen < offsetof(bl_comm_res_t, res_status) +
++		   sizeof(res->res_status)) {
++		/* take pipe_lock so the wake-up cannot race bl_upcall() */
++		mutex_lock(&bc->pipe_lock);
++		im->msg_status = PNFS_BLOCK_FAILURE;
++		im->msg_res = NULL;
++		wake_up(&bc->pipe_wq);
++		mutex_unlock(&bc->pipe_lock);
++		return -EFAULT;
++	}
++
++	if ((res = kmalloc(mlen, GFP_KERNEL)) == NULL)
++		return -ENOMEM;
++
++	if (copy_from_user(res, src, mlen)) {
++		kfree(res);
++		return -EFAULT;
++	}
++
++	mutex_lock(&bc->pipe_lock);
++	im->msg_status	= res->res_status;
++	im->msg_res	= res;
++	wake_up(&bc->pipe_wq);
++	mutex_unlock(&bc->pipe_lock);
++	return mlen;
++}
++/* ->destroy_msg: on negative msg->errno mark the upcall failed and wake up. */
++static void
++bl_pipe_destroy_msg(struct rpc_pipe_msg *msg)
++{
++	bl_comm_msg_t	*im = msg->data;
++	bl_comm_t	*bc = container_of(im, struct bl_comm, msg);
++	
++	if (msg->errno >= 0)
++		return;
++
++	mutex_lock(&bc->pipe_lock);
++	im->msg_status = PNFS_BLOCK_FAILURE;
++	wake_up(&bc->pipe_wq);
++	mutex_unlock(&bc->pipe_lock);
++}
++/* Upcall to the daemon and wait; 0 and *res = reply (caller kfree()s) if OK. */
++int
++bl_upcall(bl_comm_t *bc, bl_comm_msg_t *upmsg, bl_comm_res_t **res)
++{
++	struct rpc_pipe_msg	msg;
++	DECLARE_WAITQUEUE(wq, current);
++	int			rval	= 1;
++	bl_comm_msg_t		*m;
++
++	/* Callers kfree(*res) even on failure, so never leave it undefined. */
++	*res = NULL;
++	if (bc == NULL) {
++		dprintk("%s: No pNFS block daemon available\n", __func__);
++		return 1;
++	}
++	m = &bc->msg;	/* only form the address once bc is known valid */
++
++	mutex_lock(&bc->lock);
++	mutex_lock(&bc->pipe_lock);
++	memcpy(m, upmsg, sizeof (*m));
++	memset(&msg, 0, sizeof (msg));
++	msg.data = m;
++	msg.len = sizeof (*m);
++
++	add_wait_queue(&bc->pipe_wq, &wq);
++	rval = rpc_queue_upcall(bc->pipe_dentry->d_inode, &msg);
++	if (rval < 0) {
++		remove_wait_queue(&bc->pipe_wq, &wq);
++		goto out;
++	}
++
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	mutex_unlock(&bc->pipe_lock);
++	schedule();
++	__set_current_state(TASK_RUNNING);
++	remove_wait_queue(&bc->pipe_wq, &wq);
++	mutex_lock(&bc->pipe_lock);
++
++	if (m->msg_status == PNFS_BLOCK_SUCCESS) {
++		*res = m->msg_res;
++		rval = 0;
++	} else
++		rval = 1;
++out:
++	mutex_unlock(&bc->pipe_lock);
++	mutex_unlock(&bc->lock);
++	return rval;
++}
++/* Write handler for the ctl file: the first int of buf selects a command. */
++static ssize_t ctl_write(struct file *file, const char __user *buf, size_t len,
++    loff_t *offset)
++{
++	int		cmd, rc;
++	bl_comm_t	*bc	= bl_comm_global;
++	bl_comm_msg_t	msg;
++	bl_comm_res_t	*res	= NULL;	/* kfree()d even if the upcall fails */
++
++	if (len < sizeof (int))
++		return -EINVAL;
++	if (copy_from_user(&cmd, buf, sizeof (int)))
++		return -EFAULT;
++	/* msg is copied out to the daemon: do not expose stack contents */
++	memset(&msg, 0, sizeof (msg));
++	switch (cmd) {
++	case PNFS_BLOCK_CTL_STOP:
++		msg.msg_type = PNFS_UPCALL_MSG_STOP;
++		(void) bl_upcall(bc, &msg, &res);
++		kfree(res);
++		nfsd_bl_stop();
++		break;
++	case PNFS_BLOCK_CTL_START:
++		rc = nfsd_bl_start();
++		if (rc != 0)
++			return rc;
++		break;
++	case PNFS_BLOCK_CTL_VERS:
++		msg.msg_type = PNFS_UPCALL_MSG_VERS;
++		msg.u.msg_vers = PNFS_UPCALL_VERS;
++		if (bl_upcall(bc, &msg, &res)) {
++			dprintk("%s: Failed to contact pNFS block daemon\n",
++			    __func__);
++			return 0;
++		}
++		kfree(res);
++		break;
++	default:
++		dprintk("%s: unknown ctl command %d\n", __func__, cmd);
++		break;
++	}
++	return len;
++}
++
++static struct file_operations ctl_ops = {	/* fops of fs/pnfs_block/ctl */
++	.write	= ctl_write,
++};
++
++/*
++ * bl_init_proc -- set up proc interfaces
++ *
++ * Creating a pnfs_block directory isn't really required at this point
++ * since we've only got a single node in that directory. If the need for
++ * more nodes doesn't present itself shortly this code should revert
++ * to a single top level node. McNeal 11-Aug-2008.
++ */
++int
++bl_init_proc(void)
++{
++	struct proc_dir_entry *e;
++
++	e = proc_mkdir("fs/pnfs_block", NULL);
++	if (!e)
++		return -ENOMEM;
++	e = create_proc_entry("fs/pnfs_block/ctl", 0, NULL);
++	if (!e) {
++		remove_proc_entry("fs/pnfs_block", NULL); /* undo the mkdir */
++		return -ENOMEM;
++	}
++	e->proc_fops = &ctl_ops;
++	return 0;
++}
++#endif /* CONFIG_SPNFS_BLOCK */
+diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/bl_ops.c
+--- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-08-31 20:42:05.551222888 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-08-31 20:42:05.551222888 -0400
+@@ -0,0 +1,1672 @@
++/*
++ *  bl_ops.c
++ *  spNFS
++ *
++ *  Created by Rick McNeal on 4/1/08.
++ *  Copyright 2008 __MyCompanyName__. All rights reserved.
++ *
++ */
++
++/*
++ * Block layout operations.
++ *
++ * These functions, with the exception of pnfs_block_enabled, are assigned to
++ * the super block s_export_op structure.
++ */
++#if defined(CONFIG_SPNFS_BLOCK)
++
++#include <linux/module.h>
++#include <linux/genhd.h>
++#include <linux/fs.h>
++#include <linux/exportfs.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd/export.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/nfsd/debug.h>
++#include <linux/spinlock_types.h>
++#include <linux/dm-ioctl.h>
++#include <asm/uaccess.h>
++#include <linux/falloc.h>
++#include <linux/nfsd4_block.h>
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY	NFSDDBG_PNFS
++
++#define MIN(a, b) ((a) < (b) ? (a) : (b))
++
++#define BL_LAYOUT_HASH_BITS	4
++#define BL_LAYOUT_HASH_SIZE	(1 << BL_LAYOUT_HASH_BITS)
++#define BL_LAYOUT_HASH_MASK	(BL_LAYOUT_HASH_SIZE - 1)
++#define BL_LIST_REQ	(sizeof (struct dm_ioctl) + 256)
++
++#define bl_layout_hashval(id) \
++	((id) & BL_LAYOUT_HASH_MASK)
++
++#define BLL_F_END(p) ((p)->bll_foff + (p)->bll_len)
++#define BLL_S_END(p) ((p)->bll_soff + (p)->bll_len)
++#define _2SECTS(v) ((v) >> 9)
++
++#ifndef READ32
++#define READ32(x)	(x) = ntohl(*p++)
++#define READ64(x)	do {			\
++(x) = (u64)ntohl(*p++) << 32;	\
++(x) |= ntohl(*p++);		\
++} while (0)
++#endif
++
++
++typedef enum {True, False} boolean_t;	/* NB: True == 0, False == 1 */
++/* ---- block layoutget and commit structure ---- */
++typedef struct bl_layout_rec {
++	struct list_head	blr_hash,
++				blr_layouts;
++	dev_t			blr_rdev;
++	struct inode		*blr_inode;
++	int			blr_recalled;	// debug
++	u64			blr_orig_size,
++				blr_commit_size,
++				blr_ext_size;
++	spinlock_t		blr_lock;	// Protects blr_layouts
++} bl_layout_rec_t;
++
++static struct list_head layout_hash;
++static struct list_head layout_hashtbl[BL_LAYOUT_HASH_SIZE];
++static spinlock_t layout_hashtbl_lock;
++
++/* ---- prototypes ---- */
++static boolean_t device_slice(dev_t devid);
++static boolean_t device_dm(dev_t devid);
++static boolean_t layout_inode_add(struct inode *i, bl_layout_rec_t **);
++static bl_layout_rec_t *layout_inode_find(struct inode *i);
++static void layout_inode_del(struct inode *i);
++static char *map_state2name(enum pnfs_block_extent_state4 s);
++static pnfs_blocklayout_devinfo_t *bld_alloc(struct list_head *volume, int type);
++static void bld_free(pnfs_blocklayout_devinfo_t *bld);
++static pnfs_blocklayout_devinfo_t *bld_simple(struct list_head *volumes,
++    dev_t devid, int local_index);
++static pnfs_blocklayout_devinfo_t *bld_slice(struct list_head *volumes,
++    dev_t devid, int my_loc, int idx);
++static int layout_cache_fill_from(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg);
++struct list_head *layout_cache_iter(bl_layout_rec_t *r,
++    struct list_head *bl_possible, struct nfsd4_layout_seg *seg);
++static void layout_cache_merge(bl_layout_rec_t *r, struct list_head *h);
++static int layout_cache_update(bl_layout_rec_t *r, struct list_head *h);
++static void layout_cache_del(bl_layout_rec_t *r, const struct nfsd4_layout_seg *seg);
++static void print_bll(pnfs_blocklayout_layout_t *b, char *);
++static inline boolean_t layout_cache_fill_from_list(bl_layout_rec_t *r,
++    struct list_head *h, struct nfsd4_layout_seg *seg);
++static inline void bll_collapse(bl_layout_rec_t *r,
++    pnfs_blocklayout_layout_t *c);
++static pnfs_blocklayout_layout_t *bll_alloc(u64 offset, u64 len,
++    enum bl_cache_state state, struct list_head *h);
++static pnfs_blocklayout_layout_t *bll_alloc_dup(pnfs_blocklayout_layout_t *b,
++    enum bl_cache_state c, struct list_head *h);
++static inline boolean_t layout_conflict(pnfs_blocklayout_layout_t *b, u32 iomode,
++    enum pnfs_block_extent_state4 *s);
++static void extents_setup(struct fiemap_extent_info *fei);
++static void extents_count(struct fiemap_extent_info *fei, struct inode *i,
++    u64 foff, u64 len);
++static boolean_t extents_get(struct fiemap_extent_info *fei, struct inode *i,
++    u64 foff, u64 len);
++static boolean_t extents_process(struct fiemap_extent_info *fei,
++    struct list_head *bl_candidates, struct nfsd4_layout_seg *, dev_t dev,
++    pnfs_blocklayout_layout_t *b);
++static void extents_cleanup(struct fiemap_extent_info *fei);
++/* Initialise the layout hash lock and list heads, then call bl_init_proc(). */
++void
++nfsd_bl_init(void)
++{
++	int	i;
++	dprintk("%s loaded\n", __func__);
++
++	spin_lock_init(&layout_hashtbl_lock);
++	INIT_LIST_HEAD(&layout_hash);
++	for (i = 0; i < BL_LAYOUT_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&layout_hashtbl[i]);
++	bl_init_proc();	/* NOTE(review): return value is ignored */
++}
++
++/*
++ * pnfs_block_enabled -- check to see if this file system should be exported
++ * as block pnfs; returns 1 if so, 0 otherwise
++ */
++int
++pnfs_block_enabled(struct inode *inode, int ex_flags)
++{
++	bl_comm_msg_t	msg;
++	bl_comm_res_t	*res	= NULL;
++	static int bl_comm_once	= 0;
++	
++	dprintk("--> %s\n", __func__);
++	/*
++	 * FIXME: Figure out method to determine if this file system should
++	 * be exported. The following areas need to be checked.
++	 * (1) Validate that this file system was exported as a pNFS
++	 *     block-layout
++	 * (2) Has there been successful communication with the
++	 *     volume daemon?
++	 */
++	/* Check #1 */
++#ifdef notyet
++	if (!(ex_flags & NFSEXP_PNFS_BLOCK)) {
++		dprintk("%s: pnfs_block not set in export\n", __func__);
++		return 0;
++	}
++#endif
++	
++	/* Check #2: done only until one version exchange has succeeded */
++	if (!bl_comm_once) {
++		msg.msg_type = PNFS_UPCALL_MSG_VERS;
++		msg.u.msg_vers = PNFS_UPCALL_VERS;
++		if (bl_upcall(bl_comm_global, &msg, &res)) {
++			dprintk("%s: Failed to contact pNFS block daemon\n",
++				__func__);
++			return 0;
++		}
++		if (msg.u.msg_vers != res->u.vers) {
++			dprintk("%s: vers mismatch, kernel != daemon\n",
++				__func__);
++			kfree(res);
++			return 0;
++		}
++	}
++	bl_comm_once = 1;
++	/* res is still NULL here when the version check was skipped */
++	kfree(res);
++	
++	dprintk("<-- %s okay\n", __func__);
++	return 1;
++}
++/* Always reports LAYOUT_BLOCK_VOLUME; sb is not consulted. */
++int
++bl_layout_type(struct super_block *sb)
++{
++	return LAYOUT_BLOCK_VOLUME;
++}
++/* Report one device (sb->s_dev) with eof set; a non-zero cookie gets -ENOENT */
++int
++bl_getdeviceiter(struct super_block *sb,
++		 u32 layout_type,
++		 struct nfsd4_pnfs_dev_iter_res *res)
++{
++	res->gd_eof = 1;	
++	if (res->gd_cookie)
++		return -ENOENT;
++	res->gd_devid	= sb->s_dev;
++	res->gd_verf	= 1;
++	res->gd_cookie	= 1;
++	return 0;
++}
++/* Build volumes with bld_simple() and bld_slice() for devid and encode them. */
++static int
++bl_getdeviceinfo_slice(struct super_block *sb, struct exp_xdr_stream *xdr,
++		       const struct nfsd4_pnfs_deviceid *devid)
++{
++	pnfs_blocklayout_devinfo_t	*bld_slice_p,
++					*bld_simple_p,
++					*bld;
++	int				status		= -EIO,
++					location	= 0;
++	struct list_head		volumes;
++	
++	dprintk("--> %s\n", __func__);
++	INIT_LIST_HEAD(&volumes);
++
++	bld_simple_p = bld_simple(&volumes, devid->devid,
++				  location++);
++	if (!bld_simple_p)
++		goto out;
++	bld_slice_p = bld_slice(&volumes, devid->devid, location++,
++	    bld_simple_p->bld_index_loc);
++
++	if (!bld_slice_p)
++		goto out;
++	
++	status = blocklayout_encode_devinfo(xdr, &volumes);
++	/* NOTE(review): loop ends only if bld_free() unlinks bld -- confirm */
++out:
++	while (!list_empty(&volumes)) {
++		bld = list_entry(volumes.next, pnfs_blocklayout_devinfo_t,
++		    bld_list);
++		if (bld->bld_type == PNFS_BLOCK_VOLUME_SIMPLE)
++			kfree(bld->u.simple.bld_sig);
++		bld_free(bld);
++	}
++	
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++/* Describe a dm stripe: volumes per member device, the STRIPE volume last. */
++static int
++bl_getdeviceinfo_dm(struct super_block *sb, struct exp_xdr_stream *xdr,
++		    const struct nfsd4_pnfs_deviceid *devid)
++{
++	pnfs_blocklayout_devinfo_t	*bld,
++					*stripe		= NULL;
++	int				status		= -EIO,	// default to error
++					i,
++					location	= 0;
++	struct list_head		volumes;
++	bl_comm_msg_t			msg;
++	bl_comm_res_t			*res		= NULL;
++
++	dprintk("--> %s\n", __func__);
++	INIT_LIST_HEAD(&volumes);
++
++	msg.msg_type = PNFS_UPCALL_MSG_DMGET;
++	msg.u.msg_dev = devid->devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("%s: upcall for DMGET failed\n", __func__);
++		goto out;
++	}
++
++	/*
++	 * Don't use bld_alloc() here. If used this will be the first volume
++	 * type added to the list whereas the protocol requires it to be the
++	 * last.
++	 */
++	stripe = kzalloc(sizeof (*stripe), GFP_KERNEL);
++	if (!stripe)
++		goto out;
++	stripe->bld_type		= PNFS_BLOCK_VOLUME_STRIPE;
++	stripe->u.stripe.bld_stripes	= res->u.stripe.num_stripes;
++	stripe->u.stripe.bld_chunk_size	= res->u.stripe.stripe_size * 512LL;
++	dprintk("%s: stripes %d, chunk_size %Lu\n", __func__,
++	    stripe->u.stripe.bld_stripes, stripe->u.stripe.bld_chunk_size / 512LL);
++	stripe->u.stripe.bld_stripe_indexs =
++		kcalloc(stripe->u.stripe.bld_stripes, sizeof (int), GFP_KERNEL);
++	if (!stripe->u.stripe.bld_stripe_indexs)
++		goto out;
++	for (i = 0; i < stripe->u.stripe.bld_stripes; i++) {
++		dev_t			dev;
++		pnfs_blocklayout_devinfo_t	*bldp;
++		dev = MKDEV(res->u.stripe.devs[i].major,
++			    res->u.stripe.devs[i].minor);
++		if (dev == 0)
++			goto out;
++		bldp = bld_simple(&volumes, dev, location++);
++		if (!bldp) {
++			dprintk("%s: bld_simple failed\n", __func__);
++			goto out;
++		}
++		bldp = bld_slice(&volumes, dev, location++, bldp->bld_index_loc);
++		if (!bldp) {
++			dprintk("%s: bld_slice failed\n", __func__);
++			goto out;
++		}
++		stripe->u.stripe.bld_stripe_indexs[i] = bldp->bld_index_loc;
++	}
++	/* Once listed, the stripe volume is released by the loop below. */
++	list_add_tail(&stripe->bld_list, &volumes);
++	stripe = NULL;
++	status = blocklayout_encode_devinfo(xdr, &volumes);
++
++out:
++	if (stripe) {	/* failed before the stripe volume was listed */
++		kfree(stripe->u.stripe.bld_stripe_indexs);
++		kfree(stripe);
++	}
++	while (!list_empty(&volumes)) {
++		bld = list_entry(volumes.next, pnfs_blocklayout_devinfo_t,
++		    bld_list);
++		switch (bld->bld_type) {
++			case PNFS_BLOCK_VOLUME_SLICE:
++			case PNFS_BLOCK_VOLUME_CONCAT:
++				// No memory to release for these
++				break;
++			case PNFS_BLOCK_VOLUME_SIMPLE:
++				kfree(bld->u.simple.bld_sig);
++				break;
++			case PNFS_BLOCK_VOLUME_STRIPE:
++				kfree(bld->u.stripe.bld_stripe_indexs);
++				break;
++		}
++		bld_free(bld);
++	}
++	kfree(res);
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++/*
++ * bl_getdeviceinfo -- determine device tree for requested devid
++ *
++ * The device is first probed as a slice and then as a Device Mapper
++ * volume; the matching encoder produces the reply. Any other device is
++ * rejected with -EINVAL.
++ */
++int
++bl_getdeviceinfo(struct super_block *sb, struct exp_xdr_stream *xdr,
++		 u32 layout_type,
++		 const struct nfsd4_pnfs_deviceid *devid)
++{
++	int	rval	= -EINVAL;
++
++	if (device_slice(devid->devid) == True)
++		rval = bl_getdeviceinfo_slice(sb, xdr, devid);
++	else if (device_dm(devid->devid) == True)
++		rval = bl_getdeviceinfo_dm(sb, xdr, devid);
++
++	return rval;
++}
++
++/*
++ * bl_layoutget -- build and encode the block layout for a byte range
++ *
++ * The requested segment in res->lg_seg is sector aligned, backing store is
++ * allocated for non-read requests, and the range is then resolved against
++ * the per-inode layout cache and the file system's extent map. On success
++ * res->lg_seg describes the range actually granted; on failure its offset
++ * and length are reset to 0.
++ *
++ * Returns NFS4_OK or an NFSv4 error.
++ */
++enum nfsstat4
++bl_layoutget(struct inode *i, struct exp_xdr_stream *xdr,
++	     const struct nfsd4_pnfs_layoutget_arg *arg,
++	     struct nfsd4_pnfs_layoutget_res *res)
++{
++	pnfs_blocklayout_layout_t	*b;
++	bl_layout_rec_t			*r;
++	struct list_head		bl_possible,
++					*bl_candidates	= NULL;
++	boolean_t			del_on_error	= False;
++	int				adj;
++	enum nfsstat4			nfserr		= NFS4_OK;
++
++	dprintk("--> %s (inode=[0x%x:%lu], offset=%Lu, len=%Lu, iomode=%d)\n",
++	    __func__, i->i_sb->s_dev, i->i_ino, _2SECTS(res->lg_seg.offset),
++	    _2SECTS(res->lg_seg.length), res->lg_seg.iomode);
++
++	if (res->lg_seg.length == 0) {
++		printk("%s: request length of 0, error condition\n", __func__);
++		return NFS4ERR_BADLAYOUT;
++	}
++
++	/*
++	 * Adjust the length as required per spec.
++	 * - First case is where the length is set to (u64)-1. Cheap means to
++	 *   define the end of the file.
++	 * - Second case is where the I/O mode is read-only, but the request is
++	 *   past the end of the file so the request needs to be trimmed.
++	 *
++	 * NOTE(review): when lg_seg.offset is beyond i_size the subtraction
++	 * below wraps around -- confirm callers never pass such an offset.
++	 */
++	if ((res->lg_seg.length == NFS4_MAX_UINT64) ||
++	    (((res->lg_seg.offset + res->lg_seg.length) > i->i_size) &&
++	     (res->lg_seg.iomode == IOMODE_READ)))
++		res->lg_seg.length = i->i_size - res->lg_seg.offset;
++
++	/* ---- Widen the range to 512 byte sector boundaries ---- */
++	adj = (res->lg_seg.offset & 511) ? res->lg_seg.offset & 511 : 0;
++	res->lg_seg.offset -= adj;
++	res->lg_seg.length = (res->lg_seg.length + adj + 511) & ~511;
++
++	if (res->lg_seg.iomode != IOMODE_READ) {
++		/*
++		 * layout_inode_add() verifies the fallocate method exists,
++		 * but it only runs further down, so check here before
++		 * calling through the pointer.
++		 */
++		if (!i->i_op->fallocate ||
++		    i->i_op->fallocate(i, FALLOC_FL_KEEP_SIZE,
++				       res->lg_seg.offset, res->lg_seg.length))
++			return NFS4ERR_IO;
++	}
++
++	INIT_LIST_HEAD(&bl_possible);
++
++	if ((r = layout_inode_find(i)) == NULL) {
++		if (layout_inode_add(i, &r) == False) {
++			printk("%s: layout_inode_add failed\n", __func__);
++			return NFS4ERR_IO;
++		}
++		del_on_error = True;
++	}
++	BUG_ON(!r);
++
++	/*
++	 * NOTE(review): layout_cache_iter() allocates with GFP_KERNEL and
++	 * calls into the file system while this spinlock is held.
++	 */
++	spin_lock(&r->blr_lock);
++
++	if (layout_cache_fill_from(r, &bl_possible, &res->lg_seg)) {
++		/*
++		 * This will send LAYOUTTRYLATER error to the client.
++		 */
++		dprintk("%s: layout_cache_fill_from() failed\n", __func__);
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	res->lg_return_on_close	= 1;
++	res->lg_seg.length	= 0;
++
++	bl_candidates = layout_cache_iter(r, &bl_possible, &res->lg_seg);
++	if (!bl_candidates) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	layout_cache_merge(r, bl_candidates);
++	if (layout_cache_update(r, bl_candidates)) {
++		/* ---- Failed to allocate memory. ---- */
++		dprintk("%s: layout_cache_update() failed\n", __func__);
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	nfserr = blocklayout_encode_layout(xdr, bl_candidates);
++	if (nfserr)
++		dprintk("%s: layoutget xdr routine failed\n", __func__);
++
++layoutget_cleanup:
++	if (bl_candidates) {
++		while (!list_empty(bl_candidates)) {
++			b = list_entry(bl_candidates->next,
++			    struct pnfs_blocklayout_layout, bll_list);
++			list_del(&b->bll_list);
++			kfree(b);
++		}
++		/* The list head itself was kmalloc'ed by layout_cache_iter() */
++		kfree(bl_candidates);
++	}
++
++	/*
++	 * On success layout_cache_iter() has already emptied this list, so
++	 * the loop only does work on the error paths.
++	 */
++	while (!list_empty(&bl_possible)) {
++		b = list_entry(bl_possible.next,
++		    struct pnfs_blocklayout_layout, bll_list);
++		list_del(&b->bll_list);
++		kfree(b);
++	}
++
++	spin_unlock(&r->blr_lock);
++	if (unlikely(nfserr)) {
++		if (del_on_error == True)
++			layout_inode_del(i);
++		res->lg_seg.length = 0;
++		res->lg_seg.offset = 0;
++	}
++
++	dprintk("<-- %s (rval %u)\n", __func__, nfserr);
++	return nfserr;
++}
++
++/*
++ * bl_layoutcommit -- commit changes, especially size, to file system
++ *
++ * Currently this routine isn't called and everything is handled within
++ * nfsd4_layoutcommit(). By not calling this routine the server doesn't
++ * handle a partial return, a set of extents, of the layout. The extents
++ * are decoded here, but nothing is done with them. If this routine is to
++ * be called the interface must change to pass the 'dentry' pointer such
++ * that notify_change() can be called.
++ *
++ * Returns 0 on success, -EINVAL when the client supplied extent list does
++ * not fit inside args->lc_up_len, or -ENOMEM.
++ */
++int
++bl_layoutcommit(struct inode *i,
++		const struct nfsd4_pnfs_layoutcommit_arg *args,
++		struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	bl_layout_rec_t			*r;
++	int				status	= 0;
++	u64				lw_plus;
++
++	dprintk("--> %s (ino [0x%x:%lu])\n", __func__, i->i_sb->s_dev, i->i_ino);
++	r = layout_inode_find(i);
++	if (!r) {
++		dprintk("%s: Unexpected commit to inode %p\n", __func__, i);
++		goto out;
++	}
++
++	lw_plus = args->lc_last_wr + 1;
++	if (args->lc_newoffset) {
++		dprintk("  lc_last_wr %Lu\n", lw_plus);
++		if (r->blr_orig_size < lw_plus) {
++			r->blr_orig_size	= lw_plus;
++			res->lc_size_chg	= 1;
++			res->lc_newsize		= lw_plus;
++		}
++	}
++
++	if (args->lc_up_len) {
++		/*
++		 * On the wire every extent is five 64-bit values followed
++		 * by one 32-bit value, matching the READ64/READ32 sequence
++		 * in the loop below.
++		 */
++		const u32	ext_xdr_len = (5 * 8) + 4;
++		u32		avail;
++		int		extents,
++				n;
++		struct pnfs_blocklayout_layout *b;
++		__be32 *p = args->lc_up_layout;
++
++		/*
++		 * Client is returning a set of extents which
++		 * should/could be used to update the file system.
++		 * See section 2.3.2 in draft-ietf-nfsv4-pnfs-block-08
++		 */
++		if ((u32)args->lc_up_len < sizeof (__be32)) {
++			/* ---- Not even room for the extent count ---- */
++			status = -EINVAL;
++			goto out;
++		}
++		READ32(extents);
++		avail = (u32)args->lc_up_len - sizeof (__be32);
++		dprintk("  Client returning %d extents: data size %d\n",
++		    extents, args->lc_up_len);
++
++		/*
++		 * The count is client controlled. Refuse anything that
++		 * would make the decode loop run past the supplied buffer
++		 * or overflow the allocation size.
++		 */
++		if ((extents < 0) || ((u32)extents > (avail / ext_xdr_len))) {
++			status = -EINVAL;
++			goto out;
++		}
++
++		b = kmalloc(sizeof (struct pnfs_blocklayout_layout) *
++			    extents, GFP_KERNEL);
++		if (!b) {
++			status = -ENOMEM;
++			goto out;
++		}
++
++		for (n = 0; n < extents; n++) {
++			READ64(b[n].bll_vol_id.sbid);
++			READ64(b[n].bll_vol_id.devid);
++			READ64(b[n].bll_foff);
++			READ64(b[n].bll_len);
++			READ64(b[n].bll_soff);
++			READ32(b[n].bll_es);
++			dprintk("  %d: foff %Lu, len %Lu, soff %Lu "
++			    "state %s\n",
++			    n, _2SECTS(b[n].bll_foff),
++			    _2SECTS(b[n].bll_len),
++			    _2SECTS(b[n].bll_soff),
++			    map_state2name(b[n].bll_es));
++		}
++		kfree(b);
++	}
++
++out:
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++/*
++ * bl_layoutreturn -- client hands back (part of) a layout
++ *
++ * Removes the returned segment from the inode's layout cache, if the inode
++ * has one, and then calls layout_inode_del() on the inode. Always
++ * returns 0.
++ */
++int
++bl_layoutreturn(struct inode *i,
++		const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	bl_layout_rec_t			*rec;
++	int				rval	= 0;
++
++	dprintk("--> %s (ino [0x%x:%lu])\n", __func__, i->i_sb->s_dev, i->i_ino);
++
++	rec = layout_inode_find(i);
++	if (rec != NULL) {
++		/* ---- Cache surgery happens under the record's lock ---- */
++		spin_lock(&rec->blr_lock);
++		layout_cache_del(rec, &args->lr_seg);
++		spin_unlock(&rec->blr_lock);
++
++		dprintk("    ext_size %Lu, i_size %Lu, orig_size %Lu\n",
++		    rec->blr_ext_size, i->i_size, rec->blr_orig_size);
++	}
++
++	layout_inode_del(i);
++
++	dprintk("<-- %s (rval %d)\n", __func__, rval);
++	return rval;
++}
++
++/*
++ * bl_layoutrecall -- recall layouts overlapping [offset, offset + len)
++ *
++ * Only RETURN_FILE is implemented; RETURN_FSID and RETURN_ALL are accepted
++ * but do nothing. For a file, the first cached layout containing 'offset'
++ * triggers a recall of the entire file (offset 0, length NFS4_MAX_UINT64,
++ * IOMODE_ANY) through nfsd_layout_recall_cb().
++ *
++ * Returns 0, or -EINVAL for an unknown recall type.
++ */
++int
++bl_layoutrecall(struct inode *inode, int type, u64 offset, u64 len)
++{
++	struct super_block		*sb;
++	struct nfsd4_pnfs_cb_layout	lr;
++	bl_layout_rec_t			*r;
++	pnfs_blocklayout_layout_t	*b;
++	u64				adj;
++
++	dprintk("--> %s\n", __func__);
++	BUG_ON(!len);
++	switch (type) {
++		case RETURN_FILE:
++			sb = inode->i_sb;
++			dprintk("  recalling layout [0x%x:%lu], %Lu:%Lu\n",
++			    inode->i_sb->s_dev, inode->i_ino,
++				_2SECTS(offset), _2SECTS(len));
++			break;
++		case RETURN_FSID:
++			dprintk("%s: recalling layout for fsid x (unimplemented)\n",
++				__func__);
++			return 0;
++		case RETURN_ALL:
++			/*
++			 * XXX figure out how to get a sb since there's no
++			 * inode ptr
++			 */
++			dprintk("%s: recalling all layouts (unimplemented)\n",
++				__func__);
++			return 0;
++		default:
++			return -EINVAL;
++	}
++
++restart:
++	r = layout_inode_find(inode);
++	if (r && len && !r->blr_recalled) {
++		spin_lock(&r->blr_lock);
++		list_for_each_entry(b, &r->blr_layouts, bll_list) {
++			if (r->blr_recalled || b->bll_recalled ||
++			    (offset < b->bll_foff) || (offset >= BLL_F_END(b)))
++				continue;
++
++			b->bll_recalled		= 1;
++			lr.cbl_recall_type	= type;
++			lr.cbl_seg.layout_type	= LAYOUT_BLOCK_VOLUME;
++			lr.cbl_seg.clientid	= 0;
++			lr.cbl_seg.offset	= 0;
++			lr.cbl_seg.length	= NFS4_MAX_UINT64;
++			r->blr_recalled		= 1;
++			dprintk("  FULL LAYOUTRECALL\n");
++			lr.cbl_seg.iomode = IOMODE_ANY;
++
++			/*
++			 * Currently there are only two cases where the
++			 * layout is being returned.
++			 *    (1) Someone is issuing a NFS_WRITE operation
++			 *        to this layout.
++			 *    (2) The file has been truncated which means
++			 *        the layout is immediately made invalid.
++			 * In both cases the client must write any
++			 * uncommitted modifications to the server via
++			 * NFS_WRITE.
++			 */
++			lr.cbl_layoutchanged = 1;
++
++			/*
++			 * Work out how much of the request this entry
++			 * covers while the lock still protects 'b'. Once
++			 * the lock is dropped a layoutreturn may free the
++			 * entry (and possibly 'r'), so neither may be
++			 * touched after the callback.
++			 */
++			adj = MIN(b->bll_len - (offset - b->bll_foff), len);
++
++			/*
++			 * Need to drop the lock because we'll get a
++			 * layoutreturn which will block waiting for
++			 * the lock. The request will come in on the
++			 * same thread which will cause a deadlock.
++			 */
++			spin_unlock(&r->blr_lock);
++			nfsd_layout_recall_cb(sb, inode, &lr);
++			offset += adj;
++			len -= adj;
++			if (!len)
++				goto out;
++			/*
++			 * Since layoutreturn will have been called we
++			 * can't assume blr_layouts is still valid,
++			 * so restart.
++			 */
++			goto restart;
++		}
++		spin_unlock(&r->blr_lock);
++	}
++
++out:
++	dprintk("<-- %s\n", __func__);
++	return 0;
++}
++
++/*
++ * []------------------------------------------------------------------[]
++ * | Support functions from here on down.				|
++ * []------------------------------------------------------------------[]
++ */
++
++/*
++ * bld_simple -- given a dev_t build a simple volume structure
++ *
++ * Simple volume contains the device signature and offset to that data in
++ * the storage volume. Both are obtained from user space via the GETSIG
++ * upcall.
++ *
++ * 'local_index' is the position of this volume in the device tree; it is
++ * recorded in bld_index_loc so that the slice built on top of this volume
++ * can refer back to it.
++ *
++ * Returns the new volume, already linked onto 'volumes', or NULL on error.
++ */
++static pnfs_blocklayout_devinfo_t *
++bld_simple(struct list_head *volumes, dev_t devid, int local_index)
++{
++	pnfs_blocklayout_devinfo_t	*bld	= NULL;
++	bl_comm_msg_t			msg;
++	bl_comm_res_t			*res	= NULL;
++
++	msg.msg_type = PNFS_UPCALL_MSG_GETSIG;
++	msg.u.msg_dev = devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("%s: Failed to get signature information\n", __func__);
++		goto error;
++	}
++
++	bld = bld_alloc(volumes, PNFS_BLOCK_VOLUME_SIMPLE);
++	if (!bld)
++		goto error;	/* the upcall reply still has to be released */
++
++	/*
++	 * Callers read bld_index_loc from the returned volume, so it has
++	 * to be set here.
++	 */
++	bld->bld_index_loc = local_index;
++	bld->u.simple.bld_offset = (res->u.sig.sector * 512LL) + res->u.sig.offset;
++	bld->u.simple.bld_sig_len = res->u.sig.len;
++	bld->u.simple.bld_sig = kmalloc(res->u.sig.len, GFP_KERNEL);
++	if (!bld->u.simple.bld_sig)
++		goto error;
++
++	memcpy(bld->u.simple.bld_sig, res->u.sig.sig, res->u.sig.len);
++	kfree(res);
++	return bld;
++
++error:
++	if (bld)
++		bld_free(bld);
++	kfree(res);
++	dprintk("%s: error in bld_simple\n", __func__);
++	return NULL;
++}
++
++/*
++ * bld_slice -- given a dev_t build a slice volume structure
++ *
++ * A slice volume records where the slice/partition starts within the
++ * storage volume and how long it is (both reported in 512 byte sectors by
++ * the GETSLICE upcall), plus the index of the "simple" volume which
++ * contains it.
++ *
++ * Returns the new volume, or NULL if allocation or the upcall fails.
++ */
++static pnfs_blocklayout_devinfo_t *
++bld_slice(struct list_head *volumes, dev_t devid, int my_loc, int simple_loc)
++{
++	pnfs_blocklayout_devinfo_t	*slice;
++	bl_comm_msg_t			req;
++	bl_comm_res_t			*reply;
++
++	dprintk("--> %s\n", __func__);
++
++	slice = bld_alloc(volumes, PNFS_BLOCK_VOLUME_SLICE);
++	if (slice == NULL)
++		return NULL;
++
++	req.msg_type	= PNFS_UPCALL_MSG_GETSLICE;
++	req.u.msg_dev	= devid;
++	if (bl_upcall(bl_comm_global, &req, &reply) != 0) {
++		dprintk("Upcall to get slice info failed\n");
++		bld_free(slice);
++		return NULL;
++	}
++
++	/* ---- Identity and position within the device tree ---- */
++	slice->bld_devid.devid	= devid;
++	slice->bld_index_loc	= my_loc;
++
++	/* ---- Geometry, converted from sectors to bytes ---- */
++	slice->u.slice.bld_index = simple_loc;
++	slice->u.slice.bld_start = reply->u.slice.start * 512LL;
++	slice->u.slice.bld_len	 = reply->u.slice.length * 512LL;
++
++	dprintk("%s: start %Lu, len %Lu\n", __func__,
++		slice->u.slice.bld_start / 512LL, slice->u.slice.bld_len / 512LL);
++
++	kfree(reply);
++	dprintk("<-- %s (rval %p)\n", __func__, slice);
++	return slice;
++}
++
++/*
++ * layout_cache_fill_from -- split a request into cached and new pieces
++ *
++ * Populates 'h' with entries covering 'seg'. Whatever the cache can't
++ * satisfy is appended as a single BLOCK_LAYOUT_NEW entry.
++ *
++ * Returns 0 on success, -EIO if the cache walk fails, or -ENOMEM.
++ */
++static int
++layout_cache_fill_from(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*rest;
++
++	dprintk("--> %s\n", __func__);
++
++	/* ---- Consult the cache only when it holds something ---- */
++	if (!list_empty(&r->blr_layouts) &&
++	    (layout_cache_fill_from_list(r, h, seg) == False))
++		return -EIO;
++
++	/*
++	 * Anything left over at this point is either
++	 *    (1) the whole request, because blr_layouts is empty, or
++	 *    (2) the residual which falls past the end of every cached
++	 *        layout.
++	 */
++	if (seg->length != 0) {
++		rest = bll_alloc(seg->offset, seg->length, BLOCK_LAYOUT_NEW, h);
++		if (rest == NULL)
++			return -ENOMEM;
++		dprintk("  remaining at %Lu, len %Lu\n", _2SECTS(rest->bll_foff),
++			_2SECTS(rest->bll_len));
++	}
++
++	dprintk("<-- %s\n", __func__);
++	return 0;
++}
++
++/*
++ * layout_cache_iter -- turn the "possible" list into real pNFS extents
++ *
++ * Entries marked BLOCK_LAYOUT_NEW are resolved through the file system's
++ * fiemap method (or turned into a "no data" extent for a hole on a read);
++ * all other entries are duplicated as is. seg->length accumulates the
++ * length granted and seg->offset is set to the start of the first extent.
++ *
++ * Returns a kmalloc'ed list head holding the candidate extents, which the
++ * caller must drain and kfree(), or NULL on error. 'bl_possible' is
++ * emptied in both cases.
++ */
++struct list_head *
++layout_cache_iter(bl_layout_rec_t *r, struct list_head *bl_possible,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*n		= NULL;
++	struct list_head		*bl_candidates	= NULL;
++	struct fiemap_extent_info	fei;
++	struct inode			*i;
++	dev_t				dev;
++
++	dev	= r->blr_rdev;
++	i	= r->blr_inode;
++
++	dprintk("--> %s\n", __func__);
++	bl_candidates = kmalloc(sizeof (*bl_candidates), GFP_KERNEL);
++	if (!bl_candidates)
++		return NULL;
++	INIT_LIST_HEAD(bl_candidates);
++	extents_setup(&fei);
++
++	list_for_each_entry(b, bl_possible, bll_list) {
++		if (b->bll_cache_state == BLOCK_LAYOUT_NEW) {
++
++			extents_count(&fei, i, b->bll_foff, b->bll_len);
++			if (fei.fi_extents_mapped) {
++
++				/*
++				 * Common case here. Got a range which has
++				 * extents. Now get those extents and process
++				 * them into pNFS extents.
++				 */
++				if (extents_get(&fei, i, b->bll_foff,
++				    b->bll_len) == False)
++					goto cleanup;
++				if (extents_process(&fei, bl_candidates,
++				    seg, dev, b) == False)
++					goto cleanup;
++				extents_cleanup(&fei);
++
++			} else if (seg->iomode == IOMODE_READ) {
++
++				/*
++				 * Found a hole in a file while reading. No
++				 * problem, just create a pNFS extent for the
++				 * range and let the client know there's no
++				 * backing store.
++				 */
++				n = bll_alloc(b->bll_foff, b->bll_len,
++				    BLOCK_LAYOUT_NEW, bl_candidates);
++				if (!n)
++					goto cleanup;
++				n->bll_es = PNFS_BLOCK_NONE_DATA;
++				n->bll_vol_id.sbid = 0;
++				n->bll_vol_id.devid = dev;
++				seg->length += b->bll_len;
++			} else {
++
++				/*
++				 * There's a problem here. Since the iomode
++				 * is read/write fallocate should have allocated
++				 * any necessary storage for the given range.
++				 */
++				dprintk("    Extent count for RW is 0\n");
++				goto cleanup;
++			}
++
++		} else {
++			n = bll_alloc_dup(b, b->bll_cache_state, bl_candidates);
++			if (!n)
++				goto cleanup;
++			seg->length += n->bll_len;
++		}
++
++		if (r->blr_ext_size < (b->bll_foff + b->bll_len))
++			r->blr_ext_size = b->bll_foff + b->bll_len;
++	}
++
++	/*
++	 * Nothing to hand out, e.g. 'bl_possible' was empty. Bail out
++	 * rather than take the "first entry" of an empty list.
++	 */
++	if (list_empty(bl_candidates))
++		goto cleanup;
++
++	while (!list_empty(bl_possible)) {
++		b = list_entry(bl_possible->next,
++		    struct pnfs_blocklayout_layout, bll_list);
++		list_del(&b->bll_list);
++		kfree(b);
++	}
++
++	b = list_first_entry(bl_candidates, struct pnfs_blocklayout_layout,
++	    bll_list);
++	seg->offset = b->bll_foff;
++	dprintk("<-- %s okay\n", __func__);
++	return bl_candidates;
++
++cleanup:
++	extents_cleanup(&fei);
++
++	/* ---- Release the candidates built so far, then the head ---- */
++	while (!list_empty(bl_candidates)) {
++		b = list_entry(bl_candidates->next,
++		    struct pnfs_blocklayout_layout, bll_list);
++		list_del(&b->bll_list);
++		kfree(b);
++	}
++	kfree(bl_candidates);
++
++	/* ---- Same treatment of 'bl_possible' as the success path ---- */
++	while (!list_empty(bl_possible)) {
++		b = list_entry(bl_possible->next,
++		    struct pnfs_blocklayout_layout, bll_list);
++		list_del(&b->bll_list);
++		kfree(b);
++	}
++	dprintk("<-- %s, error occurred\n", __func__);
++	return NULL;
++}
++
++/*
++ * layout_cache_merge -- collapse layouts which make up a contiguous range.
++ *
++ * Walks list 'h' and folds an entry into its predecessor when both have the
++ * same extent state and
++ *    - for data extents, the predecessor ends on storage exactly where the
++ *      entry begins, or
++ *    - for "no data" extents, the predecessor ends in the file exactly
++ *      where the entry begins.
++ * After every merge the walk starts over because the list was modified.
++ */
++static void
++layout_cache_merge(bl_layout_rec_t *r, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*p;
++
++	dprintk("--> %s\n", __func__);
++restart:
++	p = NULL;
++	list_for_each_entry(b, h, bll_list) {
++		if (p && (BLL_S_END(p) == b->bll_soff) &&
++		    (p->bll_es == b->bll_es) &&
++		    (b->bll_es != PNFS_BLOCK_NONE_DATA)) {
++			/*
++			 * We've got a candidate.
++			 */
++#ifdef too_verbose
++			dprintk("  merge %Lu(f):%Lu(l):%Lu(s) into %Lu(f):%Lu(l):%Lu(s)\n",
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				_2SECTS(b->bll_soff),
++				_2SECTS(p->bll_foff), _2SECTS(p->bll_len),
++				_2SECTS(p->bll_soff));
++#endif
++
++			/* A grown cache entry must be written back later */
++			if (p->bll_cache_state == BLOCK_LAYOUT_CACHE)
++				p->bll_cache_state = BLOCK_LAYOUT_UPDATE;
++			p->bll_len += b->bll_len;
++			list_del(&b->bll_list);
++			kfree(b);
++			goto restart;
++		} else if (p && (BLL_F_END(p) == b->bll_foff) &&
++			   (p->bll_es == b->bll_es) &&
++			   (b->bll_es == PNFS_BLOCK_NONE_DATA)) {
++			p->bll_len += b->bll_len;
++			list_del(&b->bll_list);
++			kfree(b);
++			goto restart;
++		} else
++			p = b;
++	}
++	dprintk("<-- %s\n", __func__);
++}
++
++/*
++ * layout_cache_update -- fold the candidate list 'h' into the inode's cache
++ *
++ * With an empty cache every candidate is simply duplicated into it.
++ * Otherwise BLOCK_LAYOUT_UPDATE candidates extend the cached entry they
++ * start in, and BLOCK_LAYOUT_NEW candidates are inserted at their position
++ * by file offset.
++ *
++ * Returns 0 on success or -ENOMEM.
++ */
++static int
++layout_cache_update(bl_layout_rec_t *r, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*c,
++					*n;
++	int				status = 0;
++
++	dprintk("--> %s\n", __func__);
++	if (list_empty(&r->blr_layouts)) {
++		/* ---- Just add entries and return ---- */
++		dprintk("  cache empty for inode 0x%x:%ld\n", r->blr_rdev,
++			r->blr_inode->i_ino);
++		list_for_each_entry(b, h, bll_list) {
++			c = bll_alloc_dup(b, BLOCK_LAYOUT_CACHE,
++					  &r->blr_layouts);
++			if (!c) {
++				status = -ENOMEM;
++				break;
++			}
++			dprintk("    adding %Lu(f):%Lu(l):%Lu(s):%d\n",
++				_2SECTS(c->bll_foff), _2SECTS(c->bll_len),
++				_2SECTS(c->bll_soff), c->bll_es);
++		}
++		return status;
++	}
++
++	list_for_each_entry(b, h, bll_list) {
++		BUG_ON(!b->bll_vol_id.devid);
++		if (b->bll_cache_state == BLOCK_LAYOUT_UPDATE) {
++			boolean_t found = False;
++			list_for_each_entry(c, &r->blr_layouts, bll_list) {
++				if ((b->bll_soff >= c->bll_soff) &&
++				    (b->bll_soff < BLL_S_END(c)) &&
++				    (b->bll_es != PNFS_BLOCK_NONE_DATA)) {
++					u64	u;
++
++					if ((b->bll_foff < c->bll_foff) ||
++					    (b->bll_foff > BLL_F_END(c)))
++						BUG();
++
++					u = BLL_S_END(b) - BLL_S_END(c);
++					/*
++					 * The updated cache entry has to be
++					 * different than the current.
++					 * Otherwise the cache state for 'b'
++					 * should be BLOCK_LAYOUT_CACHE.
++					 */
++					BUG_ON(BLL_S_END(b) < BLL_S_END(c));
++
++					dprintk("  "
++						"updating %Lu(f):%Lu(l):%Lu(s) to len %Lu\n",
++						_2SECTS(c->bll_foff),
++						_2SECTS(c->bll_len),
++						_2SECTS(c->bll_soff),
++						_2SECTS(c->bll_len + u));
++					c->bll_len += u;
++					bll_collapse(r, c);
++					found = True;
++					break;
++				}
++			}
++
++			if (found == False) {
++				dprintk("  ERROR Expected to find"
++				    " %Lu(f):%Lu(l):%Lu(s), but didn't\n",
++				    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				    _2SECTS(b->bll_soff));
++				list_for_each_entry(c, &r->blr_layouts, bll_list)
++					print_bll(c, "Cached");
++				BUG();
++			}
++		} else if (b->bll_cache_state == BLOCK_LAYOUT_NEW) {
++
++			c = list_first_entry(&r->blr_layouts,
++			    struct pnfs_blocklayout_layout, bll_list);
++			if (b->bll_foff < c->bll_foff) {
++				/*
++				 * Special case where new entry is before
++				 * first cached entry.
++				 */
++				c = bll_alloc_dup(b, BLOCK_LAYOUT_CACHE, NULL);
++				if (!c)
++					return -ENOMEM;
++				list_add(&c->bll_list, &r->blr_layouts);
++				dprintk("  new entry at head of list at %Lu, "
++					"len %Lu\n",
++					_2SECTS(c->bll_foff), _2SECTS(c->bll_len));
++			} else {
++				list_for_each_entry(c, &r->blr_layouts,
++				    bll_list) {
++					n = list_entry(c->bll_list.next,
++					    struct pnfs_blocklayout_layout,
++					    bll_list);
++					/*
++					 * This is ugly, but can't think of
++					 * another way to examine this case.
++					 * Consider the following. Need to
++					 * add an entry which starts at 40
++					 * and the cache has the following
++					 * entries:
++					 * Start    Length
++					 * 10       5
++					 * 30       5
++					 * 50       5
++					 * So, need to look and see if the new
++					 * entry starts after the current
++					 * cache, but before the next one.
++					 * There's a catch in that the next
++					 * entry might not be valid as it's
++					 * really just a pointer to the list
++					 * head.
++					 */
++					if (((b->bll_foff >=
++					      BLL_F_END(c)) &&
++					     (c->bll_list.next == &r->blr_layouts)) ||
++					    ((b->bll_foff >=
++					      BLL_F_END(c)) &&
++					     (b->bll_foff < n->bll_foff))) {
++
++						n = bll_alloc_dup(b,
++								  BLOCK_LAYOUT_CACHE, NULL);
++						if (!n)
++							return -ENOMEM;
++						dprintk("  adding new %Lu:%Lu"
++							" after %Lu:%Lu\n",
++							_2SECTS(n->bll_foff),
++							_2SECTS(n->bll_len),
++							_2SECTS(c->bll_foff),
++							_2SECTS(c->bll_len));
++						list_add(&n->bll_list,
++							 &c->bll_list);
++						break;
++					}
++				}
++			}
++		}
++	}
++	dprintk("<-- %s\n", __func__);
++	return status;
++}
++
++/*
++ * layout_cache_del -- remove a returned segment from the layout cache
++ *
++ * A length of NFS4_MAX_UINT64 drops every cached layout. Otherwise the
++ * cache entries overlapping the segment are shortened, removed or split
++ * so that the returned range is no longer cached. blr_recalled is cleared
++ * once the cache is empty.
++ */
++static void
++layout_cache_del(bl_layout_rec_t *r, const struct nfsd4_layout_seg *seg_in)
++{
++	struct pnfs_blocklayout_layout	*b,
++					*n;
++	u64				len;
++	struct nfsd4_layout_seg		seg = *seg_in;
++
++	dprintk("--> %s\n", __func__);
++	if (seg.length == NFS4_MAX_UINT64) {
++		r->blr_recalled = 0;
++		dprintk("  Fast return of all layouts\n");
++		while (!list_empty(&r->blr_layouts)) {
++			b = list_entry(r->blr_layouts.next,
++				       struct pnfs_blocklayout_layout, bll_list);
++			dprintk("    foff %Lu, len %Lu, soff %Lu\n",
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				_2SECTS(b->bll_soff));
++			list_del(&b->bll_list);
++			kfree(b);
++		}
++		dprintk("<-- %s\n", __func__);
++		return;
++	}
++
++restart:
++	list_for_each_entry(b, &r->blr_layouts, bll_list) {
++		if (seg.offset == b->bll_foff) {
++			/*
++			 * This handles the following three cases:
++			 * (1) return layout matches entire cache layout
++			 * (2) return layout matches beginning portion of cache
++			 * (3) return layout matches entire cache layout and
++			 *     into next entry. Varies from #1 in end case.
++			 */
++			dprintk("  match on offsets, %Lu:%Lu\n",
++				_2SECTS(seg.offset), _2SECTS(seg.length));
++			len = MIN(seg.length, b->bll_len);
++			b->bll_foff	+= len;
++			b->bll_soff	+= len;
++			b->bll_len	-= len;
++			seg.length	-= len;
++			seg.offset	+= len;
++			if (!b->bll_len) {
++				list_del(&b->bll_list);
++				kfree(b);
++				dprintk("    removing cache line\n");
++				if (!seg.length) {
++					dprintk("    also finished\n");
++					goto complete;
++				}
++				/*
++				 * Since 'b' was freed we can't continue at the
++				 * next entry which is referenced as
++				 * b->bll_list.next by the list_for_each_entry
++				 * macro. Need to restart the loop.
++				 * TODO: Think about creating a dummy 'b' which
++				 *       would keep list_for_each_entry() happy.
++				 */
++				goto restart;
++			}
++			if (!seg.length) {
++				dprintk("    finished, but cache line not"
++					"empty\n");
++				goto complete;
++			}
++		} else if ((seg.offset >= b->bll_foff) &&
++		    (seg.offset < BLL_F_END(b))) {
++			/*
++			 * layout being returned is within this cache line.
++			 */
++			dprintk("  layout %Lu:%Lu within cache line %Lu:%Lu\n",
++				_2SECTS(seg.offset), _2SECTS(seg.length),
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len));
++			BUG_ON(!seg.length);
++			if ((seg.offset + seg.length) >= BLL_F_END(b)) {
++				/*
++				 * Layout returned starts in the middle of
++				 * cache entry and just need to trim back
++				 * cache to shorter length.
++				 */
++				dprintk("    trim back cache line\n");
++				len = seg.offset - b->bll_foff;
++				seg.offset += b->bll_len - len;
++				seg.length -= b->bll_len - len;
++				b->bll_len = len;
++				if (!seg.length)
++					goto complete;
++			} else {
++				/*
++				 * Need to split current cache layout because
++				 * chunk is being removed from the middle.
++				 * 'len' is the file offset at which the
++				 * surviving tail begins.
++				 */
++				dprintk("    split cache line\n");
++				len = seg.offset + seg.length;
++				n = bll_alloc(len,
++					      (b->bll_foff + b->bll_len) - len,
++					      BLOCK_LAYOUT_CACHE, NULL);
++				if (!n) {
++					/*
++					 * No way to report the failure from
++					 * here. Leave the cache line as it
++					 * is; it then still covers the
++					 * returned range.
++					 */
++					dprintk("    split failed, no memory\n");
++					goto complete;
++				}
++				/*
++				 * The tail sits as far into the storage
++				 * extent as it does into the file extent,
++				 * and shares state and volume with 'b'.
++				 */
++				n->bll_soff = b->bll_soff + (len - b->bll_foff);
++				n->bll_es = b->bll_es;
++				n->bll_vol_id = b->bll_vol_id;
++				list_add(&n->bll_list, &b->bll_list);
++				b->bll_len = seg.offset - b->bll_foff;
++				goto complete;
++			}
++		}
++	}
++complete:
++	if (list_empty(&r->blr_layouts))
++		r->blr_recalled = 0;
++	dprintk("<-- %s\n", __func__);
++}
++
++/*
++ * layout_cache_fill_from_list -- fills from cache list
++ *
++ * Walks the cached layouts and, for the range described by 'seg', appends
++ * to 'h' a BLOCK_LAYOUT_NEW entry for each gap in front of a cached layout
++ * and a BLOCK_LAYOUT_CACHE entry for each part a cached layout covers.
++ * seg->offset/seg->length are advanced past what has been handled, so on
++ * return they describe the residual beyond the last cached layout.
++ *
++ * Returns False on allocation failure or when a cached layout conflicts
++ * with the requested iomode, True otherwise.
++ *
++ * NOTE: This routine was only separated out from layout_cache_fill_from()
++ * to reduce the indentation level which makes the code easier to read.
++ */
++static inline boolean_t
++layout_cache_fill_from_list(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*n;
++	enum pnfs_block_extent_state4	s;
++
++	list_for_each_entry(b, &r->blr_layouts, bll_list) {
++		if (seg->offset < b->bll_foff) {
++			n = bll_alloc(seg->offset,
++			    MIN(seg->length, b->bll_foff - seg->offset),
++			    BLOCK_LAYOUT_NEW, NULL);
++			if (!n)
++				return False;
++
++			/* ---- Append; same spot as list_add() on h->prev ---- */
++			list_add_tail(&n->bll_list, h);
++			dprintk("  new: %Lu:%Lu, added before %Lu:%Lu\n",
++			    _2SECTS(n->bll_foff), _2SECTS(n->bll_len),
++			    _2SECTS(b->bll_foff), _2SECTS(b->bll_len));
++			seg->offset += n->bll_len;
++			seg->length -= n->bll_len;
++			if (!seg->length)
++				break;
++		}
++
++		if ((seg->offset >= b->bll_foff) &&
++		    (seg->offset < BLL_F_END(b))) {
++			if (layout_conflict(b, seg->iomode, &s) == False) {
++				dprintk("  CONFLICT FOUND: "
++				    "%Lu(f):%Lu(l):%Lu(s) state %d, iomode %d\n",
++				    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				    _2SECTS(b->bll_soff), b->bll_es,
++				    seg->iomode);
++				return False;
++			}
++			n = bll_alloc(seg->offset,
++			    MIN(seg->length, BLL_F_END(b) - seg->offset),
++			    BLOCK_LAYOUT_CACHE, h);
++			/* Check before the debug print dereferences 'n' */
++			if (!n)
++				return False;
++			dprintk("  CACHE hit: Found %Lu(f):%Lu(l): "
++			    "in %Lu(f):%Lu(l):%Lu(s):%d\n",
++			    _2SECTS(n->bll_foff), _2SECTS(n->bll_len),
++			    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++			    _2SECTS(b->bll_soff), b->bll_es);
++
++			/* Same distance into storage as into the file extent */
++			n->bll_soff = b->bll_soff + seg->offset - b->bll_foff;
++			n->bll_vol_id.sbid = 0;
++			n->bll_vol_id.devid = b->bll_vol_id.devid;
++			n->bll_es = s;
++			seg->offset += n->bll_len;
++			seg->length -= n->bll_len;
++			if (!seg->length)
++				break;
++		}
++	}
++	return True;
++}
++
++/*
++ * bll_alloc_holey -- add a "no data" extent describing a hole
++ *
++ * Appends a BLOCK_LAYOUT_NEW entry for [offset, offset + length) on device
++ * 'dev' to 'bl_candidates' and marks it PNFS_BLOCK_NONE_DATA.
++ *
++ * Returns the length of the new entry, or 0 if it can't be allocated.
++ */
++static u64
++bll_alloc_holey(struct list_head *bl_candidates, u64 offset, u64 length,
++    dev_t dev)
++{
++	pnfs_blocklayout_layout_t	*hole;
++
++	hole = bll_alloc(offset, length, BLOCK_LAYOUT_NEW, bl_candidates);
++	if (hole == NULL)
++		return 0;
++
++	hole->bll_vol_id.devid	= dev;
++	hole->bll_vol_id.sbid	= 0;
++	hole->bll_es		= PNFS_BLOCK_NONE_DATA;
++
++	return hole->bll_len;
++}
++
++/*
++ * extents_setup -- put a fiemap_extent_info into a known state
++ *
++ * Clears the extent buffer pointer so that extents_cleanup() can be called
++ * safely even when extents_get() never allocated a buffer.
++ */
++static void
++extents_setup(struct fiemap_extent_info *fei)
++{
++	fei->fi_extents_start	= NULL;
++}
++
++/*
++ * extents_count -- Determine the number of extents for a given range.
++ *
++ * The file system's fiemap method is invoked with no extent buffer
++ * (fi_extents_max of 0), so on return fi_extents_mapped holds the count.
++ * The method's return value is not examined.
++ *
++ * No need to call set_fs() here because the function
++ * doesn't use copy_to_user() if it's only counting
++ * the number of extents needed.
++ */
++static void
++extents_count(struct fiemap_extent_info *fei, struct inode *i, u64 foff, u64 len)
++{
++	/* ---- Pad the length by one file system block, less a byte ---- */
++	u64	span = len + (1 << i->i_sb->s_blocksize_bits) - 1;
++
++	dprintk("    Need fiemap of %Ld:%Ld\n", _2SECTS(foff), _2SECTS(len));
++
++	fei->fi_extents_start	= NULL;
++	fei->fi_extents_max	= 0;
++	fei->fi_extents_mapped	= 0;
++	fei->fi_flags		= FIEMAP_FLAG_SYNC;
++
++	i->i_op->fiemap(i, fei, foff, span);
++}
++
++/*
++ * extents_get -- Get list of extents for range
++ *
++ * extents_count() must have been called before this routine such that
++ * fi_extents_mapped is known.
++ *
++ * On success fi_extents_start points at a zero-filled, kernel allocated
++ * array which the caller releases through extents_cleanup(). Returns False
++ * if the array can't be allocated, the fiemap method fails, or it maps no
++ * extents; the array has then already been freed.
++ */
++static boolean_t
++extents_get(struct fiemap_extent_info *fei, struct inode *i, u64 foff, u64 len)
++{
++	int			rval;
++	struct fiemap_extent	*fe;
++	mm_segment_t		old_fs = get_fs();
++
++	/*
++	 * Now allocate the correct amount of space
++	 * needed. It's possible for the file to have changed
++	 * between calls which would require more space for
++	 * the extents. If that occurs the last extent will
++	 * not have FIEMAP_EXTENT_LAST set and the error will
++	 * be caught in extents_process().
++	 *
++	 * kcalloc() zero fills the array and fails cleanly should
++	 * count * size overflow.
++	 */
++	fe = kcalloc(fei->fi_extents_mapped, sizeof (*fe), GFP_KERNEL);
++	if (!fe)
++		return False;
++
++	fei->fi_extents_max	= fei->fi_extents_mapped;
++	fei->fi_extents_mapped	= 0;
++	fei->fi_extents_start	= fe;
++
++	/*
++	 * The extent buffer is a kernel address, so the address limit is
++	 * lifted for the duration of the call.
++	 */
++	set_fs(KERNEL_DS);
++	rval = i->i_op->fiemap(i, fei, foff, len +
++	    (1 << i->i_sb->s_blocksize_bits) - 1);
++	set_fs(old_fs);
++
++	if (rval || !fei->fi_extents_mapped) {
++		dprintk("    No extents. Wanted %d, got %d\n",
++			fei->fi_extents_max, fei->fi_extents_mapped);
++		kfree(fe);
++		fei->fi_extents_start = NULL;
++		return False;
++	} else
++		return True;
++}
++
++/*
++ * extents_process -- runs through the extents returned from the file system
++ *	 and creates block layout entries.
++ *
++ * Each mapped extent becomes a data entry on 'bl_candidates'; a gap between
++ * two consecutive extents becomes a "no data" entry. seg->length grows by
++ * the length of every entry added.
++ *
++ * Returns False if an entry can't be allocated, True otherwise.
++ */
++static boolean_t
++extents_process(struct fiemap_extent_info *fei, struct list_head *bl_candidates,
++    struct nfsd4_layout_seg *seg, dev_t dev, pnfs_blocklayout_layout_t *b)
++{
++	struct fiemap_extent		*cur,
++					*prev		= NULL;
++	pnfs_blocklayout_layout_t	*ext;
++	u64				prev_end,
++					hole_len;
++	int				idx;
++
++	dprintk("--> %s\n", __func__);
++
++	cur = fei->fi_extents_start;
++	for (idx = 0; idx < fei->fi_extents_mapped; idx++, cur++) {
++
++		BUG_ON(!cur->fe_physical);
++
++		/*
++		 * Deal with corner cases of hole-y files.
++		 */
++		if (prev != NULL) {
++			prev_end = prev->fe_logical + prev->fe_length;
++			if (prev_end != cur->fe_logical) {
++				/*
++				 * The previous extent stops short of where
++				 * this one starts, so the gap in between
++				 * gets a pNFS extent of its own.
++				 */
++				dprintk("    Got a hole at %Ld:%Ld \n", 
++				    _2SECTS(prev->fe_logical),
++				    _2SECTS(prev->fe_length));
++				hole_len = bll_alloc_holey(bl_candidates,
++				    prev_end, cur->fe_logical - prev_end, dev);
++				if (hole_len == 0)
++					return False;
++				seg->length += hole_len;
++			}
++		}
++
++		ext = bll_alloc(cur->fe_logical, cur->fe_length,
++		    BLOCK_LAYOUT_NEW, bl_candidates);
++		if (unlikely(ext == NULL)) {
++			dprintk("%s: bll_alloc failed\n", __func__);
++			return False;
++		}
++
++		/* ---- Read requests get read-only extents ---- */
++		if (seg->iomode == IOMODE_READ)
++			ext->bll_es = PNFS_BLOCK_READ_DATA;
++		else
++			ext->bll_es = PNFS_BLOCK_READWRITE_DATA;
++		ext->bll_soff		= cur->fe_physical;
++		ext->bll_vol_id.sbid	= 0;
++		ext->bll_vol_id.devid	= dev;
++		seg->length += cur->fe_length;
++		print_bll(ext, "New extent");
++
++		prev = cur;
++	}
++	dprintk("<-- %s (i=%d)\n", __func__, idx);
++
++	return True;
++}
++
++static void
++extents_cleanup(struct fiemap_extent_info *fei)
++{
++	if (fei->fi_extents_start) {
++		kfree(fei->fi_extents_start);
++		fei->fi_extents_start = NULL;
++	}
++}
++
++/*
++ * device_slice -- True if the opened device's gendisk has minors > 1.
++ */
++static boolean_t
++device_slice(dev_t devid)
++{
++	struct block_device	*bd	= open_by_devnum(devid, FMODE_READ);
++	boolean_t		rval	= False;
++	/* open_by_devnum() reports failure with ERR_PTR(), never NULL. */
++	if (!IS_ERR(bd)) {
++		if (bd->bd_disk->minors > 1)
++			rval = True;
++		blkdev_put(bd, FMODE_READ);
++	}
++	return rval;
++}
++
++/*
++ * device_dm -- check to see if device is a Device Mapper volume.
++ *
++ * Returns True for DM; False if not DM or if the upcall fails.
++ */
++static boolean_t
++device_dm(dev_t devid)
++{
++	boolean_t		rval = False;
++	bl_comm_msg_t		msg;
++	bl_comm_res_t		*res	= NULL;	/* kfree()d below on every path */
++	
++	msg.msg_type	= PNFS_UPCALL_MSG_DMCHK;
++	msg.u.msg_dev	= devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("Failed upcall to check on DM status\n");
++	} else if (res->u.dm_vol) {
++		rval = True;
++		dprintk("Device is DM volume\n");
++	} else
++		dprintk("Device is not DM volume\n");
++	kfree(res);
++	
++	return rval;
++}
++
++static boolean_t
++layout_inode_add(struct inode *i, bl_layout_rec_t **p)
++{
++	bl_layout_rec_t		*r	= NULL;
++
++	if (!i->i_op->fiemap || !i->i_op->fallocate) {
++		printk("pNFS: file system doesn't support required fiemap or "
++		    "fallocate methods\n");
++		return False;
++	}
++	/* Build the record, append it to layout_hash, hand it back via *p. */
++	r = kmalloc(sizeof (*r), GFP_KERNEL);
++	if (!r)
++		goto error;
++
++	r->blr_rdev	= i->i_sb->s_dev;
++	r->blr_inode	= i;
++	r->blr_orig_size = i->i_size;
++	r->blr_ext_size	= 0;
++	r->blr_recalled	= 0;
++	INIT_LIST_HEAD(&r->blr_layouts);
++	spin_lock_init(&r->blr_lock);
++	spin_lock(&layout_hashtbl_lock);
++	list_add_tail(&r->blr_hash, &layout_hash);
++	spin_unlock(&layout_hashtbl_lock);
++	*p = r;
++	return True;
++	
++error:
++	if (r)
++		kfree(r);
++	return False;
++}
++
++static bl_layout_rec_t *
++__layout_inode_find(struct inode *i)
++{
++	bl_layout_rec_t	*r;
++	
++	if (!list_empty(&layout_hash)) {
++		list_for_each_entry(r, &layout_hash, blr_hash) {
++			if ((r->blr_inode->i_ino == i->i_ino) &&
++			    (r->blr_rdev == i->i_sb->s_dev)) {
++				return r;
++			}
++		}
++	}
++	return NULL;
++}
++
++static bl_layout_rec_t *
++layout_inode_find(struct inode *i)
++{
++	bl_layout_rec_t	*r;
++
++	spin_lock(&layout_hashtbl_lock);
++	r = __layout_inode_find(i);
++	spin_unlock(&layout_hashtbl_lock);
++	
++	return r;
++}
++
++static void
++layout_inode_del(struct inode *i)
++{
++	bl_layout_rec_t	*r;
++	
++	spin_lock(&layout_hashtbl_lock);
++	r = __layout_inode_find(i);
++	if (r) {
++		spin_lock(&r->blr_lock);
++		if (list_empty(&r->blr_layouts)) {
++			list_del(&r->blr_hash);
++			spin_unlock(&r->blr_lock);
++			kfree(r);
++		} else {
++			spin_unlock(&r->blr_lock);
++		}
++	} else {
++		dprintk("%s: failed to find inode [0x%x:%lu] in table for delete\n",
++			__func__, i->i_sb->s_dev, i->i_ino);
++	}
++	spin_unlock(&layout_hashtbl_lock);
++}
++
++/*
++ * map_state2name -- converts state in ascii string.
++ *
++ * Used for debug messages only.
++ */
++static char *
++map_state2name(enum pnfs_block_extent_state4 s)
++{
++	switch (s) {
++	case PNFS_BLOCK_READWRITE_DATA:	return "     RW";
++	case PNFS_BLOCK_READ_DATA:	return "     RO";
++	case PNFS_BLOCK_INVALID_DATA:	return "INVALID";
++	case PNFS_BLOCK_NONE_DATA:	return "   NONE";
++	default:
++		BUG();
++	}
++}
++
++static pnfs_blocklayout_devinfo_t *
++bld_alloc(struct list_head *volumes, int type)
++{
++	pnfs_blocklayout_devinfo_t *bld;
++	
++	bld = kmalloc(sizeof (*bld), GFP_KERNEL);
++	if (!bld)
++		return NULL;
++
++	memset(bld, 0, sizeof (*bld));
++	bld->bld_type = type;
++	list_add_tail(&bld->bld_list, volumes);
++
++	return bld;
++}
++
++static void
++bld_free(pnfs_blocklayout_devinfo_t *bld)
++{
++	list_del(&bld->bld_list);
++	kfree(bld);
++}
++
++static void
++print_bll(pnfs_blocklayout_layout_t *b, char *text)
++{
++	dprintk("    BLL: %s\n", text);
++	dprintk("    foff %Lu, soff %Lu, len %Lu, state %s\n",
++	    _2SECTS(b->bll_foff), _2SECTS(b->bll_soff), _2SECTS(b->bll_len),
++	    map_state2name(b->bll_es));
++}
++
++static inline void
++bll_collapse(bl_layout_rec_t *r, pnfs_blocklayout_layout_t *c)
++{
++	pnfs_blocklayout_layout_t	*n;
++	int				dbg_count	= 0;
++	u64				endpoint;
++	
++	BUG_ON(c->bll_es == PNFS_BLOCK_NONE_DATA);
++	while (c->bll_list.next != &r->blr_layouts) {
++		n = list_entry(c->bll_list.next,
++			       struct pnfs_blocklayout_layout, bll_list);
++		endpoint = BLL_S_END(c);
++		if ((n->bll_soff >= c->bll_soff) &&
++		    (n->bll_soff < endpoint)) {
++			if (endpoint < BLL_S_END(n)) {
++				/*
++				 * The following is possible.
++				 *
++				 * 
++				 * Existing: +---+                 +---+
++				 *      New: +-----------------------+
++				 * The client request merged entries together
++				 * but didn't require picking up all of the
++				 * last entry. So, we still need to delete
++				 * the last entry and add the remaining space
++				 * to the new entry.
++				 */
++				c->bll_len += BLL_S_END(n) - endpoint;
++			}
++			dbg_count++;
++			list_del(&n->bll_list);
++			kfree(n);
++		} else {
++			break;
++		}
++	}
++	/* ---- Debug only, remove before integration ---- */
++	if (dbg_count)
++		dprintk("  Collapsed %d cache entries between %Lu(s) and %Lu(s)\n",
++			dbg_count, _2SECTS(c->bll_soff), _2SECTS(BLL_S_END(c)));
++}
++
++static pnfs_blocklayout_layout_t *
++bll_alloc(u64 offset, u64 len, enum bl_cache_state state, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*n	= NULL;
++	
++	n = kmalloc(sizeof (*n), GFP_KERNEL);
++	if (n) {
++		memset(n, 0, sizeof (*n));
++		n->bll_foff		= offset;
++		n->bll_len		= len;
++		n->bll_cache_state	= state;
++		if (h)
++			list_add_tail(&n->bll_list, h);
++	}
++	return n;
++}
++
++static pnfs_blocklayout_layout_t *
++bll_alloc_dup(pnfs_blocklayout_layout_t *b, enum bl_cache_state c,
++	      struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*n	= NULL;
++	
++	n = bll_alloc(b->bll_foff, b->bll_len, c, h);
++	if (n) {
++		n->bll_es			= b->bll_es;
++		n->bll_soff			= b->bll_soff;
++		n->bll_vol_id.devid		= b->bll_vol_id.devid;
++	}
++	return n;
++}
++
++static inline boolean_t
++layout_conflict(pnfs_blocklayout_layout_t *b, u32 iomode,
++		enum pnfs_block_extent_state4 *s)
++{
++	/* ---- Normal case ---- */
++	*s = b->bll_es;
++	
++	switch (b->bll_es) {
++	case PNFS_BLOCK_READWRITE_DATA:
++		if (iomode == IOMODE_READ)
++			*s = PNFS_BLOCK_READ_DATA;
++		/* ---- Any use is permitted. ---- */
++		break;
++	case PNFS_BLOCK_READ_DATA:
++		/* ---- Committed as read only data. ---- */
++		if (iomode == IOMODE_RW)
++			return False;
++		break;
++	case PNFS_BLOCK_INVALID_DATA:
++		/* ---- Blocks have been allocated, but not initialized ---- */
++		if (iomode == IOMODE_READ)
++			*s = PNFS_BLOCK_NONE_DATA;
++		break;
++	case PNFS_BLOCK_NONE_DATA:
++		/* ---- Hole-y file. No backing store avail. ---- */
++		if (iomode != IOMODE_READ)
++			return False;
++		break;
++	default:
++		BUG();
++	}
++	return True;
++}
++
++#endif /* CONFIG_SPNFS_BLOCK */
+diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs/delegation.c
+--- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-08-31 20:41:19.144140225 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-31 20:42:05.512106042 -0400
+@@ -104,7 +104,8 @@ again:
+ 			continue;
+ 		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+ 			continue;
+-		if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
++		if (memcmp(state->stateid.u.data, stateid->u.data,
++			   sizeof(state->stateid.u.data)) != 0)
+ 			continue;
+ 		get_nfs_open_context(ctx);
+ 		spin_unlock(&inode->i_lock);
+@@ -133,8 +134,8 @@ void nfs_inode_reclaim_delegation(struct
+ 	if (delegation != NULL) {
+ 		spin_lock(&delegation->lock);
+ 		if (delegation->inode != NULL) {
+-			memcpy(delegation->stateid.data, res->delegation.data,
+-			       sizeof(delegation->stateid.data));
++			memcpy(delegation->stateid.u.data, res->delegation.u.data,
++			       sizeof(delegation->stateid.u.data));
+ 			delegation->type = res->delegation_type;
+ 			delegation->maxsize = res->maxsize;
+ 			oldcred = delegation->cred;
+@@ -187,8 +188,9 @@ static struct nfs_delegation *nfs_detach
+ 	if (delegation == NULL)
+ 		goto nomatch;
+ 	spin_lock(&delegation->lock);
+-	if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
+-				sizeof(delegation->stateid.data)) != 0)
++	if (stateid != NULL && memcmp(delegation->stateid.u.data,
++				      stateid->u.data,
++				      sizeof(delegation->stateid.u.data)) != 0)
+ 		goto nomatch_unlock;
+ 	list_del_rcu(&delegation->super_list);
+ 	delegation->inode = NULL;
+@@ -216,8 +218,8 @@ int nfs_inode_set_delegation(struct inod
+ 	delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
+ 	if (delegation == NULL)
+ 		return -ENOMEM;
+-	memcpy(delegation->stateid.data, res->delegation.data,
+-			sizeof(delegation->stateid.data));
++	memcpy(delegation->stateid.u.data, res->delegation.u.data,
++			sizeof(delegation->stateid.u.data));
+ 	delegation->type = res->delegation_type;
+ 	delegation->maxsize = res->maxsize;
+ 	delegation->change_attr = nfsi->change_attr;
+@@ -471,9 +473,7 @@ void nfs_expire_unreferenced_delegations
+ /*
+  * Asynchronous delegation recall!
+  */
+-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
+-				      int (*validate_stateid)(struct nfs_delegation *delegation,
+-							      const nfs4_stateid *stateid))
++int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
+ {
+ 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ 	struct nfs_delegation *delegation;
+@@ -481,7 +481,7 @@ int nfs_async_inode_return_delegation(st
+ 	rcu_read_lock();
+ 	delegation = rcu_dereference(NFS_I(inode)->delegation);
+ 
+-	if (!validate_stateid(delegation, stateid)) {
++	if (!clp->cl_mvops->validate_stateid(delegation, stateid)) {
+ 		rcu_read_unlock();
+ 		return -ENOENT;
+ 	}
+@@ -562,7 +562,8 @@ int nfs4_copy_delegation_stateid(nfs4_st
+ 	rcu_read_lock();
+ 	delegation = rcu_dereference(nfsi->delegation);
+ 	if (delegation != NULL) {
+-		memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
++		memcpy(dst->u.data, delegation->stateid.u.data,
++		       sizeof(dst->u.data));
+ 		ret = 1;
+ 	}
+ 	rcu_read_unlock();
+diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs/delegation.h
+--- linux-2.6.34.noarch/fs/nfs/delegation.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-08-31 20:42:05.513114811 -0400
+@@ -34,9 +34,7 @@ enum {
+ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+ int nfs_inode_return_delegation(struct inode *inode);
+-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
+-				      int (*validate_stateid)(struct nfs_delegation *delegation,
+-							      const nfs4_stateid *stateid));
++int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
+ void nfs_inode_return_delegation_noreclaim(struct inode *inode);
+ 
+ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
+diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
+--- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-08-31 20:41:19.196140434 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-31 20:42:05.553222784 -0400
+@@ -17,11 +17,19 @@
+ #include <linux/module.h>
+ #include <linux/exportfs.h>
+ 
++#include <linux/nfsd/nfsd4_pnfs.h>
++#if defined(CONFIG_SPNFS)
++#include <linux/nfsd4_spnfs.h>
++#if defined(CONFIG_SPNFS_BLOCK)
++#include <linux/nfsd4_block.h>
++#endif
++#endif
+ #include <linux/nfsd/syscall.h>
+ #include <net/ipv6.h>
+ 
+ #include "nfsd.h"
+ #include "nfsfh.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY	NFSDDBG_EXPORT
+ 
+@@ -352,6 +360,40 @@ static int svc_export_upcall(struct cach
+ 	return sunrpc_cache_pipe_upcall(cd, h, svc_export_request);
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static struct pnfsd_cb_operations pnfsd_cb_op = {
++	.cb_layout_recall = nfsd_layout_recall_cb,
++	.cb_device_notify = nfsd_device_notify_cb,
++
++	.cb_get_state = nfs4_pnfs_cb_get_state,
++	.cb_change_state = nfs4_pnfs_cb_change_state,
++};
++
++#if defined(CONFIG_SPNFS)
++static struct pnfs_export_operations spnfs_export_ops = {
++	.layout_type = spnfs_layout_type,
++	.get_device_info = spnfs_getdeviceinfo,
++	.get_device_iter = spnfs_getdeviceiter,
++	.layout_get = spnfs_layoutget,
++	.layout_return = spnfs_layoutreturn,
++};
++
++static struct pnfs_export_operations spnfs_ds_export_ops = {
++	.get_state = spnfs_get_state,
++};
++
++#if defined(CONFIG_SPNFS_BLOCK)
++static struct pnfs_export_operations bl_export_ops = {
++	.layout_type = bl_layout_type,
++	.get_device_info = bl_getdeviceinfo,
++	.get_device_iter = bl_getdeviceiter,
++	.layout_get = bl_layoutget,
++	.layout_return = bl_layoutreturn,
++};
++#endif /* CONFIG_SPNFS_BLOCK */
++#endif /* CONFIG_SPNFS */
++#endif /* CONFIG_PNFSD */
++
+ static struct svc_export *svc_export_update(struct svc_export *new,
+ 					    struct svc_export *old);
+ static struct svc_export *svc_export_lookup(struct svc_export *);
+@@ -395,6 +437,47 @@ static int check_export(struct inode *in
+ 		return -EINVAL;
+ 	}
+ 
++#if !defined(CONFIG_SPNFS)
++	if (inode->i_sb->s_pnfs_op &&
++	    (!inode->i_sb->s_pnfs_op->layout_type ||
++	     !inode->i_sb->s_pnfs_op->get_device_info ||
++	     !inode->i_sb->s_pnfs_op->layout_get)) {
++		dprintk("exp_export: export of invalid fs pnfs export ops.\n");
++		return -EINVAL;
++	}
++#endif /* CONFIG_SPNFS */
++
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++	if (!inode->i_sb->s_pnfs_op)
++		pnfsd_lexp_init(inode);
++	return 0;
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
++#if defined(CONFIG_SPNFS)
++#if defined(CONFIG_SPNFS_BLOCK)
++	if (pnfs_block_enabled(inode, *flags)) {
++		dprintk("set pnfs block export structure... \n");
++		inode->i_sb->s_pnfs_op = &bl_export_ops;
++	} else
++#endif /* CONFIG_SPNFS_BLOCK */
++	/*
++	 * spnfs_enabled() indicates we're an MDS.
++	 * XXX Better to check an export time option as well.
++	 */
++	if (spnfs_enabled()) {
++		dprintk("set spnfs export structure...\n");
++		inode->i_sb->s_pnfs_op = &spnfs_export_ops;
++	} else {
++		dprintk("%s spnfs not in use\n", __func__);
++
++		/*
++		 * get_state is needed if we're a DS using spnfs.
++		 * XXX Better to check an export time option instead.
++		 */
++		inode->i_sb->s_pnfs_op = &spnfs_ds_export_ops;
++	}
++#endif /* CONFIG_SPNFS */
++
+ 	return 0;
+ 
+ }
+@@ -586,6 +669,8 @@ static int svc_export_parse(struct cache
+ 					if (exp.ex_uuid == NULL)
+ 						err = -ENOMEM;
+ 				}
++			} else if (strcmp(buf, "pnfs") == 0) {
++				exp.ex_pnfs = 1;
+ 			} else if (strcmp(buf, "secinfo") == 0)
+ 				err = secinfo_parse(&mesg, buf, &exp);
+ 			else
+@@ -660,6 +745,8 @@ static int svc_export_show(struct seq_fi
+ 				seq_printf(m, "%02x", exp->ex_uuid[i]);
+ 			}
+ 		}
++		if (exp->ex_pnfs)
++			seq_puts(m, ",pnfs");
+ 		show_secinfo(m, exp);
+ 	}
+ 	seq_puts(m, ")\n");
+@@ -687,6 +774,7 @@ static void svc_export_init(struct cache
+ 	new->ex_fslocs.locations = NULL;
+ 	new->ex_fslocs.locations_count = 0;
+ 	new->ex_fslocs.migrated = 0;
++	new->ex_pnfs = 0;
+ }
+ 
+ static void export_update(struct cache_head *cnew, struct cache_head *citem)
+@@ -699,6 +787,7 @@ static void export_update(struct cache_h
+ 	new->ex_anon_uid = item->ex_anon_uid;
+ 	new->ex_anon_gid = item->ex_anon_gid;
+ 	new->ex_fsid = item->ex_fsid;
++	new->ex_pnfs = item->ex_pnfs;
+ 	new->ex_uuid = item->ex_uuid;
+ 	item->ex_uuid = NULL;
+ 	new->ex_pathname = item->ex_pathname;
+@@ -1635,8 +1724,17 @@ nfsd_export_init(void)
+ 	if (rv)
+ 		return rv;
+ 	rv = cache_register(&svc_expkey_cache);
+-	if (rv)
++	if (rv) {
+ 		cache_unregister(&svc_export_cache);
++		goto out;
++	}
++#if defined(CONFIG_PNFSD)
++	spin_lock(&pnfsd_cb_ctl.lock);
++	pnfsd_cb_ctl.module = THIS_MODULE;
++	pnfsd_cb_ctl.cb_op = &pnfsd_cb_op;
++	spin_unlock(&pnfsd_cb_ctl.lock);
++#endif /* CONFIG_PNFSD */
++out:
+ 	return rv;
+ 
+ }
+@@ -1664,6 +1762,12 @@ nfsd_export_shutdown(void)
+ 
+ 	exp_writelock();
+ 
++#if defined(CONFIG_PNFSD)
++	spin_lock(&pnfsd_cb_ctl.lock);
++	pnfsd_cb_ctl.module = NULL;
++	pnfsd_cb_ctl.cb_op = NULL;
++	spin_unlock(&pnfsd_cb_ctl.lock);
++#endif /* CONFIG_PNFSD */
+ 	cache_unregister(&svc_expkey_cache);
+ 	cache_unregister(&svc_export_cache);
+ 	svcauth_unix_purge();
+diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/direct.c
+--- linux-2.6.34.noarch/fs/nfs/direct.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-08-31 20:42:05.514196343 -0400
+@@ -267,6 +267,38 @@ static const struct rpc_call_ops nfs_rea
+ 	.rpc_release = nfs_direct_read_release,
+ };
+ 
++static long nfs_direct_read_execute(struct nfs_read_data *data,
++				    struct rpc_task_setup *task_setup_data,
++				    struct rpc_message *msg)
++{
++	struct inode *inode = data->inode;
++	struct rpc_task *task;
++
++	nfs_fattr_init(&data->fattr);
++	msg->rpc_argp = &data->args;
++	msg->rpc_resp = &data->res;
++
++	task_setup_data->task = &data->task;
++	task_setup_data->callback_data = data;
++	NFS_PROTO(inode)->read_setup(data, msg);
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++
++	rpc_put_task(task);
++
++	dprintk("NFS: %5u initiated direct read call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++
++	return 0;
++}
++
+ /*
+  * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
+  * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
+@@ -283,7 +315,6 @@ static ssize_t nfs_direct_read_schedule_
+ 	unsigned long user_addr = (unsigned long)iov->iov_base;
+ 	size_t count = iov->iov_len;
+ 	size_t rsize = NFS_SERVER(inode)->rsize;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = ctx->cred,
+ 	};
+@@ -343,26 +374,9 @@ static ssize_t nfs_direct_read_schedule_
+ 		data->res.fattr = &data->fattr;
+ 		data->res.eof = 0;
+ 		data->res.count = bytes;
+-		nfs_fattr_init(&data->fattr);
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+ 
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		NFS_PROTO(inode)->read_setup(data, &msg);
+-
+-		task = rpc_run_task(&task_setup_data);
+-		if (IS_ERR(task))
+-			break;
+-		rpc_put_task(task);
+-
+-		dprintk("NFS: %5u initiated direct read call "
+-			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				bytes,
+-				(unsigned long long)data->args.offset);
++		if (nfs_direct_read_execute(data, &task_setup_data, &msg))
++			break;
+ 
+ 		started += bytes;
+ 		user_addr += bytes;
+@@ -448,12 +462,15 @@ static void nfs_direct_free_writedata(st
+ }
+ 
+ #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
++static long nfs_direct_write_execute(struct nfs_write_data *data,
++				     struct rpc_task_setup *task_setup_data,
++				     struct rpc_message *msg);
++
+ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+ {
+ 	struct inode *inode = dreq->inode;
+ 	struct list_head *p;
+ 	struct nfs_write_data *data;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = dreq->ctx->cred,
+ 	};
+@@ -487,25 +504,7 @@ static void nfs_direct_write_reschedule(
+ 		 * Reuse data->task; data->args should not have changed
+ 		 * since the original request was sent.
+ 		 */
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+-		NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-		/*
+-		 * We're called via an RPC callback, so BKL is already held.
+-		 */
+-		task = rpc_run_task(&task_setup_data);
+-		if (!IS_ERR(task))
+-			rpc_put_task(task);
+-
+-		dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				data->args.count,
+-				(unsigned long long)data->args.offset);
++		nfs_direct_write_execute(data, &task_setup_data, &msg);
+ 	}
+ 
+ 	if (put_dreq(dreq))
+@@ -548,10 +547,31 @@ static const struct rpc_call_ops nfs_com
+ 	.rpc_release = nfs_direct_commit_release,
+ };
+ 
++static long nfs_direct_commit_execute(struct nfs_direct_req *dreq,
++				      struct nfs_write_data *data,
++				      struct rpc_task_setup *task_setup_data,
++				      struct rpc_message *msg)
++{
++	struct rpc_task *task;
++
++	NFS_PROTO(data->inode)->commit_setup(data, msg);
++
++	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
++	dreq->commit_data = NULL;
++
++	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++
++	rpc_put_task(task);
++	return 0;
++}
++
+ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
+ {
+ 	struct nfs_write_data *data = dreq->commit_data;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+@@ -579,16 +599,7 @@ static void nfs_direct_commit_schedule(s
+ 	data->res.verf = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	NFS_PROTO(data->inode)->commit_setup(data, &msg);
+-
+-	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
+-	dreq->commit_data = NULL;
+-
+-	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (!IS_ERR(task))
+-		rpc_put_task(task);
++	nfs_direct_commit_execute(dreq, data, &task_setup_data, &msg);
+ }
+ 
+ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+@@ -690,6 +701,36 @@ static const struct rpc_call_ops nfs_wri
+ 	.rpc_release = nfs_direct_write_release,
+ };
+ 
++static long nfs_direct_write_execute(struct nfs_write_data *data,
++				     struct rpc_task_setup *task_setup_data,
++				     struct rpc_message *msg)
++{
++	struct inode *inode = data->inode;
++	struct rpc_task *task;
++
++	task_setup_data->task = &data->task;
++	task_setup_data->callback_data = data;
++	msg->rpc_argp = &data->args;
++	msg->rpc_resp = &data->res;
++	NFS_PROTO(inode)->write_setup(data, msg);
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++
++	rpc_put_task(task);
++
++	dprintk("NFS: %5u initiated direct write call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++
++	return 0;
++}
++
+ /*
+  * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+  * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
+@@ -705,7 +746,6 @@ static ssize_t nfs_direct_write_schedule
+ 	struct inode *inode = ctx->path.dentry->d_inode;
+ 	unsigned long user_addr = (unsigned long)iov->iov_base;
+ 	size_t count = iov->iov_len;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = ctx->cred,
+ 	};
+@@ -771,24 +811,8 @@ static ssize_t nfs_direct_write_schedule
+ 		data->res.verf = &data->verf;
+ 		nfs_fattr_init(&data->fattr);
+ 
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+-		NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-		task = rpc_run_task(&task_setup_data);
+-		if (IS_ERR(task))
+-			break;
+-		rpc_put_task(task);
+-
+-		dprintk("NFS: %5u initiated direct write call "
+-			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				bytes,
+-				(unsigned long long)data->args.offset);
++		if (nfs_direct_write_execute(data, &task_setup_data, &msg))
++			break;
+ 
+ 		started += bytes;
+ 		user_addr += bytes;
+diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kconfig
+--- linux-2.6.34.noarch/fs/nfsd/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-08-31 20:42:05.549222922 -0400
+@@ -79,3 +79,52 @@ config NFSD_V4
+ 	  available from http://linux-nfs.org/.
+ 
+ 	  If unsure, say N.
++
++config PNFSD
++	bool "NFSv4.1 server support for Parallel NFS (pNFS) (DEVELOPER ONLY)"
++	depends on NFSD_V4 && EXPERIMENTAL
++	select EXPORTFS_FILE_LAYOUT
++	help
++	  This option enables support for the parallel NFS features of the
++	  minor version 1 of the NFSv4 protocol (draft-ietf-nfsv4-minorversion1)
++	  in the kernel's NFS server.
++
++	  Unless you're an NFS developer, say N.
++
++config PNFSD_LOCAL_EXPORT
++	bool "Enable pNFS support for exporting local filesystems for debugging purposes"
++	depends on PNFSD
++	help
++	  Say Y here if you want your pNFS server to export local file systems
++	  over the files layout type.  With this option the MDS (metadata
++	  server) functions also as a single DS (data server).  This is mostly
++	  useful for development and debugging purposes.
++
++	  If unsure, say N.
++
++config SPNFS
++	bool "Provide spNFS server support (EXPERIMENTAL)"
++	depends on PNFSD
++	select RPCSEC_GSS_KRB5
++	help
++	  Say Y here if you want spNFS server support.
++
++	  If unsure, say N.
++
++config SPNFS_LAYOUTSEGMENTS
++	bool "Allow spNFS to return partial file layouts (EXPERIMENTAL)"
++	depends on SPNFS
++	select RPCSEC_GSS_KRB5
++	help
++	  Say Y here if you want spNFS to be able to return layout segments.
++
++	  If unsure, say N.
++
++config SPNFS_BLOCK
++	bool "Provide Block Layout server support (EXPERIMENTAL)"
++	depends on SPNFS
++	select EXPORTFS_BLOCK_LAYOUT
++	help
++	  Say Y here if you want spNFS block layout support.
++
++	  If unsure, say N.
+diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/Makefile
+--- linux-2.6.34.noarch/fs/nfsd/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-08-31 20:42:05.549222922 -0400
+@@ -11,3 +11,7 @@ nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs
+ nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
+ nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
+ 			   nfs4acl.o nfs4callback.o nfs4recover.o
++nfsd-$(CONFIG_PNFSD)	+= nfs4pnfsd.o nfs4pnfsdlm.o nfs4pnfsds.o
++nfsd-$(CONFIG_PNFSD_LOCAL_EXPORT) += pnfsd_lexp.o
++nfsd-$(CONFIG_SPNFS)	+= spnfs_com.o spnfs_ops.o
++nfsd-$(CONFIG_SPNFS_BLOCK) += bl_com.o bl_ops.o
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-08-31 20:41:19.197150385 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-31 20:42:05.554114789 -0400
+@@ -40,7 +40,6 @@
+ 
+ #define NFSPROC4_CB_NULL 0
+ #define NFSPROC4_CB_COMPOUND 1
+-#define NFS4_STATEID_SIZE 16
+ 
+ /* Index of predefined Linux callback client operations */
+ 
+@@ -48,11 +47,17 @@ enum {
+ 	NFSPROC4_CLNT_CB_NULL = 0,
+ 	NFSPROC4_CLNT_CB_RECALL,
+ 	NFSPROC4_CLNT_CB_SEQUENCE,
++#if defined(CONFIG_PNFSD)
++	NFSPROC4_CLNT_CB_LAYOUT,
++	NFSPROC4_CLNT_CB_DEVICE,
++#endif
+ };
+ 
+ enum nfs_cb_opnum4 {
+ 	OP_CB_RECALL            = 4,
++	OP_CB_LAYOUT            = 5,
+ 	OP_CB_SEQUENCE          = 11,
++	OP_CB_DEVICE            = 14,
+ };
+ 
+ #define NFS4_MAXTAGLEN		20
+@@ -78,6 +83,19 @@ enum nfs_cb_opnum4 {
+ #define NFS4_dec_cb_recall_sz		(cb_compound_dec_hdr_sz  +      \
+ 					cb_sequence_dec_sz +            \
+ 					op_dec_sz)
++#define NFS4_enc_cb_layout_sz		(cb_compound_enc_hdr_sz +       \
++					cb_sequence_enc_sz +            \
++					1 + 3 +                         \
++					enc_nfs4_fh_sz + 4)
++#define NFS4_dec_cb_layout_sz		(cb_compound_dec_hdr_sz  +      \
++					cb_sequence_dec_sz +            \
++					op_dec_sz)
++#define NFS4_enc_cb_device_sz		(cb_compound_enc_hdr_sz +       \
++					cb_sequence_enc_sz +            \
++					1 + 6)
++#define NFS4_dec_cb_device_sz		(cb_compound_dec_hdr_sz  +      \
++					cb_sequence_dec_sz +            \
++					op_dec_sz)
+ 
+ /*
+ * Generic encode routines from fs/nfs/nfs4xdr.c
+@@ -94,6 +112,10 @@ xdr_writemem(__be32 *p, const void *ptr,
+ }
+ 
+ #define WRITE32(n)               *p++ = htonl(n)
++#define WRITE64(n)               do {				\
++	*p++ = htonl((u32)((n) >> 32));				\
++	*p++ = htonl((u32)(n));					\
++} while (0)
+ #define WRITEMEM(ptr,nbytes)     do {                           \
+ 	p = xdr_writemem(p, ptr, nbytes);                       \
+ } while (0)
+@@ -204,6 +226,16 @@ nfs_cb_stat_to_errno(int stat)
+  */
+ 
+ static void
++encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
++{
++	__be32 *p;
++
++	RESERVE_SPACE(sizeof(stateid_t));
++	WRITE32(sid->si_generation);
++	WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
++}
++
++static void
+ encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
+ {
+ 	__be32 * p;
+@@ -228,10 +260,10 @@ encode_cb_recall(struct xdr_stream *xdr,
+ 	__be32 *p;
+ 	int len = dp->dl_fh.fh_size;
+ 
+-	RESERVE_SPACE(12+sizeof(dp->dl_stateid) + len);
++	RESERVE_SPACE(4);
+ 	WRITE32(OP_CB_RECALL);
+-	WRITE32(dp->dl_stateid.si_generation);
+-	WRITEMEM(&dp->dl_stateid.si_opaque, sizeof(stateid_opaque_t));
++	encode_stateid(xdr, &dp->dl_stateid);
++	RESERVE_SPACE(8 + (XDR_QUADLEN(len) << 2));
+ 	WRITE32(0); /* truncate optimization not implemented */
+ 	WRITE32(len);
+ 	WRITEMEM(&dp->dl_fh.fh_base, len);
+@@ -259,6 +291,111 @@ encode_cb_sequence(struct xdr_stream *xd
+ 	hdr->nops++;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++#include "pnfsd.h"
++
++static void
++encode_cb_layout(struct xdr_stream *xdr, struct nfs4_layoutrecall *clr,
++		 struct nfs4_cb_compound_hdr *hdr)
++{
++	u32 *p;
++
++	BUG_ON(hdr->minorversion == 0);
++
++	RESERVE_SPACE(20);
++	WRITE32(OP_CB_LAYOUT);
++	WRITE32(clr->cb.cbl_seg.layout_type);
++	WRITE32(clr->cb.cbl_seg.iomode);
++	WRITE32(clr->cb.cbl_layoutchanged);
++	WRITE32(clr->cb.cbl_recall_type);
++	if (unlikely(clr->cb.cbl_recall_type == RETURN_FSID)) {
++		struct nfs4_fsid fsid = clr->cb.cbl_fsid;
++
++		RESERVE_SPACE(16);
++		WRITE64(fsid.major);
++		WRITE64(fsid.minor);
++		dprintk("%s: type %x iomode %d changed %d recall_type %d "
++			"fsid 0x%llx-0x%llx\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type, fsid.major, fsid.minor);
++	} else if (clr->cb.cbl_recall_type == RETURN_FILE) {
++		int len = clr->clr_file->fi_fhlen;
++		stateid_t *cbl_sid = (stateid_t *)&clr->cb.cbl_sid;
++
++		RESERVE_SPACE(20 + len);
++		WRITE32(len);
++		WRITEMEM(clr->clr_file->fi_fhval, len);
++		WRITE64(clr->cb.cbl_seg.offset);
++		WRITE64(clr->cb.cbl_seg.length);
++		encode_stateid(xdr, cbl_sid);
++		dprintk("%s: type %x iomode %d changed %d recall_type %d "
++			"offset %lld length %lld stateid " STATEID_FMT "\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type,
++			clr->cb.cbl_seg.offset, clr->cb.cbl_seg.length,
++			STATEID_VAL(cbl_sid));
++	} else {
++		dprintk("%s: type %x iomode %d changed %d recall_type %d\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type);
++	}
++	hdr->nops++;
++}
++
++static void
++encode_cb_device(struct xdr_stream *xdr, struct nfs4_notify_device *nd,
++		 struct nfs4_cb_compound_hdr *hdr)
++{
++	u32 *p;
++	int i;
++	int len					= nd->nd_list->cbd_len;
++	struct nfsd4_pnfs_cb_dev_item *cbd	= nd->nd_list->cbd_list;
++
++	dprintk("NFSD %s: --> num %d\n", __func__, len);
++	/* This op must never be encoded on a minorversion 0 callback. */
++	BUG_ON(hdr->minorversion == 0);
++	/* 8 bytes: the opcode plus the element count of the change array */
++	RESERVE_SPACE(8);
++	WRITE32(OP_CB_DEVICE);
++
++	/* notify4 cnda_changes<>; */
++	WRITE32(len);
++	for (i = 0; i < len; i++) {
++		dprintk("%s: nt %d lt %d devid x%llx-x%llx im %d i %d\n",
++			__func__, cbd[i].cbd_notify_type,
++			cbd[i].cbd_layout_type,
++			cbd[i].cbd_devid.sbid,
++			cbd[i].cbd_devid.devid,
++			cbd[i].cbd_immediate, i);
++		/* Only CHANGE and DELETE notifications can be encoded here. */
++		BUG_ON(cbd[i].cbd_notify_type != NOTIFY_DEVICEID4_CHANGE &&
++		       cbd[i].cbd_notify_type != NOTIFY_DEVICEID4_DELETE);
++		RESERVE_SPACE(32);	/* 3 words + layout type + 16-byte devid */
++		/* bitmap4         notify_mask; */
++		WRITE32(1);
++		WRITE32(cbd[i].cbd_notify_type);
++		/* opaque     notify_vals<>; */
++		if (cbd[i].cbd_notify_type == NOTIFY_DEVICEID4_CHANGE)
++			WRITE32(24);
++		else
++			WRITE32(20);
++		WRITE32(cbd[i].cbd_layout_type);
++		WRITE64(cbd[i].cbd_devid.sbid);
++		WRITE64(cbd[i].cbd_devid.devid);
++		/* CHANGE carries one extra word, hence 24 rather than 20 above */
++		if (cbd[i].cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) {
++			RESERVE_SPACE(4);
++			WRITE32(cbd[i].cbd_immediate);
++		}
++	}
++	hdr->nops++;
++}
++#endif /* CONFIG_PNFSD */
++
+ static int
+ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
+ {
+@@ -288,6 +425,45 @@ nfs4_xdr_enc_cb_recall(struct rpc_rqst *
+ 	return 0;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static int
++nfs4_xdr_enc_cb_layout(struct rpc_rqst *req, u32 *p,
++		       struct nfs4_rpc_args *rpc_args)
++{
++	struct xdr_stream xdr;
++	struct nfs4_layoutrecall *args = rpc_args->args_op;
++	struct nfs4_cb_compound_hdr hdr = {
++		.ident = 0,
++		.minorversion = rpc_args->args_seq.cbs_minorversion,
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_cb_compound_hdr(&xdr, &hdr);
++	encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
++	encode_cb_layout(&xdr, args, &hdr);
++	encode_cb_nops(&hdr);
++	return 0;
++}
++
++static int
++nfs4_xdr_enc_cb_device(struct rpc_rqst *req, u32 *p,
++		       struct nfs4_rpc_args *rpc_args)
++{
++	struct xdr_stream xdr;
++	struct nfs4_notify_device *args = rpc_args->args_op;
++	struct nfs4_cb_compound_hdr hdr = {
++		.ident = 0,
++		.minorversion = rpc_args->args_seq.cbs_minorversion,
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_cb_compound_hdr(&xdr, &hdr);
++	encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
++	encode_cb_device(&xdr, args, &hdr);
++	encode_cb_nops(&hdr);
++	return 0;
++}
++#endif /* CONFIG_PNFSD */
+ 
+ static int
+ decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){
+@@ -403,6 +579,48 @@ out:
+ 	return status;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static int
++nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp, u32 *p,
++		       struct nfsd4_cb_sequence *seq)
++{
++	struct xdr_stream xdr;
++	struct nfs4_cb_compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_cb_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_cb_sequence(&xdr, seq, rqstp);
++	if (status)
++		goto out;
++	status = decode_cb_op_hdr(&xdr, OP_CB_LAYOUT);
++out:
++	return status;
++}
++
++static int
++nfs4_xdr_dec_cb_device(struct rpc_rqst *rqstp, u32 *p,
++		       struct nfsd4_cb_sequence *seq)
++{
++	struct xdr_stream xdr;
++	struct nfs4_cb_compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_cb_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_cb_sequence(&xdr, seq, rqstp);
++	if (status)
++		goto out;
++	status = decode_cb_op_hdr(&xdr, OP_CB_DEVICE);
++out:
++	return status;
++}
++#endif /* CONFIG_PNFSD */
++
+ /*
+  * RPC procedure tables
+  */
+@@ -420,6 +638,10 @@ out:
+ static struct rpc_procinfo     nfs4_cb_procedures[] = {
+     PROC(CB_NULL,      NULL,     enc_cb_null,     dec_cb_null),
+     PROC(CB_RECALL,    COMPOUND,   enc_cb_recall,      dec_cb_recall),
++#if defined(CONFIG_PNFSD)
++    PROC(CB_LAYOUT,    COMPOUND,   enc_cb_layout,      dec_cb_layout),
++    PROC(CB_DEVICE,    COMPOUND,   enc_cb_device,      dec_cb_device),
++#endif
+ };
+ 
+ static struct rpc_version       nfs_cb_version4 = {
+@@ -606,10 +828,9 @@ out:
+  * TODO: cb_sequence should support referring call lists, cachethis, multiple
+  * slots, and mark callback channel down on communication errors.
+  */
+-static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
++static void nfsd4_cb_prepare_sequence(struct rpc_task *task,
++				      struct nfs4_client *clp)
+ {
+-	struct nfs4_delegation *dp = calldata;
+-	struct nfs4_client *clp = dp->dl_client;
+ 	struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
+ 	u32 minorversion = clp->cl_cb_conn.cb_minorversion;
+ 	int status = 0;
+@@ -629,11 +850,15 @@ static void nfsd4_cb_prepare(struct rpc_
+ 	rpc_call_start(task);
+ }
+ 
+-static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
++static void nfsd4_cb_recall_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs4_delegation *dp = calldata;
+-	struct nfs4_client *clp = dp->dl_client;
++	nfsd4_cb_prepare_sequence(task, dp->dl_client);
++}
+ 
++static void nfsd4_cb_done_sequence(struct rpc_task *task,
++				   struct nfs4_client *clp)
++{
+ 	dprintk("%s: minorversion=%d\n", __func__,
+ 		clp->cl_cb_conn.cb_minorversion);
+ 
+@@ -657,7 +882,7 @@ static void nfsd4_cb_recall_done(struct 
+ 	struct nfs4_client *clp = dp->dl_client;
+ 	struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
+ 
+-	nfsd4_cb_done(task, calldata);
++	nfsd4_cb_done_sequence(task, clp);
+ 
+ 	if (current_rpc_client == NULL) {
+ 		/* We're shutting down; give up. */
+@@ -688,7 +913,7 @@ static void nfsd4_cb_recall_done(struct 
+ 	if (dp->dl_retries--) {
+ 		rpc_delay(task, 2*HZ);
+ 		task->tk_status = 0;
+-		rpc_restart_call(task);
++		rpc_restart_call_prepare(task);
+ 		return;
+ 	} else {
+ 		atomic_set(&clp->cl_cb_set, 0);
+@@ -704,7 +929,7 @@ static void nfsd4_cb_recall_release(void
+ }
+ 
+ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
+-	.rpc_call_prepare = nfsd4_cb_prepare,
++	.rpc_call_prepare = nfsd4_cb_recall_prepare,
+ 	.rpc_call_done = nfsd4_cb_recall_done,
+ 	.rpc_release = nfsd4_cb_recall_release,
+ };
+@@ -781,3 +1006,173 @@ void nfsd4_cb_recall(struct nfs4_delegat
+ {
+ 	queue_work(callback_wq, &dp->dl_recall.cb_work);
+ }
++
++#if defined(CONFIG_PNFSD)
++static void nfsd4_cb_layout_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	nfsd4_cb_prepare_sequence(task, clr->clr_client);
++}
++
++static void nfsd4_cb_layout_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	struct nfs4_client *clp = clr->clr_client;
++	/* rpc_call_done hook for layout recalls; calldata is the recall. */
++	nfsd4_cb_done_sequence(task, clp);
++	/* Only a non-zero status needs any further handling. */
++	if (!task->tk_status)
++		return;
++	/* Report the failure, then react to the specific error. */
++	printk("%s: clp %p cb_client %p fp %p failed with status %d\n",
++	       __func__,
++	       clp,
++	       clp->cl_cb_client,
++	       clr->clr_file,
++	       task->tk_status);
++
++	switch (task->tk_status) {
++	case -EIO:
++		/* Network partition? */
++		atomic_set(&clp->cl_cb_set, 0);
++		warn_no_callback_path(clp, task->tk_status);
++		/* FIXME:
++		 * The pNFS standard states that we may only expire the
++		 * client after at least "lease time", e.g. lease-time * 2,
++		 * when failing to communicate a recall
++		 */
++		break;
++	case -NFS4ERR_DELAY:
++		/* Poll the client until it's done with the layout */
++		rpc_delay(task, HZ/100); /* 10 milliseconds */
++		task->tk_status = 0;
++		rpc_restart_call_prepare(task);
++		break;
++	case -NFS4ERR_NOMATCHING_LAYOUT:
++		task->tk_status = 0;	/* not treated as an RPC failure */
++		nomatching_layout(clr);
++	}
++}
++
++static void nfsd4_cb_layout_release(void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	kfree(clr->clr_args);
++	clr->clr_args = NULL;
++	put_layoutrecall(clr);
++}
++
++static const struct rpc_call_ops nfsd4_cb_layout_ops = {
++	.rpc_call_prepare = nfsd4_cb_layout_prepare,
++	.rpc_call_done = nfsd4_cb_layout_done,
++	.rpc_release = nfsd4_cb_layout_release,
++};
++
++/*
++ * Queue an asynchronous CB_LAYOUT recall for @clr.  Called with state lock.
++ * Returns 0 or a negative errno; on failure the clr reference is dropped.
++ */
++int
++nfsd4_cb_layout(struct nfs4_layoutrecall *clr)
++{
++	struct nfs4_client *clp = clr->clr_client;
++	struct rpc_clnt *clnt = clp->cl_cb_client;
++	struct nfs4_rpc_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_LAYOUT],
++		.rpc_cred = callback_cred
++	};
++	int status;
++
++	args = kzalloc(sizeof(*args), GFP_KERNEL);
++	if (!args) {
++		/* nothing was handed to the RPC layer: drop the ref here */
++		put_layoutrecall(clr);
++		status = -ENOMEM;
++		goto out;
++	}
++	clr->clr_args = args;
++	args->args_op = clr;
++	msg.rpc_argp = args;
++	/* on error the RPC layer has already run nfsd4_cb_layout_release() */
++	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
++				&nfsd4_cb_layout_ops, clr);
++out:
++	dprintk("NFSD: nfsd4_cb_layout: status %d\n", status);
++	return status;
++}
++
++static void nfsd4_cb_device_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	nfsd4_cb_prepare_sequence(task, cbnd->nd_client);
++}
++
++static void nfsd4_cb_device_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	struct nfs4_client *clp = cbnd->nd_client;
++
++	nfsd4_cb_done_sequence(task, clp);
++
++	dprintk("%s: clp %p cb_client %p: status %d\n",
++	       __func__,
++	       clp,
++	       clp->cl_cb_client,
++	       task->tk_status);
++
++	if (task->tk_status == -EIO) {
++		/* Network partition? */
++		atomic_set(&clp->cl_cb_set, 0);
++		warn_no_callback_path(clp, task->tk_status);
++	}
++}
++
++static void nfsd4_cb_device_release(void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	kfree(cbnd->nd_args);
++	cbnd->nd_args = NULL;
++	kfree(cbnd);
++}
++
++static const struct rpc_call_ops nfsd4_cb_device_ops = {
++	.rpc_call_prepare = nfsd4_cb_device_prepare,
++	.rpc_call_done = nfsd4_cb_device_done,
++	.rpc_release = nfsd4_cb_device_release,
++};
++
++/*
++ * Queue an asynchronous CB_DEVICE notification.  Called with state lock.
++ * Returns 0 or a negative errno.
++ */
++int
++nfsd4_cb_notify_device(struct nfs4_notify_device *cbnd)
++{
++	struct nfs4_client *clp = cbnd->nd_client;
++	struct rpc_clnt *clnt = clp->cl_cb_client;
++	struct nfs4_rpc_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_DEVICE],
++		.rpc_cred = callback_cred
++	};
++	int status = -ENOMEM;
++
++	dprintk("%s: clp %p\n", __func__, clp);
++
++	args = kzalloc(sizeof(*args), GFP_KERNEL);
++	if (!args)
++		goto out;
++	/* let nfsd4_cb_device_release() free args; they used to leak */
++	cbnd->nd_args = args;
++	args->args_op = cbnd;
++	msg.rpc_argp = args;
++
++	/* on error the RPC layer has already run the release callback */
++	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
++				&nfsd4_cb_device_ops, cbnd);
++out:
++	dprintk("%s: status %d\n", __func__, status);
++	return status;
++}
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-08-31 20:42:05.556172071 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-08-31 20:42:05.556172071 -0400
+@@ -0,0 +1,1679 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *****************************************************************************/
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY                NFSDDBG_PROC
++
++/* Globals */
++static u32 current_layoutid = 1;
++
++/*
++ * Currently used for manipulating the layout state.
++ */
++static DEFINE_SPINLOCK(layout_lock);
++
++#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_SMP)
++#  define BUG_ON_UNLOCKED_LAYOUT() BUG_ON(!spin_is_locked(&layout_lock))
++#else
++#  define BUG_ON_UNLOCKED_LAYOUT()
++#endif
++
++/*
++ * Layout state - NFSv4.1 pNFS
++ */
++static struct kmem_cache *pnfs_layout_slab;
++static struct kmem_cache *pnfs_layoutrecall_slab;
++
++/* hash table for nfsd4_pnfs_deviceid.sbid */
++#define SBID_HASH_BITS	8
++#define SBID_HASH_SIZE	(1 << SBID_HASH_BITS)
++#define SBID_HASH_MASK	(SBID_HASH_SIZE - 1)
++
++struct sbid_tracker {
++	u64 id;
++	struct super_block *sb;
++	struct list_head hash;
++};
++
++static u64 current_sbid;
++static struct list_head sbid_hashtbl[SBID_HASH_SIZE];
++
++static inline unsigned long
++sbid_hashval(struct super_block *sb)
++{
++	return hash_ptr(sb, SBID_HASH_BITS);
++}
++
++static inline struct sbid_tracker *
++alloc_sbid(void)
++{
++	return kmalloc(sizeof(struct sbid_tracker), GFP_KERNEL);
++}
++
++static void
++destroy_sbid(struct sbid_tracker *sbid)
++{
++	spin_lock(&layout_lock);
++	list_del(&sbid->hash);
++	spin_unlock(&layout_lock);
++	kfree(sbid);
++}
++
++void
++nfsd4_free_pnfs_slabs(void)
++{
++	int i;
++	struct sbid_tracker *sbid;
++
++	nfsd4_free_slab(&pnfs_layout_slab);
++	nfsd4_free_slab(&pnfs_layoutrecall_slab);
++
++	for (i = 0; i < SBID_HASH_SIZE; i++) {
++		while (!list_empty(&sbid_hashtbl[i])) {
++			sbid = list_first_entry(&sbid_hashtbl[i],
++						struct sbid_tracker,
++						hash);
++			destroy_sbid(sbid);
++		}
++	}
++}
++
++int
++nfsd4_init_pnfs_slabs(void)
++{
++	int i;
++
++	/* Create the layout slabs and the sbid hash.  The buckets go first:
++	 * nfsd4_free_pnfs_slabs() walks them unconditionally and a zeroed
++	 * list_head does not test as empty. */
++	for (i = 0; i < SBID_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&sbid_hashtbl[i]);
++	pnfs_layout_slab = kmem_cache_create("pnfs_layouts",
++			sizeof(struct nfs4_layout), 0, 0, NULL);
++	if (pnfs_layout_slab == NULL)
++		return -ENOMEM;
++	pnfs_layoutrecall_slab = kmem_cache_create("pnfs_layoutrecalls",
++			sizeof(struct nfs4_layoutrecall), 0, 0, NULL);
++	if (pnfs_layoutrecall_slab == NULL)
++		return -ENOMEM;
++	return 0;
++}
++
++/* XXX: Need to implement the notify types and track which
++ * clients have which devices. */
++void pnfs_set_device_notify(clientid_t *clid, unsigned int types)
++{
++	struct nfs4_client *clp;
++	dprintk("%s: -->\n", __func__);
++
++	nfs4_lock_state();
++	/* Indicate that client has a device so we can only notify
++	 * the correct clients */
++	clp = find_confirmed_client(clid);
++	if (clp) {
++		atomic_inc(&clp->cl_deviceref);
++		dprintk("%s: Incr device count (clnt %p) to %d\n",
++			__func__, clp, atomic_read(&clp->cl_deviceref));
++	}
++	nfs4_unlock_state();
++}
++
++/* Clear notifications for this client
++ * XXX: Do we need to loop through and clean up all
++ *      krefs when nfsd cleans up the client? */
++void pnfs_clear_device_notify(struct nfs4_client *clp)
++{
++	atomic_dec(&clp->cl_deviceref);
++	dprintk("%s: Decr device count (clnt %p) to %d\n",
++		__func__, clp, atomic_read(&clp->cl_deviceref));
++}
++
++static struct nfs4_layout_state *
++alloc_init_layout_state(struct nfs4_client *clp, struct nfs4_file *fp,
++			stateid_t *stateid)
++{
++	struct nfs4_layout_state *new;
++
++	/* FIXME: use a kmem_cache */
++	new = kzalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return new;
++	get_nfs4_file(fp);
++	INIT_LIST_HEAD(&new->ls_perfile);
++	INIT_LIST_HEAD(&new->ls_layouts);
++	kref_init(&new->ls_ref);
++	new->ls_client = clp;
++	new->ls_file = fp;
++	new->ls_stateid.si_boot = stateid->si_boot;
++	new->ls_stateid.si_stateownerid = 0; /* identifies layout stateid */
++	new->ls_stateid.si_generation = 1;
++	spin_lock(&layout_lock);
++	new->ls_stateid.si_fileid = current_layoutid++;
++	list_add(&new->ls_perfile, &fp->fi_layout_states);
++	spin_unlock(&layout_lock);
++	return new;
++}
++
++static inline void
++get_layout_state(struct nfs4_layout_state *ls)
++{
++	kref_get(&ls->ls_ref);
++}
++
++static void
++destroy_layout_state_common(struct nfs4_layout_state *ls)
++{
++	struct nfs4_file *fp = ls->ls_file;
++
++	dprintk("pNFS %s: ls %p fp %p clp %p\n", __func__, ls, fp,
++		ls->ls_client);
++	BUG_ON(!list_empty(&ls->ls_layouts));
++	kfree(ls);
++	put_nfs4_file(fp);
++}
++
++static void
++destroy_layout_state(struct kref *kref)
++{
++	struct nfs4_layout_state *ls =
++			container_of(kref, struct nfs4_layout_state, ls_ref);
++
++	spin_lock(&layout_lock);
++	list_del(&ls->ls_perfile);
++	spin_unlock(&layout_lock);
++	destroy_layout_state_common(ls);
++}
++
++static void
++destroy_layout_state_locked(struct kref *kref)
++{
++	struct nfs4_layout_state *ls =
++			container_of(kref, struct nfs4_layout_state, ls_ref);
++
++	list_del(&ls->ls_perfile);
++	destroy_layout_state_common(ls);
++}
++
++static inline void
++put_layout_state(struct nfs4_layout_state *ls)
++{
++	dprintk("pNFS %s: ls %p ls_ref %d\n", __func__, ls,
++		atomic_read(&ls->ls_ref.refcount));
++	kref_put(&ls->ls_ref, destroy_layout_state);
++}
++
++static inline void
++put_layout_state_locked(struct nfs4_layout_state *ls)
++{
++	dprintk("pNFS %s: ls %p ls_ref %d\n", __func__, ls,
++		atomic_read(&ls->ls_ref.refcount));
++	kref_put(&ls->ls_ref, destroy_layout_state_locked);
++}
++
++/*
++ * Search the fp->fi_layout_states list for a layout state with the clientid.
++ * If not found, then this is a 'first open/delegation/lock stateid' from
++ * the client for this file.
++ * Called under the layout_lock.
++ */
++static struct nfs4_layout_state *
++find_get_layout_state(struct nfs4_client *clp, struct nfs4_file *fp)
++{
++	struct nfs4_layout_state *ls;
++
++	BUG_ON_UNLOCKED_LAYOUT();
++	list_for_each_entry(ls, &fp->fi_layout_states, ls_perfile) {
++		if (ls->ls_client == clp) {
++			dprintk("pNFS %s: before GET ls %p ls_ref %d\n",
++				__func__, ls,
++				atomic_read(&ls->ls_ref.refcount));
++			get_layout_state(ls);
++			return ls;
++		}
++	}
++	return NULL;
++}
++
++static __be32
++verify_stateid(struct nfs4_file *fp, stateid_t *stateid)
++{
++	struct nfs4_stateid *local = NULL;
++	struct nfs4_delegation *temp = NULL;
++
++	/* check if open or lock stateid */
++	local = find_stateid(stateid, RD_STATE);
++	if (local)
++		return 0;
++	temp = find_delegation_stateid(fp->fi_inode, stateid);
++	if (temp)
++		return 0;
++	return nfserr_bad_stateid;
++}
++
++/*
++ * nfs4_process_layout_stateid()
++ *
++ * We have looked up the nfs4_file corresponding to the current_fh, and
++ * confirmed the clientid. Pull the few tests from nfs4_preprocess_stateid_op()
++ * that make sense with a layout stateid.
++ *
++ * Called with the state_lock held
++ * Returns zero and stateid is updated, or error.
++ *
++ * Note: the struct nfs4_layout_state pointer is only set by layoutget.
++ */
++static __be32
++nfs4_process_layout_stateid(struct nfs4_client *clp, struct nfs4_file *fp,
++			    stateid_t *stateid, struct nfs4_layout_state **lsp)
++{
++	struct nfs4_layout_state *ls = NULL;
++	__be32 status = 0;
++
++	dprintk("--> %s clp %p fp %p \n", __func__, clp, fp);
++
++	dprintk("%s: operation stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(stateid));
++
++	status = nfs4_check_stateid(stateid);
++	if (status)
++		goto out;
++
++	/* Is this the first use of this layout ? */
++	spin_lock(&layout_lock);
++	ls = find_get_layout_state(clp, fp);
++	spin_unlock(&layout_lock);
++	if (!ls) {
++		/* Only alloc layout state on layoutget (which sets lsp). */
++		if (!lsp) {
++			dprintk("%s ERROR: Not layoutget & no layout stateid\n",
++				__func__);
++			status = nfserr_bad_stateid;
++			goto out;
++		}
++		dprintk("%s Initial stateid for layout: file %p client %p\n",
++			__func__, fp, clp);
++
++		/* verify input stateid */
++		status = verify_stateid(fp, stateid);
++		if (status) {
++			dprintk("%s ERROR: invalid open/deleg/lock stateid\n",
++				__func__);
++			goto out;
++		}
++		ls = alloc_init_layout_state(clp, fp, stateid);
++		if (!ls) {
++			dprintk("%s pNFS ERROR: no memory for layout state\n",
++				__func__);
++			status = nfserr_resource;
++			goto out;
++		}
++	} else {
++		dprintk("%s Not initial stateid. Layout state %p file %p\n",
++			__func__, ls, fp);
++
++		/* BAD STATEID */
++		status = nfserr_bad_stateid;
++		if (memcmp(&ls->ls_stateid.si_opaque, &stateid->si_opaque,
++			sizeof(stateid_opaque_t)) != 0) {
++
++			/* if a LAYOUTGET operation and stateid is a valid
++			 * open/deleg/lock stateid, accept it as a parallel
++			 * initial layout stateid
++			 */
++			if (lsp && ((verify_stateid(fp, stateid)) == 0)) {
++				dprintk("%s parallel initial layout state\n",
++					__func__);
++				goto update;
++			}
++
++			dprintk("%s ERROR bad opaque in stateid 1\n", __func__);
++			goto out_put;
++		}
++
++		/* stateid is a valid layout stateid for this file. */
++		if (stateid->si_generation > ls->ls_stateid.si_generation) {
++			dprintk("%s bad stateid 1\n", __func__);
++			goto out_put;
++		}
++update:
++		update_stateid(&ls->ls_stateid);
++		dprintk("%s Updated ls_stateid to %d on layoutstate %p\n",
++			__func__, ls->ls_stateid.si_generation, ls);
++	}
++	status = 0;
++	/* Set the stateid to be encoded */
++	memcpy(stateid, &ls->ls_stateid, sizeof(stateid_t));
++
++	/* Return the layout state if requested */
++	if (lsp) {
++		get_layout_state(ls);
++		*lsp = ls;
++	}
++	dprintk("%s: layout stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(&ls->ls_stateid));
++out_put:
++	dprintk("%s PUT LO STATE:\n", __func__);
++	put_layout_state(ls);
++out:
++	dprintk("<-- %s status %d\n", __func__, htonl(status));
++
++	return status;
++}
++
++static inline struct nfs4_layout *
++alloc_layout(void)
++{
++	return kmem_cache_alloc(pnfs_layout_slab, GFP_KERNEL);
++}
++
++static inline void
++free_layout(struct nfs4_layout *lp)
++{
++	kmem_cache_free(pnfs_layout_slab, lp);
++}
++
++static void
++init_layout(struct nfs4_layout_state *ls,
++	    struct nfs4_layout *lp,
++	    struct nfs4_file *fp,
++	    struct nfs4_client *clp,
++	    struct svc_fh *current_fh,
++	    struct nfsd4_layout_seg *seg)
++{
++	dprintk("pNFS %s: ls %p lp %p clp %p fp %p ino %p\n", __func__,
++		ls, lp, clp, fp, fp->fi_inode);
++
++	get_nfs4_file(fp);
++	lp->lo_client = clp;
++	lp->lo_file = fp;
++	get_layout_state(ls);
++	lp->lo_state = ls;
++	memcpy(&lp->lo_seg, seg, sizeof(lp->lo_seg));
++	spin_lock(&layout_lock);
++	list_add_tail(&lp->lo_perstate, &ls->ls_layouts);
++	list_add_tail(&lp->lo_perclnt, &clp->cl_layouts);
++	list_add_tail(&lp->lo_perfile, &fp->fi_layouts);
++	spin_unlock(&layout_lock);
++	dprintk("pNFS %s end\n", __func__);
++}
++
++static void
++dequeue_layout(struct nfs4_layout *lp)
++{
++	BUG_ON_UNLOCKED_LAYOUT();
++	list_del(&lp->lo_perclnt);
++	list_del(&lp->lo_perfile);
++	list_del(&lp->lo_perstate);
++}
++
++static void
++destroy_layout(struct nfs4_layout *lp)
++{
++	struct nfs4_client *clp;
++	struct nfs4_file *fp;
++	struct nfs4_layout_state *ls;
++
++	BUG_ON_UNLOCKED_LAYOUT();
++	clp = lp->lo_client;
++	fp = lp->lo_file;
++	ls = lp->lo_state;
++	dprintk("pNFS %s: lp %p clp %p fp %p ino %p ls_layouts empty %d\n",
++		__func__, lp, clp, fp, fp->fi_inode,
++		list_empty(&ls->ls_layouts));
++
++	kmem_cache_free(pnfs_layout_slab, lp);
++	/* release references taken by init_layout */
++	put_layout_state_locked(ls);
++	put_nfs4_file(fp);
++}
++
++void fs_layout_return(struct super_block *sb, struct inode *ino,
++		      struct nfsd4_pnfs_layoutreturn *lrp, int flags,
++		      void *recall_cookie)
++{
++	int ret;
++
++	if (unlikely(!sb->s_pnfs_op->layout_return))
++		return;
++
++	lrp->lr_flags = flags;
++	lrp->args.lr_cookie = recall_cookie;
++
++	if (!ino) /* FSID or ALL */
++		ino = sb->s_root->d_inode;
++
++	ret = sb->s_pnfs_op->layout_return(ino, &lrp->args);
++	dprintk("%s: inode %lu iomode=%d offset=0x%llx length=0x%llx "
++		"cookie = %p flags 0x%x status=%d\n",
++		__func__, ino->i_ino, lrp->args.lr_seg.iomode,
++		lrp->args.lr_seg.offset, lrp->args.lr_seg.length,
++		recall_cookie, flags, ret);
++}
++
++static u64
++alloc_init_sbid(struct super_block *sb)
++{
++	struct sbid_tracker *sbid;
++	struct sbid_tracker *new = alloc_sbid();
++	unsigned long hash_idx = sbid_hashval(sb);
++	u64 id = 0;	/* stays 0 (the "no id" value) if allocation failed */
++
++	if (likely(new)) {
++		spin_lock(&layout_lock);
++		id = ++current_sbid;
++		new->id = (id << SBID_HASH_BITS) | (hash_idx & SBID_HASH_MASK);
++		id = new->id;	/* low bits = bucket, see find_sbid_id() */
++		BUG_ON(id == 0);
++		new->sb = sb;
++		/* sb may have been hashed since the caller looked: reuse it */
++		list_for_each_entry (sbid, &sbid_hashtbl[hash_idx], hash)
++			if (sbid->sb == sb) {
++				kfree(new);
++				id = sbid->id;
++				spin_unlock(&layout_lock);
++				return id;
++			}
++		list_add(&new->hash, &sbid_hashtbl[hash_idx]);
++		spin_unlock(&layout_lock);
++	}
++	return id;
++}
++
++struct super_block *
++find_sbid_id(u64 id)
++{
++	struct sbid_tracker *sbid;
++	struct super_block *sb = NULL;
++	unsigned long hash_idx = id & SBID_HASH_MASK;	/* bucket is in the low bits */
++	int pos = 0;
++	/* Map an sbid back to its super_block; NULL when the id is unknown. */
++	spin_lock(&layout_lock);
++	list_for_each_entry (sbid, &sbid_hashtbl[hash_idx], hash) {
++		pos++;
++		if (sbid->id != id)
++			continue;
++		if (pos > 1)	/* move a hit to the front of its bucket */
++			list_move(&sbid->hash, &sbid_hashtbl[hash_idx]);
++		sb = sbid->sb;
++		break;
++	}
++	spin_unlock(&layout_lock);
++	return sb;
++}
++
++u64
++find_create_sbid(struct super_block *sb)
++{
++	struct sbid_tracker *sbid;
++	unsigned long hash_idx = sbid_hashval(sb);
++	int pos = 0;
++	u64 id = 0;
++	/* Return the sbid registered for @sb, allocating one on first use. */
++	spin_lock(&layout_lock);
++	list_for_each_entry (sbid, &sbid_hashtbl[hash_idx], hash) {
++		pos++;
++		if (sbid->sb != sb)
++			continue;
++		if (pos > 1)	/* move a hit to the front of its bucket */
++			list_move(&sbid->hash, &sbid_hashtbl[hash_idx]);
++		id = sbid->id;
++		break;
++	}
++	spin_unlock(&layout_lock);
++	/* a registered id is never 0, so 0 here means "not found" */
++	if (!id)
++		id = alloc_init_sbid(sb);
++
++	return id;
++}
++
++/*
++ * Create a layoutrecall structure
++ * An optional layoutrecall can be cloned (except for the layoutrecall lists)
++ */
++static struct nfs4_layoutrecall *
++alloc_init_layoutrecall(struct nfsd4_pnfs_cb_layout *cbl,
++			struct nfs4_client *clp,
++			struct nfs4_file *lrfile)
++{
++	struct nfs4_layoutrecall *clr;
++
++	dprintk("NFSD %s\n", __func__);
++	clr = kmem_cache_alloc(pnfs_layoutrecall_slab, GFP_KERNEL);
++	if (clr == NULL)
++		return clr;
++
++	dprintk("NFSD %s -->\n", __func__);
++
++	memset(clr, 0, sizeof(*clr));
++	if (lrfile)
++		get_nfs4_file(lrfile);
++	clr->clr_client = clp;
++	clr->clr_file = lrfile;
++	clr->cb = *cbl;
++
++	kref_init(&clr->clr_ref);
++	INIT_LIST_HEAD(&clr->clr_perclnt);
++
++	dprintk("NFSD %s return %p\n", __func__, clr);
++	return clr;
++}
++
++static void
++get_layoutrecall(struct nfs4_layoutrecall *clr)
++{
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));
++	kref_get(&clr->clr_ref);
++}
++
++static void
++destroy_layoutrecall(struct kref *kref)
++{
++	struct nfs4_layoutrecall *clr =
++			container_of(kref, struct nfs4_layoutrecall, clr_ref);
++	dprintk("pNFS %s: clr %p fp %p clp %p\n", __func__, clr,
++		clr->clr_file, clr->clr_client);
++	BUG_ON(!list_empty(&clr->clr_perclnt));
++	if (clr->clr_file)
++		put_nfs4_file(clr->clr_file);
++	kmem_cache_free(pnfs_layoutrecall_slab, clr);
++}
++
++int
++put_layoutrecall(struct nfs4_layoutrecall *clr)
++{
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));
++	return kref_put(&clr->clr_ref, destroy_layoutrecall);
++}
++
++void *
++layoutrecall_done(struct nfs4_layoutrecall *clr)
++{
++	void *recall_cookie = clr->cb.cbl_cookie;
++	struct nfs4_layoutrecall *parent = clr->parent;	/* read before the put */
++	/* Unhash a finished recall and drop a reference; layout_lock held. */
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));
++	BUG_ON_UNLOCKED_LAYOUT();
++	list_del_init(&clr->clr_perclnt);
++	put_layoutrecall(clr);
++	/* With a parent, only the put that releases it returns the cookie. */
++	if (parent && !put_layoutrecall(parent))
++		recall_cookie = NULL;
++
++	return recall_cookie;
++}
++
++/*
++ * get_state() and cb_get_state() are
++ */
++void
++release_pnfs_ds_dev_list(struct nfs4_stateid *stp)
++{
++	struct pnfs_ds_dev_entry *ddp;
++
++	while (!list_empty(&stp->st_pnfs_ds_id)) {
++		ddp = list_entry(stp->st_pnfs_ds_id.next,
++				 struct pnfs_ds_dev_entry, dd_dev_entry);
++		list_del(&ddp->dd_dev_entry);
++		kfree(ddp);
++	}
++}
++
++static int
++nfs4_add_pnfs_ds_dev(struct nfs4_stateid *stp, u32 dsid)
++{
++	struct pnfs_ds_dev_entry *ddp;
++
++	ddp = kmalloc(sizeof(*ddp), GFP_KERNEL);
++	if (!ddp)
++		return -ENOMEM;
++
++	INIT_LIST_HEAD(&ddp->dd_dev_entry);
++	list_add(&ddp->dd_dev_entry, &stp->st_pnfs_ds_id);
++	ddp->dd_dsid = dsid;
++	return 0;
++}
++
++/*
++ * are two octet ranges overlapping?
++ * start1            last1
++ *   |-----------------|
++ *                start2            last2
++ *                  |----------------|
++ */
++static inline int
++lo_seg_overlapping(struct nfsd4_layout_seg *l1, struct nfsd4_layout_seg *l2)
++{
++	u64 start1 = l1->offset;
++	u64 last1 = last_byte_offset(start1, l1->length);
++	u64 start2 = l2->offset;
++	u64 last2 = last_byte_offset(start2, l2->length);
++	int ret;
++
++	/* if last1 == start2 there's a single byte overlap */
++	ret = (last2 >= start1) && (last1 >= start2);
++	dprintk("%s: l1 %llu:%lld l2 %llu:%lld ret=%d\n", __func__,
++		l1->offset, l1->length, l2->offset, l2->length, ret);
++	return ret;
++}
++
++static inline int
++same_fsid_major(struct nfs4_fsid *fsid, u64 major)
++{
++	return fsid->major == major;
++}
++
++static inline int
++same_fsid(struct nfs4_fsid *fsid, struct svc_fh *current_fh)
++{
++	return same_fsid_major(fsid, current_fh->fh_export->ex_fsid);
++}
++
++/*
++ * find a layout recall conflicting with the specified layoutget
++ */
++static int
++is_layout_recalled(struct nfs4_client *clp,
++		   struct svc_fh *current_fh,
++		   struct nfsd4_layout_seg *seg)
++{
++	struct nfs4_layoutrecall *clr;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (clr, &clp->cl_layoutrecalls, clr_perclnt) {
++		if (clr->cb.cbl_seg.layout_type != seg->layout_type)
++			continue;
++		if (clr->cb.cbl_recall_type == RETURN_ALL)
++			goto found;
++		if (clr->cb.cbl_recall_type == RETURN_FSID) {
++			if (same_fsid(&clr->cb.cbl_fsid, current_fh))
++				goto found;
++			else
++				continue;
++		}
++		BUG_ON(clr->cb.cbl_recall_type != RETURN_FILE);
++		if (clr->cb.cbl_seg.clientid == seg->clientid &&
++		    lo_seg_overlapping(&clr->cb.cbl_seg, seg))
++			goto found;
++	}
++	spin_unlock(&layout_lock);
++	return 0;
++found:
++	spin_unlock(&layout_lock);
++	return 1;
++}
++
++/*
++ * are two octet ranges overlapping or adjacent?
++ */
++static inline int
++lo_seg_mergeable(struct nfsd4_layout_seg *l1, struct nfsd4_layout_seg *l2)
++{
++	u64 start1 = l1->offset;
++	u64 end1 = end_offset(start1, l1->length);
++	u64 start2 = l2->offset;
++	u64 end2 = end_offset(start2, l2->length);
++
++	/* if end1 == start2 the ranges are adjacent, which still merges */
++	return (end2 >= start1) && (end1 >= start2);
++}
++
++static void
++extend_layout(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *lg)
++{
++	u64 lo_start = lo->offset;
++	u64 lo_end = end_offset(lo_start, lo->length);
++	u64 lg_start = lg->offset;
++	u64 lg_end = end_offset(lg_start, lg->length);
++
++	/* lo already covers lg? */
++	if (lo_start <= lg_start && lg_end <= lo_end)
++		return;
++
++	/* extend start offset */
++	if (lo_start > lg_start)
++		lo_start = lg_start;
++
++	/* extend end offset */
++	if (lo_end < lg_end)
++		lo_end = lg_end;
++
++	lo->offset = lo_start;
++	lo->length = (lo_end == NFS4_MAX_UINT64) ?
++		      lo_end : lo_end - lo_start;
++}
++
++/* Grow an existing layout of @fp to cover @seg; NULL if none is mergeable. */
++static struct nfs4_layout *
++merge_layout(struct nfs4_file *fp, struct nfs4_client *clp,
++	     struct nfsd4_layout_seg *seg)
++{
++	struct nfs4_layout *lp, *found = NULL;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (lp, &fp->fi_layouts, lo_perfile)
++		if (lp->lo_seg.layout_type == seg->layout_type &&
++		    lp->lo_seg.clientid == seg->clientid &&
++		    lp->lo_seg.iomode == seg->iomode &&
++		    lo_seg_mergeable(&lp->lo_seg, seg)) {
++			extend_layout(&lp->lo_seg, seg);
++			found = lp;	/* lp itself is never NULL after the walk */
++			break;
++		}
++	spin_unlock(&layout_lock);
++	return found;
++}
++
++__be32
++nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *lgp,
++		     struct exp_xdr_stream *xdr)
++{
++	u32 status;
++	__be32 nfserr;
++	struct inode *ino = lgp->lg_fhp->fh_dentry->d_inode;
++	struct super_block *sb = ino->i_sb;
++	int can_merge;
++	struct nfs4_file *fp;
++	struct nfs4_client *clp;
++	struct nfs4_layout *lp = NULL;
++	struct nfs4_layout_state *ls = NULL;
++	struct nfsd4_pnfs_layoutget_arg args = {
++		.lg_minlength = lgp->lg_minlength,
++		.lg_fh = &lgp->lg_fhp->fh_handle,
++	};
++	struct nfsd4_pnfs_layoutget_res res = {
++		.lg_seg = lgp->lg_seg,
++	};
++
++	dprintk("NFSD: %s Begin\n", __func__);
++
++	args.lg_sbid = find_create_sbid(sb);
++	if (!args.lg_sbid) {
++		nfserr = nfserr_layouttrylater;
++		goto out;
++	}
++
++	can_merge = sb->s_pnfs_op->can_merge_layouts != NULL &&
++		    sb->s_pnfs_op->can_merge_layouts(lgp->lg_seg.layout_type);
++
++	nfs4_lock_state();
++	fp = find_alloc_file(ino, lgp->lg_fhp);
++	clp = find_confirmed_client((clientid_t *)&lgp->lg_seg.clientid);
++	dprintk("pNFS %s: fp %p clp %p \n", __func__, fp, clp);
++	if (!fp || !clp) {
++		nfserr = nfserr_inval;
++		goto out_unlock;
++	}
++
++	/* Check decoded layout stateid */
++	nfserr = nfs4_process_layout_stateid(clp, fp, &lgp->lg_sid, &ls);
++	if (nfserr)
++		goto out_unlock;
++
++	if (is_layout_recalled(clp, lgp->lg_fhp, &lgp->lg_seg)) {
++		nfserr = nfserr_recallconflict;
++		goto out_unlock; /* drop the state lock and the ls/fp refs */
++	}
++
++	/* pre-alloc layout in case we can't merge after we call
++	 * the file system
++	 */
++	lp = alloc_layout();
++	if (!lp) {
++		nfserr = nfserr_layouttrylater;
++		goto out_unlock;
++	}
++
++	dprintk("pNFS %s: pre-export type 0x%x maxcount %Zd "
++		"iomode %u offset %llu length %llu\n",
++		__func__, lgp->lg_seg.layout_type,
++		exp_xdr_qbytes(xdr->end - xdr->p),
++		lgp->lg_seg.iomode, lgp->lg_seg.offset, lgp->lg_seg.length);
++
++	/* FIXME: need to eliminate the use of the state lock */
++	nfs4_unlock_state();
++	status = sb->s_pnfs_op->layout_get(ino, xdr, &args, &res);
++	nfs4_lock_state();
++
++	dprintk("pNFS %s: post-export status %u "
++		"iomode %u offset %llu length %llu\n",
++		__func__, status, res.lg_seg.iomode,
++		res.lg_seg.offset, res.lg_seg.length);
++
++	/*
++	 * The allowable error codes for the layout_get pNFS export
++	 * operations vector function (from the file system) can be
++	 * expanded as needed to include other errors defined for
++	 * the RFC 5661 LAYOUTGET operation.
++	 */
++	switch (status) {
++	case 0:
++		nfserr = NFS4_OK;
++		break;
++	case NFS4ERR_ACCESS:
++	case NFS4ERR_BADIOMODE:
++		/* No support for LAYOUTIOMODE4_RW layouts */
++	case NFS4ERR_BADLAYOUT:
++		/* No layout matching loga_minlength rules */
++	case NFS4ERR_INVAL:
++	case NFS4ERR_IO:
++	case NFS4ERR_LAYOUTTRYLATER:
++	case NFS4ERR_LAYOUTUNAVAILABLE:
++	case NFS4ERR_LOCKED:
++	case NFS4ERR_NOSPC:
++	case NFS4ERR_RECALLCONFLICT:
++	case NFS4ERR_SERVERFAULT:
++	case NFS4ERR_TOOSMALL:
++		/* Requested layout too big for loga_maxcount */
++	case NFS4ERR_WRONG_TYPE:
++		/* Not a regular file */
++		nfserr = cpu_to_be32(status);
++		goto out_freelayout;
++	default:
++		BUG();
++		nfserr = nfserr_serverfault;
++	}
++
++	lgp->lg_seg = res.lg_seg;
++	lgp->lg_roc = res.lg_return_on_close;
++
++	/* SUCCESS!
++	 * Can the new layout be merged into an existing one?
++	 * If so, free unused layout struct
++	 */
++	if (can_merge && merge_layout(fp, clp, &res.lg_seg))
++		goto out_freelayout;
++
++	/* Can't merge, so let's initialize this new layout */
++	init_layout(ls, lp, fp, clp, lgp->lg_fhp, &res.lg_seg);
++out_unlock:
++	if (ls)
++		put_layout_state(ls);
++	if (fp)
++		put_nfs4_file(fp);
++	nfs4_unlock_state();
++out:
++	dprintk("pNFS %s: lp %p exit nfserr %u\n", __func__, lp,
++		be32_to_cpu(nfserr));
++	return nfserr;
++out_freelayout:
++	free_layout(lp);
++	goto out_unlock;
++}
++
++static void
++trim_layout(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *lr)
++{
++	u64 lo_start = lo->offset;
++	u64 lo_end = end_offset(lo_start, lo->length);
++	u64 lr_start = lr->offset;
++	u64 lr_end = end_offset(lr_start, lr->length);
++
++	dprintk("%s:Begin lo %llu:%lld lr %llu:%lld\n", __func__,
++		lo->offset, lo->length, lr->offset, lr->length);
++
++	/* lr fully covers lo? then nothing of lo remains */
++	if (lr_start <= lo_start && lo_end <= lr_end) {
++		lo->length = 0;
++		goto out;
++	}
++
++	/*
++	 * lr is strictly inside lo: splitting is not supported yet, so
++	 * retain the whole layout segment. The remains must be returned
++	 * by the client on the final layout return.
++	 */
++	if (lo_start < lr_start && lr_end < lo_end) {
++		dprintk("%s: split not supported\n", __func__);
++		goto out;
++	}
++
++	if (lo_start < lr_start)
++		lo_end = lr_start;	/* keep [lo_start, lr_start) */
++	else /* lr_end < lo_end */
++		lo_start = lr_end;	/* keep [lr_end, lo_end) */
++
++	lo->offset = lo_start;
++	lo->length = (lo_end == NFS4_MAX_UINT64) ? lo_end : lo_end - lo_start;
++out:
++	dprintk("%s:End lo %llu:%lld\n", __func__, lo->offset, lo->length);
++}
++
++static int
++pnfs_return_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp,
++			 struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	int layouts_found = 0;
++	struct nfs4_layout *lp, *nextlp;
++
++	dprintk("%s: clp %p fp %p\n", __func__, clp, fp);
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (lp, nextlp, &fp->fi_layouts, lo_perfile) {
++		dprintk("%s: lp %p client %p,%p lo_type %x,%x iomode %d,%d\n",
++			__func__, lp,
++			lp->lo_client, clp,
++			lp->lo_seg.layout_type, lrp->args.lr_seg.layout_type,
++			lp->lo_seg.iomode, lrp->args.lr_seg.iomode);
++		if (lp->lo_client != clp ||
++		    lp->lo_seg.layout_type != lrp->args.lr_seg.layout_type ||
++		    (lp->lo_seg.iomode != lrp->args.lr_seg.iomode &&
++		     lrp->args.lr_seg.iomode != IOMODE_ANY) ||
++		     !lo_seg_overlapping(&lp->lo_seg, &lrp->args.lr_seg))
++			continue;
++		layouts_found++;
++		trim_layout(&lp->lo_seg, &lrp->args.lr_seg);
++		if (!lp->lo_seg.length) {
++			lrp->lrs_present = 0;
++			dequeue_layout(lp);
++			destroy_layout(lp);
++		}
++	}
++	spin_unlock(&layout_lock);
++
++	return layouts_found;
++}
++
++static int
++pnfs_return_client_layouts(struct nfs4_client *clp,
++			   struct nfsd4_pnfs_layoutreturn *lrp, u64 ex_fsid)
++{
++	int layouts_found = 0;
++	struct nfs4_layout *lp, *nextlp;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (lp, nextlp, &clp->cl_layouts, lo_perclnt) {
++		if (lrp->args.lr_seg.layout_type != lp->lo_seg.layout_type ||
++		   (lrp->args.lr_seg.iomode != lp->lo_seg.iomode &&
++		    lrp->args.lr_seg.iomode != IOMODE_ANY))
++			continue;
++
++		if (lrp->args.lr_return_type == RETURN_FSID &&
++		    !same_fsid_major(&lp->lo_file->fi_fsid, ex_fsid))
++			continue;
++
++		layouts_found++;
++		dequeue_layout(lp);
++		destroy_layout(lp);
++	}
++	spin_unlock(&layout_lock);
++
++	return layouts_found;
++}
++
++static int
++recall_return_perfect_match(struct nfs4_layoutrecall *clr,
++			    struct nfsd4_pnfs_layoutreturn *lrp,
++			    struct nfs4_file *fp,
++			    struct svc_fh *current_fh)
++{
++	if (clr->cb.cbl_seg.iomode != lrp->args.lr_seg.iomode ||
++	    clr->cb.cbl_recall_type != lrp->args.lr_return_type)
++		return 0;
++
++	return (clr->cb.cbl_recall_type == RETURN_FILE &&
++		clr->clr_file == fp &&
++		clr->cb.cbl_seg.offset == lrp->args.lr_seg.offset &&
++		clr->cb.cbl_seg.length == lrp->args.lr_seg.length) ||
++
++		(clr->cb.cbl_recall_type == RETURN_FSID &&
++		 same_fsid(&clr->cb.cbl_fsid, current_fh)) ||
++
++		clr->cb.cbl_recall_type == RETURN_ALL;
++}
++
++static int
++recall_return_partial_match(struct nfs4_layoutrecall *clr,
++			    struct nfsd4_pnfs_layoutreturn *lrp,
++			    struct nfs4_file *fp,
++			    struct svc_fh *current_fh)
++{
++	/* iomode matching? */
++	if (clr->cb.cbl_seg.iomode != lrp->args.lr_seg.iomode &&
++	    clr->cb.cbl_seg.iomode != IOMODE_ANY &&
++	    lrp->args.lr_seg.iomode != IOMODE_ANY)
++		return 0;
++
++	if (clr->cb.cbl_recall_type == RETURN_ALL ||
++	    lrp->args.lr_return_type == RETURN_ALL)
++		return 1;
++
++	/* fsid matches? */
++	if (clr->cb.cbl_recall_type == RETURN_FSID ||
++	    lrp->args.lr_return_type == RETURN_FSID)
++		return same_fsid(&clr->cb.cbl_fsid, current_fh);
++
++	/* file matches, range overlapping? */
++	return clr->clr_file == fp &&
++	       lo_seg_overlapping(&clr->cb.cbl_seg, &lrp->args.lr_seg);
++}
++
++int nfs4_pnfs_return_layout(struct super_block *sb, struct svc_fh *current_fh,
++			    struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	int status = 0;
++	int layouts_found = 0;
++	struct inode *ino = current_fh->fh_dentry->d_inode;
++	struct nfs4_file *fp = NULL;
++	struct nfs4_client *clp;
++	struct nfs4_layoutrecall *clr, *nextclr;
++	u64 ex_fsid = current_fh->fh_export->ex_fsid;
++	void *recall_cookie = NULL;
++
++	dprintk("NFSD: %s\n", __func__);
++
++	nfs4_lock_state();
++	clp = find_confirmed_client((clientid_t *)&lrp->args.lr_seg.clientid);
++	if (!clp)
++		goto out;
++
++	if (lrp->args.lr_return_type == RETURN_FILE) {
++		fp = find_file(ino);
++		if (!fp) {
++			printk(KERN_ERR "%s: RETURN_FILE: no nfs4_file for "
++				"ino %p:%lu\n",
++				__func__, ino, ino ? ino->i_ino : 0L);
++			goto out;
++		}
++
++		/* Check the stateid */
++		dprintk("%s PROCESS LO_STATEID inode %p\n", __func__, ino);
++		status = nfs4_process_layout_stateid(clp, fp, &lrp->lr_sid,
++						     NULL);
++		if (status)
++			goto out_put_file;
++
++		/* update layouts */
++		layouts_found = pnfs_return_file_layouts(clp, fp, lrp);
++		/* optimize for the all-empty case */
++		if (list_empty(&fp->fi_layouts))
++			recall_cookie = PNFS_LAST_LAYOUT_NO_RECALLS;
++	} else {
++		layouts_found = pnfs_return_client_layouts(clp, lrp, ex_fsid);
++	}
++
++	dprintk("pNFS %s: clp %p fp %p layout_type 0x%x iomode %d "
++		"return_type %d fsid 0x%llx offset %llu length %llu: "
++		"layouts_found %d\n",
++		__func__, clp, fp, lrp->args.lr_seg.layout_type,
++		lrp->args.lr_seg.iomode, lrp->args.lr_return_type,
++		ex_fsid,
++		lrp->args.lr_seg.offset, lrp->args.lr_seg.length, layouts_found);
++
++	/* update layoutrecalls
++	 * note: for RETURN_{FSID,ALL}, fp may be NULL
++	 */
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (clr, nextclr, &clp->cl_layoutrecalls,
++				  clr_perclnt) {
++		if (clr->cb.cbl_seg.layout_type != lrp->args.lr_seg.layout_type)
++			continue;
++
++		if (recall_return_perfect_match(clr, lrp, fp, current_fh))
++			recall_cookie = layoutrecall_done(clr);
++		else if (layouts_found &&
++			 recall_return_partial_match(clr, lrp, fp, current_fh))
++			clr->clr_time = CURRENT_TIME;
++	}
++	spin_unlock(&layout_lock);
++
++out_put_file:
++	if (fp)
++		put_nfs4_file(fp);
++out:
++	nfs4_unlock_state();
++
++	/* call exported filesystem layout_return (ignore return-code) */
++	fs_layout_return(sb, ino, lrp, 0, recall_cookie);
++
++	dprintk("pNFS %s: exit status %d \n", __func__, status);
++	return status;
++}
++
++/*
++ * PNFS Metadata server export operations callback for get_state
++ *
++ * called by the cluster fs when it receives a get_state() from a data
++ * server.
++ * returns 0 if the stateid is known (arg->access and arg->clid are filled
++ * in for open/lock stateids), else -EINVAL or nfs4_add_pnfs_ds_dev() error.
++ */
++int
++nfs4_pnfs_cb_get_state(struct super_block *sb, struct pnfs_get_state *arg)
++{
++	struct nfs4_stateid *stp;
++	int flags = LOCK_STATE | OPEN_STATE; /* search both hash tables */
++	int status = -EINVAL;
++	struct inode *ino;
++	struct nfs4_delegation *dl;
++	stateid_t *stid = (stateid_t *)&arg->stid;
++
++	dprintk("NFSD: %s sid=" STATEID_FMT " ino %llu\n", __func__,
++		STATEID_VAL(stid), arg->ino);
++
++	nfs4_lock_state();
++	stp = find_stateid(stid, flags);
++	if (!stp) {
++		ino = iget_locked(sb, arg->ino);
++		if (!ino)
++			goto out;
++
++		if (ino->i_state & I_NEW) {
++			iget_failed(ino);
++			goto out;
++		}
++
++		dl = find_delegation_stateid(ino, stid);
++		if (dl)
++			status = 0;
++
++		iput(ino);
++	} else {
++		/* XXX ANDROS: marc removed nfs4_check_fh - how come? */
++
++		/* arg->dsid is the data server id, set by the cluster fs */
++		status = nfs4_add_pnfs_ds_dev(stp, arg->dsid);
++		if (status)
++			goto out;
++
++		arg->access = stp->st_access_bmap;
++		*(clientid_t *)&arg->clid =
++			stp->st_stateowner->so_client->cl_clientid;
++	}
++out:
++	nfs4_unlock_state();
++	return status;
++}
++
++static int
++cl_has_file_layout(struct nfs4_client *clp, struct nfs4_file *lrfile,
++		   stateid_t *lsid)
++{
++	int found = 0;
++	struct nfs4_layout *lp;
++	struct nfs4_layout_state *ls;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry(lp, &clp->cl_layouts, lo_perclnt) {
++		if (lp->lo_file != lrfile)
++			continue;
++
++		ls = find_get_layout_state(clp, lrfile);
++		if (!ls) {
++			/* This shouldn't happen as the file should have a
++			 * layout stateid if it has a layout.
++			 */
++			printk(KERN_ERR "%s: file %p has no layout stateid\n",
++				__func__, lrfile);
++			WARN_ON(1);
++			break;
++		}
++		update_stateid(&ls->ls_stateid);
++		memcpy(lsid, &ls->ls_stateid, sizeof(stateid_t));
++		put_layout_state_locked(ls);
++		found = 1;
++		break;
++	}
++	spin_unlock(&layout_lock);
++
++	return found;
++}
++
++static int
++cl_has_fsid_layout(struct nfs4_client *clp, struct nfs4_fsid *fsid)
++{
++	int found = 0;
++	struct nfs4_layout *lp;
++
++	/* note: minor version unused */
++	spin_lock(&layout_lock);
++	list_for_each_entry(lp, &clp->cl_layouts, lo_perclnt)
++		if (lp->lo_file->fi_fsid.major == fsid->major) {
++			found = 1;
++			break;
++		}
++	spin_unlock(&layout_lock);
++	return found;
++}
++
++static int
++cl_has_any_layout(struct nfs4_client *clp)
++{
++	return !list_empty(&clp->cl_layouts);
++}
++
++static int
++cl_has_layout(struct nfs4_client *clp, struct nfsd4_pnfs_cb_layout *cbl,
++	      struct nfs4_file *lrfile, stateid_t *lsid)
++{
++	switch (cbl->cbl_recall_type) {
++	case RETURN_FILE:
++		return cl_has_file_layout(clp, lrfile, lsid);
++	case RETURN_FSID:
++		return cl_has_fsid_layout(clp, &cbl->cbl_fsid);
++	default:
++		return cl_has_any_layout(clp);
++	}
++}
++
++/*
++ * Called without the layout_lock.
++ */
++void
++nomatching_layout(struct nfs4_layoutrecall *clr)
++{
++	struct nfsd4_pnfs_layoutreturn lr = {
++		.args.lr_return_type = clr->cb.cbl_recall_type,
++		.args.lr_seg = clr->cb.cbl_seg,
++	};
++	struct inode *inode;
++	void *recall_cookie;
++
++	if (clr->clr_file) {
++		inode = igrab(clr->clr_file->fi_inode);
++		if (WARN_ON(!inode))
++			return;
++	} else {
++		inode = NULL;
++	}
++
++	dprintk("%s: clp %p fp %p: simulating layout_return\n", __func__,
++		clr->clr_client, clr->clr_file);
++
++	if (clr->cb.cbl_recall_type == RETURN_FILE)
++		pnfs_return_file_layouts(clr->clr_client, clr->clr_file, &lr);
++	else
++		pnfs_return_client_layouts(clr->clr_client, &lr,
++					   clr->cb.cbl_fsid.major);
++
++	spin_lock(&layout_lock);
++	recall_cookie = layoutrecall_done(clr);
++	spin_unlock(&layout_lock);
++
++	fs_layout_return(clr->clr_sb, inode, &lr, LR_FLAG_INTERN,
++			 recall_cookie);
++	iput(inode);
++}
++
++void pnfs_expire_client(struct nfs4_client *clp)
++{
++	for (;;) {
++		struct nfs4_layoutrecall *lrp = NULL;
++
++		spin_lock(&layout_lock);
++		if (!list_empty(&clp->cl_layoutrecalls)) {
++			lrp = list_entry(clp->cl_layoutrecalls.next,
++					 struct nfs4_layoutrecall, clr_perclnt);
++			get_layoutrecall(lrp);
++		}
++		spin_unlock(&layout_lock);
++		if (!lrp)
++			break;
++
++		dprintk("%s: lrp %p, fp %p\n", __func__, lrp, lrp->clr_file);
++		BUG_ON(lrp->clr_client != clp);
++		nomatching_layout(lrp);
++		put_layoutrecall(lrp);
++	}
++
++	for (;;) {
++		struct nfs4_layout *lp = NULL;
++		struct inode *inode = NULL;
++		struct nfsd4_pnfs_layoutreturn lr;
++		bool empty = false;
++
++		spin_lock(&layout_lock);
++		if (!list_empty(&clp->cl_layouts)) {
++			lp = list_entry(clp->cl_layouts.next,
++					struct nfs4_layout, lo_perclnt);
++			inode = igrab(lp->lo_file->fi_inode);
++			memset(&lr, 0, sizeof(lr));
++			lr.args.lr_return_type = RETURN_FILE;
++			lr.args.lr_seg = lp->lo_seg;
++			BUG_ON(lp->lo_client != clp);
++			dequeue_layout(lp); /* so fi_layouts can be empty */
++			empty = list_empty(&lp->lo_file->fi_layouts);
++			destroy_layout(lp); /* do not access lp after this */
++		}
++		spin_unlock(&layout_lock);
++		if (!lp)
++			break;
++
++		if (WARN_ON(!inode))
++			break;
++
++		dprintk("%s: inode %lu lp %p clp %p\n", __func__, inode->i_ino,
++			lp, clp);
++
++		fs_layout_return(inode->i_sb, inode, &lr, LR_FLAG_EXPIRE,
++				 empty ? PNFS_LAST_LAYOUT_NO_RECALLS : NULL);
++		iput(inode);
++	}
++}
++
++struct create_recall_list_arg {
++	struct nfsd4_pnfs_cb_layout *cbl;
++	struct nfs4_file *lrfile;
++	struct list_head *todolist;
++	unsigned todo_count;
++};
++
++/*
++ * Look for a layout held by the given client that matches the recall
++ * and, if one is found, add a pending layout recall to the todo list
++ * (arg->todo_count counts the entries added).
++ * returns:
++ *   0 on success, also when no layout matched, or -ENOMEM.
++ */
++static int
++lo_recall_per_client(struct nfs4_client *clp, void *p)
++{
++	stateid_t lsid;
++	struct nfs4_layoutrecall *pending;
++	struct create_recall_list_arg *arg = p;
++
++	memset(&lsid, 0, sizeof(lsid));
++	if (!cl_has_layout(clp, arg->cbl, arg->lrfile, &lsid))
++		return 0;
++
++	/* Matching put done by layoutreturn */
++	pending = alloc_init_layoutrecall(arg->cbl, clp, arg->lrfile);
++	/* out of memory, drain todo queue */
++	if (!pending)
++		return -ENOMEM;
++
++	*(stateid_t *)&pending->cb.cbl_sid = lsid;
++	list_add(&pending->clr_perclnt, arg->todolist);
++	arg->todo_count++;
++	return 0;
++}
++
++/* Create a layoutrecall structure for each client based on the
++ * original structure. */
++int
++create_layout_recall_list(struct list_head *todolist, unsigned *todo_len,
++			  struct nfsd4_pnfs_cb_layout *cbl,
++			  struct nfs4_file *lrfile)
++{
++	struct nfs4_client *clp;
++	struct create_recall_list_arg arg = {
++		.cbl = cbl,
++		.lrfile = lrfile,
++		.todolist = todolist,
++	};
++	int status = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	/* If client given by fs, just do single client */
++	if (cbl->cbl_seg.clientid) {
++		clp = find_confirmed_client(
++				(clientid_t *)&cbl->cbl_seg.clientid);
++		if (!clp) {
++			status = -ENOENT;
++			dprintk("%s: clientid %llx not found\n", __func__,
++				(unsigned long long)cbl->cbl_seg.clientid);
++			goto out;
++		}
++
++		status = lo_recall_per_client(clp, &arg);
++	} else {
++		/* Check all clients for layout matches */
++		status = filter_confirmed_clients(lo_recall_per_client, &arg);
++	}
++
++out:
++	*todo_len = arg.todo_count;
++	dprintk("%s: <-- list len %u status %d\n", __func__, *todo_len, status);
++	return status;
++}
++
++/*
++ * Recall layouts asynchronously: every entry on todolist is handed to
++ * nfsd4_cb_layout().  Called with the state lock held.
++ */
++static int
++spawn_layout_recall(struct super_block *sb, struct list_head *todolist,
++		    unsigned todo_len)
++{
++	struct nfs4_layoutrecall *pending;
++	struct nfs4_layoutrecall *parent = NULL;
++	int status = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	if (todo_len > 1) {
++		pending = list_entry(todolist->next, struct nfs4_layoutrecall,
++				     clr_perclnt);
++
++		parent = alloc_init_layoutrecall(&pending->cb, NULL,
++						 pending->clr_file);
++		if (unlikely(!parent)) {
++			/* We want forward progress. If the parent cannot be
++			 * allocated, take the first entry as parent but don't
++			 * execute it, and report -ENOMEM.  The caller turns
++			 * that into -EAGAIN: once the partial recalls return,
++			 * nfsd_layout_recall_cb should be called again.
++			 */
++			list_del_init(&pending->clr_perclnt);
++			if (todo_len > 2) {
++				parent = pending;
++			} else {
++				parent = NULL;
++				put_layoutrecall(pending);
++			}
++			--todo_len;
++				status = -ENOMEM;
++		}
++	}
++
++	while (!list_empty(todolist)) {
++		pending = list_entry(todolist->next, struct nfs4_layoutrecall,
++				     clr_perclnt);
++		list_del_init(&pending->clr_perclnt);
++		dprintk("%s: clp %p cb_client %p fp %p\n", __func__,
++			pending->clr_client,
++			pending->clr_client->cl_cb_client,
++			pending->clr_file);
++		if (unlikely(!pending->clr_client->cl_cb_client)) {
++			printk(KERN_INFO
++				"%s: clientid %08x/%08x has no callback path\n",
++				__func__,
++				pending->clr_client->cl_clientid.cl_boot,
++				pending->clr_client->cl_clientid.cl_id);
++			put_layoutrecall(pending);
++			continue;
++		}
++
++		pending->clr_time = CURRENT_TIME;
++		pending->clr_sb = sb;
++		if (parent) {
++			/* The parent starts with one ref, which the last child
++			 * inherits. NOTE(review): clients skipped above do not
++			 * decrement todo_len - possible parent ref leak?
++			 */
++			if (todo_len != 1)
++				get_layoutrecall(parent);
++		}
++		pending->parent = parent;
++		get_layoutrecall(pending);
++		/* Add to list so corresponding layoutreturn can find req */
++		list_add(&pending->clr_perclnt,
++			 &pending->clr_client->cl_layoutrecalls);
++
++		nfsd4_cb_layout(pending);
++		--todo_len;
++	}
++
++	return status;
++}
++
++/*
++ * Spawn a thread to perform a recall layout
++ *
++ */
++int nfsd_layout_recall_cb(struct super_block *sb, struct inode *inode,
++			  struct nfsd4_pnfs_cb_layout *cbl)
++{
++	int status;
++	struct nfs4_file *lrfile = NULL;
++	struct list_head todolist;
++	unsigned todo_len = 0;
++
++	dprintk("NFSD nfsd_layout_recall_cb: inode %p cbl %p\n", inode, cbl);
++	BUG_ON(!cbl);
++	BUG_ON(cbl->cbl_recall_type != RETURN_FILE &&
++	       cbl->cbl_recall_type != RETURN_FSID &&
++	       cbl->cbl_recall_type != RETURN_ALL);
++	BUG_ON(cbl->cbl_recall_type == RETURN_FILE && !inode);
++	BUG_ON(cbl->cbl_seg.iomode != IOMODE_READ &&
++	       cbl->cbl_seg.iomode != IOMODE_RW &&
++	       cbl->cbl_seg.iomode != IOMODE_ANY);
++
++	if (nfsd_serv == NULL) {
++		dprintk("NFSD nfsd_layout_recall_cb: nfsd_serv == NULL\n");
++		return -ENOENT;
++	}
++
++	nfs4_lock_state();
++	status = -ENOENT;
++	if (inode) {
++		lrfile = find_file(inode);
++		if (!lrfile) {
++			dprintk("NFSD nfsd_layout_recall_cb: "
++				"nfs4_file not found\n");
++			goto err;
++		}
++		if (cbl->cbl_recall_type == RETURN_FSID)
++			cbl->cbl_fsid = lrfile->fi_fsid;
++	}
++
++	INIT_LIST_HEAD(&todolist);
++
++	/* If no cookie provided by FS, return a default one */
++	if (!cbl->cbl_cookie)
++		cbl->cbl_cookie = PNFS_LAST_LAYOUT_NO_RECALLS;
++
++	status = create_layout_recall_list(&todolist, &todo_len, cbl, lrfile);
++	if (list_empty(&todolist)) {
++		status = -ENOENT;
++	} else {
++		/* process todolist even if create_layout_recall_list
++		 * returned an error */
++		int status2 = spawn_layout_recall(sb, &todolist, todo_len);
++		if (status2)
++			status = status2;
++	}
++
++err:
++	nfs4_unlock_state();
++	if (lrfile)
++		put_nfs4_file(lrfile);
++	return (todo_len && status) ? -EAGAIN : status;
++}
++
++struct create_device_notify_list_arg {
++	struct list_head *todolist;
++	struct nfsd4_pnfs_cb_dev_list *ndl;
++};
++
++static int
++create_device_notify_per_cl(struct nfs4_client *clp, void *p)
++{
++	struct nfs4_notify_device *cbnd;
++	struct create_device_notify_list_arg *arg = p;
++
++	if (atomic_read(&clp->cl_deviceref) <= 0)
++		return 0;
++
++	cbnd = kmalloc(sizeof(*cbnd), GFP_KERNEL);
++	if (!cbnd)
++		return -ENOMEM;
++
++	cbnd->nd_list = arg->ndl;
++	cbnd->nd_client = clp;
++	list_add(&cbnd->nd_perclnt, arg->todolist);
++	return 0;
++}
++
++/* Create a list of clients to send device notifications. */
++int
++create_device_notify_list(struct list_head *todolist,
++			  struct nfsd4_pnfs_cb_dev_list *ndl)
++{
++	int status;
++	struct create_device_notify_list_arg arg = {
++		.todolist = todolist,
++		.ndl = ndl,
++	};
++
++	nfs4_lock_state();
++	status = filter_confirmed_clients(create_device_notify_per_cl, &arg);
++	nfs4_unlock_state();
++
++	return status;
++}
++
++/*
++ * For each client that has a device, send a device notification.
++ * XXX: Need to track which clients have which devices.
++ */
++int nfsd_device_notify_cb(struct super_block *sb,
++			  struct nfsd4_pnfs_cb_dev_list *ndl)
++{
++	struct nfs4_notify_device *cbnd;
++	unsigned int notify_num = 0;
++	int status2, status = 0;
++	struct list_head todolist;
++
++	BUG_ON(!ndl || ndl->cbd_len == 0 || !ndl->cbd_list);
++
++	dprintk("NFSD %s: cbl %p len %u\n", __func__, ndl, ndl->cbd_len);
++
++	if (nfsd_serv == NULL)
++		return -ENOENT;
++
++	INIT_LIST_HEAD(&todolist);
++
++	status = create_device_notify_list(&todolist, ndl);
++
++	while (!list_empty(&todolist)) {
++		cbnd = list_entry(todolist.next, struct nfs4_notify_device,
++				  nd_perclnt);
++		list_del_init(&cbnd->nd_perclnt);
++		status2 = nfsd4_cb_notify_device(cbnd);
++		pnfs_clear_device_notify(cbnd->nd_client);
++		if (status2) {
++			kfree(cbnd);
++			status = status2;
++		}
++		notify_num++;
++	}
++
++	dprintk("NFSD %s: status %d clients %u\n",
++		__func__, status, notify_num);
++	return status;
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-08-31 20:42:05.557222774 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-08-31 20:42:05.557222774 -0400
+@@ -0,0 +1,461 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ ******************************************************************************/
++
++#include <linux/nfs4.h>
++#include <linux/nfsd/const.h>
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/sunrpc/clnt.h>
++
++#include "nfsfh.h"
++#include "nfsd.h"
++
++#define NFSDDBG_FACILITY                NFSDDBG_PROC
++
++/* Just use a linked list: we do not expect more than 32 dlm_device_entries,
++ * and the first implementation uses only one device per cluster file system.
++ */
++
++static LIST_HEAD(dlm_device_list);
++static DEFINE_SPINLOCK(dlm_device_list_lock);
++
++struct dlm_device_entry {
++	struct list_head	dlm_dev_list;
++	char			disk_name[DISK_NAME_LEN];
++	int			num_ds;
++	char			ds_list[NFSD_DLM_DS_LIST_MAX];
++};
++
++static struct dlm_device_entry *
++_nfsd4_find_pnfs_dlm_device(char *disk_name)
++{
++	struct dlm_device_entry *dlm_pdev;
++
++	dprintk("--> %s  disk name %s\n", __func__, disk_name);
++	spin_lock(&dlm_device_list_lock);
++	list_for_each_entry(dlm_pdev, &dlm_device_list, dlm_dev_list) {
++		dprintk("%s Look for dlm_pdev %s\n", __func__,
++			dlm_pdev->disk_name);
++		if (!strncmp(dlm_pdev->disk_name, disk_name, DISK_NAME_LEN)) {
++			spin_unlock(&dlm_device_list_lock);
++			return dlm_pdev;	/* exact name match */
++		}
++	}
++	spin_unlock(&dlm_device_list_lock);
++	return NULL;
++}
++
++static struct dlm_device_entry *
++nfsd4_find_pnfs_dlm_device(struct super_block *sb) {
++	char dname[BDEVNAME_SIZE];
++
++	bdevname(sb->s_bdev, dname);
++	return _nfsd4_find_pnfs_dlm_device(dname);
++}
++
++ssize_t
++nfsd4_get_pnfs_dlm_device_list(char *buf, ssize_t buflen)
++{
++	char *pos = buf;
++	ssize_t size = 0;
++	struct dlm_device_entry *dlm_pdev;
++	int ret = -EINVAL;
++
++	spin_lock(&dlm_device_list_lock);
++	list_for_each_entry(dlm_pdev, &dlm_device_list, dlm_dev_list)
++	{
++		int advanced;
++		advanced = snprintf(pos, buflen - size, "%s:%s\n", dlm_pdev->disk_name, dlm_pdev->ds_list);
++		if (advanced >= buflen - size)
++			goto out;
++		size += advanced;
++		pos += advanced;
++	}
++	ret = size;
++
++out:
++	spin_unlock(&dlm_device_list_lock);
++	return ret;
++}
++
++bool nfsd4_validate_pnfs_dlm_device(char *ds_list, int *num_ds)
++{
++	char *start = ds_list;
++
++	*num_ds = 0;
++
++	while (*start) {
++		struct sockaddr_storage tempAddr;
++		int ipLen = strcspn(start, ",");
++
++		if (!rpc_pton(start, ipLen, (struct sockaddr *)&tempAddr, sizeof(tempAddr)))
++			return false;
++		(*num_ds)++;
++		start += ipLen + (start[ipLen] == ','); /* stay on the NUL */
++	}
++	return true;
++}
++
++/*
++ * pnfs_dlm_device string format:
++ *     block-device-path:<ds1 ipv4 address>,<ds2 ipv4 address>
++ *
++ * Examples
++ *     '/dev/sda:192.168.1.96,192.168.1.97' creates a data server list with
++ *     two data servers for the dlm cluster file system mounted on /dev/sda.
++ *
++ *     '/dev/sda:192.168.1.96,192.168.1.100'
++ *     replaces the data server list for /dev/sda
++ *
++ *     Only the deviceid == 1 is supported. Can add device id to
++ *     pnfs_dlm_device string when needed.
++ *
++ *     Only the round robin each data server once stripe index is supported.
++ */
++int
++nfsd4_set_pnfs_dlm_device(char *pnfs_dlm_device, int len)
++
++{
++	struct dlm_device_entry *new, *found;
++	char *bufp = pnfs_dlm_device;
++	char *endp = bufp + strlen(bufp);
++	int err = -ENOMEM;
++
++	dprintk("--> %s len %d\n", __func__, len);
++
++	new = kzalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return err;
++
++	err = -EINVAL;
++	/* disk_name */
++	/* FIXME: need to check for valid disk_name. search superblocks?
++	 * check for slash dev slash ?
++	 */
++	len = strcspn(bufp, ":");
++	if (len >= DISK_NAME_LEN)	/* leave room for the NUL */
++		goto out_free;
++	memcpy(new->disk_name, bufp, len);
++
++	err = -EINVAL;
++	bufp += len + 1;
++	if (bufp >= endp)
++		goto out_free;
++
++	/* data server list */
++	/* FIXME: need to check for comma separated valid ip format */
++	len = strcspn(bufp, ":");
++	if (len >= NFSD_DLM_DS_LIST_MAX)	/* leave room for the NUL */
++		goto out_free;
++	memcpy(new->ds_list, bufp, len);
++
++
++	/*  validate the ips */
++	if (!nfsd4_validate_pnfs_dlm_device(new->ds_list, &(new->num_ds)))
++		goto out_free;
++
++	dprintk("%s disk_name %s num_ds %d ds_list %s\n", __func__,
++		new->disk_name, new->num_ds, new->ds_list);
++
++	found = _nfsd4_find_pnfs_dlm_device(new->disk_name);
++	if (found) {
++		/* FIXME: should compare found->ds_list with new->ds_list
++		 * and if it is different, kick off a CB_NOTIFY change
++		 * deviceid.
++		 */
++		dprintk("%s pnfs_dlm_device %s:%s already in cache "
++			" replace ds_list with new ds_list %s\n", __func__,
++			found->disk_name, found->ds_list, new->ds_list);
++		memset(found->ds_list, 0, sizeof(found->ds_list));
++		memcpy(found->ds_list, new->ds_list, strlen(new->ds_list));
++		found->num_ds = new->num_ds;
++		kfree(new);
++	} else {
++		dprintk("%s Adding pnfs_dlm_device %s:%s\n", __func__,
++				new->disk_name, new->ds_list);
++		spin_lock(&dlm_device_list_lock);
++		list_add(&new->dlm_dev_list, &dlm_device_list);
++		spin_unlock(&dlm_device_list_lock);
++	}
++	dprintk("<-- %s Success\n", __func__);
++	return 0;
++
++out_free:
++	kfree(new);
++	dprintk("<-- %s returns %d\n", __func__, err);
++	return err;
++}
++
++void nfsd4_pnfs_dlm_shutdown(void)
++{
++	struct dlm_device_entry *entry, *tmp;
++
++	dprintk("--> %s\n", __func__);
++
++	/* Unlink and free every cached device entry under the list lock */
++	spin_lock(&dlm_device_list_lock);
++	list_for_each_entry_safe(entry, tmp, &dlm_device_list, dlm_dev_list) {
++		list_del(&entry->dlm_dev_list);
++		kfree(entry);
++	}
++	spin_unlock(&dlm_device_list_lock);
++}
++
++static int nfsd4_pnfs_dlm_getdeviter(struct super_block *sb,
++				     u32 layout_type,
++				     struct nfsd4_pnfs_dev_iter_res *res)
++{
++	if (layout_type == LAYOUT_NFSV4_1_FILES) {
++		/* Only device id 1 is reported, so every call ends the walk */
++		res->gd_eof = 1;
++		if (res->gd_cookie != 0)
++			return -ENOENT;
++		res->gd_devid = 1;
++		res->gd_verf = 1;
++		res->gd_cookie = 1;
++		return 0;
++	}
++
++	printk(KERN_ERR "%s: ERROR: layout type isn't 'file' "
++		"(type: %x)\n", __func__, layout_type);
++	return -ENOTSUPP;
++}
++
++static int nfsd4_pnfs_dlm_getdevinfo(struct super_block *sb,
++				     struct exp_xdr_stream *xdr,
++				     u32 layout_type,
++				     const struct nfsd4_pnfs_deviceid *devid)
++{
++	int err, len, i = 0;
++	struct pnfs_filelayout_device fdev;
++	struct pnfs_filelayout_devaddr *daddr;
++	struct dlm_device_entry *dlm_pdev;
++	char   *bufp;
++
++	err = -ENOTSUPP;
++	if (layout_type != LAYOUT_NFSV4_1_FILES) {
++		dprintk("%s: ERROR: layout type isn't 'file' "
++			"(type: %x)\n", __func__, layout_type);
++		return err;
++	}
++
++	/* We only hand out a deviceid of 1 in LAYOUTGET, so a GETDEVICEINFO
++	 * with a gdia_device_id != 1 is invalid.
++	 */
++	err = -EINVAL;
++	if (devid->devid != 1) {
++		dprintk("%s: WARNING: didn't receive a deviceid of "
++			"1 (got: 0x%llx)\n", __func__, devid->devid);
++		return err;
++	}
++
++	/*
++	 * If the DS list has not been established, return -EINVAL
++	 */
++	dlm_pdev = nfsd4_find_pnfs_dlm_device(sb);
++	if (!dlm_pdev) {
++		dprintk("%s: DEBUG: disk %s Not Found\n", __func__,
++			sb->s_bdev->bd_disk->disk_name);
++		return err;
++	}
++
++	dprintk("%s: Found disk %s with DS list |%s|\n",
++		__func__, dlm_pdev->disk_name, dlm_pdev->ds_list);
++
++	memset(&fdev, '\0', sizeof(fdev));
++	fdev.fl_device_length = dlm_pdev->num_ds;
++
++	err = -ENOMEM;
++	len = sizeof(*fdev.fl_device_list) * fdev.fl_device_length;
++	fdev.fl_device_list = kzalloc(len, GFP_KERNEL);
++	if (!fdev.fl_device_list) {
++		printk(KERN_ERR "%s: ERROR: unable to kmalloc a device list "
++			"buffer for %d DSes.\n", __func__, dlm_pdev->num_ds);
++		fdev.fl_device_length = 0;
++		goto out;
++	}
++
++	/* Set a simple stripe index list: entry i refers to data server i */
++	fdev.fl_stripeindices_length = fdev.fl_device_length;
++	fdev.fl_stripeindices_list = kzalloc(sizeof(u32) *
++				     fdev.fl_stripeindices_length, GFP_KERNEL);
++	if (!fdev.fl_stripeindices_list) {
++		printk(KERN_ERR "%s: ERROR: unable to kmalloc a stripeindices "
++			"list buffer for %d DSes.\n", __func__, dlm_pdev->num_ds);
++		goto out;
++	}
++	for (i = 0; i < fdev.fl_stripeindices_length; i++)
++		fdev.fl_stripeindices_list[i] = i;
++
++	/* Transfer the data server list with a single multipath entry */
++	bufp = dlm_pdev->ds_list;
++	for (i = 0; i < fdev.fl_device_length; i++) {
++		daddr = kmalloc(sizeof(*daddr), GFP_KERNEL);
++		if (!daddr) {
++			printk(KERN_ERR "%s: ERROR: unable to kmalloc a device "
++				"addr buffer.\n", __func__);
++			goto out;
++		}
++		/* Publish daddr now so the "out" path frees it on any failure */
++		fdev.fl_device_list[i].fl_multipath_length = 1;
++		fdev.fl_device_list[i].fl_multipath_list = daddr;
++
++		daddr->r_netid.data = "tcp";
++		daddr->r_netid.len = 3;
++		len = strcspn(bufp, ",");
++		daddr->r_addr.data = kmalloc(len + 4, GFP_KERNEL);
++		if (!daddr->r_addr.data)
++			goto out;
++		memcpy(daddr->r_addr.data, bufp, len);
++		/* Append the port as two more dotted bytes beyond the quad:
++		 * ".8.1" -> 0x08.0x01 -> 0x0801 = port 2049. */
++		memcpy(daddr->r_addr.data + len, ".8.1", 4);
++		daddr->r_addr.len = len + 4;
++		dprintk("%s: encoding DS |%s|\n", __func__, bufp);
++		bufp += len + 1;
++	}
++
++	/* have nfsd encode the device info */
++	err = filelayout_encode_devinfo(xdr, &fdev);
++out:
++	for (i = 0; i < fdev.fl_device_length; i++) {
++		daddr = fdev.fl_device_list[i].fl_multipath_list;
++		if (daddr)
++			kfree(daddr->r_addr.data);
++		kfree(daddr);
++	}
++	kfree(fdev.fl_device_list);
++	kfree(fdev.fl_stripeindices_list);
++	dprintk("<-- %s returns %d\n", __func__, err);
++	return err;
++}
++
++static int get_stripe_unit(int blocksize)
++{
++	if (blocksize < NFSSVC_MAXBLKSIZE)	/* round max down to a block multiple */
++		return NFSSVC_MAXBLKSIZE - (NFSSVC_MAXBLKSIZE % blocksize);
++	return blocksize;
++}
++
++/*
++ * Look up inode block device in pnfs_dlm_device list.
++ * Hash on the inode->i_ino and number of data servers.
++ * Returns the first stripe index, or -1 if no layout can be handed out.
++ */
++static int dlm_ino_hash(struct inode *ino)
++{
++	struct dlm_device_entry *de;
++
++	/* If can't find the inode block device in the pnfs_dlm_device list
++	 * (or it lists no data servers) then don't hand out a layout
++	 */
++	de = nfsd4_find_pnfs_dlm_device(ino->i_sb);
++	if (!de || de->num_ds < 1)
++		return -1;
++	/* Modulo rather than a mask: num_ds need not be a power of two */
++	return ino->i_ino % de->num_ds;
++}
++
++static enum nfsstat4 nfsd4_pnfs_dlm_layoutget(struct inode *inode,
++			   struct exp_xdr_stream *xdr,
++			   const struct nfsd4_pnfs_layoutget_arg *args,
++			   struct nfsd4_pnfs_layoutget_res *res)
++{
++	struct pnfs_filelayout_layout *layout = NULL;
++	struct knfsd_fh *fhp = NULL;
++	int index;
++	enum nfsstat4 rc = NFS4_OK;
++
++	dprintk("%s: LAYOUT_GET\n", __func__);
++
++	/* DLM exported file systems only support layouts for READ */
++	if (res->lg_seg.iomode == IOMODE_RW)
++		return NFS4ERR_BADIOMODE;
++
++	index = dlm_ino_hash(inode);	/* negative: no device entry found */
++	dprintk("%s first stripe index %d i_ino %lu\n", __func__, index,
++		inode->i_ino);
++	if (index < 0)
++		return NFS4ERR_LAYOUTUNAVAILABLE;
++
++	res->lg_seg.layout_type = LAYOUT_NFSV4_1_FILES;
++	/* Always give out whole file layouts */
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;
++	/* Always give out READ ONLY layouts */
++	res->lg_seg.iomode = IOMODE_READ;
++
++	layout = kzalloc(sizeof(*layout), GFP_KERNEL);
++	if (layout == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;
++		goto error;
++	}
++
++	/* Set file layout response args */
++	layout->lg_layout_type = LAYOUT_NFSV4_1_FILES;
++	layout->lg_stripe_type = STRIPE_SPARSE;
++	layout->lg_commit_through_mds = false;
++	layout->lg_stripe_unit = get_stripe_unit(inode->i_sb->s_blocksize);
++	layout->lg_fh_length = 1;	/* lg_fh_list holds the single fhp below */
++	layout->device_id.sbid = args->lg_sbid;
++	layout->device_id.devid = 1;	/* FSFTEMP: getdevinfo rejects any other id */
++	layout->lg_first_stripe_index = index;	/* FSFTEMP: from dlm_ino_hash() */
++	layout->lg_pattern_offset = 0;
++
++	fhp = kmalloc(sizeof(*fhp), GFP_KERNEL);
++	if (fhp == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;
++		goto error;
++	}
++
++	memcpy(fhp, args->lg_fh, sizeof(*fhp));	/* private copy of the caller's fh */
++	pnfs_fh_mark_ds(fhp);
++	layout->lg_fh_list = fhp;
++
++	/* Call nfsd to encode layout */
++	rc = filelayout_encode_layout(xdr, layout);
++exit:
++	kfree(layout);	/* both pointers start out NULL, so this is safe on every path */
++	kfree(fhp);
++	return rc;
++
++error:
++	res->lg_seg.length = 0;	/* report an empty segment, then share the cleanup */
++	goto exit;
++}
++
++static int
++nfsd4_pnfs_dlm_layouttype(struct super_block *sb)
++{
++	return LAYOUT_NFSV4_1_FILES;
++}
++
++/* For use by DLM cluster file systems exported by pNFSD */
++const struct pnfs_export_operations pnfs_dlm_export_ops = {
++	.layout_type = nfsd4_pnfs_dlm_layouttype,
++	.get_device_info = nfsd4_pnfs_dlm_getdevinfo,
++	.get_device_iter = nfsd4_pnfs_dlm_getdeviter,
++	.layout_get = nfsd4_pnfs_dlm_layoutget,
++};
++EXPORT_SYMBOL(pnfs_dlm_export_ops);
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-08-31 20:42:05.558141620 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-08-31 20:42:05.558141620 -0400
+@@ -0,0 +1,620 @@
++/*
++*  linux/fs/nfsd/nfs4pnfsds.c
++*
++*  Copyright (c) 2005 The Regents of the University of Michigan.
++*  All rights reserved.
++*
++*  Andy Adamson <andros@umich.edu>
++*
++*  Redistribution and use in source and binary forms, with or without
++*  modification, are permitted provided that the following conditions
++*  are met:
++*
++*  1. Redistributions of source code must retain the above copyright
++*     notice, this list of conditions and the following disclaimer.
++*  2. Redistributions in binary form must reproduce the above copyright
++*     notice, this list of conditions and the following disclaimer in the
++*     documentation and/or other materials provided with the distribution.
++*  3. Neither the name of the University nor the names of its
++*     contributors may be used to endorse or promote products derived
++*     from this software without specific prior written permission.
++*
++*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++*
++*/
++#if defined(CONFIG_PNFSD)
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++#include <linux/param.h>
++#include <linux/sunrpc/svc.h>
++#include <linux/sunrpc/debug.h>
++#include <linux/nfs4.h>
++#include <linux/exportfs.h>
++#include <linux/sched.h>
++
++#include "nfsd.h"
++#include "pnfsd.h"
++#include "state.h"
++
++/*
++ *******************
++ *   	 PNFS
++ *******************
++ */
++/*
++ * Hash tables for pNFS Data Server state
++ *
++ * mds_nodeid:	list of struct pnfs_mds_id one per Metadata server (MDS) using
++ *		this data server (DS).
++ *
++ * mds_clid_hashtbl[]: uses clientid_hashval(), hash of all clientids obtained
++ *			from any MDS.
++ *
++ * ds_stid_hashtbl[]: uses stateid_hashval(), hash of all stateids obtained
++ *			from any MDS.
++ *
++ */
++/* Hash tables for clientid state */
++#define CLIENT_HASH_BITS                 4
++#define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
++#define CLIENT_HASH_MASK                (CLIENT_HASH_SIZE - 1)
++
++#define clientid_hashval(id) \
++	((id) & CLIENT_HASH_MASK)
++
++/* hash table for pnfs_ds_stateid */
++#define STATEID_HASH_BITS              10
++#define STATEID_HASH_SIZE              (1 << STATEID_HASH_BITS)
++#define STATEID_HASH_MASK              (STATEID_HASH_SIZE - 1)
++
++#define stateid_hashval(owner_id, file_id)  \
++	(((owner_id) + (file_id)) & STATEID_HASH_MASK)
++
++static struct list_head mds_id_tbl;
++static struct list_head mds_clid_hashtbl[CLIENT_HASH_SIZE];
++static struct list_head ds_stid_hashtbl[STATEID_HASH_SIZE];
++
++static inline void put_ds_clientid(struct pnfs_ds_clientid *dcp);
++static inline void put_ds_mdsid(struct pnfs_mds_id *mdp);
++
++/* Mutex for data server state.  Needs to be separate from
++ * mds state mutex since a node can be both mds and ds */
++static DEFINE_MUTEX(ds_mutex);
++static struct thread_info *ds_mutex_owner;
++
++static void
++ds_lock_state(void)
++{
++	mutex_lock(&ds_mutex);
++	ds_mutex_owner = current_thread_info();
++}
++
++static void
++ds_unlock_state(void)
++{
++	BUG_ON(ds_mutex_owner != current_thread_info());
++	ds_mutex_owner = NULL;
++	mutex_unlock(&ds_mutex);
++}
++
++static int
++cmp_clid(const clientid_t *cl1, const clientid_t *cl2)
++{
++	return (cl1->cl_boot == cl2->cl_boot) &&
++	       (cl1->cl_id == cl2->cl_id);
++}
++
++void
++nfs4_pnfs_state_init(void)
++{
++	int i;
++
++	for (i = 0; i < CLIENT_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&mds_clid_hashtbl[i]);
++
++	for (i = 0; i < STATEID_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&ds_stid_hashtbl[i]);
++
++	INIT_LIST_HEAD(&mds_id_tbl);
++}
++
++static struct pnfs_mds_id *
++find_pnfs_mds_id(u32 mdsid)
++{
++	struct pnfs_mds_id *local = NULL;
++
++	dprintk("pNFSD: %s\n", __func__);
++	list_for_each_entry(local, &mds_id_tbl, di_hash) {
++		if (local->di_mdsid == mdsid)
++			return local;
++	}
++	return NULL;
++}
++
++static struct pnfs_ds_clientid *
++find_pnfs_ds_clientid(const clientid_t *clid)
++{
++	struct pnfs_ds_clientid *local = NULL;
++	unsigned int hashval;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	hashval = clientid_hashval(clid->cl_id);
++	list_for_each_entry(local, &mds_clid_hashtbl[hashval], dc_hash) {
++		if (cmp_clid(&local->dc_mdsclid, clid))
++			return local;
++	}
++	return NULL;
++}
++
++static struct pnfs_ds_stateid *
++find_pnfs_ds_stateid(stateid_t *stid)
++{
++	struct pnfs_ds_stateid *local = NULL;
++	u32 st_id = stid->si_stateownerid;
++	u32 f_id = stid->si_fileid;
++	unsigned int hashval;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	hashval = stateid_hashval(st_id, f_id);
++	list_for_each_entry(local, &ds_stid_hashtbl[hashval], ds_hash)
++		if ((local->ds_stid.si_stateownerid == st_id) &&
++				(local->ds_stid.si_fileid == f_id) &&
++				(local->ds_stid.si_boot == stid->si_boot)) {
++			stateid_t *sid = &local->ds_stid;
++			dprintk("NFSD: %s <-- %p ds_flags %lx " STATEID_FMT "\n",
++				__func__, local, local->ds_flags,
++				STATEID_VAL(sid));
++			return local;
++		}
++	return NULL;
++}
++
++static void
++release_ds_mdsid(struct kref *kref)
++{
++	struct pnfs_mds_id *mdp =
++		container_of(kref, struct pnfs_mds_id, di_ref);
++	dprintk("pNFSD: %s\n", __func__);
++
++	list_del(&mdp->di_hash);
++	list_del(&mdp->di_mdsclid);
++	kfree(mdp);
++}
++
++static void
++release_ds_clientid(struct kref *kref)
++{
++	struct pnfs_ds_clientid *dcp =
++		container_of(kref, struct pnfs_ds_clientid, dc_ref);
++	struct pnfs_mds_id *mdp;
++	dprintk("pNFSD: %s\n", __func__);
++
++	mdp = find_pnfs_mds_id(dcp->dc_mdsid);
++	if (mdp)
++		put_ds_mdsid(mdp);
++
++	list_del(&dcp->dc_hash);
++	list_del(&dcp->dc_stateid);
++	list_del(&dcp->dc_permdsid);
++	kfree(dcp);
++}
++
++static void
++release_ds_stateid(struct kref *kref)
++{
++	struct pnfs_ds_stateid *dsp =
++		container_of(kref, struct pnfs_ds_stateid, ds_ref);
++	struct pnfs_ds_clientid *dcp;
++	dprintk("pNFS %s: dsp %p\n", __func__, dsp);
++
++	dcp = find_pnfs_ds_clientid(&dsp->ds_mdsclid);
++	if (dcp)
++		put_ds_clientid(dcp);
++
++	list_del(&dsp->ds_hash);
++	list_del(&dsp->ds_perclid);
++	kfree(dsp);
++}
++
++static inline void
++put_ds_clientid(struct pnfs_ds_clientid *dcp)
++{
++	dprintk("pNFS %s: dcp %p ref %d\n", __func__, dcp,
++		atomic_read(&dcp->dc_ref.refcount));
++	kref_put(&dcp->dc_ref, release_ds_clientid);
++}
++
++static inline void
++get_ds_clientid(struct pnfs_ds_clientid *dcp)
++{
++	dprintk("pNFS %s: dcp %p ref %d\n", __func__, dcp,
++		atomic_read(&dcp->dc_ref.refcount));
++	kref_get(&dcp->dc_ref);
++}
++
++static inline void
++put_ds_mdsid(struct pnfs_mds_id *mdp)
++{
++	dprintk("pNFS %s: mdp %p ref %d\n", __func__, mdp,
++		atomic_read(&mdp->di_ref.refcount));
++	kref_put(&mdp->di_ref, release_ds_mdsid);
++}
++
++static inline void
++get_ds_mdsid(struct pnfs_mds_id *mdp)
++{
++	dprintk("pNFS %s: mdp %p ref %d\n", __func__, mdp,
++		atomic_read(&mdp->di_ref.refcount));
++	kref_get(&mdp->di_ref);
++}
++
++static inline void
++put_ds_stateid(struct pnfs_ds_stateid *dsp)
++{
++	dprintk("pNFS %s: dsp %p ref %d\n", __func__, dsp,
++		atomic_read(&dsp->ds_ref.refcount));
++	kref_put(&dsp->ds_ref, release_ds_stateid);
++}
++
++static inline void
++get_ds_stateid(struct pnfs_ds_stateid *dsp)
++{
++	dprintk("pNFS %s: dsp %p ref %d\n", __func__, dsp,
++		atomic_read(&dsp->ds_ref.refcount));
++	kref_get(&dsp->ds_ref);
++}
++
++void
++nfs4_pnfs_state_shutdown(void)
++{
++	struct pnfs_ds_stateid *dsp;
++	int i;
++
++	dprintk("pNFSD %s: -->\n", __func__);
++
++	ds_lock_state();
++	for (i = 0; i < STATEID_HASH_SIZE; i++) {
++		while (!list_empty(&ds_stid_hashtbl[i])) {
++			dsp = list_entry(ds_stid_hashtbl[i].next,
++					 struct pnfs_ds_stateid, ds_hash);
++			put_ds_stateid(dsp);
++		}
++	}
++	ds_unlock_state();
++}
++
++static struct pnfs_mds_id *
++alloc_init_mds_id(struct pnfs_get_state *gsp)
++{
++	struct pnfs_mds_id *mdp;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	mdp = kmalloc(sizeof(*mdp), GFP_KERNEL);
++	if (!mdp)
++		return NULL;
++	INIT_LIST_HEAD(&mdp->di_hash);
++	INIT_LIST_HEAD(&mdp->di_mdsclid);
++	list_add(&mdp->di_hash, &mds_id_tbl);
++	mdp->di_mdsid = gsp->dsid;
++	mdp->di_mdsboot = 0;
++	kref_init(&mdp->di_ref);
++	return mdp;
++}
++
++static struct pnfs_ds_clientid *
++alloc_init_ds_clientid(struct pnfs_get_state *gsp)
++{
++	struct pnfs_mds_id *mdp;
++	struct pnfs_ds_clientid *dcp;
++	clientid_t *clid = (clientid_t *)&gsp->clid;
++	unsigned int hashval = clientid_hashval(clid->cl_id);
++
++	dprintk("pNFSD: %s\n", __func__);
++	mdp = find_pnfs_mds_id(gsp->dsid);
++	if (!mdp) {
++		mdp = alloc_init_mds_id(gsp);
++		if (!mdp)
++			return NULL;
++	} else {
++		get_ds_mdsid(mdp);
++	}
++	dcp = kmalloc(sizeof(*dcp), GFP_KERNEL);
++	if (!dcp) {
++		/* don't leak the mds_id reference obtained just above */
++		put_ds_mdsid(mdp);
++		return NULL;
++	}
++	INIT_LIST_HEAD(&dcp->dc_hash);
++	INIT_LIST_HEAD(&dcp->dc_stateid);
++	INIT_LIST_HEAD(&dcp->dc_permdsid);
++	list_add(&dcp->dc_hash, &mds_clid_hashtbl[hashval]);
++	list_add(&dcp->dc_permdsid, &mdp->di_mdsclid);
++	dcp->dc_mdsclid = *clid;
++	kref_init(&dcp->dc_ref);
++	dcp->dc_mdsid = gsp->dsid;
++	return dcp;
++}
++
++static struct pnfs_ds_stateid *
++alloc_init_ds_stateid(struct svc_fh *cfh, stateid_t *stidp)
++{
++	struct pnfs_ds_stateid *dsp;
++	u32 st_id = stidp->si_stateownerid;
++	u32 f_id  = stidp->si_fileid;
++	unsigned int hashval;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	dsp = kmalloc(sizeof(*dsp), GFP_KERNEL);
++	if (!dsp)
++		return dsp;
++
++	INIT_LIST_HEAD(&dsp->ds_hash);
++	INIT_LIST_HEAD(&dsp->ds_perclid);
++	memcpy(&dsp->ds_stid, stidp, sizeof(stateid_t));
++	fh_copy_shallow(&dsp->ds_fh, &cfh->fh_handle);
++	dsp->ds_access = 0;
++	dsp->ds_status = 0;
++	dsp->ds_flags = 0L;
++	kref_init(&dsp->ds_ref);
++	set_bit(DS_STATEID_NEW, &dsp->ds_flags);
++	clear_bit(DS_STATEID_VALID, &dsp->ds_flags);
++	clear_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++	init_waitqueue_head(&dsp->ds_waitq);
++
++	hashval = stateid_hashval(st_id, f_id);
++	list_add(&dsp->ds_hash, &ds_stid_hashtbl[hashval]);
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, dsp);
++	return dsp;
++}
++
++static int
++update_ds_stateid(struct pnfs_ds_stateid *dsp, struct svc_fh *cfh,
++		  struct pnfs_get_state *gsp)
++{
++	struct pnfs_ds_clientid *dcp;
++	int new = 0;
++
++	dprintk("pNFSD: %s dsp %p\n", __func__, dsp);
++
++	dcp = find_pnfs_ds_clientid((clientid_t *)&gsp->clid);
++	if (!dcp) {
++		dcp = alloc_init_ds_clientid(gsp);
++		if (!dcp)
++			return 1;
++		new = 1;
++	}
++	if (test_bit(DS_STATEID_NEW, &dsp->ds_flags)) {
++		list_add(&dsp->ds_perclid, &dcp->dc_stateid);
++		if (!new)
++			get_ds_clientid(dcp);
++	}
++
++	memcpy(&dsp->ds_stid, &gsp->stid, sizeof(stateid_t));
++	dsp->ds_access = gsp->access;
++	dsp->ds_status = 0;
++	dsp->ds_verifier[0] = gsp->verifier[0];
++	dsp->ds_verifier[1] = gsp->verifier[1];
++	memcpy(&dsp->ds_mdsclid, &gsp->clid, sizeof(clientid_t));
++	set_bit(DS_STATEID_VALID, &dsp->ds_flags);
++	clear_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++	clear_bit(DS_STATEID_NEW, &dsp->ds_flags);
++	return 0;
++}
++
++int
++nfs4_pnfs_cb_change_state(struct pnfs_get_state *gs)
++{
++	stateid_t *stid = (stateid_t *)&gs->stid;
++	struct pnfs_ds_stateid *dsp;
++
++	dprintk("pNFSD: %s stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(stid));
++
++	ds_lock_state();
++	dsp = find_pnfs_ds_stateid(stid);
++	if (dsp)
++		put_ds_stateid(dsp);
++	ds_unlock_state();
++
++	dprintk("pNFSD: %s dsp %p\n", __func__, dsp);
++
++	if (dsp)
++		return 0;
++	return -ENOENT;
++}
++
++/* Retrieves and validates stateid.
++ * If stateid exists and its fields match, return it.
++ * If stateid exists but either the generation or
++ * ownerids don't match, check with mds to see if it is valid.
++ * If the stateid doesn't exist, the first thread creates an
++ * invalid *marker* stateid, then checks to see if the
++ * stateid exists on the mds.  If so, it validates the *marker*
++ * stateid and updates its fields.  Subsequent threads that
++ * find the *marker* stateid wait until it is valid or an error
++ * occurs.
++ * Called with ds_mutex held (ds_lock_state()); returns with it held.
++ */
++static struct pnfs_ds_stateid *
++nfsv4_ds_get_state(struct svc_fh *cfh, stateid_t *stidp)
++{
++	struct inode *ino = cfh->fh_dentry->d_inode;
++	struct super_block *sb;
++	struct pnfs_ds_stateid *dsp = NULL, *ret;
++	struct pnfs_get_state gs = {
++		.access = 0,
++	};
++	int status = 0, waiter = 0, held = 0;
++
++	dprintk("pNFSD: %s -->\n", __func__);
++
++	dsp = find_pnfs_ds_stateid(stidp);
++	if (dsp && test_bit(DS_STATEID_VALID, &dsp->ds_flags) &&
++	    (stidp->si_generation == dsp->ds_stid.si_generation))
++		goto out_noput;
++
++	sb = ino->i_sb;
++	if (!sb || !sb->s_pnfs_op || !sb->s_pnfs_op->get_state)
++		goto out_noput;
++
++	/* Uninitialize current state if it exists yet it doesn't match.
++	 * If it is already invalid, another thread is checking state */
++	if (dsp) {
++		if (!test_and_clear_bit(DS_STATEID_VALID, &dsp->ds_flags))
++			waiter = 1;
++	} else {
++		dsp = alloc_init_ds_stateid(cfh, stidp);
++		if (!dsp)
++			goto out_noput;
++	}
++
++	dprintk("pNFSD: %s Starting loop\n", __func__);
++	get_ds_stateid(dsp);
++	while (!test_bit(DS_STATEID_VALID, &dsp->ds_flags)) {
++		ds_unlock_state();
++
++		/* Another thread is checking the state */
++		if (waiter) {
++			dprintk("pNFSD: %s waiting\n", __func__);
++			wait_event_interruptible_timeout(dsp->ds_waitq,
++				(test_bit(DS_STATEID_VALID, &dsp->ds_flags) ||
++				 test_bit(DS_STATEID_ERROR, &dsp->ds_flags)),
++				 msecs_to_jiffies(1024));
++			dprintk("pNFSD: %s awake\n", __func__);
++			ds_lock_state();
++			if (test_bit(DS_STATEID_ERROR, &dsp->ds_flags))
++				goto out;
++
++			continue;
++		}
++
++		/* Validate stateid on mds */
++		dprintk("pNFSD: %s Checking state on MDS\n", __func__);
++		memcpy(&gs.stid, stidp, sizeof(stateid_t));
++		status = sb->s_pnfs_op->get_state(ino, &cfh->fh_handle, &gs);
++		dprintk("pNFSD: %s from MDS status %d\n", __func__, status);
++		ds_lock_state();
++		/* if !status and stateid is valid, update id and mark valid */
++		if (status || update_ds_stateid(dsp, cfh, &gs)) {
++			set_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++			wake_up(&dsp->ds_waitq);
++			/* remove invalid stateid from list */
++			put_ds_stateid(dsp);
++			goto out;
++		}
++
++		wake_up(&dsp->ds_waitq);
++	}
++out:
++	held = 1;
++out_noput:
++	if (dsp)
++		dprintk("pNFSD: %s <-- dsp %p ds_flags %lx " STATEID_FMT "\n",
++			__func__, dsp, dsp->ds_flags, STATEID_VAL(&dsp->ds_stid));
++	/* If error, return null.  Decide before the put: it may free dsp */
++	ret = (dsp && !test_bit(DS_STATEID_ERROR, &dsp->ds_flags)) ? dsp : NULL;
++	if (held)
++		put_ds_stateid(dsp);
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, ret);
++	return ret;
++}
++
++int
++nfs4_preprocess_pnfs_ds_stateid(struct svc_fh *cfh, stateid_t *stateid)
++{
++	struct pnfs_ds_stateid *dsp;
++	int status = 0;
++
++	dprintk("pNFSD: %s --> " STATEID_FMT "\n", __func__,
++		STATEID_VAL(stateid));
++
++	/* Must release state lock while verifying stateid on mds */
++	nfs4_unlock_state();
++	ds_lock_state();
++	dsp = nfsv4_ds_get_state(cfh, stateid);
++	if (dsp) {
++		get_ds_stateid(dsp);
++		dprintk("pNFSD: %s Found " STATEID_FMT "\n", __func__,
++			STATEID_VAL(&dsp->ds_stid));
++
++		dprintk("NFSD: %s: dsp %p fh_size %u:%u "
++			"fh [%08x:%08x:%08x:%08x]:[%08x:%08x:%08x:%08x] "
++			"gen %x:%x\n",
++			__func__, dsp,
++			cfh->fh_handle.fh_size, dsp->ds_fh.fh_size,
++			((unsigned *)&cfh->fh_handle.fh_base)[0],
++			((unsigned *)&cfh->fh_handle.fh_base)[1],
++			((unsigned *)&cfh->fh_handle.fh_base)[2],
++			((unsigned *)&cfh->fh_handle.fh_base)[3],
++			((unsigned *)&dsp->ds_fh.fh_base)[0],
++			((unsigned *)&dsp->ds_fh.fh_base)[1],
++			((unsigned *)&dsp->ds_fh.fh_base)[2],
++			((unsigned *)&dsp->ds_fh.fh_base)[3],
++			stateid->si_generation, dsp->ds_stid.si_generation);
++	}
++
++	if (!dsp ||
++	    (cfh->fh_handle.fh_size != dsp->ds_fh.fh_size) ||
++	    (memcmp(&cfh->fh_handle.fh_base, &dsp->ds_fh.fh_base,
++		    dsp->ds_fh.fh_size) != 0) ||
++	    (stateid->si_generation > dsp->ds_stid.si_generation))
++		status = nfserr_bad_stateid;
++	else if (stateid->si_generation < dsp->ds_stid.si_generation)
++		status = nfserr_old_stateid;
++
++	if (dsp)
++		put_ds_stateid(dsp);
++	ds_unlock_state();
++	nfs4_lock_state();
++	dprintk("pNFSD: %s <-- status %d\n", __func__, be32_to_cpu(status));
++	return status;
++}
++
++void
++nfs4_ds_get_verifier(stateid_t *stateid, struct super_block *sb, u32 *p)
++{
++	struct pnfs_ds_stateid *dsp = NULL;
++
++	dprintk("pNFSD: %s --> stid %p\n", __func__, stateid);
++
++	ds_lock_state();
++	if (stateid != NULL) {
++		dsp = find_pnfs_ds_stateid(stateid);
++		if (dsp)
++			get_ds_stateid(dsp);
++	}
++
++	/* XXX: Should we fetch the stateid or wait if some other
++	 * thread is currently retrieving the stateid ? */
++	if (dsp && test_bit(DS_STATEID_VALID, &dsp->ds_flags)) {
++		*p++ = dsp->ds_verifier[0];
++		*p++ = dsp->ds_verifier[1];
++		put_ds_stateid(dsp);
++	} else {
++		/* must be on MDS; drop the ref taken on a not-yet-valid stateid */
++		if (dsp)
++			put_ds_stateid(dsp);
++		ds_unlock_state();
++		sb->s_pnfs_op->get_verifier(sb, p);
++		ds_lock_state();
++	}
++	ds_unlock_state();
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, dsp);
++}
++
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-08-31 20:41:19.198160463 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-31 20:42:05.559129617 -0400
+@@ -34,10 +34,14 @@
+  */
+ #include <linux/file.h>
+ #include <linux/slab.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd4_block.h>
+ 
+ #include "cache.h"
+ #include "xdr4.h"
+ #include "vfs.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY		NFSDDBG_PROC
+ 
+@@ -372,6 +376,24 @@ nfsd4_open(struct svc_rqst *rqstp, struc
+ 	 * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
+ 	 */
+ 	status = nfsd4_process_open2(rqstp, &cstate->current_fh, open);
++#if defined(CONFIG_SPNFS)
++	if (!status && spnfs_enabled()) {
++		struct inode *inode = cstate->current_fh.fh_dentry->d_inode;
++
++		status = spnfs_open(inode, open);
++		if (status) {
++			dprintk(
++			     "nfsd: pNFS could not be enabled for inode: %lu\n",
++			     inode->i_ino);
++			/*
++			 * XXX When there's a failure then need to indicate to
++			 * future ops that no pNFS is available.  Should I save
++			 * the status in the inode?  It's kind of a big hammer.
++			 * But there may be no stripes available?
++			 */
++		}
++	}
++#endif /* CONFIG_SPNFS */
+ out:
+ 	if (open->op_stateowner) {
+ 		nfs4_get_stateowner(open->op_stateowner);
+@@ -454,16 +476,30 @@ nfsd4_access(struct svc_rqst *rqstp, str
+ 			   &access->ac_supported);
+ }
+ 
++static void
++nfsd4_get_verifier(struct super_block *sb, nfs4_verifier *verf)
++{
++	u32 *p = (u32 *)verf->data;
++
++#if defined(CONFIG_PNFSD)
++	if (sb->s_pnfs_op && sb->s_pnfs_op->get_verifier) {
++		nfs4_ds_get_verifier(NULL, sb, p);
++		return;
++	}
++#endif /* CONFIG_PNFSD */
++
++	*p++ = nfssvc_boot.tv_sec;
++	*p++ = nfssvc_boot.tv_usec;
++}
++
+ static __be32
+ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ 	     struct nfsd4_commit *commit)
+ {
+ 	__be32 status;
+ 
+-	u32 *p = (u32 *)commit->co_verf.data;
+-	*p++ = nfssvc_boot.tv_sec;
+-	*p++ = nfssvc_boot.tv_usec;
+-
++	nfsd4_get_verifier(cstate->current_fh.fh_dentry->d_inode->i_sb,
++			   &commit->co_verf);
+ 	status = nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
+ 			     commit->co_count);
+ 	if (status == nfserr_symlink)
+@@ -816,7 +852,6 @@ nfsd4_write(struct svc_rqst *rqstp, stru
+ {
+ 	stateid_t *stateid = &write->wr_stateid;
+ 	struct file *filp = NULL;
+-	u32 *p;
+ 	__be32 status = nfs_ok;
+ 	unsigned long cnt;
+ 
+@@ -838,13 +873,49 @@ nfsd4_write(struct svc_rqst *rqstp, stru
+ 
+ 	cnt = write->wr_buflen;
+ 	write->wr_how_written = write->wr_stable_how;
+-	p = (u32 *)write->wr_verifier.data;
+-	*p++ = nfssvc_boot.tv_sec;
+-	*p++ = nfssvc_boot.tv_usec;
+ 
++	nfsd4_get_verifier(cstate->current_fh.fh_dentry->d_inode->i_sb,
++			   &write->wr_verifier);
++#if defined(CONFIG_SPNFS)
++#if defined(CONFIG_SPNFS_BLOCK)
++	if (pnfs_block_enabled(cstate->current_fh.fh_dentry->d_inode, 0)) {
++                status = bl_layoutrecall(cstate->current_fh.fh_dentry->d_inode,
++		    RETURN_FILE, write->wr_offset, write->wr_buflen);
++                if (!status) {
++                        status =  nfsd_write(rqstp, &cstate->current_fh, filp,
++			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
++			     &cnt, &write->wr_how_written);
++                }
++        } else
++#endif
++		
++	if (spnfs_enabled()) {
++		status = spnfs_write(cstate->current_fh.fh_dentry->d_inode,
++			write->wr_offset, write->wr_buflen, write->wr_vlen,
++			rqstp);
++		if (status == nfs_ok) {
++			/* DMXXX: HACK to get filesize set */
++			/* write one byte at offset+length-1 */
++			struct kvec k[1];
++			char zero = 0;
++			unsigned long cnt = 1;
++
++			k[0].iov_base = (void *)&zero;
++			k[0].iov_len = 1;
++			nfsd_write(rqstp, &cstate->current_fh, filp,
++				   write->wr_offset+write->wr_buflen-1, k, 1,
++				   &cnt, &write->wr_how_written);
++		}
++	} else /* we're not an MDS */
++		status =  nfsd_write(rqstp, &cstate->current_fh, filp,
++			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
++			     &cnt, &write->wr_how_written);
++#else
+ 	status =  nfsd_write(rqstp, &cstate->current_fh, filp,
+ 			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
+ 			     &cnt, &write->wr_how_written);
++#endif /* CONFIG_SPNFS */
++
+ 	if (filp)
+ 		fput(filp);
+ 
+@@ -935,6 +1006,306 @@ nfsd4_verify(struct svc_rqst *rqstp, str
+ 	return status == nfserr_same ? nfs_ok : status;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++static __be32
++nfsd4_layout_verify(struct super_block *sb, struct svc_export *exp,
++		    unsigned int layout_type)
++{
++	int status, type;
++
++	/* check to see if pNFS  is supported. */
++	status = nfserr_layoutunavailable;
++	if (exp && exp->ex_pnfs == 0) {
++		dprintk("%s: Underlying file system "
++			"is not exported over pNFS\n", __func__);
++		goto out;
++	}
++	if (!sb->s_pnfs_op || !sb->s_pnfs_op->layout_type) {
++		dprintk("%s: Underlying file system "
++			"does not support pNFS\n", __func__);
++		goto out;
++	}
++
++	type = sb->s_pnfs_op->layout_type(sb);
++
++	/* check to see if requested layout type is supported. */
++	status = nfserr_unknown_layouttype;
++	if (!type)
++		dprintk("BUG: %s: layout_type 0 is reserved and must not be "
++			"used by filesystem\n", __func__);
++	else if (type != layout_type)
++		dprintk("%s: requested layout type %d "
++		       "does not match supported type %d\n",
++			__func__, layout_type, type);
++	else
++		status = nfs_ok;
++out:
++	return status;
++}
++
++static __be32
++nfsd4_getdevlist(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_getdevlist *gdlp)
++{
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++	int status;
++
++	dprintk("%s: type %u maxdevices %u cookie %llu verf %llu\n",
++		__func__, gdlp->gd_layout_type, gdlp->gd_maxdevices,
++		gdlp->gd_cookie, gdlp->gd_verf);
++
++
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* We must be able to encode at least one device */
++	if (!gdlp->gd_maxdevices)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     gdlp->gd_layout_type);
++	if (status)
++		goto out;
++
++	/* Fail with nfserr_notsupp if the underlying file system does not
++	 * implement get_device_iter (needed for getdevicelist) */
++	if (!sb->s_pnfs_op->get_device_iter) {
++		status = nfserr_notsupp;
++		goto out;
++	}
++
++	/* Set up arguments so device can be retrieved at encode time */
++	gdlp->gd_fhp = &cstate->current_fh;
++out:
++	return status;
++}
++
++static __be32
++nfsd4_getdevinfo(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_getdevinfo *gdp)
++{
++	struct super_block *sb;
++	int status;
++	clientid_t clid;
++
++	dprintk("%s: layout_type %u dev_id %llx:%llx maxcnt %u\n",
++	       __func__, gdp->gd_layout_type, gdp->gd_devid.sbid,
++	       gdp->gd_devid.devid, gdp->gd_maxcount);
++
++	status = nfserr_inval;
++	sb = find_sbid_id(gdp->gd_devid.sbid);
++	dprintk("%s: sb %p\n", __func__, sb);
++	if (!sb) {
++		status = nfserr_noent;
++		goto out;
++	}
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, NULL, gdp->gd_layout_type);
++	if (status)
++		goto out;
++
++	/* Set up arguments so device can be retrieved at encode time */
++	gdp->gd_sb = sb;
++
++	/* Update notifications */
++	copy_clientid(&clid, cstate->session);
++	pnfs_set_device_notify(&clid, gdp->gd_notify_types);
++out:
++	return status;
++}
++
++static __be32
++nfsd4_layoutget(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutget *lgp)
++{
++	int status;
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lgp->lg_seg.layout_type);
++	if (status)
++		goto out;
++
++	status = nfserr_badiomode;
++	if (lgp->lg_seg.iomode != IOMODE_READ &&
++	    lgp->lg_seg.iomode != IOMODE_RW) {
++		dprintk("pNFS %s: invalid iomode %d\n", __func__,
++			lgp->lg_seg.iomode);
++		goto out;
++	}
++
++	/* Set up arguments so layout can be retrieved at encode time */
++	lgp->lg_fhp = current_fh;
++	copy_clientid((clientid_t *)&lgp->lg_seg.clientid, cstate->session);
++	status = nfs_ok;
++out:
++	return status;
++}
++
++/* LAYOUTCOMMIT: after verifying the layout type, defer to the file system's
++ * layout_commit or, without one, grow the file to lc_last_wr + 1 here. */
++static __be32
++nfsd4_layoutcommit(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	int status;
++	struct inode *ino = NULL;
++	struct iattr ia;
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	dprintk("NFSD: nfsd4_layoutcommit \n");
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	ino = current_fh->fh_dentry->d_inode;
++	if (!ino)
++		goto out;
++
++	sb = ino->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lcp->args.lc_seg.layout_type);
++	if (status)
++		goto out;
++
++	/* This will only extend the file length.  Do a quick
++	 * check to see if there is any point in waiting for the update
++	 * locks.
++	 * TODO: Is this correct for all back ends?
++	 */
++	dprintk("%s:new offset: %d new size: %llu old size: %lld\n",
++		__func__, lcp->args.lc_newoffset, lcp->args.lc_last_wr + 1,
++		ino->i_size);
++
++	/* Set clientid from sessionid */
++	copy_clientid((clientid_t *)&lcp->args.lc_seg.clientid, cstate->session);
++	lcp->res.lc_size_chg = 0;
++	if (sb->s_pnfs_op->layout_commit) {
++		status = sb->s_pnfs_op->layout_commit(ino, &lcp->args, &lcp->res);
++		dprintk("%s:layout_commit result %d\n", __func__, status);
++	} else {
++		fh_lock(current_fh);
++		if ((lcp->args.lc_newoffset == 0) ||
++		    ((lcp->args.lc_last_wr + 1) <= ino->i_size)) {
++			status = 0;
++			fh_unlock(current_fh);
++			goto out;
++		}
++
++		/* Grow the file; notify_change() errno becomes an nfserr */
++		dprintk("%s: Modifying file size\n", __func__);
++		ia.ia_valid = ATTR_SIZE;
++		ia.ia_size = lcp->args.lc_last_wr + 1;
++		status = nfserrno(notify_change(current_fh->fh_dentry, &ia));
++		fh_unlock(current_fh);
++		dprintk("%s:notify_change result %d\n", __func__, status);
++	}
++
++	if (!status && lcp->res.lc_size_chg &&
++	    EX_ISSYNC(current_fh->fh_export)) {
++		dprintk("%s: Synchronously writing inode size %llu\n",
++			__func__, ino->i_size);
++		write_inode_now(ino, 1);
++		lcp->res.lc_newsize = i_size_read(ino);
++	}
++out:
++	return status;
++}
++
++static __be32
++nfsd4_layoutreturn(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	int status;
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lrp->args.lr_seg.layout_type);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	if (lrp->args.lr_return_type != RETURN_FILE &&
++	    lrp->args.lr_return_type != RETURN_FSID &&
++	    lrp->args.lr_return_type != RETURN_ALL) {
++		dprintk("pNFS %s: invalid return_type %d\n", __func__,
++			lrp->args.lr_return_type);
++		goto out;
++	}
++
++	status = nfserr_inval;
++	if (lrp->args.lr_seg.iomode != IOMODE_READ &&
++	    lrp->args.lr_seg.iomode != IOMODE_RW &&
++	    lrp->args.lr_seg.iomode != IOMODE_ANY) {
++		dprintk("pNFS %s: invalid iomode %d\n", __func__,
++			lrp->args.lr_seg.iomode);
++		goto out;
++	}
++
++	/* Set clientid from sessionid */
++	copy_clientid((clientid_t *)&lrp->args.lr_seg.clientid, cstate->session);
++	lrp->lrs_present = (lrp->args.lr_return_type == RETURN_FILE);
++	status = nfs4_pnfs_return_layout(sb, current_fh, lrp);
++out:
++	dprintk("pNFS %s: status %d return_type 0x%x lrs_present %d\n",
++		__func__, status, lrp->args.lr_return_type, lrp->lrs_present);
++	return status;
++}
++#endif /* CONFIG_PNFSD */
++
+ /*
+  * NULL call.
+  */
+@@ -1317,6 +1688,29 @@ static struct nfsd4_operation nfsd4_ops[
+ 		.op_flags = ALLOWED_WITHOUT_FH,
+ 		.op_name = "OP_RECLAIM_COMPLETE",
+ 	},
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICELIST] = {
++		.op_func = (nfsd4op_func)nfsd4_getdevlist,
++		.op_name = "OP_GETDEVICELIST",
++	},
++	[OP_GETDEVICEINFO] = {
++		.op_func = (nfsd4op_func)nfsd4_getdevinfo,
++		.op_flags = ALLOWED_WITHOUT_FH,
++		.op_name = "OP_GETDEVICEINFO",
++	},
++	[OP_LAYOUTGET] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutget,
++		.op_name = "OP_LAYOUTGET",
++	},
++	[OP_LAYOUTCOMMIT] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutcommit,
++		.op_name = "OP_LAYOUTCOMMIT",
++	},
++	[OP_LAYOUTRETURN] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutreturn,
++		.op_name = "OP_LAYOUTRETURN",
++	},
++#endif /* CONFIG_PNFSD */
+ };
+ 
+ static const char *nfsd4_op_name(unsigned opnum)
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-08-31 20:41:19.200150153 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-31 20:42:05.561202607 -0400
+@@ -42,6 +42,8 @@
+ #include "xdr4.h"
+ #include "vfs.h"
+ 
++#include "pnfsd.h"
++
+ #define NFSDDBG_FACILITY                NFSDDBG_PROC
+ 
+ /* Globals */
+@@ -60,8 +62,6 @@ static u64 current_sessionid = 1;
+ #define ONE_STATEID(stateid)  (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
+ 
+ /* forward declarations */
+-static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
+-static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
+ static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
+ static void nfs4_set_recdir(char *recdir);
+ 
+@@ -69,6 +69,7 @@ static void nfs4_set_recdir(char *recdir
+ 
+ /* Currently used for almost all code touching nfsv4 state: */
+ static DEFINE_MUTEX(client_mutex);
++struct task_struct *client_mutex_owner;
+ 
+ /*
+  * Currently used for the del_recall_lru and file hash table.  In an
+@@ -86,11 +87,21 @@ void
+ nfs4_lock_state(void)
+ {
+ 	mutex_lock(&client_mutex);
++	client_mutex_owner = current;
++}
++
++#define BUG_ON_UNLOCKED_STATE() BUG_ON(client_mutex_owner != current)
++
++void
++nfs4_bug_on_unlocked_state(void)
++{
++	BUG_ON(client_mutex_owner != current);
+ }
+ 
+ void
+ nfs4_unlock_state(void)
+ {
++	client_mutex_owner = NULL;
+ 	mutex_unlock(&client_mutex);
+ }
+ 
+@@ -109,7 +120,7 @@ opaque_hashval(const void *ptr, int nbyt
+ 
+ static struct list_head del_recall_lru;
+ 
+-static inline void
++inline void
+ put_nfs4_file(struct nfs4_file *fi)
+ {
+ 	if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) {
+@@ -120,7 +131,7 @@ put_nfs4_file(struct nfs4_file *fi)
+ 	}
+ }
+ 
+-static inline void
++inline void
+ get_nfs4_file(struct nfs4_file *fi)
+ {
+ 	atomic_inc(&fi->fi_ref);
+@@ -230,7 +241,10 @@ nfs4_close_delegation(struct nfs4_delega
+ 	 * but we want to remove the lease in any case. */
+ 	if (dp->dl_flock)
+ 		vfs_setlease(filp, F_UNLCK, &dp->dl_flock);
++	BUG_ON_UNLOCKED_STATE();
++	nfs4_unlock_state();	/* allow nested layout recall/return */
+ 	nfsd_close(filp);
++	nfs4_lock_state();
+ }
+ 
+ /* Called under the state lock. */
+@@ -266,8 +280,8 @@ static DEFINE_SPINLOCK(client_lock);
+  * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
+  * used in reboot/reset lease grace period processing
+  *
+- * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed
+- * setclientid_confirmed info. 
++ * conf_id_hashtbl[], and conf_str_hashtbl[] hold
++ * confirmed setclientid_confirmed info.
+  *
+  * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed 
+  * setclientid info.
+@@ -292,6 +306,7 @@ static void unhash_generic_stateid(struc
+ 	list_del(&stp->st_hash);
+ 	list_del(&stp->st_perfile);
+ 	list_del(&stp->st_perstateowner);
++	release_pnfs_ds_dev_list(stp);
+ }
+ 
+ static void free_generic_stateid(struct nfs4_stateid *stp)
+@@ -345,7 +360,10 @@ static void release_open_stateid(struct 
+ {
+ 	unhash_generic_stateid(stp);
+ 	release_stateid_lockowners(stp);
++	BUG_ON_UNLOCKED_STATE();
++	nfs4_unlock_state();	/* allow nested layout recall/return */
+ 	nfsd_close(stp->st_vfs_file);
++	nfs4_lock_state();
+ 	free_generic_stateid(stp);
+ }
+ 
+@@ -739,6 +757,8 @@ expire_client(struct nfs4_client *clp)
+ 	struct nfs4_delegation *dp;
+ 	struct list_head reaplist;
+ 
++	BUG_ON_UNLOCKED_STATE();
++
+ 	INIT_LIST_HEAD(&reaplist);
+ 	spin_lock(&recall_lock);
+ 	while (!list_empty(&clp->cl_delegations)) {
+@@ -758,6 +778,7 @@ expire_client(struct nfs4_client *clp)
+ 		sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
+ 		release_openowner(sop);
+ 	}
++	pnfs_expire_client(clp);
+ 	nfsd4_set_callback_client(clp, NULL);
+ 	if (clp->cl_cb_conn.cb_xprt)
+ 		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
+@@ -770,6 +791,13 @@ expire_client(struct nfs4_client *clp)
+ 	spin_unlock(&client_lock);
+ }
+ 
++void expire_client_lock(struct nfs4_client *clp)
++{
++	nfs4_lock_state();
++	expire_client(clp);
++	nfs4_unlock_state();
++}
++
+ static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
+ {
+ 	memcpy(target->cl_verifier.data, source->data,
+@@ -859,6 +887,11 @@ static struct nfs4_client *create_client
+ 	INIT_LIST_HEAD(&clp->cl_strhash);
+ 	INIT_LIST_HEAD(&clp->cl_openowners);
+ 	INIT_LIST_HEAD(&clp->cl_delegations);
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&clp->cl_layouts);
++	INIT_LIST_HEAD(&clp->cl_layoutrecalls);
++	atomic_set(&clp->cl_deviceref, 0);
++#endif /* CONFIG_PNFSD */
+ 	INIT_LIST_HEAD(&clp->cl_sessions);
+ 	INIT_LIST_HEAD(&clp->cl_lru);
+ 	clp->cl_time = get_seconds();
+@@ -908,7 +941,7 @@ move_to_confirmed(struct nfs4_client *cl
+ 	renew_client(clp);
+ }
+ 
+-static struct nfs4_client *
++struct nfs4_client *
+ find_confirmed_client(clientid_t *clid)
+ {
+ 	struct nfs4_client *clp;
+@@ -978,6 +1011,24 @@ find_unconfirmed_client_by_str(const cha
+ 	return NULL;
+ }
+ 
++/* Call func on each confirmed client, stopping at the first non-zero status. */
++int
++filter_confirmed_clients(int (* func)(struct nfs4_client *, void *),
++			 void *arg)
++{
++	struct nfs4_client *clp, *next;
++	int i, status;
++
++	for (i = 0; i < CLIENT_HASH_SIZE; i++)
++		list_for_each_entry_safe (clp, next, &conf_str_hashtbl[i],
++					  cl_strhash) {
++			status = func(clp, arg);
++			if (status)
++				return status;
++		}
++	return 0;
++}
++
+ static void
+ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
+ {
+@@ -1110,8 +1161,12 @@ nfsd4_replay_cache_entry(struct nfsd4_co
+ static void
+ nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
+ {
+-	/* pNFS is not supported */
++#if defined(CONFIG_PNFSD)
++	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS |
++				  EXCHGID4_FLAG_USE_PNFS_DS;
++#else  /* CONFIG_PNFSD */
+ 	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
++#endif /* CONFIG_PNFSD */
+ 
+ 	/* Referrals are supported, Migration is not. */
+ 	new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
+@@ -1301,6 +1356,13 @@ nfsd4_create_session(struct svc_rqst *rq
+ 	struct nfsd4_clid_slot *cs_slot = NULL;
+ 	int status = 0;
+ 
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++	/* XXX hack to get local ip address */
++	memcpy(&pnfsd_lexp_addr, &rqstp->rq_xprt->xpt_local,
++		sizeof(pnfsd_lexp_addr));
++	pnfs_lexp_addr_len = rqstp->rq_xprt->xpt_locallen;
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
+ 	nfs4_lock_state();
+ 	unconf = find_unconfirmed_client(&cr_ses->clientid);
+ 	conf = find_confirmed_client(&cr_ses->clientid);
+@@ -1340,25 +1402,26 @@ nfsd4_create_session(struct svc_rqst *rq
+ 		cs_slot->sl_seqid++; /* from 0 to 1 */
+ 		move_to_confirmed(unconf);
+ 
+-		if (cr_ses->flags & SESSION4_BACK_CHAN) {
+-			unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
+-			svc_xprt_get(rqstp->rq_xprt);
+-			rpc_copy_addr(
+-				(struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
+-				sa);
+-			unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
+-			unconf->cl_cb_conn.cb_minorversion =
+-				cstate->minorversion;
+-			unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
+-			unconf->cl_cb_seq_nr = 1;
+-			nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
+-		}
++		if (is_ds_only_session(unconf->cl_exchange_flags))
++			cr_ses->flags &= ~SESSION4_BACK_CHAN;
++
+ 		conf = unconf;
+ 	} else {
+ 		status = nfserr_stale_clientid;
+ 		goto out;
+ 	}
+ 
++	if (cr_ses->flags & SESSION4_BACK_CHAN) {
++		conf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
++		svc_xprt_get(rqstp->rq_xprt);
++		rpc_copy_addr((struct sockaddr *)&conf->cl_cb_conn.cb_addr, sa);
++		conf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
++		conf->cl_cb_conn.cb_minorversion = cstate->minorversion;
++		conf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
++		conf->cl_cb_seq_nr = 1;
++		nfsd4_probe_callback(conf, &conf->cl_cb_conn);
++	}
++
+ 	/*
+ 	 * We do not support RDMA or persistent sessions
+ 	 */
+@@ -1746,7 +1809,7 @@ out:
+ 
+ /* OPEN Share state helper functions */
+ static inline struct nfs4_file *
+-alloc_init_file(struct inode *ino)
++alloc_init_file(struct inode *ino, struct svc_fh *current_fh)
+ {
+ 	struct nfs4_file *fp;
+ 	unsigned int hashval = file_hashval(ino);
+@@ -1760,6 +1823,16 @@ alloc_init_file(struct inode *ino)
+ 		fp->fi_inode = igrab(ino);
+ 		fp->fi_id = current_fileid++;
+ 		fp->fi_had_conflict = false;
++#if defined(CONFIG_PNFSD)
++		INIT_LIST_HEAD(&fp->fi_layouts);
++		INIT_LIST_HEAD(&fp->fi_layout_states);
++		fp->fi_fsid.major = current_fh->fh_export->ex_fsid;
++		fp->fi_fsid.minor = 0;
++		fp->fi_fhlen = current_fh->fh_handle.fh_size;
++		BUG_ON(fp->fi_fhlen > sizeof(fp->fi_fhval));
++		memcpy(fp->fi_fhval, &current_fh->fh_handle.fh_base,
++		       fp->fi_fhlen);
++#endif /* CONFIG_PNFSD */
+ 		spin_lock(&recall_lock);
+ 		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
+ 		spin_unlock(&recall_lock);
+@@ -1768,7 +1841,7 @@ alloc_init_file(struct inode *ino)
+ 	return NULL;
+ }
+ 
+-static void
++void
+ nfsd4_free_slab(struct kmem_cache **slab)
+ {
+ 	if (*slab == NULL)
+@@ -1784,6 +1857,7 @@ nfsd4_free_slabs(void)
+ 	nfsd4_free_slab(&file_slab);
+ 	nfsd4_free_slab(&stateid_slab);
+ 	nfsd4_free_slab(&deleg_slab);
++	nfsd4_free_pnfs_slabs();
+ }
+ 
+ static int
+@@ -1805,6 +1879,8 @@ nfsd4_init_slabs(void)
+ 			sizeof(struct nfs4_delegation), 0, 0, NULL);
+ 	if (deleg_slab == NULL)
+ 		goto out_nomem;
++	if (nfsd4_init_pnfs_slabs())
++		goto out_nomem;
+ 	return 0;
+ out_nomem:
+ 	nfsd4_free_slabs();
+@@ -1878,6 +1954,9 @@ init_stateid(struct nfs4_stateid *stp, s
+ 	INIT_LIST_HEAD(&stp->st_perstateowner);
+ 	INIT_LIST_HEAD(&stp->st_lockowners);
+ 	INIT_LIST_HEAD(&stp->st_perfile);
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&stp->st_pnfs_ds_id);
++#endif /* CONFIG_PNFSD */
+ 	list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
+ 	list_add(&stp->st_perstateowner, &sop->so_stateids);
+ 	list_add(&stp->st_perfile, &fp->fi_stateids);
+@@ -1919,6 +1998,7 @@ find_openstateowner_str(unsigned int has
+ {
+ 	struct nfs4_stateowner *so = NULL;
+ 
++	BUG_ON_UNLOCKED_STATE();
+ 	list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
+ 		if (same_owner_str(so, &open->op_owner, &open->op_clientid))
+ 			return so;
+@@ -1927,7 +2007,7 @@ find_openstateowner_str(unsigned int has
+ }
+ 
+ /* search file_hashtbl[] for file */
+-static struct nfs4_file *
++struct nfs4_file *
+ find_file(struct inode *ino)
+ {
+ 	unsigned int hashval = file_hashval(ino);
+@@ -1945,6 +2025,18 @@ find_file(struct inode *ino)
+ 	return NULL;
+ }
+ 
++struct nfs4_file *
++find_alloc_file(struct inode *ino, struct svc_fh *current_fh)
++{
++	struct nfs4_file *fp;
++
++	fp = find_file(ino);
++	if (fp)
++		return fp;
++
++	return alloc_init_file(ino, current_fh);
++}
++
+ static inline int access_valid(u32 x, u32 minorversion)
+ {
+ 	if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ)
+@@ -2503,7 +2595,7 @@ nfsd4_process_open2(struct svc_rqst *rqs
+ 		if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
+ 			goto out;
+ 		status = nfserr_resource;
+-		fp = alloc_init_file(ino);
++		fp = alloc_init_file(ino, current_fh);
+ 		if (fp == NULL)
+ 			goto out;
+ 	}
+@@ -2730,7 +2822,7 @@ nfs4_check_fh(struct svc_fh *fhp, struct
+ 	return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_path.dentry->d_inode;
+ }
+ 
+-static int
++int
+ STALE_STATEID(stateid_t *stateid)
+ {
+ 	if (stateid->si_boot == boot_time)
+@@ -2740,6 +2832,16 @@ STALE_STATEID(stateid_t *stateid)
+ 	return 1;
+ }
+ 
++__be32
++nfs4_check_stateid(stateid_t *stateid)
++{
++	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
++		return nfserr_bad_stateid;
++	if (STALE_STATEID(stateid))
++		return nfserr_stale_stateid;
++	return 0;
++}
++
+ static inline int
+ access_permit_read(unsigned long access_bmap)
+ {
+@@ -2848,6 +2950,24 @@ nfs4_preprocess_stateid_op(struct nfsd4_
+ 	if (grace_disallows_io(ino))
+ 		return nfserr_grace;
+ 
++#if defined(CONFIG_PNFSD)
++	if (pnfs_fh_is_ds(&current_fh->fh_handle)) {
++		if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
++			status = nfserr_bad_stateid;
++		else
++#ifdef CONFIG_GFS2_FS_LOCKING_DLM
++		{
++			dprintk("%s Don't check DS stateid\n", __func__);
++			return 0;
++		}
++#else /* CONFIG_GFS2_FS_LOCKING_DLM */
++			status = nfs4_preprocess_pnfs_ds_stateid(current_fh,
++								 stateid);
++#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
++		goto out;
++	}
++#endif /* CONFIG_PNFSD */
++
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+ 
+@@ -2924,13 +3044,9 @@ nfs4_preprocess_seqid_op(struct nfsd4_co
+ 	*stpp = NULL;
+ 	*sopp = NULL;
+ 
+-	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
+-		dprintk("NFSD: preprocess_seqid_op: magic stateid!\n");
+-		return nfserr_bad_stateid;
+-	}
+-
+-	if (STALE_STATEID(stateid))
+-		return nfserr_stale_stateid;
++	status = nfs4_check_stateid(stateid);
++	if (status)
++		return status;
+ 
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+@@ -3205,11 +3321,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+ 	nfs4_lock_state();
+-	status = nfserr_bad_stateid;
+-	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
+-		goto out;
+-	status = nfserr_stale_stateid;
+-	if (STALE_STATEID(stateid))
++	status = nfs4_check_stateid(stateid);
++	if (status)
+ 		goto out;
+ 	status = nfserr_bad_stateid;
+ 	if (!is_delegation_stateid(stateid))
+@@ -3238,26 +3351,6 @@ out:
+ #define LOCK_HASH_SIZE             (1 << LOCK_HASH_BITS)
+ #define LOCK_HASH_MASK             (LOCK_HASH_SIZE - 1)
+ 
+-static inline u64
+-end_offset(u64 start, u64 len)
+-{
+-	u64 end;
+-
+-	end = start + len;
+-	return end >= start ? end: NFS4_MAX_UINT64;
+-}
+-
+-/* last octet in a range */
+-static inline u64
+-last_byte_offset(u64 start, u64 len)
+-{
+-	u64 end;
+-
+-	BUG_ON(!len);
+-	end = start + len;
+-	return end > start ? end - 1: NFS4_MAX_UINT64;
+-}
+-
+ #define lockownerid_hashval(id) \
+         ((id) & LOCK_HASH_MASK)
+ 
+@@ -3274,7 +3367,7 @@ static struct list_head lock_ownerid_has
+ static struct list_head	lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
+ static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
+ 
+-static struct nfs4_stateid *
++struct nfs4_stateid *
+ find_stateid(stateid_t *stid, int flags)
+ {
+ 	struct nfs4_stateid *local;
+@@ -3303,7 +3396,7 @@ find_stateid(stateid_t *stid, int flags)
+ 	return NULL;
+ }
+ 
+-static struct nfs4_delegation *
++struct nfs4_delegation *
+ find_delegation_stateid(struct inode *ino, stateid_t *stid)
+ {
+ 	struct nfs4_file *fp;
+@@ -3436,6 +3529,9 @@ alloc_init_lock_stateid(struct nfs4_stat
+ 	INIT_LIST_HEAD(&stp->st_perfile);
+ 	INIT_LIST_HEAD(&stp->st_perstateowner);
+ 	INIT_LIST_HEAD(&stp->st_lockowners); /* not used */
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&stp->st_pnfs_ds_id);
++#endif /* CONFIG_PNFSD */
+ 	list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
+ 	list_add(&stp->st_perfile, &fp->fi_stateids);
+ 	list_add(&stp->st_perstateowner, &sop->so_stateids);
+@@ -3998,6 +4094,9 @@ nfs4_state_init(void)
+ 	INIT_LIST_HEAD(&client_lru);
+ 	INIT_LIST_HEAD(&del_recall_lru);
+ 	reclaim_str_hashtbl_size = 0;
++#if defined(CONFIG_PNFSD)
++	nfs4_pnfs_state_init();
++#endif /* CONFIG_PNFSD */
+ 	return 0;
+ }
+ 
+@@ -4110,6 +4209,7 @@ __nfs4_state_shutdown(void)
+ 	}
+ 
+ 	nfsd4_shutdown_recdir();
++	nfs4_pnfs_state_shutdown();
+ 	nfs4_init = 0;
+ }
+ 
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-08-31 20:41:19.202150173 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-31 20:42:05.563232916 -0400
+@@ -47,9 +47,14 @@
+ #include <linux/nfsd_idmap.h>
+ #include <linux/nfs4_acl.h>
+ #include <linux/sunrpc/svcauth_gss.h>
++#include <linux/exportfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd4_block.h>
+ 
+ #include "xdr4.h"
+ #include "vfs.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY		NFSDDBG_XDR
+ 
+@@ -1234,6 +1239,138 @@ nfsd4_decode_sequence(struct nfsd4_compo
+ 	DECODE_TAIL;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static __be32
++nfsd4_decode_getdevlist(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_getdevlist *gdevl)
++{
++	DECODE_HEAD;
++
++	READ_BUF(16 + sizeof(nfs4_verifier));
++	READ32(gdevl->gd_layout_type);
++	READ32(gdevl->gd_maxdevices);
++	READ64(gdevl->gd_cookie);
++	COPYMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
++
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_getdevinfo(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_getdevinfo *gdev)
++{
++	u32 num;
++	DECODE_HEAD;
++
++	READ_BUF(12 + sizeof(struct nfsd4_pnfs_deviceid));
++	READ64(gdev->gd_devid.sbid);
++	READ64(gdev->gd_devid.devid);
++	READ32(gdev->gd_layout_type);
++	READ32(gdev->gd_maxcount);
++	READ32(num);
++	if (num) {
++		READ_BUF(4);
++		READ32(gdev->gd_notify_types);
++	} else {
++		gdev->gd_notify_types = 0;
++	}
++
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_layoutget *lgp)
++{
++	DECODE_HEAD;
++
++	READ_BUF(36);
++	READ32(lgp->lg_signal);
++	READ32(lgp->lg_seg.layout_type);
++	READ32(lgp->lg_seg.iomode);
++	READ64(lgp->lg_seg.offset);
++	READ64(lgp->lg_seg.length);
++	READ64(lgp->lg_minlength);
++	nfsd4_decode_stateid(argp, &lgp->lg_sid);
++	READ_BUF(4);
++	READ32(lgp->lg_maxcount);
++
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
++			  struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	DECODE_HEAD;
++	u32 timechange;
++
++	READ_BUF(20);
++	READ64(lcp->args.lc_seg.offset);
++	READ64(lcp->args.lc_seg.length);
++	READ32(lcp->args.lc_reclaim);
++	nfsd4_decode_stateid(argp, &lcp->lc_sid);
++	READ_BUF(4);
++	READ32(lcp->args.lc_newoffset);
++	if (lcp->args.lc_newoffset) {
++		READ_BUF(8);
++		READ64(lcp->args.lc_last_wr);
++	} else
++		lcp->args.lc_last_wr = 0;
++	READ_BUF(4);
++	READ32(timechange);
++	if (timechange) {
++		READ_BUF(12);
++		READ64(lcp->args.lc_mtime.seconds);
++		READ32(lcp->args.lc_mtime.nseconds);
++	} else {
++		lcp->args.lc_mtime.seconds = 0;
++		lcp->args.lc_mtime.nseconds = 0;
++	}
++	READ_BUF(8);
++	READ32(lcp->args.lc_seg.layout_type);
++	/* XXX: saving XDR'ed layout update. Since we don't have the
++	 * current_fh yet, and therefore no export_ops, we can't call
++	 * the layout specific decode routines. File and pVFS2
++	 * do not use the layout update....
++	 */
++	READ32(lcp->args.lc_up_len);
++	if (lcp->args.lc_up_len > 0) {
++		READ_BUF(lcp->args.lc_up_len);
++		READMEM(lcp->args.lc_up_layout, lcp->args.lc_up_len);
++	}
++
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
++			  struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	DECODE_HEAD;
++
++	READ_BUF(16);
++	READ32(lrp->args.lr_reclaim);
++	READ32(lrp->args.lr_seg.layout_type);
++	READ32(lrp->args.lr_seg.iomode);
++	READ32(lrp->args.lr_return_type);
++	if (lrp->args.lr_return_type == RETURN_FILE) {
++		READ_BUF(16);
++		READ64(lrp->args.lr_seg.offset);
++		READ64(lrp->args.lr_seg.length);
++		nfsd4_decode_stateid(argp, &lrp->lr_sid);
++		READ_BUF(4);
++		READ32(lrp->args.lrf_body_len);
++		if (lrp->args.lrf_body_len > 0) {
++			READ_BUF(lrp->args.lrf_body_len);
++			READMEM(lrp->args.lrf_body, lrp->args.lrf_body_len);
++		}
++	}
++
++	DECODE_TAIL;
++}
++#endif /* CONFIG_PNFSD */
++
+ static __be32
+ nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
+ {
+@@ -1335,11 +1472,19 @@ static nfsd4_dec nfsd41_dec_ops[] = {
+ 	[OP_DESTROY_SESSION]	= (nfsd4_dec)nfsd4_decode_destroy_session,
+ 	[OP_FREE_STATEID]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_GET_DIR_DELEGATION]	= (nfsd4_dec)nfsd4_decode_notsupp,
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICEINFO]	= (nfsd4_dec)nfsd4_decode_getdevinfo,
++	[OP_GETDEVICELIST]	= (nfsd4_dec)nfsd4_decode_getdevlist,
++	[OP_LAYOUTCOMMIT]	= (nfsd4_dec)nfsd4_decode_layoutcommit,
++	[OP_LAYOUTGET]		= (nfsd4_dec)nfsd4_decode_layoutget,
++	[OP_LAYOUTRETURN]	= (nfsd4_dec)nfsd4_decode_layoutreturn,
++#else  /* CONFIG_PNFSD */
+ 	[OP_GETDEVICEINFO]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_GETDEVICELIST]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTCOMMIT]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTGET]		= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTRETURN]	= (nfsd4_dec)nfsd4_decode_notsupp,
++#endif /* CONFIG_PNFSD */
+ 	[OP_SECINFO_NO_NAME]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_SEQUENCE]		= (nfsd4_dec)nfsd4_decode_sequence,
+ 	[OP_SET_SSV]		= (nfsd4_dec)nfsd4_decode_notsupp,
+@@ -2136,6 +2281,36 @@ out_acl:
+ 		}
+ 		WRITE64(stat.ino);
+ 	}
++#if defined(CONFIG_PNFSD)
++	if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) {
++		struct super_block *sb = dentry->d_inode->i_sb;
++		int type = 0;
++
++		/* Query the filesystem for supported pNFS layout types.
++		 * Currently, we only support one layout type per file system.
++		 * sb->s_pnfs_op->layout_type() returns the pnfs_layouttype4.
++		 */
++		buflen -= 4;
++		if (buflen < 0)		/* length */
++			goto out_resource;
++
++		if (sb && sb->s_pnfs_op && sb->s_pnfs_op->layout_type)
++			type = sb->s_pnfs_op->layout_type(sb);
++		if (type) {
++			if ((buflen -= 4) < 0)	/* type */
++				goto out_resource;
++			WRITE32(1); 	/* length */
++			WRITE32(type);  /* type */
++		} else
++			WRITE32(0);  /* length */
++	}
++
++	if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) {
++		if ((buflen -= 4) < 0)
++			goto out_resource;
++		WRITE32(stat.blksize);
++	}
++#endif /* CONFIG_PNFSD */
+ 	if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
+ 		WRITE32(3);
+ 		WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
+@@ -2366,6 +2541,10 @@ nfsd4_encode_commit(struct nfsd4_compoun
+ 	if (!nfserr) {
+ 		RESERVE_SPACE(8);
+ 		WRITEMEM(commit->co_verf.data, 8);
++		dprintk("NFSD: nfsd4_encode_commit: verifier %x:%x\n",
++			((u32 *)(&commit->co_verf.data))[0],
++			((u32 *)(&commit->co_verf.data))[1]);
++
+ 		ADJUST_ARGS();
+ 	}
+ 	return nfserr;
+@@ -2620,9 +2799,20 @@ nfsd4_encode_read(struct nfsd4_compoundr
+ 	}
+ 	read->rd_vlen = v;
+ 
++#if defined(CONFIG_SPNFS)
++	if (spnfs_enabled())
++		nfserr = spnfs_read(read->rd_fhp->fh_dentry->d_inode,
++				    read->rd_offset, &maxcount, read->rd_vlen,
++				    resp->rqstp);
++	else /* we're not an MDS */
++		nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
++			read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
++			&maxcount);
++#else
+ 	nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
+ 			read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
+ 			&maxcount);
++#endif /* CONFIG_SPNFS */
+ 
+ 	if (nfserr == nfserr_symlink)
+ 		nfserr = nfserr_inval;
+@@ -2926,6 +3116,9 @@ nfsd4_encode_write(struct nfsd4_compound
+ 		WRITE32(write->wr_bytes_written);
+ 		WRITE32(write->wr_how_written);
+ 		WRITEMEM(write->wr_verifier.data, 8);
++		dprintk("NFSD: nfsd4_encode_write: verifier %x:%x\n",
++			((u32 *)(&write->wr_verifier.data))[0],
++			((u32 *)(&write->wr_verifier.data))[1]);
+ 		ADJUST_ARGS();
+ 	}
+ 	return nfserr;
+@@ -3069,6 +3262,343 @@ nfsd4_encode_sequence(struct nfsd4_compo
+ 	return 0;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++/* Uses the export interface to iterate through the available devices
++ * and encodes them on the response stream.
++ */
++static  __be32
++nfsd4_encode_devlist_iterator(struct nfsd4_compoundres *resp,
++			      struct nfsd4_pnfs_getdevlist *gdevl,
++			      unsigned int *dev_count)
++{
++	struct super_block *sb = gdevl->gd_fhp->fh_dentry->d_inode->i_sb;
++	__be32 nfserr;
++	int status;
++	__be32 *p;
++	struct nfsd4_pnfs_dev_iter_res res = {
++		.gd_cookie = gdevl->gd_cookie,
++		.gd_verf = gdevl->gd_verf,
++		.gd_eof = 0
++	};
++	u64 sbid;
++
++	dprintk("%s: Begin\n", __func__);
++
++	sbid = find_create_sbid(sb);
++	*dev_count = 0;
++	do {
++		status = sb->s_pnfs_op->get_device_iter(sb,
++							gdevl->gd_layout_type,
++							&res);
++		if (status) {
++			if (status == -ENOENT) {
++				res.gd_eof = 1;
++				/* return success */
++				break;
++			}
++			nfserr = nfserrno(status);
++			goto out_err;
++		}
++
++		/* Encode device id and layout type */
++		RESERVE_SPACE(sizeof(struct nfsd4_pnfs_deviceid));
++		WRITE64((__be64)sbid);
++		WRITE64(res.gd_devid);	/* devid minor */
++		ADJUST_ARGS();
++		(*dev_count)++;
++	} while (*dev_count < gdevl->gd_maxdevices && !res.gd_eof);
++	gdevl->gd_cookie = res.gd_cookie;
++	gdevl->gd_verf = res.gd_verf;
++	gdevl->gd_eof = res.gd_eof;
++	nfserr = nfs_ok;
++out_err:
++	dprintk("%s: Encoded %u devices\n", __func__, *dev_count);
++	return nfserr;
++}
++
++/* Encode GETDEVICELIST: reserve the cookie/verifier/count header, encode
++ * the device ids, then backfill the header and append the eof flag. */
++static __be32
++nfsd4_encode_getdevlist(struct nfsd4_compoundres *resp, __be32 nfserr,
++			struct nfsd4_pnfs_getdevlist *gdevl)
++{
++	unsigned int dev_count = 0, lead_count;
++	__be32 *p_in = resp->p;
++	__be32 *p;
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	/* Ensure we have room for cookie, verifier, and devlist len,
++	 * which we will backfill in after we encode as many devices as possible
++	 */
++	lead_count = 8 + sizeof(nfs4_verifier) + 4;
++	RESERVE_SPACE(lead_count);
++	/* skip past these values */
++	p += XDR_QUADLEN(lead_count);
++	ADJUST_ARGS();
++
++	/* Iterate over as many device ids as possible on the xdr stream */
++	nfserr = nfsd4_encode_devlist_iterator(resp, gdevl, &dev_count);
++	if (nfserr)
++		goto out_err;
++
++	/* Backfill in cookie, verf and number of devices encoded */
++	p = p_in;
++	WRITE64(gdevl->gd_cookie);
++	WRITEMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
++	WRITE32(dev_count);
++
++	/* Skip over devices */
++	p += XDR_QUADLEN(dev_count * sizeof(struct nfsd4_pnfs_deviceid));
++	ADJUST_ARGS();
++
++	/* are we at the end of devices? */
++	RESERVE_SPACE(4);
++	WRITE32(gdevl->gd_eof);
++	ADJUST_ARGS();
++
++	dprintk("%s: done.\n", __func__);
++
++	nfserr = nfs_ok;
++out:
++	return nfserr;
++out_err:
++	p = p_in;
++	ADJUST_ARGS();
++	goto out;
++}
++
++/* For a given device id, have the file system retrieve and encode the
++ * associated device onto the xdr stream it is handed.  For file layout,
++ * the file system has the option of using nfsd's encoding helper (e.g.
++ * filelayout_encode_devinfo) or one of its own.
++ *
++ * Note: on -ETOOSMALL the file system must leave xdr->p advanced by the
++ * XDR size of struct device_addr4 da_addr_body; that distance is used
++ * for the gdir_mincount calculation.
++ */
++static __be32
++nfsd4_encode_getdevinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
++			struct nfsd4_pnfs_getdevinfo *gdev)
++{
++	struct super_block *sb;
++	int maxcount = 0, type_notify_len = 12;
++	__be32 *p, *p_save = NULL, *p_in = resp->p;
++	struct exp_xdr_stream xdr;
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	sb = gdev->gd_sb;
++
++	if (gdev->gd_maxcount != 0) {
++		/* FIXME: this will be bound by the session max response */
++		maxcount = svc_max_payload(resp->rqstp);
++		if (maxcount > gdev->gd_maxcount)
++			maxcount = gdev->gd_maxcount;
++
++		/* Ensure we have room for the type and notify fields */
++		maxcount -= type_notify_len;
++		if (maxcount < 0) {
++			nfserr = -ETOOSMALL;
++			goto toosmall;
++		}
++	}
++
++	RESERVE_SPACE(4);
++	WRITE32(gdev->gd_layout_type);
++	ADJUST_ARGS();
++
++	/* If maxcount is 0 then just update notifications */
++	if (gdev->gd_maxcount == 0)
++		goto handle_notifications;
++
++	xdr.p = p_save = resp->p;
++	xdr.end = resp->end;
++	if (xdr.end - xdr.p > exp_xdr_qwords(maxcount & ~3))
++		xdr.end = xdr.p + exp_xdr_qwords(maxcount & ~3);
++
++	nfserr = sb->s_pnfs_op->get_device_info(sb, &xdr, gdev->gd_layout_type,
++						&gdev->gd_devid);
++	if (nfserr)
++		goto err;
++
++	/* The file system should never write 0 bytes without
++	 * returning an error
++	 */
++	BUG_ON(xdr.p == p_save);
++	BUG_ON(xdr.p > xdr.end);
++
++	/* Update the xdr stream with the number of bytes encoded
++	 * by the file system.
++	 */
++	p = xdr.p;
++	ADJUST_ARGS();
++
++handle_notifications:
++	/* Encode supported device notifications */
++	RESERVE_SPACE(4);
++	if (sb->s_pnfs_op->set_device_notify) {
++		struct pnfs_devnotify_arg dn_args;
++
++		dn_args.dn_layout_type = gdev->gd_layout_type;
++		dn_args.dn_devid = gdev->gd_devid;
++		dn_args.dn_notify_types = gdev->gd_notify_types;
++		nfserr = sb->s_pnfs_op->set_device_notify(sb, &dn_args);
++		if (nfserr)
++			goto err;
++		WRITE32(dn_args.dn_notify_types);
++	} else {
++		WRITE32(0);
++	}
++	ADJUST_ARGS();
++
++out:
++	return nfserrno(nfserr); /* NOTE(review): nfserr is 0/-errno here */
++toosmall:
++	dprintk("%s: maxcount too small\n", __func__);
++	RESERVE_SPACE(4);
++	WRITE32((p_save ? (xdr.p - p_save) * 4 : 0) + type_notify_len);
++	ADJUST_ARGS();
++	goto out;
++err:
++	/* Rewind to the beginning */
++	p = p_in;
++	ADJUST_ARGS();
++	if (nfserr == -ETOOSMALL)
++		goto toosmall;
++	printk(KERN_ERR "%s: export ERROR %d\n", __func__, nfserr);
++	goto out;
++}
++
++static __be32
++nfsd4_encode_layoutget(struct nfsd4_compoundres *resp,
++		       __be32 nfserr,
++		       struct nfsd4_pnfs_layoutget *lgp)
++{
++	int maxcount, leadcount;
++	struct super_block *sb;
++	struct exp_xdr_stream xdr;
++	__be32 *p, *p_save, *p_start = resp->p;
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	sb = lgp->lg_fhp->fh_dentry->d_inode->i_sb;
++	maxcount = PAGE_SIZE;	/* cap the layout reply at one page */
++	if (maxcount > lgp->lg_maxcount)
++		maxcount = lgp->lg_maxcount;
++
++	/* Check for space on xdr stream for the layout metadata */
++	leadcount = 36 + sizeof(stateid_opaque_t);
++	RESERVE_SPACE(leadcount);
++	/* encode layout metadata after file system encodes layout */
++	p += XDR_QUADLEN(leadcount);
++	ADJUST_ARGS();
++
++	/* Ensure we have room for ret_on_close, off, len, iomode, type */
++	maxcount -= leadcount;
++	if (maxcount < 0) {
++		printk(KERN_ERR "%s: buffer too small\n", __func__);
++		nfserr = nfserr_toosmall;
++		goto err;
++	}
++
++	/* Set xdr info so file system can encode layout */
++	xdr.p = p_save = resp->p;
++	xdr.end = resp->end;
++	if (xdr.end - xdr.p > exp_xdr_qwords(maxcount & ~3))
++		xdr.end = xdr.p + exp_xdr_qwords(maxcount & ~3);
++
++	/* Retrieve, encode, and merge layout; process stateid */
++	nfserr = nfs4_pnfs_get_layout(lgp, &xdr);
++	if (nfserr)
++		goto err;
++
++	/* Ensure file system returned enough bytes for the client
++	 * to access.
++	 */
++	if (lgp->lg_seg.length < lgp->lg_minlength) {
++		nfserr = nfserr_badlayout;
++		goto err;
++	}
++
++	/* The file system should never write 0 bytes without
++	 * returning an error
++	 */
++	BUG_ON(xdr.p == p_save);
++
++	/* Rewind to beginning and encode attrs */
++	resp->p = p_start;
++	RESERVE_SPACE(4);
++	WRITE32(lgp->lg_roc);	/* return on close */
++	ADJUST_ARGS();
++	nfsd4_encode_stateid(resp, &lgp->lg_sid);
++	RESERVE_SPACE(28);
++	/* Note: response logr_layout array count, always one for now */
++	WRITE32(1);
++	WRITE64(lgp->lg_seg.offset);
++	WRITE64(lgp->lg_seg.length);
++	WRITE32(lgp->lg_seg.iomode);
++	WRITE32(lgp->lg_seg.layout_type);
++
++	/* Update the xdr stream with the number of bytes written
++	 * by the file system
++	 */
++	p = xdr.p;
++	ADJUST_ARGS();
++
++	return nfs_ok;
++err:
++	resp->p = p_start;	/* discard anything encoded for this op */
++	return nfserr;
++}
++
++static __be32
++nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
++			  struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	__be32 *p;
++
++	/* On success: size-changed flag, then the new size only if set */
++	if (nfserr)
++		return nfserr;
++
++	RESERVE_SPACE(4);
++	WRITE32(lcp->res.lc_size_chg);
++	ADJUST_ARGS();
++	if (!lcp->res.lc_size_chg)
++		return nfserr;
++	RESERVE_SPACE(8);
++	WRITE64(lcp->res.lc_newsize);
++	ADJUST_ARGS();
++	return nfserr;
++}
++
++static __be32
++nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
++			  struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	__be32 *p;
++
++	if (!nfserr) {
++		/* lrs_present flag, then the stateid only if present */
++		RESERVE_SPACE(4);
++		WRITE32(lrp->lrs_present ? 1 : 0);
++		ADJUST_ARGS();
++		if (lrp->lrs_present)
++			nfsd4_encode_stateid(resp, &lrp->lr_sid);
++	}
++
++	return nfserr;
++}
++#endif /* CONFIG_PNFSD */
++
+ static __be32
+ nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
+ {
+@@ -3129,11 +3659,19 @@ static nfsd4_enc nfsd4_enc_ops[] = {
+ 	[OP_DESTROY_SESSION]	= (nfsd4_enc)nfsd4_encode_destroy_session,
+ 	[OP_FREE_STATEID]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_GET_DIR_DELEGATION]	= (nfsd4_enc)nfsd4_encode_noop,
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_getdevinfo,
++	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_getdevlist,
++	[OP_LAYOUTCOMMIT]	= (nfsd4_enc)nfsd4_encode_layoutcommit,
++	[OP_LAYOUTGET]		= (nfsd4_enc)nfsd4_encode_layoutget,
++	[OP_LAYOUTRETURN]	= (nfsd4_enc)nfsd4_encode_layoutreturn,
++#else  /* CONFIG_PNFSD */
+ 	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTCOMMIT]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTGET]		= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTRETURN]	= (nfsd4_enc)nfsd4_encode_noop,
++#endif /* CONFIG_PNFSD */
+ 	[OP_SECINFO_NO_NAME]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_SEQUENCE]		= (nfsd4_enc)nfsd4_encode_sequence,
+ 	[OP_SET_SSV]		= (nfsd4_enc)nfsd4_encode_noop,
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
+--- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-08-31 20:41:19.203150982 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-31 20:42:05.565212801 -0400
+@@ -13,10 +13,15 @@
+ #include <linux/nfsd/syscall.h>
+ #include <linux/lockd/lockd.h>
+ #include <linux/sunrpc/clnt.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
+ 
+ #include "nfsd.h"
+ #include "cache.h"
+ 
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++#include <linux/nfsd4_spnfs.h>
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
+ /*
+  *	We have a single directory with 9 nodes in it.
+  */
+@@ -49,6 +54,9 @@ enum {
+ 	NFSD_Gracetime,
+ 	NFSD_RecoveryDir,
+ #endif
++#ifdef CONFIG_PNFSD
++	NFSD_pnfs_dlm_device,
++#endif
+ };
+ 
+ /*
+@@ -74,6 +82,9 @@ static ssize_t write_leasetime(struct fi
+ static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
+ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
+ #endif
++#ifdef CONFIG_PNFSD
++static ssize_t write_pnfs_dlm_device(struct file *file, char *buf, size_t size);
++#endif
+ 
+ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
+ 	[NFSD_Svc] = write_svc,
+@@ -96,6 +107,9 @@ static ssize_t (*write_op[])(struct file
+ 	[NFSD_Gracetime] = write_gracetime,
+ 	[NFSD_RecoveryDir] = write_recoverydir,
+ #endif
++#ifdef CONFIG_PNFSD
++	[NFSD_pnfs_dlm_device] = write_pnfs_dlm_device,
++#endif
+ };
+ 
+ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
+@@ -1349,6 +1363,68 @@ static ssize_t write_recoverydir(struct 
+ 
+ #endif
+ 
++#ifdef CONFIG_PNFSD
++
++static ssize_t __write_pnfs_dlm_device(struct file *file, char *buf,
++				       size_t size)
++{
++	char *mesg = buf;
++	char *pnfs_dlm_device;
++	int max_size = NFSD_PNFS_DLM_DEVICE_MAX;
++	int len, ret = 0;
++
++	if (size > 0) {		/* non-empty write: set a device */
++		ret = -EINVAL;
++		if (size > max_size || buf[size-1] != '\n')
++			return ret;
++		buf[size-1] = 0;	/* replace the required newline */
++
++		pnfs_dlm_device = mesg;	/* qword_get unquotes in place */
++		len = qword_get(&mesg, pnfs_dlm_device, size);
++		if (len <= 0)
++			return ret;
++
++		ret = nfsd4_set_pnfs_dlm_device(pnfs_dlm_device, len);
++	} else			/* zero-length write: report the list */
++		return nfsd4_get_pnfs_dlm_device_list(buf, SIMPLE_TRANSACTION_LIMIT);
++
++	return ret <= 0 ? ret : strlen(buf);
++}
++
++/**
++ * write_pnfs_dlm_device - Set or report the current pNFS data server list
++ *
++ * Input:
++ *			buf:		ignored
++ *			size:		zero
++ *
++ * OR
++ *
++ * Input:
++ *			buf:		C string containing a block device name,
++ *					a colon, and then a comma separated
++ *					list of pNFS data server IPv4 addresses
++ *			size:		non-zero length of C string in @buf
++ * Output:
++ *	On success:	passed-in buffer filled with '\n'-terminated C
++ *			string containing a block device name, a colon, and
++ *			then a comma separated list of pNFS
++ *			data server IPv4 addresses.
++ *			return code is the size in bytes of the string
++ *	On error:	return code is a negative errno value
++ */
++static ssize_t write_pnfs_dlm_device(struct file *file, char *buf, size_t size)
++{
++	ssize_t rv;
++
++	mutex_lock(&nfsd_mutex);
++	rv = __write_pnfs_dlm_device(file, buf, size);
++	mutex_unlock(&nfsd_mutex);
++	return rv;
++}
++
++#endif /* CONFIG_PNFSD */
++
+ /*----------------------------------------------------------------------------*/
+ /*
+  *	populating the filesystem.
+@@ -1383,6 +1459,10 @@ static int nfsd_fill_super(struct super_
+ 		[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
+ 		[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
+ #endif
++#ifdef CONFIG_PNFSD
++		[NFSD_pnfs_dlm_device] = {"pnfs_dlm_device", &transaction_ops,
++					   S_IWUSR|S_IRUSR},
++#endif
+ 		/* last one */ {""}
+ 	};
+ 	return simple_fill_super(sb, 0x6e667364, nfsd_files);
+@@ -1421,6 +1501,9 @@ static int create_proc_exports_entry(voi
+ }
+ #endif
+ 
++#if defined(CONFIG_SPNFS_BLOCK)
++int nfsd_bl_init(void);
++#endif
+ static int __init init_nfsd(void)
+ {
+ 	int retval;
+@@ -1443,6 +1526,15 @@ static int __init init_nfsd(void)
+ 	retval = create_proc_exports_entry();
+ 	if (retval)
+ 		goto out_free_idmap;
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++	retval = spnfs_init_proc();
++	if (retval != 0)
++		goto out_free_all;	/* undo proc exports entry too */
++#if defined(CONFIG_SPNFS_BLOCK)
++	nfsd_bl_init();
++#endif /* CONFIG_SPNFS_BLOCK */
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
+ 	retval = register_filesystem(&nfsd_fs_type);
+ 	if (retval)
+ 		goto out_free_all;
+@@ -1465,7 +1557,22 @@ out_free_stat:
+ 
+ static void __exit exit_nfsd(void)
+ {
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++	remove_proc_entry("fs/nfs/spnfs/recall", NULL);
++	remove_proc_entry("fs/nfs/spnfs/layoutseg", NULL);
++	remove_proc_entry("fs/nfs/spnfs/getfh", NULL);
++	remove_proc_entry("fs/nfs/spnfs/config", NULL);
++	remove_proc_entry("fs/nfs/spnfs/ctl", NULL);
++	remove_proc_entry("fs/nfs/spnfs", NULL);
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS_LAYOUTSEGMENTS)
++	remove_proc_entry("fs/nfs/spnfs/layoutseg", NULL);
++	remove_proc_entry("fs/nfs/spnfs/layoutsegsize", NULL);
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS_LAYOUTSEGMENTS */
++
+ 	nfsd_export_shutdown();
++	nfsd4_pnfs_dlm_shutdown();
+ 	nfsd_reply_cache_shutdown();
+ 	remove_proc_entry("fs/nfs/exports", NULL);
+ 	remove_proc_entry("fs/nfs", NULL);
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
+--- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-08-31 20:41:19.204160960 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-31 20:42:05.565212801 -0400
+@@ -285,11 +285,17 @@ extern time_t nfsd4_grace;
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
+ 	NFSD4_SUPPORTED_ATTRS_WORD0
+ 
++#if defined(CONFIG_PNFSD)
++#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
++	(NFSD4_SUPPORTED_ATTRS_WORD1 | FATTR4_WORD1_FS_LAYOUT_TYPES)
++#else /* CONFIG_PNFSD */
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
+ 	NFSD4_SUPPORTED_ATTRS_WORD1
++#endif /* CONFIG_PNFSD */
+ 
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
+-	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
++	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT | \
++	 FATTR4_WORD2_LAYOUT_BLKSIZE)
+ 
+ static inline u32 nfsd_suppattrs0(u32 minorversion)
+ {
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.c
+--- linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-08-31 20:42:05.566222921 -0400
+@@ -10,6 +10,7 @@
+ #include <linux/exportfs.h>
+ 
+ #include <linux/sunrpc/svcauth_gss.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
+ #include "nfsd.h"
+ #include "vfs.h"
+ #include "auth.h"
+@@ -139,6 +140,7 @@ static inline __be32 check_pseudo_root(s
+ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
+ {
+ 	struct knfsd_fh	*fh = &fhp->fh_handle;
++	int fsid_type;
+ 	struct fid *fid = NULL, sfid;
+ 	struct svc_export *exp;
+ 	struct dentry *dentry;
+@@ -159,7 +161,8 @@ static __be32 nfsd_set_fh_dentry(struct 
+ 			return error;
+ 		if (fh->fh_auth_type != 0)
+ 			return error;
+-		len = key_len(fh->fh_fsid_type) / 4;
++		fsid_type = pnfs_fh_fsid_type(fh);
++		len = key_len(fsid_type) / 4;
+ 		if (len == 0)
+ 			return error;
+ 		if  (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
+@@ -172,7 +175,7 @@ static __be32 nfsd_set_fh_dentry(struct 
+ 		data_left -= len;
+ 		if (data_left < 0)
+ 			return error;
+-		exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth);
++		exp = rqst_exp_find(rqstp, fsid_type, fh->fh_auth);
+ 		fid = (struct fid *)(fh->fh_auth + len);
+ 	} else {
+ 		__u32 tfh[2];
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.h
+--- linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-08-31 20:42:05.567233002 -0400
+@@ -14,6 +14,7 @@ enum nfsd_fsid {
+ 	FSID_UUID8,
+ 	FSID_UUID16,
+ 	FSID_UUID16_INUM,
++	FSID_MAX
+ };
+ 
+ enum fsid_source {
+@@ -205,4 +206,42 @@ fh_unlock(struct svc_fh *fhp)
+ 	}
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++/*
++ * fh_fsid_type is overloaded to indicate whether a filehandle was one supplied
++ * to a DS by LAYOUTGET.  nfs4_preprocess_stateid_op() uses this to decide how
++ * to handle a given stateid.
++ */
++static inline int pnfs_fh_is_ds(struct knfsd_fh *fh)
++{
++	return fh->fh_fsid_type >= FSID_MAX;
++}
++
++static inline void pnfs_fh_mark_ds(struct knfsd_fh *fh)
++{
++	BUG_ON(fh->fh_version != 1);
++	BUG_ON(pnfs_fh_is_ds(fh));
++	fh->fh_fsid_type += FSID_MAX;
++}
++
++#else  /* CONFIG_PNFSD */
++
++static inline int pnfs_fh_is_ds(struct knfsd_fh *fh)
++{
++	return 0;
++}
++
++#endif /* CONFIG_PNFSD */
++
++/* lets fh_verify() see the real (non-overloaded) fsid_type of a filehandle */
++static inline int pnfs_fh_fsid_type(struct knfsd_fh *fh)
++{
++	int raw_type = fh->fh_fsid_type;
++
++	if (!pnfs_fh_is_ds(fh))
++		return raw_type;
++	return raw_type - FSID_MAX;
++}
++
+ #endif /* _LINUX_NFSD_FH_INT_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/nfssvc.c
+--- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-08-31 20:41:17.274232911 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-08-31 20:42:05.568144414 -0400
+@@ -115,7 +115,7 @@ struct svc_program		nfsd_program = {
+ 
+ };
+ 
+-u32 nfsd_supported_minorversion;
++u32 nfsd_supported_minorversion = NFSD_SUPPORTED_MINOR_VERSION;
+ 
+ int nfsd_vers(int vers, enum vers_op change)
+ {
+diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pnfsd.h
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-08-31 20:42:05.569090615 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-08-31 20:42:05.569090615 -0400
+@@ -0,0 +1,143 @@
++/*
++ *  Copyright (c) 2005 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef LINUX_NFSD_PNFSD_H
++#define LINUX_NFSD_PNFSD_H
++
++#include <linux/list.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++#include "state.h"
++#include "xdr4.h"
++
++/* outstanding layout stateid */
++struct nfs4_layout_state {
++	struct list_head	ls_perfile;
++	struct list_head	ls_layouts; /* list of nfs4_layouts */
++	struct kref		ls_ref;
++	struct nfs4_client	*ls_client;
++	struct nfs4_file	*ls_file;
++	stateid_t		ls_stateid;
++};
++
++/* outstanding layout */
++struct nfs4_layout {
++	struct list_head		lo_perfile;	/* hash by f_id */
++	struct list_head		lo_perclnt;	/* hash by clientid */
++	struct list_head		lo_perstate;
++	struct nfs4_file		*lo_file;	/* backpointer */
++	struct nfs4_client		*lo_client;
++	struct nfs4_layout_state	*lo_state;
++	struct nfsd4_layout_seg 	lo_seg;
++};
++
++struct pnfs_inval_state {
++	struct knfsd_fh		mdsfh; /* needed only by invalidate all */
++	stateid_t		stid;
++	clientid_t		clid;
++	u32			status;
++};
++
++/* pNFS Data Server state */
++#define DS_STATEID_VALID   0
++#define DS_STATEID_ERROR   1
++#define DS_STATEID_NEW     2
++
++struct pnfs_ds_stateid {
++	struct list_head	ds_hash;        /* ds_stateid hash entry */
++	struct list_head	ds_perclid;     /* per client hash entry */
++	stateid_t		ds_stid;
++	struct knfsd_fh		ds_fh;
++	unsigned long		ds_access;
++	u32			ds_status;      /* from MDS */
++	u32			ds_verifier[2]; /* from MDS */
++	wait_queue_head_t	ds_waitq;
++	unsigned long		ds_flags;
++	struct kref		ds_ref;
++	clientid_t		ds_mdsclid;
++};
++
++struct pnfs_ds_clientid {
++	struct list_head	dc_hash;        /* mds_clid_hashtbl entry */
++	struct list_head	dc_stateid;     /* ds_stateid head */
++	struct list_head	dc_permdsid;    /* per mdsid hash entry */
++	clientid_t		dc_mdsclid;
++	struct kref		dc_ref;
++	uint32_t		dc_mdsid;
++};
++
++struct pnfs_mds_id {
++	struct list_head	di_hash;        /* mds_nodeid list entry */
++	struct list_head	di_mdsclid;     /* mds_clientid head */
++	uint32_t		di_mdsid;
++	time_t			di_mdsboot;	/* mds boot time */
++	struct kref		di_ref;
++};
++
++/* notify device request (from exported filesystem) */
++struct nfs4_notify_device {
++	struct nfsd4_pnfs_cb_dev_list  *nd_list;
++	struct nfs4_client	       *nd_client;
++	struct list_head	        nd_perclnt;
++
++	void				*nd_args;	/* nfsd internal */
++};
++
++u64 find_create_sbid(struct super_block *);
++struct super_block *find_sbid_id(u64);
++__be32 nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *, struct exp_xdr_stream *);
++int nfs4_pnfs_return_layout(struct super_block *, struct svc_fh *,
++					struct nfsd4_pnfs_layoutreturn *);
++int nfs4_pnfs_cb_get_state(struct super_block *, struct pnfs_get_state *);
++int nfs4_pnfs_cb_change_state(struct pnfs_get_state *);
++void nfs4_ds_get_verifier(stateid_t *, struct super_block *, u32 *);
++int put_layoutrecall(struct nfs4_layoutrecall *);
++void nomatching_layout(struct nfs4_layoutrecall *);
++void *layoutrecall_done(struct nfs4_layoutrecall *);
++int nfsd4_cb_layout(struct nfs4_layoutrecall *);
++int nfsd_layout_recall_cb(struct super_block *, struct inode *,
++			  struct nfsd4_pnfs_cb_layout *);
++int nfsd_device_notify_cb(struct super_block *,
++			  struct nfsd4_pnfs_cb_dev_list *);
++int nfsd4_cb_notify_device(struct nfs4_notify_device *);
++void pnfs_set_device_notify(clientid_t *, unsigned int types);
++void pnfs_clear_device_notify(struct nfs4_client *);
++
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++extern struct sockaddr pnfsd_lexp_addr;
++extern size_t pnfs_lexp_addr_len;
++
++extern void pnfsd_lexp_init(struct inode *);
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
++#endif /* LINUX_NFSD_PNFSD_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-08-31 20:42:05.569090615 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-08-31 20:42:05.569090615 -0400
+@@ -0,0 +1,225 @@
++/*
++ * linux/fs/nfsd/pnfsd_lexp.c
++ *
++ * pNFS export of local filesystems.
++ *
++ * Export local file systems over the files layout type.
++ * The MDS (metadata server) functions also as a single DS (data server).
++ * This is mostly useful for development and debugging purposes.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * Copyright (C) 2008 Benny Halevy, <bhalevy@panasas.com>
++ *
++ * Initial implementation was based on the pnfs-gfs2 patches done
++ * by David M. Richter <richterd@citi.umich.edu>
++ */
++
++#include <linux/sunrpc/svc_xprt.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++struct sockaddr pnfsd_lexp_addr;
++size_t pnfs_lexp_addr_len;
++
++static int
++pnfsd_lexp_layout_type(struct super_block *sb)
++{
++	int ret = LAYOUT_NFSV4_1_FILES;
++	dprintk("<-- %s: return %d\n", __func__, ret);
++	return ret;
++}
++
++static int
++pnfsd_lexp_get_device_iter(struct super_block *sb,
++			   u32 layout_type,
++			   struct nfsd4_pnfs_dev_iter_res *res)
++{
++	dprintk("--> %s: sb=%p\n", __func__, sb);
++
++	BUG_ON(layout_type != LAYOUT_NFSV4_1_FILES);
++
++	res->gd_eof = 1;	/* at most one device is ever returned */
++	if (res->gd_cookie)	/* non-zero cookie: nothing left to return */
++		return -ENOENT;
++	res->gd_cookie = 1;
++	res->gd_verf = 1;
++	res->gd_devid = 1;
++
++	dprintk("<-- %s: return 0\n", __func__);
++	return 0;
++}
++
++static int
++pnfsd_lexp_get_device_info(struct super_block *sb,
++			   struct exp_xdr_stream *xdr,
++			   u32 layout_type,
++			   const struct nfsd4_pnfs_deviceid *devid)
++{
++	int err;
++	struct pnfs_filelayout_device fdev;
++	struct pnfs_filelayout_multipath fl_devices[1];
++	u32 fl_stripe_indices[1] = { 0 };
++	struct pnfs_filelayout_devaddr daddr;
++	/* %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x.%03u.%03u */
++	char daddr_buf[8*4 + 2*3 + 10];
++
++	dprintk("--> %s: sb=%p\n", __func__, sb);
++
++	BUG_ON(layout_type != LAYOUT_NFSV4_1_FILES);
++
++	memset(&fdev, '\0', sizeof(fdev));
++
++	if (devid->devid != 1) {	/* get_device_iter only hands out 1 */
++		printk(KERN_ERR "%s: WARNING: didn't receive a deviceid of 1 "
++			"(got: 0x%llx)\n", __func__, devid->devid);
++		err = -EINVAL;
++		goto out;
++	}
++
++	/* exactly one device with one stripe index and one address */
++	fdev.fl_device_length = 1;
++	fdev.fl_device_list = fl_devices;
++
++	fdev.fl_stripeindices_length = fdev.fl_device_length;
++	fdev.fl_stripeindices_list = fl_stripe_indices;
++
++	daddr.r_addr.data = daddr_buf;
++	daddr.r_addr.len = sizeof(daddr_buf);
++	err = __svc_print_netaddr(&pnfsd_lexp_addr, &daddr.r_addr);
++	if (err < 0)
++		goto out;
++	daddr.r_addr.len = err;	/* non-negative return is used as length */
++	switch (pnfsd_lexp_addr.sa_family) {
++	case AF_INET:
++		daddr.r_netid.data = "tcp";
++		daddr.r_netid.len = 3;
++		break;
++	case AF_INET6:
++		daddr.r_netid.data = "tcp6";
++		daddr.r_netid.len = 4;
++		break;
++	default:
++		BUG();
++	}
++	fdev.fl_device_list[0].fl_multipath_length = 1;
++	fdev.fl_device_list[0].fl_multipath_list = &daddr;
++
++	/* have nfsd encode the device info */
++	err = filelayout_encode_devinfo(xdr, &fdev);
++out:
++	dprintk("<-- %s: return %d\n", __func__, err);
++	return err;
++}
++
++static int get_stripe_unit(int blocksize)
++{
++	int unit = (blocksize >= NFSSVC_MAXBLKSIZE) ? blocksize :
++		   NFSSVC_MAXBLKSIZE - (NFSSVC_MAXBLKSIZE % blocksize);
++	dprintk("%s: return %d\n", __func__, unit);
++	return unit;
++}
++
++/* Encode a whole-file files layout on device id 1 with a DS-marked fh copy */
++static enum nfsstat4
++pnfsd_lexp_layout_get(struct inode *inode, struct exp_xdr_stream *xdr,
++		      const struct nfsd4_pnfs_layoutget_arg *arg,
++		      struct nfsd4_pnfs_layoutget_res *res)
++{
++	enum nfsstat4 rc = NFS4_OK;
++	struct pnfs_filelayout_layout *layout = NULL;
++	struct knfsd_fh *fhp = NULL;
++
++	dprintk("--> %s: inode=%p\n", __func__, inode);
++
++	res->lg_seg.layout_type = LAYOUT_NFSV4_1_FILES;
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;
++
++	layout = kzalloc(sizeof(*layout), GFP_KERNEL);
++	if (layout == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;	/* nfsstat4, not an errno */
++		goto error;
++	}
++
++	/* Set file layout response args */
++	layout->lg_layout_type = LAYOUT_NFSV4_1_FILES;
++	layout->lg_stripe_type = STRIPE_SPARSE;
++	layout->lg_commit_through_mds = true;
++	layout->lg_stripe_unit = get_stripe_unit(inode->i_sb->s_blocksize);
++	layout->lg_fh_length = 1;
++	layout->device_id.sbid = arg->lg_sbid;
++	layout->device_id.devid = 1;				/*FSFTEMP*/
++	layout->lg_first_stripe_index = 0;			/*FSFTEMP*/
++	layout->lg_pattern_offset = 0;
++
++	fhp = kmalloc(sizeof(*fhp), GFP_KERNEL);
++	if (fhp == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;	/* nfsstat4, not an errno */
++		goto error;
++	}
++
++	memcpy(fhp, arg->lg_fh, sizeof(*fhp));
++	pnfs_fh_mark_ds(fhp);	/* the copy, not the caller's fh, is marked */
++	layout->lg_fh_list = fhp;
++
++	/* Call nfsd to encode layout */
++	rc = filelayout_encode_layout(xdr, layout);
++exit:
++	kfree(layout);		/* kfree(NULL) is a no-op on early failure */
++	kfree(fhp);
++	dprintk("<-- %s: return %d\n", __func__, rc);
++	return rc;
++
++error:
++	res->lg_seg.length = 0;	/* no layout segment was granted */
++	goto exit;
++}
++
++static int
++pnfsd_lexp_layout_commit(struct inode *inode,
++			 const struct nfsd4_pnfs_layoutcommit_arg *args,
++			 struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	dprintk("%s: (unimplemented)\n", __func__);
++
++	return 0;
++}
++
++static int
++pnfsd_lexp_layout_return(struct inode *inode,
++			 const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	dprintk("%s: (unimplemented)\n", __func__);
++
++	return 0;
++}
++
++static int pnfsd_lexp_get_state(struct inode *inode, struct knfsd_fh *fh,
++				struct pnfs_get_state *p)
++{
++	return 0;	/* just use the current stateid */
++}
++
++static struct pnfs_export_operations pnfsd_lexp_ops = {
++	.layout_type = pnfsd_lexp_layout_type,
++	.get_device_info = pnfsd_lexp_get_device_info,
++	.get_device_iter = pnfsd_lexp_get_device_iter,
++	.layout_get = pnfsd_lexp_layout_get,
++	.layout_commit = pnfsd_lexp_layout_commit,
++	.layout_return = pnfsd_lexp_layout_return,
++	.get_state = pnfsd_lexp_get_state,
++};
++
++void
++pnfsd_lexp_init(struct inode *inode)
++{
++	dprintk("%s: &pnfsd_lexp_ops=%p\n", __func__, &pnfsd_lexp_ops);
++	inode->i_sb->s_pnfs_op = &pnfsd_lexp_ops;
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_com.c
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-08-31 20:42:05.570119170 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-08-31 20:42:05.570119170 -0400
+@@ -0,0 +1,535 @@
++/*
++ * fs/nfsd/spnfs_com.c
++ *
++ * Communication layer between spNFS kernel and userspace
++ * Based heavily on idmap.c
++ *
++ */
++
++/*
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Marius Aamodt Eriksen <marius@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#include <linux/namei.h>
++#include <linux/mount.h>
++#include <linux/path.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++#include <linux/nfsd/debug.h>
++
++#include <linux/nfsd4_spnfs.h>
++
++#define	NFSDDBG_FACILITY		NFSDDBG_PROC
++
++/* rpc_pipefs callbacks, defined below; wired up through spnfs_upcall_ops. */
++static ssize_t spnfs_pipe_upcall(struct file *, struct rpc_pipe_msg *,
++				 char __user *, size_t);
++static ssize_t spnfs_pipe_downcall(struct file *, const char __user *, size_t);
++static void spnfs_pipe_destroy_msg(struct rpc_pipe_msg *);
++
++static const struct rpc_pipe_ops spnfs_upcall_ops = {
++	.upcall		= spnfs_pipe_upcall,	/* copies a request to user */
++	.downcall	= spnfs_pipe_downcall,	/* takes the reply from user */
++	.destroy_msg	= spnfs_pipe_destroy_msg,
++};
++
++/* evil global variable */
++struct spnfs *global_spnfs;
++struct spnfs_config *spnfs_config;
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++int spnfs_use_layoutsegments;
++uint64_t layoutsegment_size;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++/*
++ * Used by spnfs_enabled()
++ * Tracks if the subsystem has been initialized at some point.  It doesn't
++ * matter if it's not currently initialized.
++ */
++static int spnfs_enabled_at_some_point;
++
++/* Create the rpc_pipefs "spnfs" pipe and publish it in global_spnfs. */
++/* Returns 0, -EEXIST if already set up, or another negative errno. */
++int
++nfsd_spnfs_new(void)
++{
++	struct spnfs *spnfs = NULL;
++	struct vfsmount *mnt;
++	struct nameidata nd;
++	int rc;
++
++	if (global_spnfs != NULL)
++		return -EEXIST;
++
++	mnt = rpc_get_mount();
++	if (IS_ERR(mnt))
++		return PTR_ERR(mnt);
++
++	/* FIXME: do not abuse rpc_pipefs/nfs */
++	rc = vfs_path_lookup(mnt->mnt_root, mnt, "/nfs", 0, &nd);
++	if (rc)
++		goto err;
++
++	rc = -ENOMEM;
++	spnfs = kzalloc(sizeof(*spnfs), GFP_KERNEL);
++	if (spnfs == NULL)
++		goto err_path;
++
++	/* Locks and wait queue must be usable before the pipe is visible. */
++	mutex_init(&spnfs->spnfs_lock);
++	mutex_init(&spnfs->spnfs_plock);
++	init_waitqueue_head(&spnfs->spnfs_wq);
++
++	rc = -EPIPE;
++	spnfs->spnfs_dentry = rpc_mkpipe(nd.path.dentry, "spnfs", spnfs,
++					 &spnfs_upcall_ops, 0);
++	if (IS_ERR(spnfs->spnfs_dentry))
++		goto err_path;
++	path_put(&nd.path);	/* lookup reference is no longer needed */
++	global_spnfs = spnfs;
++	spnfs_enabled_at_some_point = 1;
++	return 0;
++err_path:
++	path_put(&nd.path);
++err:
++	rpc_put_mount();
++	kfree(spnfs);
++	return rc;
++}
++
++/* Tear down the spnfs pipe; coded as if global_spnfs will go away someday. */
++void
++nfsd_spnfs_delete(void)
++{
++	struct spnfs *spnfs = global_spnfs;
++
++	if (!spnfs)
++		return;
++	rpc_unlink(spnfs->spnfs_dentry);
++	rpc_put_mount();	/* pairs with rpc_get_mount() in nfsd_spnfs_new() */
++	global_spnfs = NULL;
++	kfree(spnfs);
++}
++
++/* RPC pipefs upcall/downcall routines */
++/* looks like this code is invoked by the rpc_pipe code */
++/* to handle upcalls on things we've queued elsewhere */
++/* See nfs_idmap_id for an example of enqueueing */
++static ssize_t
++spnfs_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
++    char __user *dst, size_t buflen)
++{
++	char *data = (char *)msg->data + msg->copied;
++	size_t mlen = msg->len - msg->copied;
++	unsigned long left;
++
++	if (mlen > buflen)
++		mlen = buflen;
++	/* copy_to_user() returns bytes NOT copied, never a negative errno */
++	left = copy_to_user(dst, data, mlen);
++	if (mlen != 0 && left == mlen) {
++		msg->errno = -EFAULT;
++		return -EFAULT;
++	}
++	mlen -= left;
++	msg->copied += mlen;
++	msg->errno = 0;
++	return mlen;
++}
++
++/* Downcall: the daemon writes one struct spnfs_msg answering the upcall. */
++/* Returns mlen, or -ENOSPC (bad size), -ENOMEM, -EFAULT, -EINVAL (type). */
++static ssize_t
++spnfs_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
++{
++	struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
++	struct spnfs *spnfs = (struct spnfs *)rpci->private;
++	struct spnfs_msg *im_in = NULL, *im = &spnfs->spnfs_im;
++	int ret;
++
++	if (mlen != sizeof(struct spnfs_msg))
++		return -ENOSPC;
++
++	im_in = kmalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im_in == NULL)
++		return -ENOMEM;
++
++	if (copy_from_user(im_in, src, mlen) != 0) {
++		kfree(im_in);	/* do not leak the bounce buffer */
++		return -EFAULT;
++	}
++
++	mutex_lock(&spnfs->spnfs_plock);
++
++	ret = mlen;
++	im->im_status = im_in->im_status;
++	/* If we got an error, terminate now, and wake up pending upcalls */
++	if (!(im_in->im_status & SPNFS_STATUS_SUCCESS)) {
++		wake_up(&spnfs->spnfs_wq);
++		goto out;
++	}
++
++	ret = -EINVAL;
++	/* Accept the reply only if its type matches the queued request. */
++	if (im_in->im_type == im->im_type) {
++		/* copy the response into the spnfs struct */
++		memcpy(&im->im_res, &im_in->im_res, sizeof(im->im_res));
++		ret = mlen;
++	} else
++		dprintk("spnfs: downcall type != upcall type\n");
++
++	wake_up(&spnfs->spnfs_wq);
++/* DMXXX handle rval processing */
++out:
++	mutex_unlock(&spnfs->spnfs_plock);
++	kfree(im_in);
++	return ret;
++}
++
++static void
++spnfs_pipe_destroy_msg(struct rpc_pipe_msg *msg)
++{
++	struct spnfs_msg *im = msg->data;	/* spnfs_upcall() queued this */
++	struct spnfs *spnfs = container_of(im, struct spnfs, spnfs_im);
++
++	if (msg->errno >= 0)	/* only a failed message needs handling */
++		return;
++	mutex_lock(&spnfs->spnfs_plock);
++	im->im_status = SPNFS_STATUS_FAIL;  /* DMXXX */
++	wake_up(&spnfs->spnfs_wq);	/* wakes the sleeper in spnfs_upcall() */
++	mutex_unlock(&spnfs->spnfs_plock);
++}
++
++/* Generic upcall, called by functions in spnfs_ops.c: queue *upmsg, sleep */
++/* until a downcall or message teardown wakes us, then copy out the reply. */
++/* Returns 0 on success, -EIO otherwise (including when no pipe exists).   */
++int
++spnfs_upcall(struct spnfs *spnfs, struct spnfs_msg *upmsg,
++		union spnfs_msg_res *res)
++{
++	struct rpc_pipe_msg msg;
++	struct spnfs_msg *im;
++	DECLARE_WAITQUEUE(wq, current);
++	int ret = -EIO;
++
++	if (spnfs == NULL)	/* global_spnfs is NULL while no pipe exists */
++		return ret;
++	im = &spnfs->spnfs_im;
++
++	mutex_lock(&spnfs->spnfs_lock);	/* held for the whole round trip */
++	mutex_lock(&spnfs->spnfs_plock);
++
++	memset(im, 0, sizeof(*im));
++	memcpy(im, upmsg, sizeof(*upmsg));
++	memset(&msg, 0, sizeof(msg));
++	msg.data = im;
++	msg.len = sizeof(*im);
++
++	add_wait_queue(&spnfs->spnfs_wq, &wq);
++	if (rpc_queue_upcall(spnfs->spnfs_dentry->d_inode, &msg) < 0) {
++		remove_wait_queue(&spnfs->spnfs_wq, &wq);
++		goto out;
++	}
++
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	mutex_unlock(&spnfs->spnfs_plock);
++	schedule();
++	__set_current_state(TASK_RUNNING);
++	remove_wait_queue(&spnfs->spnfs_wq, &wq);
++	mutex_lock(&spnfs->spnfs_plock);
++
++	if (im->im_status & SPNFS_STATUS_SUCCESS) {
++		/* copy our result from the upcall */
++		memcpy(res, &im->im_res, sizeof(*res));
++		ret = 0;
++	}
++out:
++	memset(im, 0, sizeof(*im));
++	mutex_unlock(&spnfs->spnfs_plock);
++	mutex_unlock(&spnfs->spnfs_lock);
++	return ret;
++}
++
++/*
++ * This is used to determine if the spnfsd daemon has been started at
++ * least once since the system came up.  This is used by the export
++ * mechanism to decide if spnfs is in use.
++ *
++ * Returns non-zero if the spnfsd has initialized the communication pipe
++ * at least once.
++ */
++int spnfs_enabled(void)
++{
++	return spnfs_enabled_at_some_point;	/* set in nfsd_spnfs_new() */
++}
++
++#ifdef CONFIG_PROC_FS
++
++/*
++ * procfs virtual files for user/kernel space communication:
++ *
++ * ctl - currently just an on/off switch...can be expanded
++ * getfh - fd to fh conversion
++ * recall - recall a layout from the command line, for example:
++ *		echo <path> > /proc/fs/spnfs/recall
++ * config - configuration info, e.g., stripe size, num ds, etc.
++ */
++
++/*-------------- start ctl -------------------------*/
++static ssize_t ctl_write(struct file *file, const char __user *buf,
++			 size_t count, loff_t *offset)
++{
++	int cmd, rc;
++
++	/* One int is expected: non-zero sets the pipe up, zero tears it down. */
++	if (count < sizeof(cmd) || copy_from_user(&cmd, buf, sizeof(cmd)))
++		return -EFAULT;
++	if (cmd) {
++		rc = nfsd_spnfs_new();
++		if (rc != 0)
++			return rc;
++	} else
++		nfsd_spnfs_delete();
++
++	return count;
++}
++
++static const struct file_operations ctl_ops = {
++	.write		= ctl_write,
++};
++/*-------------- end ctl ---------------------------*/
++
++/*-------------- start config -------------------------*/
++static ssize_t config_write(struct file *file, const char __user *buf,
++			    size_t count, loff_t *offset)
++{
++	static struct spnfs_config cfg;
++
++	/* count is caller-controlled: never copy more than cfg can hold. */
++	if (copy_from_user(&cfg, buf, min_t(size_t, count, sizeof(cfg))))
++		return -EFAULT;
++	spnfs_config = &cfg;	/* consulted by spnfs_read() */
++	return 0;	/* NOTE(review): returns 0, not count -- kept as it was */
++}
++
++static const struct file_operations config_ops = {
++	.write		= config_write,
++};
++/*-------------- end config ---------------------------*/
++
++/*-------------- start getfh -----------------------*/
++static int getfh_open(struct inode *inode, struct file *file)
++{
++	/* Zeroed so a read before any write cannot expose stale heap data. */
++	file->private_data = kzalloc(sizeof(struct nfs_fh), GFP_KERNEL);
++	if (file->private_data == NULL)
++		return -ENOMEM;
++	return 0;
++}
++
++static ssize_t getfh_read(struct file *file, char __user *buf, size_t count,
++			  loff_t *offset)
++{
++	if (count < sizeof(struct nfs_fh) ||	/* buffer cannot hold the fh */
++	    copy_to_user(buf, file->private_data, sizeof(struct nfs_fh)))
++		return -EFAULT;
++	return count;
++}
++
++static ssize_t getfh_write(struct file *file, const char __user *buf,
++			   size_t count, loff_t *offset)
++{
++	int fd;
++
++	/* The payload is one int (an fd); refuse writes too short to hold it. */
++	if (count < sizeof(fd) || copy_from_user(&fd, buf, sizeof(fd)))
++		return -EFAULT;
++	if (spnfs_getfh(fd, file->private_data) != 0)
++		return -EIO;
++	return count;
++}
++
++static int getfh_release(struct inode *inode, struct file *file)
++{
++	kfree(file->private_data);	/* buffer allocated in getfh_open() */
++	return 0;
++}
++
++static const struct file_operations getfh_ops = {
++	.open		= getfh_open,	/* allocates the per-open buffer */
++	.read		= getfh_read,	/* copies the buffer to the caller */
++	.write		= getfh_write,	/* hands an fd to spnfs_getfh() */
++	.release	= getfh_release,
++};
++/*-------------- end getfh ------------------------*/
++
++
++/*-------------- start recall layout --------------*/
++static ssize_t recall_write(struct file *file, const char __user *buf,
++			    size_t count, loff_t *offset)
++{
++	char line[128];
++	char *cursor, *path, *field;
++	u64 off = 0, len = 0;
++	int rc;
++
++	if (count > sizeof(line))
++		return -EINVAL;
++
++	if (copy_from_user(line, buf, count))
++		return -EFAULT;
++
++	/* the request must contain a newline, which becomes the terminator */
++	cursor = memchr(line, '\n', count);
++	if (cursor == NULL)
++		return -EINVAL;
++	*cursor = '\0';
++
++	/*
++	 * Space-separated fields: the path first, then optionally the
++	 * offset and the length of the layout segment to be recalled.
++	 * With only two fields present they are path and offset.
++	 */
++	cursor = line;
++	path = strsep(&cursor, " ");
++	if (path == NULL)
++		return -EINVAL;
++
++	/* optional second field: decimal offset */
++	field = strsep(&cursor, " ");
++	if (field == NULL)
++		goto recall;
++	if (strict_strtoull(field, 10, &off) != 0)
++		return -EINVAL;
++
++	/* optional third field: decimal length */
++	field = strsep(&cursor, " ");
++	if (field != NULL && strict_strtoull(field, 10, &len) != 0)
++		return -EINVAL;
++
++recall:
++	/* off and len are still 0 for any field that was not supplied */
++	rc = spnfs_test_layoutrecall(path, off, len);
++	if (rc != 0)
++		return rc;
++
++	return count;
++}
++
++static const struct file_operations recall_ops = {
++	.write		= recall_write,
++};
++/*-------------- end recall layout --------------*/
++
++
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++/*-------------- start layoutseg -------------------------*/
++static ssize_t layoutseg_write(struct file *file, const char __user *buf,
++			       size_t count, loff_t *offset)
++{
++	char cmd;
++
++	/* Only the first byte counts: '0' disables, anything else enables. */
++	if (count == 0)
++		return 0;
++	if (copy_from_user(&cmd, buf, 1))
++		return -EFAULT;
++	spnfs_use_layoutsegments = (cmd != '0');
++	return count;
++}
++
++static const struct file_operations layoutseg_ops = {
++	.write		= layoutseg_write,
++};
++/*-------------- end layoutseg ---------------------------*/
++
++/*-------------- start layoutsegsize -------------------------*/
++static ssize_t layoutsegsize_write(struct file *file, const char __user *buf,
++				   size_t count, loff_t *offset)
++{
++	char cmd[50] = "";	/* zero-filled, so the copy stays terminated */
++
++	/* Copy only what the caller supplied, leaving room for the NUL. */
++	if (copy_from_user(cmd, buf, min_t(size_t, count, sizeof(cmd) - 1)))
++		return -EFAULT;
++	layoutsegment_size = simple_strtoull(cmd, NULL, 10);
++	return count;
++}
++
++static const struct file_operations layoutsegsize_ops = {
++	.write		= layoutsegsize_write,
++};
++/*-------------- end layoutsegsize ---------------------------*/
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++/*
++ * Create the fs/spnfs procfs directory and the control files in it.
++ * Returns 0 on success, or -ENOMEM after removing whatever was created.
++ */
++int
++spnfs_init_proc(void)
++{
++	static const struct {
++		const char *name;
++		const struct file_operations *fops;
++	} files[] = {
++		{ "fs/spnfs/ctl",		&ctl_ops },
++		{ "fs/spnfs/config",		&config_ops },
++		{ "fs/spnfs/getfh",		&getfh_ops },
++		{ "fs/spnfs/recall",		&recall_ops },
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++		{ "fs/spnfs/layoutseg",		&layoutseg_ops },
++		{ "fs/spnfs/layoutsegsize",	&layoutsegsize_ops },
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++	};
++	struct proc_dir_entry *entry;
++	unsigned int i;
++
++	entry = proc_mkdir("fs/spnfs", NULL);
++	if (!entry)
++		return -ENOMEM;
++
++	for (i = 0; i < ARRAY_SIZE(files); i++) {
++		entry = create_proc_entry(files[i].name, 0, NULL);
++		if (!entry)
++			goto unwind;
++		entry->proc_fops = files[i].fops;
++	}
++	return 0;
++
++unwind:
++	/* files[i] is the one that failed: remove only the entries */
++	/* before it, newest first, and then the directory itself.  */
++	while (i-- > 0)
++		remove_proc_entry(files[i].name, NULL);
++	remove_proc_entry("fs/spnfs", NULL);
++	return -ENOMEM;
++}
++#endif /* CONFIG_PROC_FS */
+diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-08-31 20:42:05.571097807 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-08-31 20:42:05.572091128 -0400
+@@ -0,0 +1,878 @@
++/*
++ * fs/nfsd/spnfs_ops.c
++ *
++ * Communication layer between spNFS kernel and userspace
++ *
++ */
++/******************************************************************************
++
++(c) 2007 Network Appliance, Inc.  All Rights Reserved.
++
++Network Appliance provides this source code under the GPL v2 License.
++The GPL v2 license is available at
++http://opensource.org/licenses/gpl-license.php.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++******************************************************************************/
++
++#include <linux/sched.h>
++#include <linux/file.h>
++#include <linux/namei.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++#include "pnfsd.h"
++
++/* comment out CONFIG_SPNFS_TEST for non-test behaviour */
++/* #define CONFIG_SPNFS_TEST 1 */
++
++#define	NFSDDBG_FACILITY		NFSDDBG_PNFS
++
++/*
++ * The functions that are called from elsewhere in the kernel
++ * to perform tasks in userspace
++ *
++ */
++
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++extern int spnfs_use_layoutsegments;
++extern uint64_t layoutsegment_size;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++extern struct spnfs *global_spnfs;
++
++int
++spnfs_layout_type(struct super_block *sb)
++{
++	return LAYOUT_NFSV4_1_FILES;	/* same answer whatever sb is */
++}
++
++/* LAYOUTGET: ask the daemon for this inode's file layout and encode it */
++/* into xdr.  Returns the status of the encoding step, or a LAYOUT* /   */
++/* TOOSMALL error when allocation, the upcall or reply checks fail.     */
++enum nfsstat4
++spnfs_layoutget(struct inode *inode, struct exp_xdr_stream *xdr,
++		const struct nfsd4_pnfs_layoutget_arg *lg_arg,
++		struct nfsd4_pnfs_layoutget_res *lg_res)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	struct pnfs_filelayout_layout *flp = NULL;
++	int status, i;
++	enum nfsstat4 nfserr;
++
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL); /* sent whole */
++	if (im == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	im->im_type = SPNFS_TYPE_LAYOUTGET;
++	im->im_args.layoutget_args.inode = inode->i_ino;
++	im->im_args.layoutget_args.generation = inode->i_generation;
++	/* call function to queue the msg for upcall */
++	if (spnfs_upcall(spnfs, im, res) != 0) {
++		dprintk("failed spnfs upcall: layoutget\n");
++		nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		goto layoutget_cleanup;
++	}
++	status = res->layoutget_res.status;
++	if (status != 0) {
++		/* FIXME? until user mode is fixed, translate system error */
++		switch (status) {
++		case -E2BIG:
++		case -ETOOSMALL:
++			nfserr = NFS4ERR_TOOSMALL;
++			break;
++		case -ENOMEM:
++		case -EAGAIN:
++		case -EINTR:
++			nfserr = NFS4ERR_LAYOUTTRYLATER;
++			break;
++		case -ENOENT:
++			nfserr = NFS4ERR_BADLAYOUT;
++			break;
++		default:
++			nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		}
++		dprintk("spnfs layout_get upcall: status=%d nfserr=%u\n",
++			status, nfserr);
++		goto layoutget_cleanup;
++	}
++
++	lg_res->lg_return_on_close = 0;
++#if defined(CONFIG_SPNFS_LAYOUTSEGMENTS)
++	/* if spnfs_use_layoutsegments & layoutsegment_size == 0, use */
++	/* the amount requested by the client.			      */
++	if (spnfs_use_layoutsegments) {
++		if (layoutsegment_size != 0)
++			lg_res->lg_seg.length = layoutsegment_size;
++	} else
++		lg_res->lg_seg.length = NFS4_MAX_UINT64;
++#else
++	lg_res->lg_seg.length = NFS4_MAX_UINT64;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++	/* Sizes in the reply come from userspace: check them before use. */
++	nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++	if (res->layoutget_res.stripe_count >
++	    ARRAY_SIZE(res->layoutget_res.flist))
++		goto layoutget_cleanup;
++	flp = kmalloc(sizeof(struct pnfs_filelayout_layout), GFP_KERNEL);
++	if (flp == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	flp->device_id.sbid = lg_arg->lg_sbid;
++	flp->device_id.devid = res->layoutget_res.devid;
++	flp->lg_layout_type = 1; /* XXX */
++	flp->lg_stripe_type = res->layoutget_res.stripe_type;
++	flp->lg_commit_through_mds = 0;
++	flp->lg_stripe_unit =  res->layoutget_res.stripe_size;
++	flp->lg_first_stripe_index = 0;
++	flp->lg_pattern_offset = 0;
++	flp->lg_fh_length = res->layoutget_res.stripe_count;
++	flp->lg_fh_list = kmalloc(flp->lg_fh_length * sizeof(struct knfsd_fh),
++				  GFP_KERNEL);
++	if (flp->lg_fh_list == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	/* FIX: extra copy; res.flist should hold knfsd_fh entries directly. */
++	for (i = 0; i < flp->lg_fh_length; i++) {
++		size_t fh_len = res->layoutget_res.flist[i].fh_len;
++
++		if (fh_len > sizeof(flp->lg_fh_list[i].fh_base))
++			goto layoutget_cleanup;	/* nfserr is UNAVAILABLE */
++		flp->lg_fh_list[i].fh_size = fh_len;
++		memcpy(&flp->lg_fh_list[i].fh_base,
++		       res->layoutget_res.flist[i].fh_val, fh_len);
++	}
++	/* encode the layoutget body */
++	nfserr = filelayout_encode_layout(xdr, flp);
++layoutget_cleanup:
++	if (flp)
++		kfree(flp->lg_fh_list);
++	kfree(flp);
++	kfree(im);
++	kfree(res);
++	return nfserr;
++}
++
++int
++spnfs_layoutcommit(void)
++{
++	return 0;	/* stub: does nothing and reports success */
++}
++
++int
++spnfs_layoutreturn(struct inode *inode,
++		   const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	return 0;	/* stub: both arguments are ignored */
++}
++
++/* Ask nfsd to recall layouts.  Only RETURN_FILE is implemented; the */
++/* RETURN_FSID and RETURN_ALL cases just log and return 0.  Any other */
++/* type yields -EINVAL.                                               */
++int
++spnfs_layoutrecall(struct inode *inode, int type, u64 offset, u64 len)
++{
++	/* Designated initializer: members not named here start out zero. */
++	struct nfsd4_pnfs_cb_layout lr = {
++		.cbl_recall_type = type,
++		.cbl_seg = {
++			.layout_type = LAYOUT_NFSV4_1_FILES,
++			.offset = offset,
++			.length = len,
++			.iomode = IOMODE_ANY,
++		},
++	};
++
++	switch (type) {
++	case RETURN_FILE:
++		dprintk("%s: recalling layout for ino = %lu\n",
++			__func__, inode->i_ino);
++		break;
++	case RETURN_FSID:
++		dprintk("%s: recalling layout for fsid x (unimplemented)\n",
++			__func__);
++		return 0;
++	case RETURN_ALL:
++		/* XXX figure out how to get a sb since there's no inode ptr */
++		dprintk("%s: recalling all layouts (unimplemented)\n",
++			__func__);
++		return 0;
++	default:
++		return -EINVAL;
++	}
++
++	nfsd_layout_recall_cb(inode->i_sb, inode, &lr);
++	return 0;
++}
++
++
++int
++spnfs_test_layoutrecall(char *path, u64 offset, u64 len)
++{
++	struct nameidata nd;
++	struct inode *inode;
++	int type, rc;
++
++	dprintk("%s: path=%s, offset=%llu, len=%llu\n",
++		__func__, path, offset, len);
++
++	if (strcmp(path, "all") == 0) {	/* literal "all": no lookup done */
++		inode = NULL;
++		type = RETURN_ALL;
++	} else {
++		rc = path_lookup(path, 0, &nd);
++		if (rc != 0)
++			return -ENOENT;	/* whatever the lookup error was */
++
++		/*
++		 * XXX todo: add a RETURN_FSID scenario here...maybe if
++		 * inode is a dir...
++		 */
++
++		inode = nd.path.dentry->d_inode;
++		type = RETURN_FILE;
++	}
++
++	if (len == 0)	/* a zero length is passed on as NFS4_MAX_UINT64 */
++		len = NFS4_MAX_UINT64;
++
++	rc = spnfs_layoutrecall(inode, type, offset, len);
++
++	if (type != RETURN_ALL)	/* nd was only filled in by path_lookup() */
++		path_put(&nd.path);
++	return rc;
++}
++
++/* GETDEVICELIST step: pass the cookie/verifier in gd_res to the daemon and */
++/* store its answer back.  Returns the daemon's status, -ENOMEM or -EIO.    */
++int
++spnfs_getdeviceiter(struct super_block *sb,
++		    u32 layout_type,
++		    struct nfsd4_pnfs_dev_iter_res *gd_res)
++{
++	struct spnfs *spnfs = global_spnfs;   /* XXX keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL); /* sent whole */
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto getdeviceiter_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto getdeviceiter_out;
++	}
++
++	im->im_type = SPNFS_TYPE_GETDEVICEITER;
++	im->im_args.getdeviceiter_args.cookie = gd_res->gd_cookie;
++	im->im_args.getdeviceiter_args.verf = gd_res->gd_verf;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto getdeviceiter_out;
++	}
++	status = res->getdeviceiter_res.status;
++	if (res->getdeviceiter_res.eof)
++		gd_res->gd_eof = 1;
++	else {
++		gd_res->gd_devid = res->getdeviceiter_res.devid;
++		gd_res->gd_cookie = res->getdeviceiter_res.cookie;
++		gd_res->gd_verf = res->getdeviceiter_res.verf;
++		gd_res->gd_eof = 0;
++	}
++
++getdeviceiter_out:
++	kfree(im);
++	kfree(res);
++	return status;
++}
++
++#ifdef CONFIG_SPNFS_TEST
++/*
++ * Setup the rq_res xdr_buf.  The svc_rqst rq_respages[1] page contains the
++ * 1024 encoded stripe indices.
++ *
++ * Skip the devaddr4 length and encode the indices count (1024) in the
++ * rq_res.head and set the rq_res.head length.
++ *
++ * Set the rq_res page_len to 4096 (for the 1024 stripe indices).
++ * Set the rq_res xdr_buf tail base to rq_respages[0] just after the
++ * rq_res head to hold the rest of the getdeviceinfo return.
++ *
++ * So rq_respages[rq_resused - 1] contains the rq_res.head and rq_res.tail and
++ * rq_respages[rq_resused] contains the rq_res.pages.
++ */
++static int spnfs_test_indices_xdr(struct pnfs_xdr_info *info,
++				  const struct pnfs_filelayout_device *fdev)
++{
++	struct nfsd4_compoundres *resp = info->resp;
++	struct svc_rqst *rqstp = resp->rqstp;
++	struct xdr_buf *xb = &resp->rqstp->rq_res;
++	__be32 *p;
++
++	p = nfsd4_xdr_reserve_space(resp, 8);	/* two 32-bit words */
++	p++; /* Fill in length later */
++	*p++ = cpu_to_be32(fdev->fl_stripeindices_length); /* 1024 */
++	resp->p = p;
++
++	xb->head[0].iov_len = (char *)resp->p - (char *)xb->head[0].iov_base;
++	xb->pages = &rqstp->rq_respages[rqstp->rq_resused];
++	xb->page_base = 0;
++	xb->page_len = PAGE_SIZE; /* page of 1024 encoded indices */
++	xb->tail[0].iov_base = resp->p;	/* tail starts right after the head */
++	resp->end = xb->head[0].iov_base + PAGE_SIZE;
++	xb->tail[0].iov_len = (char *)resp->end - (char *)resp->p;
++	return 0;
++}
++/*
++ * Return a stripeindices of length 1024 to test
++ * the pNFS client multipage getdeviceinfo implementation.
++ *
++ * Encode a page of stripe indices.
++ */
++static void spnfs_set_test_indices(struct pnfs_filelayout_device *fldev,
++				  struct spnfs_device *dev,
++				  struct pnfs_devinfo_arg *info)
++{
++	struct svc_rqst *rqstp = info->xdr.resp->rqstp;
++	__be32 *p;
++	int i, j = 0;
++
++	p = (__be32 *)page_address(rqstp->rq_respages[rqstp->rq_resused]);
++	fldev->fl_stripeindices_length = 1024;
++	/* round-robin the data servers' device index into the stripe indices */
++	for (i = 0; i < 1024; i++) {
++		*p++ = cpu_to_be32(j);
++		if (j < dev->dscount - 1)
++			j++;
++		else
++			j = 0;	/* wrap back to the first data server */
++	}
++	fldev->fl_stripeindices_list = NULL;	/* indices live in the page */
++}
++#endif /* CONFIG_SPNFS_TEST */
++
++/* GETDEVICEINFO: fetch the data-server list for devid from the daemon and */
++/* encode it into xdr.  Returns the status of the encoding step, else a    */
++/* negative errno or the non-zero status the daemon reported.              */
++int
++spnfs_getdeviceinfo(struct super_block *sb, struct exp_xdr_stream *xdr,
++		    u32 layout_type,
++		    const struct nfsd4_pnfs_deviceid *devid)
++{
++	struct spnfs *spnfs = global_spnfs;
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	struct spnfs_device *dev;
++	struct pnfs_filelayout_device *fldev = NULL;
++	struct pnfs_filelayout_multipath *mp = NULL;
++	struct pnfs_filelayout_devaddr *fldap = NULL;
++	int status = 0, i, len;
++
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL); /* sent whole */
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++
++	im->im_type = SPNFS_TYPE_GETDEVICEINFO;
++	/* XXX FIX: figure out what to do about fsid */
++	im->im_args.getdeviceinfo_args.devid = devid->devid;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto getdeviceinfo_out;
++	}
++	status = res->getdeviceinfo_res.status;
++	if (status != 0)
++		goto getdeviceinfo_out;
++
++	dev = &res->getdeviceinfo_res.devinfo;
++	if (dev->dscount > ARRAY_SIZE(dev->dslist)) {	/* from userspace */
++		status = -EIO;
++		goto getdeviceinfo_out;
++	}
++
++	/* Fill in the device data, i.e., nfs4_1_file_layout_ds_addr4 */
++	fldev = kzalloc(sizeof(struct pnfs_filelayout_device), GFP_KERNEL);
++	if (fldev == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++
++	/* Stripe count is the same as data server count for our purposes */
++	fldev->fl_stripeindices_length = dev->dscount;
++	fldev->fl_device_length = dev->dscount;
++
++	/* Set stripe indices */
++#ifdef CONFIG_SPNFS_TEST
++	spnfs_set_test_indices(fldev, dev, info);
++	fldev->fl_enc_stripe_indices = spnfs_test_indices_xdr;
++#else /* CONFIG_SPNFS_TEST */
++	fldev->fl_stripeindices_list =
++		kmalloc(fldev->fl_stripeindices_length * sizeof(u32),
++			GFP_KERNEL);
++	if (fldev->fl_stripeindices_list == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++	for (i = 0; i < fldev->fl_stripeindices_length; i++)
++		fldev->fl_stripeindices_list[i] = i;
++#endif /* CONFIG_SPNFS_TEST */
++
++	/* Data server addresses: no multipath for spnfs, so mp length is */
++	/* always 1.  The list is zeroed so cleanup can stop at a NULL.   */
++	fldev->fl_device_list =
++		kcalloc(fldev->fl_device_length,
++			sizeof(struct pnfs_filelayout_multipath), GFP_KERNEL);
++	if (fldev->fl_device_list == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++	for (i = 0; i < fldev->fl_device_length; i++) {
++		mp = &fldev->fl_device_list[i];
++		mp->fl_multipath_length = 1;
++		mp->fl_multipath_list =
++			kzalloc(sizeof(struct pnfs_filelayout_devaddr),
++				GFP_KERNEL);
++		if (mp->fl_multipath_list == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		fldap = mp->fl_multipath_list;
++
++		/* Copy the netid into the device address, for example: "tcp" */
++		len = strlen(dev->dslist[i].netid);
++		fldap->r_netid.data = kmalloc(len, GFP_KERNEL);
++		if (fldap->r_netid.data == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		memcpy(fldap->r_netid.data, dev->dslist[i].netid, len);
++		fldap->r_netid.len = len;
++
++		/*
++		 * Copy the network address into the device address,
++		 * for example: "10.35.9.16.08.01"
++		 */
++		len = strlen(dev->dslist[i].addr);
++		fldap->r_addr.data = kmalloc(len, GFP_KERNEL);
++		if (fldap->r_addr.data == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		memcpy(fldap->r_addr.data, dev->dslist[i].addr, len);
++		fldap->r_addr.len = len;
++	}
++
++	/* encode the device data */
++	status = filelayout_encode_devinfo(xdr, fldev);
++
++getdeviceinfo_out:
++	if (fldev) {
++		kfree(fldev->fl_stripeindices_list);
++		if (fldev->fl_device_list) {
++			for (i = 0; i < fldev->fl_device_length; i++) {
++				fldap =
++				    fldev->fl_device_list[i].fl_multipath_list;
++				if (fldap == NULL)
++					break;	/* never got this far */
++				kfree(fldap->r_netid.data);
++				kfree(fldap->r_addr.data);
++				kfree(fldap);
++			}
++			kfree(fldev->fl_device_list);
++		}
++		kfree(fldev);
++	}
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++int
++spnfs_setattr(void)
++{
++	return 0;	/* stub: does nothing and reports success */
++}
++
++/* Tell the daemon about an open of this inode (create/truncate flags are */
++/* taken from *open).  Returns the daemon's status, -ENOMEM or -EIO.      */
++int
++spnfs_open(struct inode *inode, struct nfsd4_open *open)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL); /* sent whole */
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto open_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto open_out;
++	}
++
++	im->im_type = SPNFS_TYPE_OPEN;
++	im->im_args.open_args.inode = inode->i_ino;
++	im->im_args.open_args.generation = inode->i_generation;
++	im->im_args.open_args.create = open->op_create;
++	im->im_args.open_args.createmode = open->op_createmode;
++	im->im_args.open_args.truncate = open->op_truncate;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto open_out;
++	}
++	status = res->open_res.status;
++open_out:
++	kfree(im);
++	kfree(res);
++	return status;
++}
++
++int
++spnfs_create(void)
++{
++	return 0;	/* stub: does nothing and reports success */
++}
++
++/*
++ * Invokes the spnfsd with the inode number of the object to remove.
++ * The file has already been removed on the MDS, so all the spnfsd
++ * daemon does is remove the stripes.
++ * Returns 0 on success otherwise error code
++ */
++int
++spnfs_remove(unsigned long ino, unsigned long generation)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++
++	/* Zeroed: spnfs_upcall() sends the whole message to userspace. */
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto remove_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto remove_out;
++	}
++
++	im->im_type = SPNFS_TYPE_REMOVE;
++	im->im_args.remove_args.inode = ino;
++	im->im_args.remove_args.generation = generation;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto remove_out;
++	}
++	status = res->remove_res.status;
++remove_out:
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++static int
++read_one(struct inode *inode, loff_t offset, size_t len, char *buf,
++	 struct file **filp)
++{
++	loff_t bufoffset = 0, soffset, pos, snum, soff, tmp;
++	size_t iolen;
++	int completed = 0, ds, err;
++
++	while (len > 0) {
++		tmp = offset;	/* do_div() divides tmp in place */
++		soff = do_div(tmp, spnfs_config->stripe_size);
++		snum = tmp;	/* quotient of offset / stripe_size */
++		ds = do_div(tmp, spnfs_config->num_ds);	/* index into filp[] */
++		if (spnfs_config->dense_striping == 0)
++			soffset = offset;	/* offset is used as-is */
++		else {
++			tmp = snum;
++			do_div(tmp, spnfs_config->num_ds);
++			soffset = tmp * spnfs_config->stripe_size + soff;
++		}
++		if (len < spnfs_config->stripe_size - soff)
++			iolen = len;
++		else	/* never read past the end of this stripe unit */
++			iolen = spnfs_config->stripe_size - soff;
++
++		pos = soffset;
++		err = vfs_read(filp[ds], buf + bufoffset, iolen, &pos);
++		if (err < 0)
++			return -EIO;
++		if (err == 0)	/* nothing was read: stop looping */
++			break;
++		filp[ds]->f_pos = pos;
++		iolen = err;	/* continue from what was actually read */
++		completed += iolen;
++		len -= iolen;
++		offset += iolen;
++		bufoffset += iolen;
++	}
++
++	return completed;	/* total bytes placed in buf */
++}
++
++/* Read rq_vec[0..vlen) from the stripe files; *lenp gets the byte count. */
++static __be32
++read(struct inode *inode, loff_t offset, unsigned long *lenp, int vlen,
++     struct svc_rqst *rqstp)
++{
++	int i, vnum, err, bytecount = 0;
++	char path[128];
++	struct file *filp[SPNFS_MAX_DATA_SERVERS];
++	size_t iolen;
++	__be32 status = nfs_ok;
++
++	*lenp = 0;
++	if (spnfs_config->num_ds > SPNFS_MAX_DATA_SERVERS)
++		return nfserr_io;	/* would overrun filp[] below */
++	/* XXX We should just be doing this at open time, but storing this */
++	/* info in nfsd's state structures and piggybacking it through the */
++	/* state handling functions gets kind of messy.  Revisit this.     */
++	memset(filp, 0, SPNFS_MAX_DATA_SERVERS * sizeof(struct file *));
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		if (snprintf(path, sizeof(path), "%s/%ld.%u",
++			     spnfs_config->ds_dir[i], inode->i_ino,
++			     inode->i_generation) < sizeof(path))
++			filp[i] = filp_open(path, O_RDONLY | O_LARGEFILE, 0);
++		if (IS_ERR_OR_NULL(filp[i])) {	/* truncated name or open error */
++			filp[i] = NULL;	/* keep the cleanup loop away from it */
++			status = nfserr_io;
++			goto read_out;
++		}
++		get_file(filp[i]);
++	}
++
++	for (vnum = 0 ; vnum < vlen ; vnum++) {
++		iolen = rqstp->rq_vec[vnum].iov_len;
++		err = read_one(inode, offset + bytecount, iolen,
++			       (char *)rqstp->rq_vec[vnum].iov_base, filp);
++		if (err < 0) {
++			status = nfserr_io;
++			break;
++		}
++		bytecount += err;
++		if (err < iolen)	/* short read: nothing more to fetch */
++			break;
++	}
++read_out:
++	*lenp = bytecount;
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		if (filp[i]) {
++			filp_close(filp[i], current->files);
++			fput(filp[i]);
++		}
++	}
++	return status;
++}
++
++/* Read entry point: answers nfserr_notsupp while spnfs_config is unset. */
++__be32
++spnfs_read(struct inode *inode, loff_t offset, unsigned long *lenp, int vlen,
++	   struct svc_rqst *rqstp)
++{
++	if (!spnfs_config) {
++		printk(KERN_ERR "Please upgrade to latest spnfsd\n");
++		return nfserr_notsupp;
++	}
++	return read(inode, offset, lenp, vlen, rqstp);
++}
++
++/*
++ * Write @len bytes from @buf at @offset, one stripe unit at a time, to
++ * @filp[].  Returns bytes written, or -EIO if a vfs_write() fails or stalls.
++ */
++static int
++write_one(struct inode *inode, loff_t offset, size_t len, char *buf,
++	  struct file **filp)
++{
++	loff_t bufoffset = 0, soffset, pos, soff, tmp;
++	size_t iolen;
++	int completed = 0, ds, err;
++
++	while (len > 0) {
++		tmp = offset;
++		soff = do_div(tmp, spnfs_config->stripe_size);
++		ds = do_div(tmp, spnfs_config->num_ds);
++		if (spnfs_config->dense_striping == 0)
++			soffset = offset;
++		else
++			soffset = tmp * spnfs_config->stripe_size + soff;
++		iolen = spnfs_config->stripe_size - soff;
++		if (len < spnfs_config->stripe_size - soff)
++			iolen = len;
++
++		pos = soffset;
++		err = vfs_write(filp[ds], buf + bufoffset, iolen, &pos);
++		/* a zero-byte write makes no progress: fail rather than spin */
++		if (err <= 0)
++			return -EIO;
++		filp[ds]->f_pos = pos;
++		iolen = err;
++		completed += iolen;
++		len -= iolen;
++		offset += iolen;
++		bufoffset += iolen;
++	}
++
++	return completed;
++}
++
++/*
++ * Open the inode's stripe file on each data server and write the request's
++ * rq_vec[0..vlen) to them at @offset.  XXX The opens belong at open time,
++ * but plumbing them through nfsd's state handling is messy.  Revisit this.
++ */
++static __be32
++write(struct inode *inode, loff_t offset, size_t len, int vlen,
++      struct svc_rqst *rqstp)
++{
++	int i, vnum, err, bytecount = 0;
++	char path[128];
++	struct file *filp[SPNFS_MAX_DATA_SERVERS];
++	size_t iolen;
++	__be32 status = nfs_ok;
++
++	memset(filp, 0, SPNFS_MAX_DATA_SERVERS * sizeof(struct file *));
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		snprintf(path, sizeof(path), "%s/%ld.%u",
++			 spnfs_config->ds_dir[i], inode->i_ino, inode->i_generation);
++		filp[i] = filp_open(path, O_RDWR | O_LARGEFILE, 0);
++		if (IS_ERR(filp[i])) {
++			filp[i] = NULL;	/* ERR_PTR, not NULL, on failure: hide it from cleanup */
++			status = nfserr_io;
++			goto write_out;
++		}
++		get_file(filp[i]);
++	}
++
++	for (vnum = 0; vnum < vlen; vnum++) {
++		iolen = rqstp->rq_vec[vnum].iov_len;
++		err = write_one(inode, offset + bytecount, iolen,
++				(char *)rqstp->rq_vec[vnum].iov_base, filp);
++		if (err != iolen) {	/* short or failed write of this vector */
++			dprintk("spnfs_write: err=%d expected %Zd\n", err, iolen);
++			status = nfserr_io;
++			goto write_out;
++		}
++		bytecount += rqstp->rq_vec[vnum].iov_len;
++	}
++
++write_out:
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		if (filp[i]) {
++			filp_close(filp[i], current->files);
++			fput(filp[i]);	/* pairs with get_file() above */
++		}
++	}
++
++	return status;
++}
++
++/* Write entry point: answers nfserr_notsupp while spnfs_config is unset. */
++__be32
++spnfs_write(struct inode *inode, loff_t offset, size_t len, int vlen,
++	    struct svc_rqst *rqstp)
++{
++	if (!spnfs_config) {
++		printk(KERN_ERR "Please upgrade to latest spnfsd\n");
++		return nfserr_notsupp;
++	}
++	return write(inode, offset, len, vlen, rqstp);
++}
++
++int
++spnfs_commit(void)
++{
++	return 0;	/* stub: performs no work and always reports success */
++}
++
++/*
++ * Return the state for this object.  Currently a stub: @inode, @fh and @arg
++ * are ignored and 0 (success) is returned, so the existing state is used.
++ */
++int
++spnfs_get_state(struct inode *inode, struct knfsd_fh *fh, struct pnfs_get_state *arg)
++{
++	return 0;
++}
++
++/*
++ * Copy the NFS filehandle of the inode behind open descriptor @fd into @fh.
++ * Returns 0, or -EIO when fget() finds no file for @fd.
++ */
++int
++spnfs_getfh(int fd, struct nfs_fh *fh)
++{
++	struct file *filp = fget(fd);
++
++	if (!filp)
++		return -EIO;
++	/* fget() took a reference; drop it once the handle is copied out */
++	memcpy(fh, NFS_FH(filp->f_dentry->d_inode), sizeof(*fh));
++	fput(filp);
++	return 0;
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
+--- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-08-31 20:41:19.205016844 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-31 20:42:05.572091128 -0400
+@@ -242,6 +242,12 @@ struct nfs4_client {
+ 	u32			cl_cb_seq_nr;
+ 	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
+ 						/* wait here for slots */
++#if defined(CONFIG_PNFSD)
++	struct list_head	cl_layouts;	/* outstanding layouts */
++	struct list_head	cl_layoutrecalls; /* outstanding layoutrecall
++						     callbacks */
++	atomic_t		cl_deviceref;	/* Num outstanding devs */
++#endif /* CONFIG_PNFSD */
+ };
+ 
+ static inline void
+@@ -342,12 +348,31 @@ struct nfs4_file {
+ 	struct list_head        fi_hash;    /* hash by "struct inode *" */
+ 	struct list_head        fi_stateids;
+ 	struct list_head	fi_delegations;
++#if defined(CONFIG_PNFSD)
++	struct list_head	fi_layouts;
++	struct list_head	fi_layout_states;
++#endif /* CONFIG_PNFSD */
+ 	struct inode		*fi_inode;
+ 	u32                     fi_id;      /* used with stateowner->so_id 
+ 					     * for stateid_hashtbl hash */
+ 	bool			fi_had_conflict;
++#if defined(CONFIG_PNFSD)
++	/* used by layoutget / layoutrecall */
++	struct nfs4_fsid	fi_fsid;
++	u32			fi_fhlen;
++	u8			fi_fhval[NFS4_FHSIZE];
++#endif /* CONFIG_PNFSD */
+ };
+ 
++#if defined(CONFIG_PNFSD)
++/* pNFS Metadata server state: one node of a stateid's st_pnfs_ds_id list */
++
++struct pnfs_ds_dev_entry {
++	struct list_head	dd_dev_entry; /* st_pnfs_ds_id entry */
++	u32			dd_dsid;	/* NOTE(review): presumably a data server id - confirm */
++};
++#endif /* CONFIG_PNFSD */
++
+ /*
+ * nfs4_stateid can either be an open stateid or (eventually) a lock stateid
+ *
+@@ -370,6 +395,9 @@ struct nfs4_stateid {
+ 	struct list_head              st_perfile;
+ 	struct list_head              st_perstateowner;
+ 	struct list_head              st_lockowners;
++#if defined(CONFIG_PNFSD)
++	struct list_head              st_pnfs_ds_id;
++#endif /* CONFIG_PNFSD */
+ 	struct nfs4_stateowner      * st_stateowner;
+ 	struct nfs4_file            * st_file;
+ 	stateid_t                     st_stateid;
+@@ -421,6 +449,34 @@ extern void nfsd4_recdir_purge_old(void)
+ extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
+ extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
+ extern void release_session_client(struct nfsd4_session *);
++extern void nfsd4_free_slab(struct kmem_cache **);
++extern struct nfs4_file *find_file(struct inode *);
++extern struct nfs4_file *find_alloc_file(struct inode *, struct svc_fh *);
++extern void put_nfs4_file(struct nfs4_file *);
++extern void get_nfs4_file(struct nfs4_file *);
++extern struct nfs4_client *find_confirmed_client(clientid_t *);
++extern struct nfs4_stateid *find_stateid(stateid_t *, int flags);
++extern struct nfs4_delegation *find_delegation_stateid(struct inode *, stateid_t *);
++extern __be32 nfs4_check_stateid(stateid_t *);
++extern void expire_client_lock(struct nfs4_client *);
++extern int filter_confirmed_clients(int (* func)(struct nfs4_client *, void *), void *);
++
++#if defined(CONFIG_PNFSD)
++extern int nfsd4_init_pnfs_slabs(void);
++extern void nfsd4_free_pnfs_slabs(void);
++extern void pnfs_expire_client(struct nfs4_client *);
++extern void release_pnfs_ds_dev_list(struct nfs4_stateid *);
++extern void nfs4_pnfs_state_init(void);
++extern void nfs4_pnfs_state_shutdown(void);
++extern void nfs4_ds_get_verifier(stateid_t *, struct super_block *, u32 *);
++extern int nfs4_preprocess_pnfs_ds_stateid(struct svc_fh *, stateid_t *);
++#else /* CONFIG_PNFSD */
++static inline void nfsd4_free_pnfs_slabs(void) {}
++static inline int nfsd4_init_pnfs_slabs(void) { return 0; }
++static inline void pnfs_expire_client(struct nfs4_client *clp) {}
++static inline void release_pnfs_ds_dev_list(struct nfs4_stateid *stp) {}
++static inline void nfs4_pnfs_state_shutdown(void) {}
++#endif /* CONFIG_PNFSD */
+ 
+ static inline void
+ nfs4_put_stateowner(struct nfs4_stateowner *so)
+@@ -434,4 +490,24 @@ nfs4_get_stateowner(struct nfs4_stateown
+ 	kref_get(&so->so_ref);
+ }
+ 
++/* Exclusive end of the range (start + len), or NFS4_MAX_UINT64 on u64 wrap. */
++static inline u64
++end_offset(u64 start, u64 len)
++{
++	u64 end = start + len;
++
++	return end < start ? NFS4_MAX_UINT64 : end;
++}
++
++/* Offset of the last octet in a range; a zero @len trips BUG_ON(). */
++static inline u64
++last_byte_offset(u64 start, u64 len)
++{
++	u64 end = start + len;
++
++	BUG_ON(!len);
++	/* start + len wrapped past the u64 range: clamp to the largest offset */
++	return end <= start ? NFS4_MAX_UINT64 : end - 1;
++}
++
+ #endif   /* NFSD4_STATE_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.c
+--- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-08-31 20:41:17.275233561 -0400
++++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-08-31 20:42:05.573121119 -0400
+@@ -37,7 +37,12 @@
+ #ifdef CONFIG_NFSD_V4
+ #include <linux/nfs4_acl.h>
+ #include <linux/nfsd_idmap.h>
++#include <linux/security.h>
++#include <linux/nfsd4_spnfs.h>
+ #endif /* CONFIG_NFSD_V4 */
++#if defined(CONFIG_SPNFS_BLOCK)
++#include <linux/nfsd4_block.h>
++#endif
+ 
+ #include "nfsd.h"
+ #include "vfs.h"
+@@ -383,6 +388,12 @@ nfsd_setattr(struct svc_rqst *rqstp, str
+ 					NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
+ 			if (err)
+ 				goto out;
++#if defined(CONFIG_SPNFS_BLOCK)
++			if (pnfs_block_enabled(inode, 0)) {
++				err = bl_layoutrecall(inode, RETURN_FILE,
++				    iap->ia_size, inode->i_size - iap->ia_size);
++			}
++#endif /* CONFIG_SPNFS_BLOCK */
+ 		}
+ 
+ 		/*
+@@ -1703,6 +1714,11 @@ nfsd_rename(struct svc_rqst *rqstp, stru
+ 	struct inode	*fdir, *tdir;
+ 	__be32		err;
+ 	int		host_err;
++#ifdef CONFIG_SPNFS
++	unsigned long ino = 0;
++	unsigned long generation = 0;
++	unsigned int nlink = 0;
++#endif /* CONFIG_SPNFS */
+ 
+ 	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
+ 	if (err)
+@@ -1766,7 +1782,26 @@ nfsd_rename(struct svc_rqst *rqstp, stru
+ 	if (host_err)
+ 		goto out_dput_new;
+ 
++#ifdef CONFIG_SPNFS
++	/*
++	 * if the target is a preexisting regular file, remember the
++	 * inode number and generation so we can delete the stripes;
++	 * save the link count as well so that the stripes only get
++	 * deleted when the last link is deleted
++	 */
++	if (ndentry && ndentry->d_inode && S_ISREG(ndentry->d_inode->i_mode)) {
++		ino = ndentry->d_inode->i_ino;
++		generation = ndentry->d_inode->i_generation;
++		nlink = ndentry->d_inode->i_nlink;
++	}
++#endif /* CONFIG_SPNFS */
++
+ 	host_err = vfs_rename(fdir, odentry, tdir, ndentry);
++#ifdef CONFIG_SPNFS
++	if (spnfs_enabled() && (!host_err && ino && nlink == 1))
++		spnfs_remove(ino, generation);
++#endif /* CONFIG_SPNFS */
++
+ 	if (!host_err) {
+ 		host_err = commit_metadata(tfhp);
+ 		if (!host_err)
+@@ -1807,6 +1842,11 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 	struct inode	*dirp;
+ 	__be32		err;
+ 	int		host_err;
++#if defined(CONFIG_SPNFS)
++	unsigned long	ino;
++	unsigned long	generation;
++	unsigned int	nlink;
++#endif /* defined(CONFIG_SPNFS) */
+ 
+ 	err = nfserr_acces;
+ 	if (!flen || isdotent(fname, flen))
+@@ -1830,6 +1870,17 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 		goto out;
+ 	}
+ 
++#if defined(CONFIG_SPNFS)
++	/*
++	 * Remember the inode number to communicate to the spnfsd
++	 * for removal of stripes; save the link count as well so that
++	 * the stripes only get deleted when the last link is deleted
++	 */
++	ino = rdentry->d_inode->i_ino;
++	generation = rdentry->d_inode->i_generation;
++	nlink = rdentry->d_inode->i_nlink;
++#endif /* defined(CONFIG_SPNFS) */
++
+ 	if (!type)
+ 		type = rdentry->d_inode->i_mode & S_IFMT;
+ 
+@@ -1854,6 +1905,29 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 	if (!host_err)
+ 		host_err = commit_metadata(fhp);
+ 
++#if defined(CONFIG_SPNFS)
++	/*
++	 * spnfs: notify spnfsd of removal to destroy stripes
++	 */
++/*
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (sb->s_export_op->spnfs_remove) {
++*/
++	dprintk("%s check if spnfs_enabled\n", __FUNCTION__);
++	if (spnfs_enabled() && nlink == 1) {
++		BUG_ON(ino == 0);
++		dprintk("%s calling spnfs_remove inumber=%ld\n",
++			__FUNCTION__, ino);
++		if (spnfs_remove(ino, generation) == 0) {
++			dprintk("%s spnfs_remove success\n", __FUNCTION__);
++		} else {
++			/* XXX How do we make this atomic? */
++			printk(KERN_WARNING "nfsd: pNFS could not "
++				"remove stripes for inode: %ld\n", ino);
++		}
++	}
++#endif /* defined(CONFIG_SPNFS) */
++
+ 	mnt_drop_write(fhp->fh_export->ex_path.mnt);
+ out_nfserr:
+ 	err = nfserrno(host_err);
+diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
+--- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-08-31 20:41:19.206170424 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-31 20:42:05.575139084 -0400
+@@ -37,6 +37,8 @@
+ #ifndef _LINUX_NFSD_XDR4_H
+ #define _LINUX_NFSD_XDR4_H
+ 
++#include <linux/nfsd/nfsd4_pnfs.h>
++
+ #include "state.h"
+ #include "nfsd.h"
+ 
+@@ -385,6 +387,51 @@ struct nfsd4_reclaim_complete {
+ 	u32 rca_one_fs;
+ };
+ 
++struct nfsd4_pnfs_getdevinfo {	/* per-op data; member pnfs_getdevinfo of nfsd4_op.u */
++	struct nfsd4_pnfs_deviceid gd_devid;	/* request */
++	u32			gd_layout_type;	/* request */
++	u32			gd_maxcount;	/* request */
++	u32			gd_notify_types;/* request */
++	struct super_block	*gd_sb;	/* NOTE(review): untagged; presumably set by the server, not decoded - confirm */
++};
++
++struct nfsd4_pnfs_getdevlist {	/* per-op data; member pnfs_getdevlist of nfsd4_op.u */
++	u32             gd_layout_type;	/* request */
++	u32		gd_maxdevices;	/* request */
++	u64		gd_cookie;	/* request - response */
++	u64		gd_verf;	/* request - response */
++	struct svc_fh 	*gd_fhp;	/* response */
++	u32		gd_eof;		/* response */
++};
++
++struct nfsd4_pnfs_layoutget {	/* per-op data; member pnfs_layoutget of nfsd4_op.u */
++	u64			lg_minlength;	/* request */
++	u32			lg_signal;	/* request */
++	u32			lg_maxcount;	/* request */
++	struct svc_fh		*lg_fhp;	/* request */
++	stateid_t		lg_sid;		/* request/response */
++	struct nfsd4_layout_seg	lg_seg;		/* request/response */
++	u32			lg_roc;		/* response */
++};
++
++struct nfsd4_pnfs_layoutcommit {	/* per-op data; member pnfs_layoutcommit of nfsd4_op.u */
++	struct nfsd4_pnfs_layoutcommit_arg args;	/* NOTE(review): presumably the request half - confirm */
++	stateid_t		lc_sid;		/* request */
++	struct nfsd4_pnfs_layoutcommit_res res;	/* NOTE(review): presumably the response half - confirm */
++};
++
++enum layoutreturn_flags {	/* bit flags; NOTE(review): presumably stored in lr_flags - confirm */
++	LR_FLAG_INTERN = 1 << 0,	/* internal return */
++	LR_FLAG_EXPIRE = 1 << 1,	/* return on client expiration */
++};
++
++struct nfsd4_pnfs_layoutreturn {	/* per-op data; member pnfs_layoutreturn of nfsd4_op.u */
++	struct nfsd4_pnfs_layoutreturn_arg args;
++	u32			lr_flags;
++	stateid_t		lr_sid;		/* request/response */
++	u32			lrs_present;	/* response */
++};
++
+ struct nfsd4_op {
+ 	int					opnum;
+ 	__be32					status;
+@@ -426,6 +473,13 @@ struct nfsd4_op {
+ 		struct nfsd4_destroy_session	destroy_session;
+ 		struct nfsd4_sequence		sequence;
+ 		struct nfsd4_reclaim_complete	reclaim_complete;
++#if defined(CONFIG_PNFSD)
++		struct nfsd4_pnfs_getdevlist	pnfs_getdevlist;
++		struct nfsd4_pnfs_getdevinfo	pnfs_getdevinfo;
++		struct nfsd4_pnfs_layoutget	pnfs_layoutget;
++		struct nfsd4_pnfs_layoutcommit	pnfs_layoutcommit;
++		struct nfsd4_pnfs_layoutreturn	pnfs_layoutreturn;
++#endif /* CONFIG_PNFSD */
+ 	} u;
+ 	struct nfs4_replay *			replay;
+ };
+diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.c
+--- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-31 20:41:19.146161064 -0400
++++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-31 20:42:05.515139585 -0400
+@@ -36,6 +36,7 @@
+ #include "internal.h"
+ #include "iostat.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_FILE
+ 
+@@ -388,12 +389,17 @@ static int nfs_write_begin(struct file *
+ 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ 	struct page *page;
+ 	int once_thru = 0;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
+ 		file->f_path.dentry->d_parent->d_name.name,
+ 		file->f_path.dentry->d_name.name,
+ 		mapping->host->i_ino, len, (long long) pos);
+ 
++	pnfs_update_layout(mapping->host,
++			   nfs_file_open_context(file),
++			   0, NFS4_MAX_UINT64, IOMODE_RW,
++			   &lseg);
+ start:
+ 	/*
+ 	 * Prevent starvation issues if someone is doing a consistency
+@@ -402,17 +408,22 @@ start:
+ 	ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
+ 			nfs_wait_bit_killable, TASK_KILLABLE);
+ 	if (ret)
+-		return ret;
++		goto out;
+ 
+ 	page = grab_cache_page_write_begin(mapping, index, flags);
+-	if (!page)
+-		return -ENOMEM;
++	if (!page) {
++		ret = -ENOMEM;
++		goto out;
++	}
+ 	*pagep = page;
+ 
+-	ret = nfs_flush_incompatible(file, page);
++	ret = nfs_flush_incompatible(file, page, lseg);
+ 	if (ret) {
+ 		unlock_page(page);
+ 		page_cache_release(page);
++		*pagep = NULL;
++		*fsdata = NULL;
++		goto out;
+ 	} else if (!once_thru &&
+ 		   nfs_want_read_modify_write(file, page, pos, len)) {
+ 		once_thru = 1;
+@@ -421,6 +432,12 @@ start:
+ 		if (!ret)
+ 			goto start;
+ 	}
++	ret = pnfs_write_begin(file, page, pos, len, lseg, fsdata);
++ out:
++	if (ret) {
++		put_lseg(lseg);
++		*fsdata = NULL;
++	}
+ 	return ret;
+ }
+ 
+@@ -430,6 +447,7 @@ static int nfs_write_end(struct file *fi
+ {
+ 	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+ 	int status;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n",
+ 		file->f_path.dentry->d_parent->d_name.name,
+@@ -456,10 +474,17 @@ static int nfs_write_end(struct file *fi
+ 			zero_user_segment(page, pglen, PAGE_CACHE_SIZE);
+ 	}
+ 
+-	status = nfs_updatepage(file, page, offset, copied);
++	lseg = nfs4_pull_lseg_from_fsdata(file, fsdata);
++	status = pnfs_write_end(file, page, pos, len, copied, lseg);
++	if (status)
++		goto out;
++	status = nfs_updatepage(file, page, offset, copied, lseg, fsdata);
+ 
++ out:
+ 	unlock_page(page);
+ 	page_cache_release(page);
++	pnfs_write_end_cleanup(file, fsdata);
++	put_lseg(lseg);
+ 
+ 	if (status < 0)
+ 		return status;
+@@ -570,6 +595,8 @@ static int nfs_vm_page_mkwrite(struct vm
+ 	/* make sure the cache has finished storing the page */
+ 	nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page);
+ 
++	/* XXX Do we want to call pnfs_update_layout here? */
++
+ 	lock_page(page);
+ 	mapping = page->mapping;
+ 	if (mapping != dentry->d_inode->i_mapping)
+@@ -580,11 +607,11 @@ static int nfs_vm_page_mkwrite(struct vm
+ 	if (pagelen == 0)
+ 		goto out_unlock;
+ 
+-	ret = nfs_flush_incompatible(filp, page);
++	ret = nfs_flush_incompatible(filp, page, NULL);
+ 	if (ret != 0)
+ 		goto out_unlock;
+ 
+-	ret = nfs_updatepage(filp, page, 0, pagelen);
++	ret = nfs_updatepage(filp, page, 0, pagelen, NULL, NULL);
+ out_unlock:
+ 	if (!ret)
+ 		return VM_FAULT_LOCKED;
+diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inode.c
+--- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-08-31 20:41:19.149170418 -0400
++++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-31 20:42:05.516222809 -0400
+@@ -48,6 +48,7 @@
+ #include "internal.h"
+ #include "fscache.h"
+ #include "dns_resolve.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_VFS
+ 
+@@ -278,7 +279,7 @@ nfs_fhget(struct super_block *sb, struct
+ 		 */
+ 		inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
+ 		if (S_ISREG(inode->i_mode)) {
+-			inode->i_fop = &nfs_file_operations;
++			inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
+ 			inode->i_data.a_ops = &nfs_file_aops;
+ 			inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
+ 		} else if (S_ISDIR(inode->i_mode)) {
+@@ -530,6 +531,68 @@ out:
+ 	return err;
+ }
+ 
++static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
++{	/* open_context is not set here; each caller assigns it afterwards */
++	atomic_set(&l_ctx->count, 1);	/* starts with a single reference */
++	l_ctx->lockowner = current->files;	/* owner identity: files table ... */
++	l_ctx->pid = current->tgid;		/* ... plus thread-group id of current */
++	INIT_LIST_HEAD(&l_ctx->list);
++}
++
++static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
++{	/* look up current's entry on ctx; nfs_get_lock_context() calls this under i_lock */
++	struct nfs_lock_context *pos;
++
++	list_for_each_entry(pos, &ctx->lock_context.list, list) {
++		if (pos->lockowner != current->files)
++			continue;
++		if (pos->pid != current->tgid)
++			continue;
++		atomic_inc(&pos->count);	/* match: hand it back with a reference taken */
++		return pos;
++	}
++	return NULL;	/* no entry for this files/tgid pair */
++}
++
++struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
++{	/* find-or-create current's lock context on ctx; NULL if kmalloc() fails */
++	struct nfs_lock_context *res, *new = NULL;
++	struct inode *inode = ctx->path.dentry->d_inode;
++
++	spin_lock(&inode->i_lock);
++	res = __nfs_find_lock_context(ctx);
++	if (res == NULL) {
++		spin_unlock(&inode->i_lock);	/* drop the spinlock around the GFP_KERNEL allocation */
++		new = kmalloc(sizeof(*new), GFP_KERNEL);
++		if (new == NULL)
++			return NULL;
++		nfs_init_lock_context(new);
++		spin_lock(&inode->i_lock);
++		res = __nfs_find_lock_context(ctx);	/* re-check: list may have changed while unlocked */
++		if (res == NULL) {
++			list_add_tail(&new->list, &ctx->lock_context.list);
++			new->open_context = ctx;
++			res = new;
++			new = NULL;	/* now owned by the list; keeps kfree() below from freeing it */
++		}
++	}
++	spin_unlock(&inode->i_lock);
++	kfree(new);	/* frees the spare when the re-check found an entry; NULL otherwise */
++	return res;
++}
++
++void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
++{	/* drop one reference; the final put unlinks l_ctx from its list and frees it */
++	struct nfs_open_context *ctx = l_ctx->open_context;
++	struct inode *inode = ctx->path.dentry->d_inode;
++
++	if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock))
++		return;	/* not the last reference */
++	list_del(&l_ctx->list);
++	spin_unlock(&inode->i_lock);	/* i_lock was taken by atomic_dec_and_lock() */
++	kfree(l_ctx);
++}
++
+ /**
+  * nfs_close_context - Common close_context() routine NFSv2/v3
+  * @ctx: pointer to context
+@@ -566,11 +629,11 @@ static struct nfs_open_context *alloc_nf
+ 		path_get(&ctx->path);
+ 		ctx->cred = get_rpccred(cred);
+ 		ctx->state = NULL;
+-		ctx->lockowner = current->files;
+ 		ctx->flags = 0;
+ 		ctx->error = 0;
+ 		ctx->dir_cookie = 0;
+-		atomic_set(&ctx->count, 1);
++		nfs_init_lock_context(&ctx->lock_context);
++		ctx->lock_context.open_context = ctx;
+ 	}
+ 	return ctx;
+ }
+@@ -578,15 +641,16 @@ static struct nfs_open_context *alloc_nf
+ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
+ {
+ 	if (ctx != NULL)
+-		atomic_inc(&ctx->count);
++		atomic_inc(&ctx->lock_context.count);
+ 	return ctx;
+ }
++EXPORT_SYMBOL(get_nfs_open_context);
+ 
+ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
+ {
+ 	struct inode *inode = ctx->path.dentry->d_inode;
+ 
+-	if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
++	if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
+ 		return;
+ 	list_del(&ctx->list);
+ 	spin_unlock(&inode->i_lock);
+@@ -933,6 +997,7 @@ void nfs_fattr_init(struct nfs_fattr *fa
+ 	fattr->time_start = jiffies;
+ 	fattr->gencount = nfs_inc_attr_generation_counter();
+ }
++EXPORT_SYMBOL(nfs_fattr_init);
+ 
+ struct nfs_fattr *nfs_alloc_fattr(void)
+ {
+@@ -1142,6 +1207,14 @@ static int nfs_update_inode(struct inode
+ 		server->fsid = fattr->fsid;
+ 
+ 	/*
++	 * file needs layout commit, server attributes may be stale
++	 */
++	if (layoutcommit_needed(nfsi) && nfsi->change_attr >= fattr->change_attr) {
++		dprintk("NFS: %s: layoutcommit is needed for file %s/%ld\n",
++			__func__, inode->i_sb->s_id, inode->i_ino);
++		return 0;
++	}
++	/*
+ 	 * Update the read time so we don't revalidate too often.
+ 	 */
+ 	nfsi->read_cache_jiffies = fattr->time_start;
+@@ -1340,9 +1413,10 @@ static int nfs_update_inode(struct inode
+  */
+ void nfs4_clear_inode(struct inode *inode)
+ {
++	pnfs_return_layout(inode, NULL, NULL, RETURN_FILE, true);
++
+ 	/* If we are holding a delegation, return it! */
+ 	nfs_inode_return_delegation_noreclaim(inode);
+-	/* First call standard NFS clear_inode() code */
+ 	nfs_clear_inode(inode);
+ }
+ #endif
+@@ -1367,7 +1441,10 @@ struct inode *nfs_alloc_inode(struct sup
+ 
+ void nfs_destroy_inode(struct inode *inode)
+ {
+-	kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
++	struct nfs_inode *nfsi = NFS_I(inode);
++
++	pnfs_destroy_layout(nfsi);
++	kmem_cache_free(nfs_inode_cachep, nfsi);
+ }
+ 
+ static inline void nfs4_init_once(struct nfs_inode *nfsi)
+@@ -1377,6 +1454,11 @@ static inline void nfs4_init_once(struct
+ 	nfsi->delegation = NULL;
+ 	nfsi->delegation_state = 0;
+ 	init_rwsem(&nfsi->rwsem);
++#ifdef CONFIG_NFS_V4_1
++	init_waitqueue_head(&nfsi->lo_waitq);
++	nfsi->pnfs_layout_suspend = 0;
++	nfsi->layout = NULL;
++#endif /* CONFIG_NFS_V4_1 */
+ #endif
+ }
+ 
+@@ -1488,6 +1570,12 @@ static int __init init_nfs_fs(void)
+ 	if (err)
+ 		goto out0;
+ 
++#ifdef CONFIG_NFS_V4_1
++	err = pnfs_initialize();
++	if (err)
++		goto out00;
++#endif /* CONFIG_NFS_V4_1 */
++
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_register(&nfs_rpcstat);
+ #endif
+@@ -1498,6 +1586,10 @@ out:
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_unregister("nfs");
+ #endif
++#ifdef CONFIG_NFS_V4_1
++out00:
++	pnfs_uninitialize();
++#endif /* CONFIG_NFS_V4_1 */
+ 	nfs_destroy_directcache();
+ out0:
+ 	nfs_destroy_writepagecache();
+@@ -1531,6 +1623,9 @@ static void __exit exit_nfs_fs(void)
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_unregister("nfs");
+ #endif
++#ifdef CONFIG_NFS_V4_1
++	pnfs_uninitialize();
++#endif
+ 	unregister_nfs_fs();
+ 	nfs_fs_proc_exit();
+ 	nfsiod_stop();
+diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/internal.h
+--- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-08-31 20:41:19.149170418 -0400
++++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-31 20:42:05.517099944 -0400
+@@ -139,6 +139,16 @@ extern struct nfs_server *nfs_clone_serv
+ 					   struct nfs_fattr *);
+ extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
+ extern int nfs4_check_client_ready(struct nfs_client *clp);
++extern int nfs_sockaddr_cmp(const struct sockaddr *sa1,
++		const struct sockaddr *sa2);
++extern int nfs4_set_client(struct nfs_server *server,
++		const char *hostname,
++		const struct sockaddr *addr,
++		const size_t addrlen,
++		const char *ip_addr,
++		rpc_authflavor_t authflavour,
++		int proto, const struct rpc_timeout *timeparms,
++		u32 minorversion);
+ #ifdef CONFIG_PROC_FS
+ extern int __init nfs_fs_proc_init(void);
+ extern void nfs_fs_proc_exit(void);
+@@ -201,6 +211,8 @@ extern const u32 nfs41_maxwrite_overhead
+ extern struct rpc_procinfo nfs4_procedures[];
+ #endif
+ 
++extern int nfs4_recover_expired_lease(struct nfs_client *clp);
++
+ /* proc.c */
+ void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
+ 
+@@ -248,10 +260,31 @@ extern int nfs4_get_rootfh(struct nfs_se
+ #endif
+ 
+ /* read.c */
++extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++			     const struct rpc_call_ops *call_ops);
++extern int pnfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++			     const struct rpc_call_ops *call_ops);
+ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
+ 
+ /* write.c */
++extern int nfs_initiate_write(struct nfs_write_data *data,
++			      struct rpc_clnt *clnt,
++			      const struct rpc_call_ops *call_ops,
++			      int how);
++extern int pnfs_initiate_write(struct nfs_write_data *data,
++			      struct rpc_clnt *clnt,
++			      const struct rpc_call_ops *call_ops,
++			      int how);
++extern int nfs_initiate_commit(struct nfs_write_data *data,
++			       struct rpc_clnt *clnt,
++			       const struct rpc_call_ops *call_ops,
++			       int how);
++extern int pnfs_initiate_commit(struct nfs_write_data *data,
++			       struct rpc_clnt *clnt,
++			       const struct rpc_call_ops *call_ops,
++				int how, int pnfs);
+ extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
++extern void nfs_mark_list_commit(struct list_head *head);
+ #ifdef CONFIG_MIGRATION
+ extern int nfs_migrate_page(struct address_space *,
+ 		struct page *, struct page *);
+diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kconfig
+--- linux-2.6.34.noarch/fs/nfs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-08-31 20:42:05.500123860 -0400
+@@ -79,10 +79,48 @@ config NFS_V4_1
+ 	depends on NFS_V4 && EXPERIMENTAL
+ 	help
+ 	  This option enables support for minor version 1 of the NFSv4 protocol
+-	  (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client.
++	  (RFC5661) including support for the parallel NFS (pNFS) features
++	  in the kernel's NFS client.
+ 
+ 	  Unless you're an NFS developer, say N.
+ 
++config PNFS_FILE_LAYOUT
++	tristate "NFS client support for the pNFS nfs-files layout (DEVELOPER ONLY)"
++	depends on NFS_FS && NFS_V4_1
++	default y
++	help
++	  This option enables support for the pNFS nfs-files layout.
++
++	  Unless you're an NFS developer, say N.
++
++config PNFS_OBJLAYOUT
++	tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
++	depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD
++	help
++	  Say M here if you want your pNFS client to support the Objects Layout Driver.
++	  Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and
++	  upper level driver (SCSI_OSD_ULD).
++
++	  If unsure, say N.
++
++config PNFS_PANLAYOUT
++	tristate "Provide support for the Panasas OSD Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
++	depends on PNFS_OBJLAYOUT
++	help
++	  Say M or y here if you want your pNFS client to support the Panasas OSD Layout Driver.
++
++	  If unsure, say N.
++
++config PNFS_BLOCK
++	tristate "Provide a pNFS block client (EXPERIMENTAL)"
++	depends on NFS_FS && NFS_V4_1
++	select MD
++	select BLK_DEV_DM
++	help
++	  Say M or y here if you want your pNFS client to support the block protocol.
++
++	  If unsure, say N.
++
+ config ROOT_NFS
+ 	bool "Root file system on NFS"
+ 	depends on NFS_FS=y && IP_PNP
+diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Makefile
+--- linux-2.6.34.noarch/fs/nfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-08-31 20:42:05.501268752 -0400
+@@ -15,5 +15,12 @@ nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4x
+ 			   delegation.o idmap.o \
+ 			   callback.o callback_xdr.o callback_proc.o \
+ 			   nfs4namespace.o
++nfs-$(CONFIG_NFS_V4_1)	+= pnfs.o
+ nfs-$(CONFIG_SYSCTL) += sysctl.o
+ nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
++
++obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
++nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
++
++obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
++obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
+diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs3proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-08-31 20:41:19.152180625 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-31 20:42:05.518232887 -0400
+@@ -833,6 +833,7 @@ const struct nfs_rpc_ops nfs_v3_clientop
+ 	.dentry_ops	= &nfs_dentry_operations,
+ 	.dir_inode_ops	= &nfs3_dir_inode_operations,
+ 	.file_inode_ops	= &nfs3_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs3_proc_get_root,
+ 	.getattr	= nfs3_proc_getattr,
+ 	.setattr	= nfs3_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-08-31 20:42:05.519163219 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-08-31 20:42:05.520222923 -0400
+@@ -0,0 +1,768 @@
++/*
++ *  linux/fs/nfs/nfs4filelayout.c
++ *
++ *  Module for the pnfs nfs4 file layout driver.
++ *  Defines all I/O and Policy interface operations, plus code
++ *  to register itself with the pNFS client.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/time.h>
++#include <linux/kernel.h>
++#include <linux/mm.h>
++#include <linux/string.h>
++#include <linux/vmalloc.h>
++#include <linux/stat.h>
++#include <linux/errno.h>
++#include <linux/unistd.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_page.h>
++#include <linux/nfs4_pnfs.h>
++
++#include "nfs4filelayout.h"
++#include "nfs4_fs.h"
++#include "internal.h"
++#include "pnfs.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Dean Hildebrand <dhildebz@eecs.umich.edu>");
++MODULE_DESCRIPTION("The NFSv4 file layout driver");
++
++/* Callback operations to the pNFS client */
++struct pnfs_client_operations *pnfs_callback_ops;
++
++/* Forward declaration */
++struct layoutdriver_io_operations filelayout_io_operations;
++
++int
++filelayout_initialize_mountpoint(struct nfs_server *nfss,
++				 const struct nfs_fh *mntfh)
++{
++	int status = nfs4_alloc_init_deviceid_cache(nfss->nfs_client,
++						nfs4_fl_free_deviceid_callback);
++	if (status) {
++		printk(KERN_WARNING "%s: deviceid cache could not be "
++			"initialized\n", __func__);
++		return status;
++	}
++	dprintk("%s: deviceid cache has been initialized successfully\n",
++		__func__);
++	return 0;
++}
++
++/* Uninitialize a mountpoint by destroying its device list */
++int
++filelayout_uninitialize_mountpoint(struct nfs_server *nfss)
++{
++	dprintk("--> %s\n", __func__);
++
++	if (nfss->pnfs_curr_ld && nfss->nfs_client->cl_devid_cache)
++		nfs4_put_deviceid_cache(nfss->nfs_client);
++	return 0;
++}
++
++/* Translate a file offset into the offset to use on the data server.
++ * STRIPE_SPARSE: the file offset is used unchanged.  STRIPE_DENSE: each
++ * full stripe before the offset is compacted down to one stripe unit.
++ */
++static loff_t
++filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++
++	switch (flseg->stripe_type) {
++	case STRIPE_SPARSE:
++		return offset;
++
++	case STRIPE_DENSE:
++	{
++		u32 stripe_width;
++		u64 tmp, off;
++		u32 unit = flseg->stripe_unit;
++
++		stripe_width = unit * FILE_DSADDR(lseg)->stripe_count;
++		tmp = off = offset - flseg->pattern_offset;
++		do_div(tmp, stripe_width);	/* tmp = number of full stripes */
++		return tmp * unit + do_div(off, unit); /* + offset within unit */
++	}
++	default:
++		BUG();
++	}
++
++	/* Not reached (every case returns or BUGs); quiets a gcc warning */
++	return 0;
++}
++
++/*
++ * Call ops for the async read/write cases
++ * In the case of dense layouts, the offset needs to be reset to its
++ * original value.
++ */
++static void filelayout_read_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_read_data *rdata = (struct nfs_read_data *)data;
++
++	if (rdata->fldata.orig_offset) {
++		dprintk("%s new off %llu orig offset %llu\n", __func__,
++			rdata->args.offset, rdata->fldata.orig_offset);
++		rdata->args.offset = rdata->fldata.orig_offset;
++	}
++
++	/* Note this may cause RPC to be resent */
++	rdata->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static void filelayout_read_release(void *data)
++{
++	struct nfs_read_data *rdata = (struct nfs_read_data *)data;
++
++	put_lseg(rdata->pdata.lseg);
++	rdata->pdata.lseg = NULL;
++	rdata->pdata.call_ops->rpc_release(data);
++}
++
++static void filelayout_write_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
++
++	if (wdata->fldata.orig_offset) {
++		dprintk("%s new off %llu orig offset %llu\n", __func__,
++			wdata->args.offset, wdata->fldata.orig_offset);
++		wdata->args.offset = wdata->fldata.orig_offset;
++	}
++
++	/* Note this may cause RPC to be resent */
++	wdata->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static void filelayout_write_release(void *data)
++{
++	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
++
++	put_lseg(wdata->pdata.lseg);
++	wdata->pdata.lseg = NULL;
++	wdata->pdata.call_ops->rpc_release(data);
++}
++
++struct rpc_call_ops filelayout_read_call_ops = {
++	.rpc_call_prepare = nfs_read_prepare,
++	.rpc_call_done = filelayout_read_call_done,
++	.rpc_release = filelayout_read_release,
++};
++
++struct rpc_call_ops filelayout_write_call_ops = {
++	.rpc_call_prepare = nfs_write_prepare,
++	.rpc_call_done = filelayout_write_call_done,
++	.rpc_release = filelayout_write_release,
++};
++
++/* Perform reads through the data server selected for data->args.offset.
++ *
++ * An optimization for the NFS file layout driver
++ * allows the original read/write data structs to be passed in
++ * (here as 'data').  Returns PNFS_NOT_ATTEMPTED if no DS is available.
++ *
++ * TODO: join with write_pagelist?
++ */
++static enum pnfs_try_status
++filelayout_read_pagelist(struct nfs_read_data *data, unsigned nr_pages)
++{
++	struct pnfs_layout_segment *lseg = data->pdata.lseg;
++	struct nfs4_pnfs_ds *ds;
++	loff_t offset = data->args.offset;
++	u32 idx;
++	struct nfs_fh *fh;
++
++	dprintk("--> %s ino %lu nr_pages %d pgbase %u req %Zu@%llu\n",
++		__func__, data->inode->i_ino, nr_pages,
++		data->args.pgbase, (size_t)data->args.count, offset);
++
++	/* Retrieve the correct rpc_client for the byte range */
++	idx = nfs4_fl_calc_ds_index(lseg, offset);
++	ds = nfs4_fl_prepare_ds(lseg, idx);
++	if (!ds) {
++		printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	dprintk("%s USE DS:ip %x %s\n", __func__,
++		htonl(ds->ds_ip_addr), ds->r_addr);
++
++	/* just try the first data server for the index..*/
++	data->fldata.ds_nfs_client = ds->ds_clp;
++	fh = nfs4_fl_select_ds_fh(lseg, offset);
++	if (fh)
++		data->args.fh = fh;
++
++	/*
++	 * Now get the file offset on the dserver.
++	 * Set the read offset to this offset, and
++	 * save the original offset in orig_offset.
++	 * In the case of async reads, the offset will be reset in the
++	 * call_ops->rpc_call_done() routine.
++	 */
++	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
++	data->fldata.orig_offset = offset;
++
++	/* Perform an asynchronous read */
++	nfs_initiate_read(data, ds->ds_clp->cl_rpcclient,
++			  &filelayout_read_call_ops);
++
++	data->pdata.pnfs_error = 0;
++
++	return PNFS_ATTEMPTED;
++}
++
++/* Perform async writes; PNFS_NOT_ATTEMPTED if no data server is available */
++static enum pnfs_try_status
++filelayout_write_pagelist(struct nfs_write_data *data, unsigned nr_pages, int sync)
++{
++	struct pnfs_layout_segment *lseg = data->pdata.lseg;
++	struct nfs4_pnfs_ds *ds;
++	loff_t offset = data->args.offset;
++	u32 idx;
++	struct nfs_fh *fh;
++
++	/* Retrieve the correct rpc_client for the byte range */
++	idx = nfs4_fl_calc_ds_index(lseg, offset);
++	ds = nfs4_fl_prepare_ds(lseg, idx);
++	if (!ds) {
++		printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu %s\n", __func__,
++		data->inode->i_ino, sync, (size_t) data->args.count, offset,
++		htonl(ds->ds_ip_addr), ntohs(ds->ds_port), ds->r_addr);
++
++	data->fldata.ds_nfs_client = ds->ds_clp;
++	fh = nfs4_fl_select_ds_fh(lseg, offset);
++	if (fh)
++		data->args.fh = fh;
++	/*
++	 * Get the file offset on the dserver. Set the write offset to
++	 * this offset and save the original offset.
++	 */
++	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
++	data->fldata.orig_offset = offset;
++
++	/*
++	 * Perform an asynchronous write.  The offset will be reset in the
++	 * call_ops->rpc_call_done() routine.
++	 */
++	nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
++			   &filelayout_write_call_ops, sync);
++
++	data->pdata.pnfs_error = 0;
++	return PNFS_ATTEMPTED;
++}
++
++/*
++ * Create a filelayout layout structure and return it.  The pNFS client
++ * will use the pnfs_layout_hdr type to refer to the layout for this
++ * inode from now on.
++ */
++static struct pnfs_layout_hdr *
++filelayout_alloc_layout(struct inode *inode)
++{
++	struct nfs4_filelayout *flp;
++
++	dprintk("NFS_FILELAYOUT: allocating layout\n");
++	flp =  kzalloc(sizeof(struct nfs4_filelayout), GFP_KERNEL);
++	return flp ? &flp->fl_layout : NULL;
++}
++
++/* Free a filelayout layout structure */
++static void
++filelayout_free_layout(struct pnfs_layout_hdr *lo)
++{
++	dprintk("NFS_FILELAYOUT: freeing layout\n");
++	kfree(FILE_LO(lo));
++}
++
++/*
++ * filelayout_check_layout()
++ *
++ * Make sure layout segment parameters are sane WRT the device.
++ * Returns 0 when the segment is usable and -EINVAL when it is not.
++ * Notes:
++ * 1) current code insists that # stripe index = # data servers in ds_list
++ *    which is wrong.
++ * 2) pattern_offset is ignored and must == 0 which is wrong;
++ * 3) the pattern_offset needs to be a multiple of the stripe unit.
++ * 4) stripe unit is multiple of page size
++ */
++
++static int
++filelayout_check_layout(struct pnfs_layout_hdr *lo,
++			struct pnfs_layout_segment *lseg)
++{
++	struct nfs4_filelayout_segment *fl = LSEG_LD_DATA(lseg);
++	struct nfs4_file_layout_dsaddr *dsaddr;
++	int status = -EINVAL;
++	struct nfs_server *nfss = NFS_SERVER(PNFS_INODE(lo));
++
++	dprintk("--> %s\n", __func__);
++	/* find in list or get from server and reference the deviceid */
++	dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, &fl->dev_id);
++	if (dsaddr == NULL) {
++		dsaddr = get_device_info(PNFS_INODE(lo), &fl->dev_id);
++		if (dsaddr == NULL) {
++			dprintk("%s NO device for dev_id %s\n",
++				__func__, deviceid_fmt(&fl->dev_id));
++			goto out;
++		}
++	}
++	if (fl->first_stripe_index < 0 ||
++	    fl->first_stripe_index >= dsaddr->stripe_count) { /* 0..count-1 */
++		dprintk("%s Bad first_stripe_index %d\n",
++				__func__, fl->first_stripe_index);
++		goto out_put;
++	}
++
++	if (fl->pattern_offset != 0) {
++		dprintk("%s Unsupported no-zero pattern_offset %Ld\n",
++				__func__, fl->pattern_offset);
++		goto out_put;
++	}
++
++	if (fl->stripe_unit % PAGE_SIZE) {
++		dprintk("%s Stripe unit (%u) not page aligned\n",
++			__func__, fl->stripe_unit);
++		goto out_put;
++	}
++
++	/* XXX only support SPARSE packing. Don't support use MDS open fh */
++	if (!(fl->num_fh == 1 || fl->num_fh == dsaddr->ds_num)) {
++		dprintk("%s num_fh %u not equal to 1 or ds_num %u\n",
++			__func__, fl->num_fh, dsaddr->ds_num);
++		goto out_put;
++	}
++
++	if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
++		dprintk("%s Stripe unit (%u) not aligned with rsize %u "
++			"wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
++			nfss->wsize);	/* reported only, not treated as fatal */
++	}
++
++	nfs4_set_layout_deviceid(lseg, &dsaddr->deviceid);
++
++	status = 0;
++out:
++	dprintk("--> %s returns %d\n", __func__, status);
++	return status;
++out_put:
++	nfs4_put_unset_layout_deviceid(lseg, &dsaddr->deviceid,
++				       nfs4_fl_free_deviceid_callback);
++	goto out;
++}
++
++static void _filelayout_free_lseg(struct pnfs_layout_segment *lseg);
++static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl);
++
++/* Decode the XDR layout body in lgr->layout.buf into *fl and record the
++ * stripe unit in *flo.  Returns 0, or -ENOMEM if fh_array can't be allocated.
++ */
++static int
++filelayout_set_layout(struct nfs4_filelayout *flo,
++		      struct nfs4_filelayout_segment *fl,
++		      struct nfs4_layoutget_res *lgr)
++{
++	uint32_t *p = (uint32_t *)lgr->layout.buf;
++	uint32_t nfl_util;
++	int i;
++
++	dprintk("%s: set_layout_map Begin\n", __func__);
++
++	memcpy(&fl->dev_id, p, NFS4_PNFS_DEVICEID4_SIZE);
++	p += XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE);
++	nfl_util = be32_to_cpup(p++);
++	if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
++		fl->commit_through_mds = 1;
++	if (nfl_util & NFL4_UFLG_DENSE)
++		fl->stripe_type = STRIPE_DENSE;
++	else
++		fl->stripe_type = STRIPE_SPARSE;
++	fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;
++
++	if (!flo->stripe_unit)
++		flo->stripe_unit = fl->stripe_unit;
++	else if (flo->stripe_unit != fl->stripe_unit) {
++		printk(KERN_NOTICE "%s: updating strip_unit from %u to %u\n",
++			__func__, flo->stripe_unit, fl->stripe_unit);
++		flo->stripe_unit = fl->stripe_unit;
++	}
++
++	fl->first_stripe_index = be32_to_cpup(p++);
++	p = xdr_decode_hyper(p, &fl->pattern_offset);
++	fl->num_fh = be32_to_cpup(p++);
++
++	dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu dev_id %s\n",
++		__func__, nfl_util, fl->num_fh, fl->first_stripe_index,
++		fl->pattern_offset, deviceid_fmt(&fl->dev_id));
++
++	if (fl->num_fh * sizeof(struct nfs_fh) > 2*PAGE_SIZE) {
++		fl->fh_array = vmalloc(fl->num_fh * sizeof(struct nfs_fh));
++		if (fl->fh_array)
++			memset(fl->fh_array, 0,
++				fl->num_fh * sizeof(struct nfs_fh));
++	} else {
++		fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh),
++					GFP_KERNEL);
++	}
++	if (!fl->fh_array)
++		return -ENOMEM;
++
++	for (i = 0; i < fl->num_fh; i++) {
++		/* fh: the length must fit in nfs_fh.data before the copy */
++		fl->fh_array[i].size = be32_to_cpup(p++);
++		if (sizeof(fl->fh_array[i].data) < fl->fh_array[i].size) {
++			printk(KERN_ERR "Too big fh %d received %d\n",
++				i, fl->fh_array[i].size);
++			/* Layout is now invalid, pretend it doesn't exist */
++			filelayout_free_fh_array(fl);
++			fl->num_fh = 0;
++			break;
++		}
++		memcpy(fl->fh_array[i].data, p, fl->fh_array[i].size);
++		p += XDR_QUADLEN(fl->fh_array[i].size);
++		dprintk("DEBUG: %s: fh len %d\n", __func__,
++					fl->fh_array[i].size);
++	}
++
++	return 0;
++}
++
++static struct pnfs_layout_segment *
++filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
++		      struct nfs4_layoutget_res *lgr)
++{
++	struct nfs4_filelayout *flo = FILE_LO(layoutid);
++	struct pnfs_layout_segment *lseg;
++	int rc;
++
++	dprintk("--> %s\n", __func__);
++	lseg = kzalloc(sizeof(struct pnfs_layout_segment) +
++		       sizeof(struct nfs4_filelayout_segment), GFP_KERNEL);
++	if (!lseg)
++		return NULL;
++
++	rc = filelayout_set_layout(flo, LSEG_LD_DATA(lseg), lgr);
++
++	if (rc != 0 || filelayout_check_layout(layoutid, lseg)) {
++		_filelayout_free_lseg(lseg);
++		lseg = NULL;
++	}
++	return lseg;
++}
++
++static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl)
++{
++	/* Same size test that picked vmalloc vs. kzalloc for fh_array */
++	if (fl->num_fh * sizeof(struct nfs_fh) <= 2*PAGE_SIZE)
++		kfree(fl->fh_array);
++	else
++		vfree(fl->fh_array);
++	fl->fh_array = NULL;
++}
++
++static void
++_filelayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	filelayout_free_fh_array(LSEG_LD_DATA(lseg));
++	kfree(lseg);
++}
++
++static void
++filelayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	dprintk("--> %s\n", __func__);
++	nfs4_put_unset_layout_deviceid(lseg, lseg->deviceid,
++				   nfs4_fl_free_deviceid_callback);
++	_filelayout_free_lseg(lseg);
++}
++
++/* Clone 'old' for a commit, taking a reference on it; NULL if alloc fails */
++static struct nfs_write_data *
++filelayout_clone_write_data(struct nfs_write_data *old)
++{
++	struct nfs_write_data *new; /* per call: a static is shared by callers */
++
++	new = nfs_commitdata_alloc();
++	if (!new)
++		goto out;
++	kref_init(&new->refcount);
++	new->parent      = old;
++	kref_get(&old->refcount);
++	new->inode       = old->inode;
++	new->cred        = old->cred;
++	new->args.offset = 0;
++	new->args.count  = 0;
++	new->res.count   = 0;
++	new->res.fattr   = &new->fattr;
++	nfs_fattr_init(&new->fattr);
++	new->res.verf    = &new->verf;
++	new->args.context = get_nfs_open_context(old->args.context);
++	new->pdata.lseg = NULL;
++	new->pdata.call_ops = old->pdata.call_ops;
++	new->pdata.how = old->pdata.how;
++out:
++	return new;
++}
++
++static void filelayout_commit_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
++
++	wdata->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static struct rpc_call_ops filelayout_commit_call_ops = {
++	.rpc_call_prepare = nfs_write_prepare,
++	.rpc_call_done = filelayout_commit_call_done,
++	.rpc_release = filelayout_write_release,
++};
++
++/*
++ * Sort the requests on data->pages by target (a data server index, or
++ * the MDS) and send one COMMIT per target.  'data' itself is used for
++ * the last target; every other target gets a clone of 'data'.
++ */
++enum pnfs_try_status
++filelayout_commit(struct nfs_write_data *data, int sync)
++{
++	LIST_HEAD(head);
++	struct nfs_page *req;
++	loff_t file_offset = 0;
++	u16 idx, i;
++	struct list_head **ds_page_list = NULL;
++	u16 *indices_used;
++	int num_indices_seen = 0;
++	const struct rpc_call_ops *call_ops;
++	struct rpc_clnt *clnt;
++	struct nfs_write_data **clone_list = NULL;
++	struct nfs_write_data *dsdata;
++	struct nfs4_pnfs_ds *ds;
++
++	dprintk("%s data %p sync %d\n", __func__, data, sync);
++
++	/* Alloc room for both ds_page_list and indices_used in one go */
++	ds_page_list = kzalloc((NFS4_PNFS_MAX_MULTI_CNT + 1) *
++			       (sizeof(u16) + sizeof(struct list_head *)),
++			       GFP_KERNEL);
++	if (!ds_page_list)
++		goto mem_error;
++	indices_used = (u16 *) (ds_page_list + NFS4_PNFS_MAX_MULTI_CNT + 1);
++	/*
++	 * Sort pages based on which ds to send to.
++	 * MDS is given index equal to NFS4_PNFS_MAX_MULTI_CNT.
++	 * Note we are assuming there is only a single lseg in play.
++	 * When that is not true, we could first sort on lseg, then
++	 * sort within each as we do here.
++	 */
++	while (!list_empty(&data->pages)) {
++		req = nfs_list_entry(data->pages.next);
++		nfs_list_remove_request(req);
++		if (!req->wb_lseg ||
++		    ((struct nfs4_filelayout_segment *)
++		     LSEG_LD_DATA(req->wb_lseg))->commit_through_mds)
++			idx = NFS4_PNFS_MAX_MULTI_CNT;
++		else {
++			file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
++			idx = nfs4_fl_calc_ds_index(req->wb_lseg, file_offset);
++		}
++		if (ds_page_list[idx]) {
++			/* Already seen this idx: queue after its last request */
++			list_add(&req->wb_list, ds_page_list[idx]);
++		} else {
++			/* New idx not seen so far */
++			list_add_tail(&req->wb_list, &head);
++			indices_used[num_indices_seen++] = idx;
++		}
++		ds_page_list[idx] = &req->wb_list; /* last request for idx */
++	}
++	/* Once created, clone must be released via call_op */
++	clone_list = kzalloc(num_indices_seen *
++			     sizeof(struct nfs_write_data *), GFP_KERNEL);
++	if (!clone_list)
++		goto mem_error;
++	for (i = 0; i < num_indices_seen - 1; i++) {
++		clone_list[i] = filelayout_clone_write_data(data);
++		if (!clone_list[i])
++			goto mem_error;
++	}
++	clone_list[i] = data;	/* the original serves the last index */
++	/*
++	 * Now send off the RPCs to each ds.  Note that it is important
++	 * that any RPC to the MDS be sent last (or at least after all
++	 * clones have been made.)
++	 */
++	for (i = 0; i < num_indices_seen; i++) {
++		dsdata = clone_list[i];
++		idx = indices_used[i];
++		list_cut_position(&dsdata->pages, &head, ds_page_list[idx]);
++		if (idx == NFS4_PNFS_MAX_MULTI_CNT) {
++			call_ops = data->pdata.call_ops;;
++			clnt = NFS_CLIENT(dsdata->inode);
++			ds = NULL;
++		} else {
++			struct nfs_fh *fh;
++
++			call_ops = &filelayout_commit_call_ops;
++			req = nfs_list_entry(dsdata->pages.next);
++			ds = nfs4_fl_prepare_ds(req->wb_lseg, idx);
++			if (!ds) {
++				/* Trigger retry of this chunk through MDS */
++				dsdata->task.tk_status = -EIO;
++				data->pdata.call_ops->rpc_release(dsdata);
++				continue;
++			}
++			clnt = ds->ds_clp->cl_rpcclient;
++			dsdata->fldata.ds_nfs_client = ds->ds_clp;
++			file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
++			fh = nfs4_fl_select_ds_fh(req->wb_lseg, file_offset);
++			if (fh)
++				dsdata->args.fh = fh;
++		}
++		dprintk("%s: Initiating commit: %llu USE DS:\n",
++			__func__, file_offset);
++		print_ds(ds);
++
++		/* Send COMMIT to the chosen client (data server or MDS) */
++		nfs_initiate_commit(dsdata, clnt, call_ops, sync);
++	}
++	kfree(clone_list);
++	kfree(ds_page_list);
++	data->pdata.pnfs_error = 0;
++	return PNFS_ATTEMPTED;
++
++ mem_error:
++	if (clone_list) {
++		for (i = 0; i < num_indices_seen - 1; i++) {
++			if (!clone_list[i])
++				break;
++			data->pdata.call_ops->rpc_release(clone_list[i]);
++		}
++		kfree(clone_list);
++	}
++	kfree(ds_page_list);
++	/* One of these will be empty, but doesn't hurt to do both */
++	nfs_mark_list_commit(&head);
++	nfs_mark_list_commit(&data->pages);
++	data->pdata.call_ops->rpc_release(data);
++	return PNFS_ATTEMPTED;	/* requests were re-marked for commit above */
++}
++
++/* Return the stripesize for the specified file */
++ssize_t
++filelayout_get_stripesize(struct pnfs_layout_hdr *lo)
++{
++	struct nfs4_filelayout *flo = FILE_LO(lo);
++
++	return flo->stripe_unit;
++}
++
++/*
++ * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
++ *
++ * Returns 1 (coalesce) when there is no pg_boundary or when both pages
++ * lie in the same pg_boundary-sized stripe; returns 0 otherwise.
++ */
++int
++filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
++		   struct nfs_page *req)
++{
++	u64 prev_stripe, req_stripe;
++
++	if (!pgio->pg_boundary)
++		return 1;
++
++	/* do_div() leaves the quotient, i.e. the stripe number, in place */
++	prev_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
++	do_div(prev_stripe, pgio->pg_boundary);
++	req_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
++	do_div(req_stripe, pgio->pg_boundary);
++	return prev_stripe == req_stripe;
++}
++
++struct layoutdriver_io_operations filelayout_io_operations = {
++	.commit                  = filelayout_commit,
++	.read_pagelist           = filelayout_read_pagelist,
++	.write_pagelist          = filelayout_write_pagelist,
++	.alloc_layout            = filelayout_alloc_layout,
++	.free_layout             = filelayout_free_layout,
++	.alloc_lseg              = filelayout_alloc_lseg,
++	.free_lseg               = filelayout_free_lseg,
++	.initialize_mountpoint   = filelayout_initialize_mountpoint,
++	.uninitialize_mountpoint = filelayout_uninitialize_mountpoint,
++};
++
++struct layoutdriver_policy_operations filelayout_policy_operations = {
++	.flags                 = PNFS_USE_RPC_CODE,
++	.get_stripesize        = filelayout_get_stripesize,
++	.pg_test               = filelayout_pg_test,
++};
++
++struct pnfs_layoutdriver_type filelayout_type = {
++	.id = LAYOUT_NFSV4_1_FILES,
++	.name = "LAYOUT_NFSV4_1_FILES",
++	.ld_io_ops = &filelayout_io_operations,
++	.ld_policy_ops = &filelayout_policy_operations,
++};
++
++static int __init nfs4filelayout_init(void)
++{
++	printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n",
++	       __func__);
++
++	/*
++	 * Need to register file_operations struct with global list to indicate
++	 * that NFS4 file layout is a possible pNFS I/O module
++	 */
++	pnfs_callback_ops = pnfs_register_layoutdriver(&filelayout_type);
++
++	return 0;
++}
++
++static void __exit nfs4filelayout_exit(void)
++{
++	printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n",
++	       __func__);
++
++	/* Unregister NFS4 file layout driver with pNFS client*/
++	pnfs_unregister_layoutdriver(&filelayout_type);
++}
++
++module_init(nfs4filelayout_init);
++module_exit(nfs4filelayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-08-31 20:42:05.521233147 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-08-31 20:42:05.521233147 -0400
+@@ -0,0 +1,635 @@
++/*
++ *  linux/fs/nfs/nfs4filelayoutdev.c
++ *
++ *  Device operations for the pnfs nfs4 file layout driver.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *  Garth Goodson   <Garth.Goodson@netapp.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/hash.h>
++
++#include <linux/nfs4.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_xdr.h>
++
++#include <asm/div64.h>
++
++#include <linux/utsname.h>
++#include <linux/vmalloc.h>
++#include <linux/nfs4_pnfs.h>
++#include "nfs4filelayout.h"
++#include "internal.h"
++#include "nfs4_fs.h"
++
++#define NFSDBG_FACILITY		NFSDBG_PNFS_LD
++
++DEFINE_SPINLOCK(nfs4_ds_cache_lock);
++static LIST_HEAD(nfs4_data_server_cache);
++
++void
++print_ds(struct nfs4_pnfs_ds *ds)
++{
++	if (ds == NULL) {
++		dprintk("%s NULL device \n", __func__);
++		return;
++	}
++	dprintk("        ip_addr %x\n", ntohl(ds->ds_ip_addr));
++	dprintk("        port %hu\n", ntohs(ds->ds_port));
++	dprintk("        client %p\n", ds->ds_clp);
++	dprintk("        ref count %d\n", atomic_read(&ds->ds_count));
++	if (ds->ds_clp)
++		dprintk("        cl_exchange_flags %x\n",
++					    ds->ds_clp->cl_exchange_flags);
++	dprintk("        ip:port %s\n", ds->r_addr);
++}
++
++void
++print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
++{
++	int i;
++
++	dprintk("%s dsaddr->ds_num %d\n", __func__,
++		dsaddr->ds_num);
++	for (i = 0; i < dsaddr->ds_num; i++)
++		print_ds(dsaddr->ds_list[i]);
++}
++
++/* Debugging function assuming a 64bit major/minor split of the deviceid */
++char *
++deviceid_fmt(const struct pnfs_deviceid *dev_id)
++{
++	static char buf[42];	/* 2 x (up to 20 digits) + ' ' + NUL */
++	uint32_t *p = (uint32_t *)dev_id->data;
++	uint64_t major, minor;
++
++	p = xdr_decode_hyper(p, &major);
++	p = xdr_decode_hyper(p, &minor);
++
++	snprintf(buf, sizeof(buf), "%08llu %08llu", major, minor);
++	return buf;	/* static storage: overwritten by the next call */
++}
++
++/* Find a cached data server by address; nfs4_ds_cache_lock is held */
++static inline struct nfs4_pnfs_ds *
++_data_server_lookup(u32 ip_addr, u32 port)
++{
++	struct nfs4_pnfs_ds *pos;
++
++	dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
++			ntohl(ip_addr), ntohs(port));
++
++	list_for_each_entry(pos, &nfs4_data_server_cache, ds_node) {
++		if (pos->ds_ip_addr != ip_addr)
++			continue;
++		if (pos->ds_port == port)
++			return pos;
++	}
++	return NULL;	/* no entry for this ip/port pair */
++}
++
++/* Set up ds->ds_clp for data server 'ds'; returns 0 or a negative errno */
++static int
++nfs4_pnfs_ds_create(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
++{
++	struct nfs_server	*tmp;
++	struct sockaddr_in	sin;
++	struct rpc_clnt		*mds_clnt = mds_srv->client;
++	struct nfs_client	*clp = mds_srv->nfs_client;
++	struct sockaddr		*mds_addr;
++	int err = 0;
++
++	dprintk("--> %s ip:port %s au_flavor %d\n", __func__,
++		ds->r_addr, mds_clnt->cl_auth->au_flavor);
++
++	sin.sin_family = AF_INET;
++	sin.sin_addr.s_addr = ds->ds_ip_addr;
++	sin.sin_port = ds->ds_port;
++
++	/*
++	 * If this DS is also the MDS, use the MDS session only if the
++	 * MDS exchangeid flags show the EXCHGID4_FLAG_USE_PNFS_DS pNFS role.
++	 */
++	mds_addr = (struct sockaddr *)&clp->cl_addr;
++	if (nfs_sockaddr_cmp((struct sockaddr *)&sin, mds_addr)) {
++		if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS)) {
++			printk(KERN_INFO "ip:port %s is not a pNFS Data "
++				"Server\n", ds->r_addr);
++			err = -ENODEV;
++		} else {
++			atomic_inc(&clp->cl_count);
++			ds->ds_clp = clp;
++			dprintk("%s Using MDS Session for DS\n", __func__);
++		}
++		goto out;
++	}
++
++	/* Temporary server for nfs4_set_client; without it report -ENOMEM */
++	err = -ENOMEM;
++	tmp = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
++	if (!tmp)
++		goto out;
++	/*
++	 * Set a retrans, timeout interval, and authflavor equal to the MDS
++	 * values. Use the MDS nfs_client cl_ipaddr field so as to use the
++	 * same co_ownerid as the MDS.
++	 */
++	err = nfs4_set_client(tmp,
++			      mds_srv->nfs_client->cl_hostname,
++			      (struct sockaddr *)&sin,
++			      sizeof(struct sockaddr),
++			      mds_srv->nfs_client->cl_ipaddr,
++			      mds_clnt->cl_auth->au_flavor,
++			      IPPROTO_TCP,
++			      mds_clnt->cl_xprt->timeout,
++			      1 /* minorversion */);
++	if (err < 0)
++		goto out_free;
++
++	clp = tmp->nfs_client;
++
++	/* Ask for only the EXCHGID4_FLAG_USE_PNFS_DS pNFS role */
++	dprintk("%s EXCHANGE_ID for clp %p\n", __func__, clp);
++	clp->cl_exchange_flags = EXCHGID4_FLAG_USE_PNFS_DS;
++
++	err = nfs4_recover_expired_lease(clp);
++	if (!err)
++		err = nfs4_check_client_ready(clp);
++	if (err)
++		goto out_put;
++
++	if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS)) {
++		printk(KERN_INFO "ip:port %s is not a pNFS Data Server\n",
++			ds->r_addr);
++		err = -ENODEV;
++		goto out_put;
++	}
++	/*
++	 * Mask the (possibly) returned EXCHGID4_FLAG_USE_PNFS_MDS pNFS role
++	 * The is_ds_only_session depends on this.
++	 */
++	clp->cl_exchange_flags &= ~EXCHGID4_FLAG_USE_PNFS_MDS;
++	/*
++	 * Set DS lease equal to the MDS lease, renewal is scheduled in
++	 * create_session
++	 */
++	spin_lock(&mds_srv->nfs_client->cl_lock);
++	clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
++	spin_unlock(&mds_srv->nfs_client->cl_lock);
++	clp->cl_last_renewal = jiffies;
++
++	clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
++	ds->ds_clp = clp;
++
++	dprintk("%s: ip=%x, port=%hu, rpcclient %p\n", __func__,
++				ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
++				clp->cl_rpcclient);
++out_free:
++	kfree(tmp);
++out:
++	dprintk("%s Returns %d\n", __func__, err);
++	return err;
++out_put:
++	nfs_put_client(clp);
++	goto out_free;
++}
++
++static void
++destroy_ds(struct nfs4_pnfs_ds *ds)
++{
++	dprintk("--> %s\n", __func__);
++	print_ds(ds);
++
++	if (ds->ds_clp)
++		nfs_put_client(ds->ds_clp);
++	kfree(ds);
++}
++
++static void
++nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
++{
++	struct nfs4_pnfs_ds *ds;
++	int i;
++
++	dprintk("%s: device id=%s\n", __func__,
++		deviceid_fmt(&dsaddr->deviceid.de_id));
++
++	for (i = 0; i < dsaddr->ds_num; i++) {
++		ds = dsaddr->ds_list[i];
++		if (ds != NULL) {
++			if (atomic_dec_and_lock(&ds->ds_count,
++						&nfs4_ds_cache_lock)) {
++				list_del_init(&ds->ds_node);
++				spin_unlock(&nfs4_ds_cache_lock);
++				destroy_ds(ds);
++			}
++		}
++	}
++	kfree(dsaddr->stripe_indices);
++	kfree(dsaddr);
++}
++
++void
++nfs4_fl_free_deviceid_callback(struct kref *kref)
++{
++	struct nfs4_deviceid *device =
++		container_of(kref, struct nfs4_deviceid, de_kref);
++	struct nfs4_file_layout_dsaddr *dsaddr =
++		container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
++
++	nfs4_fl_free_deviceid(dsaddr);
++}
++
++static void
++nfs4_pnfs_ds_add(struct inode *inode, struct nfs4_pnfs_ds **dsp,
++		 u32 ip_addr, u32 port, char *r_addr, int len)
++{
++	struct nfs4_pnfs_ds *tmp_ds, *ds;
++
++	*dsp = NULL;	/* result seen by the caller if the allocation fails */
++	/* Allocate before locking; the spare is freed if the DS is cached */
++	ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL);
++	if (!ds)
++		return;
++
++	spin_lock(&nfs4_ds_cache_lock);
++	tmp_ds = _data_server_lookup(ip_addr, port);
++	if (tmp_ds == NULL) {	/* not cached: publish the new entry */
++		ds->ds_ip_addr = ip_addr;
++		ds->ds_port = port;
++		strncpy(ds->r_addr, r_addr, len);
++		atomic_set(&ds->ds_count, 1);
++		INIT_LIST_HEAD(&ds->ds_node);
++		ds->ds_clp = NULL;
++		list_add(&ds->ds_node, &nfs4_data_server_cache);
++		*dsp = ds;
++		dprintk("%s add new data server ip 0x%x\n", __func__,
++				ds->ds_ip_addr);
++		spin_unlock(&nfs4_ds_cache_lock);
++	} else {	/* already cached: take a reference, drop the spare */
++		atomic_inc(&tmp_ds->ds_count);
++		*dsp = tmp_ds;
++		dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
++				__func__, tmp_ds->ds_ip_addr,
++				atomic_read(&tmp_ds->ds_count));
++		spin_unlock(&nfs4_ds_cache_lock);
++		kfree(ds);
++	}
++}
++
++/*
++ * Decode one (r_netid, r_addr) pair at *pp and return the data server for
++ * it (found in the cache or newly added); returns NULL on failure.
++ */
++static struct nfs4_pnfs_ds *
++decode_and_add_ds(uint32_t **pp, struct inode *inode)
++{
++	struct nfs4_pnfs_ds *ds = NULL;
++	char r_addr[29]; /* max size of ip/port string */
++	int len;
++	u32 ip_addr, port;
++	int tmp[6];
++	uint32_t *p = *pp;
++
++	dprintk("%s enter\n", __func__);
++	/* check and skip r_netid: only a 3-byte netid ("tcp") is accepted */
++	len = be32_to_cpup(p++);
++	if (len != 3) {
++		printk("%s: ERROR: non TCP r_netid len %d\n",
++			__func__, len);
++		goto out_err;
++	}
++	/* XXX: the netid bytes are skipped without being sanity checked */
++	tmp[0] = be32_to_cpup(p++);
++	len = be32_to_cpup(p++);
++	if (len >= sizeof(r_addr)) {
++		printk("%s: ERROR: Device ip/port too long (%d)\n",
++			__func__, len);
++		goto out_err;
++	}
++	memcpy(r_addr, p, len);
++	p += XDR_QUADLEN(len);
++	*pp = p;
++	r_addr[len] = '\0';
++	if (sscanf(r_addr, "%d.%d.%d.%d.%d.%d", &tmp[0], &tmp[1],
++		   &tmp[2], &tmp[3], &tmp[4], &tmp[5]) != 6) {
++		printk("%s: ERROR: malformed r_addr %s\n", __func__, r_addr);
++		goto out_err;	/* tmp[] would otherwise be used unset */
++	}
++	ip_addr = htonl((tmp[0]<<24) | (tmp[1]<<16) | (tmp[2]<<8) | (tmp[3]));
++	port = htons((tmp[4] << 8) | (tmp[5]));
++	nfs4_pnfs_ds_add(inode, &ds, ip_addr, port, r_addr, len);
++	dprintk("%s: addr:port string = %s\n", __func__, r_addr);
++	return ds;
++out_err:
++	dprintk("%s returned NULL\n", __func__);
++	return NULL;
++}
++
++/* Decode the opaque device body in pdev->area; returns NULL on any error */
++static struct nfs4_file_layout_dsaddr*
++decode_device(struct inode *ino, struct pnfs_device *pdev)
++{
++	int i, dummy;
++	u32 cnt, num;
++	u8 *indexp;
++	uint32_t *p = (u32 *)pdev->area, *indicesp;
++	struct nfs4_file_layout_dsaddr *dsaddr;
++
++	/* Get the stripe count (number of stripe index) */
++	cnt = be32_to_cpup(p++);
++	dprintk("%s stripe count  %d\n", __func__, cnt);
++	if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
++		printk(KERN_WARNING "%s: stripe count %d greater than "
++		       "supported maximum %d\n", __func__,
++			cnt, NFS4_PNFS_MAX_STRIPE_CNT);
++		goto out_err;
++	}
++
++	/* Check the multipath list count */
++	indicesp = p;
++	p += XDR_QUADLEN(cnt << 2);
++	num = be32_to_cpup(p++);
++	dprintk("%s ds_num %u\n", __func__, num);
++	if (num > NFS4_PNFS_MAX_MULTI_CNT) {
++		printk(KERN_WARNING "%s: multipath count %d greater than "
++			"supported maximum %d\n", __func__,
++			num, NFS4_PNFS_MAX_MULTI_CNT);
++		goto out_err;
++	}
++	dsaddr = kzalloc(sizeof(*dsaddr) +
++			(sizeof(struct nfs4_pnfs_ds *) * (num ? num - 1 : 0)),
++			GFP_KERNEL);	/* ds_list[1] is in sizeof; no wrap at 0 */
++	if (!dsaddr)
++		goto out_err;
++
++	dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
++	if (!dsaddr->stripe_indices)
++		goto out_err_free;
++
++	dsaddr->stripe_count = cnt;
++	dsaddr->ds_num = num;
++
++	memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id,
++	       NFS4_PNFS_DEVICEID4_SIZE);
++
++	/* Go back and read stripe indices */
++	p = indicesp;
++	indexp = &dsaddr->stripe_indices[0];
++	for (i = 0; i < dsaddr->stripe_count; i++) {
++		if (be32_to_cpup(p) >= num) /* must name a ds_list[] slot */
++			goto out_err_free;
++		*indexp++ = be32_to_cpup(p++);
++	}
++	/* Skip already read multipath list count */
++	p++;
++
++	for (i = 0; i < dsaddr->ds_num; i++) {
++		int j;
++
++		dummy = be32_to_cpup(p++); /* multipath count */
++		if (dummy > 1) {
++			printk(KERN_WARNING
++			       "%s: Multipath count %d not supported, "
++			       "skipping all greater than 1\n", __func__,
++				dummy);
++		}
++		for (j = 0; j < dummy; j++) {
++			if (j == 0) {
++				dsaddr->ds_list[i] = decode_and_add_ds(&p, ino);
++				if (dsaddr->ds_list[i] == NULL)
++					goto out_err_free;
++			} else {
++				u32 len;
++				/* skip extra multipath */
++				len = be32_to_cpup(p++);
++				p += XDR_QUADLEN(len);
++				len = be32_to_cpup(p++);
++				p += XDR_QUADLEN(len);
++				continue;
++			}
++		}
++	}
++	nfs4_init_deviceid_node(&dsaddr->deviceid);
++
++	return dsaddr;
++
++out_err_free:
++	nfs4_fl_free_deviceid(dsaddr);
++out_err:
++	dprintk("%s ERROR: returning NULL\n", __func__);
++	return NULL;
++}
++
++/*
++ * Decode the opaque device specified in 'dev'
++ * and add it to the list of available devices.
++ * If the deviceid is already cached, nfs4_add_get_deviceid is expected to
++ * return the cached struct and throw away the new one (see its definition).
++ */
++static struct nfs4_file_layout_dsaddr*
++decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
++{
++	struct nfs4_file_layout_dsaddr *dsaddr;
++	struct nfs4_deviceid *d;
++
++	dsaddr = decode_device(inode, dev);
++	if (!dsaddr) {
++		printk(KERN_WARNING "%s: Could not decode or add device\n",
++			__func__);
++		return NULL;
++	}
++
++	d = nfs4_add_get_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
++			      &dsaddr->deviceid);
++
++	return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
++}
++
++/*
++ * Retrieve the information for dev_id, add it to the list of available
++ * devices, and return it.  Returns NULL if any step fails.
++ */
++struct nfs4_file_layout_dsaddr *
++get_device_info(struct inode *inode, struct pnfs_deviceid *dev_id)
++{
++	struct pnfs_device *pdev = NULL;
++	u32 max_resp_sz;
++	int max_pages;
++	struct page **pages = NULL;
++	struct nfs4_file_layout_dsaddr *dsaddr = NULL;
++	int rc, i;
++	struct nfs_server *server = NFS_SERVER(inode);
++
++	/*
++	 * Use the session max response size as the basis for setting
++	 * GETDEVICEINFO's maxcount
++	 */
++	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
++	max_pages = max_resp_sz >> PAGE_SHIFT;
++	dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
++		__func__, inode, max_resp_sz, max_pages);
++
++	pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
++	if (pdev == NULL)
++		return NULL;
++
++	pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
++	if (pages == NULL) {
++		kfree(pdev);
++		return NULL;
++	}
++	for (i = 0; i < max_pages; i++) {
++		pages[i] = alloc_page(GFP_KERNEL);
++		if (!pages[i])
++			goto out_free;
++	}
++
++	pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
++	if (!pdev->area)
++		goto out_free;
++
++	memcpy(&pdev->dev_id, dev_id, NFS4_PNFS_DEVICEID4_SIZE);
++	pdev->layout_type = LAYOUT_NFSV4_1_FILES;
++	pdev->pages = pages;
++	pdev->pgbase = 0;
++	pdev->pglen = PAGE_SIZE * max_pages;
++	pdev->mincount = 0;
++	/* TODO: Update types when CB_NOTIFY_DEVICEID is available */
++	pdev->dev_notify_types = 0;
++
++	rc = pnfs_callback_ops->nfs_getdeviceinfo(server, pdev);
++	dprintk("%s getdevice info returns %d\n", __func__, rc);
++	if (rc)
++		goto out_free;
++
++	/*
++	 * Found new device, need to decode it and then add it to the
++	 * list of known devices for this mountpoint.
++	 */
++	dsaddr = decode_and_add_device(inode, pdev);
++out_free:
++	if (pdev->area != NULL)
++		vunmap(pdev->area);
++	/* pages[] is zero-filled and filled in order: stop at the first gap */
++	for (i = 0; i < max_pages && pages[i]; i++)
++		__free_page(pages[i]);
++	kfree(pages);
++	kfree(pdev);
++	dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
++	return dsaddr;
++}
++
++struct nfs4_file_layout_dsaddr *
++nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *d;	/* cache entry embedded in the dsaddr */
++
++	d = nfs4_find_get_deviceid(clp->cl_devid_cache, id);
++	dprintk("%s device id (%s) nfs4_deviceid %p\n", __func__,
++		deviceid_fmt(id), d);
++	return (d == NULL) ? NULL :	/* not found in the cache */
++		container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
++}
++
++/*
++ * j = ((offset - pattern_offset) / stripe_unit + first_stripe_index)
++ *	% dsaddr->stripe_count, returned as do_div()'s remainder.
++ */
++static inline u32
++_nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	u64 tmp;
++
++	tmp = offset - flseg->pattern_offset;
++	do_div(tmp, flseg->stripe_unit);	/* tmp becomes the quotient */
++	tmp += flseg->first_stripe_index;
++	return do_div(tmp, FILE_DSADDR(lseg)->stripe_count);
++}
++
++u32
++nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	u32 j;	/* slot in the device's stripe_indices[] table */
++
++	j = _nfs4_fl_calc_j_index(lseg, offset);
++	return FILE_DSADDR(lseg)->stripe_indices[j];
++}
++
++struct nfs_fh *
++nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	u32 i;	/* index into flseg->fh_array */
++
++	if (flseg->stripe_type == STRIPE_SPARSE) {
++		if (flseg->num_fh == 1)	/* single handle: always use it */
++			i = 0;
++		else if (flseg->num_fh == 0)	/* no handle to return */
++			return NULL;
++		else	/* fh_array is indexed by data server index */
++			i = nfs4_fl_calc_ds_index(lseg, offset);
++	} else	/* fh_array is indexed by stripe index */
++		i = _nfs4_fl_calc_j_index(lseg, offset);
++	return &flseg->fh_array[i];
++}
++
++struct nfs4_pnfs_ds *
++nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	struct nfs4_file_layout_dsaddr *dsaddr;
++
++	dsaddr = FILE_DSADDR(lseg);
++	if (dsaddr->ds_list[ds_idx] == NULL) {
++		printk(KERN_ERR "%s: No data server for device id (%s)!!\n",
++			__func__, deviceid_fmt(&flseg->dev_id));
++		return NULL;
++	}
++	/* No client attached yet: set one up via nfs4_pnfs_ds_create() */
++	if (!dsaddr->ds_list[ds_idx]->ds_clp) {
++		int err;
++
++		err = nfs4_pnfs_ds_create(PNFS_NFS_SERVER(lseg->layout),
++					  dsaddr->ds_list[ds_idx]);
++		if (err) {
++			printk(KERN_ERR "%s nfs4_pnfs_ds_create error %d\n",
++			       __func__, err);
++			return NULL;
++		}
++	}
++	dprintk("%s: dev_id=%s, ds_idx=%u\n",
++		__func__, deviceid_fmt(&flseg->dev_id), ds_idx);
++
++	return dsaddr->ds_list[ds_idx];
++}
++
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-08-31 20:42:05.520222923 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-08-31 20:42:05.520222923 -0400
+@@ -0,0 +1,96 @@
++/*
++ *  nfs4filelayout.h
++ *
++ *  NFSv4 file layout driver data structures.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef FS_NFS_NFS4FILELAYOUT_H
++#define FS_NFS_NFS4FILELAYOUT_H
++
++#include <linux/kref.h>
++#include <linux/nfs4_pnfs.h>
++
++#define NFS4_PNFS_DEV_HASH_BITS 5
++#define NFS4_PNFS_DEV_HASH_SIZE (1 << NFS4_PNFS_DEV_HASH_BITS)
++#define NFS4_PNFS_DEV_HASH_MASK (NFS4_PNFS_DEV_HASH_SIZE - 1)
++
++#define NFS4_PNFS_MAX_STRIPE_CNT 4096
++#define NFS4_PNFS_MAX_MULTI_CNT  64 /* 256 fit into a u8 stripe_index */
++#define NFS4_PNFS_MAX_MULTI_DS   2
++
++#define FILE_DSADDR(lseg) (container_of((lseg)->deviceid, \
++					struct nfs4_file_layout_dsaddr, \
++					deviceid))	/* lseg's device as a dsaddr */
++
++enum stripetype4 {
++	STRIPE_SPARSE = 1,
++	STRIPE_DENSE = 2
++};
++
++/* One data server address; shared by every device that references it */
++struct nfs4_pnfs_ds {
++	struct list_head	ds_node;  /* link in the data server cache list */
++	u32 			ds_ip_addr;	/* IPv4 address, network order */
++	u32 			ds_port;	/* port, as produced by htons() */
++	struct nfs_client	*ds_clp;	/* NULL until a client is set up */
++	atomic_t		ds_count;	/* references held on this DS */
++	char r_addr[29];		/* address string as decoded */
++};
++
++struct nfs4_file_layout_dsaddr {
++	struct nfs4_deviceid	deviceid;
++	u32 			stripe_count;	/* entries in stripe_indices */
++	u8			*stripe_indices;	/* each indexes ds_list[] */
++	u32			ds_num;		/* entries in ds_list */
++	struct nfs4_pnfs_ds	*ds_list[1];	/* allocated ds_num long */
++};
++
++struct nfs4_pnfs_dev_hlist {
++	rwlock_t		dev_lock;
++	struct hlist_head	dev_list[NFS4_PNFS_DEV_HASH_SIZE];
++};
++
++struct nfs4_filelayout_segment {
++	u32 stripe_type;		/* compared against STRIPE_SPARSE */
++	u32 commit_through_mds;
++	u32 stripe_unit;		/* divisor for the stripe number */
++	u32 first_stripe_index;		/* added to the stripe number */
++	u64 pattern_offset;		/* subtracted from the file offset */
++	struct pnfs_deviceid dev_id;
++	unsigned int num_fh;		/* 0 and 1 are special-cased on lookup */
++	struct nfs_fh *fh_array;	/* handles returned for DS I/O */
++};
++
++struct nfs4_filelayout {
++	struct pnfs_layout_hdr fl_layout;
++	u32 stripe_unit;
++};
++
++extern struct nfs_fh *
++nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, loff_t offset);
++
++static inline struct nfs4_filelayout *
++FILE_LO(struct pnfs_layout_hdr *lo)
++{
++	return container_of(lo, struct nfs4_filelayout, fl_layout);
++}
++
++extern struct pnfs_client_operations *pnfs_callback_ops;
++
++extern void nfs4_fl_free_deviceid_callback(struct kref *);
++extern void print_ds(struct nfs4_pnfs_ds *ds);
++char *deviceid_fmt(const struct pnfs_deviceid *dev_id);
++u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, loff_t offset);
++struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
++					u32 ds_idx);
++extern struct nfs4_file_layout_dsaddr *
++nfs4_fl_find_get_deviceid(struct nfs_client *, struct pnfs_deviceid *dev_id);
++struct nfs4_file_layout_dsaddr *
++get_device_info(struct inode *inode, struct pnfs_deviceid *dev_id);
++
++#endif /* FS_NFS_NFS4FILELAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nfs4_fs.h
+--- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-08-31 20:41:19.154160465 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-31 20:42:05.519163219 -0400
+@@ -45,8 +45,28 @@ enum nfs4_client_state {
+ 	NFS4CLNT_RECLAIM_NOGRACE,
+ 	NFS4CLNT_DELEGRETURN,
+ 	NFS4CLNT_SESSION_RESET,
+-	NFS4CLNT_SESSION_DRAINING,
+ 	NFS4CLNT_RECALL_SLOT,
++	NFS4CLNT_LAYOUT_RECALL,
++};
++
++enum nfs4_session_state {
++	NFS4_SESSION_INITING,
++	NFS4_SESSION_DRAINING,
++};
++
++struct nfs4_minor_version_ops {
++	u32	minor_version;
++
++	int	(*call_sync)(struct nfs_server *server,
++			struct rpc_message *msg,
++			struct nfs4_sequence_args *args,
++			struct nfs4_sequence_res *res,
++			int cache_reply);
++	int	(*validate_stateid)(struct nfs_delegation *,
++			const nfs4_stateid *);
++	const struct nfs4_state_recovery_ops *reboot_recovery_ops;
++	const struct nfs4_state_recovery_ops *nograce_recovery_ops;
++	const struct nfs4_state_maintenance_ops *state_renewal_ops;
+ };
+ 
+ /*
+@@ -89,7 +109,6 @@ struct nfs_unique_id {
+  */
+ struct nfs4_state_owner {
+ 	struct nfs_unique_id so_owner_id;
+-	struct nfs_client    *so_client;
+ 	struct nfs_server    *so_server;
+ 	struct rb_node	     so_client_node;
+ 
+@@ -99,7 +118,6 @@ struct nfs4_state_owner {
+ 	atomic_t	     so_count;
+ 	unsigned long	     so_flags;
+ 	struct list_head     so_states;
+-	struct list_head     so_delegations;
+ 	struct nfs_seqid_counter so_seqid;
+ 	struct rpc_sequence  so_sequence;
+ };
+@@ -125,10 +143,20 @@ enum {
+  * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
+  */
+ 
++struct nfs4_lock_owner {
++	unsigned int lo_type;
++#define NFS4_ANY_LOCK_TYPE	(0U)
++#define NFS4_FLOCK_LOCK_TYPE	(1U << 0)
++#define NFS4_POSIX_LOCK_TYPE	(1U << 1)
++	union {
++		fl_owner_t posix_owner;
++		pid_t flock_owner;
++	} lo_u;
++};
++
+ struct nfs4_lock_state {
+ 	struct list_head	ls_locks;	/* Other lock stateids */
+ 	struct nfs4_state *	ls_state;	/* Pointer to open state */
+-	fl_owner_t		ls_owner;	/* POSIX lock owner */
+ #define NFS_LOCK_INITIALIZED 1
+ 	int			ls_flags;
+ 	struct nfs_seqid_counter	ls_seqid;
+@@ -136,6 +164,7 @@ struct nfs4_lock_state {
+ 	struct nfs_unique_id	ls_id;
+ 	nfs4_stateid		ls_stateid;
+ 	atomic_t		ls_count;
++	struct nfs4_lock_owner	ls_owner;
+ };
+ 
+ /* bits for nfs4_state->flags */
+@@ -219,22 +248,34 @@ extern int nfs4_open_revalidate(struct i
+ extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
+ extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
+ 		struct nfs4_fs_locations *fs_locations, struct page *page);
++extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
+ 
+-extern struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[];
+-extern struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[];
+ #if defined(CONFIG_NFS_V4_1)
+-extern int nfs4_setup_sequence(struct nfs_client *clp,
++static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
++{
++	return server->nfs_client->cl_session;
++}
++
++extern int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
+ 		int cache_reply, struct rpc_task *task);
+ extern void nfs4_destroy_session(struct nfs4_session *session);
+ extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
++extern int nfs4_proc_exchange_id(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_proc_create_session(struct nfs_client *);
+ extern int nfs4_proc_destroy_session(struct nfs4_session *);
+ extern int nfs4_init_session(struct nfs_server *server);
+ extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
+ 		struct nfs_fsinfo *fsinfo);
+ #else /* CONFIG_NFS_v4_1 */
+-static inline int nfs4_setup_sequence(struct nfs_client *clp,
++static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
++{
++	return NULL;
++}
++
++static inline int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
+ 		int cache_reply, struct rpc_task *task)
+ {
+@@ -247,12 +288,12 @@ static inline int nfs4_init_session(stru
+ }
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+-extern struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[];
++extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
+ 
+ extern const u32 nfs4_fattr_bitmap[2];
+ extern const u32 nfs4_statfs_bitmap[2];
+ extern const u32 nfs4_pathconf_bitmap[2];
+-extern const u32 nfs4_fsinfo_bitmap[2];
++extern const u32 nfs4_fsinfo_bitmap[3];
+ extern const u32 nfs4_fs_locations_bitmap[2];
+ 
+ /* nfs4renewd.c */
+@@ -284,7 +325,7 @@ extern void nfs41_handle_sequence_flag_e
+ extern void nfs41_handle_recall_slot(struct nfs_client *clp);
+ extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
+ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
+-extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
++extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
+ 
+ extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
+ extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
+@@ -293,6 +334,7 @@ extern void nfs_increment_lock_seqid(int
+ extern void nfs_release_seqid(struct nfs_seqid *seqid);
+ extern void nfs_free_seqid(struct nfs_seqid *seqid);
+ 
++/* write.c */
+ extern const nfs4_stateid zero_stateid;
+ 
+ /* nfs4xdr.c */
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-08-31 20:41:19.157140145 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-31 20:42:05.524099925 -0400
+@@ -49,12 +49,14 @@
+ #include <linux/mount.h>
+ #include <linux/module.h>
+ #include <linux/sunrpc/bc_xprt.h>
++#include <linux/nfs4_pnfs.h>
+ 
+ #include "nfs4_fs.h"
+ #include "delegation.h"
+ #include "internal.h"
+ #include "iostat.h"
+ #include "callback.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_PROC
+ 
+@@ -67,7 +69,7 @@ struct nfs4_opendata;
+ static int _nfs4_proc_open(struct nfs4_opendata *data);
+ static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
+ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
+-static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
++static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, struct nfs_client *);
+ static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
+ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
+ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
+@@ -125,11 +127,16 @@ const u32 nfs4_pathconf_bitmap[2] = {
+ 	0
+ };
+ 
+-const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
++const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE
+ 			| FATTR4_WORD0_MAXREAD
+ 			| FATTR4_WORD0_MAXWRITE
+ 			| FATTR4_WORD0_LEASE_TIME,
++#ifdef CONFIG_NFS_V4_1
++			FATTR4_WORD1_FS_LAYOUT_TYPES,
++			FATTR4_WORD2_LAYOUT_BLKSIZE
++#else /* CONFIG_NFS_V4_1 */
+ 			0
++#endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+ const u32 nfs4_fs_locations_bitmap[2] = {
+@@ -356,7 +363,7 @@ static void nfs41_check_drain_session_co
+ {
+ 	struct rpc_task *task;
+ 
+-	if (!test_bit(NFS4CLNT_SESSION_DRAINING, &ses->clp->cl_state)) {
++	if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
+ 		task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq);
+ 		if (task)
+ 			rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+@@ -370,12 +377,11 @@ static void nfs41_check_drain_session_co
+ 	complete(&ses->complete);
+ }
+ 
+-static void nfs41_sequence_free_slot(const struct nfs_client *clp,
+-			      struct nfs4_sequence_res *res)
++static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
+ {
+ 	struct nfs4_slot_table *tbl;
+ 
+-	tbl = &clp->cl_session->fc_slot_table;
++	tbl = &res->sr_session->fc_slot_table;
+ 	if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) {
+ 		/* just wake up the next guy waiting since
+ 		 * we may have not consumed a slot after all */
+@@ -385,18 +391,17 @@ static void nfs41_sequence_free_slot(con
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+ 	nfs4_free_slot(tbl, res->sr_slotid);
+-	nfs41_check_drain_session_complete(clp->cl_session);
++	nfs41_check_drain_session_complete(res->sr_session);
+ 	spin_unlock(&tbl->slot_tbl_lock);
+ 	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+ }
+ 
+-static void nfs41_sequence_done(struct nfs_client *clp,
+-				struct nfs4_sequence_res *res,
+-				int rpc_status)
++static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
+ {
+ 	unsigned long timestamp;
+ 	struct nfs4_slot_table *tbl;
+ 	struct nfs4_slot *slot;
++	struct nfs_client *clp;
+ 
+ 	/*
+ 	 * sr_status remains 1 if an RPC level error occurred. The server
+@@ -411,13 +416,16 @@ static void nfs41_sequence_done(struct n
+ 	if (res->sr_slotid == NFS4_MAX_SLOT_TABLE)
+ 		goto out;
+ 
++	tbl = &res->sr_session->fc_slot_table;
++	slot = tbl->slots + res->sr_slotid;
++
+ 	/* Check the SEQUENCE operation status */
+-	if (res->sr_status == 0) {
+-		tbl = &clp->cl_session->fc_slot_table;
+-		slot = tbl->slots + res->sr_slotid;
++	switch (res->sr_status) {
++	case 0:
+ 		/* Update the slot's sequence and clientid lease timer */
+ 		++slot->seq_nr;
+ 		timestamp = res->sr_renewal_time;
++		clp = res->sr_session->clp;
+ 		spin_lock(&clp->cl_lock);
+ 		if (time_before(clp->cl_last_renewal, timestamp))
+ 			clp->cl_last_renewal = timestamp;
+@@ -425,11 +433,39 @@ static void nfs41_sequence_done(struct n
+ 		/* Check sequence flags */
+ 		if (atomic_read(&clp->cl_count) > 1)
+ 			nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
++		break;
++	case -NFS4ERR_DELAY:
++		/* The server detected a resend of the RPC call and
++		 * returned NFS4ERR_DELAY as per Section 2.10.6.2
++		 * of RFC5661.
++		 */
++		dprintk("%s: slot=%d seq=%d: Operation in progress\n",
++				__func__, res->sr_slotid, slot->seq_nr);
++		goto out_retry;
++	default:
++		/* Just update the slot sequence no. */
++		++slot->seq_nr;
+ 	}
+ out:
+ 	/* The session may be reset by one of the error handlers. */
+ 	dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
+-	nfs41_sequence_free_slot(clp, res);
++	nfs41_sequence_free_slot(res);
++	return 1;
++out_retry:
++	rpc_delay(task, NFS4_POLL_RETRY_MAX);
++	rpc_restart_call(task);
++	/* FIXME: rpc_restart_call() should be made to return success/fail */
++	if (RPC_ASSASSINATED(task))
++		goto out;
++	return 0;
++}
++
++static int nfs4_sequence_done(struct rpc_task *task,
++			       struct nfs4_sequence_res *res)
++{
++	if (res->sr_session == NULL)
++		return 1;
++	return nfs41_sequence_done(task, res);
+ }
+ 
+ /*
+@@ -480,12 +516,11 @@ static int nfs41_setup_sequence(struct n
+ 	if (res->sr_slotid != NFS4_MAX_SLOT_TABLE)
+ 		return 0;
+ 
+-	memset(res, 0, sizeof(*res));
+ 	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+ 	tbl = &session->fc_slot_table;
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+-	if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state) &&
++	if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
+ 	    !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
+ 		/*
+ 		 * The state manager will wait until the slot table is empty.
+@@ -525,6 +560,7 @@ static int nfs41_setup_sequence(struct n
+ 	res->sr_session = session;
+ 	res->sr_slotid = slotid;
+ 	res->sr_renewal_time = jiffies;
++	res->sr_status_flags = 0;
+ 	/*
+ 	 * sr_status is only set in decode_sequence, and so will remain
+ 	 * set to 1 if an rpc level failure occurs.
+@@ -533,33 +569,36 @@ static int nfs41_setup_sequence(struct n
+ 	return 0;
+ }
+ 
+-int nfs4_setup_sequence(struct nfs_client *clp,
++int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 			struct nfs4_sequence_args *args,
+ 			struct nfs4_sequence_res *res,
+ 			int cache_reply,
+ 			struct rpc_task *task)
+ {
++	struct nfs4_session *session = nfs4_get_session(server);
+ 	int ret = 0;
+ 
++	if (ds_session)
++		session = ds_session;
++	if (session == NULL) {
++		args->sa_session = NULL;
++		res->sr_session = NULL;
++		goto out;
++	}
++
+ 	dprintk("--> %s clp %p session %p sr_slotid %d\n",
+-		__func__, clp, clp->cl_session, res->sr_slotid);
++		__func__, session->clp, session, res->sr_slotid);
+ 
+-	if (!nfs4_has_session(clp))
+-		goto out;
+-	ret = nfs41_setup_sequence(clp->cl_session, args, res, cache_reply,
++	ret = nfs41_setup_sequence(session, args, res, cache_reply,
+ 				   task);
+-	if (ret && ret != -EAGAIN) {
+-		/* terminate rpc task */
+-		task->tk_status = ret;
+-		task->tk_action = NULL;
+-	}
+ out:
+ 	dprintk("<-- %s status=%d\n", __func__, ret);
+ 	return ret;
+ }
+ 
+ struct nfs41_call_sync_data {
+-	struct nfs_client *clp;
++	const struct nfs_server *seq_server;
+ 	struct nfs4_sequence_args *seq_args;
+ 	struct nfs4_sequence_res *seq_res;
+ 	int cache_reply;
+@@ -569,9 +608,9 @@ static void nfs41_call_sync_prepare(stru
+ {
+ 	struct nfs41_call_sync_data *data = calldata;
+ 
+-	dprintk("--> %s data->clp->cl_session %p\n", __func__,
+-		data->clp->cl_session);
+-	if (nfs4_setup_sequence(data->clp, data->seq_args,
++	dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server);
++
++	if (nfs4_setup_sequence(data->seq_server, NULL, data->seq_args,
+ 				data->seq_res, data->cache_reply, task))
+ 		return;
+ 	rpc_call_start(task);
+@@ -587,7 +626,7 @@ static void nfs41_call_sync_done(struct 
+ {
+ 	struct nfs41_call_sync_data *data = calldata;
+ 
+-	nfs41_sequence_done(data->clp, data->seq_res, task->tk_status);
++	nfs41_sequence_done(task, data->seq_res);
+ }
+ 
+ struct rpc_call_ops nfs41_call_sync_ops = {
+@@ -600,8 +639,7 @@ struct rpc_call_ops nfs41_call_priv_sync
+ 	.rpc_call_done = nfs41_call_sync_done,
+ };
+ 
+-static int nfs4_call_sync_sequence(struct nfs_client *clp,
+-				   struct rpc_clnt *clnt,
++static int nfs4_call_sync_sequence(struct nfs_server *server,
+ 				   struct rpc_message *msg,
+ 				   struct nfs4_sequence_args *args,
+ 				   struct nfs4_sequence_res *res,
+@@ -611,13 +649,13 @@ static int nfs4_call_sync_sequence(struc
+ 	int ret;
+ 	struct rpc_task *task;
+ 	struct nfs41_call_sync_data data = {
+-		.clp = clp,
++		.seq_server = server,
+ 		.seq_args = args,
+ 		.seq_res = res,
+ 		.cache_reply = cache_reply,
+ 	};
+ 	struct rpc_task_setup task_setup = {
+-		.rpc_client = clnt,
++		.rpc_client = server->client,
+ 		.rpc_message = msg,
+ 		.callback_ops = &nfs41_call_sync_ops,
+ 		.callback_data = &data
+@@ -642,10 +680,15 @@ int _nfs4_call_sync_session(struct nfs_s
+ 			    struct nfs4_sequence_res *res,
+ 			    int cache_reply)
+ {
+-	return nfs4_call_sync_sequence(server->nfs_client, server->client,
+-				       msg, args, res, cache_reply, 0);
++	return nfs4_call_sync_sequence(server, msg, args, res, cache_reply, 0);
+ }
+ 
++#else
++static int nfs4_sequence_done(struct rpc_task *task,
++			       struct nfs4_sequence_res *res)
++{
++	return 1;
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ int _nfs4_call_sync(struct nfs_server *server,
+@@ -659,18 +702,9 @@ int _nfs4_call_sync(struct nfs_server *s
+ }
+ 
+ #define nfs4_call_sync(server, msg, args, res, cache_reply) \
+-	(server)->nfs_client->cl_call_sync((server), (msg), &(args)->seq_args, \
++	(server)->nfs_client->cl_mvops->call_sync((server), (msg), &(args)->seq_args, \
+ 			&(res)->seq_res, (cache_reply))
+ 
+-static void nfs4_sequence_done(const struct nfs_server *server,
+-			       struct nfs4_sequence_res *res, int rpc_status)
+-{
+-#ifdef CONFIG_NFS_V4_1
+-	if (nfs4_has_session(server->nfs_client))
+-		nfs41_sequence_done(server->nfs_client, res, rpc_status);
+-#endif /* CONFIG_NFS_V4_1 */
+-}
+-
+ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
+ {
+ 	struct nfs_inode *nfsi = NFS_I(dir);
+@@ -745,19 +779,14 @@ static struct nfs4_opendata *nfs4_openda
+ 	p->o_arg.server = server;
+ 	p->o_arg.bitmask = server->attr_bitmask;
+ 	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
+-	if (flags & O_EXCL) {
+-		if (nfs4_has_persistent_session(server->nfs_client)) {
+-			/* GUARDED */
+-			p->o_arg.u.attrs = &p->attrs;
+-			memcpy(&p->attrs, attrs, sizeof(p->attrs));
+-		} else { /* EXCLUSIVE4_1 */
+-			u32 *s = (u32 *) p->o_arg.u.verifier.data;
+-			s[0] = jiffies;
+-			s[1] = current->pid;
+-		}
+-	} else if (flags & O_CREAT) {
++	if (flags & O_CREAT) {
++		u32 *s;
++
+ 		p->o_arg.u.attrs = &p->attrs;
+ 		memcpy(&p->attrs, attrs, sizeof(p->attrs));
++		s = (u32 *) p->o_arg.u.verifier.data;
++		s[0] = jiffies;
++		s[1] = current->pid;
+ 	}
+ 	p->c_arg.fh = &p->o_res.fh;
+ 	p->c_arg.stateid = &p->o_res.stateid;
+@@ -851,8 +880,10 @@ static void update_open_stateflags(struc
+ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
+ {
+ 	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+-		memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
+-	memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
++		memcpy(state->stateid.u.data, stateid->u.data,
++		       sizeof(state->stateid.u.data));
++	memcpy(state->open_stateid.u.data, stateid->u.data,
++	       sizeof(state->open_stateid.u.data));
+ 	switch (fmode) {
+ 		case FMODE_READ:
+ 			set_bit(NFS_O_RDONLY_STATE, &state->flags);
+@@ -880,7 +911,8 @@ static void __update_open_stateid(struct
+ 	 */
+ 	write_seqlock(&state->seqlock);
+ 	if (deleg_stateid != NULL) {
+-		memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
++		memcpy(state->stateid.u.data, deleg_stateid->u.data,
++		       sizeof(state->stateid.u.data));
+ 		set_bit(NFS_DELEGATED_STATE, &state->flags);
+ 	}
+ 	if (open_stateid != NULL)
+@@ -911,7 +943,8 @@ static int update_open_stateid(struct nf
+ 
+ 	if (delegation == NULL)
+ 		delegation = &deleg_cur->stateid;
+-	else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0)
++	else if (memcmp(deleg_cur->stateid.u.data, delegation->u.data,
++			NFS4_STATEID_SIZE) != 0)
+ 		goto no_delegation_unlock;
+ 
+ 	nfs_mark_delegation_referenced(deleg_cur);
+@@ -973,7 +1006,8 @@ static struct nfs4_state *nfs4_try_open_
+ 			break;
+ 		}
+ 		/* Save the delegation */
+-		memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
++		memcpy(stateid.u.data, delegation->stateid.u.data,
++		       sizeof(stateid.u.data));
+ 		rcu_read_unlock();
+ 		ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
+ 		if (ret != 0)
+@@ -1127,10 +1161,13 @@ static int nfs4_open_recover(struct nfs4
+ 	 * Check if we need to update the current stateid.
+ 	 */
+ 	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
+-	    memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
++	    memcmp(state->stateid.u.data, state->open_stateid.u.data,
++		   sizeof(state->stateid.u.data)) != 0) {
+ 		write_seqlock(&state->seqlock);
+ 		if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+-			memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
++			memcpy(state->stateid.u.data,
++			       state->open_stateid.u.data,
++			       sizeof(state->stateid.u.data));
+ 		write_sequnlock(&state->seqlock);
+ 	}
+ 	return 0;
+@@ -1199,8 +1236,8 @@ static int _nfs4_open_delegation_recall(
+ 	if (IS_ERR(opendata))
+ 		return PTR_ERR(opendata);
+ 	opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
+-	memcpy(opendata->o_arg.u.delegation.data, stateid->data,
+-			sizeof(opendata->o_arg.u.delegation.data));
++	memcpy(opendata->o_arg.u.delegation.u.data, stateid->u.data,
++			sizeof(opendata->o_arg.u.delegation.u.data));
+ 	ret = nfs4_open_recover(opendata, state);
+ 	nfs4_opendata_put(opendata);
+ 	return ret;
+@@ -1258,8 +1295,8 @@ static void nfs4_open_confirm_done(struc
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+ 	if (data->rpc_status == 0) {
+-		memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
+-				sizeof(data->o_res.stateid.data));
++		memcpy(data->o_res.stateid.u.data, data->c_res.stateid.u.data,
++				sizeof(data->o_res.stateid.u.data));
+ 		nfs_confirm_seqid(&data->owner->so_seqid, 0);
+ 		renew_lease(data->o_res.server, data->timestamp);
+ 		data->rpc_done = 1;
+@@ -1356,13 +1393,13 @@ static void nfs4_open_prepare(struct rpc
+ 	}
+ 	/* Update sequence id. */
+ 	data->o_arg.id = sp->so_owner_id.id;
+-	data->o_arg.clientid = sp->so_client->cl_clientid;
++	data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid;
+ 	if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
+ 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
+ 		nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
+ 	}
+ 	data->timestamp = jiffies;
+-	if (nfs4_setup_sequence(data->o_arg.server->nfs_client,
++	if (nfs4_setup_sequence(data->o_arg.server, NULL,
+ 				&data->o_arg.seq_args,
+ 				&data->o_res.seq_res, 1, task))
+ 		return;
+@@ -1385,8 +1422,8 @@ static void nfs4_open_done(struct rpc_ta
+ 
+ 	data->rpc_status = task->tk_status;
+ 
+-	nfs4_sequence_done(data->o_arg.server, &data->o_res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->o_res.seq_res))
++		return;
+ 
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+@@ -1539,9 +1576,8 @@ static int _nfs4_proc_open(struct nfs4_o
+ 	return 0;
+ }
+ 
+-static int nfs4_recover_expired_lease(struct nfs_server *server)
++int nfs4_recover_expired_lease(struct nfs_client *clp)
+ {
+-	struct nfs_client *clp = server->nfs_client;
+ 	unsigned int loop;
+ 	int ret;
+ 
+@@ -1557,6 +1593,7 @@ static int nfs4_recover_expired_lease(st
+ 	}
+ 	return ret;
+ }
++EXPORT_SYMBOL(nfs4_recover_expired_lease);
+ 
+ /*
+  * OPEN_EXPIRED:
+@@ -1646,7 +1683,7 @@ static int _nfs4_do_open(struct inode *d
+ 		dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
+ 		goto out_err;
+ 	}
+-	status = nfs4_recover_expired_lease(server);
++	status = nfs4_recover_expired_lease(server->nfs_client);
+ 	if (status != 0)
+ 		goto err_put_state_owner;
+ 	if (path->dentry->d_inode != NULL)
+@@ -1773,7 +1810,7 @@ static int _nfs4_do_setattr(struct inode
+ 	if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
+ 		/* Use that stateid */
+ 	} else if (state != NULL) {
+-		nfs4_copy_stateid(&arg.stateid, state, current->files);
++		nfs4_copy_stateid(&arg.stateid, state, current->files, current->tgid);
+ 	} else
+ 		memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
+ 
+@@ -1838,7 +1875,8 @@ static void nfs4_close_done(struct rpc_t
+ 	struct nfs4_state *state = calldata->state;
+ 	struct nfs_server *server = NFS_SERVER(calldata->inode);
+ 
+-	nfs4_sequence_done(server, &calldata->res.seq_res, task->tk_status);
++	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
++		return;
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+         /* hmm. we are done with the inode, and in the process of freeing
+@@ -1858,7 +1896,7 @@ static void nfs4_close_done(struct rpc_t
+ 			if (calldata->arg.fmode == 0)
+ 				break;
+ 		default:
+-			if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
++			if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
+ 				rpc_restart_call_prepare(task);
+ 	}
+ 	nfs_release_seqid(calldata->arg.seqid);
+@@ -1903,7 +1941,7 @@ static void nfs4_close_prepare(struct rp
+ 
+ 	nfs_fattr_init(calldata->res.fattr);
+ 	calldata->timestamp = jiffies;
+-	if (nfs4_setup_sequence((NFS_SERVER(calldata->inode))->nfs_client,
++	if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), NULL,
+ 				&calldata->arg.seq_args, &calldata->res.seq_res,
+ 				1, task))
+ 		return;
+@@ -2323,6 +2361,9 @@ nfs4_proc_setattr(struct dentry *dentry,
+ 	struct nfs4_state *state = NULL;
+ 	int status;
+ 
++	if (pnfs_ld_layoutret_on_setattr(inode))
++		pnfs_return_layout(inode, NULL, NULL, RETURN_FILE, true);
++
+ 	nfs_fattr_init(fattr);
+ 	
+ 	/* Search for an existing open(O_WRITE) file */
+@@ -2648,8 +2689,9 @@ static int nfs4_proc_unlink_done(struct 
+ {
+ 	struct nfs_removeres *res = task->tk_msg.rpc_resp;
+ 
+-	nfs4_sequence_done(res->server, &res->seq_res, task->tk_status);
+-	if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
++	if (!nfs4_sequence_done(task, &res->seq_res))
++		return 0;
++	if (nfs4_async_handle_error(task, res->server, NULL, NULL) == -EAGAIN)
+ 		return 0;
+ 	update_changeattr(dir, &res->cinfo);
+ 	nfs_post_op_update_inode(dir, res->dir_attr);
+@@ -3090,18 +3132,31 @@ static int nfs4_proc_pathconf(struct nfs
+ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
+ {
+ 	struct nfs_server *server = NFS_SERVER(data->inode);
++	struct nfs_client *client = server->nfs_client;
+ 
+ 	dprintk("--> %s\n", __func__);
+ 
+-	nfs4_sequence_done(server, &data->res.seq_res, task->tk_status);
++#ifdef CONFIG_NFS_V4_1
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
+ 
+-	if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
+-		nfs_restart_rpc(task, server->nfs_client);
++	if (nfs4_async_handle_error(task, server, data->args.context->state, client) == -EAGAIN) {
++		nfs_restart_rpc(task, client);
+ 		return -EAGAIN;
+ 	}
+ 
+ 	nfs_invalidate_atime(data->inode);
+-	if (task->tk_status > 0)
++	if (task->tk_status > 0 && client == server->nfs_client)
+ 		renew_lease(server, data->timestamp);
+ 	return 0;
+ }
+@@ -3112,20 +3167,56 @@ static void nfs4_proc_read_setup(struct 
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+ }
+ 
++static void pnfs4_update_write_done(struct nfs_inode *nfsi, struct nfs_write_data *data)
++{	/* Post-write pNFS bookkeeping; the body is empty unless CONFIG_NFS_V4_1 */
++#ifdef CONFIG_NFS_V4_1
++	pnfs_update_last_write(nfsi, data->args.offset, data->res.count);
++	pnfs_need_layoutcommit(nfsi, data->args.context);
++#endif /* CONFIG_NFS_V4_1 */
++}
++
+ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+ {
+ 	struct inode *inode = data->inode;
+-	
+-	nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
+-			   task->tk_status);
++	struct nfs_server *server = NFS_SERVER(inode);
++	struct nfs_client *client = server->nfs_client;
+ 
+-	if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
+-		nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
++
++#ifdef CONFIG_NFS_V4_1
++	/* restore original count after retry? */
++	if (data->pdata.orig_count) {
++		dprintk("%s: restoring original count %u\n", __func__,
++			data->pdata.orig_count);
++		data->args.count = data->pdata.orig_count;
++	}
++
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (nfs4_async_handle_error(task, server, data->args.context->state, client) == -EAGAIN) {
++		nfs_restart_rpc(task, client);
+ 		return -EAGAIN;
+ 	}
++
++	/*
++	 * MDS write: renew lease
++	 * DS write: update lastbyte written, mark for layout commit
++	 */
+ 	if (task->tk_status >= 0) {
+-		renew_lease(NFS_SERVER(inode), data->timestamp);
+-		nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
++		if (client == server->nfs_client) {
++			renew_lease(server, data->timestamp);
++			nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
++		} else
++			pnfs4_update_write_done(NFS_I(inode), data);
+ 	}
+ 	return 0;
+ }
+@@ -3138,20 +3229,42 @@ static void nfs4_proc_write_setup(struct
+ 	data->res.server = server;
+ 	data->timestamp   = jiffies;
+ 
++#ifdef CONFIG_NFS_V4_1
++	/* writes to DS use pnfs vector */
++	if (data->fldata.ds_nfs_client) {
++		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_WRITE];
++		return;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
+ }
+ 
+ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+ {
+ 	struct inode *inode = data->inode;
+-	
+-	nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
+-			   task->tk_status);
+-	if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
++	struct nfs_server *server = NFS_SERVER(data->inode);
++	struct nfs_client *client = server->nfs_client;
++
++#ifdef CONFIG_NFS_V4_1
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS commit\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
++
++	if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL, NULL) == -EAGAIN) {
+ 		nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
+ 		return -EAGAIN;
+ 	}
+-	nfs_refresh_inode(inode, data->res.fattr);
++	if (client == server->nfs_client)
++		nfs_refresh_inode(inode, data->res.fattr);
+ 	return 0;
+ }
+ 
+@@ -3161,6 +3274,12 @@ static void nfs4_proc_commit_setup(struc
+ 	
+ 	data->args.bitmask = server->cache_consistency_bitmask;
+ 	data->res.server = server;
++#if defined(CONFIG_NFS_V4_1)
++	if (data->fldata.ds_nfs_client) {
++		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_COMMIT];
++		return;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
+ }
+ 
+@@ -3464,9 +3583,12 @@ static int nfs4_proc_set_acl(struct inod
+ }
+ 
+ static int
+-_nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs_client *clp, struct nfs4_state *state)
++nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state, struct nfs_client *clp)
+ {
+-	if (!clp || task->tk_status >= 0)
++	if (!clp)
++		clp = server->nfs_client;
++
++	if (task->tk_status >= 0)
+ 		return 0;
+ 	switch(task->tk_status) {
+ 		case -NFS4ERR_ADMIN_REVOKED:
+@@ -3491,8 +3613,9 @@ _nfs4_async_handle_error(struct rpc_task
+ 		case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ 		case -NFS4ERR_SEQ_FALSE_RETRY:
+ 		case -NFS4ERR_SEQ_MISORDERED:
+-			dprintk("%s ERROR %d, Reset session\n", __func__,
+-				task->tk_status);
++			dprintk("%s ERROR %d, Reset session. Exchangeid "
++				"flags 0x%x\n", __func__, task->tk_status,
++				clp->cl_exchange_flags);
+ 			nfs4_schedule_state_recovery(clp);
+ 			task->tk_status = 0;
+ 			return -EAGAIN;
+@@ -3512,6 +3635,8 @@ _nfs4_async_handle_error(struct rpc_task
+ 	task->tk_status = nfs4_map_errors(task->tk_status);
+ 	return 0;
+ do_state_recovery:
++	if (is_ds_only_client(clp))
++		return 0;
+ 	rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
+ 	nfs4_schedule_state_recovery(clp);
+ 	if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
+@@ -3520,12 +3645,6 @@ do_state_recovery:
+ 	return -EAGAIN;
+ }
+ 
+-static int
+-nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
+-{
+-	return _nfs4_async_handle_error(task, server, server->nfs_client, state);
+-}
+-
+ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
+ 		unsigned short port, struct rpc_cred *cred,
+ 		struct nfs4_setclientid_res *res)
+@@ -3641,8 +3760,8 @@ static void nfs4_delegreturn_done(struct
+ {
+ 	struct nfs4_delegreturndata *data = calldata;
+ 
+-	nfs4_sequence_done(data->res.server, &data->res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return;
+ 
+ 	switch (task->tk_status) {
+ 	case -NFS4ERR_STALE_STATEID:
+@@ -3651,8 +3770,8 @@ static void nfs4_delegreturn_done(struct
+ 		renew_lease(data->res.server, data->timestamp);
+ 		break;
+ 	default:
+-		if (nfs4_async_handle_error(task, data->res.server, NULL) ==
+-				-EAGAIN) {
++		if (nfs4_async_handle_error(task, data->res.server, NULL, NULL)
++				== -EAGAIN) {
+ 			nfs_restart_rpc(task, data->res.server->nfs_client);
+ 			return;
+ 		}
+@@ -3672,7 +3791,7 @@ static void nfs4_delegreturn_prepare(str
+ 
+ 	d_data = (struct nfs4_delegreturndata *)data;
+ 
+-	if (nfs4_setup_sequence(d_data->res.server->nfs_client,
++	if (nfs4_setup_sequence(d_data->res.server, NULL,
+ 				&d_data->args.seq_args,
+ 				&d_data->res.seq_res, 1, task))
+ 		return;
+@@ -3892,15 +4011,16 @@ static void nfs4_locku_done(struct rpc_t
+ {
+ 	struct nfs4_unlockdata *calldata = data;
+ 
+-	nfs4_sequence_done(calldata->server, &calldata->res.seq_res,
+-			   task->tk_status);
++	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
++		return;
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+ 	switch (task->tk_status) {
+ 		case 0:
+-			memcpy(calldata->lsp->ls_stateid.data,
+-					calldata->res.stateid.data,
+-					sizeof(calldata->lsp->ls_stateid.data));
++			memcpy(calldata->lsp->ls_stateid.u.data,
++					calldata->res.stateid.u.data,
++					sizeof(calldata->lsp->ls_stateid.u.
++					       data));
+ 			renew_lease(calldata->server, calldata->timestamp);
+ 			break;
+ 		case -NFS4ERR_BAD_STATEID:
+@@ -3909,7 +4029,7 @@ static void nfs4_locku_done(struct rpc_t
+ 		case -NFS4ERR_EXPIRED:
+ 			break;
+ 		default:
+-			if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
++			if (nfs4_async_handle_error(task, calldata->server, NULL, NULL) == -EAGAIN)
+ 				nfs_restart_rpc(task,
+ 						 calldata->server->nfs_client);
+ 	}
+@@ -3927,7 +4047,7 @@ static void nfs4_locku_prepare(struct rp
+ 		return;
+ 	}
+ 	calldata->timestamp = jiffies;
+-	if (nfs4_setup_sequence(calldata->server->nfs_client,
++	if (nfs4_setup_sequence(calldata->server, NULL,
+ 				&calldata->arg.seq_args,
+ 				&calldata->res.seq_res, 1, task))
+ 		return;
+@@ -4082,7 +4202,8 @@ static void nfs4_lock_prepare(struct rpc
+ 	} else
+ 		data->arg.new_lock_owner = 0;
+ 	data->timestamp = jiffies;
+-	if (nfs4_setup_sequence(data->server->nfs_client, &data->arg.seq_args,
++	if (nfs4_setup_sequence(data->server, NULL,
++				&data->arg.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+@@ -4101,8 +4222,8 @@ static void nfs4_lock_done(struct rpc_ta
+ 
+ 	dprintk("%s: begin!\n", __func__);
+ 
+-	nfs4_sequence_done(data->server, &data->res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return;
+ 
+ 	data->rpc_status = task->tk_status;
+ 	if (RPC_ASSASSINATED(task))
+@@ -4114,8 +4235,8 @@ static void nfs4_lock_done(struct rpc_ta
+ 			goto out;
+ 	}
+ 	if (data->rpc_status == 0) {
+-		memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
+-					sizeof(data->lsp->ls_stateid.data));
++		memcpy(data->lsp->ls_stateid.u.data, data->res.stateid.u.data,
++					sizeof(data->lsp->ls_stateid.u.data));
+ 		data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+ 		renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
+ 	}
+@@ -4424,6 +4545,34 @@ out:
+ 	return err;
+ }
+ 
++static void nfs4_release_lockowner_release(void *calldata)
++{	/* rpc_release callback wired up in nfs4_release_lockowner_ops */
++	kfree(calldata);	/* the args kmalloc'ed in nfs4_release_lockowner() */
++}
++
++const struct rpc_call_ops nfs4_release_lockowner_ops = {
++	.rpc_release = nfs4_release_lockowner_release,	/* sole callback: frees calldata */
++};
++
++void nfs4_release_lockowner(const struct nfs4_lock_state *lsp)
++{	/* Send an async RELEASE_LOCKOWNER for lsp's owner; nothing is reported back */
++	struct nfs_server *server = lsp->ls_state->owner->so_server;
++	struct nfs_release_lockowner_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER],
++	};
++
++	if (server->nfs_client->cl_mvops->minor_version != 0)
++		return;	/* only issued when the minor version is 0 */
++	args = kmalloc(sizeof(*args), GFP_NOFS);
++	if (!args)
++		return;	/* allocation failure: silently skip the call */
++	args->lock_owner.clientid = server->nfs_client->cl_clientid;
++	args->lock_owner.id = lsp->ls_id.id;
++	msg.rpc_argp = args;	/* args doubles as calldata, kfree'd by .rpc_release */
++	rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
++}
++
+ #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
+ 
+ int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
+@@ -4526,7 +4675,7 @@ int nfs4_proc_exchange_id(struct nfs_cli
+ 	nfs4_verifier verifier;
+ 	struct nfs41_exchange_id_args args = {
+ 		.client = clp,
+-		.flags = clp->cl_exchange_flags,
++		.flags = clp->cl_exchange_flags & ~EXCHGID4_FLAG_CONFIRMED_R,
+ 	};
+ 	struct nfs41_exchange_id_res res = {
+ 		.client = clp,
+@@ -4574,6 +4723,7 @@ int nfs4_proc_exchange_id(struct nfs_cli
+ 	dprintk("<-- %s status= %d\n", __func__, status);
+ 	return status;
+ }
++EXPORT_SYMBOL(nfs4_proc_exchange_id);
+ 
+ struct nfs4_get_lease_time_data {
+ 	struct nfs4_get_lease_time_args *args;
+@@ -4611,7 +4761,8 @@ static void nfs4_get_lease_time_done(str
+ 			(struct nfs4_get_lease_time_data *)calldata;
+ 
+ 	dprintk("--> %s\n", __func__);
+-	nfs41_sequence_done(data->clp, &data->res->lr_seq_res, task->tk_status);
++	if (!nfs41_sequence_done(task, &data->res->lr_seq_res))
++		return;
+ 	switch (task->tk_status) {
+ 	case -NFS4ERR_DELAY:
+ 	case -NFS4ERR_GRACE:
+@@ -4805,13 +4956,6 @@ struct nfs4_session *nfs4_alloc_session(
+ 	if (!session)
+ 		return NULL;
+ 
+-	/*
+-	 * The create session reply races with the server back
+-	 * channel probe. Mark the client NFS_CS_SESSION_INITING
+-	 * so that the client back channel can find the
+-	 * nfs_client struct
+-	 */
+-	clp->cl_cons_state = NFS_CS_SESSION_INITING;
+ 	init_completion(&session->complete);
+ 
+ 	tbl = &session->fc_slot_table;
+@@ -4824,6 +4968,8 @@ struct nfs4_session *nfs4_alloc_session(
+ 	spin_lock_init(&tbl->slot_tbl_lock);
+ 	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
+ 
++	session->session_state = 1<<NFS4_SESSION_INITING;
++
+ 	session->clp = clp;
+ 	return session;
+ }
+@@ -5040,6 +5186,10 @@ int nfs4_init_session(struct nfs_server 
+ 	if (!nfs4_has_session(clp))
+ 		return 0;
+ 
++	session = clp->cl_session;
++	if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
++		return 0;
++
+ 	rsize = server->rsize;
+ 	if (rsize == 0)
+ 		rsize = NFS_MAX_FILE_IO_SIZE;
+@@ -5047,11 +5197,10 @@ int nfs4_init_session(struct nfs_server 
+ 	if (wsize == 0)
+ 		wsize = NFS_MAX_FILE_IO_SIZE;
+ 
+-	session = clp->cl_session;
+ 	session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
+ 	session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
+ 
+-	ret = nfs4_recover_expired_lease(server);
++	ret = nfs4_recover_expired_lease(server->nfs_client);
+ 	if (!ret)
+ 		ret = nfs4_check_client_ready(clp);
+ 	return ret;
+@@ -5060,69 +5209,70 @@ int nfs4_init_session(struct nfs_server 
+ /*
+  * Renew the cl_session lease.
+  */
+-static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+-{
++struct nfs4_sequence_data {
++	struct nfs_client *clp;
+ 	struct nfs4_sequence_args args;
+ 	struct nfs4_sequence_res res;
+-
+-	struct rpc_message msg = {
+-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
+-		.rpc_argp = &args,
+-		.rpc_resp = &res,
+-		.rpc_cred = cred,
+-	};
+-
+-	args.sa_cache_this = 0;
+-
+-	return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args,
+-				       &res, args.sa_cache_this, 1);
+-}
++};
+ 
+ static void nfs41_sequence_release(void *data)
+ {
+-	struct nfs_client *clp = (struct nfs_client *)data;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 
+ 	if (atomic_read(&clp->cl_count) > 1)
+ 		nfs4_schedule_state_renewal(clp);
+ 	nfs_put_client(clp);
++	kfree(calldata);
++}
++
++static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client *clp)
++{	/* Returns -EAGAIN when the caller should restart the call, otherwise 0 */
++	switch(task->tk_status) {
++	case -NFS4ERR_DELAY:
++	case -EKEYEXPIRED:
++		rpc_delay(task, NFS4_POLL_RETRY_MAX);
++		return -EAGAIN;	/* retry once the delay has elapsed */
++	default:
++		nfs4_schedule_state_recovery(clp);	/* any other status: recover, no retry */
++	}
++	return 0;
++}
+ 
+ static void nfs41_sequence_call_done(struct rpc_task *task, void *data)
+ {
+-	struct nfs_client *clp = (struct nfs_client *)data;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 
+-	nfs41_sequence_done(clp, task->tk_msg.rpc_resp, task->tk_status);
++	if (!nfs41_sequence_done(task, task->tk_msg.rpc_resp))
++		return;
+ 
+ 	if (task->tk_status < 0) {
+ 		dprintk("%s ERROR %d\n", __func__, task->tk_status);
+ 		if (atomic_read(&clp->cl_count) == 1)
+ 			goto out;
+ 
+-		if (_nfs4_async_handle_error(task, NULL, clp, NULL)
+-								== -EAGAIN) {
+-			nfs_restart_rpc(task, clp);
++		if (nfs41_sequence_handle_errors(task, clp) == -EAGAIN) {
++			rpc_restart_call_prepare(task);
+ 			return;
+ 		}
+ 	}
+ 	dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred);
+ out:
+-	kfree(task->tk_msg.rpc_argp);
+-	kfree(task->tk_msg.rpc_resp);
+-
+ 	dprintk("<-- %s\n", __func__);
+ }
+ 
+ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
+ {
+-	struct nfs_client *clp;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 	struct nfs4_sequence_args *args;
+ 	struct nfs4_sequence_res *res;
+ 
+-	clp = (struct nfs_client *)data;
+ 	args = task->tk_msg.rpc_argp;
+ 	res = task->tk_msg.rpc_resp;
+ 
+-	if (nfs4_setup_sequence(clp, args, res, 0, task))
++	if (nfs41_setup_sequence(clp->cl_session, args, res, 0, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
+@@ -5133,32 +5283,67 @@ static const struct rpc_call_ops nfs41_s
+ 	.rpc_release = nfs41_sequence_release,
+ };
+ 
+-static int nfs41_proc_async_sequence(struct nfs_client *clp,
+-				     struct rpc_cred *cred)
++static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+ {
+-	struct nfs4_sequence_args *args;
+-	struct nfs4_sequence_res *res;
++	struct nfs4_sequence_data *calldata;
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
+ 		.rpc_cred = cred,
+ 	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = clp->cl_rpcclient,
++		.rpc_message = &msg,
++		.callback_ops = &nfs41_sequence_ops,
++		.flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
++	};
+ 
+ 	if (!atomic_inc_not_zero(&clp->cl_count))
+-		return -EIO;
+-	args = kzalloc(sizeof(*args), GFP_NOFS);
+-	res = kzalloc(sizeof(*res), GFP_NOFS);
+-	if (!args || !res) {
+-		kfree(args);
+-		kfree(res);
++		return ERR_PTR(-EIO);
++	calldata = kmalloc(sizeof(*calldata), GFP_NOFS);
++	if (calldata == NULL) {
+ 		nfs_put_client(clp);
+-		return -ENOMEM;
++		return ERR_PTR(-ENOMEM);
+ 	}
+-	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+-	msg.rpc_argp = args;
+-	msg.rpc_resp = res;
++	calldata->res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	msg.rpc_argp = &calldata->args;
++	msg.rpc_resp = &calldata->res;
++	calldata->clp = clp;
++	task_setup_data.callback_data = calldata;
+ 
+-	return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
+-			      &nfs41_sequence_ops, (void *)clp);
++	return rpc_run_task(&task_setup_data);
++}
++
++static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred)
++{	/* Start a SEQUENCE call via _nfs41_proc_sequence() and return without waiting */
++	struct rpc_task *task;
++	int ret = 0;
++
++	task = _nfs41_proc_sequence(clp, cred);
++	if (IS_ERR(task))
++		ret = PTR_ERR(task);	/* the task could not be started */
++	else
++		rpc_put_task(task);	/* put the returned task; completion is not awaited */
++	dprintk("<-- %s status=%d\n", __func__, ret);
++	return ret;
++}
++
++static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
++{	/* Run a SEQUENCE call and wait for it; returns the wait error or tk_status */
++	struct rpc_task *task;
++	int ret;
++
++	task = _nfs41_proc_sequence(clp, cred);
++	if (IS_ERR(task)) {
++		ret = PTR_ERR(task);
++		goto out;	/* no task to put on this path */
++	}
++	ret = rpc_wait_for_completion_task(task);
++	if (!ret)
++		ret = task->tk_status;	/* wait succeeded: report the RPC's own status */
++	rpc_put_task(task);
++out:
++	dprintk("<-- %s status=%d\n", __func__, ret);
++	return ret;
++}
+ 
+ struct nfs4_reclaim_complete_data {
+@@ -5172,13 +5357,31 @@ static void nfs4_reclaim_complete_prepar
+ 	struct nfs4_reclaim_complete_data *calldata = data;
+ 
+ 	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+-	if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args,
++	if (nfs41_setup_sequence(calldata->clp->cl_session,
++				&calldata->arg.seq_args,
+ 				&calldata->res.seq_res, 0, task))
+ 		return;
+ 
+ 	rpc_call_start(task);
+ }
+ 
++static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nfs_client *clp)
++{	/* Returns -EAGAIN when the caller should restart the call, otherwise 0 */
++	switch(task->tk_status) {
++	case 0:
++	case -NFS4ERR_COMPLETE_ALREADY:
++	case -NFS4ERR_WRONG_CRED: /* What to do here? */
++		break;	/* nothing further is done for these statuses */
++	case -NFS4ERR_DELAY:
++	case -EKEYEXPIRED:
++		rpc_delay(task, NFS4_POLL_RETRY_MAX);
++		return -EAGAIN;	/* retry once the delay has elapsed */
++	default:
++		nfs4_schedule_state_recovery(clp);	/* recover state; no retry on this path */
++	}
++	return 0;
++}
++
+ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data)
+ {
+ 	struct nfs4_reclaim_complete_data *calldata = data;
+@@ -5186,32 +5389,13 @@ static void nfs4_reclaim_complete_done(s
+ 	struct nfs4_sequence_res *res = &calldata->res.seq_res;
+ 
+ 	dprintk("--> %s\n", __func__);
+-	nfs41_sequence_done(clp, res, task->tk_status);
+-	switch (task->tk_status) {
+-	case 0:
+-	case -NFS4ERR_COMPLETE_ALREADY:
+-		break;
+-	case -NFS4ERR_BADSESSION:
+-	case -NFS4ERR_DEADSESSION:
+-		/*
+-		 * Handle the session error, but do not retry the operation, as
+-		 * we have no way of telling whether the clientid had to be
+-		 * reset before we got our reply.  If reset, a new wave of
+-		 * reclaim operations will follow, containing their own reclaim
+-		 * complete.  We don't want our retry to get on the way of
+-		 * recovery by incorrectly indicating to the server that we're
+-		 * done reclaiming state since the process had to be restarted.
+-		 */
+-		_nfs4_async_handle_error(task, NULL, clp, NULL);
+-		break;
+-	default:
+-		if (_nfs4_async_handle_error(
+-				task, NULL, clp, NULL) == -EAGAIN) {
+-			rpc_restart_call_prepare(task);
+-			return;
+-		}
+-	}
++	if (!nfs41_sequence_done(task, res))
++		return;
+ 
++	if (nfs41_reclaim_complete_handle_errors(task, clp) == -EAGAIN) {
++		rpc_restart_call_prepare(task);
++		return;
++	}
+ 	dprintk("<-- %s\n", __func__);
+ }
+ 
+@@ -5268,6 +5452,404 @@ out:
+ 	dprintk("<-- %s status=%d\n", __func__, status);
+ 	return status;
+ }
++
++static void
++nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
++{	/* rpc_call_prepare: run nfs4_setup_sequence(), start the call if it returns 0 */
++	struct nfs4_layoutget *lgp = calldata;
++	struct inode *ino = lgp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++	if (nfs4_setup_sequence(server, NULL, &lgp->args.seq_args,
++				&lgp->res.seq_res, 0, task))
++		return;	/* non-zero from setup: the call is not started here */
++	rpc_call_start(task);
++}
++
++static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
++{	/* rpc_call_done: hand the result to pnfs and record it in lgp->status */
++	struct nfs4_layoutget *lgp = calldata;
++	struct inode *ino = lgp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++
++	if (!nfs4_sequence_done(task, &lgp->res.seq_res))
++		return;
++
++	if (RPC_ASSASSINATED(task))
++		return;
++
++	pnfs_get_layout_done(lgp, task->tk_status);	/* called before any error handling */
++
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client);	/* NOTE(review): no return after restart */
++
++	lgp->status = task->tk_status;	/* tk_status as left by the error handler */
++	dprintk("<-- %s\n", __func__);
++}
++
++static void nfs4_layoutget_release(void *calldata)
++{	/* rpc_release callback; also called directly by _nfs4_proc_layoutget() */
++	struct nfs4_layoutget *lgp = calldata;
++
++	dprintk("--> %s\n", __func__);
++	pnfs_layout_release(NFS_I(lgp->args.inode)->layout, NULL);
++	if (lgp->res.layout.buf != NULL)
++		free_page((unsigned long) lgp->res.layout.buf);	/* page from __get_free_page() */
++	kfree(calldata);	/* lgp itself must not be used after this */
++	dprintk("<-- %s\n", __func__);
++}
++
++static const struct rpc_call_ops nfs4_layoutget_call_ops = {
++	.rpc_call_prepare = nfs4_layoutget_prepare,	/* sequence setup, then start */
++	.rpc_call_done = nfs4_layoutget_done,	/* stores lgp->status */
++	.rpc_release = nfs4_layoutget_release,	/* frees the page buffer and lgp */
++};
++
++/* FIXME: We need to call nfs4_handle_exception
++ * and deal with retries.
++ * Currently we can't since we release lgp and its contents.
++ */
++static int _nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
++{	/* Run LAYOUTGET as an async task, wait for it, then process the layout */
++	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
++	struct rpc_task *task;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
++		.rpc_argp = &lgp->args,
++		.rpc_resp = &lgp->res,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = server->client,
++		.rpc_message = &msg,
++		.callback_ops = &nfs4_layoutget_call_ops,
++		.callback_data = lgp,
++		.flags = RPC_TASK_ASYNC,
++	};
++	int status = 0;
++
++	dprintk("--> %s\n", __func__);
++
++	lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS);
++	if (lgp->res.layout.buf == NULL) {
++		nfs4_layoutget_release(lgp);	/* kfree()s lgp; it is gone on return */
++		return -ENOMEM;
++	}
++
++	lgp->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	status = nfs4_wait_for_completion_rpc_task(task);
++	if (status != 0)
++		goto out;	/* the wait itself failed */
++	status = lgp->status;	/* written by nfs4_layoutget_done() */
++	if (status != 0)
++		goto out;
++	status = pnfs_layout_process(lgp);	/* only reached when the RPC status is 0 */
++out:
++	rpc_put_task(task);
++	dprintk("<-- %s status=%d\n", __func__, status);
++	return status;
++}
++
++int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
++{	/* Entry point: _nfs4_proc_layoutget() wrapped in nfs4_handle_exception() */
++	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
++	struct nfs4_exception exception = { };
++	int err;
++	do {
++		err = nfs4_handle_exception(server, _nfs4_proc_layoutget(lgp),
++					    &exception);
++	} while (exception.retry);	/* NOTE(review): release callback kfree()s lgp - is a retry safe? */
++	return err;
++}
++
++static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *data)
++{	/* rpc_call_prepare: run nfs4_setup_sequence(), start the call if it returns 0 */
++	struct nfs4_layoutcommit_data *ldata =
++		(struct nfs4_layoutcommit_data *)data;
++	struct nfs_server *server = NFS_SERVER(ldata->args.inode);
++
++	if (nfs4_setup_sequence(server, NULL, &ldata->args.seq_args,
++				&ldata->res.seq_res, 1, task))
++		return;	/* non-zero from setup: the call is not started here */
++	rpc_call_start(task);
++}
++
++static void
++nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
++{	/* rpc_call_done: run error handling, then record tk_status in data->status */
++	struct nfs4_layoutcommit_data *data =
++		(struct nfs4_layoutcommit_data *)calldata;
++	struct nfs_server *server = NFS_SERVER(data->args.inode);
++
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return;
++
++	if (RPC_ASSASSINATED(task))
++		return;
++
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client);	/* NOTE(review): no return after restart */
++
++	data->status = task->tk_status;	/* tk_status as left by the error handler */
++}
++
++static void nfs4_layoutcommit_release(void *lcdata)
++{	/* rpc_release callback: drop the cred, clean up and free the commit data */
++	struct nfs4_layoutcommit_data *data =
++		(struct nfs4_layoutcommit_data *)lcdata;
++
++	put_rpccred(data->cred);
++	pnfs_cleanup_layoutcommit(lcdata);
++	pnfs_layoutcommit_free(lcdata);	/* NOTE(review): data is still read below - confirm no use-after-free */
++	/* Matched by get_layout in pnfs_layoutcommit_inode */
++	put_layout(data->args.inode);
++}
++
++static const struct rpc_call_ops nfs4_layoutcommit_ops = {
++	.rpc_call_prepare = nfs4_layoutcommit_prepare,	/* sequence setup, then start */
++	.rpc_call_done = nfs4_layoutcommit_done,	/* stores data->status */
++	.rpc_release = nfs4_layoutcommit_release,	/* puts cred/layout, frees data */
++};
++
++/* Execute a layoutcommit to the server */
++static int
++_nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, int issync)
++{	/* issync != 0: wait for the task to complete before returning */
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTCOMMIT],
++		.rpc_argp = &data->args,
++		.rpc_resp = &data->res,
++		.rpc_cred = data->cred,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.task = &data->task,	/* task storage supplied by data */
++		.rpc_client = NFS_CLIENT(data->args.inode),
++		.rpc_message = &msg,
++		.callback_ops = &nfs4_layoutcommit_ops,
++		.callback_data = data,
++		.flags = RPC_TASK_ASYNC,
++	};
++	struct rpc_task *task;
++	int status = 0;
++
++	dprintk("NFS: %4d initiating layoutcommit call. %llu@%llu lbw: %llu "
++		"type: %d issync %d\n",
++		data->task.tk_pid,
++		data->args.range.length,
++		data->args.range.offset,
++		data->args.lastbytewritten,
++		data->args.layout_type, issync);
++
++	data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	if (!issync)
++		goto out;	/* async: do not wait for completion */
++	status = nfs4_wait_for_completion_rpc_task(task);
++	if (status != 0)
++		goto out;	/* the wait itself failed */
++	status = data->status;	/* written by nfs4_layoutcommit_done() */
++out:
++	dprintk("%s: status %d\n", __func__, status);
++	rpc_put_task(task);
++	return 0;	/* NOTE(review): status is only logged, never returned - confirm intended */
++}
++
++int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, int issync)
++{
++	struct nfs4_exception exception = { };
++	struct nfs_server *server = NFS_SERVER(data->args.inode);
++	int err;
++
++	do {
++		err = nfs4_handle_exception(server,
++					_nfs4_proc_layoutcommit(data, issync),
++					&exception);
++	} while (exception.retry);
++	return err;
++}
++
++static void
++nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutreturn *lrp = calldata;
++	struct inode *ino = lrp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++	if (nfs4_setup_sequence(server, NULL, &lrp->args.seq_args,
++				&lrp->res.seq_res, 0, task))
++		return;
++	rpc_call_start(task);
++}
++
++static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutreturn *lrp = calldata;
++	struct inode *ino = lrp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++
++	if (!nfs4_sequence_done(task, &lrp->res.seq_res))
++		return;
++
++	if (RPC_ASSASSINATED(task))
++		return;
++
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client);
++
++	dprintk("<-- %s\n", __func__);
++}
++
++static void nfs4_layoutreturn_release(void *calldata)
++{
++	struct nfs4_layoutreturn *lrp = calldata;
++	struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
++
++	dprintk("--> %s return_type %d lo %p\n", __func__,
++		lrp->args.return_type, lo);
++
++	if (lrp->args.return_type == RETURN_FILE) {
++		if (!lrp->res.lrs_present)
++			pnfs_set_layout_stateid(lo, &zero_stateid);
++		pnfs_layout_release(lo, &lrp->args.range);
++	}
++	kfree(calldata);
++	dprintk("<-- %s\n", __func__);
++}
++
++static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
++	.rpc_call_prepare = nfs4_layoutreturn_prepare,
++	.rpc_call_done = nfs4_layoutreturn_done,
++	.rpc_release = nfs4_layoutreturn_release,
++};
++
++int _nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync)
++{
++	struct inode *ino = lrp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++	struct rpc_task *task;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN],
++		.rpc_argp = &lrp->args,
++		.rpc_resp = &lrp->res,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = server->client,
++		.rpc_message = &msg,
++		.callback_ops = &nfs4_layoutreturn_call_ops,
++		.callback_data = lrp,
++		.flags = RPC_TASK_ASYNC,
++	};
++	int status = 0;
++
++	dprintk("--> %s\n", __func__);
++	lrp->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	if (!issync)
++		goto out;
++	status = nfs4_wait_for_completion_rpc_task(task);
++	if (status != 0)
++		goto out;
++	status = task->tk_status;
++out:
++	dprintk("<-- %s\n", __func__);
++	rpc_put_task(task);
++	return status;
++}
++
++int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync)
++{
++	struct nfs_server *server = NFS_SERVER(lrp->args.inode);
++	struct nfs4_exception exception = { };
++	int err;
++	do {
++		err = nfs4_handle_exception(server,
++				_nfs4_proc_layoutreturn(lrp, issync),
++				&exception);
++	} while (exception.retry);
++
++	return err;
++}
++
++/*
++ * Retrieve the list of Data Server devices from the MDS.
++ */
++static int _nfs4_getdevicelist(struct nfs_server *server,
++				    const struct nfs_fh *fh,
++				    struct pnfs_devicelist *devlist)
++{
++	struct nfs4_getdevicelist_args args = {
++		.fh = fh,
++		.layoutclass = server->pnfs_curr_ld->id,
++	};
++	struct nfs4_getdevicelist_res res = {
++		.devlist = devlist,
++	};
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICELIST],
++		.rpc_argp = &args,
++		.rpc_resp = &res,
++	};
++	int status;
++
++	dprintk("--> %s\n", __func__);
++	status = nfs4_call_sync(server, &msg, &args, &res, 0);
++	dprintk("<-- %s status=%d\n", __func__, status);
++	return status;
++}
++
++int nfs4_proc_getdevicelist(struct nfs_server *server,
++			    const struct nfs_fh *fh,
++			    struct pnfs_devicelist *devlist)
++{
++	struct nfs4_exception exception = { };
++	int err;
++
++	do {
++		err = nfs4_handle_exception(server,
++				_nfs4_getdevicelist(server, fh, devlist),
++				&exception);
++	} while (exception.retry);
++
++	dprintk("nfs4_pnfs_getdevlist: err=%d, num_devs=%u\n",
++		err, devlist->num_devs);
++
++	return err;
++}
++
++int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
++{
++	struct nfs4_getdeviceinfo_args args = {
++		.pdev = pdev,
++	};
++	struct nfs4_getdeviceinfo_res res = {
++		.pdev = pdev,
++	};
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO],
++		.rpc_argp = &args,
++		.rpc_resp = &res,
++	};
++	int status;
++
++	dprintk("--> %s\n", __func__);
++	status = nfs4_call_sync(server, &msg, &args, &res, 0);
++	dprintk("<-- %s status=%d\n", __func__, status);
++
++	return status;
++}
++
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
+@@ -5325,28 +5907,30 @@ struct nfs4_state_maintenance_ops nfs41_
+ };
+ #endif
+ 
+-/*
+- * Per minor version reboot and network partition recovery ops
+- */
+-
+-struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[] = {
+-	&nfs40_reboot_recovery_ops,
+-#if defined(CONFIG_NFS_V4_1)
+-	&nfs41_reboot_recovery_ops,
+-#endif
++static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
++	.minor_version = 0,
++	.call_sync = _nfs4_call_sync,
++	.validate_stateid = nfs4_validate_delegation_stateid,
++	.reboot_recovery_ops = &nfs40_reboot_recovery_ops,
++	.nograce_recovery_ops = &nfs40_nograce_recovery_ops,
++	.state_renewal_ops = &nfs40_state_renewal_ops,
+ };
+ 
+-struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[] = {
+-	&nfs40_nograce_recovery_ops,
+ #if defined(CONFIG_NFS_V4_1)
+-	&nfs41_nograce_recovery_ops,
+-#endif
++static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
++	.minor_version = 1,
++	.call_sync = _nfs4_call_sync_session,
++	.validate_stateid = nfs41_validate_delegation_stateid,
++	.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
++	.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
++	.state_renewal_ops = &nfs41_state_renewal_ops,
+ };
++#endif
+ 
+-struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[] = {
+-	&nfs40_state_renewal_ops,
++const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
++	[0] = &nfs_v4_0_minor_ops,
+ #if defined(CONFIG_NFS_V4_1)
+-	&nfs41_state_renewal_ops,
++	[1] = &nfs_v4_1_minor_ops,
+ #endif
+ };
+ 
+@@ -5364,6 +5948,7 @@ const struct nfs_rpc_ops nfs_v4_clientop
+ 	.dentry_ops	= &nfs4_dentry_operations,
+ 	.dir_inode_ops	= &nfs4_dir_inode_operations,
+ 	.file_inode_ops	= &nfs4_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs4_proc_get_root,
+ 	.getattr	= nfs4_proc_getattr,
+ 	.setattr	= nfs4_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs/nfs4renewd.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-08-31 20:42:05.526213255 -0400
+@@ -54,17 +54,17 @@
+ void
+ nfs4_renew_state(struct work_struct *work)
+ {
+-	struct nfs4_state_maintenance_ops *ops;
++	const struct nfs4_state_maintenance_ops *ops;
+ 	struct nfs_client *clp =
+ 		container_of(work, struct nfs_client, cl_renewd.work);
+ 	struct rpc_cred *cred;
+ 	long lease;
+ 	unsigned long last, now;
+ 
+-	ops = nfs4_state_renewal_ops[clp->cl_minorversion];
++	ops = clp->cl_mvops->state_renewal_ops;
+ 	dprintk("%s: start\n", __func__);
+ 	/* Are there any active superblocks? */
+-	if (list_empty(&clp->cl_superblocks))
++	if (list_empty(&clp->cl_superblocks) && !is_ds_only_client(clp))
+ 		goto out;
+ 	spin_lock(&clp->cl_lock);
+ 	lease = clp->cl_lease_time;
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-08-31 20:41:19.158078621 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-31 20:42:05.527232994 -0400
+@@ -48,11 +48,13 @@
+ #include <linux/random.h>
+ #include <linux/workqueue.h>
+ #include <linux/bitops.h>
++#include <linux/nfs4_pnfs.h>
+ 
+ #include "nfs4_fs.h"
+ #include "callback.h"
+ #include "delegation.h"
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ #define OPENOWNER_POOL_SIZE	8
+ 
+@@ -126,6 +128,11 @@ static int nfs41_setup_state_renewal(str
+ 	int status;
+ 	struct nfs_fsinfo fsinfo;
+ 
++	if (is_ds_only_client(clp)) {
++		nfs4_schedule_state_renewal(clp);
++		return 0;
++	}
++
+ 	status = nfs4_proc_get_lease_time(clp, &fsinfo);
+ 	if (status == 0) {
+ 		/* Update lease time and schedule renewal */
+@@ -145,7 +152,9 @@ static void nfs4_end_drain_session(struc
+ 	struct nfs4_session *ses = clp->cl_session;
+ 	int max_slots;
+ 
+-	if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) {
++	if (ses == NULL)
++		return;
++	if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
+ 		spin_lock(&ses->fc_slot_table.slot_tbl_lock);
+ 		max_slots = ses->fc_slot_table.max_slots;
+ 		while (max_slots--) {
+@@ -167,7 +176,7 @@ static int nfs4_begin_drain_session(stru
+ 	struct nfs4_slot_table *tbl = &ses->fc_slot_table;
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+-	set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state);
++	set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
+ 	if (tbl->highest_used_slotid != -1) {
+ 		INIT_COMPLETION(ses->complete);
+ 		spin_unlock(&tbl->slot_tbl_lock);
+@@ -371,7 +380,6 @@ nfs4_alloc_state_owner(void)
+ 		return NULL;
+ 	spin_lock_init(&sp->so_lock);
+ 	INIT_LIST_HEAD(&sp->so_states);
+-	INIT_LIST_HEAD(&sp->so_delegations);
+ 	rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue");
+ 	sp->so_seqid.sequence = &sp->so_sequence;
+ 	spin_lock_init(&sp->so_sequence.lock);
+@@ -384,7 +392,7 @@ static void
+ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
+ {
+ 	if (!RB_EMPTY_NODE(&sp->so_client_node)) {
+-		struct nfs_client *clp = sp->so_client;
++		struct nfs_client *clp = sp->so_server->nfs_client;
+ 
+ 		spin_lock(&clp->cl_lock);
+ 		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
+@@ -406,7 +414,6 @@ struct nfs4_state_owner *nfs4_get_state_
+ 	new = nfs4_alloc_state_owner();
+ 	if (new == NULL)
+ 		return NULL;
+-	new->so_client = clp;
+ 	new->so_server = server;
+ 	new->so_cred = cred;
+ 	spin_lock(&clp->cl_lock);
+@@ -423,7 +430,7 @@ struct nfs4_state_owner *nfs4_get_state_
+ 
+ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
+ {
+-	struct nfs_client *clp = sp->so_client;
++	struct nfs_client *clp = sp->so_server->nfs_client;
+ 	struct rpc_cred *cred = sp->so_cred;
+ 
+ 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
+@@ -583,8 +590,24 @@ static void __nfs4_close(struct path *pa
+ 	if (!call_close) {
+ 		nfs4_put_open_state(state);
+ 		nfs4_put_state_owner(owner);
+-	} else
++	} else {
++		u32 roc_iomode;
++		struct nfs_inode *nfsi = NFS_I(state->inode);
++
++		if (has_layout(nfsi) &&
++		    (roc_iomode = pnfs_layout_roc_iomode(nfsi)) != 0) {
++			struct pnfs_layout_range range = {
++				.iomode = roc_iomode,
++				.offset = 0,
++				.length = NFS4_MAX_UINT64,
++			};
++
++			pnfs_return_layout(state->inode, &range, NULL,
++					   RETURN_FILE, wait);
++		}
++
+ 		nfs4_do_close(path, state, gfp_mask, wait);
++	}
+ }
+ 
+ void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
+@@ -602,12 +625,21 @@ void nfs4_close_sync(struct path *path, 
+  * that is compatible with current->files
+  */
+ static struct nfs4_lock_state *
+-__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
++__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *pos;
+ 	list_for_each_entry(pos, &state->lock_states, ls_locks) {
+-		if (pos->ls_owner != fl_owner)
++		if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type)
+ 			continue;
++		switch (pos->ls_owner.lo_type) {
++		case NFS4_POSIX_LOCK_TYPE:
++			if (pos->ls_owner.lo_u.posix_owner != fl_owner)
++				continue;
++			break;
++		case NFS4_FLOCK_LOCK_TYPE:
++			if (pos->ls_owner.lo_u.flock_owner != fl_pid)
++				continue;
++		}
+ 		atomic_inc(&pos->ls_count);
+ 		return pos;
+ 	}
+@@ -619,10 +651,10 @@ __nfs4_find_lock_state(struct nfs4_state
+  * exists, return an uninitialized one.
+  *
+  */
+-static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
++static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *lsp;
+-	struct nfs_client *clp = state->owner->so_client;
++	struct nfs_client *clp = state->owner->so_server->nfs_client;
+ 
+ 	lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
+ 	if (lsp == NULL)
+@@ -633,7 +665,18 @@ static struct nfs4_lock_state *nfs4_allo
+ 	lsp->ls_seqid.sequence = &lsp->ls_sequence;
+ 	atomic_set(&lsp->ls_count, 1);
+ 	lsp->ls_state = state;
+-	lsp->ls_owner = fl_owner;
++	lsp->ls_owner.lo_type = type;
++	switch (lsp->ls_owner.lo_type) {
++	case NFS4_FLOCK_LOCK_TYPE:
++		lsp->ls_owner.lo_u.flock_owner = fl_pid;
++		break;
++	case NFS4_POSIX_LOCK_TYPE:
++		lsp->ls_owner.lo_u.posix_owner = fl_owner;
++		break;
++	default:
++		kfree(lsp);
++		return NULL;
++	}
+ 	spin_lock(&clp->cl_lock);
+ 	nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
+ 	spin_unlock(&clp->cl_lock);
+@@ -643,7 +686,7 @@ static struct nfs4_lock_state *nfs4_allo
+ 
+ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
+ {
+-	struct nfs_client *clp = lsp->ls_state->owner->so_client;
++	struct nfs_client *clp = lsp->ls_state->owner->so_server->nfs_client;
+ 
+ 	spin_lock(&clp->cl_lock);
+ 	nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
+@@ -657,13 +700,13 @@ static void nfs4_free_lock_state(struct 
+  * exists, return an uninitialized one.
+  *
+  */
+-static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
++static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *lsp, *new = NULL;
+ 	
+ 	for(;;) {
+ 		spin_lock(&state->state_lock);
+-		lsp = __nfs4_find_lock_state(state, owner);
++		lsp = __nfs4_find_lock_state(state, owner, pid, type);
+ 		if (lsp != NULL)
+ 			break;
+ 		if (new != NULL) {
+@@ -674,7 +717,7 @@ static struct nfs4_lock_state *nfs4_get_
+ 			break;
+ 		}
+ 		spin_unlock(&state->state_lock);
+-		new = nfs4_alloc_lock_state(state, owner);
++		new = nfs4_alloc_lock_state(state, owner, pid, type);
+ 		if (new == NULL)
+ 			return NULL;
+ 	}
+@@ -701,6 +744,8 @@ void nfs4_put_lock_state(struct nfs4_loc
+ 	if (list_empty(&state->lock_states))
+ 		clear_bit(LK_STATE_IN_USE, &state->flags);
+ 	spin_unlock(&state->state_lock);
++	if (lsp->ls_flags & NFS_LOCK_INITIALIZED)
++		nfs4_release_lockowner(lsp);
+ 	nfs4_free_lock_state(lsp);
+ }
+ 
+@@ -728,7 +773,12 @@ int nfs4_set_lock_state(struct nfs4_stat
+ 
+ 	if (fl->fl_ops != NULL)
+ 		return 0;
+-	lsp = nfs4_get_lock_state(state, fl->fl_owner);
++	if (fl->fl_flags & FL_POSIX)
++		lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
++	else if (fl->fl_flags & FL_FLOCK)
++		lsp = nfs4_get_lock_state(state, 0, fl->fl_pid, NFS4_FLOCK_LOCK_TYPE);
++	else
++		return -EINVAL;
+ 	if (lsp == NULL)
+ 		return -ENOMEM;
+ 	fl->fl_u.nfs4_fl.owner = lsp;
+@@ -740,7 +790,7 @@ int nfs4_set_lock_state(struct nfs4_stat
+  * Byte-range lock aware utility to initialize the stateid of read/write
+  * requests.
+  */
+-void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
++void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid)
+ {
+ 	struct nfs4_lock_state *lsp;
+ 	int seq;
+@@ -753,7 +803,7 @@ void nfs4_copy_stateid(nfs4_stateid *dst
+ 		return;
+ 
+ 	spin_lock(&state->state_lock);
+-	lsp = __nfs4_find_lock_state(state, fl_owner);
++	lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
+ 	if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
+ 		memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
+ 	spin_unlock(&state->state_lock);
+@@ -1031,8 +1081,8 @@ restart:
+ 				 * Open state on this file cannot be recovered
+ 				 * All we can do is revert to using the zero stateid.
+ 				 */
+-				memset(state->stateid.data, 0,
+-					sizeof(state->stateid.data));
++				memset(state->stateid.u.data, 0,
++					sizeof(state->stateid.u.data));
+ 				/* Mark the file as being 'closed' */
+ 				state->state = 0;
+ 				break;
+@@ -1041,11 +1091,11 @@ restart:
+ 			case -NFS4ERR_BAD_STATEID:
+ 			case -NFS4ERR_RECLAIM_BAD:
+ 			case -NFS4ERR_RECLAIM_CONFLICT:
+-				nfs4_state_mark_reclaim_nograce(sp->so_client, state);
++				nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ 				break;
+ 			case -NFS4ERR_EXPIRED:
+ 			case -NFS4ERR_NO_GRACE:
+-				nfs4_state_mark_reclaim_nograce(sp->so_client, state);
++				nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ 			case -NFS4ERR_STALE_CLIENTID:
+ 			case -NFS4ERR_BADSESSION:
+ 			case -NFS4ERR_BADSLOT:
+@@ -1120,8 +1170,7 @@ static void nfs4_state_end_reclaim_reboo
+ 	if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+ 		return;
+ 
+-	nfs4_reclaim_complete(clp,
+-		nfs4_reboot_recovery_ops[clp->cl_minorversion]);
++	nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
+ 
+ 	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+ 		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+@@ -1211,8 +1260,8 @@ restart:
+ static int nfs4_check_lease(struct nfs_client *clp)
+ {
+ 	struct rpc_cred *cred;
+-	struct nfs4_state_maintenance_ops *ops =
+-		nfs4_state_renewal_ops[clp->cl_minorversion];
++	const struct nfs4_state_maintenance_ops *ops =
++		clp->cl_mvops->state_renewal_ops;
+ 	int status = -NFS4ERR_EXPIRED;
+ 
+ 	/* Is the client already known to have an expired lease? */
+@@ -1235,8 +1284,8 @@ out:
+ static int nfs4_reclaim_lease(struct nfs_client *clp)
+ {
+ 	struct rpc_cred *cred;
+-	struct nfs4_state_recovery_ops *ops =
+-		nfs4_reboot_recovery_ops[clp->cl_minorversion];
++	const struct nfs4_state_recovery_ops *ops =
++		clp->cl_mvops->reboot_recovery_ops;
+ 	int status = -ENOENT;
+ 
+ 	cred = ops->get_clid_cred(clp);
+@@ -1421,6 +1470,7 @@ static void nfs4_state_manager(struct nf
+ 			}
+ 			clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
+ 			set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
++			pnfs_destroy_all_layouts(clp);
+ 		}
+ 
+ 		if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
+@@ -1444,7 +1494,7 @@ static void nfs4_state_manager(struct nf
+ 		/* First recover reboot state... */
+ 		if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
+ 			status = nfs4_do_reclaim(clp,
+-				nfs4_reboot_recovery_ops[clp->cl_minorversion]);
++				clp->cl_mvops->reboot_recovery_ops);
+ 			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
+ 				continue;
+@@ -1458,7 +1508,7 @@ static void nfs4_state_manager(struct nf
+ 		/* Now recover expired state... */
+ 		if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
+ 			status = nfs4_do_reclaim(clp,
+-				nfs4_nograce_recovery_ops[clp->cl_minorversion]);
++				clp->cl_mvops->nograce_recovery_ops);
+ 			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-31 20:41:19.160150207 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-31 20:42:05.530092192 -0400
+@@ -50,8 +50,10 @@
+ #include <linux/nfs4.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/nfs_idmap.h>
++#include <linux/nfs4_pnfs.h>
+ #include "nfs4_fs.h"
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_XDR
+ 
+@@ -89,7 +91,7 @@ static int nfs4_stat_to_errno(int);
+ #define encode_getfh_maxsz      (op_encode_hdr_maxsz)
+ #define decode_getfh_maxsz      (op_decode_hdr_maxsz + 1 + \
+ 				((3+NFS4_FHSIZE) >> 2))
+-#define nfs4_fattr_bitmap_maxsz 3
++#define nfs4_fattr_bitmap_maxsz 4
+ #define encode_getattr_maxsz    (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
+ #define nfs4_name_maxsz		(1 + ((3 + NFS4_MAXNAMLEN) >> 2))
+ #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
+@@ -111,7 +113,11 @@ static int nfs4_stat_to_errno(int);
+ #define encode_restorefh_maxsz  (op_encode_hdr_maxsz)
+ #define decode_restorefh_maxsz  (op_decode_hdr_maxsz)
+ #define encode_fsinfo_maxsz	(encode_getattr_maxsz)
+-#define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + 11)
++/* The 5 accounts for the PNFS attributes, and assumes that at most three
++ * layout types will be returned.
++ */
++#define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + \
++				 nfs4_fattr_bitmap_maxsz + 8 + 5)
+ #define encode_renew_maxsz	(op_encode_hdr_maxsz + 3)
+ #define decode_renew_maxsz	(op_decode_hdr_maxsz)
+ #define encode_setclientid_maxsz \
+@@ -202,14 +208,17 @@ static int nfs4_stat_to_errno(int);
+ #define encode_link_maxsz	(op_encode_hdr_maxsz + \
+ 				nfs4_name_maxsz)
+ #define decode_link_maxsz	(op_decode_hdr_maxsz + decode_change_info_maxsz)
++#define encode_lockowner_maxsz	(7)
+ #define encode_lock_maxsz	(op_encode_hdr_maxsz + \
+ 				 7 + \
+-				 1 + encode_stateid_maxsz + 8)
++				 1 + encode_stateid_maxsz + 1 + \
++				 encode_lockowner_maxsz)
+ #define decode_lock_denied_maxsz \
+ 				(8 + decode_lockowner_maxsz)
+ #define decode_lock_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_lock_denied_maxsz)
+-#define encode_lockt_maxsz	(op_encode_hdr_maxsz + 12)
++#define encode_lockt_maxsz	(op_encode_hdr_maxsz + 5 + \
++				encode_lockowner_maxsz)
+ #define decode_lockt_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_lock_denied_maxsz)
+ #define encode_locku_maxsz	(op_encode_hdr_maxsz + 3 + \
+@@ -217,6 +226,11 @@ static int nfs4_stat_to_errno(int);
+ 				 4)
+ #define decode_locku_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_stateid_maxsz)
++#define encode_release_lockowner_maxsz \
++				(op_encode_hdr_maxsz + \
++				 encode_lockowner_maxsz)
++#define decode_release_lockowner_maxsz \
++				(op_decode_hdr_maxsz)
+ #define encode_access_maxsz	(op_encode_hdr_maxsz + 1)
+ #define decode_access_maxsz	(op_decode_hdr_maxsz + 2)
+ #define encode_symlink_maxsz	(op_encode_hdr_maxsz + \
+@@ -302,6 +316,35 @@ static int nfs4_stat_to_errno(int);
+ 				XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
+ #define encode_reclaim_complete_maxsz	(op_encode_hdr_maxsz + 4)
+ #define decode_reclaim_complete_maxsz	(op_decode_hdr_maxsz + 4)
++#define encode_getdevicelist_maxsz (op_encode_hdr_maxsz + 4 + \
++				encode_verifier_maxsz)
++#define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 +  \
++				decode_verifier_maxsz +             \
++				XDR_QUADLEN(NFS4_PNFS_GETDEVLIST_MAXNUM *  \
++				NFS4_PNFS_DEVICEID4_SIZE))
++#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \
++				XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE))
++#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \
++				4 /*layout type */ + \
++				4 /* opaque devaddr4 length */ +\
++				4 /* notification bitmap length */ + \
++				4 /* notification bitmap */)
++#define encode_layoutget_maxsz	(op_encode_hdr_maxsz + 10 + \
++				encode_stateid_maxsz)
++#define decode_layoutget_maxsz	(op_decode_hdr_maxsz + 8 + \
++				decode_stateid_maxsz + \
++				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
++#define encode_layoutcommit_maxsz (18 +                           \
++				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + \
++				op_encode_hdr_maxsz +          \
++				encode_stateid_maxsz)
++#define decode_layoutcommit_maxsz (3 + op_decode_hdr_maxsz)
++#define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \
++				encode_stateid_maxsz + \
++				1 /* FIXME: opaque lrf_body always empty at
++				   *the moment */)
++#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
++				1 + decode_stateid_maxsz)
+ #else /* CONFIG_NFS_V4_1 */
+ #define encode_sequence_maxsz	0
+ #define decode_sequence_maxsz	0
+@@ -471,6 +514,12 @@ static int nfs4_stat_to_errno(int);
+ 				decode_sequence_maxsz + \
+ 				decode_putfh_maxsz + \
+ 				decode_locku_maxsz)
++#define NFS4_enc_release_lockowner_sz \
++				(compound_encode_hdr_maxsz + \
++				 encode_lockowner_maxsz)
++#define NFS4_dec_release_lockowner_sz \
++				(compound_decode_hdr_maxsz + \
++				 decode_lockowner_maxsz)
+ #define NFS4_enc_access_sz	(compound_encode_hdr_maxsz + \
+ 				encode_sequence_maxsz + \
+ 				encode_putfh_maxsz + \
+@@ -685,6 +734,60 @@ static int nfs4_stat_to_errno(int);
+ #define NFS4_dec_reclaim_complete_sz	(compound_decode_hdr_maxsz + \
+ 					 decode_sequence_maxsz + \
+ 					 decode_reclaim_complete_maxsz)
++#define NFS4_enc_getdevicelist_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz + \
++				encode_getdevicelist_maxsz)
++#define NFS4_dec_getdevicelist_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_getdevicelist_maxsz)
++#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz +    \
++				encode_sequence_maxsz +\
++				encode_getdeviceinfo_maxsz)
++#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz +    \
++				decode_sequence_maxsz + \
++				decode_getdeviceinfo_maxsz)
++#define NFS4_enc_layoutget_sz	(compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz +        \
++				encode_layoutget_maxsz)
++#define NFS4_dec_layoutget_sz	(compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz +        \
++				decode_layoutget_maxsz)
++#define NFS4_enc_layoutcommit_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz +\
++				encode_putfh_maxsz + \
++				encode_layoutcommit_maxsz + \
++				encode_getattr_maxsz)
++#define NFS4_dec_layoutcommit_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_layoutcommit_maxsz + \
++				decode_getattr_maxsz)
++#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz + \
++				encode_layoutreturn_maxsz)
++#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_layoutreturn_maxsz)
++#define NFS4_enc_dswrite_sz	(compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz +\
++				encode_putfh_maxsz + \
++				encode_write_maxsz)
++#define NFS4_dec_dswrite_sz	(compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_write_maxsz)
++#define NFS4_enc_dscommit_sz	(compound_encode_hdr_maxsz + \
++				encode_putfh_maxsz + \
++				encode_commit_maxsz)
++#define NFS4_dec_dscommit_sz	(compound_decode_hdr_maxsz + \
++				decode_putfh_maxsz + \
++				decode_commit_maxsz)
+ 
+ const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
+ 				      compound_encode_hdr_maxsz +
+@@ -915,7 +1018,7 @@ static void encode_close(struct xdr_stre
+ 	p = reserve_space(xdr, 8+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_CLOSE);
+ 	*p++ = cpu_to_be32(arg->seqid->sequence->counter);
+-	xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_close_maxsz;
+ }
+@@ -989,6 +1092,35 @@ static void encode_getattr_two(struct xd
+ 	hdr->replen += decode_getattr_maxsz;
+ }
+ 
++static void
++encode_getattr_three(struct xdr_stream *xdr,
++		     uint32_t bm0, uint32_t bm1, uint32_t bm2,
++		     struct compound_hdr *hdr)
++{
++	__be32 *p;
++
++	p = reserve_space(xdr, 4);
++	*p = cpu_to_be32(OP_GETATTR);
++	if (bm2) {
++		p = reserve_space(xdr, 16);
++		*p++ = cpu_to_be32(3);
++		*p++ = cpu_to_be32(bm0);
++		*p++ = cpu_to_be32(bm1);
++		*p = cpu_to_be32(bm2);
++	} else if (bm1) {
++		p = reserve_space(xdr, 12);
++		*p++ = cpu_to_be32(2);
++		*p++ = cpu_to_be32(bm0);
++		*p = cpu_to_be32(bm1);
++	} else {
++		p = reserve_space(xdr, 8);
++		*p++ = cpu_to_be32(1);
++		*p = cpu_to_be32(bm0);
++	}
++	hdr->nops++;
++	hdr->replen += decode_getattr_maxsz;
++}
++
+ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+ {
+ 	encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0],
+@@ -997,8 +1129,11 @@ static void encode_getfattr(struct xdr_s
+ 
+ static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+ {
+-	encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0],
+-			   bitmask[1] & nfs4_fsinfo_bitmap[1], hdr);
++	encode_getattr_three(xdr,
++			     bitmask[0] & nfs4_fsinfo_bitmap[0],
++			     bitmask[1] & nfs4_fsinfo_bitmap[1],
++			     bitmask[2] & nfs4_fsinfo_bitmap[2],
++			     hdr);
+ }
+ 
+ static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+@@ -1042,6 +1177,17 @@ static inline uint64_t nfs4_lock_length(
+ 	return fl->fl_end - fl->fl_start + 1;
+ }
+ 
++static void encode_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner)
++{
++	__be32 *p;
++
++	p = reserve_space(xdr, 28);
++	p = xdr_encode_hyper(p, lowner->clientid);
++	*p++ = cpu_to_be32(16);
++	p = xdr_encode_opaque_fixed(p, "lock id:", 8);
++	xdr_encode_hyper(p, lowner->id);
++}
++
+ /*
+  * opcode,type,reclaim,offset,length,new_lock_owner = 32
+  * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40
+@@ -1058,18 +1204,16 @@ static void encode_lock(struct xdr_strea
+ 	p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+ 	*p = cpu_to_be32(args->new_lock_owner);
+ 	if (args->new_lock_owner){
+-		p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32);
++		p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 		*p++ = cpu_to_be32(args->open_seqid->sequence->counter);
+-		p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
++		p = xdr_encode_opaque_fixed(p, args->open_stateid->u.data,
++					    NFS4_STATEID_SIZE);
+ 		*p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
+-		p = xdr_encode_hyper(p, args->lock_owner.clientid);
+-		*p++ = cpu_to_be32(16);
+-		p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+-		xdr_encode_hyper(p, args->lock_owner.id);
++		encode_lockowner(xdr, &args->lock_owner);
+ 	}
+ 	else {
+ 		p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
+-		p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE);
++		p = xdr_encode_opaque_fixed(p, args->lock_stateid->u.data, NFS4_STATEID_SIZE);
+ 		*p = cpu_to_be32(args->lock_seqid->sequence->counter);
+ 	}
+ 	hdr->nops++;
+@@ -1080,15 +1224,12 @@ static void encode_lockt(struct xdr_stre
+ {
+ 	__be32 *p;
+ 
+-	p = reserve_space(xdr, 52);
++	p = reserve_space(xdr, 24);
+ 	*p++ = cpu_to_be32(OP_LOCKT);
+ 	*p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
+ 	p = xdr_encode_hyper(p, args->fl->fl_start);
+ 	p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+-	p = xdr_encode_hyper(p, args->lock_owner.clientid);
+-	*p++ = cpu_to_be32(16);
+-	p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+-	xdr_encode_hyper(p, args->lock_owner.id);
++	encode_lockowner(xdr, &args->lock_owner);
+ 	hdr->nops++;
+ 	hdr->replen += decode_lockt_maxsz;
+ }
+@@ -1101,13 +1242,25 @@ static void encode_locku(struct xdr_stre
+ 	*p++ = cpu_to_be32(OP_LOCKU);
+ 	*p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
+ 	*p++ = cpu_to_be32(args->seqid->sequence->counter);
+-	p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, args->stateid->u.data,
++				    NFS4_STATEID_SIZE);
+ 	p = xdr_encode_hyper(p, args->fl->fl_start);
+ 	xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+ 	hdr->nops++;
+ 	hdr->replen += decode_locku_maxsz;
+ }
+ 
++static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr)
++{
++	__be32 *p;
++
++	p = reserve_space(xdr, 4);
++	*p = cpu_to_be32(OP_RELEASE_LOCKOWNER);
++	encode_lockowner(xdr, lowner);
++	hdr->nops++;
++	hdr->replen += decode_release_lockowner_maxsz;
++}
++
+ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
+ {
+ 	int len = name->len;
+@@ -1172,7 +1325,7 @@ static inline void encode_createmode(str
+ 		break;
+ 	default:
+ 		clp = arg->server->nfs_client;
+-		if (clp->cl_minorversion > 0) {
++		if (clp->cl_mvops->minor_version > 0) {
+ 			if (nfs4_has_persistent_session(clp)) {
+ 				*p = cpu_to_be32(NFS4_CREATE_GUARDED);
+ 				encode_attrs(xdr, arg->u.attrs, arg->server);
+@@ -1251,7 +1404,7 @@ static inline void encode_claim_delegate
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
+-	xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, stateid->u.data, NFS4_STATEID_SIZE);
+ 	encode_string(xdr, name->len, name->name);
+ }
+ 
+@@ -1282,7 +1435,7 @@ static void encode_open_confirm(struct x
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 	*p++ = cpu_to_be32(OP_OPEN_CONFIRM);
+-	p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	*p = cpu_to_be32(arg->seqid->sequence->counter);
+ 	hdr->nops++;
+ 	hdr->replen += decode_open_confirm_maxsz;
+@@ -1294,7 +1447,7 @@ static void encode_open_downgrade(struct
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 	*p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
+-	p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	*p = cpu_to_be32(arg->seqid->sequence->counter);
+ 	encode_share_access(xdr, arg->fmode);
+ 	hdr->nops++;
+@@ -1324,17 +1477,17 @@ static void encode_putrootfh(struct xdr_
+ 	hdr->replen += decode_putrootfh_maxsz;
+ }
+ 
+-static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
++static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx)
+ {
+ 	nfs4_stateid stateid;
+ 	__be32 *p;
+ 
+ 	p = reserve_space(xdr, NFS4_STATEID_SIZE);
+ 	if (ctx->state != NULL) {
+-		nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
+-		xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
++		nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid);
++		xdr_encode_opaque_fixed(p, stateid.u.data, NFS4_STATEID_SIZE);
+ 	} else
+-		xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
++		xdr_encode_opaque_fixed(p, zero_stateid.u.data, NFS4_STATEID_SIZE);
+ }
+ 
+ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
+@@ -1344,7 +1497,7 @@ static void encode_read(struct xdr_strea
+ 	p = reserve_space(xdr, 4);
+ 	*p = cpu_to_be32(OP_READ);
+ 
+-	encode_stateid(xdr, args->context);
++	encode_stateid(xdr, args->context, args->lock_context);
+ 
+ 	p = reserve_space(xdr, 12);
+ 	p = xdr_encode_hyper(p, args->offset);
+@@ -1448,7 +1601,7 @@ encode_setacl(struct xdr_stream *xdr, st
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_SETATTR);
+-	xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, zero_stateid.u.data, NFS4_STATEID_SIZE);
+ 	p = reserve_space(xdr, 2*4);
+ 	*p++ = cpu_to_be32(1);
+ 	*p = cpu_to_be32(FATTR4_WORD0_ACL);
+@@ -1479,7 +1632,7 @@ static void encode_setattr(struct xdr_st
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_SETATTR);
+-	xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, arg->stateid.u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_setattr_maxsz;
+ 	encode_attrs(xdr, arg->iap, server);
+@@ -1523,7 +1676,7 @@ static void encode_write(struct xdr_stre
+ 	p = reserve_space(xdr, 4);
+ 	*p = cpu_to_be32(OP_WRITE);
+ 
+-	encode_stateid(xdr, args->context);
++	encode_stateid(xdr, args->context, args->lock_context);
+ 
+ 	p = reserve_space(xdr, 16);
+ 	p = xdr_encode_hyper(p, args->offset);
+@@ -1542,7 +1695,7 @@ static void encode_delegreturn(struct xd
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 
+ 	*p++ = cpu_to_be32(OP_DELEGRETURN);
+-	xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, stateid->u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_delegreturn_maxsz;
+ }
+@@ -1696,6 +1849,162 @@ static void encode_sequence(struct xdr_s
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
++#ifdef CONFIG_NFS_V4_1
++static void
++encode_getdevicelist(struct xdr_stream *xdr,
++		     const struct nfs4_getdevicelist_args *args,
++		     struct compound_hdr *hdr)
++{
++	__be32 *p;
++	nfs4_verifier dummy = {
++		.data = "dummmmmy",
++	};
++
++	p = reserve_space(xdr, 20);
++	*p++ = cpu_to_be32(OP_GETDEVICELIST);
++	*p++ = cpu_to_be32(args->layoutclass);
++	*p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM);
++	xdr_encode_hyper(p, 0ULL);                          /* cookie */
++	encode_nfs4_verifier(xdr, &dummy);
++	hdr->nops++;
++}
++
++static void
++encode_getdeviceinfo(struct xdr_stream *xdr,
++		     const struct nfs4_getdeviceinfo_args *args,
++		     struct compound_hdr *hdr)
++{
++	int has_bitmap = (args->pdev->dev_notify_types != 0);
++	int len = 16 + NFS4_PNFS_DEVICEID4_SIZE + (has_bitmap * 4);
++	__be32 *p;
++
++	p = reserve_space(xdr, len);
++	*p++ = cpu_to_be32(OP_GETDEVICEINFO);
++	p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
++				    NFS4_PNFS_DEVICEID4_SIZE);
++	*p++ = cpu_to_be32(args->pdev->layout_type);
++	*p++ = cpu_to_be32(args->pdev->pglen + len);	/* gdia_maxcount */
++	*p++ = cpu_to_be32(has_bitmap);			/* bitmap length [01] */
++	if (has_bitmap)
++		*p = cpu_to_be32(args->pdev->dev_notify_types);
++	hdr->nops++;
++}
++
++static void
++encode_layoutget(struct xdr_stream *xdr,
++		      const struct nfs4_layoutget_args *args,
++		      struct compound_hdr *hdr)
++{
++	nfs4_stateid stateid;
++	__be32 *p;
++
++	p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
++	*p++ = cpu_to_be32(OP_LAYOUTGET);
++	*p++ = cpu_to_be32(0);     /* Signal layout available */
++	*p++ = cpu_to_be32(args->type);
++	*p++ = cpu_to_be32(args->range.iomode);
++	p = xdr_encode_hyper(p, args->range.offset);
++	p = xdr_encode_hyper(p, args->range.length);
++	p = xdr_encode_hyper(p, args->minlength);
++	pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout);
++	p = xdr_encode_opaque_fixed(p, &stateid.u.data, NFS4_STATEID_SIZE);
++	*p = cpu_to_be32(args->maxcount);
++
++	dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
++		__func__,
++		args->type,
++		args->range.iomode,
++		(unsigned long)args->range.offset,
++		(unsigned long)args->range.length,
++		args->maxcount);
++	hdr->nops++;
++	hdr->replen += decode_layoutget_maxsz;
++}
++
++static int
++encode_layoutcommit(struct xdr_stream *xdr,
++		    const struct nfs4_layoutcommit_args *args,
++		    struct compound_hdr *hdr)
++{
++	struct layoutdriver_io_operations *ld_io_ops =
++			NFS_SERVER(args->inode)->pnfs_curr_ld->ld_io_ops;
++	__be32 *p;
++
++	dprintk("%s: %llu@%llu lbw: %llu type: %d\n", __func__,
++		args->range.length, args->range.offset, args->lastbytewritten,
++		args->layout_type);
++
++	p = reserve_space(xdr, 40 + NFS4_STATEID_SIZE);
++	*p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
++	p = xdr_encode_hyper(p, args->range.offset);
++	p = xdr_encode_hyper(p, args->range.length);
++	*p++ = cpu_to_be32(0);     /* reclaim */
++	p = xdr_encode_opaque_fixed(p, args->stateid.u.data, NFS4_STATEID_SIZE);
++	*p++ = cpu_to_be32(1);     /* newoffset = TRUE */
++	p = xdr_encode_hyper(p, args->lastbytewritten);
++	*p = cpu_to_be32(args->time_modify_changed != 0);
++	if (args->time_modify_changed) {
++		p = reserve_space(xdr, 12);
++		*p++ = cpu_to_be32(0);
++		*p++ = cpu_to_be32(args->time_modify.tv_sec);
++		*p = cpu_to_be32(args->time_modify.tv_nsec);
++	}
++
++	p = reserve_space(xdr, 4);
++	*p = cpu_to_be32(args->layout_type);
++
++	if (ld_io_ops->encode_layoutcommit) {
++		ld_io_ops->encode_layoutcommit(NFS_I(args->inode)->layout,
++					       xdr, args);
++	} else {
++		p = reserve_space(xdr, 4);
++		xdr_encode_opaque(p, NULL, 0);
++	}
++
++	hdr->nops++;
++	hdr->replen += decode_layoutcommit_maxsz;
++	return 0;
++}
++
++static void
++encode_layoutreturn(struct xdr_stream *xdr,
++		    const struct nfs4_layoutreturn_args *args,
++		    struct compound_hdr *hdr)
++{
++	nfs4_stateid stateid;
++	__be32 *p;
++
++	p = reserve_space(xdr, 20);
++	*p++ = cpu_to_be32(OP_LAYOUTRETURN);
++	*p++ = cpu_to_be32(args->reclaim);
++	*p++ = cpu_to_be32(args->layout_type);
++	*p++ = cpu_to_be32(args->range.iomode);
++	*p = cpu_to_be32(args->return_type);
++	if (args->return_type == RETURN_FILE) {
++		struct layoutdriver_io_operations *ld_io_ops =
++			NFS_SERVER(args->inode)->pnfs_curr_ld->ld_io_ops;
++
++		p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
++		p = xdr_encode_hyper(p, args->range.offset);
++		p = xdr_encode_hyper(p, args->range.length);
++		pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout);
++		p = xdr_encode_opaque_fixed(p, &stateid.u.data,
++					    NFS4_STATEID_SIZE);
++		dprintk("%s: call %pF\n", __func__,
++		ld_io_ops->encode_layoutreturn);
++		if (ld_io_ops->encode_layoutreturn) {
++			ld_io_ops->encode_layoutreturn(
++				NFS_I(args->inode)->layout, xdr, args);
++		} else {
++			p = reserve_space(xdr, 4);
++			*p = cpu_to_be32(0);
++		}
++	}
++	hdr->nops++;
++	hdr->replen += decode_layoutreturn_maxsz;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
+ /*
+  * END OF "GENERIC" ENCODE ROUTINES.
+  */
+@@ -1704,7 +2013,7 @@ static u32 nfs4_xdr_minorversion(const s
+ {
+ #if defined(CONFIG_NFS_V4_1)
+ 	if (args->sa_session)
+-		return args->sa_session->clp->cl_minorversion;
++		return args->sa_session->clp->cl_mvops->minor_version;
+ #endif /* CONFIG_NFS_V4_1 */
+ 	return 0;
+ }
+@@ -2048,6 +2357,20 @@ static int nfs4_xdr_enc_locku(struct rpc
+ 	return 0;
+ }
+ 
++static int nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, __be32 *p, struct nfs_release_lockowner_args *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = 0,
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_release_lockowner(&xdr, &args->lock_owner, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
+ /*
+  * Encode a READLINK request
+  */
+@@ -2330,7 +2653,7 @@ static int nfs4_xdr_enc_setclientid_conf
+ 	struct compound_hdr hdr = {
+ 		.nops	= 0,
+ 	};
+-	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
++	const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME, 0, 0 };
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ 	encode_compound_hdr(&xdr, req, &hdr);
+@@ -2395,7 +2718,7 @@ static int nfs4_xdr_enc_exchange_id(stru
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = args->client->cl_minorversion,
++		.minorversion = args->client->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2413,7 +2736,7 @@ static int nfs4_xdr_enc_create_session(s
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = args->client->cl_minorversion,
++		.minorversion = args->client->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2431,7 +2754,7 @@ static int nfs4_xdr_enc_destroy_session(
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = session->clp->cl_minorversion,
++		.minorversion = session->clp->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2469,7 +2792,7 @@ static int nfs4_xdr_enc_get_lease_time(s
+ 	struct compound_hdr hdr = {
+ 		.minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
+ 	};
+-	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
++	const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME, 0, 0 };
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ 	encode_compound_hdr(&xdr, req, &hdr);
+@@ -2499,6 +2822,159 @@ static int nfs4_xdr_enc_reclaim_complete
+ 	return 0;
+ }
+ 
++/*
++ * Encode GETDEVICELIST request
++ */
++static int
++nfs4_xdr_enc_getdevicelist(struct rpc_rqst *req, uint32_t *p,
++			   struct nfs4_getdevicelist_args *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_getdevicelist(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode GETDEVICEINFO request
++ */
++static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
++				      struct nfs4_getdeviceinfo_args *args)
++{
++	struct xdr_stream xdr;
++	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++	int replen;
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_getdeviceinfo(&xdr, args, &hdr);
++
++	/* set up reply kvec. Subtract notification bitmap max size (8)
++	 * so that notification bitmap is put in xdr_buf tail */
++	replen = (RPC_REPHDRSIZE + auth->au_rslack +
++		  NFS4_dec_getdeviceinfo_sz - 8) << 2;
++	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pdev->pages,
++			 args->pdev->pgbase, args->pdev->pglen);
++	dprintk("%s: inlined page args = (%u, %p, %u, %u)\n",
++		__func__, replen, args->pdev->pages,
++		args->pdev->pgbase, args->pdev->pglen);
++
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ *  Encode LAYOUTGET request
++ */
++static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
++				  struct nfs4_layoutget_args *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
++	encode_layoutget(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ *  Encode LAYOUTCOMMIT request
++ */
++static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, uint32_t *p,
++				     struct nfs4_layoutcommit_args *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_layoutcommit(&xdr, args, &hdr);
++	encode_getfattr(&xdr, args->bitmask, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode LAYOUTRETURN request
++ */
++static int nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, uint32_t *p,
++				     struct nfs4_layoutreturn_args *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
++	encode_layoutreturn(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a pNFS File Layout Data Server WRITE request
++ */
++static int nfs4_xdr_enc_dswrite(struct rpc_rqst *req, uint32_t *p,
++				struct nfs_writeargs *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_write(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a pNFS File Layout Data Server COMMIT request
++ */
++static int nfs4_xdr_enc_dscommit(struct rpc_rqst *req, uint32_t *p,
++				 struct nfs_writeargs *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_commit(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+@@ -2599,14 +3075,17 @@ static int decode_attr_bitmap(struct xdr
+ 		goto out_overflow;
+ 	bmlen = be32_to_cpup(p);
+ 
+-	bitmap[0] = bitmap[1] = 0;
++	bitmap[0] = bitmap[1] = bitmap[2] = 0;
+ 	p = xdr_inline_decode(xdr, (bmlen << 2));
+ 	if (unlikely(!p))
+ 		goto out_overflow;
+ 	if (bmlen > 0) {
+ 		bitmap[0] = be32_to_cpup(p++);
+-		if (bmlen > 1)
+-			bitmap[1] = be32_to_cpup(p);
++		if (bmlen > 1) {
++			bitmap[1] = be32_to_cpup(p++);
++			if (bmlen > 2)
++				bitmap[2] = be32_to_cpup(p);
++		}
+ 	}
+ 	return 0;
+ out_overflow:
+@@ -2635,8 +3114,9 @@ static int decode_attr_supported(struct 
+ 		decode_attr_bitmap(xdr, bitmask);
+ 		bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS;
+ 	} else
+-		bitmask[0] = bitmask[1] = 0;
+-	dprintk("%s: bitmask=%08x:%08x\n", __func__, bitmask[0], bitmask[1]);
++		bitmask[0] = bitmask[1] = bitmask[2] = 0;
++	dprintk("%s: bitmask=%08x:%08x:%08x\n", __func__,
++		bitmask[0], bitmask[1], bitmask[2]);
+ 	return 0;
+ }
+ 
+@@ -3565,7 +4045,7 @@ static int decode_opaque_fixed(struct xd
+ 
+ static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+ {
+-	return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
++	return decode_opaque_fixed(xdr, stateid->u.data, NFS4_STATEID_SIZE);
+ }
+ 
+ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
+@@ -3621,7 +4101,7 @@ out_overflow:
+ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3647,7 +4127,7 @@ xdr_error:
+ static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3679,7 +4159,7 @@ xdr_error:
+ static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3705,7 +4185,7 @@ static int decode_getfattr(struct xdr_st
+ {
+ 	__be32 *savep;
+ 	uint32_t attrlen,
+-		 bitmap[2] = {0},
++		 bitmap[3] = {0},
+ 		 type;
+ 	int status;
+ 	umode_t fmode = 0;
+@@ -3824,24 +4304,101 @@ xdr_error:
+ 	return status;
+ }
+ 
+-
+-static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
++#if defined(CONFIG_NFS_V4_1)
++/*
++ * Decode potentially multiple layout types. Currently we only support
++ * one layout driver per file system.
++ */
++static int decode_pnfs_list(struct xdr_stream *xdr, uint32_t *layoutclass)
+ {
+-	__be32 *savep;
+-	uint32_t attrlen, bitmap[2];
+-	int status;
++	uint32_t *p;
++	int num;
+ 
+-	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+-		goto xdr_error;
+-	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+-		goto xdr_error;
+-	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+-		goto xdr_error;
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	num = be32_to_cpup(p);
+ 
+-	fsinfo->rtmult = fsinfo->wtmult = 512;	/* ??? */
++	/* pNFS is not supported by the underlying file system */
++	if (num == 0) {
++		*layoutclass = 0;
++		return 0;
++	}
+ 
+-	if ((status = decode_attr_lease_time(xdr, bitmap, &fsinfo->lease_time)) != 0)
+-		goto xdr_error;
++	/* TODO: We will eventually support multiple layout drivers ? */
++	if (num > 1)
++		printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers "
++			"per filesystem not supported\n", __func__);
++
++	/* Decode and set first layout type */
++	p = xdr_inline_decode(xdr, num * 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	*layoutclass = be32_to_cpup(p);
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++/*
++ * The pNFS layout types supported by the exported file system
++ */
++static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
++				uint32_t *layoutclass)
++{
++	int status = 0;
++
++	dprintk("%s: bitmap is %x\n", __func__, bitmap[1]);
++	if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
++		return -EIO;
++	if (likely(bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES)) {
++		status = decode_pnfs_list(xdr, layoutclass);
++		bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
++	}
++	return status;
++}
++
++/*
++ * The preferred block size for layout-directed I/O
++ */
++static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
++				      uint32_t *res)
++{
++	__be32 *p;
++
++	dprintk("%s: bitmap is %x\n", __func__, bitmap[2]);
++	*res = 0;
++	if (bitmap[2] & FATTR4_WORD2_LAYOUT_BLKSIZE) {
++		p = xdr_inline_decode(xdr, 4);
++		if (unlikely(!p)) {
++			print_overflow_msg(__func__, xdr);
++			return -EIO;
++		}
++		*res = be32_to_cpup(p);
++		bitmap[2] &= ~FATTR4_WORD2_LAYOUT_BLKSIZE;
++	}
++	return 0;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
++static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
++{
++	__be32 *savep;
++	uint32_t attrlen, bitmap[3];
++	int status;
++
++	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
++		goto xdr_error;
++	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
++		goto xdr_error;
++	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
++		goto xdr_error;
++
++	fsinfo->rtmult = fsinfo->wtmult = 512;	/* ??? */
++
++	if ((status = decode_attr_lease_time(xdr, bitmap, &fsinfo->lease_time)) != 0)
++		goto xdr_error;
+ 	if ((status = decode_attr_maxfilesize(xdr, bitmap, &fsinfo->maxfilesize)) != 0)
+ 		goto xdr_error;
+ 	if ((status = decode_attr_maxread(xdr, bitmap, &fsinfo->rtmax)) != 0)
+@@ -3850,6 +4407,14 @@ static int decode_fsinfo(struct xdr_stre
+ 	if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0)
+ 		goto xdr_error;
+ 	fsinfo->wtpref = fsinfo->wtmax;
++#if defined(CONFIG_NFS_V4_1)
++	status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
++	if (status)
++		goto xdr_error;
++	status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize);
++	if (status)
++		goto xdr_error;
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ 	status = verify_attr_len(xdr, savep, attrlen);
+ xdr_error:
+@@ -3973,6 +4538,11 @@ static int decode_locku(struct xdr_strea
+ 	return status;
+ }
+ 
++static int decode_release_lockowner(struct xdr_stream *xdr)
++{
++	return decode_op_hdr(xdr, OP_RELEASE_LOCKOWNER);
++}
++
+ static int decode_lookup(struct xdr_stream *xdr)
+ {
+ 	return decode_op_hdr(xdr, OP_LOOKUP);
+@@ -4333,7 +4903,7 @@ static int decode_getacl(struct xdr_stre
+ {
+ 	__be32 *savep;
+ 	uint32_t attrlen,
+-		 bitmap[2] = {0};
++		 bitmap[3] = {0};
+ 	struct kvec *iov = req->rq_rcv_buf.head;
+ 	int status;
+ 
+@@ -4682,6 +5252,226 @@ out_overflow:
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
++#if defined(CONFIG_NFS_V4_1)
++/*
++ * TODO: Need to handle case when EOF != true;
++ */
++static int decode_getdevicelist(struct xdr_stream *xdr,
++				struct pnfs_devicelist *res)
++{
++	__be32 *p;
++	int status, i;
++	struct nfs_writeverf verftemp;
++
++	status = decode_op_hdr(xdr, OP_GETDEVICELIST);
++	if (status)
++		return status;
++
++	p = xdr_inline_decode(xdr, 8 + 8 + 4);
++	if (unlikely(!p))
++		goto out_overflow;
++
++	/* TODO: Skip cookie for now */
++	p += 2;
++
++	/* Read verifier */
++	p = xdr_decode_opaque_fixed(p, verftemp.verifier, 8);
++
++	res->num_devs = be32_to_cpup(p);
++
++	dprintk("%s: num_dev %d\n", __func__, res->num_devs);
++
++	if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM)
++		return -NFS4ERR_REP_TOO_BIG;
++
++	p = xdr_inline_decode(xdr,
++			      res->num_devs * NFS4_PNFS_DEVICEID4_SIZE + 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	for (i = 0; i < res->num_devs; i++)
++		p = xdr_decode_opaque_fixed(p, res->dev_id[i].data,
++					    NFS4_PNFS_DEVICEID4_SIZE);
++	res->eof = be32_to_cpup(p);
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_getdeviceinfo(struct xdr_stream *xdr,
++				struct pnfs_device *pdev)
++{
++	__be32 *p;
++	uint32_t len, type;
++	int status;
++
++	status = decode_op_hdr(xdr, OP_GETDEVICEINFO);
++	if (status) {
++		if (status == -ETOOSMALL) {
++			p = xdr_inline_decode(xdr, 4);
++			if (unlikely(!p))
++				goto out_overflow;
++			pdev->mincount = be32_to_cpup(p);
++			dprintk("%s: Min count too small. mincnt = %u\n",
++				__func__, pdev->mincount);
++		}
++		return status;
++	}
++
++	p = xdr_inline_decode(xdr, 8);
++	if (unlikely(!p))
++		goto out_overflow;
++	type = be32_to_cpup(p++);
++	if (type != pdev->layout_type) {
++		dprintk("%s: layout mismatch req: %u pdev: %u\n",
++			__func__, pdev->layout_type, type);
++		return -EINVAL;
++	}
++	/*
++	 * Get the length of the opaque device_addr4. xdr_read_pages places
++	 * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages)
++	 * and places the remaining xdr data in xdr_buf->tail
++	 */
++	pdev->mincount = be32_to_cpup(p);
++	xdr_read_pages(xdr, pdev->mincount); /* include space for the length */
++
++	/* At most one bitmap word */
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	len = be32_to_cpup(p);
++	if (len) {
++		p = xdr_inline_decode(xdr, 4);
++		if (unlikely(!p))
++			goto out_overflow;
++		pdev->dev_notify_types = be32_to_cpup(p);
++	} else
++		pdev->dev_notify_types = 0;
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
++			    struct nfs4_layoutget_res *res)
++{
++	__be32 *p;
++	int status;
++	u32 layout_count, dummy;
++
++	status = decode_op_hdr(xdr, OP_LAYOUTGET);
++	if (status)
++		return status;
++	p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE);
++	if (unlikely(!p))
++		goto out_overflow;
++	res->return_on_close = be32_to_cpup(p++);
++	p = xdr_decode_opaque_fixed(p, res->stateid.u.data, NFS4_STATEID_SIZE);
++	layout_count = be32_to_cpup(p);
++	if (!layout_count) {
++		dprintk("%s: server responded with empty layout array\n",
++			__func__);
++		return -EINVAL;
++	}
++
++	p = xdr_inline_decode(xdr, 24);
++	if (unlikely(!p))
++		goto out_overflow;
++	p = xdr_decode_hyper(p, &res->range.offset);
++	p = xdr_decode_hyper(p, &res->range.length);
++	res->range.iomode = be32_to_cpup(p++);
++	res->type = be32_to_cpup(p++);
++
++	status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
++	if (unlikely(status))
++		return status;
++
++	dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
++		__func__,
++		(unsigned long)res->range.offset,
++		(unsigned long)res->range.length,
++		res->range.iomode,
++		res->type,
++		res->layout.len);
++
++	/* presumably, nfs4_proc_layoutget allocated a single page */
++	if (res->layout.len > PAGE_SIZE)
++		return -ENOMEM;
++	memcpy(res->layout.buf, p, res->layout.len);
++
++	/* FIXME: the whole layout array should be passed up to the pnfs
++	 * client */
++	if (layout_count > 1) {
++		dprintk("%s: server responded with %d layouts, dropping tail\n",
++			__func__, layout_count);
++
++		while (--layout_count) {
++			p = xdr_inline_decode(xdr, 24);
++			if (unlikely(!p))
++				goto out_overflow;
++			status = decode_opaque_inline(xdr, &dummy, (char **)&p);
++			if (unlikely(status))
++				return status;
++		}
++	}
++
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_layoutreturn(struct xdr_stream *xdr,
++			       struct nfs4_layoutreturn_res *res)
++{
++	__be32 *p;
++	int status;
++
++	status = decode_op_hdr(xdr, OP_LAYOUTRETURN);
++	if (status)
++		return status;
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	res->lrs_present = be32_to_cpup(p);
++	if (res->lrs_present)
++		status = decode_stateid(xdr, &res->stateid);
++	return status;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_layoutcommit(struct xdr_stream *xdr,
++				    struct rpc_rqst *req,
++				    struct nfs4_layoutcommit_res *res)
++{
++	__be32 *p;
++	int status;
++
++	status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT);
++	if (status)
++		return status;
++
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	res->sizechanged = be32_to_cpup(p);
++
++	if (res->sizechanged) {
++		p = xdr_inline_decode(xdr, 8);
++		if (unlikely(!p))
++			goto out_overflow;
++		xdr_decode_hyper(p, &res->newsize);
++	}
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
+ /*
+  * END OF "GENERIC" DECODE ROUTINES.
+  */
+@@ -5259,6 +6049,19 @@ out:
+ 	return status;
+ }
+ 
++static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (!status)
++		status = decode_release_lockowner(&xdr);
++	return status;
++}
++
+ /*
+  * Decode READLINK response
+  */
+@@ -5696,6 +6499,186 @@ static int nfs4_xdr_dec_reclaim_complete
+ 		status = decode_reclaim_complete(&xdr, (void *)NULL);
+ 	return status;
+ }
++
++/*
++ * Decode GETDEVICELIST response (SEQUENCE, PUTFH, GETDEVICELIST)
++ */
++static int nfs4_xdr_dec_getdevicelist(struct rpc_rqst *rqstp, uint32_t *p,
++				      struct nfs4_getdevicelist_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	dprintk("decoding getdevicelist!\n");
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status != 0)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status != 0)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status != 0)
++		goto out;
++	status = decode_getdevicelist(&xdr, res->devlist);
++out:
++	return status;	/* status of the first op that failed, 0 if none */
++}
++
++/*
++ * Decode GETDEVICEINFO response
++ */
++static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
++				      struct nfs4_getdeviceinfo_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status != 0)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status != 0)
++		goto out;
++	status = decode_getdeviceinfo(&xdr, res->pdev);
++out:
++	return status;
++}
++
++/*
++ * Decode LAYOUTGET response
++ */
++static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
++				  struct nfs4_layoutget_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutget(&xdr, rqstp, res);
++out:
++	return status;
++}
++
++/*
++ * Decode LAYOUTRETURN response
++ */
++static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp, uint32_t *p,
++				     struct nfs4_layoutreturn_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutreturn(&xdr, res);
++out:
++	return status;
++}
++
++/*
++ * Decode LAYOUTCOMMIT response
++ */
++static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, uint32_t *p,
++				     struct nfs4_layoutcommit_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutcommit(&xdr, rqstp, res);
++	if (status)
++		goto out;
++	decode_getfattr(&xdr, res->fattr, res->server,
++			!RPC_IS_ASYNC(rqstp->rq_task));
++out:
++	return status;
++}
++
++/*
++ * Decode pNFS File Layout Data Server WRITE response
++ */
++static int nfs4_xdr_dec_dswrite(struct rpc_rqst *rqstp, uint32_t *p,
++				struct nfs_writeres *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_write(&xdr, res);
++	if (!status)
++		return res->count;
++out:
++	return status;
++}
++
++/*
++ * Decode pNFS File Layout Data Server COMMIT response
++ */
++static int nfs4_xdr_dec_dscommit(struct rpc_rqst *rqstp, uint32_t *p,
++				 struct nfs_writeres *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_commit(&xdr, res);
++out:
++	return status;
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
+@@ -5866,6 +6849,7 @@ struct rpc_procinfo	nfs4_procedures[] = 
+   PROC(GETACL,		enc_getacl,	dec_getacl),
+   PROC(SETACL,		enc_setacl,	dec_setacl),
+   PROC(FS_LOCATIONS,	enc_fs_locations, dec_fs_locations),
++  PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
+ #if defined(CONFIG_NFS_V4_1)
+   PROC(EXCHANGE_ID,	enc_exchange_id,	dec_exchange_id),
+   PROC(CREATE_SESSION,	enc_create_session,	dec_create_session),
+@@ -5873,6 +6857,13 @@ struct rpc_procinfo	nfs4_procedures[] = 
+   PROC(SEQUENCE,	enc_sequence,	dec_sequence),
+   PROC(GET_LEASE_TIME,	enc_get_lease_time,	dec_get_lease_time),
+   PROC(RECLAIM_COMPLETE, enc_reclaim_complete,  dec_reclaim_complete),
++  PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist),
++  PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
++  PROC(LAYOUTGET,  enc_layoutget,     dec_layoutget),
++  PROC(LAYOUTCOMMIT, enc_layoutcommit,  dec_layoutcommit),
++  PROC(LAYOUTRETURN, enc_layoutreturn,  dec_layoutreturn),
++  PROC(PNFS_WRITE, enc_dswrite,  dec_dswrite),
++  PROC(PNFS_COMMIT, enc_dscommit,  dec_dscommit),
+ #endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild
+--- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-08-31 20:42:05.532213157 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-08-31 20:42:05.532213157 -0400
+@@ -0,0 +1,11 @@
++#
++# Makefile for the pNFS Objects Layout Driver kernel module
++#
++objlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o objio_osd.o
++obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o
++
++#
++# Panasas pNFS Layout Driver kernel module
++#
++panlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o panfs_shim.o
++obj-$(CONFIG_PNFS_PANLAYOUT) += panlayoutdriver.o
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-08-31 20:42:05.533243491 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-08-31 20:42:05.534105468 -0400
+@@ -0,0 +1,1087 @@
++/*
++ *  objio_osd.c
++ *
++ *  pNFS Objects layout implementation over open-osd initiator library
++ *
++ *  Copyright (C) 2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/module.h>
++#include <scsi/scsi_device.h>
++#include <scsi/osd_attributes.h>
++#include <scsi/osd_initiator.h>
++#include <scsi/osd_sec.h>
++#include <scsi/osd_sense.h>
++
++#include "objlayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++#define _LLU(x) ((unsigned long long)(x))
++
++enum { BIO_MAX_PAGES_KMALLOC =
++		(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
++};
++
++/* A per mountpoint struct currently for device cache */
++struct objio_mount_type {
++	struct list_head dev_list;
++	spinlock_t dev_list_lock;
++};
++
++struct _dev_ent {
++	struct list_head list;
++	struct pnfs_deviceid d_id;
++	struct osd_dev *od;
++};
++
++static void _dev_list_remove_all(struct objio_mount_type *omt)
++{
++	spin_lock(&omt->dev_list_lock);
++
++	while (!list_empty(&omt->dev_list)) {
++		struct _dev_ent *de = list_entry(omt->dev_list.next,
++				 struct _dev_ent, list);
++
++		list_del_init(&de->list);
++		osduld_put_device(de->od);
++		kfree(de);
++	}
++
++	spin_unlock(&omt->dev_list_lock);
++}
++
++static struct osd_dev *___dev_list_find(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id)
++{
++	struct list_head *le;
++
++	list_for_each(le, &omt->dev_list) {
++		struct _dev_ent *de = list_entry(le, struct _dev_ent, list);
++
++		if (0 == memcmp(&de->d_id, d_id, sizeof(*d_id)))
++			return de->od;
++	}
++
++	return NULL;
++}
++
++static struct osd_dev *_dev_list_find(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id)
++{
++	struct osd_dev *od;
++
++	spin_lock(&omt->dev_list_lock);
++	od = ___dev_list_find(omt, d_id);
++	spin_unlock(&omt->dev_list_lock);
++	return od;
++}
++
++static int _dev_list_add(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id, struct osd_dev *od)
++{
++	struct _dev_ent *de = kzalloc(sizeof(*de), GFP_KERNEL);
++
++	if (!de)
++		return -ENOMEM;
++
++	spin_lock(&omt->dev_list_lock);
++
++	if (___dev_list_find(omt, d_id)) {
++		kfree(de);
++		goto out;
++	}
++
++	de->d_id = *d_id;
++	de->od = od;
++	list_add(&de->list, &omt->dev_list);
++
++out:
++	spin_unlock(&omt->dev_list_lock);
++	return 0;
++}
++
++struct objio_segment {
++	struct pnfs_osd_layout *layout;
++
++	unsigned mirrors_p1;
++	unsigned stripe_unit;
++	unsigned group_width;	/* Data stripe_units without integrity comps */
++	u64 group_depth;
++	unsigned group_count;
++
++	unsigned num_comps;
++	/* variable length */
++	struct osd_dev	*ods[1];
++};
++
++struct objio_state;
++typedef ssize_t (*objio_done_fn)(struct objio_state *ios);
++
++struct objio_state {
++	/* Generic layer */
++	struct objlayout_io_state ol_state;
++
++	struct objio_segment *objio_seg;
++
++	struct kref kref;
++	objio_done_fn done;
++	void *private;
++
++	unsigned long length;
++	unsigned numdevs; /* Actually used devs in this IO */
++	/* A per-device variable array of size numdevs */
++	struct _objio_per_comp {
++		struct bio *bio;
++		struct osd_request *or;
++		unsigned long length;
++		u64 offset;
++		unsigned dev;
++	} per_dev[];
++};
++
++/* Return the cached osd_dev of layout component @comp, or else get its
++   device info, look it up with the osd_initiator library and cache it */
++static struct osd_dev *_device_lookup(struct pnfs_layout_hdr *pnfslay,
++			       struct objio_segment *objio_seg, unsigned comp)
++{
++	struct pnfs_osd_layout *layout = objio_seg->layout;
++	struct pnfs_osd_deviceaddr *deviceaddr;
++	struct pnfs_deviceid *d_id;
++	struct osd_dev *od;
++	struct osd_dev_info odi;
++	struct objio_mount_type *omt = PNFS_NFS_SERVER(pnfslay)->pnfs_ld_data;
++	int err;
++
++	d_id = &layout->olo_comps[comp].oc_object_id.oid_device_id;
++
++	od = _dev_list_find(omt, d_id);
++	if (od)
++		return od;
++
++	err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr);
++	if (unlikely(err)) {
++		dprintk("%s: objlayout_get_deviceinfo=>%d\n", __func__, err);
++		return ERR_PTR(err);
++	}
++
++	odi.systemid_len = deviceaddr->oda_systemid.len;
++	if (odi.systemid_len > sizeof(odi.systemid)) {
++		err = -EINVAL;
++		goto out;
++	} else if (odi.systemid_len)
++		memcpy(odi.systemid, deviceaddr->oda_systemid.data,
++		       odi.systemid_len);
++	odi.osdname_len	 = deviceaddr->oda_osdname.len;
++	odi.osdname	 = (u8 *)deviceaddr->oda_osdname.data;
++
++	if (!odi.osdname_len && !odi.systemid_len) {
++		dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
++			__func__);
++		err = -ENODEV;
++		goto out;
++	}
++
++	od = osduld_info_lookup(&odi);
++	if (unlikely(IS_ERR(od))) {
++		err = PTR_ERR(od);
++		dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
++		goto out;
++	}
++	err = _dev_list_add(omt, d_id, od);
++	if (unlikely(err))	/* not cached: nobody else would ever put it */
++		osduld_put_device(od);
++out:
++	dprintk("%s: return=%d\n", __func__, err);
++	objlayout_put_deviceinfo(deviceaddr);
++	return err ? ERR_PTR(err) : od;
++}
++
++static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
++	struct objio_segment *objio_seg)
++{
++	struct pnfs_osd_layout *layout = objio_seg->layout;
++	unsigned i, num_comps = layout->olo_num_comps;
++	int err;
++
++	/* lookup all devices */
++	for (i = 0; i < num_comps; i++) {
++		struct osd_dev *od;
++
++		od = _device_lookup(pnfslay, objio_seg, i);
++		if (unlikely(IS_ERR(od))) {
++			err = PTR_ERR(od);
++			goto out;
++		}
++		objio_seg->ods[i] = od;
++	}
++	objio_seg->num_comps = num_comps;
++	err = 0;
++
++out:
++	dprintk("%s: return=%d\n", __func__, err);
++	return err;
++}
++
++static int _verify_data_map(struct pnfs_osd_layout *layout)
++{
++	struct pnfs_osd_data_map *data_map = &layout->olo_map;
++	u64 stripe_length;
++	u32 group_width;
++
++/* FIXME: Only raid0 for now. if not go through MDS */
++	if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) {
++		printk(KERN_ERR "Only RAID_0 for now\n");
++		return -ENOTSUPP;
++	}
++	/* mirror_cnt + 1 is used as a divisor: reject a wrap-around to 0 */
++	if (0 == data_map->odm_mirror_cnt + 1 ||
++	    0 != (data_map->odm_num_comps % (data_map->odm_mirror_cnt + 1))) {
++		printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n",
++			  data_map->odm_num_comps, data_map->odm_mirror_cnt);
++		return -EINVAL;
++	}
++	if (data_map->odm_group_width)
++		group_width = data_map->odm_group_width;
++	else
++		group_width = data_map->odm_num_comps /
++						(data_map->odm_mirror_cnt + 1);
++
++	stripe_length = (u64)data_map->odm_stripe_unit * group_width;
++	/* a zero stripe length ends up as a divisor in _calc_stripe_info() */
++	if (!stripe_length || stripe_length >= (1ULL << 32)) {
++		printk(KERN_ERR "Total Stripe length(0x%llx)"
++			  " is zero or >= 32bit, not supported\n", _LLU(stripe_length));
++		return -ENOTSUPP;
++	}
++	if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) {
++		printk(KERN_ERR "Stripe Unit(0x%llx)"
++			  " must be Multples of PAGE_SIZE(0x%lx)\n",
++			  _LLU(data_map->odm_stripe_unit), PAGE_SIZE);
++		return -ENOTSUPP;
++	}
++	return 0;
++}
++
++int objio_alloc_lseg(void **outp,
++	struct pnfs_layout_hdr *pnfslay,
++	struct pnfs_layout_segment *lseg,
++	struct pnfs_osd_layout *layout)
++{
++	struct objio_segment *objio_seg;
++	int err;
++
++	err = _verify_data_map(layout);
++	if (unlikely(err))
++		return err;
++
++	objio_seg = kzalloc(sizeof(*objio_seg) +
++			(layout->olo_num_comps - 1) * sizeof(objio_seg->ods[0]),
++			GFP_KERNEL);
++	if (!objio_seg)
++		return -ENOMEM;
++
++	objio_seg->layout = layout;
++	err = objio_devices_lookup(pnfslay, objio_seg);
++	if (err)
++		goto free_seg;
++
++	objio_seg->mirrors_p1 = layout->olo_map.odm_mirror_cnt + 1;
++	objio_seg->stripe_unit = layout->olo_map.odm_stripe_unit;
++	if (layout->olo_map.odm_group_width) {
++		objio_seg->group_width = layout->olo_map.odm_group_width;
++		objio_seg->group_depth = layout->olo_map.odm_group_depth;
++		objio_seg->group_count = layout->olo_map.odm_num_comps /
++						objio_seg->mirrors_p1 /
++						objio_seg->group_width;
++	} else {
++		objio_seg->group_width = layout->olo_map.odm_num_comps /
++						objio_seg->mirrors_p1;
++		objio_seg->group_depth = -1;
++		objio_seg->group_count = 1;
++	}
++
++	*outp = objio_seg;
++	return 0;
++
++free_seg:
++	dprintk("%s: Error: return %d\n", __func__, err);
++	kfree(objio_seg);
++	*outp = NULL;
++	return err;
++}
++
++void objio_free_lseg(void *p)
++{
++	struct objio_segment *objio_seg = p;
++
++	kfree(objio_seg);
++}
++
++int objio_alloc_io_state(void *seg, struct objlayout_io_state **outp)
++{
++	struct objio_segment *objio_seg = seg;
++	struct objio_state *ios;
++	const unsigned first_size = sizeof(*ios) +
++				objio_seg->num_comps * sizeof(ios->per_dev[0]);
++	const unsigned sec_size = objio_seg->num_comps *
++						sizeof(ios->ol_state.ioerrs[0]);
++
++	dprintk("%s: num_comps=%d\n", __func__, objio_seg->num_comps);
++	ios = kzalloc(first_size + sec_size, GFP_KERNEL);
++	if (unlikely(!ios))
++		return -ENOMEM;
++
++	ios->objio_seg = objio_seg;
++	ios->ol_state.ioerrs = ((void *)ios) + first_size;
++	ios->ol_state.num_comps = objio_seg->num_comps;
++
++	*outp = &ios->ol_state;
++	return 0;
++}
++
++void objio_free_io_state(struct objlayout_io_state *ol_state)
++{
++	struct objio_state *ios = container_of(ol_state, struct objio_state,
++					       ol_state);
++
++	kfree(ios);
++}
++
++enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
++{
++	switch (oep) {
++	case OSD_ERR_PRI_NO_ERROR:
++		return (enum pnfs_osd_errno)0;
++
++	case OSD_ERR_PRI_CLEAR_PAGES:
++		BUG_ON(1);
++		return 0;
++
++	case OSD_ERR_PRI_RESOURCE:
++		return PNFS_OSD_ERR_RESOURCE;
++	case OSD_ERR_PRI_BAD_CRED:
++		return PNFS_OSD_ERR_BAD_CRED;
++	case OSD_ERR_PRI_NO_ACCESS:
++		return PNFS_OSD_ERR_NO_ACCESS;
++	case OSD_ERR_PRI_UNREACHABLE:
++		return PNFS_OSD_ERR_UNREACHABLE;
++	case OSD_ERR_PRI_NOT_FOUND:
++		return PNFS_OSD_ERR_NOT_FOUND;
++	case OSD_ERR_PRI_NO_SPACE:
++		return PNFS_OSD_ERR_NO_SPACE;
++	default:
++		WARN_ON(1);
++		/* fallthrough */
++	case OSD_ERR_PRI_EIO:
++		return PNFS_OSD_ERR_EIO;
++	}
++}
++
++static void _clear_bio(struct bio *bio)
++{
++	struct bio_vec *bv;
++	unsigned i;
++
++	__bio_for_each_segment(bv, bio, i, 0) {
++		unsigned this_count = bv->bv_len;
++
++		if (likely(PAGE_SIZE == this_count))
++			clear_highpage(bv->bv_page);
++		else
++			zero_user(bv->bv_page, bv->bv_offset, this_count);
++	}
++}
++
++static int _io_check(struct objio_state *ios, bool is_write)
++{
++	enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR;
++	int lin_ret = 0;
++	int i;
++
++	for (i = 0; i <  ios->numdevs; i++) {
++		struct osd_sense_info osi;
++		struct osd_request *or = ios->per_dev[i].or;
++		int ret;
++
++		if (!or)	/* no request was set up in this slot */
++			continue;
++
++		ret = osd_req_decode_sense(or, &osi);
++		if (likely(!ret))
++			continue;
++
++		if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
++			/* the read started past the end of file: zero pages */
++			BUG_ON(is_write);
++			_clear_bio(ios->per_dev[i].bio);
++			dprintk("%s: start read offset passed end of file "
++				"offset=0x%llx, length=0x%lx\n", __func__,
++				_LLU(ios->per_dev[i].offset),
++				ios->per_dev[i].length);
++
++			continue; /* we recovered */
++		}
++		/* record the per-device error in the generic io state */
++		objlayout_io_set_result(&ios->ol_state, ios->per_dev[i].dev,
++					osd_pri_2_pnfs_err(osi.osd_err_pri),
++					ios->per_dev[i].offset,
++					ios->per_dev[i].length,
++					is_write);
++
++		if (osi.osd_err_pri >= oep) { /* keep the highest priority */
++			oep = osi.osd_err_pri;
++			lin_ret = ret;
++		}
++	}
++
++	return lin_ret; /* 0 if every request succeeded or was recovered */
++}
++
++/*
++ * Common IO state helpers.
++ */
++static void _io_free(struct objio_state *ios)
++{
++	unsigned i;
++
++	for (i = 0; i < ios->numdevs; i++) {
++		struct _objio_per_comp *per_dev = &ios->per_dev[i];
++
++		if (per_dev->or) {
++			osd_end_request(per_dev->or);
++			per_dev->or = NULL;
++		}
++
++		if (per_dev->bio) {
++			bio_put(per_dev->bio);
++			per_dev->bio = NULL;
++		}
++	}
++}
++
++struct osd_dev * _io_od(struct objio_state *ios, unsigned dev)
++{
++	unsigned min_dev = ios->objio_seg->layout->olo_comps_index;
++	unsigned max_dev = min_dev + ios->ol_state.num_comps;
++
++	BUG_ON(dev < min_dev || max_dev <= dev);
++	return ios->objio_seg->ods[dev - min_dev];
++}
++
++struct _striping_info {
++	u64 obj_offset;
++	u64 group_length;
++	u64 total_group_length;
++	u64 Major;
++	unsigned dev;
++	unsigned unit_off;
++};
++
++static void _calc_stripe_info(struct objio_state *ios, u64 file_offset,
++			      struct _striping_info *si)
++{
++	u32	stripe_unit = ios->objio_seg->stripe_unit;
++	u32	group_width = ios->objio_seg->group_width;
++	u64	group_depth = ios->objio_seg->group_depth;
++	u32	U = stripe_unit * group_width;
++
++	u64	T = U * group_depth;
++	u64	S = T * ios->objio_seg->group_count;
++	u64	M = div64_u64(file_offset, S);
++
++	/*
++	G = (L - (M * S)) / T
++	H = (L - (M * S)) % T
++	*/
++	u64	LmodU = file_offset - M * S;
++	u32	G = div64_u64(LmodU, T);
++	u64	H = LmodU - G * T;
++
++	u32	N = div_u64(H, U);
++
++	div_u64_rem(file_offset, stripe_unit, &si->unit_off);
++	si->obj_offset = si->unit_off + (N * stripe_unit) +
++				  (M * group_depth * stripe_unit);
++
++	/* "H - (N * U)" is just "H % U" so it's bound to u32 */
++	si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
++	si->dev *= ios->objio_seg->mirrors_p1;
++
++	si->group_length = T - H;
++	si->total_group_length = T;
++	si->Major = M;
++}
++
++static int _add_stripe_unit(struct objio_state *ios,  unsigned *cur_pg,
++		unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len)
++{
++	unsigned pg = *cur_pg;
++	struct request_queue *q =
++			osd_request_queue(_io_od(ios, per_dev->dev));
++
++	per_dev->length += cur_len;
++
++	if (per_dev->bio == NULL) {
++		unsigned stripes = ios->ol_state.num_comps /
++						     ios->objio_seg->mirrors_p1;
++		unsigned pages_in_stripe = stripes *
++				      (ios->objio_seg->stripe_unit / PAGE_SIZE);
++		unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) /
++				    stripes;
++
++		per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
++		if (unlikely(!per_dev->bio)) {
++			dprintk("Faild to allocate BIO size=%u\n", bio_size);
++			return -ENOMEM;
++		}
++	}
++
++	while (cur_len > 0) {
++		unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
++		unsigned added_len;
++
++		BUG_ON(ios->ol_state.nr_pages <= pg);
++		cur_len -= pglen;
++
++		added_len = bio_add_pc_page(q, per_dev->bio,
++					ios->ol_state.pages[pg], pglen, pgbase);
++		if (unlikely(pglen != added_len))
++			return -ENOMEM;
++		pgbase = 0;
++		++pg;
++	}
++	BUG_ON(cur_len);
++
++	*cur_pg = pg;
++	return 0;
++}
++
++static int _prepare_one_group(struct objio_state *ios, u64 length,
++			      struct _striping_info *si, unsigned first_comp,
++			      unsigned *last_pg)
++{
++	unsigned stripe_unit = ios->objio_seg->stripe_unit;
++	unsigned mirrors_p1 = ios->objio_seg->mirrors_p1;
++	unsigned devs_in_group = ios->objio_seg->group_width * mirrors_p1;
++	unsigned dev = si->dev;
++	unsigned first_dev = dev - (dev % devs_in_group);
++	unsigned comp = first_comp + (dev - first_dev);
++	unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
++	unsigned cur_pg = *last_pg;
++	int ret = 0;
++
++	while (length) {
++		struct _objio_per_comp *per_dev = &ios->per_dev[comp];
++		unsigned cur_len, page_off = 0;
++
++		if (!per_dev->length) {
++			per_dev->dev = dev;
++			if (dev < si->dev) {
++				per_dev->offset = si->obj_offset + stripe_unit -
++								   si->unit_off;
++				cur_len = stripe_unit;
++			} else if (dev == si->dev) {
++				per_dev->offset = si->obj_offset;
++				cur_len = stripe_unit - si->unit_off;
++				page_off = si->unit_off & ~PAGE_MASK;
++				BUG_ON(page_off &&
++				      (page_off != ios->ol_state.pgbase));
++			} else { /* dev > si->dev */
++				per_dev->offset = si->obj_offset - si->unit_off;
++				cur_len = stripe_unit;
++			}
++
++			if (max_comp < comp)
++				max_comp = comp;
++
++			dev += mirrors_p1;
++			dev = (dev % devs_in_group) + first_dev;
++		} else {
++			cur_len = stripe_unit;
++		}
++		if (cur_len >= length)
++			cur_len = length;
++
++		ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
++				       cur_len);
++		if (unlikely(ret))
++			goto out;
++
++		comp += mirrors_p1;
++		comp = (comp % devs_in_group) + first_comp;
++
++		length -= cur_len;
++		ios->length += cur_len;
++	}
++out:
++	ios->numdevs = max_comp + mirrors_p1;
++	*last_pg = cur_pg;
++	return ret;
++}
++
++static int _io_rw_pagelist(struct objio_state *ios)
++{
++	u64 length = ios->ol_state.count;
++	struct _striping_info si;
++	unsigned devs_in_group = ios->objio_seg->group_width *
++				 ios->objio_seg->mirrors_p1;
++	unsigned first_comp = 0;
++	unsigned num_comps = ios->objio_seg->layout->olo_map.odm_num_comps;
++	unsigned last_pg = 0;
++	int ret = 0;
++
++	_calc_stripe_info(ios, ios->ol_state.offset, &si);
++	while (length) {
++		if (length < si.group_length)
++			si.group_length = length;
++
++		ret = _prepare_one_group(ios, si.group_length, &si, first_comp,
++					 &last_pg);
++		if (unlikely(ret))
++			goto out;
++
++		length -= si.group_length;
++
++		si.group_length = si.total_group_length;
++		si.unit_off = 0;
++		++si.Major;
++		si.obj_offset = si.Major * ios->objio_seg->stripe_unit *
++						ios->objio_seg->group_depth;
++
++		si.dev = (si.dev - (si.dev % devs_in_group)) + devs_in_group;
++		si.dev %= num_comps;
++
++		first_comp += devs_in_group;
++		first_comp %= num_comps;
++	}
++
++out:
++	if (!ios->length)
++		return ret;
++
++	return 0;
++}
++
++static ssize_t _sync_done(struct objio_state *ios)
++{
++	struct completion *waiting = ios->private;
++
++	complete(waiting);
++	return 0;
++}
++
++static void _last_io(struct kref *kref)
++{
++	struct objio_state *ios = container_of(kref, struct objio_state, kref);
++
++	ios->done(ios);
++}
++
++static void _done_io(struct osd_request *or, void *p)
++{
++	struct objio_state *ios = p;
++
++	kref_put(&ios->kref, _last_io);
++}
++
++static ssize_t _io_exec(struct objio_state *ios)
++{
++	DECLARE_COMPLETION_ONSTACK(wait);
++	ssize_t status = 0; /* stays 0 for async IO; sync IO sets it below */
++	unsigned i;
++	objio_done_fn saved_done_fn = ios->done;
++	bool sync = ios->ol_state.sync;
++
++	if (sync) {
++		ios->done = _sync_done;	/* only completes &wait */
++		ios->private = &wait;
++	}
++
++	kref_init(&ios->kref);	/* initial reference, put after the loop */
++
++	for (i = 0; i < ios->numdevs; i++) {
++		struct osd_request *or = ios->per_dev[i].or;
++
++		if (!or)
++			continue;
++
++		kref_get(&ios->kref);	/* one per request, put by _done_io */
++		osd_execute_request_async(or, _done_io, ios);
++	}
++
++	kref_put(&ios->kref, _last_io);	/* last put calls ios->done */
++
++	if (sync) {
++		wait_for_completion(&wait);
++		status = saved_done_fn(ios);	/* caller's done, run here */
++	}
++
++	return status;
++}
++
++/*
++ * read
++ */
++static ssize_t _read_done(struct objio_state *ios)
++{
++	ssize_t status;
++	int ret = _io_check(ios, false);
++
++	_io_free(ios);
++
++	if (likely(!ret))
++		status = ios->length;
++	else
++		status = ret;
++
++	objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync);
++	return status;
++}
++
++static int _read_mirrors(struct objio_state *ios, unsigned cur_comp)
++{
++	struct osd_request *or = NULL;
++	struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
++	unsigned dev = per_dev->dev;
++	struct pnfs_osd_object_cred *cred =
++			&ios->objio_seg->layout->olo_comps[dev];
++	struct osd_obj_id obj = {
++		.partition = cred->oc_object_id.oid_partition_id,
++		.id = cred->oc_object_id.oid_object_id,
++	};
++	int ret;
++
++	or = osd_start_request(_io_od(ios, dev), GFP_KERNEL);
++	if (unlikely(!or)) {
++		ret = -ENOMEM;
++		goto err;
++	}
++	per_dev->or = or;
++
++	osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length);
++
++	ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
++	if (ret) {
++		dprintk("%s: Faild to osd_finalize_request() => %d\n",
++			__func__, ret);
++		goto err;
++	}
++
++	dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
++		__func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
++		per_dev->length);
++
++err:
++	return ret;
++}
++
++static ssize_t _read_exec(struct objio_state *ios)
++{
++	unsigned i;
++	int ret;
++
++	for (i = 0; i < ios->numdevs; i += ios->objio_seg->mirrors_p1) {
++		if (!ios->per_dev[i].length)
++			continue;
++		ret = _read_mirrors(ios, i);
++		if (unlikely(ret))
++			goto err;
++	}
++
++	ios->done = _read_done;
++	return _io_exec(ios); /* In sync mode exec returns the io status */
++
++err:
++	_io_free(ios);
++	return ret;
++}
++
++ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state)
++{
++	struct objio_state *ios = container_of(ol_state, struct objio_state,
++					       ol_state);
++	int ret;
++
++	ret = _io_rw_pagelist(ios);
++	if (unlikely(ret))
++		return ret;
++
++	return _read_exec(ios);
++}
++
++/*
++ * write
++ */
++static ssize_t _write_done(struct objio_state *ios)
++{
++	ssize_t status;
++	int ret = _io_check(ios, true);
++
++	_io_free(ios);
++
++	if (likely(!ret)) {
++		/* FIXME: should be based on the OSD's persistence model
++		 * See OSD2r05 Section 4.13 Data persistence model */
++		ios->ol_state.committed = NFS_UNSTABLE; //NFS_FILE_SYNC;
++		status = ios->length;
++	} else {
++		status = ret;
++	}
++
++	objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync);
++	return status;
++}
++
++static int _write_mirrors(struct objio_state *ios, unsigned cur_comp)
++{
++	struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp];
++	unsigned dev = ios->per_dev[cur_comp].dev;
++	unsigned last_comp = cur_comp + ios->objio_seg->mirrors_p1;
++	int ret;
++
++	for (; cur_comp < last_comp; ++cur_comp, ++dev) {
++		struct osd_request *or = NULL;
++		struct pnfs_osd_object_cred *cred =
++					&ios->objio_seg->layout->olo_comps[dev];
++		struct osd_obj_id obj = {
++			.partition = cred->oc_object_id.oid_partition_id,
++			.id = cred->oc_object_id.oid_object_id,
++		};
++		struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
++		struct bio *bio;
++
++		or = osd_start_request(_io_od(ios, dev), GFP_KERNEL);
++		if (unlikely(!or)) {
++			ret = -ENOMEM;
++			goto err;
++		}
++		per_dev->or = or;
++
++		if (per_dev != master_dev) {
++			bio = bio_kmalloc(GFP_KERNEL,
++					  master_dev->bio->bi_max_vecs);
++			if (unlikely(!bio)) {
++				dprintk("Faild to allocate BIO size=%u\n",
++					master_dev->bio->bi_max_vecs);
++				ret = -ENOMEM;
++				goto err;
++			}
++
++			__bio_clone(bio, master_dev->bio);
++			bio->bi_bdev = NULL;
++			bio->bi_next = NULL;
++			per_dev->bio = bio;
++			per_dev->dev = dev;
++			per_dev->length = master_dev->length;
++			per_dev->offset =  master_dev->offset;
++		} else {
++			bio = master_dev->bio;
++			/* FIXME: bio_set_dir() */
++			bio->bi_rw |= (1 << BIO_RW);
++		}
++
++		osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length);
++
++		ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
++		if (ret) {
++			dprintk("%s: Faild to osd_finalize_request() => %d\n",
++				__func__, ret);
++			goto err;
++		}
++
++		dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
++			__func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
++			per_dev->length);
++	}
++
++err:
++	return ret;
++}
++
++static ssize_t _write_exec(struct objio_state *ios)
++{
++	unsigned i;
++	int ret;
++
++	for (i = 0; i < ios->numdevs; i += ios->objio_seg->mirrors_p1) {
++		if (!ios->per_dev[i].length)
++			continue;
++		ret = _write_mirrors(ios, i);
++		if (unlikely(ret))
++			goto err;
++	}
++
++	ios->done = _write_done;
++	return _io_exec(ios); /* In sync mode exec returns the io->status */
++
++err:
++	_io_free(ios);
++	return ret;
++}
++
++ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable)
++{
++	struct objio_state *ios = container_of(ol_state, struct objio_state,
++					       ol_state);
++	int ret;
++
++	/* TODO: ios->stable = stable; */
++	ret = _io_rw_pagelist(ios);
++	if (unlikely(ret))
++		return ret;
++
++	return _write_exec(ios);
++}
++
++/*
++ * Policy Operations
++ */
++
++/*
++ * Return the stripe size for the specified file
++ */
++ssize_t
++objlayout_get_stripesize(struct pnfs_layout_hdr *pnfslay)
++{
++	ssize_t sz, maxsz = -1;
++	struct pnfs_layout_segment *lseg;
++
++	list_for_each_entry(lseg, &pnfslay->segs, fi_list) {
++		int n;
++		struct objlayout_segment *objlseg = LSEG_LD_DATA(lseg);
++		struct pnfs_osd_layout *lo =
++			(struct pnfs_osd_layout *)objlseg->pnfs_osd_layout;
++		struct pnfs_osd_data_map *map = &lo->olo_map;
++
++		n = map->odm_group_width;
++		if (n == 0)
++			n = map->odm_num_comps / (map->odm_mirror_cnt + 1);
++
++		switch (map->odm_raid_algorithm) {
++		case PNFS_OSD_RAID_0:
++			break;
++
++		case PNFS_OSD_RAID_4:
++		case PNFS_OSD_RAID_5:
++			n -= 1;
++			break;
++
++		case PNFS_OSD_RAID_PQ:
++			n -= 2;
++			break;
++
++		default:
++			BUG_ON(1);
++		}
++		sz = map->odm_stripe_unit * n;
++		if (sz > maxsz)
++			maxsz = sz;
++	}
++	dprintk("%s: Return %Zx\n", __func__, maxsz);
++	return maxsz;
++}
++
++/*
++ * Get the max [rw]size
++ */
++static ssize_t
++objlayout_get_blocksize(void)
++{
++	ssize_t sz = BIO_MAX_PAGES_KMALLOC * PAGE_SIZE;
++
++	return sz;
++}
++
++static struct layoutdriver_policy_operations objlayout_policy_operations = {
++/*
++ * Don't gather across stripes, but rather gather (coalesce) up to
++ * the stripe size.
++ *
++ * FIXME: change interface to use merge_align, merge_count
++ */
++	.flags                 = PNFS_LAYOUTRET_ON_SETATTR,
++	.get_stripesize        = objlayout_get_stripesize,
++	.get_blocksize         = objlayout_get_blocksize,
++};
++
++static struct pnfs_layoutdriver_type objlayout_type = {
++	.id = LAYOUT_OSD2_OBJECTS,
++	.name = "LAYOUT_OSD2_OBJECTS",
++	.ld_io_ops = &objlayout_io_operations,
++	.ld_policy_ops = &objlayout_policy_operations,
++};
++
++void *objio_init_mt(void)
++{
++	struct objio_mount_type *omt = kzalloc(sizeof(*omt), GFP_KERNEL);
++
++	if (!omt)
++		return ERR_PTR(-ENOMEM);
++
++	INIT_LIST_HEAD(&omt->dev_list);
++	spin_lock_init(&omt->dev_list_lock);
++	return omt;
++}
++
++void objio_fini_mt(void *mountid)
++{
++	_dev_list_remove_all(mountid);
++	kfree(mountid);
++}
++
++MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
++MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
++MODULE_LICENSE("GPL");
++
++static int __init
++objlayout_init(void)
++{
++	pnfs_client_ops = pnfs_register_layoutdriver(&objlayout_type);
++	printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n",
++	       __func__);
++	return 0;
++}
++
++static void __exit
++objlayout_exit(void)
++{
++	pnfs_unregister_layoutdriver(&objlayout_type);
++	printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n",
++	       __func__);
++}
++
++module_init(objlayout_init);
++module_exit(objlayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-08-31 20:42:05.535059115 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-08-31 20:42:05.535059115 -0400
+@@ -0,0 +1,790 @@
++/*
++ *  objlayout.c
++ *
++ *  pNFS layout driver for Panasas OSDs
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <scsi/osd_initiator.h>
++#include "objlayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++struct pnfs_client_operations *pnfs_client_ops;
++
++/*
++ * Create an objlayout layout structure for the given inode and return it.
++ */
++static struct pnfs_layout_hdr *
++objlayout_alloc_layout(struct inode *inode)
++{
++	struct objlayout *objlay = kzalloc(sizeof(*objlay), GFP_KERNEL);
++
++	dprintk("%s: Return %p\n", __func__, objlay);
++	if (!objlay)
++		return NULL; /* don't form &NULL->pnfs_layout on ENOMEM */
++
++	spin_lock_init(&objlay->lock);
++	INIT_LIST_HEAD(&objlay->err_list);
++	return &objlay->pnfs_layout;
++}
++
++/*
++ * Free an objlayout layout structure
++ */
++static void
++objlayout_free_layout(struct pnfs_layout_hdr *lo)
++{
++	struct objlayout *objlay = OBJLAYOUT(lo);
++
++	dprintk("%s: objlay %p\n", __func__, objlay);
++
++	WARN_ON(!list_empty(&objlay->err_list));
++	kfree(objlay);
++}
++
++/*
++ * Unmarshall layout and store it in pnfslay.
++ */
++static struct pnfs_layout_segment *
++objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
++		     struct nfs4_layoutget_res *lgr)
++{
++	int status;
++	void *layout = lgr->layout.buf;
++	struct pnfs_layout_segment *lseg;
++	struct objlayout_segment *objlseg;
++	struct pnfs_osd_layout *pnfs_osd_layout;
++
++	dprintk("%s: Begin pnfslay %p layout %p\n", __func__, pnfslay, layout);
++
++	BUG_ON(!layout);
++
++	status = -ENOMEM;
++	lseg = kzalloc(sizeof(*lseg) + sizeof(*objlseg) +
++		       pnfs_osd_layout_incore_sz(layout), GFP_KERNEL);
++	if (!lseg)
++		goto err;
++
++	objlseg = LSEG_LD_DATA(lseg);
++	pnfs_osd_layout = (struct pnfs_osd_layout *)objlseg->pnfs_osd_layout;
++	pnfs_osd_xdr_decode_layout(pnfs_osd_layout, layout);
++
++	status = objio_alloc_lseg(&objlseg->internal, pnfslay, lseg,
++				  pnfs_osd_layout);
++	if (status)
++		goto err;
++
++	dprintk("%s: Return %p\n", __func__, lseg);
++	return lseg;
++
++ err:
++	kfree(lseg);
++	return ERR_PTR(status);
++}
++
++/*
++ * Free a layout segment
++ */
++static void
++objlayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	struct objlayout_segment *objlseg;
++
++	dprintk("%s: freeing layout segment %p\n", __func__, lseg);
++
++	if (unlikely(!lseg))
++		return;
++
++	objlseg = LSEG_LD_DATA(lseg);
++	objio_free_lseg(objlseg->internal);
++	kfree(lseg);
++}
++
++/*
++ * I/O Operations
++ */
++static inline u64
++end_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	end = start + len;
++	return end >= start ? end : NFS4_MAX_UINT64;
++}
++
++/* last octet in a range */
++static inline u64
++last_byte_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	BUG_ON(!len);
++	end = start + len;
++	return end > start ? end - 1 : NFS4_MAX_UINT64;
++}
++
++static struct objlayout_io_state *
++objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type,
++			struct page **pages,
++			unsigned pgbase,
++			unsigned nr_pages,
++			loff_t offset,
++			size_t count,
++			struct pnfs_layout_segment *lseg,
++			void *rpcdata)
++{
++	struct objlayout_segment *objlseg = LSEG_LD_DATA(lseg);
++	struct objlayout_io_state *state;
++	u64 lseg_end_offset;
++	size_t size_nr_pages;
++
++	dprintk("%s: allocating io_state\n", __func__);
++	if (objio_alloc_io_state(objlseg->internal, &state))
++		return NULL;
++
++	BUG_ON(offset < lseg->range.offset);
++	lseg_end_offset = end_offset(lseg->range.offset, lseg->range.length);
++	BUG_ON(offset >= lseg_end_offset);
++	if (offset + count > lseg_end_offset) {
++		count = lseg->range.length - (offset - lseg->range.offset);
++		dprintk("%s: truncated count %Zd\n", __func__, count);
++	}
++	/* Skip whole pages covered by pgbase, pgbase == PAGE_SIZE included */
++	if (pgbase >= PAGE_SIZE) {
++		unsigned n = pgbase >> PAGE_SHIFT;
++
++		pgbase &= ~PAGE_MASK;
++		pages += n;
++		nr_pages -= n;
++	}
++	/* Number of pages spanned by [pgbase, pgbase + count) */
++	size_nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
++	BUG_ON(nr_pages < size_nr_pages);
++	if (nr_pages > size_nr_pages)
++		nr_pages = size_nr_pages;
++
++	INIT_LIST_HEAD(&state->err_list);
++	state->lseg = lseg;
++	state->rpcdata = rpcdata;
++	state->pages = pages;
++	state->pgbase = pgbase;
++	state->nr_pages = nr_pages;
++	state->offset = offset;
++	state->count = count;
++	state->sync = 0;
++
++	return state;
++}
++
++static void
++objlayout_free_io_state(struct objlayout_io_state *state)
++{
++	dprintk("%s: freeing io_state\n", __func__);
++	if (unlikely(!state))
++		return;
++
++	objio_free_io_state(state);
++}
++
++/*
++ * I/O done common code
++ */
++static void
++objlayout_iodone(struct objlayout_io_state *state)
++{
++	dprintk("%s: state %p status\n", __func__, state);
++	if (likely(state->status >= 0)) {
++		objlayout_free_io_state(state);
++	} else {
++		struct objlayout *objlay = OBJLAYOUT(state->lseg->layout);
++
++		spin_lock(&objlay->lock);
++		objlay->delta_space_valid = OBJ_DSU_INVALID;
++		/* keep the failed state, queued on the layout (head = 2nd arg) */
++		list_add(&state->err_list, &objlay->err_list);
++		spin_unlock(&objlay->lock);
++	}
++}
++
++/*
++ * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
++ *
++ * The @index component IO failed (error returned from target). Register
++ * the error for later reporting at layout-return.
++ */
++void
++objlayout_io_set_result(struct objlayout_io_state *state, unsigned index,
++			int osd_error, u64 offset, u64 length, bool is_write)
++{
++	struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index];
++
++	BUG_ON(index >= state->num_comps);
++	if (osd_error) {
++		struct objlayout_segment *objlseg = LSEG_LD_DATA(state->lseg);
++		struct pnfs_osd_layout *layout =
++				(typeof(layout))objlseg->pnfs_osd_layout;
++
++		ioerr->oer_component = layout->olo_comps[index].oc_object_id;
++		ioerr->oer_comp_offset = offset;
++		ioerr->oer_comp_length = length;
++		ioerr->oer_iswrite = is_write;
++		ioerr->oer_errno = osd_error;
++
++		dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
++			"par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
++			__func__, index, ioerr->oer_errno,
++			ioerr->oer_iswrite,
++			_DEVID_LO(&ioerr->oer_component.oid_device_id),
++			_DEVID_HI(&ioerr->oer_component.oid_device_id),
++			ioerr->oer_component.oid_partition_id,
++			ioerr->oer_component.oid_object_id,
++			ioerr->oer_comp_offset,
++			ioerr->oer_comp_length);
++	} else {
++		/* User need not call if no error is reported */
++		ioerr->oer_errno = 0;
++	}
++}
++
++static void _rpc_commit_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++
++	pnfs_client_ops->nfs_commit_complete(wdata);
++}
++
++/*
++ * Commit data remotely on OSDs
++ */
++enum pnfs_try_status
++objlayout_commit(struct nfs_write_data *wdata, int how)
++{
++	int status = PNFS_ATTEMPTED;
++
++	INIT_WORK(&wdata->task.u.tk_work, _rpc_commit_complete);
++	schedule_work(&wdata->task.u.tk_work);
++	dprintk("%s: Return %d\n", __func__, status);
++	return status;
++}
++
++/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
++ * This is because the osd completion is called with ints-off from
++ * the block layer
++ */
++static void _rpc_read_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_read_data *rdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	rdata = container_of(task, struct nfs_read_data, task);
++
++	pnfs_client_ops->nfs_readlist_complete(rdata);
++}
++
++void
++objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
++{
++	int eof = state->eof;
++	struct nfs_read_data *rdata;
++
++	state->status = status;
++	dprintk("%s: Begin status=%Zd eof=%d\n", __func__, status, eof);
++	rdata = state->rpcdata;
++	rdata->task.tk_status = status;
++	if (status >= 0) {
++		rdata->res.count = status;
++		rdata->res.eof = eof;
++	}
++	objlayout_iodone(state);
++	/* must not use state after this point: iodone may have freed it */
++	/* complete inline when sync, otherwise hand off to a work item */
++	if (sync)
++		pnfs_client_ops->nfs_readlist_complete(rdata);
++	else {
++		INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
++		schedule_work(&rdata->task.u.tk_work);
++	}
++}
++
++/*
++ * Perform sync or async reads.
++ */
++enum pnfs_try_status
++objlayout_read_pagelist(struct nfs_read_data *rdata, unsigned nr_pages)
++{
++	loff_t offset = rdata->args.offset;
++	size_t count = rdata->args.count;
++	struct objlayout_io_state *state;
++	ssize_t status = 0;
++	loff_t eof;
++
++	dprintk("%s: Begin inode %p offset %llu count %d\n",
++		__func__, rdata->inode, offset, (int)count);
++
++	eof = i_size_read(rdata->inode);
++	if (unlikely(offset + count > eof)) {
++		if (offset >= eof) {
++			status = 0;
++			rdata->res.count = 0;
++			rdata->res.eof = 1;
++			goto out;
++		}
++		count = eof - offset;
++	}
++
++	state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout,
++					 rdata->args.pages, rdata->args.pgbase,
++					 nr_pages, offset, count,
++					 rdata->pdata.lseg, rdata);
++	if (unlikely(!state)) {
++		status = -ENOMEM;
++		goto out;
++	}
++
++	state->eof = state->offset + state->count >= eof;
++
++	status = objio_read_pagelist(state);
++ out:
++	dprintk("%s: Return status %Zd\n", __func__, status);
++	rdata->pdata.pnfs_error = status;
++	return PNFS_ATTEMPTED;
++}
++
++/* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
++ * This is because the osd completion is called with ints-off from
++ * the block layer
++ */
++static void _rpc_write_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++
++	pnfs_client_ops->nfs_writelist_complete(wdata);
++}
++
++void
++objlayout_write_done(struct objlayout_io_state *state, ssize_t status,
++		     bool sync)
++{
++	struct nfs_write_data *wdata;
++
++	dprintk("%s: Begin\n", __func__);
++	wdata = state->rpcdata;
++	state->status = status;
++	wdata->task.tk_status = status;
++	if (status >= 0) {
++		wdata->res.count = status;
++		wdata->verf.committed = state->committed;
++		dprintk("%s: Return status %d committed %d\n",
++			__func__, wdata->task.tk_status,
++			wdata->verf.committed);
++	} else
++		dprintk("%s: Return status %d\n",
++			__func__, wdata->task.tk_status);
++	objlayout_iodone(state);
++	/* must not use state after this point */
++
++	if (sync)
++		pnfs_client_ops->nfs_writelist_complete(wdata);
++	else {
++		INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
++		schedule_work(&wdata->task.u.tk_work);
++	}
++}
++
++/*
++ * Perform sync or async writes.
++ */
++enum pnfs_try_status
++objlayout_write_pagelist(struct nfs_write_data *wdata,
++			 unsigned nr_pages,
++			 int how)
++{
++	struct objlayout_io_state *state;
++	ssize_t status;
++
++	dprintk("%s: Begin inode %p offset %llu count %u\n",
++		__func__, wdata->inode, wdata->args.offset, wdata->args.count);
++
++	state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout,
++					 wdata->args.pages,
++					 wdata->args.pgbase,
++					 nr_pages,
++					 wdata->args.offset,
++					 wdata->args.count,
++					 wdata->pdata.lseg, wdata);
++	if (unlikely(!state)) {
++		status = -ENOMEM;
++		goto out;
++	}
++
++	state->sync = how & FLUSH_SYNC;
++
++	status = objio_write_pagelist(state, how & FLUSH_STABLE);
++ out:
++	dprintk("%s: Return status %Zd\n", __func__, status);
++	wdata->pdata.pnfs_error = status;
++	return PNFS_ATTEMPTED;
++}
++
++void
++objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay,
++			      struct xdr_stream *xdr,
++			      const struct nfs4_layoutcommit_args *args)
++{
++	struct objlayout *objlay = OBJLAYOUT(pnfslay);
++	struct pnfs_osd_layoutupdate lou;
++	__be32 *start;
++
++	dprintk("%s: Begin\n", __func__);
++	/* snapshot and reset the space accounting under the layout lock */
++	spin_lock(&objlay->lock);
++	lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
++	lou.dsu_delta = objlay->delta_space_used;
++	objlay->delta_space_used = 0;
++	objlay->delta_space_valid = OBJ_DSU_INIT;
++	lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
++	spin_unlock(&objlay->lock);
++
++	start = xdr_reserve_space(xdr, 4);
++	BUG_ON(!start);
++	BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));
++	/* back-fill the reserved word with the encoded length in bytes */
++	*start = cpu_to_be32((xdr->p - start - 1) * 4);
++
++	dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
++		lou.dsu_delta, lou.olu_ioerr_flag);
++}
++
++static int
++err_prio(u32 oer_errno)
++{
++	switch (oer_errno) {
++	case 0:
++		return 0;
++
++	case PNFS_OSD_ERR_RESOURCE:
++		return OSD_ERR_PRI_RESOURCE;
++	case PNFS_OSD_ERR_BAD_CRED:
++		return OSD_ERR_PRI_BAD_CRED;
++	case PNFS_OSD_ERR_NO_ACCESS:
++		return OSD_ERR_PRI_NO_ACCESS;
++	case PNFS_OSD_ERR_UNREACHABLE:
++		return OSD_ERR_PRI_UNREACHABLE;
++	case PNFS_OSD_ERR_NOT_FOUND:
++		return OSD_ERR_PRI_NOT_FOUND;
++	case PNFS_OSD_ERR_NO_SPACE:
++		return OSD_ERR_PRI_NO_SPACE;
++	default:
++		WARN_ON(1);
++		/* fallthrough */
++	case PNFS_OSD_ERR_EIO:
++		return OSD_ERR_PRI_EIO;
++	}
++}
++
++/* Fold src_err into the accumulated dest_err (range union, errno merge) */
++static void
++merge_ioerr(struct pnfs_osd_ioerr *dest_err,
++	    const struct pnfs_osd_ioerr *src_err)
++{
++	u64 dest_end, src_end;
++
++	if (!dest_err->oer_errno) {
++		*dest_err = *src_err;
++		/* accumulated device must be blank */
++		memset(&dest_err->oer_component.oid_device_id, 0,
++			sizeof(dest_err->oer_component.oid_device_id));
++		return;
++	}
++
++	if (dest_err->oer_component.oid_partition_id !=
++				src_err->oer_component.oid_partition_id)
++		dest_err->oer_component.oid_partition_id = 0;
++
++	if (dest_err->oer_component.oid_object_id !=
++				src_err->oer_component.oid_object_id)
++		dest_err->oer_component.oid_object_id = 0;
++
++	/* Take both range ends before oer_comp_offset is lowered below */
++	dest_end = end_offset(dest_err->oer_comp_offset,
++			      dest_err->oer_comp_length);
++	src_end =  end_offset(src_err->oer_comp_offset,
++			      src_err->oer_comp_length);
++	if (dest_end < src_end)
++		dest_end = src_end;
++	if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
++		dest_err->oer_comp_offset = src_err->oer_comp_offset;
++	dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;
++
++	if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
++	    (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
++		dest_err->oer_errno = src_err->oer_errno;
++	} else if (src_err->oer_iswrite && !dest_err->oer_iswrite) {
++		/* a write error replaces an accumulated read error */
++		dest_err->oer_iswrite = true;
++		dest_err->oer_errno = src_err->oer_errno;
++	}
++}
++
++static void
++encode_accumulated_error(struct objlayout *objlay, struct xdr_stream *xdr)
++{
++	struct objlayout_io_state *state, *tmp;
++	struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
++
++	list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
++		unsigned i;
++
++		for (i = 0; i < state->num_comps; i++) {
++			struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
++
++			if (!ioerr->oer_errno)
++				continue;
++
++			printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d "
++				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
++				"offset=0x%llx length=0x%llx\n",
++				__func__, i, ioerr->oer_errno,
++				ioerr->oer_iswrite,
++				_DEVID_LO(&ioerr->oer_component.oid_device_id),
++				_DEVID_HI(&ioerr->oer_component.oid_device_id),
++				ioerr->oer_component.oid_partition_id,
++				ioerr->oer_component.oid_object_id,
++				ioerr->oer_comp_offset,
++				ioerr->oer_comp_length);
++
++			merge_ioerr(&accumulated_err, ioerr);
++		}
++		list_del(&state->err_list);
++		objlayout_free_io_state(state);
++	}
++
++	BUG_ON(pnfs_osd_xdr_encode_ioerr(xdr, &accumulated_err));
++}
++
++void
++objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
++			      struct xdr_stream *xdr,
++			      const struct nfs4_layoutreturn_args *args)
++{
++	struct objlayout *objlay = OBJLAYOUT(pnfslay);
++	struct objlayout_io_state *state, *tmp;
++	__be32 *start, *uninitialized_var(last_xdr);
++
++	dprintk("%s: Begin\n", __func__);
++	start = xdr_reserve_space(xdr, 4);
++	BUG_ON(!start);
++
++	spin_lock(&objlay->lock);
++
++	list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
++		unsigned i;
++		int res = 0;
++
++		for (i = 0; i < state->num_comps && !res; i++) {
++			struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
++
++			if (!ioerr->oer_errno)
++				continue;
++
++			dprintk("%s: err[%d]: errno=%d is_write=%d "
++				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
++				"offset=0x%llx length=0x%llx\n",
++				__func__, i, ioerr->oer_errno,
++				ioerr->oer_iswrite,
++				_DEVID_LO(&ioerr->oer_component.oid_device_id),
++				_DEVID_HI(&ioerr->oer_component.oid_device_id),
++				ioerr->oer_component.oid_partition_id,
++				ioerr->oer_component.oid_object_id,
++				ioerr->oer_comp_offset,
++				ioerr->oer_comp_length);
++
++			last_xdr = xdr->p;
++			res = pnfs_osd_xdr_encode_ioerr(xdr, &state->ioerrs[i]);
++		}
++		if (unlikely(res)) {
++			/* no space for even one error descriptor */
++			BUG_ON(last_xdr == start + 1);
++
++			/* we've encountered a situation with lots and lots of
++			 * errors and no space to encode them all. Use the last
++			 * available slot to report the union of all the
++			 * remaining errors.
++			 */
++			xdr_rewind_stream(xdr, last_xdr -
++					       pnfs_osd_ioerr_xdr_sz() / 4);
++			encode_accumulated_error(objlay, xdr);
++			goto loop_done;
++		}
++		list_del(&state->err_list);
++		objlayout_free_io_state(state);
++	}
++loop_done:
++	spin_unlock(&objlay->lock);
++
++	*start = cpu_to_be32((xdr->p - start - 1) * 4);
++	dprintk("%s: Return\n", __func__);
++}
++
++struct objlayout_deviceinfo {
++	struct page *page;
++	struct pnfs_osd_deviceaddr da; /* This must be last */
++};
++
++/* Initialize and call nfs_getdeviceinfo, then decode and return a
++ * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo()
++ * should be called.
++ */
++int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
++	struct pnfs_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr)
++{
++	struct objlayout_deviceinfo *odi;
++	struct pnfs_device pd;
++	struct super_block *sb;
++	struct page *page;
++	size_t sz;
++	u32 *p;
++	int err;
++
++	page = alloc_page(GFP_KERNEL);
++	if (!page)
++		return -ENOMEM;
++
++	pd.area = page_address(page);
++
++	memcpy(&pd.dev_id, d_id, sizeof(*d_id));
++	pd.layout_type = LAYOUT_OSD2_OBJECTS;
++	pd.dev_notify_types = 0;
++	pd.pages = &page;
++	pd.pgbase = 0;
++	pd.pglen = PAGE_SIZE;
++	pd.mincount = 0;
++
++	sb = PNFS_INODE(pnfslay)->i_sb;
++	err = pnfs_client_ops->nfs_getdeviceinfo(PNFS_NFS_SERVER(pnfslay), &pd);
++	dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
++	if (err)
++		goto err_out;
++
++	p = pd.area;
++	sz = pnfs_osd_xdr_deviceaddr_incore_sz(p);
++	odi = kzalloc(sz + (sizeof(*odi) - sizeof(odi->da)), GFP_KERNEL);
++	if (!odi) {
++		err = -ENOMEM;
++		goto err_out;
++	}
++	pnfs_osd_xdr_decode_deviceaddr(&odi->da, p);
++	odi->page = page;
++	*deviceaddr = &odi->da;
++	return 0;
++
++err_out:
++	__free_page(page);
++	return err;
++}
++
++void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
++{
++	struct objlayout_deviceinfo *odi = container_of(deviceaddr,
++						struct objlayout_deviceinfo,
++						da);
++
++	__free_page(odi->page);
++	kfree(odi);
++}
++
++/*
++ * Initialize a mountpoint: obtain the I/O engine's per-mount
++ * data from objio_init_mt() and store it in server->pnfs_ld_data,
++ * where objlayout_uninitialize_mountpoint() later picks it up.
++ * Return 0 on success or the error encoded by objio_init_mt().
++ */
++static int
++objlayout_initialize_mountpoint(struct nfs_server *server,
++				const struct nfs_fh *mntfh)
++{
++	void *data;
++
++	data = objio_init_mt();
++	if (IS_ERR(data)) {
++		printk(KERN_INFO "%s: objlayout lib not ready err=%ld\n",
++		       __func__, PTR_ERR(data));
++		return PTR_ERR(data);
++	}
++	server->pnfs_ld_data = data;
++
++	dprintk("%s: Return data=%p\n", __func__, data);
++	return 0;
++}
++
++/*
++ * Uninitialize a mountpoint
++ */
++static int
++objlayout_uninitialize_mountpoint(struct nfs_server *server)
++{
++	dprintk("%s: Begin %p\n", __func__, server->pnfs_ld_data);
++	objio_fini_mt(server->pnfs_ld_data);
++	return 0;
++}
++
++struct layoutdriver_io_operations objlayout_io_operations = {
++	.commit                  = objlayout_commit,
++	.read_pagelist           = objlayout_read_pagelist,
++	.write_pagelist          = objlayout_write_pagelist,
++	.alloc_layout            = objlayout_alloc_layout,
++	.free_layout             = objlayout_free_layout,
++	.alloc_lseg              = objlayout_alloc_lseg,
++	.free_lseg               = objlayout_free_lseg,
++	.encode_layoutcommit	 = objlayout_encode_layoutcommit,
++	.encode_layoutreturn     = objlayout_encode_layoutreturn,
++	.initialize_mountpoint   = objlayout_initialize_mountpoint,
++	.uninitialize_mountpoint = objlayout_uninitialize_mountpoint,
++};
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-08-31 20:42:05.535059115 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-08-31 20:42:05.535059115 -0400
+@@ -0,0 +1,171 @@
++/*
++ *  objlayout.h
++ *
++ *  Data types and function declarations for interfacing with the
++ *  pNFS standard object layout driver.
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#ifndef _OBJLAYOUT_H
++#define _OBJLAYOUT_H
++
++#include <linux/nfs_fs.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/pnfs_osd_xdr.h>
++
++/*
++ * in-core layout segment
++ */
++struct objlayout_segment {
++	void *internal;    /* for provider internal use */
++	u8 pnfs_osd_layout[];
++};
++
++/*
++ * per-inode layout
++ */
++struct objlayout {
++	struct pnfs_layout_hdr pnfs_layout;
++
++	 /* for layout_commit */
++	enum osd_delta_space_valid_enum {
++		OBJ_DSU_INIT = 0,
++		OBJ_DSU_VALID,
++		OBJ_DSU_INVALID,
++	} delta_space_valid;
++	s64 delta_space_used;  /* consumed by write ops */
++
++	 /* for layout_return */
++	spinlock_t lock;
++	struct list_head err_list;
++};
++
++static inline struct objlayout *
++OBJLAYOUT(struct pnfs_layout_hdr *lo)
++{
++	return container_of(lo, struct objlayout, pnfs_layout);
++}
++
++/*
++ * per-I/O operation state
++ * embedded in objects provider io_state data structure
++ */
++struct objlayout_io_state {
++	struct pnfs_layout_segment *lseg;
++
++	struct page **pages;
++	unsigned pgbase;
++	unsigned nr_pages;
++	unsigned long count;
++	loff_t offset;
++	bool sync;
++
++	void *rpcdata;
++	int status;             /* res */
++	int eof;                /* res */
++	int committed;          /* res */
++
++	/* Error reporting (layout_return) */
++	struct list_head err_list;
++	unsigned num_comps;
++	/* Pointer to array of error descriptors of size num_comps.
++	 * It should contain as many entries as devices in the osd_layout
++	 * that participate in the I/O. It is up to the io_engine to allocate
++	 * needed space and set num_comps.
++	 */
++	struct pnfs_osd_ioerr *ioerrs;
++};
++
++/*
++ * Raid engine I/O API
++ */
++extern void *objio_init_mt(void);
++extern void objio_fini_mt(void *mt);
++
++extern int objio_alloc_lseg(void **outp,
++	struct pnfs_layout_hdr *pnfslay,
++	struct pnfs_layout_segment *lseg,
++	struct pnfs_osd_layout *layout);
++extern void objio_free_lseg(void *p);
++
++extern int objio_alloc_io_state(void *seg, struct objlayout_io_state **outp);
++extern void objio_free_io_state(struct objlayout_io_state *state);
++
++extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state);
++extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state,
++				    bool stable);
++
++/*
++ * callback API
++ */
++extern void objlayout_io_set_result(struct objlayout_io_state *state,
++				    unsigned index, int osd_error,
++				    u64 offset, u64 length, bool is_write);
++
++static inline void
++objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used)
++{
++	struct objlayout *objlay = OBJLAYOUT(state->lseg->layout);
++
++	/* If one of the I/Os errored out and the delta_space_used was
++	 * invalid we render the complete report as invalid. Protocol mandate
++	 * the DSU be accurate or not reported.
++	 */
++	spin_lock(&objlay->lock);
++	if (objlay->delta_space_valid != OBJ_DSU_INVALID) {
++		objlay->delta_space_valid = OBJ_DSU_VALID;
++		objlay->delta_space_used += space_used;
++	}
++	spin_unlock(&objlay->lock);
++}
++
++extern void objlayout_read_done(struct objlayout_io_state *state,
++				ssize_t status, bool sync);
++extern void objlayout_write_done(struct objlayout_io_state *state,
++				 ssize_t status, bool sync);
++
++extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
++	struct pnfs_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr);
++extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr);
++
++/*
++ * exported generic objects function vectors
++ */
++extern struct layoutdriver_io_operations objlayout_io_operations;
++extern struct pnfs_client_operations *pnfs_client_ops;
++
++#endif /* _OBJLAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-08-31 20:42:05.536110535 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-08-31 20:42:05.536110535 -0400
+@@ -0,0 +1,734 @@
++/*
++ *  panfs_shim.c
++ *
++ *  Shim layer for interfacing with the Panasas DirectFlow module I/O stack
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ */
++
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <asm/byteorder.h>
++
++#include "objlayout.h"
++#include "panfs_shim.h"
++
++#include <linux/panfs_shim_api.h>
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++struct panfs_export_operations *panfs_export_ops;
++/* Returns NULL when panfs ops are registered, ERR_PTR(-EAGAIN) if not. */
++void *
++objio_init_mt(void)
++{
++	return panfs_export_ops == NULL ? ERR_PTR(-EAGAIN) : NULL;
++}
++/* Nothing to tear down: objio_init_mt() never allocates a mount id. */
++void objio_fini_mt(void *mountid)
++{
++}
++/* RAID_0 map -> RAID1 if mirrored, RAID0 if multi-component, else SIMPLE. */
++static int
++panfs_shim_conv_raid01(struct pnfs_osd_layout *layout,
++		       struct pnfs_osd_data_map *lo_map,
++		       pan_agg_layout_hdr_t *hdr)
++{
++	if (lo_map->odm_mirror_cnt) {
++		hdr->type = PAN_AGG_RAID1;
++		hdr->hdr.raid1.num_comps = lo_map->odm_mirror_cnt + 1; /* NOTE(review): 16-bit field, no truncation check */
++	} else if (layout->olo_num_comps > 1) {
++		hdr->type = PAN_AGG_RAID0;
++		hdr->hdr.raid0.num_comps = layout->olo_num_comps; /* NOTE(review): 16-bit field, no truncation check */
++		hdr->hdr.raid0.stripe_unit = lo_map->odm_stripe_unit;
++	} else
++		hdr->type = PAN_AGG_SIMPLE;
++	return 0;	/* this converter has no failure path */
++}
++/* RAID_5 map -> RAID5_LEFT header (grouped variant if a group is given). */
++static int
++panfs_shim_conv_raid5(struct pnfs_osd_layout *layout,
++		      struct pnfs_osd_data_map *lo_map,
++		      pan_agg_layout_hdr_t *hdr)
++{
++	if (lo_map->odm_mirror_cnt)	/* mirrored RAID_5 maps are rejected */
++		goto err;
++
++	if (lo_map->odm_group_width || lo_map->odm_group_depth) {
++		if (!lo_map->odm_group_width || !lo_map->odm_group_depth)
++			goto err;	/* width and depth must both be set */
++
++		hdr->type = PAN_AGG_GRP_RAID5_LEFT;
++		hdr->hdr.grp_raid5_left.num_comps = lo_map->odm_num_comps;
++		if (hdr->hdr.grp_raid5_left.num_comps != lo_map->odm_num_comps)
++			goto err;	/* value did not survive the 16-bit field */
++		hdr->hdr.grp_raid5_left.stripe_unit = lo_map->odm_stripe_unit;
++		hdr->hdr.grp_raid5_left.rg_width = lo_map->odm_group_width;
++		hdr->hdr.grp_raid5_left.rg_depth = lo_map->odm_group_depth;
++		/* this is a guess, panasas server is not supposed to
++		   hand out layout otherwise */
++		hdr->hdr.grp_raid5_left.group_layout_policy =
++			PAN_AGG_GRP_RAID5_LEFT_POLICY_ROUND_ROBIN;
++	} else {
++		hdr->type = PAN_AGG_RAID5_LEFT;
++		hdr->hdr.raid5_left.num_comps = lo_map->odm_num_comps;
++		if (hdr->hdr.raid5_left.num_comps != lo_map->odm_num_comps)
++			goto err;	/* value did not survive the 16-bit field */
++		hdr->hdr.raid5_left.stripe_unit2 =
++		hdr->hdr.raid5_left.stripe_unit1 =
++		hdr->hdr.raid5_left.stripe_unit0 = lo_map->odm_stripe_unit;
++	}
++
++	return 0;
++err:
++	return -EINVAL;
++}
++
++/*
++ * Convert a pnfs_osd data map (layout->olo_map) into the Panasas aggregation
++ * layout header *hdr.  Returns 0, or -EINVAL for an unsupported/inconsistent map.
++ */
++static int
++panfs_shim_conv_pnfs_osd_data_map(
++	struct pnfs_osd_layout *layout,
++	pan_agg_layout_hdr_t *hdr)
++{
++	int status = -EINVAL;
++	struct pnfs_osd_data_map *lo_map = &layout->olo_map;
++
++	if (!layout->olo_num_comps) {
++		dprintk("%s: !!layout.n_comps(%u)\n", __func__,
++			layout->olo_num_comps);
++		goto err;
++	}
++
++	switch (lo_map->odm_raid_algorithm) {
++	case PNFS_OSD_RAID_0:
++		if (layout->olo_num_comps != lo_map->odm_num_comps ||
++		    layout->olo_comps_index) {
++			dprintk("%s: !!PNFS_OSD_RAID_0 "
++				"layout.n_comps(%u) map.n_comps(%u) "
++				"comps_index(%u)\n", __func__,
++				layout->olo_num_comps,
++				lo_map->odm_num_comps,
++				layout->olo_comps_index);
++			goto err;
++		}
++		status = panfs_shim_conv_raid01(layout, lo_map, hdr);
++		break;
++
++	case PNFS_OSD_RAID_5:
++		if (!lo_map->odm_group_width) {
++			if (layout->olo_num_comps != lo_map->odm_num_comps ||
++			    layout->olo_comps_index) {
++				dprintk("%s: !!PNFS_OSD_RAID_5 !group_width "
++					"layout.n_comps(%u)!=map.n_comps(%u) "
++					"|| comps_index(%u)\n", __func__,
++					layout->olo_num_comps,
++					lo_map->odm_num_comps,
++					layout->olo_comps_index);
++				goto err;
++			}
++		} else if ((layout->olo_num_comps != lo_map->odm_num_comps &&
++			    layout->olo_num_comps > lo_map->odm_group_width) ||
++			   (layout->olo_comps_index % lo_map->odm_group_width)){
++				dprintk("%s: !!PNFS_OSD_RAID_5 group_width(%u) "
++					"layout.n_comps(%u) map.n_comps(%u) "
++					"comps_index(%u)\n", __func__,
++					lo_map->odm_group_width,
++					layout->olo_num_comps,
++					lo_map->odm_num_comps,
++					layout->olo_comps_index);
++				goto err;
++			}
++		status = panfs_shim_conv_raid5(layout, lo_map, hdr);
++		break;
++
++	case PNFS_OSD_RAID_4:
++	case PNFS_OSD_RAID_PQ:
++	default:
++		dprintk("%s: !!PNFS_OSD_RAID_(%d)\n", __func__,
++			lo_map->odm_raid_algorithm);
++		goto err;	/* status is still the initial -EINVAL */
++	}
++
++	/* status carries the converter's verdict here; do not mask it with 0 */
++err:
++	return status;
++}
++
++/* Convert pnfs_osd layout into Panasas map and caps type.
++ * On success *outp is one kzalloc'ed buffer (free it with objio_free_lseg).
++ * Returns 0 or a negative errno (-EINVAL, -ENOMEM, -ENOTSUPP). */
++int
++objio_alloc_lseg(void **outp,
++	struct pnfs_layout_hdr *pnfslay,
++	struct pnfs_layout_segment *lseg,
++	struct pnfs_osd_layout *layout)
++{
++	int i, total_comps;
++	int status;
++	struct pnfs_osd_object_cred *lo_comp;
++	pan_size_t alloc_sz, local_sz;
++	pan_sm_map_cap_t *mcs = NULL;
++	u8 *buf;
++	pan_agg_comp_obj_t *pan_comp;
++	pan_sm_sec_t *pan_sec;
++
++	status = -EINVAL;
++	if (layout->olo_num_comps < layout->olo_map.odm_group_width) {
++		total_comps = layout->olo_comps_index + layout->olo_num_comps;
++	} else {
++		/* allocate full map, otherwise SAM gets confused */
++		total_comps = layout->olo_map.odm_num_comps;
++	}
++	/* the fill loop below writes slots [olo_comps_index, +olo_num_comps) */
++	if ((u64)layout->olo_comps_index + layout->olo_num_comps >
++	    (u32)total_comps)
++		goto err;
++	alloc_sz = total_comps *
++		   (sizeof(pan_agg_comp_obj_t) + sizeof(pan_sm_sec_t));
++	for (i = 0; i < layout->olo_num_comps; i++) {
++		void *p = layout->olo_comps[i].oc_cap.cred;
++		if (panfs_export_ops->sm_sec_t_get_size_otw(
++			(pan_sm_sec_otw_t *)&p, &local_sz, NULL, NULL))
++			goto err;
++		alloc_sz += local_sz;
++	}
++
++	status = -ENOMEM;
++	mcs = kzalloc(sizeof(*mcs) + alloc_sz, GFP_KERNEL);
++	if (!mcs)
++		goto err;
++	buf = (u8 *)&mcs[1];
++
++	mcs->offset = lseg->range.offset;
++	mcs->length = lseg->range.length;
++	/* FIXME: expiration_time is not set; kzalloc leaves it zero for now */
++	mcs->full_map.map_hdr.avail_state = PAN_AGG_OBJ_STATE_NORMAL;
++	status = panfs_shim_conv_pnfs_osd_data_map(layout,
++						   &mcs->full_map.layout_hdr);
++	if (status)
++		goto err;
++
++	mcs->full_map.components.size = total_comps;
++	mcs->full_map.components.data = (pan_agg_comp_obj_t *)buf;
++	buf += total_comps * sizeof(pan_agg_comp_obj_t);
++
++	mcs->secs.size = total_comps;
++	mcs->secs.data = (pan_sm_sec_t *)buf;
++	buf += total_comps * sizeof(pan_sm_sec_t);
++
++	lo_comp = layout->olo_comps;
++	pan_comp = mcs->full_map.components.data + layout->olo_comps_index;
++	pan_sec = mcs->secs.data + layout->olo_comps_index;
++	for (i = 0; i < layout->olo_num_comps; i++) {
++		void *p;
++		pan_stor_obj_id_t *obj_id = &mcs->full_map.map_hdr.obj_id;
++		struct pnfs_osd_objid *oc_obj_id = &lo_comp->oc_object_id;
++		u64 dev_id = __be64_to_cpup(
++			(__be64 *)oc_obj_id->oid_device_id.data + 1);
++
++		dprintk("%s: i=%d deviceid=%Lx:%Lx partition=%Lx object=%Lx\n",
++			__func__, i,
++			__be64_to_cpup((__be64 *)oc_obj_id->oid_device_id.data),
++			__be64_to_cpup((__be64 *)oc_obj_id->oid_device_id.data + 1),
++			oc_obj_id->oid_partition_id, oc_obj_id->oid_object_id);
++
++		if (i == 0) {
++			/* make up mgr_id to calm sam down */
++			pan_mgr_id_construct_artificial(PAN_MGR_SM, 0,
++							&obj_id->dev_id);
++			obj_id->grp_id = oc_obj_id->oid_partition_id;
++			obj_id->obj_id = oc_obj_id->oid_object_id;
++		}
++
++		if (obj_id->grp_id != lo_comp->oc_object_id.oid_partition_id) {
++			dprintk("%s: i=%d grp_id=0x%Lx oid_partition_id=0x%Lx\n",
++				__func__, i, (u64)obj_id->grp_id,
++				lo_comp->oc_object_id.oid_partition_id);
++			status = -EINVAL;
++			goto err;
++		}
++
++		if (obj_id->obj_id != lo_comp->oc_object_id.oid_object_id) {
++			dprintk("%s: i=%d obj_id=0x%Lx oid_object_id=0x%Lx\n",
++				__func__, i, obj_id->obj_id,
++				lo_comp->oc_object_id.oid_object_id);
++			status = -EINVAL;
++			goto err;
++		}
++
++		pan_comp->dev_id = dev_id;
++		if (!pan_stor_is_device_id_an_obsd_id(pan_comp->dev_id)) {
++			dprintk("%s: i=%d dev_id=0x%Lx not an obsd_id\n",
++				__func__, i, dev_id);
++			status = -EINVAL;
++			goto err;
++		}
++		if (lo_comp->oc_osd_version == PNFS_OSD_MISSING) {
++			dprintk("%s: degraded maps not supported yet\n",
++				__func__);
++			status = -ENOTSUPP;
++			goto err;
++		}
++		pan_comp->avail_state = PAN_AGG_COMP_STATE_NORMAL;
++		if (lo_comp->oc_cap_key_sec != PNFS_OSD_CAP_KEY_SEC_NONE) {
++			dprintk("%s: cap key security not supported yet\n",
++				__func__);
++			status = -ENOTSUPP;
++			goto err;
++		}
++
++		p = lo_comp->oc_cap.cred;
++		panfs_export_ops->sm_sec_t_unmarshall(
++			(pan_sm_sec_otw_t *)&p,
++			pan_sec,
++			buf,
++			alloc_sz,
++			NULL,
++			&local_sz);
++		buf += local_sz;
++		alloc_sz -= local_sz;
++
++		lo_comp++;
++		pan_comp++;
++		pan_sec++;
++	}
++
++	*outp = mcs;
++	dprintk("%s:Return mcs=%p\n", __func__, mcs);
++	return 0;
++
++err:
++	objio_free_lseg(mcs);
++	dprintk("%s:Error %d\n", __func__, status);
++	return status;
++}
++
++/*
++ * Free a Panasas map and caps type (buffer from objio_alloc_lseg, may be NULL)
++ */
++void
++objio_free_lseg(void *p)
++{
++	kfree(p);
++}
++
++/*
++ * I/O routines.  Allocate a zeroed io_state; returns 0 or -ENOMEM (seg unused)
++ */
++int
++objio_alloc_io_state(void *seg, struct objlayout_io_state **outp)
++{
++	struct panfs_shim_io_state *p;
++
++	dprintk("%s: allocating io_state\n", __func__);
++	p = kzalloc(sizeof(*p), GFP_KERNEL);
++	if (!p)
++		return -ENOMEM;
++
++	*outp = &p->ol_state;	/* hand out the embedded generic state */
++	return 0;
++}
++
++/*
++ * Free an I/O state: kunmap its pages, put the ucreds, free the sg list.
++ */
++void
++objio_free_io_state(struct objlayout_io_state *ol_state)
++{
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	int i;
++
++	dprintk("%s: freeing io_state\n", __func__);
++	/* pages are kmap'ed only once panfs_shim_pages_to_sg() has set sg_list */
++	for (i = 0; state->sg_list && i < state->ol_state.nr_pages; i++)
++		kunmap(state->ol_state.pages[i]);
++	if (state->ucreds)
++		panfs_export_ops->ucreds_put(state->ucreds);
++	kfree(state->sg_list);
++	kfree(state);
++}
++/* kmap the pages and chain them into state->sg_list; returns 0 or -ENOMEM. */
++static int
++panfs_shim_pages_to_sg(
++	struct panfs_shim_io_state *state,
++	struct page **pages,
++	unsigned int pgbase,
++	unsigned nr_pages,
++	size_t count)
++{
++	unsigned i, n;
++	pan_sg_entry_t *sg;
++
++	dprintk("%s pgbase %u nr_pages %u count %d "
++		"pg0 %p flags 0x%x index %llu\n",
++		__func__, pgbase, nr_pages, (int)count, pages[0],
++		(unsigned)pages[0]->flags, (unsigned long long)pages[0]->index);
++	/* NOTE(review): the debug print above reads pages[0] even if nr_pages is 0 */
++	sg = kmalloc(nr_pages * sizeof(*sg), GFP_KERNEL);
++	if (sg == NULL)
++		return -ENOMEM;	/* nothing has been kmap'ed yet */
++
++	dprintk("%s sg_list %p pages %p pgbase %u nr_pages %u\n",
++		__func__, sg, pages, pgbase, nr_pages);
++
++	for (i = 0; i < nr_pages; i++) {
++		sg[i].buffer = (char *)kmap(pages[i]) + pgbase;
++		n = PAGE_SIZE - pgbase;
++		pgbase = 0;	/* only the first page starts at an offset */
++		if (n > count)
++			n = count;
++		sg[i].chunk_size = n;
++		count -= n;
++		if (likely(count)) {
++			sg[i].next = &sg[i+1];
++		} else {
++			/* we're done */
++			sg[i].next = NULL;
++			break;
++		}
++	}
++	BUG_ON(count);	/* nr_pages did not cover pgbase + count */
++
++	state->sg_list = sg;	/* freed by objio_free_io_state() */
++	return 0;
++}
++
++/*
++ * Callback function for async reads; objio_read_pagelist also calls it inline
++ */
++static void
++panfs_shim_read_done(
++	void *arg1,
++	void *arg2,
++	pan_sam_read_res_t *res_p,
++	pan_status_t rc)
++{
++	struct panfs_shim_io_state *state = arg1;	/* arg2 is not used */
++	ssize_t status;
++
++	dprintk("%s: Begin\n", __func__);
++	if (!res_p)	/* fall back to the result stored in the io state */
++		res_p = &state->u.read.res;
++	if (rc == PAN_SUCCESS)	/* call itself succeeded: use the result code */
++		rc = res_p->result;
++	if (rc == PAN_SUCCESS) {
++		status = res_p->length;
++		WARN_ON(status < 0);
++	} else {
++		status = -panfs_export_ops->convert_rc(rc); /* presumably a positive errno; confirm */
++		dprintk("%s: pan_sam_read rc %d: status %Zd\n",
++			__func__, rc, status);
++	}
++	dprintk("%s: Return status %Zd rc %d\n", __func__, status, rc);
++	objlayout_read_done(&state->ol_state, status, true);
++}
++/* Issue a SAM read for ol_state; the I/O result goes to panfs_shim_read_done. */
++ssize_t
++objio_read_pagelist(struct objlayout_io_state *ol_state)
++{
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	struct objlayout_segment *lseg = LSEG_LD_DATA(ol_state->lseg);
++	pan_sm_map_cap_t *mcs = (pan_sm_map_cap_t *)lseg->internal;
++	ssize_t status = 0;
++	pan_status_t rc = PAN_SUCCESS;
++
++	dprintk("%s: Begin\n", __func__);
++
++	status = panfs_shim_pages_to_sg(state, ol_state->pages,
++					ol_state->pgbase, ol_state->nr_pages,
++					ol_state->count);
++	if (unlikely(status))
++		goto err;
++
++	state->obj_sec.min_security = 0;
++	state->obj_sec.map_ccaps = mcs;
++
++	rc = panfs_export_ops->ucreds_get(&state->ucreds);
++	if (unlikely(rc)) {
++		status = -EACCES;	/* sg list and kmaps stay with the io state */
++		goto err;
++	}
++
++	state->u.read.args.obj_id = mcs->full_map.map_hdr.obj_id;
++	state->u.read.args.offset = ol_state->offset;
++	rc = panfs_export_ops->sam_read(PAN_SAM_ACCESS_BYPASS_TIMESTAMP,
++					&state->u.read.args,
++					&state->obj_sec,
++					state->sg_list,
++					state->ucreds,
++					ol_state->sync ?
++						NULL : panfs_shim_read_done,
++					state, NULL,
++					&state->u.read.res);
++	if (rc != PAN_ERR_IN_PROGRESS)	/* finished (or failed) synchronously */
++		panfs_shim_read_done(state, NULL, &state->u.read.res, rc);
++ err:
++	dprintk("%s: Return %Zd\n", __func__, status);
++	return status;	/* 0 once issued; only setup errors are returned here */
++}
++
++/*
++ * Callback function for async writes; objio_write_pagelist also calls it inline
++ */
++static void
++panfs_shim_write_done(
++	void *arg1,
++	void *arg2,
++	pan_sam_write_res_t *res_p,
++	pan_status_t rc)
++{
++	struct panfs_shim_io_state *state = arg1;	/* arg2 is not used */
++	ssize_t status;
++
++	dprintk("%s: Begin\n", __func__);
++	if (!res_p)	/* fall back to the result stored in the io state */
++		res_p = &state->u.write.res;
++	if (rc == PAN_SUCCESS)	/* call itself succeeded: use the result code */
++		rc = res_p->result;
++	if (rc == PAN_SUCCESS) {
++/*		state->ol_state.committed = NFS_FILE_SYNC;*/
++		state->ol_state.committed = NFS_UNSTABLE;
++		status = res_p->length;
++		WARN_ON(status < 0);
++
++		objlayout_add_delta_space_used(&state->ol_state,
++					       res_p->delta_capacity_used);
++	} else {
++		status = -panfs_export_ops->convert_rc(rc); /* presumably a positive errno; confirm */
++		dprintk("%s: pan_sam_write rc %u: status %Zd\n",
++			__func__, rc, status);
++	}
++	dprintk("%s: Return status %Zd rc %d\n", __func__, status, rc);
++	objlayout_write_done(&state->ol_state, status, true);
++}
++/* Issue a SAM write for ol_state; the I/O result goes to panfs_shim_write_done. */
++ssize_t
++objio_write_pagelist(struct objlayout_io_state *ol_state,
++		     bool stable /* unused, PanOSD writes are stable */)
++{
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	struct objlayout_segment *lseg = LSEG_LD_DATA(ol_state->lseg);
++	pan_sm_map_cap_t *mcs = (pan_sm_map_cap_t *)lseg->internal;
++	ssize_t status = 0;
++	pan_status_t rc = PAN_SUCCESS;
++
++	dprintk("%s: Begin\n", __func__);
++	/* NOTE(review): completion marks NFS_UNSTABLE although writes are called stable */
++	status = panfs_shim_pages_to_sg(state, ol_state->pages,
++					ol_state->pgbase, ol_state->nr_pages,
++					ol_state->count);
++	if (unlikely(status))
++		goto err;
++
++	state->obj_sec.min_security = 0;
++	state->obj_sec.map_ccaps = mcs;
++
++	rc = panfs_export_ops->ucreds_get(&state->ucreds);
++	if (unlikely(rc)) {
++		status = -EACCES;	/* sg list and kmaps stay with the io state */
++		goto err;
++	}
++
++	state->u.write.args.obj_id = mcs->full_map.map_hdr.obj_id;
++	state->u.write.args.offset = ol_state->offset;
++	rc = panfs_export_ops->sam_write(PAN_SAM_ACCESS_NONE,
++					 &state->u.write.args,
++					 &state->obj_sec,
++					 state->sg_list,
++					 state->ucreds,
++					 ol_state->sync ?
++						NULL : panfs_shim_write_done,
++					 state,
++					 NULL,
++					 &state->u.write.res);
++	if (rc != PAN_ERR_IN_PROGRESS)	/* finished (or failed) synchronously */
++		panfs_shim_write_done(state, NULL, &state->u.write.res, rc);
++ err:
++	dprintk("%s: Return %Zd\n", __func__, status);
++	return status;	/* 0 once issued; only setup errors are returned here */
++}
++/* Install the panfs ops table; -EINVAL if one is already registered. */
++int
++panfs_shim_register(struct panfs_export_operations *ops)
++{
++	if (panfs_export_ops) {
++		printk(KERN_INFO
++		       "%s: panfs already registered (panfs ops %p)\n",
++		       __func__, panfs_export_ops);
++		return -EINVAL;
++	}
++
++	printk(KERN_INFO "%s: registering panfs ops %p\n",
++	       __func__, ops);
++
++	panfs_export_ops = ops;	/* NOTE(review): ops is not checked for NULL */
++	return 0;
++}
++EXPORT_SYMBOL(panfs_shim_register);
++/* Drop the panfs ops table; -EINVAL if none is registered. */
++int
++panfs_shim_unregister(void)
++{
++	if (!panfs_export_ops) {
++		printk(KERN_INFO "%s: panfs is not registered\n", __func__);
++		return -EINVAL;
++	}
++
++	printk(KERN_INFO "%s: unregistering panfs ops %p\n",
++	       __func__, panfs_export_ops);
++
++	panfs_export_ops = NULL; /* NOTE(review): no locking against in-flight users seen here */
++	return 0;
++}
++EXPORT_SYMBOL(panfs_shim_unregister);
++
++/*
++ * Policy Operations
++ */
++
++/*
++ * Return the largest stripe size over the file's layout segments (-1 if none)
++ */
++ssize_t
++panlayout_get_stripesize(struct pnfs_layout_hdr *pnfslay)
++{
++	ssize_t sz, maxsz = -1;
++	struct pnfs_layout_segment *lseg;
++
++	dprintk("%s: Begin\n", __func__);
++
++	list_for_each_entry(lseg, &pnfslay->segs, fi_list) {
++		int n;
++		struct objlayout_segment *panlseg = LSEG_LD_DATA(lseg);
++		struct pnfs_osd_layout *lo =
++			(struct pnfs_osd_layout *)panlseg->pnfs_osd_layout;
++		struct pnfs_osd_data_map *map = &lo->olo_map;
++
++		n = map->odm_group_width;
++		if (n == 0)	/* no grouping: components per mirror set */
++			n = map->odm_num_comps / (map->odm_mirror_cnt + 1);
++
++		switch (map->odm_raid_algorithm) {
++		case PNFS_OSD_RAID_0:
++			break;
++
++		case PNFS_OSD_RAID_4:
++		case PNFS_OSD_RAID_5:
++			n -= 1;	/* presumably the parity component; confirm */
++			n *= 8;	/* FIXME: until we have 2-D coalescing */
++			break;
++
++		case PNFS_OSD_RAID_PQ:
++			n -= 2;	/* presumably the P and Q components; confirm */
++			break;
++
++		default:
++			BUG_ON(1);
++		}
++		sz = map->odm_stripe_unit * n;
++		if (sz > maxsz)
++			maxsz = sz;
++	}
++	dprintk("%s: Return %Zd\n", __func__, maxsz);
++	return maxsz;
++}
++
++#define PANLAYOUT_DEF_STRIPE_UNIT    (64*1024)
++#define PANLAYOUT_DEF_STRIPE_WIDTH   9
++#define PANLAYOUT_MAX_STRIPE_WIDTH   11
++#define PANLAYOUT_MAX_GATHER_STRIPES 8
++
++/*
++ * Get the max [rw]size: (11 - 1) * 64KiB * 8 = 5MiB with the constants above
++ */
++static ssize_t
++panlayout_get_blocksize(void)
++{
++	ssize_t sz = (PANLAYOUT_MAX_STRIPE_WIDTH-1) *
++		      PANLAYOUT_DEF_STRIPE_UNIT *
++		      PANLAYOUT_MAX_GATHER_STRIPES;
++	dprintk("%s: Return %Zd\n", __func__, sz);
++	return sz;
++}
++
++static struct layoutdriver_policy_operations panlayout_policy_operations = {
++/*
++ * Don't gather across stripes, but rather gather (coalesce) up to
++ * the stripe size.
++ *
++ * FIXME: change interface to use merge_align, merge_count
++ */
++	.flags                 = PNFS_LAYOUTRET_ON_SETATTR,
++	.get_stripesize        = panlayout_get_stripesize,
++	.get_blocksize         = panlayout_get_blocksize,
++};
++
++#define PNFS_LAYOUT_PANOSD (NFS4_PNFS_PRIVATE_LAYOUT | LAYOUT_OSD2_OBJECTS)
++
++static struct pnfs_layoutdriver_type panlayout_type = {
++	.id = PNFS_LAYOUT_PANOSD,
++	.name = "PNFS_LAYOUT_PANOSD",
++	.ld_io_ops = &objlayout_io_operations,
++	.ld_policy_ops = &panlayout_policy_operations,
++};
++
++MODULE_DESCRIPTION("pNFS Layout Driver for Panasas OSDs");
++MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
++MODULE_LICENSE("GPL");
++/* Module init: register the layout driver; -ENODEV if no ops vector comes back. */
++static int __init panlayout_init(void)
++{
++	pnfs_client_ops = pnfs_register_layoutdriver(&panlayout_type);
++	if (!pnfs_client_ops)	/* NOTE(review): assumes NULL signals failure */
++		return -ENODEV;
++	printk(KERN_INFO "%s: Registered Panasas OSD pNFS Layout Driver\n", __func__);
++	return 0;
++}
++/* Module exit: unregister the layout driver registered by panlayout_init. */
++static void __exit
++panlayout_exit(void)
++{
++	pnfs_unregister_layoutdriver(&panlayout_type);
++	printk(KERN_INFO "%s: Unregistered Panasas OSD pNFS Layout Driver\n",
++	       __func__);
++}
++
++module_init(panlayout_init);
++module_exit(panlayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-08-31 20:42:05.537124598 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-08-31 20:42:05.537124598 -0400
+@@ -0,0 +1,482 @@
++/*
++ *  panfs_shim.h
++ *
++ *  Data types and external function declarations for interfacing with
++ *  panfs (Panasas DirectFlow) I/O stack
++ *
++ *  Copyright (C) 2007 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ */
++
++#ifndef _PANLAYOUT_PANFS_SHIM_H
++#define _PANLAYOUT_PANFS_SHIM_H
++
++typedef s8 pan_int8_t;
++typedef u8 pan_uint8_t;
++typedef s16 pan_int16_t;
++typedef u16 pan_uint16_t;
++typedef s32 pan_int32_t;
++typedef u32 pan_uint32_t;
++typedef s64 pan_int64_t;
++typedef u64 pan_uint64_t;
++
++/*
++ * from pan_base_types.h
++ */
++typedef  pan_uint64_t pan_rpc_none_t;
++typedef pan_uint32_t  pan_rpc_arrdim_t;
++typedef pan_uint32_t  pan_status_t;
++typedef pan_uint8_t   pan_otw_t;
++typedef pan_uint8_t   pan_pad_t;
++
++typedef pan_uint32_t  pan_timespec_sec_t;
++typedef pan_uint32_t  pan_timespec_nsec_t;
++
++typedef  struct pan_timespec_s  pan_timespec_t;
++struct pan_timespec_s {
++  pan_timespec_sec_t   ts_sec;
++  pan_timespec_nsec_t  ts_nsec;
++};
++
++/*
++ * from pan_std_types.h
++ */
++typedef pan_uint32_t pan_size_t;
++typedef  int  pan_bool_t;
++
++/*
++ * from pan_common_error.h
++ */
++#define PAN_SUCCESS                                         ((pan_status_t)0)
++#define PAN_ERR_IN_PROGRESS                                 ((pan_status_t)55)
++
++/*
++ * from pan_sg.h
++ */
++typedef struct pan_sg_entry_s pan_sg_entry_t;
++struct pan_sg_entry_s {
++  void                  *buffer;       /* pointer to memory */
++  pan_uint32_t           chunk_size;   /* size of each chunk (bytes) */
++  pan_sg_entry_t        *next;
++};
++
++/*
++ * from pan_storage.h
++ */
++typedef pan_uint64_t pan_stor_dev_id_t;
++typedef pan_uint32_t pan_stor_obj_grp_id_t;
++typedef pan_uint64_t pan_stor_obj_uniq_t;
++typedef pan_uint32_t pan_stor_action_t;
++typedef pan_uint8_t pan_stor_cap_key_t[20];
++
++typedef pan_uint8_t pan_stor_key_type_t;
++typedef pan_uint64_t pan_stor_len_t;
++typedef pan_int64_t pan_stor_delta_len_t;
++typedef pan_uint64_t pan_stor_offset_t;
++typedef pan_uint16_t pan_stor_op_t;
++
++typedef pan_uint16_t pan_stor_sec_level_t;
++
++struct pan_stor_obj_id_s {
++  pan_stor_dev_id_t      dev_id;
++  pan_stor_obj_uniq_t    obj_id;
++  pan_stor_obj_grp_id_t  grp_id;
++};
++
++typedef struct pan_stor_obj_id_s pan_stor_obj_id_t;
++
++#define PAN_STOR_OP_NONE ((pan_stor_op_t) 0U)
++#define PAN_STOR_OP_READ ((pan_stor_op_t) 8U)
++#define PAN_STOR_OP_WRITE ((pan_stor_op_t) 9U)
++#define PAN_STOR_OP_APPEND ((pan_stor_op_t) 10U)
++#define PAN_STOR_OP_GETATTR ((pan_stor_op_t) 11U)
++#define PAN_STOR_OP_SETATTR ((pan_stor_op_t) 12U)
++#define PAN_STOR_OP_FLUSH ((pan_stor_op_t) 13U)
++#define PAN_STOR_OP_CLEAR ((pan_stor_op_t) 14U)
++
++/*
++ * from pan_aggregation_map.h
++ */
++typedef pan_uint8_t pan_agg_type_t;
++typedef pan_uint64_t pan_agg_map_version_t;
++typedef pan_uint8_t pan_agg_obj_state_t;
++typedef pan_uint8_t pan_agg_comp_state_t;
++typedef pan_uint8_t pan_agg_comp_flag_t;
++
++#define PAN_AGG_OBJ_STATE_INVALID ((pan_agg_obj_state_t) 0x00)
++#define PAN_AGG_OBJ_STATE_NORMAL ((pan_agg_obj_state_t) 0x01)
++#define PAN_AGG_OBJ_STATE_DEGRADED ((pan_agg_obj_state_t) 0x02)
++#define PAN_AGG_OBJ_STATE_RECONSTRUCT ((pan_agg_obj_state_t) 0x03)
++#define PAN_AGG_OBJ_STATE_COPYBACK ((pan_agg_obj_state_t) 0x04)
++#define PAN_AGG_OBJ_STATE_UNAVAILABLE ((pan_agg_obj_state_t) 0x05)
++#define PAN_AGG_OBJ_STATE_CREATING ((pan_agg_obj_state_t) 0x06)
++#define PAN_AGG_OBJ_STATE_DELETED ((pan_agg_obj_state_t) 0x07)
++#define PAN_AGG_COMP_STATE_INVALID ((pan_agg_comp_state_t) 0x00)
++#define PAN_AGG_COMP_STATE_NORMAL ((pan_agg_comp_state_t) 0x01)
++#define PAN_AGG_COMP_STATE_UNAVAILABLE ((pan_agg_comp_state_t) 0x02)
++#define PAN_AGG_COMP_STATE_COPYBACK ((pan_agg_comp_state_t) 0x03)
++#define PAN_AGG_COMP_F_NONE ((pan_agg_comp_flag_t) 0x00)
++#define PAN_AGG_COMP_F_ATTR_STORING ((pan_agg_comp_flag_t) 0x01)
++#define PAN_AGG_COMP_F_OBJ_CORRUPT_OBS ((pan_agg_comp_flag_t) 0x02)
++#define PAN_AGG_COMP_F_TEMP ((pan_agg_comp_flag_t) 0x04)
++
++struct pan_aggregation_map_s {
++  pan_agg_map_version_t  version;
++  pan_agg_obj_state_t    avail_state;
++  pan_stor_obj_id_t      obj_id;
++};
++
++typedef struct pan_aggregation_map_s pan_aggregation_map_t;
++
++struct pan_agg_comp_obj_s {
++  pan_stor_dev_id_t     dev_id;
++  pan_agg_comp_state_t  avail_state;
++  pan_agg_comp_flag_t   comp_flags;
++};
++
++typedef struct pan_agg_comp_obj_s pan_agg_comp_obj_t;
++
++struct pan_agg_simple_header_s {
++  pan_uint8_t  unused;
++};
++
++typedef struct pan_agg_simple_header_s pan_agg_simple_header_t;
++
++struct pan_agg_raid1_header_s {
++  pan_uint16_t  num_comps;
++};
++
++typedef struct pan_agg_raid1_header_s pan_agg_raid1_header_t;
++
++struct pan_agg_raid0_header_s {
++  pan_uint16_t  num_comps;
++  pan_uint32_t  stripe_unit;
++};
++
++typedef struct pan_agg_raid0_header_s pan_agg_raid0_header_t;
++
++struct pan_agg_raid5_left_header_s {
++  pan_uint16_t  num_comps;
++  pan_uint32_t  stripe_unit0;
++  pan_uint32_t  stripe_unit1;
++  pan_uint32_t  stripe_unit2;
++};
++
++typedef struct pan_agg_raid5_left_header_s pan_agg_raid5_left_header_t;
++
++typedef struct pan_agg_grp_raid5_left_header_s pan_agg_grp_raid5_left_header_t;
++
++struct pan_agg_grp_raid5_left_header_s {
++  pan_uint16_t  num_comps;
++  pan_uint32_t  stripe_unit;
++  pan_uint16_t  rg_width;
++  pan_uint16_t  rg_depth;
++  pan_uint8_t   group_layout_policy;
++};
++
++#define PAN_AGG_GRP_RAID5_LEFT_POLICY_INVALID ((pan_uint8_t) 0x00)
++#define PAN_AGG_GRP_RAID5_LEFT_POLICY_ROUND_ROBIN ((pan_uint8_t) 0x01)
++
++#define PAN_AGG_NULL_MAP ((pan_agg_type_t) 0x00)
++#define PAN_AGG_SIMPLE ((pan_agg_type_t) 0x01)
++#define PAN_AGG_RAID1 ((pan_agg_type_t) 0x02)
++#define PAN_AGG_RAID0 ((pan_agg_type_t) 0x03)
++#define PAN_AGG_RAID5_LEFT ((pan_agg_type_t) 0x04)
++#define PAN_AGG_GRP_RAID5_LEFT ((pan_agg_type_t) 0x06)
++#define PAN_AGG_MINTYPE ((pan_agg_type_t) 0x01)
++#define PAN_AGG_MAXTYPE ((pan_agg_type_t) 0x06)
++
++struct pan_agg_layout_hdr_s {
++  pan_agg_type_t type;
++  pan_pad_t pad[3];
++  union {
++    pan_uint64_t                        null;
++    pan_agg_simple_header_t             simple;
++    pan_agg_raid1_header_t              raid1;
++    pan_agg_raid0_header_t              raid0;
++    pan_agg_raid5_left_header_t         raid5_left;
++    pan_agg_grp_raid5_left_header_t     grp_raid5_left;
++  } hdr;
++};
++
++typedef struct pan_agg_layout_hdr_s pan_agg_layout_hdr_t;
++
++struct pan_agg_comp_obj_a_s {
++  pan_rpc_arrdim_t size;
++  pan_agg_comp_obj_t *data;
++};
++typedef struct pan_agg_comp_obj_a_s pan_agg_comp_obj_a;
++
++struct pan_agg_full_map_s {
++  pan_aggregation_map_t  map_hdr;
++  pan_agg_layout_hdr_t   layout_hdr;
++  pan_agg_comp_obj_a     components;
++};
++
++typedef struct pan_agg_full_map_s pan_agg_full_map_t;
++
++/*
++ * from pan_obsd_rpc_types.h
++ */
++typedef pan_uint8_t pan_obsd_security_key_a[16];
++
++typedef pan_uint8_t pan_obsd_capability_key_a[20];
++
++typedef pan_uint8_t pan_obsd_key_holder_id_t;
++
++#define PAN_OBSD_KEY_HOLDER_BASIS_KEY ((pan_obsd_key_holder_id_t) 0x01)
++#define PAN_OBSD_KEY_HOLDER_CAP_KEY ((pan_obsd_key_holder_id_t) 0x02)
++
++struct pan_obsd_key_holder_s {
++  pan_obsd_key_holder_id_t select;
++  pan_pad_t pad[3];
++  union {
++    pan_obsd_security_key_a    basis_key;
++    pan_obsd_capability_key_a  cap_key;
++  } key;
++};
++
++typedef struct pan_obsd_key_holder_s pan_obsd_key_holder_t;
++
++/*
++ * from pan_sm_sec.h
++ */
++typedef pan_uint8_t pan_sm_sec_type_t;
++typedef pan_uint8_t pan_sm_sec_otw_allo_mode_t;
++
++struct pan_obsd_capability_generic_otw_t_s {
++  pan_rpc_arrdim_t size;
++  pan_uint8_t *data;
++};
++typedef struct pan_obsd_capability_generic_otw_t_s
++				pan_obsd_capability_generic_otw_t;
++
++struct pan_sm_sec_obsd_s {
++  pan_obsd_key_holder_t              key;
++  pan_obsd_capability_generic_otw_t  cap_otw;
++  pan_sm_sec_otw_allo_mode_t         allo_mode;
++};
++
++typedef struct pan_sm_sec_obsd_s pan_sm_sec_obsd_t;
++
++struct pan_sm_sec_s {
++  pan_sm_sec_type_t type;
++  pan_pad_t pad[3];
++  union {
++    pan_rpc_none_t     none;
++    pan_sm_sec_obsd_t  obsd;
++  } variant;
++};
++
++typedef struct pan_sm_sec_s pan_sm_sec_t;
++
++struct pan_sm_sec_a_s {
++  pan_rpc_arrdim_t size;
++  pan_sm_sec_t *data;
++};
++typedef struct pan_sm_sec_a_s pan_sm_sec_a;
++typedef pan_otw_t *pan_sm_sec_otw_t;
++
++/*
++ * from pan_sm_types.h
++ */
++typedef pan_uint64_t pan_sm_cap_handle_t;
++
++struct pan_sm_map_cap_s {
++  pan_agg_full_map_t   full_map;
++  pan_stor_offset_t    offset;
++  pan_stor_len_t       length;
++  pan_sm_sec_a         secs;
++  pan_sm_cap_handle_t  handle;
++  pan_timespec_t       expiration_time;
++  pan_stor_action_t    action_mask;
++  pan_uint32_t         flags;
++};
++
++typedef struct pan_sm_map_cap_s pan_sm_map_cap_t;
++
++/*
++ * from pan_sm_ops.h
++ */
++typedef pan_rpc_none_t pan_sm_cache_ptr_t;
++
++/*
++ * from pan_sam_api.h
++ */
++typedef pan_uint32_t    pan_sam_access_flags_t;
++
++typedef struct pan_sam_dev_error_s  pan_sam_dev_error_t;
++struct pan_sam_dev_error_s {
++    pan_stor_dev_id_t       dev_id;
++    pan_stor_op_t           stor_op;
++    pan_status_t            error;
++};
++
++typedef struct pan_sam_ext_status_s pan_sam_ext_status_t;
++struct pan_sam_ext_status_s {
++    pan_uint32_t        available;
++    pan_uint32_t        size;
++    pan_sam_dev_error_t *errors;
++};
++
++enum pan_sam_rpc_sec_sel_e {
++    PAN_SAM_RPC_SEC_DEFAULT,
++    PAN_SAM_RPC_SEC_ATLEAST,
++    PAN_SAM_RPC_SEC_EXACTLY
++};
++typedef enum pan_sam_rpc_sec_sel_e pan_sam_rpc_sec_sel_t;
++
++typedef struct pan_sam_obj_sec_s pan_sam_obj_sec_t;
++struct pan_sam_obj_sec_s {
++    pan_stor_sec_level_t    min_security;
++    pan_sm_map_cap_t        *map_ccaps;
++};
++
++typedef struct  pan_sam_rpc_sec_s   pan_sam_rpc_sec_t;
++struct pan_sam_rpc_sec_s {
++    pan_sam_rpc_sec_sel_t   selector;
++};
++
++typedef struct pan_sam_read_args_s pan_sam_read_args_t;
++struct pan_sam_read_args_s {
++    pan_stor_obj_id_t                obj_id;
++    pan_sm_cache_ptr_t               obj_ent;
++    void                            *return_attr;
++    void                            *checksum;
++    pan_stor_offset_t                offset;
++    pan_uint16_t                     sm_options;
++    void                            *callout;
++    void                            *callout_arg;
++};
++
++typedef struct pan_sam_read_res_s pan_sam_read_res_t;
++struct pan_sam_read_res_s {
++    pan_status_t             result;
++    pan_sam_ext_status_t     ext_status;
++    pan_stor_len_t           length;
++    void                    *attr;
++    void                    *checksum;
++};
++
++typedef void (*pan_sam_read_cb_t)(
++    void                *user_arg1,
++    void                *user_arg2,
++    pan_sam_read_res_t  *res_p,
++    pan_status_t        status);
++
++#define PAN_SAM_ACCESS_NONE                             0x0000
++#define PAN_SAM_ACCESS_BYPASS_TIMESTAMP                 0x0020
++
++typedef struct pan_sam_write_args_s pan_sam_write_args_t;
++struct pan_sam_write_args_s {
++    pan_stor_obj_id_t   obj_id;
++    pan_sm_cache_ptr_t  obj_ent;
++    pan_stor_offset_t   offset;
++    void                *attr;
++    void                *return_attr;
++};
++
++typedef struct pan_sam_write_res_s pan_sam_write_res_t;
++struct pan_sam_write_res_s {
++    pan_status_t            result;
++    pan_sam_ext_status_t    ext_status;
++    pan_stor_len_t          length;
++    pan_stor_delta_len_t    delta_capacity_used;
++    pan_bool_t              parity_dirty;
++    void                   *attr;
++};
++
++typedef void (*pan_sam_write_cb_t)(
++    void                *user_arg1,
++    void                *user_arg2,
++    pan_sam_write_res_t *res_p,
++    pan_status_t        status);
++
++/*
++ * from pan_mgr_types.h
++ */
++#define PAN_MGR_ID_TYPE_SHIFT 56
++#define PAN_MGR_ID_TYPE_MASK ((pan_mgr_id_t)18374686479671623680ULL)
++#define PAN_MGR_ID_UNIQ_MASK ((pan_mgr_id_t)72057594037927935ULL)
++
++typedef pan_uint16_t pan_mgr_type_t;
++typedef pan_uint64_t pan_mgr_id_t;
++
++#define PAN_MGR_SM ((pan_mgr_type_t) 2U)
++#define PAN_MGR_OBSD ((pan_mgr_type_t) 6U)
++
++/*
++ * from pan_mgr_types_c.h
++ */
++#define pan_mgr_id_construct_artificial(_mgr_type_, _mgr_uniq_, _mgr_id_p_) { \
++  pan_mgr_id_t  _id1, _id2; \
++\
++  _id1 = (_mgr_type_); \
++  _id1 <<= PAN_MGR_ID_TYPE_SHIFT; \
++  _id1 &= PAN_MGR_ID_TYPE_MASK; \
++  _id2 = (_mgr_uniq_); \
++  _id2 &= PAN_MGR_ID_UNIQ_MASK; \
++  _id1 |= _id2; \
++  *(_mgr_id_p_) = _id1; \
++}
++
++/*
++ * from pan_storage_c.h
++ */
++#define pan_stor_is_device_id_an_obsd_id(_device_id_) \
++    ((((_device_id_) & PAN_MGR_ID_TYPE_MASK) >> PAN_MGR_ID_TYPE_SHIFT) \
++	== PAN_MGR_OBSD)
++
++/*
++ * pnfs_shim internal definitions
++ */
++
++struct panfs_shim_io_state {
++	struct objlayout_io_state ol_state;
++
++	pan_sg_entry_t *sg_list;
++	pan_sam_obj_sec_t obj_sec;
++	void *ucreds;
++	union {
++		struct {
++			pan_sam_read_args_t args;
++			pan_sam_read_res_t res;
++		} read;
++		struct {
++			pan_sam_write_args_t args;
++			pan_sam_write_res_t res;
++		} write;
++	} u;
++};
++
++#endif /* _PANLAYOUT_PANFS_SHIM_H */
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-08-31 20:42:05.538121971 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-08-31 20:42:05.538121971 -0400
+@@ -0,0 +1,435 @@
++/*
++ *  pnfs_osd_xdr_cli.c
++ *
++ *  Object-Based pNFS Layout XDR layer
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/pnfs_osd_xdr.h>
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/*
++ * The following implementation is based on these Internet Drafts:
++ *
++ * draft-ietf-nfsv4-minorversion-21
++ * draft-ietf-nfsv4-pnfs-obj-12
++ */
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_objid(u32 *p, struct pnfs_osd_objid *objid)
++{
++	COPYMEM(objid->oid_device_id.data, sizeof(objid->oid_device_id.data));
++	READ64(objid->oid_partition_id);
++	READ64(objid->oid_object_id);
++	return p;
++}
++
++static inline u32 *
++pnfs_osd_xdr_decode_opaque_cred(u32 *p,
++				struct pnfs_osd_opaque_cred *opaque_cred)
++{
++	READ32(opaque_cred->cred_len);
++	COPYMEM(opaque_cred->cred, opaque_cred->cred_len);
++	return p;
++}
++
++/*
++ * struct pnfs_osd_object_cred {
++ * 	struct pnfs_osd_objid		oc_object_id;
++ * 	u32				oc_osd_version;
++ * 	u32				oc_cap_key_sec;
++ * 	struct pnfs_osd_opaque_cred	oc_cap_key;
++ * 	struct pnfs_osd_opaque_cred	oc_cap;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_object_cred(u32 *p, struct pnfs_osd_object_cred *comp,
++				u8 **credp)
++{
++	u8 *cred;
++
++	p = pnfs_osd_xdr_decode_objid(p, &comp->oc_object_id);
++	READ32(comp->oc_osd_version);
++	READ32(comp->oc_cap_key_sec);
++
++	cred = *credp;
++	comp->oc_cap_key.cred = cred;
++	p = pnfs_osd_xdr_decode_opaque_cred(p, &comp->oc_cap_key);
++	cred = (u8 *)((u32 *)cred + XDR_QUADLEN(comp->oc_cap_key.cred_len));
++	comp->oc_cap.cred = cred;
++	p = pnfs_osd_xdr_decode_opaque_cred(p, &comp->oc_cap);
++	cred = (u8 *)((u32 *)cred + XDR_QUADLEN(comp->oc_cap.cred_len));
++	*credp = cred;
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_data_map {
++ * 	u32	odm_num_comps;
++ * 	u64	odm_stripe_unit;
++ * 	u32	odm_group_width;
++ * 	u32	odm_group_depth;
++ * 	u32	odm_mirror_cnt;
++ * 	u32	odm_raid_algorithm;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_data_map(u32 *p, struct pnfs_osd_data_map *data_map)
++{
++	READ32(data_map->odm_num_comps);
++	READ64(data_map->odm_stripe_unit);
++	READ32(data_map->odm_group_width);
++	READ32(data_map->odm_group_depth);
++	READ32(data_map->odm_mirror_cnt);
++	READ32(data_map->odm_raid_algorithm);
++	dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u "
++		"odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n",
++		__func__,
++		data_map->odm_num_comps,
++		(unsigned long long)data_map->odm_stripe_unit,
++		data_map->odm_group_width,
++		data_map->odm_group_depth,
++		data_map->odm_mirror_cnt,
++		data_map->odm_raid_algorithm);
++	return p;
++}
++
++struct pnfs_osd_layout *
++pnfs_osd_xdr_decode_layout(struct pnfs_osd_layout *layout, u32 *p)
++{
++	int i;
++	u32 *start = p;
++	struct pnfs_osd_object_cred *comp;
++	u8 *cred;
++
++	p = pnfs_osd_xdr_decode_data_map(p, &layout->olo_map);
++	READ32(layout->olo_comps_index);
++	READ32(layout->olo_num_comps);
++	layout->olo_comps = (struct pnfs_osd_object_cred *)(layout + 1);
++	comp = layout->olo_comps;
++	cred = (u8 *)(comp + layout->olo_num_comps);
++	dprintk("%s: comps_index=%u num_comps=%u\n",
++		__func__, layout->olo_comps_index, layout->olo_num_comps);
++	for (i = 0; i < layout->olo_num_comps; i++) {
++		p = pnfs_osd_xdr_decode_object_cred(p, comp, &cred);
++		dprintk("%s: comp[%d]=dev(%llx:%llx) par=0x%llx obj=0x%llx "
++			"key_len=%u cap_len=%u\n",
++			__func__, i,
++			_DEVID_LO(&comp->oc_object_id.oid_device_id),
++			_DEVID_HI(&comp->oc_object_id.oid_device_id),
++			comp->oc_object_id.oid_partition_id,
++			comp->oc_object_id.oid_object_id,
++			comp->oc_cap_key.cred_len, comp->oc_cap.cred_len);
++		comp++;
++	}
++	dprintk("%s: xdr_size=%Zd end=%p in_core_size=%Zd\n", __func__,
++	       (char *)p - (char *)start, cred, (char *)cred - (char *)layout);
++	return layout;
++}
++
++/*
++ * Get Device Information Decoding
++ *
++ * Note: since Device Information is currently done synchronously, most
++ *       of the actual fields are left inside the rpc buffer and are only
++ *       pointed to by the pnfs_osd_deviceaddr members. So the read buffer
++ *       should not be freed while the returned information is in use.
++ */
++
++u32 *__xdr_read_calc_nfs4_string(
++	u32 *p, struct nfs4_string *str, u8 **freespace)
++{
++	u32 len;
++	char *data;
++	bool need_copy;
++
++	READ32(len);
++	data = (char *)p;
++
++	if (data[len]) { /* Not null terminated we'll need extra space */
++		data = *freespace;
++		*freespace += len + 1;
++		need_copy = true;
++	} else {
++		need_copy = false;
++	}
++
++	if (str) {
++		str->len = len;
++		str->data = data;
++		if (need_copy) {
++			memcpy(data, p, len);
++			data[len] = 0;
++		}
++	}
++
++	p += XDR_QUADLEN(len);
++	return p;
++}
++
++u32 *__xdr_read_calc_u8_opaque(
++	u32 *p, struct nfs4_string *str)
++{
++	u32 len;
++
++	READ32(len);
++
++	if (str) {
++		str->len = len;
++		str->data = (char *)p;
++	}
++
++	p += XDR_QUADLEN(len);
++	return p;
++}
++
++/*
++ * struct pnfs_osd_targetid {
++ * 	u32			oti_type;
++ * 	struct nfs4_string	oti_scsi_device_id;
++ * };
++ */
++u32 *__xdr_read_calc_targetid(
++	u32 *p, struct pnfs_osd_targetid* targetid, u8 **freespace)
++{
++	u32 oti_type;
++
++	READ32(oti_type);
++	if (targetid)
++		targetid->oti_type = oti_type;
++
++	switch (oti_type) {
++	case OBJ_TARGET_SCSI_NAME:
++	case OBJ_TARGET_SCSI_DEVICE_ID:
++		p = __xdr_read_calc_u8_opaque(p,
++			targetid ? &targetid->oti_scsi_device_id : NULL);
++	}
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_net_addr {
++ * 	struct nfs4_string	r_netid;
++ * 	struct nfs4_string	r_addr;
++ * };
++ */
++u32 *__xdr_read_calc_net_addr(
++	u32 *p, struct pnfs_osd_net_addr* netaddr, u8 **freespace)
++{
++
++	p = __xdr_read_calc_nfs4_string(p,
++			netaddr ? &netaddr->r_netid : NULL,
++			freespace);
++
++	p = __xdr_read_calc_nfs4_string(p,
++			netaddr ? &netaddr->r_addr : NULL,
++			freespace);
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_targetaddr {
++ * 	u32				ota_available;
++ * 	struct pnfs_osd_net_addr	ota_netaddr;
++ * };
++ */
++u32 *__xdr_read_calc_targetaddr(
++	u32 *p, struct pnfs_osd_targetaddr *targetaddr, u8 **freespace)
++{
++	u32 ota_available;
++
++	READ32(ota_available);
++	if (targetaddr)
++		targetaddr->ota_available = ota_available;
++
++	if (ota_available) {
++		p = __xdr_read_calc_net_addr(p,
++				targetaddr ? &targetaddr->ota_netaddr : NULL,
++				freespace);
++	}
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_deviceaddr {
++ * 	struct pnfs_osd_targetid	oda_targetid;
++ * 	struct pnfs_osd_targetaddr	oda_targetaddr;
++ * 	u8				oda_lun[8];
++ * 	struct nfs4_string		oda_systemid;
++ * 	struct pnfs_osd_object_cred	oda_root_obj_cred;
++ * 	struct nfs4_string		oda_osdname;
++ * };
++ */
++u32 *__xdr_read_calc_deviceaddr(
++	u32 *p, struct pnfs_osd_deviceaddr *deviceaddr, u8 **freespace)
++{
++	p = __xdr_read_calc_targetid(p,
++			deviceaddr ? &deviceaddr->oda_targetid : NULL,
++			freespace);
++
++	p = __xdr_read_calc_targetaddr(p,
++			deviceaddr ? &deviceaddr->oda_targetaddr : NULL,
++			freespace);
++
++	if (deviceaddr)
++		COPYMEM(deviceaddr->oda_lun, sizeof(deviceaddr->oda_lun));
++	else
++		p += XDR_QUADLEN(sizeof(deviceaddr->oda_lun));
++
++	p = __xdr_read_calc_u8_opaque(p,
++			deviceaddr ? &deviceaddr->oda_systemid : NULL);
++
++	if (deviceaddr) {
++		p = pnfs_osd_xdr_decode_object_cred(p,
++				&deviceaddr->oda_root_obj_cred, freespace);
++	} else {
++		*freespace += pnfs_osd_object_cred_incore_sz(p);
++		p += pnfs_osd_object_cred_xdr_sz(p);
++	}
++
++	p = __xdr_read_calc_u8_opaque(p,
++			deviceaddr ? &deviceaddr->oda_osdname : NULL);
++
++	return p;
++}
++
++size_t pnfs_osd_xdr_deviceaddr_incore_sz(u32 *p)
++{
++	u8 *null_freespace = NULL;
++	size_t sz;
++
++	__xdr_read_calc_deviceaddr(p, NULL, &null_freespace);
++	sz = sizeof(struct pnfs_osd_deviceaddr) + (size_t)null_freespace;
++
++	return sz;
++}
++
++void pnfs_osd_xdr_decode_deviceaddr(
++	struct pnfs_osd_deviceaddr *deviceaddr, u32 *p)
++{
++	u8 *freespace = (u8 *)(deviceaddr + 1);
++
++	__xdr_read_calc_deviceaddr(p, deviceaddr, &freespace);
++}
++
++/*
++ * struct pnfs_osd_layoutupdate {
++ * 	u32	dsu_valid;
++ * 	s64	dsu_delta;
++ * 	u32	olu_ioerr_flag;
++ * };
++ */
++int
++pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
++				 struct pnfs_osd_layoutupdate *lou)
++{
++	/* dsu_delta (8 bytes) is on the wire only when dsu_valid is set */
++	__be32 *p = xdr_reserve_space(xdr, lou->dsu_valid ? 4 + 8 + 4 : 4 + 4);
++
++	if (!p)
++		return -E2BIG;
++	*p++ = cpu_to_be32(lou->dsu_valid);
++	if (lou->dsu_valid)
++		p = xdr_encode_hyper(p, lou->dsu_delta);
++	*p++ = cpu_to_be32(lou->olu_ioerr_flag);
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ */
++static inline int pnfs_osd_xdr_encode_objid(struct xdr_stream *xdr,
++					    struct pnfs_osd_objid *object_id)
++{
++	__be32 *p;
++
++	p = xdr_reserve_space(xdr, 32);
++	if (!p)
++		return -E2BIG;
++
++	p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data,
++				    sizeof(object_id->oid_device_id.data));
++	p = xdr_encode_hyper(p, object_id->oid_partition_id);
++	p = xdr_encode_hyper(p, object_id->oid_object_id);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_ioerr {
++ * 	struct pnfs_osd_objid	oer_component;
++ * 	u64			oer_comp_offset;
++ * 	u64			oer_comp_length;
++ * 	u32			oer_iswrite;
++ * 	u32			oer_errno;
++ * };
++ */
++int pnfs_osd_xdr_encode_ioerr(struct xdr_stream *xdr,
++			      struct pnfs_osd_ioerr *ioerr)
++{
++	__be32 *p;
++	int ret;
++
++	ret = pnfs_osd_xdr_encode_objid(xdr, &ioerr->oer_component);
++	if (ret)
++		return ret;
++
++	p = xdr_reserve_space(xdr, 24);
++	if (!p)
++		return -E2BIG;
++
++	p = xdr_encode_hyper(p, ioerr->oer_comp_offset);
++	p = xdr_encode_hyper(p, ioerr->oer_comp_length);
++	*p++ = cpu_to_be32(ioerr->oer_iswrite);
++	*p   = cpu_to_be32(ioerr->oer_errno);
++
++	return 0;
++}
+diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/pagelist.c
+--- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-08-31 20:41:19.162150222 -0400
++++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-31 20:42:05.539131687 -0400
+@@ -20,6 +20,7 @@
+ #include <linux/nfs_mount.h>
+ 
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ static struct kmem_cache *nfs_page_cachep;
+ 
+@@ -56,7 +57,8 @@ nfs_page_free(struct nfs_page *p)
+ struct nfs_page *
+ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
+ 		   struct page *page,
+-		   unsigned int offset, unsigned int count)
++		   unsigned int offset, unsigned int count,
++		   struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_page		*req;
+ 
+@@ -79,7 +81,11 @@ nfs_create_request(struct nfs_open_conte
+ 	req->wb_pgbase	= offset;
+ 	req->wb_bytes   = count;
+ 	req->wb_context = get_nfs_open_context(ctx);
++	req->wb_lock_context = nfs_get_lock_context(ctx);
+ 	kref_init(&req->wb_kref);
++	req->wb_lseg    = lseg;
++	if (lseg)
++		get_lseg(lseg);
+ 	return req;
+ }
+ 
+@@ -141,18 +147,26 @@ void nfs_clear_request(struct nfs_page *
+ {
+ 	struct page *page = req->wb_page;
+ 	struct nfs_open_context *ctx = req->wb_context;
++	struct nfs_lock_context *l_ctx = req->wb_lock_context;
+ 
+ 	if (page != NULL) {
+ 		page_cache_release(page);
+ 		req->wb_page = NULL;
+ 	}
++	if (l_ctx != NULL) {
++		nfs_put_lock_context(l_ctx);
++		req->wb_lock_context = NULL;
++	}
+ 	if (ctx != NULL) {
+ 		put_nfs_open_context(ctx);
+ 		req->wb_context = NULL;
+ 	}
++	if (req->wb_lseg != NULL) {
++		put_lseg(req->wb_lseg);
++		req->wb_lseg = NULL;
++	}
+ }
+ 
+-
+ /**
+  * nfs_release_request - Release the count on an NFS read/write request
+  * @req: request to release
+@@ -231,11 +245,12 @@ void nfs_pageio_init(struct nfs_pageio_d
+  * Return 'true' if this is the case, else return 'false'.
+  */
+ static int nfs_can_coalesce_requests(struct nfs_page *prev,
+-				     struct nfs_page *req)
++				     struct nfs_page *req,
++				     struct nfs_pageio_descriptor *pgio)
+ {
+ 	if (req->wb_context->cred != prev->wb_context->cred)
+ 		return 0;
+-	if (req->wb_context->lockowner != prev->wb_context->lockowner)
++	if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
+ 		return 0;
+ 	if (req->wb_context->state != prev->wb_context->state)
+ 		return 0;
+@@ -245,6 +260,12 @@ static int nfs_can_coalesce_requests(str
+ 		return 0;
+ 	if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
+ 		return 0;
++	if (req->wb_lseg != prev->wb_lseg)
++		return 0;
++#ifdef CONFIG_NFS_V4_1
++	if (pgio->pg_test && !pgio->pg_test(pgio, prev, req))
++		return 0;
++#endif /* CONFIG_NFS_V4_1 */
+ 	return 1;
+ }
+ 
+@@ -277,7 +298,7 @@ static int nfs_pageio_do_add_request(str
+ 		if (newlen > desc->pg_bsize)
+ 			return 0;
+ 		prev = nfs_list_entry(desc->pg_list.prev);
+-		if (!nfs_can_coalesce_requests(prev, req))
++		if (!nfs_can_coalesce_requests(prev, req, desc))
+ 			return 0;
+ 	} else
+ 		desc->pg_base = req->wb_pgbase;
+@@ -366,6 +387,7 @@ void nfs_pageio_cond_complete(struct nfs
+  * @idx_start: lower bound of page->index to scan
+  * @npages: idx_start + npages sets the upper bound to scan.
+  * @tag: tag to scan for
++ * @use_pnfs: will be set TRUE if commit needs to be handled by layout driver
+  *
+  * Moves elements from one of the inode request lists.
+  * If the number of requests is set to 0, the entire address_space
+@@ -375,7 +397,7 @@ void nfs_pageio_cond_complete(struct nfs
+  */
+ int nfs_scan_list(struct nfs_inode *nfsi,
+ 		struct list_head *dst, pgoff_t idx_start,
+-		unsigned int npages, int tag)
++		  unsigned int npages, int tag, int *use_pnfs)
+ {
+ 	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
+ 	struct nfs_page *req;
+@@ -406,6 +428,8 @@ int nfs_scan_list(struct nfs_inode *nfsi
+ 				radix_tree_tag_clear(&nfsi->nfs_page_tree,
+ 						req->wb_index, tag);
+ 				nfs_list_add_request(req, dst);
++				if (req->wb_lseg)
++					*use_pnfs = 1;
+ 				res++;
+ 				if (res == INT_MAX)
+ 					goto out;
+diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.c
+--- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-08-31 20:42:05.541150301 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-08-31 20:42:05.541150301 -0400
+@@ -0,0 +1,2037 @@
++/*
++ *  linux/fs/nfs/pnfs.c
++ *
++ *  pNFS functions to call and manage layout drivers.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/kernel.h>
++#include <linux/errno.h>
++#include <linux/fs.h>
++#include <linux/module.h>
++#include <linux/smp_lock.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_mount.h>
++#include <linux/nfs_page.h>
++#include <linux/nfs4.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/rculist.h>
++
++#include "internal.h"
++#include "nfs4_fs.h"
++#include "pnfs.h"
++
++#define NFSDBG_FACILITY		NFSDBG_PNFS
++
++#define MIN_POOL_LC		(4)
++
++static int pnfs_initialized;
++
++static void pnfs_free_layout(struct pnfs_layout_hdr *lo,
++			     struct pnfs_layout_range *range);
++static inline void get_layout(struct pnfs_layout_hdr *lo);
++
++/* Locking:
++ *
++ * pnfs_spinlock:
++ * 	protects pnfs_modules_tbl.
++ */
++static spinlock_t pnfs_spinlock = __SPIN_LOCK_UNLOCKED(pnfs_spinlock);
++
++/*
++ * pnfs_modules_tbl holds all pnfs modules
++ */
++static struct list_head	pnfs_modules_tbl;
++static struct kmem_cache *pnfs_cachep;
++static mempool_t *pnfs_layoutcommit_mempool;
++
++static inline struct nfs4_layoutcommit_data *pnfs_layoutcommit_alloc(void)
++{
++	struct nfs4_layoutcommit_data *p =
++			mempool_alloc(pnfs_layoutcommit_mempool, GFP_NOFS);
++	if (p)
++		memset(p, 0, sizeof(*p));
++
++	return p;
++}
++
++void pnfs_layoutcommit_free(struct nfs4_layoutcommit_data *p)
++{
++	mempool_free(p, pnfs_layoutcommit_mempool);
++}
++
++/*
++ * struct pnfs_module - One per pNFS device module.
++ */
++struct pnfs_module {
++	struct pnfs_layoutdriver_type *pnfs_ld_type;
++	struct list_head        pnfs_tblid;
++};
++
++int
++pnfs_initialize(void)
++{
++	INIT_LIST_HEAD(&pnfs_modules_tbl);
++
++	pnfs_cachep = kmem_cache_create("nfs4_layoutcommit_data",
++					sizeof(struct nfs4_layoutcommit_data),
++					0, SLAB_HWCACHE_ALIGN, NULL);
++	if (pnfs_cachep == NULL)
++		return -ENOMEM;
++
++	pnfs_layoutcommit_mempool = mempool_create(MIN_POOL_LC,
++						   mempool_alloc_slab,
++						   mempool_free_slab,
++						   pnfs_cachep);
++	if (pnfs_layoutcommit_mempool == NULL) {
++		kmem_cache_destroy(pnfs_cachep);
++		return -ENOMEM;
++	}
++
++	pnfs_initialized = 1;
++	return 0;
++}
++
++void pnfs_uninitialize(void)
++{
++	mempool_destroy(pnfs_layoutcommit_mempool);
++	kmem_cache_destroy(pnfs_cachep);
++}
++
++/* search pnfs_modules_tbl for right pnfs module */
++static int
++find_pnfs(u32 id, struct pnfs_module **module) {
++	struct  pnfs_module *local = NULL;
++
++	dprintk("PNFS: %s: Searching for %u\n", __func__, id);
++	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid) {
++		if (local->pnfs_ld_type->id == id) {
++			*module = local;
++			return(1);
++		}
++	}
++	return 0;
++}
++
++/* Set cred to indicate we require a layoutcommit
++ * If we don't even have a layout, we don't need to commit it.
++ */
++void
++pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx)
++{
++	dprintk("%s: has_layout=%d ctx=%p\n", __func__, has_layout(nfsi), ctx);
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	if (has_layout(nfsi) &&
++	    !test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->state)) {
++		nfsi->layout->cred = get_rpccred(ctx->state->owner->so_cred);
++		__set_bit(NFS_INO_LAYOUTCOMMIT,
++			  &nfsi->layout->state);
++		nfsi->change_attr++;
++		spin_unlock(&nfsi->vfs_inode.i_lock);
++		dprintk("%s: Set layoutcommit\n", __func__);
++		return;
++	}
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++}
++
++/* Update last_write_offset for layoutcommit.
++ * TODO: We should only use committed extents, but the current nfs
++ * implementation does not calculate the written range in nfs_commit_done.
++ * We therefore update this field in writeback_done.
++ */
++void
++pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
++{
++	loff_t end_pos;
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	if (offset < nfsi->layout->write_begin_pos)
++		nfsi->layout->write_begin_pos = offset;
++	end_pos = offset + extent - 1; /* I'm being inclusive */
++	if (end_pos > nfsi->layout->write_end_pos)
++		nfsi->layout->write_end_pos = end_pos;
++	dprintk("%s: Wrote %lu@%lu bpos %lu, epos: %lu\n",
++		__func__,
++		(unsigned long) extent,
++		(unsigned long) offset ,
++		(unsigned long) nfsi->layout->write_begin_pos,
++		(unsigned long) nfsi->layout->write_end_pos);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++}
++
++/* Uninitialize a mountpoint in a layout driver */
++void
++unmount_pnfs_layoutdriver(struct nfs_server *nfss)
++{
++	if (PNFS_EXISTS_LDIO_OP(nfss, uninitialize_mountpoint))
++		nfss->pnfs_curr_ld->ld_io_ops->uninitialize_mountpoint(nfss);
++}
++
++/*
++ * Set the server pnfs module to the first registered pnfs_type.
++ * Only one pNFS layout driver is supported.
++ */
++void
++set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
++		      u32 id)
++{
++	struct pnfs_module *mod = NULL;
++
++	if (server->pnfs_curr_ld)
++		return;
++
++	if (!find_pnfs(id, &mod)) {
++		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
++		find_pnfs(id, &mod);
++	}
++
++	if (!mod) {
++		dprintk("%s: No pNFS module found for %u. ", __func__, id);
++		goto out_err;
++	}
++
++	server->pnfs_curr_ld = mod->pnfs_ld_type;
++	if (mod->pnfs_ld_type->ld_io_ops->initialize_mountpoint(
++							server, mntfh)) {
++		printk(KERN_ERR "%s: Error initializing mount point "
++		       "for layout driver %u. ", __func__, id);
++		goto out_err;
++	}
++
++	dprintk("%s: pNFS module for %u set\n", __func__, id);
++	return;
++
++out_err:
++	dprintk("Using NFSv4 I/O\n");
++	server->pnfs_curr_ld = NULL;
++}
++
++/* Register a layout driver: returns &pnfs_ops, or NULL on any failure */
++struct pnfs_client_operations*
++pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
++{
++	struct pnfs_module *pnfs_mod;
++	struct layoutdriver_io_operations *io_ops = ld_type->ld_io_ops;
++
++	if (!pnfs_initialized) {
++		printk(KERN_ERR "%s Registration failure. "
++		       "pNFS not initialized.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops || !io_ops->alloc_layout || !io_ops->free_layout) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "alloc_layout and free_layout.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops->alloc_lseg || !io_ops->free_lseg) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "alloc_lseg and free_lseg.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops->read_pagelist || !io_ops->write_pagelist ||
++	    !io_ops->commit) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "read_pagelist, write_pagelist, and commit.\n",
++		       __func__);
++		return NULL;
++	}
++
++	pnfs_mod = kmalloc(sizeof(struct pnfs_module), GFP_KERNEL);
++	if (pnfs_mod == NULL)
++		return NULL;	/* nothing registered: do not hand out pnfs_ops */
++
++	dprintk("%s Registering id:%u name:%s\n",
++		__func__, ld_type->id, ld_type->name);
++	pnfs_mod->pnfs_ld_type = ld_type;
++	INIT_LIST_HEAD(&pnfs_mod->pnfs_tblid);
++
++	/* pnfs_spinlock protects pnfs_modules_tbl */
++	spin_lock(&pnfs_spinlock);
++	list_add(&pnfs_mod->pnfs_tblid, &pnfs_modules_tbl);
++	spin_unlock(&pnfs_spinlock);
++
++	return &pnfs_ops;
++}
++
++/* Remove a layout driver from pnfs_modules_tbl; no-op if not registered */
++void
++pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
++{
++	struct pnfs_module *pnfs_mod = NULL;
++
++	spin_lock(&pnfs_spinlock);	/* covers the lookup as well as the unlink */
++	if (find_pnfs(ld_type->id, &pnfs_mod))
++		list_del(&pnfs_mod->pnfs_tblid);
++	spin_unlock(&pnfs_spinlock);
++	if (pnfs_mod)
++		dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
++	kfree(pnfs_mod);	/* kfree(NULL) is a no-op */
++}
++
++/*
++ * pNFS client layout cache
++ */
++#if defined(CONFIG_SMP)
++#define BUG_ON_UNLOCKED_INO(ino) \
++	BUG_ON(!spin_is_locked(&ino->i_lock))
++#define BUG_ON_UNLOCKED_LO(lo) \
++	BUG_ON_UNLOCKED_INO(PNFS_INODE(lo))
++#else /* CONFIG_SMP */
++#define BUG_ON_UNLOCKED_INO(lo) do {} while (0)
++#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
++#endif /* CONFIG_SMP */
++
++static inline void
++get_layout(struct pnfs_layout_hdr *lo)
++{
++	BUG_ON_UNLOCKED_LO(lo);
++	lo->refcount++;
++}
++
++static inline void
++put_layout_locked(struct pnfs_layout_hdr *lo)
++{
++	BUG_ON_UNLOCKED_LO(lo);
++	BUG_ON(lo->refcount <= 0);
++
++	lo->refcount--;
++	if (!lo->refcount) {
++		struct layoutdriver_io_operations *io_ops = PNFS_LD_IO_OPS(lo);
++		struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
++
++		dprintk("%s: freeing layout cache %p\n", __func__, lo);
++		WARN_ON(!list_empty(&lo->layouts));
++		io_ops->free_layout(lo);
++		nfsi->layout = NULL;
++	}
++}
++
++void
++put_layout(struct inode *inode)
++{
++	spin_lock(&inode->i_lock);
++	put_layout_locked(NFS_I(inode)->layout);
++	spin_unlock(&inode->i_lock);
++
++}
++
++void
++pnfs_layout_release(struct pnfs_layout_hdr *lo,
++		    struct pnfs_layout_range *range)
++{
++	struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	if (range)
++		pnfs_free_layout(lo, range);
++	/*
++	 * Matched in _pnfs_update_layout for layoutget
++	 * and by get_layout in _pnfs_return_layout for layoutreturn
++	 */
++	put_layout_locked(lo);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	wake_up_all(&nfsi->lo_waitq);
++}
++
++void
++pnfs_destroy_layout(struct nfs_inode *nfsi)
++{
++	struct pnfs_layout_hdr *lo;
++	struct pnfs_layout_range range = {
++		.iomode = IOMODE_ANY,
++		.offset = 0,
++		.length = NFS4_MAX_UINT64,
++	};
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	lo = nfsi->layout;
++	if (lo) {
++		pnfs_free_layout(lo, &range);
++		WARN_ON(!list_empty(&nfsi->layout->segs));
++		WARN_ON(!list_empty(&nfsi->layout->layouts));
++
++		if (nfsi->layout->refcount != 1)
++			printk(KERN_WARNING "%s: layout refcount not=1 %d\n",
++				__func__, nfsi->layout->refcount);
++		WARN_ON(nfsi->layout->refcount != 1);
++
++		/* Matched by refcount set to 1 in alloc_init_layout */
++		put_layout_locked(lo);
++	}
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++}
++
++/*
++ * Called by the state manager to remove all layouts established under an
++ * expired lease.
++ */
++void
++pnfs_destroy_all_layouts(struct nfs_client *clp)
++{
++	struct pnfs_layout_hdr *lo;
++
++	while (!list_empty(&clp->cl_layouts)) {
++		lo = list_entry(clp->cl_layouts.next, struct pnfs_layout_hdr,
++				layouts);
++		dprintk("%s freeing layout for inode %lu\n", __func__,
++			lo->inode->i_ino);
++		pnfs_destroy_layout(NFS_I(lo->inode));
++	}
++}
++
++static inline void
++init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
++{
++	INIT_LIST_HEAD(&lseg->fi_list);
++	kref_init(&lseg->kref);
++	lseg->valid = true;
++	lseg->layout = lo;
++}
++
++static void
++destroy_lseg(struct kref *kref)
++{
++	struct pnfs_layout_segment *lseg =
++		container_of(kref, struct pnfs_layout_segment, kref);
++	struct pnfs_layout_hdr *lo = lseg->layout;
++
++	dprintk("--> %s\n", __func__);
++	PNFS_LD_IO_OPS(lo)->free_lseg(lseg);	/* before lo can go away */
++	put_layout_locked(lo);	/* Matched by get_layout in pnfs_insert_layout */
++}
++
++static void
++put_lseg_locked(struct pnfs_layout_segment *lseg)
++{
++	bool do_wake_up;
++	struct nfs_inode *nfsi;
++
++	if (!lseg)
++		return;
++
++	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
++		atomic_read(&lseg->kref.refcount), lseg->valid);
++	do_wake_up = !lseg->valid;
++	nfsi = PNFS_NFS_INODE(lseg->layout);
++	kref_put(&lseg->kref, destroy_lseg);
++	if (do_wake_up)
++		wake_up(&nfsi->lo_waitq);
++}
++
++void
++put_lseg(struct pnfs_layout_segment *lseg)
++{
++	bool do_wake_up;
++	struct nfs_inode *nfsi;
++
++	if (!lseg)
++		return;
++
++	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
++		atomic_read(&lseg->kref.refcount), lseg->valid);
++	do_wake_up = !lseg->valid;
++	nfsi = PNFS_NFS_INODE(lseg->layout);
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	kref_put(&lseg->kref, destroy_lseg);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	if (do_wake_up)
++		wake_up(&nfsi->lo_waitq);
++}
++EXPORT_SYMBOL(put_lseg);
++
++void get_lseg(struct pnfs_layout_segment *lseg)
++{
++	kref_get(&lseg->kref);
++}
++EXPORT_SYMBOL(get_lseg);
++
++static inline u64
++end_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	end = start + len;
++	return end >= start ? end: NFS4_MAX_UINT64;
++}
++
++/* last octet in a range */
++static inline u64
++last_byte_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	BUG_ON(!len);
++	end = start + len;
++	return end > start ? end - 1: NFS4_MAX_UINT64;
++}
++
++/*
++ * is l2 fully contained in l1?
++ *   start1                             end1
++ *   [----------------------------------)
++ *           start2           end2
++ *           [----------------)
++ */
++static inline int
++lo_seg_contained(struct pnfs_layout_range *l1,
++		 struct pnfs_layout_range *l2)
++{
++	u64 start1 = l1->offset;
++	u64 end1 = end_offset(start1, l1->length);
++	u64 start2 = l2->offset;
++	u64 end2 = end_offset(start2, l2->length);
++
++	return (start1 <= start2) && (end1 >= end2);
++}
++
++/*
++ * is l1 and l2 intersecting?
++ *   start1                             end1
++ *   [----------------------------------)
++ *                              start2           end2
++ *                              [----------------)
++ */
++static inline int
++lo_seg_intersecting(struct pnfs_layout_range *l1,
++		    struct pnfs_layout_range *l2)
++{
++	u64 start1 = l1->offset;
++	u64 end1 = end_offset(start1, l1->length);
++	u64 start2 = l2->offset;
++	u64 end2 = end_offset(start2, l2->length);
++
++	return (end1 == NFS4_MAX_UINT64 || end1 > start2) &&
++	       (end2 == NFS4_MAX_UINT64 || end2 > start1);
++}
++
++void
++pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
++			const nfs4_stateid *stateid)
++{
++	write_seqlock(&lo->seqlock);
++	memcpy(lo->stateid.u.data, stateid->u.data, sizeof(lo->stateid.u.data));
++	write_sequnlock(&lo->seqlock);
++}
++
++void
++pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo)
++{
++	int seq;
++
++	dprintk("--> %s\n", __func__);
++
++	do {
++		seq = read_seqbegin(&lo->seqlock);
++		memcpy(dst->u.data, lo->stateid.u.data,
++		       sizeof(lo->stateid.u.data));
++	} while (read_seqretry(&lo->seqlock, seq));
++
++	dprintk("<-- %s\n", __func__);
++}
++
++static void
++pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
++			      struct nfs4_state *state)
++{
++	int seq;
++
++	dprintk("--> %s\n", __func__);
++
++	write_seqlock(&lo->seqlock);
++	if (!memcmp(lo->stateid.u.data, &zero_stateid, NFS4_STATEID_SIZE))
++		do {
++			seq = read_seqbegin(&state->seqlock);
++			memcpy(lo->stateid.u.data, state->stateid.u.data,
++					sizeof(state->stateid.u.data));
++		} while (read_seqretry(&state->seqlock, seq));
++	write_sequnlock(&lo->seqlock);
++	dprintk("<-- %s\n", __func__);
++}
++
++/*
++* Get layout from server.
++*    for now, assume that whole file layouts are requested.
++*    arg->offset: 0
++*    arg->length: all ones
++*/
++static int
++send_layoutget(struct inode *ino,
++	   struct nfs_open_context *ctx,
++	   struct pnfs_layout_range *range,
++	   struct pnfs_layout_segment **lsegpp,
++	   struct pnfs_layout_hdr *lo)
++{
++	int status;
++	struct nfs_server *server = NFS_SERVER(ino);
++	struct nfs4_layoutget *lgp;
++
++	dprintk("--> %s\n", __func__);
++
++	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
++	if (lgp == NULL) {
++		pnfs_layout_release(lo, NULL);
++		return -ENOMEM;
++	}
++	lgp->args.minlength = NFS4_MAX_UINT64;
++	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
++	lgp->args.range.iomode = range->iomode;
++	lgp->args.range.offset = 0;
++	lgp->args.range.length = NFS4_MAX_UINT64;
++	lgp->args.type = server->pnfs_curr_ld->id;
++	lgp->args.inode = ino;
++	lgp->lsegpp = lsegpp;
++
++	if (!memcmp(lo->stateid.u.data, &zero_stateid, NFS4_STATEID_SIZE)) {
++		struct nfs_open_context *oldctx = ctx;
++
++		if (!oldctx) {
++			ctx = nfs_find_open_context(ino, NULL,
++					(range->iomode == IOMODE_READ) ?
++					FMODE_READ: FMODE_WRITE);
++			BUG_ON(!ctx);
++		}
++		/* Set the layout stateid from the open stateid */
++		pnfs_layout_from_open_stateid(NFS_I(ino)->layout, ctx->state);
++		if (!oldctx)
++			put_nfs_open_context(ctx);
++	}
++
++	/* Retrieve layout information from server */
++	status = nfs4_proc_layoutget(lgp);
++
++	dprintk("<-- %s status %d\n", __func__, status);
++	return status;
++}
++
++/*
++ * iomode matching rules:
++ * range	lseg	match
++ * -----	-----	-----
++ * ANY		READ	true
++ * ANY		RW	true
++ * RW		READ	false
++ * RW		RW	true
++ * READ		READ	true
++ * READ		RW	false
++ */
++static inline int
++should_free_lseg(struct pnfs_layout_segment *lseg,
++		   struct pnfs_layout_range *range)
++{
++	return (range->iomode == IOMODE_ANY ||
++		lseg->range.iomode == range->iomode) &&
++	       lo_seg_intersecting(&lseg->range, range);
++}
++
++static struct pnfs_layout_segment *
++has_layout_to_return(struct pnfs_layout_hdr *lo,
++		     struct pnfs_layout_range *range)
++{
++	struct pnfs_layout_segment *out = NULL, *lseg;
++	dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
++		__func__, lo, range->offset, range->length, range->iomode);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry (lseg, &lo->segs, fi_list)
++		if (should_free_lseg(lseg, range)) {
++			out = lseg;
++			break;
++		}
++
++	dprintk("%s:Return lseg=%p\n", __func__, out);
++	return out;
++}
++
++static inline bool
++_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
++{
++	return atomic_read(&lseg->kref.refcount) == 1;
++}
++
++
++static void
++pnfs_free_layout(struct pnfs_layout_hdr *lo,
++		 struct pnfs_layout_range *range)
++{
++	struct pnfs_layout_segment *lseg, *next;
++	dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
++		__func__, lo, range->offset, range->length, range->iomode);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry_safe (lseg, next, &lo->segs, fi_list) {
++		if (!should_free_lseg(lseg, range) ||
++		    !_pnfs_can_return_lseg(lseg))
++			continue;
++		dprintk("%s: freeing lseg %p iomode %d "
++			"offset %llu length %llu\n", __func__,
++			lseg, lseg->range.iomode, lseg->range.offset,
++			lseg->range.length);
++		list_del(&lseg->fi_list);
++		put_lseg_locked(lseg);
++	}
++	if (list_empty(&lo->segs)) {
++		struct nfs_client *clp;
++
++		clp = PNFS_NFS_SERVER(lo)->nfs_client;
++		spin_lock(&clp->cl_lock);
++		list_del_init(&lo->layouts);
++		spin_unlock(&clp->cl_lock);
++		pnfs_set_layout_stateid(lo, &zero_stateid);
++	}
++
++	dprintk("%s:Return\n", __func__);
++}
++
++static bool
++pnfs_return_layout_barrier(struct nfs_inode *nfsi,
++			   struct pnfs_layout_range *range)
++{
++	struct pnfs_layout_segment *lseg;
++	bool ret = false;
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	list_for_each_entry(lseg, &nfsi->layout->segs, fi_list) {
++		if (!should_free_lseg(lseg, range))
++			continue;
++		lseg->valid = false;
++		if (!_pnfs_can_return_lseg(lseg)) {
++			dprintk("%s: wait on lseg %p refcount %d\n",
++				__func__, lseg,
++				atomic_read(&lseg->kref.refcount));
++			ret = true;
++		}
++	}
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	dprintk("%s:Return %d\n", __func__, ret);
++	return ret;
++}
++
++static int
++return_layout(struct inode *ino, struct pnfs_layout_range *range,
++	      enum pnfs_layoutreturn_type type, struct pnfs_layout_hdr *lo,
++	      bool wait)
++{
++	struct nfs4_layoutreturn *lrp;
++	struct nfs_server *server = NFS_SERVER(ino);
++	int status = -ENOMEM;
++
++	dprintk("--> %s\n", __func__);
++
++	BUG_ON(type != RETURN_FILE);
++
++	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
++	if (lrp == NULL) {
++		if (lo && (type == RETURN_FILE))
++			pnfs_layout_release(lo, NULL);
++		goto out;
++	}
++	lrp->args.reclaim = 0;
++	lrp->args.layout_type = server->pnfs_curr_ld->id;
++	lrp->args.return_type = type;
++	lrp->args.range = *range;
++	lrp->args.inode = ino;
++
++	status = nfs4_proc_layoutreturn(lrp, wait);
++out:
++	dprintk("<-- %s status: %d\n", __func__, status);
++	return status;
++}
++
++int
++_pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
++		    const nfs4_stateid *stateid, /* optional */
++		    enum pnfs_layoutreturn_type type,
++		    bool wait)
++{
++	struct pnfs_layout_hdr *lo = NULL;
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct pnfs_layout_range arg;
++	int status = 0;
++
++	dprintk("--> %s type %d\n", __func__, type);
++
++
++	arg.iomode = range ? range->iomode : IOMODE_ANY;
++	arg.offset = 0;
++	arg.length = NFS4_MAX_UINT64;
++
++	if (type == RETURN_FILE) {
++		spin_lock(&ino->i_lock);
++		lo = nfsi->layout;
++		if (lo && !has_layout_to_return(lo, &arg)) {
++			lo = NULL;
++		}
++		if (!lo) {
++			spin_unlock(&ino->i_lock);
++			dprintk("%s: no layout segments to return\n", __func__);
++			goto out;
++		}
++
++		/* Reference for layoutreturn matched in pnfs_layout_release */
++		get_layout(lo);
++
++		spin_unlock(&ino->i_lock);
++
++		if (pnfs_return_layout_barrier(nfsi, &arg)) {
++			if (stateid) { /* callback */
++				status = -EAGAIN;
++				goto out_put;
++			}
++			dprintk("%s: waiting\n", __func__);
++			wait_event(nfsi->lo_waitq,
++				   !pnfs_return_layout_barrier(nfsi, &arg));
++		}
++
++		if (layoutcommit_needed(nfsi)) {
++			if (stateid && !wait) { /* callback */
++				dprintk("%s: layoutcommit pending\n", __func__);
++				status = -EAGAIN;
++				goto out_put;
++			}
++			status = pnfs_layoutcommit_inode(ino, wait);
++			if (status) {
++				/* Return layout even if layoutcommit fails */
++				dprintk("%s: layoutcommit failed, status=%d. "
++					"Returning layout anyway\n",
++					__func__, status);
++			}
++		}
++
++		if (!stateid)
++			status = return_layout(ino, &arg, type, lo, wait);
++		else
++			pnfs_layout_release(lo, &arg);
++	}
++out:
++	dprintk("<-- %s status: %d\n", __func__, status);
++	return status;
++out_put:
++	put_layout(ino);
++	goto out;
++}
++
++/*
++ * cmp two layout segments for sorting into layout cache
++ */
++static inline s64
++cmp_layout(struct pnfs_layout_range *l1,
++	   struct pnfs_layout_range *l2)
++{
++	s64 d;
++
++	/* higher offset > lower offset */
++	d = l1->offset - l2->offset;
++	if (d)
++		return d;
++
++	/* longer length > shorter length */
++	d = l1->length - l2->length;
++	if (d)
++		return d;
++
++	/* read > read/write */
++	return (int)(l1->iomode == IOMODE_READ) -
++	(int)(l2->iomode == IOMODE_READ);
++}
++
++static void
++pnfs_insert_layout(struct pnfs_layout_hdr *lo,
++		   struct pnfs_layout_segment *lseg)
++{
++	struct pnfs_layout_segment *lp;
++	int found = 0;
++
++	dprintk("%s:Begin\n", __func__);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	if (list_empty(&lo->segs)) {
++		struct nfs_client *clp = PNFS_NFS_SERVER(lo)->nfs_client;
++
++		spin_lock(&clp->cl_lock);
++		BUG_ON(!list_empty(&lo->layouts));
++		list_add_tail(&lo->layouts, &clp->cl_layouts);
++		spin_unlock(&clp->cl_lock);
++	}
++	list_for_each_entry (lp, &lo->segs, fi_list) {
++		if (cmp_layout(&lp->range, &lseg->range) > 0)
++			continue;
++		list_add_tail(&lseg->fi_list, &lp->fi_list);
++		dprintk("%s: inserted lseg %p "
++			"iomode %d offset %llu length %llu before "
++			"lp %p iomode %d offset %llu length %llu\n",
++			__func__, lseg, lseg->range.iomode,
++			lseg->range.offset, lseg->range.length,
++			lp, lp->range.iomode, lp->range.offset,
++			lp->range.length);
++		found = 1;
++		break;
++	}
++	if (!found) {
++		list_add_tail(&lseg->fi_list, &lo->segs);
++		dprintk("%s: inserted lseg %p "
++			"iomode %d offset %llu length %llu at tail\n",
++			__func__, lseg, lseg->range.iomode,
++			lseg->range.offset, lseg->range.length);
++	}
++	get_layout(lo);
++
++	dprintk("%s:Return\n", __func__);
++}
++
++/*
++ * Each layoutdriver embeds pnfs_layout_hdr as the first field in its
++ * per-layout type layout cache structure and returns it ZEROed
++ * from layoutdriver_io_ops->alloc_layout
++ */
++static struct pnfs_layout_hdr *
++alloc_init_layout(struct inode *ino)
++{
++	struct pnfs_layout_hdr *lo;
++	struct layoutdriver_io_operations *io_ops;
++
++	io_ops = NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops;
++	lo = io_ops->alloc_layout(ino);
++	if (!lo) {
++		printk(KERN_ERR
++			"%s: out of memory: io_ops->alloc_layout failed\n",
++			__func__);
++		return NULL;
++	}
++	lo->refcount = 1;
++	INIT_LIST_HEAD(&lo->layouts);
++	INIT_LIST_HEAD(&lo->segs);
++	seqlock_init(&lo->seqlock);
++	lo->inode = ino;
++	return lo;
++}
++
++/*
++ * Return the inode's layout header, allocating one if none is attached.
++ * ino->i_lock must be held by the caller; it is dropped and retaken around
++ * the allocation.  May return NULL when alloc_init_layout() fails.
++ */
++static struct pnfs_layout_hdr *
++pnfs_alloc_layout(struct inode *ino)
++{
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct pnfs_layout_hdr *new = NULL;
++
++	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
++
++	BUG_ON_UNLOCKED_INO(ino);
++	if (likely(nfsi->layout))
++		return nfsi->layout;
++
++	spin_unlock(&ino->i_lock);
++	new = alloc_init_layout(ino);
++	spin_lock(&ino->i_lock);
++
++	if (likely(nfsi->layout == NULL)) {	/* Won the race? */
++		nfsi->layout = new;
++	} else if (new) {
++		/* Reference the layout across i_lock release and grab */
++		get_layout(nfsi->layout);
++		spin_unlock(&ino->i_lock);
++		NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops->free_layout(new);
++		spin_lock(&ino->i_lock);
++		put_layout_locked(nfsi->layout);
++	}
++	return nfsi->layout;
++}
++
++/*
++ * iomode matching rules:
++ * range	lseg	match
++ * -----	-----	-----
++ * ANY		READ	true
++ * ANY		RW	true
++ * RW		READ	false
++ * RW		RW	true
++ * READ		READ	true
++ * READ		RW	true
++ */
++static inline int
++has_matching_lseg(struct pnfs_layout_segment *lseg,
++		  struct pnfs_layout_range *range)
++{
++	struct pnfs_layout_range range1;
++
++	if ((range->iomode == IOMODE_RW && lseg->range.iomode != IOMODE_RW) ||
++	    !lo_seg_intersecting(&lseg->range, range))
++		return 0;
++
++	/* range1 covers only the first byte in the range */
++	range1 = *range;
++	range1.length = 1;
++	return lo_seg_contained(&lseg->range, &range1);
++}
++
++/*
++ * lookup range in layout
++ */
++static struct pnfs_layout_segment *
++pnfs_has_layout(struct pnfs_layout_hdr *lo,
++		struct pnfs_layout_range *range,
++		bool take_ref,
++		bool only_valid)
++{
++	struct pnfs_layout_segment *lseg, *ret = NULL;
++
++	dprintk("%s:Begin\n", __func__);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry (lseg, &lo->segs, fi_list) {
++		if (has_matching_lseg(lseg, range) &&
++		    (lseg->valid || !only_valid)) {
++			ret = lseg;
++			if (take_ref)
++				get_lseg(ret);
++			break;
++		}
++		if (cmp_layout(range, &lseg->range) > 0)
++			break;
++	}
++
++	dprintk("%s:Return lseg %p take_ref %d ref %d valid %d\n",
++		__func__, ret, take_ref,
++		ret ? atomic_read(&ret->kref.refcount) : 0,
++		ret ? ret->valid : 0);
++	return ret;
++}
++
++/* Update the file's layout for the given iomode (whole-file range for now).
++ * Layout is retrieved from the server if no cached segment matches.
++ * If lsegpp is given, the matching layout segment is referenced and
++ * returned through it (NULL if none).  The header allocation may fail,
++ * so the exit trace must not assume nfsi->layout is set. */
++void
++_pnfs_update_layout(struct inode *ino,
++		   struct nfs_open_context *ctx,
++		   loff_t pos,
++		   u64 count,
++		   enum pnfs_iomode iomode,
++		   struct pnfs_layout_segment **lsegpp)
++{
++	struct pnfs_layout_range arg = {
++		.iomode = iomode,
++		.offset = 0,
++		.length = NFS4_MAX_UINT64,
++	};
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct pnfs_layout_hdr *lo;
++	struct pnfs_layout_segment *lseg = NULL;
++	bool take_ref = (lsegpp != NULL);
++
++	if (take_ref)
++		*lsegpp = NULL;
++	spin_lock(&ino->i_lock);
++	lo = pnfs_alloc_layout(ino);
++	if (lo == NULL) {
++		dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
++		goto out_unlock;
++	}
++
++	/* Check to see if the layout for the given range already exists */
++	lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
++	if (lseg && !lseg->valid) {
++		if (take_ref)
++			put_lseg_locked(lseg);
++		/* someone is cleaning the layout */
++		lseg = NULL;
++		goto out_unlock;
++	}
++
++	if (lseg) {
++		dprintk("%s: Using cached lseg %p for %llu@%llu iomode %d)\n",
++			__func__,
++			lseg,
++			arg.length,
++			arg.offset,
++			arg.iomode);
++
++		goto out_unlock;
++	}
++
++	/* if get layout already failed once goto out */
++	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state)) {
++		if (unlikely(nfsi->pnfs_layout_suspend &&
++		    get_seconds() >= nfsi->pnfs_layout_suspend)) {
++			dprintk("%s: layout_get resumed\n", __func__);
++			clear_bit(lo_fail_bit(iomode),
++				  &nfsi->layout->state);
++			nfsi->pnfs_layout_suspend = 0;
++		} else
++			goto out_unlock;
++	}
++
++	/* Reference the layout for layoutget matched in pnfs_layout_release */
++	get_layout(lo);
++	spin_unlock(&ino->i_lock);
++
++	send_layoutget(ino, ctx, &arg, lsegpp, lo);
++out:
++	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
++		nfsi->layout ? nfsi->layout->state : 0UL, lseg);
++	return;
++out_unlock:
++	if (lsegpp)
++		*lsegpp = lseg;
++	spin_unlock(&ino->i_lock);
++	goto out;
++}
++
++void
++pnfs_get_layout_done(struct nfs4_layoutget *lgp, int rpc_status)
++{
++	struct pnfs_layout_segment *lseg = NULL;
++	struct nfs_inode *nfsi = NFS_I(lgp->args.inode);
++	time_t suspend = 0;
++
++	dprintk("-->%s\n", __func__);
++
++	lgp->status = rpc_status;
++	if (likely(!rpc_status)) {
++		if (unlikely(lgp->res.layout.len < 0)) {
++			printk(KERN_ERR
++			       "%s: ERROR Returned layout size is ZERO\n", __func__);
++			lgp->status = -EIO;
++		}
++		goto out;
++	}
++
++	dprintk("%s: ERROR retrieving layout %d\n", __func__, rpc_status);
++	switch (rpc_status) {
++	case -NFS4ERR_BADLAYOUT:
++		lgp->status = -ENOENT;
++		/* FALLTHROUGH */
++	case -EACCES:	/* NFS4ERR_ACCESS */
++		/* transient error, don't mark with NFS_INO_LAYOUT_FAILED */
++		goto out;
++
++	case -NFS4ERR_LAYOUTTRYLATER:
++	case -NFS4ERR_RECALLCONFLICT:
++	case -NFS4ERR_OLD_STATEID:
++	case -EAGAIN:	/* NFS4ERR_LOCKED */
++		lgp->status = -NFS4ERR_DELAY;	/* for nfs4_handle_exception */
++		/* FALLTHROUGH */
++	case -NFS4ERR_GRACE:
++	case -NFS4ERR_DELAY:
++		goto out;
++
++	case -NFS4ERR_ADMIN_REVOKED:
++	case -NFS4ERR_DELEG_REVOKED:
++		/* The layout is expected to be returned at this point.
++		 * This should clear the layout stateid as well */
++		suspend = get_seconds() + 1;
++		break;
++
++	case -NFS4ERR_LAYOUTUNAVAILABLE:
++		lgp->status = -ENOTSUPP;
++		break;
++
++	case -NFS4ERR_REP_TOO_BIG:
++	case -NFS4ERR_REP_TOO_BIG_TO_CACHE:
++		lgp->status = -E2BIG;
++		break;
++
++	/* Leave the following errors untranslated */
++	case -NFS4ERR_DEADSESSION:
++	case -NFS4ERR_DQUOT:
++	case -EINVAL:		/* NFS4ERR_INVAL */
++	case -EIO:		/* NFS4ERR_IO */
++	case -NFS4ERR_FHEXPIRED:
++	case -NFS4ERR_MOVED:
++	case -NFS4ERR_NOSPC:
++	case -ESERVERFAULT:	/* NFS4ERR_SERVERFAULT */
++	case -ESTALE:		/* NFS4ERR_STALE */
++	case -ETOOSMALL:	/* NFS4ERR_TOOSMALL */
++		break;
++
++	/* The following errors are our fault and should never happen */
++	case -NFS4ERR_BADIOMODE:
++	case -NFS4ERR_BADXDR:
++	case -NFS4ERR_REQ_TOO_BIG:
++	case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
++	case -NFS4ERR_WRONG_TYPE:
++		lgp->status = -EINVAL;
++		/* FALLTHROUGH */
++	case -NFS4ERR_BAD_STATEID:
++	case -NFS4ERR_NOFILEHANDLE:
++	case -ENOTSUPP:	/* NFS4ERR_NOTSUPP */
++	case -NFS4ERR_OPENMODE:
++	case -NFS4ERR_OP_NOT_IN_SESSION:
++	case -NFS4ERR_TOO_MANY_OPS:
++		dprintk("%s: error %d: should never happen\n", __func__,
++			rpc_status);
++		break;
++
++	/* The following errors are the server's fault */
++	default:
++		dprintk("%s: illegal error %d\n", __func__, rpc_status);
++		lgp->status = -EIO;
++		break;
++	}
++
++	/* remember that get layout failed and suspend trying */
++	nfsi->pnfs_layout_suspend = suspend;
++	set_bit(lo_fail_bit(lgp->args.range.iomode),
++		&nfsi->layout->state);
++	dprintk("%s: layout_get suspended until %ld\n",
++		__func__, suspend);
++out:
++	dprintk("%s end (err:%d) state 0x%lx lseg %p\n",
++		__func__, lgp->status, nfsi->layout->state, lseg);
++	return;
++}
++
++int
++pnfs_layout_process(struct nfs4_layoutget *lgp)
++{
++	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
++	struct nfs4_layoutget_res *res = &lgp->res;
++	struct pnfs_layout_segment *lseg;
++	struct inode *ino = PNFS_INODE(lo);
++	int status = 0;
++
++	/* Inject layout blob into I/O device driver */
++	lseg = PNFS_LD_IO_OPS(lo)->alloc_lseg(lo, res);
++	if (!lseg || IS_ERR(lseg)) {
++		if (!lseg)
++			status = -ENOMEM;
++		else
++			status = PTR_ERR(lseg);
++		dprintk("%s: Could not allocate layout: error %d\n",
++		       __func__, status);
++		goto out;
++	}
++
++	spin_lock(&ino->i_lock);
++	init_lseg(lo, lseg);
++	lseg->range = res->range;
++	if (lgp->lsegpp) {
++		get_lseg(lseg);
++		*lgp->lsegpp = lseg;
++	}
++	pnfs_insert_layout(lo, lseg);
++
++	if (res->return_on_close) {
++		lo->roc_iomode |= res->range.iomode;
++		if (!lo->roc_iomode)
++			lo->roc_iomode = IOMODE_ANY;
++	}
++
++	/* Done processing layoutget. Set the layout stateid */
++	pnfs_set_layout_stateid(lo, &res->stateid);
++	spin_unlock(&ino->i_lock);
++out:
++	return status;
++}
++
++void
++readahead_range(struct inode *inode, struct list_head *pages, loff_t *offset,
++		size_t *count)
++{
++	struct page *first, *last;
++	loff_t foff, i_size = i_size_read(inode);
++	pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
++	size_t range;
++
++
++	first = list_entry((pages)->prev, struct page, lru);
++	last = list_entry((pages)->next, struct page, lru);
++
++	foff = (loff_t)first->index << PAGE_CACHE_SHIFT;
++
++	range = (last->index - first->index) * PAGE_CACHE_SIZE;
++	if (last->index == end_index)
++		range += ((i_size - 1) & ~PAGE_CACHE_MASK) + 1;
++	else
++		range += PAGE_CACHE_SIZE;
++	dprintk("%s foff %lu, range %Zu\n", __func__, (unsigned long)foff,
++		range);
++	*offset = foff;
++	*count = range;
++}
++
++void
++pnfs_set_pg_test(struct inode *inode, struct nfs_pageio_descriptor *pgio)
++{
++	struct pnfs_layout_hdr *lo;
++	struct pnfs_layoutdriver_type *ld;
++
++	pgio->pg_test = NULL;
++
++	lo = NFS_I(inode)->layout;
++	ld = NFS_SERVER(inode)->pnfs_curr_ld;
++	if (!pnfs_enabled_sb(NFS_SERVER(inode)) || !lo)
++		return;
++
++	if (ld->ld_policy_ops)
++		pgio->pg_test = ld->ld_policy_ops->pg_test;
++}
++
++static u32
++pnfs_getboundary(struct inode *inode)
++{
++	u32 stripe_size = 0;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct layoutdriver_policy_operations *policy_ops;
++
++	if (!nfss->pnfs_curr_ld)
++		goto out;
++
++	policy_ops = nfss->pnfs_curr_ld->ld_policy_ops;
++	if (!policy_ops || !policy_ops->get_stripesize)
++		goto out;
++
++	/* The default is to not gather across stripes */
++	if (pnfs_ld_gather_across_stripes(nfss->pnfs_curr_ld))
++		goto out;
++
++	spin_lock(&inode->i_lock);
++	if (NFS_I(inode)->layout)
++		stripe_size = policy_ops->get_stripesize(NFS_I(inode)->layout);
++	spin_unlock(&inode->i_lock);
++out:
++	return stripe_size;
++}
++
++/*
++ * rsize is already set by caller to MDS rsize.
++ */
++void
++pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
++		  struct inode *inode,
++		  struct nfs_open_context *ctx,
++		  struct list_head *pages,
++		  size_t *rsize)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	size_t count = 0;
++	loff_t loff;
++
++	pgio->pg_iswrite = 0;
++	pgio->pg_boundary = 0;
++	pgio->pg_test = NULL;
++	pgio->pg_lseg = NULL;
++
++	if (!pnfs_enabled_sb(nfss))
++		return;
++
++	/* Calculate the total read-ahead count */
++	readahead_range(inode, pages, &loff, &count);
++
++	if (count > 0) {
++		_pnfs_update_layout(inode, ctx, loff, count, IOMODE_READ,
++				    &pgio->pg_lseg);
++		if (!pgio->pg_lseg)
++			return;
++
++		*rsize = NFS_SERVER(inode)->ds_rsize;
++		pgio->pg_boundary = pnfs_getboundary(inode);
++		if (pgio->pg_boundary)
++			pnfs_set_pg_test(inode, pgio);
++	}
++}
++
++void
++pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
++		       size_t *wsize)
++{
++	struct nfs_server *server = NFS_SERVER(inode);
++
++	pgio->pg_iswrite = 1;
++	if (!pnfs_enabled_sb(server)) {
++		pgio->pg_boundary = 0;
++		pgio->pg_test = NULL;
++		return;
++	}
++	pgio->pg_boundary = pnfs_getboundary(inode);
++	pnfs_set_pg_test(inode, pgio);
++	*wsize = server->ds_wsize;
++}
++
++/* Return I/O buffer size for a layout driver
++ * This value will determine what size reads and writes
++ * will be gathered into and sent to the data servers.
++ * blocksize must be a multiple of the page cache size.
++ */
++unsigned int
++pnfs_getiosize(struct nfs_server *server)
++{
++	if (!PNFS_EXISTS_LDPOLICY_OP(server, get_blocksize))
++		return 0;
++	return server->pnfs_curr_ld->ld_policy_ops->get_blocksize();
++}
++
++void
++pnfs_set_ds_iosize(struct nfs_server *server)
++{
++	unsigned dssize = pnfs_getiosize(server);
++
++	/* Set buffer size for data servers */
++	if (dssize > 0) {
++		server->ds_rsize = server->ds_wsize =
++			nfs_block_size(dssize, NULL);
++	} else {
++		server->ds_wsize = server->wsize;
++		server->ds_rsize = server->rsize;
++	}
++}
++
++static int
++pnfs_call_done(struct pnfs_call_data *pdata, struct rpc_task *task, void *data)
++{
++	put_lseg(pdata->lseg);
++	pdata->lseg = NULL;
++	pdata->call_ops->rpc_call_done(task, data);
++	if (pdata->pnfs_error == -EAGAIN || task->tk_status == -EAGAIN)
++		return -EAGAIN;
++	if (pdata->pnfsflags & PNFS_NO_RPC) {
++		pdata->call_ops->rpc_release(data);
++	} else {
++		/*
++		 * just restore original rpc call ops
++		 * rpc_release will be called later by the rpc scheduling layer.
++		 */
++		task->tk_ops = pdata->call_ops;
++	}
++	return 0;
++}
++
++/* Post-write completion function
++ * Invoked by all layout drivers when write_pagelist is done.
++ *
++ * NOTE: callers set data->pnfsflags PNFS_NO_RPC
++ * so that the NFS cleanup routines perform only the page cache
++ * cleanup.
++ */
++static void
++pnfs_write_retry(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++	struct pnfs_layout_range range;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++	range.iomode = IOMODE_RW;
++	range.offset = wdata->args.offset;
++	range.length = wdata->args.count;
++	_pnfs_return_layout(wdata->inode, &range, NULL, RETURN_FILE, true);
++	pnfs_initiate_write(wdata, NFS_CLIENT(wdata->inode),
++			    wdata->pdata.call_ops, wdata->pdata.how);
++}
++
++static void
++pnfs_writeback_done(struct nfs_write_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	/* update last write offset and need layout commit
++	 * for non-files layout types (files layout calls
++	 * pnfs4_write_done for this)
++	 */
++	if ((pdata->pnfsflags & PNFS_NO_RPC) &&
++	    data->task.tk_status >= 0 && data->res.count > 0) {
++		struct nfs_inode *nfsi = NFS_I(data->inode);
++
++		pnfs_update_last_write(nfsi, data->args.offset, data->res.count);
++		pnfs_need_layoutcommit(nfsi, data->args.context);
++	}
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		INIT_WORK(&data->task.u.tk_work, pnfs_write_retry);
++		queue_work(nfsiod_workqueue, &data->task.u.tk_work);
++	}
++}
++
++static void _pnfs_clear_lseg_from_pages(struct list_head *head)
++{
++	struct nfs_page *req;
++
++	list_for_each_entry(req, head, wb_list) {
++		put_lseg(req->wb_lseg);
++		req->wb_lseg = NULL;
++	}
++}
++
++/*
++ * Call the appropriate parallel I/O subsystem write function.
++ * If no I/O device driver exists, or one does match the returned
++ * fstype, then return a positive status for regular NFS processing.
++ *
++ * TODO: Is wdata->how and wdata->args.stable always the same value?
++ * TODO: It seems in NFS, the server may not do a stable write even
++ * though it was requested (and vice-versa?).  To check, it looks
++ * in data->res.verf->committed.  Do we need this ability
++ * for non-file layout drivers?
++ */
++enum pnfs_try_status
++pnfs_try_to_write_data(struct nfs_write_data *wdata,
++			const struct rpc_call_ops *call_ops, int how)
++{
++	struct inode *inode = wdata->inode;
++	enum pnfs_try_status trypnfs;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct pnfs_layout_segment *lseg = wdata->req->wb_lseg;
++
++	wdata->pdata.call_ops = call_ops;
++	wdata->pdata.pnfs_error = 0;
++	wdata->pdata.how = how;
++
++	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
++		inode->i_ino, wdata->args.count, wdata->args.offset, how);
++
++	get_lseg(lseg);
++
++	if (!pnfs_use_rpc(nfss))
++		wdata->pdata.pnfsflags |= PNFS_NO_RPC;
++	wdata->pdata.lseg = lseg;
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->write_pagelist(wdata,
++		nfs_page_array_len(wdata->args.pgbase, wdata->args.count),
++								how);
++
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {
++		wdata->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		wdata->pdata.lseg = NULL;
++		put_lseg(lseg);
++		_pnfs_clear_lseg_from_pages(&wdata->pages);
++	} else {
++		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
++	}
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++
++/* Post-read completion function.  Invoked by all layout drivers when
++ * read_pagelist is done
++ */
++static void
++pnfs_read_retry(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_read_data *rdata;
++	struct pnfs_layout_range range;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	rdata = container_of(task, struct nfs_read_data, task);
++	range.iomode = IOMODE_RW;
++	range.offset = rdata->args.offset;
++	range.length = rdata->args.count;
++	_pnfs_return_layout(rdata->inode, &range, NULL, RETURN_FILE, true);
++	pnfs_initiate_read(rdata, NFS_CLIENT(rdata->inode),
++			   rdata->pdata.call_ops);
++}
++
++static void
++pnfs_read_done(struct nfs_read_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		INIT_WORK(&data->task.u.tk_work, pnfs_read_retry);
++		queue_work(nfsiod_workqueue, &data->task.u.tk_work);
++	}
++}
++
++/*
++ * Call the appropriate parallel I/O subsystem read function.
++ * If no I/O device driver exists, or one does match the returned
++ * fstype, then return a positive status for regular NFS processing.
++ */
++enum pnfs_try_status
++pnfs_try_to_read_data(struct nfs_read_data *rdata,
++		       const struct rpc_call_ops *call_ops)
++{
++	struct inode *inode = rdata->inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct pnfs_layout_segment *lseg = rdata->req->wb_lseg;
++	enum pnfs_try_status trypnfs;
++
++	rdata->pdata.call_ops = call_ops;
++	rdata->pdata.pnfs_error = 0;
++
++	dprintk("%s: Reading ino:%lu %u@%llu\n",
++		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
++
++	get_lseg(lseg);
++
++	if (!pnfs_use_rpc(nfss))
++		rdata->pdata.pnfsflags |= PNFS_NO_RPC;
++	rdata->pdata.lseg = lseg;
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->read_pagelist(rdata,
++		nfs_page_array_len(rdata->args.pgbase, rdata->args.count));
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {
++		rdata->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		rdata->pdata.lseg = NULL;
++		put_lseg(lseg);
++		_pnfs_clear_lseg_from_pages(&rdata->pages);
++	} else {
++		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
++	}
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++
++/*
++ * This gives the layout driver an opportunity to read in page "around"
++ * the data to be written.  It returns 0 on success, otherwise an error code
++ * which will either be passed up to user, or ignored if
++ * some previous part of write succeeded.
++ * Note the range [pos, pos+len-1] is entirely within the page.
++ */
++int _pnfs_write_begin(struct inode *inode, struct page *page,
++		      loff_t pos, unsigned len,
++		      struct pnfs_layout_segment *lseg,
++		      struct pnfs_fsdata **fsdata)
++{
++	struct pnfs_fsdata *data;
++	int status = 0;
++
++	dprintk("--> %s: pos=%llu len=%u\n",
++		__func__, (unsigned long long)pos, len);
++	data = kzalloc(sizeof(struct pnfs_fsdata), GFP_KERNEL);
++	if (!data) {
++		status = -ENOMEM;
++		goto out;
++	}
++	data->lseg = lseg; /* refcount passed into data to be managed there */
++	status = NFS_SERVER(inode)->pnfs_curr_ld->ld_io_ops->write_begin(
++						lseg, page, pos, len, data);
++	if (status) {
++		kfree(data);
++		data = NULL;
++	}
++out:
++	*fsdata = data;
++	dprintk("<-- %s: status=%d\n", __func__, status);
++	return status;
++}
++
++/* Return 0 on success, negative on failure */
++/* CAREFUL - what happens if copied < len??? */
++int _pnfs_write_end(struct inode *inode, struct page *page,
++		    loff_t pos, unsigned len, unsigned copied,
++		    struct pnfs_layout_segment *lseg)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int status;
++
++	status = nfss->pnfs_curr_ld->ld_io_ops->write_end(inode, page,
++						pos, len, copied, lseg);
++	return status;
++}
++
++/* pNFS Commit callback function for all layout drivers */
++static void
++pnfs_commit_done(struct nfs_write_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		struct pnfs_layout_range range = {
++			.iomode = IOMODE_RW,
++			.offset = data->args.offset,
++			.length = data->args.count,
++		};
++		dprintk("%s: retrying\n", __func__);
++		_pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE,
++				    true);
++		pnfs_initiate_commit(data, NFS_CLIENT(data->inode),
++				     pdata->call_ops, pdata->how, 1);
++	}
++}
++
++enum pnfs_try_status
++pnfs_try_to_commit(struct nfs_write_data *data,
++		    const struct rpc_call_ops *call_ops, int sync)
++{
++	struct inode *inode = data->inode;
++	struct nfs_server *nfss = NFS_SERVER(data->inode);
++	enum pnfs_try_status trypnfs;
++
++	dprintk("%s: Begin\n", __func__);
++
++	/* We need to account for possibility that
++	 * each nfs_page can point to a different lseg (or be NULL).
++	 * For the immediate case of whole-file-only layouts, we at
++	 * least know there can be only a single lseg.
++	 * We still have to account for the possibility of some being NULL.
++	 * This will be done by passing the buck to the layout driver.
++	 */
++	data->pdata.call_ops = call_ops;
++	data->pdata.pnfs_error = 0;
++	data->pdata.how = sync;
++	data->pdata.lseg = NULL;
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->commit(data, sync);
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {
++		data->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		_pnfs_clear_lseg_from_pages(&data->pages);
++	} else
++		nfs_inc_stats(inode, NFSIOS_PNFS_COMMIT);
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++
++void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
++{
++	struct nfs_server *nfss = NFS_SERVER(data->args.inode);
++
++	/* TODO: Maybe we should avoid this by allowing the layout driver
++	 * to directly xdr its layout on the wire.
++	 */
++	if (nfss->pnfs_curr_ld->ld_io_ops->cleanup_layoutcommit)
++		nfss->pnfs_curr_ld->ld_io_ops->cleanup_layoutcommit(
++					NFS_I(data->args.inode)->layout,
++					&data->args, data->status);
++}
++
++/*
++ * Set up the argument/result storage required for the RPC call.
++ */
++static int
++pnfs_layoutcommit_setup(struct inode *inode,
++			struct nfs4_layoutcommit_data *data,
++			loff_t write_begin_pos, loff_t write_end_pos)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int result = 0;
++
++	dprintk("--> %s\n", __func__);
++
++	data->args.inode = inode;
++	data->args.fh = NFS_FH(inode);
++	data->args.layout_type = nfss->pnfs_curr_ld->id;
++	data->res.fattr = &data->fattr;
++	nfs_fattr_init(&data->fattr);
++
++	/* TODO: Need to determine the correct values */
++	data->args.time_modify_changed = 0;
++
++	/* Set values from inode so it can be reset
++	 */
++	data->args.range.iomode = IOMODE_RW;
++	data->args.range.offset = write_begin_pos;
++	data->args.range.length = write_end_pos - write_begin_pos + 1;
++	data->args.lastbytewritten =  min(write_end_pos,
++					  i_size_read(inode) - 1);
++	data->args.bitmask = nfss->attr_bitmask;
++	data->res.server = nfss;
++
++	/* Call layout driver to set the arguments */
++	if (nfss->pnfs_curr_ld->ld_io_ops->setup_layoutcommit)
++		result = nfss->pnfs_curr_ld->ld_io_ops->setup_layoutcommit(
++				NFS_I(inode)->layout, &data->args);
++
++	dprintk("<-- %s Status %d\n", __func__, result);
++	return result;
++}
++
++/* Issue a layoutcommit for an inode; sync is passed to nfs4_proc_layoutcommit.
++ */
++int
++pnfs_layoutcommit_inode(struct inode *inode, int sync)
++{
++	struct nfs4_layoutcommit_data *data;
++	struct nfs_inode *nfsi = NFS_I(inode);
++	loff_t write_begin_pos;
++	loff_t write_end_pos;
++
++	int status = 0;
++
++	dprintk("%s Begin (sync:%d)\n", __func__, sync);
++
++	BUG_ON(!has_layout(nfsi));
++
++	data = pnfs_layoutcommit_alloc();
++	if (!data)
++		return -ENOMEM;
++
++	spin_lock(&inode->i_lock);
++	if (!layoutcommit_needed(nfsi)) {
++		spin_unlock(&inode->i_lock);
++		goto out_free;
++	}
++
++	/* Clear layoutcommit properties in the inode so
++	 * new lc info can be generated
++	 */
++	write_begin_pos = nfsi->layout->write_begin_pos;
++	write_end_pos = nfsi->layout->write_end_pos;
++	data->cred = nfsi->layout->cred;
++	nfsi->layout->write_begin_pos = 0;
++	nfsi->layout->write_end_pos = 0;
++	nfsi->layout->cred = NULL;
++	__clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->state);
++	pnfs_get_layout_stateid(&data->args.stateid, nfsi->layout);
++
++	/* Reference for layoutcommit matched in pnfs_layoutcommit_release */
++	get_layout(NFS_I(inode)->layout);
++
++	spin_unlock(&inode->i_lock);
++
++	/* Set up layout commit args */
++	status = pnfs_layoutcommit_setup(inode, data, write_begin_pos,
++					 write_end_pos);
++	if (status) {
++		/* The layout driver failed to setup the layoutcommit */
++		put_rpccred(data->cred);
++		put_layout(inode);
++		goto out_free;
++	}
++	status = nfs4_proc_layoutcommit(data, sync);
++out:
++	dprintk("%s end (err:%d)\n", __func__, status);
++	return status;
++out_free:
++	pnfs_layoutcommit_free(data);
++	goto out;
++}
++
++void pnfs_free_fsdata(struct pnfs_fsdata *fsdata)
++{
++	if (fsdata) {
++		/* lseg refcounting handled directly in nfs_write_end */
++		kfree(fsdata);
++	}
++}
++
++/* Callback operations for layout drivers.
++ */
++struct pnfs_client_operations pnfs_ops = {
++	.nfs_getdevicelist = nfs4_proc_getdevicelist,
++	.nfs_getdeviceinfo = nfs4_proc_getdeviceinfo,
++	.nfs_readlist_complete = pnfs_read_done,
++	.nfs_writelist_complete = pnfs_writeback_done,
++	.nfs_commit_complete = pnfs_commit_done,
++};
++
++EXPORT_SYMBOL(pnfs_unregister_layoutdriver);
++EXPORT_SYMBOL(pnfs_register_layoutdriver);
++
++
++/* Device ID cache. Supports one layout type per struct nfs_client */
++int
++nfs4_alloc_init_deviceid_cache(struct nfs_client *clp,
++			 void (*free_callback)(struct kref *))
++{
++	struct nfs4_deviceid_cache *c;
++
++	c = kzalloc(sizeof(struct nfs4_deviceid_cache), GFP_KERNEL);
++	if (!c)
++		return -ENOMEM;
++	spin_lock(&clp->cl_lock);
++	if (clp->cl_devid_cache != NULL) {
++		kref_get(&clp->cl_devid_cache->dc_kref);
++		spin_unlock(&clp->cl_lock);
++		dprintk("%s [kref [%d]]\n", __func__,
++			atomic_read(&clp->cl_devid_cache->dc_kref.refcount));
++		kfree(c);
++	} else {
++		int i;
++
++		spin_lock_init(&c->dc_lock);
++		for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE ; i++)
++			INIT_HLIST_HEAD(&c->dc_deviceids[i]);
++		kref_init(&c->dc_kref);
++		c->dc_free_callback = free_callback;
++		clp->cl_devid_cache = c;
++		spin_unlock(&clp->cl_lock);
++		dprintk("%s [new]\n", __func__);
++	}
++	return 0;
++}
++EXPORT_SYMBOL(nfs4_alloc_init_deviceid_cache);
++
++void
++nfs4_init_deviceid_node(struct nfs4_deviceid *d)
++{
++	INIT_HLIST_NODE(&d->de_node);
++	kref_init(&d->de_kref);
++}
++EXPORT_SYMBOL(nfs4_init_deviceid_node);
++
++/* Called from layoutdriver_io_operations->alloc_lseg */
++void
++nfs4_set_layout_deviceid(struct pnfs_layout_segment *l, struct nfs4_deviceid *d)
++{
++	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
++	l->deviceid = d;
++}
++EXPORT_SYMBOL(nfs4_set_layout_deviceid);
++
++/* Called from layoutdriver_io_operations->free_lseg */
++void
++nfs4_put_unset_layout_deviceid(struct pnfs_layout_segment *l,
++			   struct nfs4_deviceid *d,
++			   void (*free_callback)(struct kref *))
++{
++	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
++	l->deviceid = NULL;
++	kref_put(&d->de_kref, free_callback);
++}
++EXPORT_SYMBOL(nfs4_put_unset_layout_deviceid);
++
++/* Find and reference a deviceid */
++struct nfs4_deviceid *
++nfs4_find_get_deviceid(struct nfs4_deviceid_cache *c, struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++	long hash = nfs4_deviceid_hash(id);
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	rcu_read_lock();
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (!memcmp(&d->de_id, id, NFS4_PNFS_DEVICEID4_SIZE)) {
++			if (!atomic_inc_not_zero(&d->de_kref.refcount)) {
++				goto fail;
++			} else {
++				rcu_read_unlock();
++				return d;
++			}
++		}
++	}
++fail:
++	rcu_read_unlock();
++	return NULL;
++}
++EXPORT_SYMBOL(nfs4_find_get_deviceid);
++
++/*
++ * Add and kref_get a deviceid.
++ * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
++ */
++struct nfs4_deviceid *
++nfs4_add_get_deviceid(struct nfs4_deviceid_cache *c, struct nfs4_deviceid *new)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++	long hash = nfs4_deviceid_hash(&new->de_id);
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	spin_lock(&c->dc_lock);
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (!memcmp(&d->de_id, &new->de_id, NFS4_PNFS_DEVICEID4_SIZE)) {
++			kref_get(&d->de_kref);
++			spin_unlock(&c->dc_lock);
++			dprintk("%s [discard]\n", __func__);
++			c->dc_free_callback(&new->de_kref);
++			return d;
++		}
++	}
++	hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
++	kref_get(&new->de_kref);
++	spin_unlock(&c->dc_lock);
++	dprintk("%s [new]\n", __func__);
++	return new;
++}
++EXPORT_SYMBOL(nfs4_add_get_deviceid);
++
++/*
++ * Unhash the first deviceid in a bucket that matches id (any entry when id
++ * is NULL) and drop a reference on it. Return 1 if removed, 0 if none found.
++ */
++static int
++nfs4_remove_deviceid(struct nfs4_deviceid_cache *c, long hash,
++		     struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	spin_lock(&c->dc_lock);
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (id && memcmp(id, &d->de_id, NFS4_PNFS_DEVICEID4_SIZE))
++			continue;
++		hlist_del_rcu(&d->de_node);
++		spin_unlock(&c->dc_lock);
++		synchronize_rcu();
++		dprintk("%s [%d]\n", __func__,
++			atomic_read(&d->de_kref.refcount));
++		kref_put(&d->de_kref, c->dc_free_callback);
++		return 1;
++	}
++	spin_unlock(&c->dc_lock);
++	return 0;
++}
++
++void
++nfs4_delete_device(struct nfs4_deviceid_cache *c, struct pnfs_deviceid *id)
++{
++	long hash = nfs4_deviceid_hash(id);
++
++	nfs4_remove_deviceid(c, hash, id);
++}
++EXPORT_SYMBOL(nfs4_delete_device);
++
++static void
++nfs4_free_deviceid_cache(struct kref *kref)
++{
++	struct nfs4_deviceid_cache *cache =
++		container_of(kref, struct nfs4_deviceid_cache, dc_kref);
++	long i;
++
++	for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
++		while (nfs4_remove_deviceid(cache, i, NULL))
++			;
++	kfree(cache);
++}
++
++/* Put a ref on clp's deviceid cache; the last put clears cl_devid_cache under
++ * cl_lock (so racing puts cannot leave it dangling) and frees the cache. */
++void
++nfs4_put_deviceid_cache(struct nfs_client *clp)
++{
++	struct nfs4_deviceid_cache *tmp = clp->cl_devid_cache;
++
++	dprintk("--> %s cl_devid_cache %p\n", __func__, tmp);
++	dprintk("%s [%d]\n", __func__, atomic_read(&tmp->dc_kref.refcount));
++	if (atomic_dec_and_lock(&tmp->dc_kref.refcount, &clp->cl_lock)) {
++		clp->cl_devid_cache = NULL;
++		spin_unlock(&clp->cl_lock);
++		nfs4_free_deviceid_cache(&tmp->dc_kref);
++	}
++}
++EXPORT_SYMBOL(nfs4_put_deviceid_cache);
+diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.h
+--- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-08-31 20:42:05.542222767 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-08-31 20:42:05.542222767 -0400
+@@ -0,0 +1,354 @@
++/*
++ *  fs/nfs/pnfs.h
++ *
++ *  pNFS client data structures.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef FS_NFS_PNFS_H
++#define FS_NFS_PNFS_H
++
++#include <linux/nfs4_pnfs.h>
++
++#ifdef CONFIG_NFS_V4_1
++
++#include <linux/nfs_page.h>
++#include <linux/nfs_iostat.h>
++#include "iostat.h"
++
++/* nfs4proc.c */
++extern int nfs4_proc_getdevicelist(struct nfs_server *server,
++				   const struct nfs_fh *fh,
++				   struct pnfs_devicelist *devlist);
++extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
++				   struct pnfs_device *dev);
++extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
++extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
++				   int issync);
++extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool wait);
++
++/* pnfs.c */
++extern const nfs4_stateid zero_stateid;
++
++void _pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp);
++
++int _pnfs_return_layout(struct inode *, struct pnfs_layout_range *,
++			const nfs4_stateid *stateid, /* optional */
++			enum pnfs_layoutreturn_type, bool wait);
++void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *mntfh, u32 id);
++void unmount_pnfs_layoutdriver(struct nfs_server *);
++enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
++					     const struct rpc_call_ops *, int);
++enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
++					    const struct rpc_call_ops *);
++int pnfs_initialize(void);
++void pnfs_uninitialize(void);
++void pnfs_layoutcommit_free(struct nfs4_layoutcommit_data *data);
++void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
++int pnfs_layoutcommit_inode(struct inode *inode, int sync);
++void pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent);
++void pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx);
++unsigned int pnfs_getiosize(struct nfs_server *server);
++void pnfs_set_ds_iosize(struct nfs_server *server);
++enum pnfs_try_status pnfs_try_to_commit(struct nfs_write_data *,
++					 const struct rpc_call_ops *, int);
++void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
++			   struct nfs_open_context *, struct list_head *,
++			   size_t *);
++void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
++			    size_t *);
++void pnfs_free_fsdata(struct pnfs_fsdata *fsdata);
++void pnfs_get_layout_done(struct nfs4_layoutget *, int rpc_status);
++int pnfs_layout_process(struct nfs4_layoutget *lgp);
++void pnfs_layout_release(struct pnfs_layout_hdr *, struct pnfs_layout_range *range);
++void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
++			     const nfs4_stateid *stateid);
++void pnfs_destroy_layout(struct nfs_inode *);
++void pnfs_destroy_all_layouts(struct nfs_client *);
++void put_layout(struct inode *inode);
++void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo);
++int _pnfs_write_begin(struct inode *inode, struct page *page,
++		      loff_t pos, unsigned len,
++		      struct pnfs_layout_segment *lseg,
++		      struct pnfs_fsdata **fsdata);
++int _pnfs_write_end(struct inode *inode, struct page *page,
++		    loff_t pos, unsigned len, unsigned copied,
++		    struct pnfs_layout_segment *lseg);
++
++#define PNFS_EXISTS_LDIO_OP(srv, opname) ((srv)->pnfs_curr_ld &&	\
++				     (srv)->pnfs_curr_ld->ld_io_ops &&	\
++				     (srv)->pnfs_curr_ld->ld_io_ops->opname)
++#define PNFS_EXISTS_LDPOLICY_OP(srv, opname) ((srv)->pnfs_curr_ld &&	\
++				     (srv)->pnfs_curr_ld->ld_policy_ops && \
++				     (srv)->pnfs_curr_ld->ld_policy_ops->opname)
++
++#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
++
++static inline int lo_fail_bit(u32 iomode)
++{
++	return iomode == IOMODE_RW ?
++			 NFS_INO_RW_LAYOUT_FAILED : NFS_INO_RO_LAYOUT_FAILED;
++}
++
++/* Return true if a layout driver is being used for this mountpoint */
++static inline int pnfs_enabled_sb(struct nfs_server *nfss)
++{
++	return nfss->pnfs_curr_ld != NULL;
++}
++
++static inline int pnfs_grow_ok(struct pnfs_layout_segment *lseg,
++			       struct pnfs_fsdata *fsdata)
++{
++	return !fsdata  || ((struct pnfs_layout_segment *)fsdata == lseg) ||
++		!fsdata->bypass_eof;
++}
++
++/* Should the pNFS client commit and return the layout upon a setattr */
++static inline bool
++pnfs_ld_layoutret_on_setattr(struct inode *inode)
++{
++	if (!PNFS_EXISTS_LDPOLICY_OP(NFS_SERVER(inode), flags))
++		return false;	/* no driver, no policy ops, or no flags set */
++	return NFS_SERVER(inode)->pnfs_curr_ld->ld_policy_ops->flags &
++		PNFS_LAYOUTRET_ON_SETATTR;
++}
++
++/* Should the pNFS client commit and return the layout on close
++ */
++static inline int
++pnfs_layout_roc_iomode(struct nfs_inode *nfsi)
++{
++	return nfsi->layout->roc_iomode;
++}
++
++static inline int pnfs_write_begin(struct file *filp, struct page *page,
++				   loff_t pos, unsigned len,
++				   struct pnfs_layout_segment *lseg,
++				   void **fsdata)
++{
++	struct inode *inode = filp->f_dentry->d_inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int status = 0;
++
++	*fsdata = lseg;
++	if (lseg && PNFS_EXISTS_LDIO_OP(nfss, write_begin))
++		status = _pnfs_write_begin(inode, page, pos, len, lseg,
++					   (struct pnfs_fsdata **) fsdata);
++	return status;
++}
++
++static inline int pnfs_write_end(struct file *filp, struct page *page,
++				 loff_t pos, unsigned len, unsigned copied,
++				 struct pnfs_layout_segment *lseg)
++{
++	struct inode *inode = filp->f_dentry->d_inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++
++	if (PNFS_EXISTS_LDIO_OP(nfss, write_end))
++		return _pnfs_write_end(inode, page, pos, len, copied, lseg);
++	else
++		return 0;
++}
++
++static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
++{
++	if (fsdata) {
++		struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode);
++
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_end_cleanup))
++			nfss->pnfs_curr_ld->ld_io_ops->write_end_cleanup(filp, fsdata);
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_begin))
++			pnfs_free_fsdata(fsdata);
++	}
++}
++
++static inline int pnfs_return_layout(struct inode *ino,
++				     struct pnfs_layout_range *range,
++				     const nfs4_stateid *stateid, /* optional */
++				     enum pnfs_layoutreturn_type type,
++				     bool wait)
++{
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct nfs_server *nfss = NFS_SERVER(ino);
++
++	if (pnfs_enabled_sb(nfss) &&
++	    (type != RETURN_FILE || has_layout(nfsi)))
++		return _pnfs_return_layout(ino, range, stateid, type, wait);
++
++	return 0;
++}
++
++static inline void pnfs_update_layout(struct inode *ino,
++	struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp)
++{
++	struct nfs_server *nfss = NFS_SERVER(ino);
++
++	if (pnfs_enabled_sb(nfss))
++		_pnfs_update_layout(ino, ctx, pos, count, access_type, lsegpp);
++	else {
++		if (lsegpp)
++			*lsegpp = NULL;
++	}
++}
++
++static inline int pnfs_get_write_status(struct nfs_write_data *data)
++{
++	return data->pdata.pnfs_error;
++}
++
++static inline int pnfs_get_read_status(struct nfs_read_data *data)
++{
++	return data->pdata.pnfs_error;
++}
++
++static inline int pnfs_use_rpc(struct nfs_server *nfss)
++{
++	if (pnfs_enabled_sb(nfss))
++		return pnfs_ld_use_rpc_code(nfss->pnfs_curr_ld);
++
++	return 1;
++}
++
++static inline struct pnfs_layout_segment *
++nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata)
++{
++	if (fsdata) {
++		struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode);
++
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_begin))
++			return ((struct pnfs_fsdata *) fsdata)->lseg;
++	}
++	return fsdata;
++}
++#else  /* CONFIG_NFS_V4_1 */
++
++static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
++{
++}
++
++static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
++{
++}
++
++static inline void get_lseg(struct pnfs_layout_segment *lseg)
++{
++}
++
++static inline void put_lseg(struct pnfs_layout_segment *lseg)
++{
++}
++
++static inline void
++pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp)
++{
++	if (lsegpp)
++		*lsegpp = NULL;
++}
++
++static inline int pnfs_grow_ok(struct pnfs_layout_segment *lseg,
++			       struct pnfs_fsdata *fsdata)
++{
++	return 1;
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_read_data(struct nfs_read_data *data,
++		      const struct rpc_call_ops *call_ops)
++{
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_write_data(struct nfs_write_data *data,
++		       const struct rpc_call_ops *call_ops, int how)
++{
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_commit(struct nfs_write_data *data,
++		   const struct rpc_call_ops *call_ops, int how)
++{
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static inline int pnfs_write_begin(struct file *filp, struct page *page,
++				   loff_t pos, unsigned len,
++				   struct pnfs_layout_segment *lseg,
++				   void **fsdata)
++{
++	*fsdata = NULL;
++	return 0;
++}
++
++static inline int pnfs_write_end(struct file *filp, struct page *page,
++				 loff_t pos, unsigned len, unsigned copied,
++				 struct pnfs_layout_segment *lseg)
++{
++	return 0;
++}
++
++static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
++{
++}
++
++static inline int pnfs_get_write_status(struct nfs_write_data *data)
++{
++	return 0;
++}
++
++static inline int pnfs_get_read_status(struct nfs_read_data *data)
++{
++	return 0;
++}
++
++static inline int pnfs_use_rpc(struct nfs_server *nfss)
++{
++	return 1;
++}
++
++static inline int pnfs_layoutcommit_inode(struct inode *inode, int sync)
++{
++	return 0;
++}
++
++static inline bool
++pnfs_ld_layoutret_on_setattr(struct inode *inode)
++{
++	return false;
++}
++
++static inline int
++pnfs_layout_roc_iomode(struct nfs_inode *nfsi)
++{
++	return 0;
++}
++
++static inline int pnfs_return_layout(struct inode *ino,
++				     struct pnfs_layout_range *range,
++				     const nfs4_stateid *stateid, /* optional */
++				     enum pnfs_layoutreturn_type type,
++				     bool wait)
++{
++	return 0;
++}
++
++static inline struct pnfs_layout_segment *
++nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata)
++{
++	return NULL;
++}
++
++#endif /* CONFIG_NFS_V4_1 */
++
++#endif /* FS_NFS_PNFS_H */
+diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.c
+--- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-08-31 20:41:19.163155499 -0400
++++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-31 20:42:05.543103394 -0400
+@@ -443,7 +443,7 @@ nfs_proc_symlink(struct inode *dir, stru
+ 	fattr = nfs_alloc_fattr();
+ 	status = -ENOMEM;
+ 	if (fh == NULL || fattr == NULL)
+-		goto out;
++		goto out_free;
+ 
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+@@ -455,7 +455,7 @@ nfs_proc_symlink(struct inode *dir, stru
+ 	 */
+ 	if (status == 0)
+ 		status = nfs_instantiate(dentry, fh, fattr);
+-
++out_free:
+ 	nfs_free_fattr(fattr);
+ 	nfs_free_fhandle(fh);
+ out:
+@@ -694,6 +694,7 @@ const struct nfs_rpc_ops nfs_v2_clientop
+ 	.dentry_ops	= &nfs_dentry_operations,
+ 	.dir_inode_ops	= &nfs_dir_inode_operations,
+ 	.file_inode_ops	= &nfs_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs_proc_get_root,
+ 	.getattr	= nfs_proc_getattr,
+ 	.setattr	= nfs_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.c
+--- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-08-31 20:41:19.164160482 -0400
++++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-31 20:42:05.544233042 -0400
+@@ -18,8 +18,12 @@
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/nfs_page.h>
++#include <linux/smp_lock.h>
++#include <linux/module.h>
+ 
+ #include <asm/system.h>
++#include <linux/module.h>
++#include "pnfs.h"
+ 
+ #include "nfs4_fs.h"
+ #include "internal.h"
+@@ -117,11 +121,14 @@ int nfs_readpage_async(struct nfs_open_c
+ 	LIST_HEAD(one_request);
+ 	struct nfs_page	*new;
+ 	unsigned int len;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	len = nfs_page_length(page);
+ 	if (len == 0)
+ 		return nfs_return_empty_page(page);
+-	new = nfs_create_request(ctx, inode, page, 0, len);
++	pnfs_update_layout(inode, ctx, 0, NFS4_MAX_UINT64, IOMODE_READ, &lseg);
++	new = nfs_create_request(ctx, inode, page, 0, len, lseg);
++	put_lseg(lseg);
+ 	if (IS_ERR(new)) {
+ 		unlock_page(page);
+ 		return PTR_ERR(new);
+@@ -155,24 +162,20 @@ static void nfs_readpage_release(struct 
+ 	nfs_release_request(req);
+ }
+ 
+-/*
+- * Set up the NFS read request struct
+- */
+-static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+-		const struct rpc_call_ops *call_ops,
+-		unsigned int count, unsigned int offset)
++int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++		      const struct rpc_call_ops *call_ops)
+ {
+-	struct inode *inode = req->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = req->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+ 		.task = &data->task,
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.rpc_message = &msg,
+ 		.callback_ops = call_ops,
+ 		.callback_data = data,
+@@ -180,9 +183,46 @@ static int nfs_read_rpcsetup(struct nfs_
+ 		.flags = RPC_TASK_ASYNC | swap_flags,
+ 	};
+ 
++	/* Set up the initial task struct. */
++	NFS_PROTO(inode)->read_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
++			data->task.tk_pid,
++			inode->i_sb->s_id,
++			(long long)NFS_FILEID(inode),
++			data->args.count,
++			(unsigned long long)data->args.offset);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	rpc_put_task(task);
++	return 0;
++}
++EXPORT_SYMBOL(nfs_initiate_read);
++
++int pnfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++		       const struct rpc_call_ops *call_ops)
++{
++	if (data->req->wb_lseg &&
++	    (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
++		return pnfs_get_read_status(data);
++
++	return nfs_initiate_read(data, clnt, call_ops);
++}
++
++/*
++ * Set up the NFS read request struct
++ */
++static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
++		const struct rpc_call_ops *call_ops,
++		unsigned int count, unsigned int offset)
++{
++	struct inode *inode = req->wb_context->path.dentry->d_inode;
++
+ 	data->req	  = req;
+ 	data->inode	  = inode;
+-	data->cred	  = msg.rpc_cred;
++	data->cred	  = req->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(inode);
+ 	data->args.offset = req_offset(req) + offset;
+@@ -190,27 +230,14 @@ static int nfs_read_rpcsetup(struct nfs_
+ 	data->args.pages  = data->pagevec;
+ 	data->args.count  = count;
+ 	data->args.context = get_nfs_open_context(req->wb_context);
++	data->args.lock_context = req->wb_lock_context;
+ 
+ 	data->res.fattr   = &data->fattr;
+ 	data->res.count   = count;
+ 	data->res.eof     = 0;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	/* Set up the initial task struct. */
+-	NFS_PROTO(inode)->read_setup(data, &msg);
+-
+-	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+-			data->task.tk_pid,
+-			inode->i_sb->s_id,
+-			(long long)NFS_FILEID(inode),
+-			count,
+-			(unsigned long long)data->args.offset);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task))
+-		return PTR_ERR(task);
+-	rpc_put_task(task);
+-	return 0;
++	return pnfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
+ }
+ 
+ static void
+@@ -354,7 +381,14 @@ static void nfs_readpage_retry(struct rp
+ {
+ 	struct nfs_readargs *argp = &data->args;
+ 	struct nfs_readres *resp = &data->res;
++	struct nfs_client *clp = NFS_SERVER(data->inode)->nfs_client;
+ 
++#ifdef CONFIG_NFS_V4_1
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		clp = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	if (resp->eof || resp->count == argp->count)
+ 		return;
+ 
+@@ -368,7 +402,10 @@ static void nfs_readpage_retry(struct rp
+ 	argp->offset += resp->count;
+ 	argp->pgbase += resp->count;
+ 	argp->count -= resp->count;
+-	nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client);
++#ifdef CONFIG_NFS_V4_1
++	data->pdata.pnfs_error = -EAGAIN;
++#endif /* CONFIG_NFS_V4_1 */
++	nfs_restart_rpc(task, clp);
+ }
+ 
+ /*
+@@ -409,13 +446,19 @@ static void nfs_readpage_release_partial
+ void nfs_read_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs_read_data *data = calldata;
++	struct nfs4_session *ds_session = NULL;
+ 
+-	if (nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client,
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		ds_session = data->fldata.ds_nfs_client->cl_session;
++	}
++	if (nfs4_setup_sequence(NFS_SERVER(data->inode), ds_session,
+ 				&data->args.seq_args, &data->res.seq_res,
+ 				0, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
++EXPORT_SYMBOL(nfs_read_prepare);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static const struct rpc_call_ops nfs_read_partial_ops = {
+@@ -568,7 +611,8 @@ readpage_async_filler(void *data, struct
+ 	if (len == 0)
+ 		return nfs_return_empty_page(page);
+ 
+-	new = nfs_create_request(desc->ctx, inode, page, 0, len);
++	new = nfs_create_request(desc->ctx, inode, page, 0, len,
++				 desc->pgio->pg_lseg);
+ 	if (IS_ERR(new))
+ 		goto out_error;
+ 
+@@ -624,6 +668,9 @@ int nfs_readpages(struct file *filp, str
+ 	if (ret == 0)
+ 		goto read_complete; /* all pages were read */
+ 
++#ifdef CONFIG_NFS_V4_1
++	pnfs_pageio_init_read(&pgio, inode, desc.ctx, pages, &rsize);
++#endif /* CONFIG_NFS_V4_1 */
+ 	if (rsize < PAGE_CACHE_SIZE)
+ 		nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
+ 	else
+@@ -632,6 +679,7 @@ int nfs_readpages(struct file *filp, str
+ 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
+ 
+ 	nfs_pageio_complete(&pgio);
++	put_lseg(pgio.pg_lseg);
+ 	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ 	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
+ read_complete:
+diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/super.c
+--- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-31 20:41:19.165170508 -0400
++++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-31 20:42:05.545114737 -0400
+@@ -64,6 +64,7 @@
+ #include "iostat.h"
+ #include "internal.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_VFS
+ 
+@@ -676,6 +677,28 @@ static int nfs_show_options(struct seq_f
+ 
+ 	return 0;
+ }
++#ifdef CONFIG_NFS_V4_1
++void show_sessions(struct seq_file *m, struct nfs_server *server)
++{
++	if (nfs4_has_session(server->nfs_client))
++		seq_printf(m, ",sessions");
++}
++#else
++void show_sessions(struct seq_file *m, struct nfs_server *server) {}
++#endif
++
++#ifdef CONFIG_NFS_V4_1
++void show_pnfs(struct seq_file *m, struct nfs_server *server)
++{
++	seq_printf(m, ",pnfs=");
++	if (server->pnfs_curr_ld)
++		seq_printf(m, "%s", server->pnfs_curr_ld->name);
++	else
++		seq_printf(m, "not configured");
++}
++#else  /* CONFIG_NFS_V4_1 */
++void show_pnfs(struct seq_file *m, struct nfs_server *server) {}
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ /*
+  * Present statistical information for this VFS mountpoint
+@@ -714,6 +737,8 @@ static int nfs_show_stats(struct seq_fil
+ 		seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
+ 		seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
+ 		seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
++		show_sessions(m, nfss);
++		show_pnfs(m, nfss);
+ 	}
+ #endif
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unlink.c
+--- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-08-31 20:41:19.166151095 -0400
++++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-31 20:42:05.546131839 -0400
+@@ -110,7 +110,7 @@ void nfs_unlink_prepare(struct rpc_task 
+ 	struct nfs_unlinkdata *data = calldata;
+ 	struct nfs_server *server = NFS_SERVER(data->dir);
+ 
+-	if (nfs4_setup_sequence(server->nfs_client, &data->args.seq_args,
++	if (nfs4_setup_sequence(server, NULL, &data->args.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/write.c
+--- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-08-31 20:41:17.273213379 -0400
++++ linux-2.6.34.noarch/fs/nfs/write.c	2010-08-31 20:42:05.548212682 -0400
+@@ -20,6 +20,7 @@
+ #include <linux/nfs_mount.h>
+ #include <linux/nfs_page.h>
+ #include <linux/backing-dev.h>
++#include <linux/module.h>
+ 
+ #include <asm/uaccess.h>
+ 
+@@ -28,6 +29,7 @@
+ #include "iostat.h"
+ #include "nfs4_fs.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
+ 
+@@ -59,6 +61,7 @@ struct nfs_write_data *nfs_commitdata_al
+ 	}
+ 	return p;
+ }
++EXPORT_SYMBOL(nfs_commitdata_alloc);
+ 
+ void nfs_commit_free(struct nfs_write_data *p)
+ {
+@@ -66,6 +69,7 @@ void nfs_commit_free(struct nfs_write_da
+ 		kfree(p->pagevec);
+ 	mempool_free(p, nfs_commit_mempool);
+ }
++EXPORT_SYMBOL(nfs_commit_free);
+ 
+ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+ {
+@@ -418,6 +422,17 @@ static void nfs_inode_remove_request(str
+ 	nfs_clear_request(req);
+ 	nfs_release_request(req);
+ }
++/* Clear @req's layout segment, if it has one, and hand it to put_lseg(). */
++static void nfs_mark_request_nopnfs(struct nfs_page *req)
++{
++	struct pnfs_layout_segment *lseg = req->wb_lseg;
++
++	if (lseg != NULL) {
++		req->wb_lseg = NULL;
++		put_lseg(lseg);
++		dprintk(" retry through MDS\n");
++	}
++}
+ 
+ static void
+ nfs_mark_request_dirty(struct nfs_page *req)
+@@ -523,7 +538,7 @@ nfs_need_commit(struct nfs_inode *nfsi)
+  * The requests are *not* checked to ensure that they form a contiguous set.
+  */
+ static int
+-nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
++nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages, int *use_pnfs)
+ {
+ 	struct nfs_inode *nfsi = NFS_I(inode);
+ 	int ret;
+@@ -531,7 +546,8 @@ nfs_scan_commit(struct inode *inode, str
+ 	if (!nfs_need_commit(nfsi))
+ 		return 0;
+ 
+-	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
++	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT,
++			    use_pnfs);
+ 	if (ret > 0)
+ 		nfsi->ncommit -= ret;
+ 	if (nfs_need_commit(NFS_I(inode)))
+@@ -560,7 +576,8 @@ static inline int nfs_scan_commit(struct
+ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
+ 		struct page *page,
+ 		unsigned int offset,
+-		unsigned int bytes)
++		unsigned int bytes,
++		struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_page *req;
+ 	unsigned int rqend;
+@@ -585,8 +602,8 @@ static struct nfs_page *nfs_try_to_updat
+ 		 * Note: nfs_flush_incompatible() will already
+ 		 * have flushed out requests having wrong owners.
+ 		 */
+-		if (offset > rqend
+-		    || end < req->wb_offset)
++		if (offset > rqend || end < req->wb_offset ||
++		    req->wb_lseg != lseg)
+ 			goto out_flushme;
+ 
+ 		if (nfs_set_page_tag_locked(req))
+@@ -634,16 +651,17 @@ out_err:
+  * already called nfs_flush_incompatible() if necessary.
+  */
+ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
+-		struct page *page, unsigned int offset, unsigned int bytes)
++		struct page *page, unsigned int offset, unsigned int bytes,
++		struct pnfs_layout_segment *lseg)
+ {
+ 	struct inode *inode = page->mapping->host;
+ 	struct nfs_page	*req;
+ 	int error;
+ 
+-	req = nfs_try_to_update_request(inode, page, offset, bytes);
++	req = nfs_try_to_update_request(inode, page, offset, bytes, lseg);
+ 	if (req != NULL)
+ 		goto out;
+-	req = nfs_create_request(ctx, inode, page, offset, bytes);
++	req = nfs_create_request(ctx, inode, page, offset, bytes, lseg);
+ 	if (IS_ERR(req))
+ 		goto out;
+ 	error = nfs_inode_add_request(inode, req);
+@@ -656,23 +674,27 @@ out:
+ }
+ 
+ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
+-		unsigned int offset, unsigned int count)
++			       unsigned int offset, unsigned int count,
++			       struct pnfs_layout_segment *lseg,
++			       void *fsdata)
+ {
+ 	struct nfs_page	*req;
+ 
+-	req = nfs_setup_write_request(ctx, page, offset, count);
++	req = nfs_setup_write_request(ctx, page, offset, count, lseg);
+ 	if (IS_ERR(req))
+ 		return PTR_ERR(req);
+ 	nfs_mark_request_dirty(req);
+ 	/* Update file length */
+-	nfs_grow_file(page, offset, count);
++	if (pnfs_grow_ok(lseg, fsdata))
++		nfs_grow_file(page, offset, count);
+ 	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+ 	nfs_mark_request_dirty(req);
+ 	nfs_clear_page_tag_locked(req);
+ 	return 0;
+ }
+ 
+-int nfs_flush_incompatible(struct file *file, struct page *page)
++int nfs_flush_incompatible(struct file *file, struct page *page,
++			   struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_open_context *ctx = nfs_file_open_context(file);
+ 	struct nfs_page	*req;
+@@ -689,7 +711,10 @@ int nfs_flush_incompatible(struct file *
+ 		req = nfs_page_find_request(page);
+ 		if (req == NULL)
+ 			return 0;
+-		do_flush = req->wb_page != page || req->wb_context != ctx;
++		do_flush = req->wb_page != page || req->wb_context != ctx ||
++			req->wb_lock_context->lockowner != current->files ||
++			req->wb_lock_context->pid != current->tgid ||
++			req->wb_lseg != lseg;
+ 		nfs_release_request(req);
+ 		if (!do_flush)
+ 			return 0;
+@@ -716,7 +741,8 @@ static int nfs_write_pageuptodate(struct
+  * things with a page scheduled for an RPC call (e.g. invalidate it).
+  */
+ int nfs_updatepage(struct file *file, struct page *page,
+-		unsigned int offset, unsigned int count)
++		   unsigned int offset, unsigned int count,
++		   struct pnfs_layout_segment *lseg, void *fsdata)
+ {
+ 	struct nfs_open_context *ctx = nfs_file_open_context(file);
+ 	struct inode	*inode = page->mapping->host;
+@@ -741,7 +767,7 @@ int nfs_updatepage(struct file *file, st
+ 		offset = 0;
+ 	}
+ 
+-	status = nfs_writepage_setup(ctx, page, offset, count);
++	status = nfs_writepage_setup(ctx, page, offset, count, lseg, fsdata);
+ 	if (status < 0)
+ 		nfs_set_pageerror(page);
+ 
+@@ -771,25 +797,21 @@ static int flush_task_priority(int how)
+ 	return RPC_PRIORITY_NORMAL;
+ }
+ 
+-/*
+- * Set up the argument/result storage required for the RPC call.
+- */
+-static int nfs_write_rpcsetup(struct nfs_page *req,
+-		struct nfs_write_data *data,
+-		const struct rpc_call_ops *call_ops,
+-		unsigned int count, unsigned int offset,
+-		int how)
++int nfs_initiate_write(struct nfs_write_data *data,
++		       struct rpc_clnt *clnt,
++		       const struct rpc_call_ops *call_ops,
++		       int how)
+ {
+-	struct inode *inode = req->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int priority = flush_task_priority(how);
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = req->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.task = &data->task,
+ 		.rpc_message = &msg,
+ 		.callback_ops = call_ops,
+@@ -800,12 +822,62 @@ static int nfs_write_rpcsetup(struct nfs
+ 	};
+ 	int ret = 0;
+ 
++	/* Set up the initial task struct.  */
++	NFS_PROTO(inode)->write_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated write call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task)) {
++		ret = PTR_ERR(task);
++		goto out;
++	}
++	if (how & FLUSH_SYNC) {
++		ret = rpc_wait_for_completion_task(task);
++		if (ret == 0)
++			ret = task->tk_status;
++	}
++	rpc_put_task(task);
++out:
++	return ret;
++}
++EXPORT_SYMBOL(nfs_initiate_write);
++
++/* Use pNFS if the request has an lseg and I/O is attempted; else use @clnt. */
++int pnfs_initiate_write(struct nfs_write_data *data,
++			struct rpc_clnt *clnt,
++			const struct rpc_call_ops *call_ops, int how)
++{
++	if (!data->req->wb_lseg ||
++	    pnfs_try_to_write_data(data, call_ops, how) != PNFS_ATTEMPTED)
++		return nfs_initiate_write(data, clnt, call_ops, how);
++
++	return pnfs_get_write_status(data);
++}
++
++/*
++ * Set up the argument/result storage required for the RPC call.
++ */
++static int nfs_write_rpcsetup(struct nfs_page *req,
++		struct nfs_write_data *data,
++		const struct rpc_call_ops *call_ops,
++		unsigned int count, unsigned int offset,
++		int how)
++{
++	struct inode *inode = req->wb_context->path.dentry->d_inode;
++
+ 	/* Set up the RPC argument and reply structs
+ 	 * NB: take care not to mess about with data->commit et al. */
+ 
+ 	data->req = req;
+ 	data->inode = inode = req->wb_context->path.dentry->d_inode;
+-	data->cred = msg.rpc_cred;
++	data->cred = req->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(inode);
+ 	data->args.offset = req_offset(req) + offset;
+@@ -813,6 +885,7 @@ static int nfs_write_rpcsetup(struct nfs
+ 	data->args.pages  = data->pagevec;
+ 	data->args.count  = count;
+ 	data->args.context = get_nfs_open_context(req->wb_context);
++	data->args.lock_context = req->wb_lock_context;
+ 	data->args.stable  = NFS_UNSTABLE;
+ 	if (how & FLUSH_STABLE) {
+ 		data->args.stable = NFS_DATA_SYNC;
+@@ -825,30 +898,7 @@ static int nfs_write_rpcsetup(struct nfs
+ 	data->res.verf    = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	/* Set up the initial task struct.  */
+-	NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-	dprintk("NFS: %5u initiated write call "
+-		"(req %s/%lld, %u bytes @ offset %llu)\n",
+-		data->task.tk_pid,
+-		inode->i_sb->s_id,
+-		(long long)NFS_FILEID(inode),
+-		count,
+-		(unsigned long long)data->args.offset);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task)) {
+-		ret = PTR_ERR(task);
+-		goto out;
+-	}
+-	if (how & FLUSH_SYNC) {
+-		ret = rpc_wait_for_completion_task(task);
+-		if (ret == 0)
+-			ret = task->tk_status;
+-	}
+-	rpc_put_task(task);
+-out:
+-	return ret;
++	return pnfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
+ }
+ 
+ /* If a nfs_flush_* function fails, it should remove reqs from @head and
+@@ -859,6 +909,7 @@ static void nfs_redirty_request(struct n
+ {
+ 	struct page *page = req->wb_page;
+ 
++	nfs_mark_request_nopnfs(req);
+ 	nfs_mark_request_dirty(req);
+ 	nfs_clear_page_tag_locked(req);
+ 	nfs_end_page_writeback(page);
+@@ -971,6 +1022,10 @@ static void nfs_pageio_init_write(struct
+ {
+ 	size_t wsize = NFS_SERVER(inode)->wsize;
+ 
++#ifdef CONFIG_NFS_V4_1
++	pnfs_pageio_init_write(pgio, inode, &wsize);
++#endif /* CONFIG_NFS_V4_1 */
++
+ 	if (wsize < PAGE_CACHE_SIZE)
+ 		nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
+ 	else
+@@ -1036,13 +1091,27 @@ out:
+ void nfs_write_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs_write_data *data = calldata;
+-	struct nfs_client *clp = (NFS_SERVER(data->inode))->nfs_client;
++	struct nfs4_session *ds_session = NULL;
+ 
+-	if (nfs4_setup_sequence(clp, &data->args.seq_args,
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		ds_session = data->fldata.ds_nfs_client->cl_session;
++	} else if (data->args.count > NFS_SERVER(data->inode)->wsize) {
++		/* retrying via MDS? clamp count to wsize, remember the original */
++		data->pdata.orig_count = data->args.count;
++		data->args.count = NFS_SERVER(data->inode)->wsize;
++		dprintk("%s: trimmed count %u to wsize %u\n", __func__,
++			data->pdata.orig_count, data->args.count);
++	} else
++		data->pdata.orig_count = 0;
++
++	if (nfs4_setup_sequence(NFS_SERVER(data->inode), ds_session,
++				&data->args.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
++EXPORT_SYMBOL(nfs_write_prepare);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static const struct rpc_call_ops nfs_write_partial_ops = {
+@@ -1126,10 +1195,11 @@ int nfs_writeback_done(struct rpc_task *
+ 	struct nfs_writeargs	*argp = &data->args;
+ 	struct nfs_writeres	*resp = &data->res;
+ 	struct nfs_server	*server = NFS_SERVER(data->inode);
++	struct nfs_client	*clp = server->nfs_client;
+ 	int status;
+ 
+-	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
+-		task->tk_pid, task->tk_status);
++	dprintk("NFS: %5u nfs_writeback_done (status %d count %u)\n",
++		task->tk_pid, task->tk_status, resp->count);
+ 
+ 	/*
+ 	 * ->write_done will attempt to use post-op attributes to detect
+@@ -1142,6 +1212,13 @@ int nfs_writeback_done(struct rpc_task *
+ 	if (status != 0)
+ 		return status;
+ 	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
++#ifdef CONFIG_NFS_V4_1
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		clp = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+ 	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
+@@ -1158,7 +1235,7 @@ int nfs_writeback_done(struct rpc_task *
+ 		if (time_before(complain, jiffies)) {
+ 			dprintk("NFS:       faulty NFS server %s:"
+ 				" (committed = %d) != (stable = %d)\n",
+-				server->nfs_client->cl_hostname,
++				clp->cl_hostname,
+ 				resp->verf->committed, argp->stable);
+ 			complain = jiffies + 300 * HZ;
+ 		}
+@@ -1168,6 +1245,9 @@ int nfs_writeback_done(struct rpc_task *
+ 	if (task->tk_status >= 0 && resp->count < argp->count) {
+ 		static unsigned long    complain;
+ 
++		dprintk("NFS:       short write:"
++			" (resp->count %u) < (argp->count = %u)\n",
++			resp->count, argp->count);
+ 		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+ 
+ 		/* Has the server at least made some progress? */
+@@ -1184,7 +1264,10 @@ int nfs_writeback_done(struct rpc_task *
+ 				 */
+ 				argp->stable = NFS_FILE_SYNC;
+ 			}
+-			nfs_restart_rpc(task, server->nfs_client);
++#ifdef CONFIG_NFS_V4_1
++			data->pdata.pnfs_error = -EAGAIN;
++#endif /* CONFIG_NFS_V4_1 */
++			nfs_restart_rpc(task, clp);
+ 			return -EAGAIN;
+ 		}
+ 		if (time_before(complain, jiffies)) {
+@@ -1228,40 +1311,73 @@ static void nfs_commitdata_release(void 
+ 	nfs_commit_free(wdata);
+ }
+ 
+-/*
+- * Set up the argument/result storage required for the RPC call.
+- */
+-static int nfs_commit_rpcsetup(struct list_head *head,
+-		struct nfs_write_data *data,
+-		int how)
++int nfs_initiate_commit(struct nfs_write_data *data,
++			struct rpc_clnt *clnt,
++			const struct rpc_call_ops *call_ops,
++			int how)
+ {
+-	struct nfs_page *first = nfs_list_entry(head->next);
+-	struct inode *inode = first->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int priority = flush_task_priority(how);
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = first->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+ 		.task = &data->task,
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.rpc_message = &msg,
+-		.callback_ops = &nfs_commit_ops,
++		.callback_ops = call_ops,
+ 		.callback_data = data,
+ 		.workqueue = nfsiod_workqueue,
+ 		.flags = RPC_TASK_ASYNC,
+ 		.priority = priority,
+ 	};
+ 
++	/* Set up the initial task struct.  */
++	NFS_PROTO(inode)->commit_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	rpc_put_task(task);
++	return 0;
++}
++EXPORT_SYMBOL(nfs_initiate_commit);
++
++/* Commit through pNFS if @pnfs is set and attempted, else through @clnt. */
++int pnfs_initiate_commit(struct nfs_write_data *data,
++			 struct rpc_clnt *clnt,
++			 const struct rpc_call_ops *call_ops,
++			 int how, int pnfs)
++{
++	if (pnfs &&
++	    (pnfs_try_to_commit(data, call_ops, how) == PNFS_ATTEMPTED))
++		return pnfs_get_write_status(data);
++
++	return nfs_initiate_commit(data, clnt, call_ops, how);
++}
++
++/*
++ * Set up the argument/result storage required for the RPC call.
++ */
++static int nfs_commit_rpcsetup(struct list_head *head,
++		struct nfs_write_data *data,
++		int how, int pnfs)
++{
++	struct nfs_page *first = nfs_list_entry(head->next);
++	struct inode *inode = first->wb_context->path.dentry->d_inode;
++
+ 	/* Set up the RPC argument and reply structs
+ 	 * NB: take care not to mess about with data->commit et al. */
+ 
+ 	list_splice_init(head, &data->pages);
+ 
+ 	data->inode	  = inode;
+-	data->cred	  = msg.rpc_cred;
++	data->cred	  = first->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(data->inode);
+ 	/* Note: we always request a commit of the entire inode */
+@@ -1272,45 +1388,47 @@ static int nfs_commit_rpcsetup(struct li
+ 	data->res.fattr   = &data->fattr;
+ 	data->res.verf    = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
++	kref_init(&data->refcount);
++	data->parent      = NULL;
++	data->args.context = first->wb_context;  /* used by commit done */
+ 
+-	/* Set up the initial task struct.  */
+-	NFS_PROTO(inode)->commit_setup(data, &msg);
++	return pnfs_initiate_commit(data, NFS_CLIENT(inode), &nfs_commit_ops,
++				    how, pnfs);
++}
+ 
+-	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
++/* Handle memory error during commit */
++void nfs_mark_list_commit(struct list_head *head)
++{
++	struct nfs_page         *req;
+ 
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task))
+-		return PTR_ERR(task);
+-	rpc_put_task(task);
+-	return 0;
++	while (!list_empty(head)) {
++		req = nfs_list_entry(head->next);
++		nfs_list_remove_request(req);
++		nfs_mark_request_commit(req);
++		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
++		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
++				BDI_RECLAIMABLE);
++		nfs_clear_page_tag_locked(req);
++	}
+ }
++EXPORT_SYMBOL(nfs_mark_list_commit);
+ 
+ /*
+  * Commit dirty pages
+  */
+ static int
+-nfs_commit_list(struct inode *inode, struct list_head *head, int how)
++nfs_commit_list(struct inode *inode, struct list_head *head, int how, int pnfs)
+ {
+ 	struct nfs_write_data	*data;
+-	struct nfs_page         *req;
+ 
+ 	data = nfs_commitdata_alloc();
+-
+ 	if (!data)
+ 		goto out_bad;
+ 
+ 	/* Set up the argument struct */
+-	return nfs_commit_rpcsetup(head, data, how);
++	return nfs_commit_rpcsetup(head, data, how, pnfs);
+  out_bad:
+-	while (!list_empty(head)) {
+-		req = nfs_list_entry(head->next);
+-		nfs_list_remove_request(req);
+-		nfs_mark_request_commit(req);
+-		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+-		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
+-				BDI_RECLAIMABLE);
+-		nfs_clear_page_tag_locked(req);
+-	}
++	nfs_mark_list_commit(head);
+ 	nfs_commit_clear_lock(NFS_I(inode));
+ 	return -ENOMEM;
+ }
+@@ -1330,6 +1448,19 @@ static void nfs_commit_done(struct rpc_t
+ 		return;
+ }
+ 
++static inline void nfs_commit_cleanup(struct kref *kref)
++{
++	struct nfs_write_data *data;
++
++	data = container_of(kref, struct nfs_write_data, refcount);
++	/* Clear lock only when all cloned commits are finished */
++	if (data->parent)
++		kref_put(&data->parent->refcount, nfs_commit_cleanup);
++	else
++		nfs_commit_clear_lock(NFS_I(data->inode));
++	nfs_commitdata_release(data);
++}
++
+ static void nfs_commit_release(void *calldata)
+ {
+ 	struct nfs_write_data	*data = calldata;
+@@ -1347,6 +1478,11 @@ static void nfs_commit_release(void *cal
+ 			req->wb_bytes,
+ 			(long long)req_offset(req));
+ 		if (status < 0) {
++			if (req->wb_lseg) {
++				nfs_mark_request_nopnfs(req);
++				nfs_mark_request_dirty(req);
++				goto next;
++			}
+ 			nfs_context_set_write_error(req->wb_context, status);
+ 			nfs_inode_remove_request(req);
+ 			dprintk(", error = %d\n", status);
+@@ -1363,12 +1499,12 @@ static void nfs_commit_release(void *cal
+ 		}
+ 		/* We have a mismatch. Write the page again */
+ 		dprintk(" mismatch\n");
++		nfs_mark_request_nopnfs(req);
+ 		nfs_mark_request_dirty(req);
+ 	next:
+ 		nfs_clear_page_tag_locked(req);
+ 	}
+-	nfs_commit_clear_lock(NFS_I(data->inode));
+-	nfs_commitdata_release(calldata);
++	kref_put(&data->refcount, nfs_commit_cleanup);
+ }
+ 
+ static const struct rpc_call_ops nfs_commit_ops = {
+@@ -1384,21 +1520,22 @@ int nfs_commit_inode(struct inode *inode
+ 	LIST_HEAD(head);
+ 	int may_wait = how & FLUSH_SYNC;
+ 	int res = 0;
++	int use_pnfs = 0;
+ 
+ 	if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
+ 		goto out_mark_dirty;
+ 	spin_lock(&inode->i_lock);
+-	res = nfs_scan_commit(inode, &head, 0, 0);
++	res = nfs_scan_commit(inode, &head, 0, 0, &use_pnfs);
+ 	spin_unlock(&inode->i_lock);
+ 	if (res) {
+-		int error = nfs_commit_list(inode, &head, how);
++		int error = nfs_commit_list(inode, &head, how, use_pnfs);
+ 		if (error < 0)
+ 			return error;
+-		if (may_wait)
++		if (may_wait) {
+ 			wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
+ 					nfs_wait_bit_killable,
+ 					TASK_KILLABLE);
+-		else
++		} else
+ 			goto out_mark_dirty;
+ 	} else
+ 		nfs_commit_clear_lock(NFS_I(inode));
+@@ -1451,7 +1588,18 @@ static int nfs_commit_unstable_pages(str
+ 
+ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+ {
+-	return nfs_commit_unstable_pages(inode, wbc);
++	int ret;
++	ret = nfs_commit_unstable_pages(inode, wbc);
++	if (ret >= 0 && layoutcommit_needed(NFS_I(inode))) {
++		int err, sync = wbc->sync_mode;
++
++		if (wbc->nonblocking || wbc->for_background)
++			sync = 0;
++		err = pnfs_layoutcommit_inode(inode, sync);
++		if (err < 0)
++			ret = err;
++	}
++	return ret;
+ }
+ 
+ /*
+@@ -1459,6 +1607,7 @@ int nfs_write_inode(struct inode *inode,
+  */
+ int nfs_wb_all(struct inode *inode)
+ {
++	int ret;
+ 	struct writeback_control wbc = {
+ 		.sync_mode = WB_SYNC_ALL,
+ 		.nr_to_write = LONG_MAX,
+@@ -1466,7 +1615,8 @@ int nfs_wb_all(struct inode *inode)
+ 		.range_end = LLONG_MAX,
+ 	};
+ 
+-	return sync_inode(inode, &wbc);
++	ret = sync_inode(inode, &wbc);
++	return ret;
+ }
+ 
+ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
+diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/include/linux/exportfs.h
+--- linux-2.6.34.noarch/include/linux/exportfs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-08-31 20:42:05.577222704 -0400
+@@ -2,6 +2,7 @@
+ #define LINUX_EXPORTFS_H 1
+ 
+ #include <linux/types.h>
++#include <linux/exp_xdr.h>
+ 
+ struct dentry;
+ struct inode;
+@@ -175,4 +176,62 @@ extern struct dentry *generic_fh_to_pare
+ 	struct fid *fid, int fh_len, int fh_type,
+ 	struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen));
+ 
++#if defined(CONFIG_EXPORTFS_FILE_LAYOUT)
++struct pnfs_filelayout_device;
++struct pnfs_filelayout_layout;
++
++extern int filelayout_encode_devinfo(struct exp_xdr_stream *xdr,
++				     const struct pnfs_filelayout_device *fdev);
++extern enum nfsstat4 filelayout_encode_layout(struct exp_xdr_stream *xdr,
++				      const struct pnfs_filelayout_layout *flp);
++#endif /* defined(CONFIG_EXPORTFS_FILE_LAYOUT) */
++
++#if defined(CONFIG_EXPORTFS_FILE_LAYOUT)
++struct list_head;
++
++extern int blocklayout_encode_devinfo(struct exp_xdr_stream *xdr,
++				      const struct list_head *volumes);
++
++extern enum nfsstat4 blocklayout_encode_layout(struct exp_xdr_stream *xdr,
++					       const struct list_head *layouts);
++#endif /* defined(CONFIG_EXPORTFS_FILE_LAYOUT) */
++
++#if defined(CONFIG_PNFSD)
++#include <linux/module.h>
++
++struct pnfsd_cb_operations;
++
++struct pnfsd_cb_ctl {
++	spinlock_t lock;
++	struct module *module;
++	const struct pnfsd_cb_operations *cb_op;
++};
++
++/* in expfs.c so that file systems can depend on it */
++extern struct pnfsd_cb_ctl pnfsd_cb_ctl;
++
++static inline int
++pnfsd_get_cb_op(struct pnfsd_cb_ctl *ctl)
++{
++	int ret = -ENOENT;
++
++	spin_lock(&pnfsd_cb_ctl.lock);
++	if (!pnfsd_cb_ctl.cb_op)
++		goto out;
++	if (!try_module_get(pnfsd_cb_ctl.module))
++		goto out;
++	ctl->cb_op = pnfsd_cb_ctl.cb_op;
++	ctl->module = pnfsd_cb_ctl.module;
++	ret = 0;
++out:
++	spin_unlock(&pnfsd_cb_ctl.lock);
++	return ret;
++}
++
++static inline void
++pnfsd_put_cb_op(struct pnfsd_cb_ctl *ctl)
++{
++	module_put(ctl->module);
++}
++#endif /* CONFIG_PNFSD */
+ #endif /* LINUX_EXPORTFS_H */
+diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/include/linux/exp_xdr.h
+--- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-08-31 20:42:05.576053304 -0400
++++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-08-31 20:42:05.576053304 -0400
+@@ -0,0 +1,141 @@
++#ifndef _LINUX_EXP_XDR_H
++#define _LINUX_EXP_XDR_H
++
++#include <asm/byteorder.h>
++#include <asm/unaligned.h>
++#include <linux/string.h>
++
++struct exp_xdr_stream {
++	__be32 *p;
++	__be32 *end;
++};
++
++/**
++ * exp_xdr_qwords - Calculate the number of quad-words holding nbytes
++ * @nbytes: number of bytes to encode (rounded up; no wrap for any __u32)
++ */
++static inline size_t
++exp_xdr_qwords(__u32 nbytes)
++{
++	return (nbytes >> 2) + ((nbytes & 3) != 0);
++}
++
++/**
++ * exp_xdr_qbytes - Calculate the number of bytes holding qwords
++ * @qwords: number of quad-words to encode
++ */
++static inline size_t
++exp_xdr_qbytes(size_t qwords)
++{
++	return qwords << 2;
++}
++
++/**
++ * exp_xdr_reserve_space - Reserve buffer space for sending
++ * @xdr: pointer to exp_xdr_stream
++ * @nbytes: number of bytes to reserve
++ *
++ * Checks that we have enough buffer space to encode 'nbytes' more
++ * bytes of data. If so, update the xdr stream.
++ */
++static inline __be32 *
++exp_xdr_reserve_space(struct exp_xdr_stream *xdr, size_t nbytes)
++{
++	__be32 *p = xdr->p;
++	__be32 *q;
++
++	/* align nbytes on the next 32-bit boundary */
++	q = p + exp_xdr_qwords(nbytes);
++	if (unlikely(q > xdr->end || q < p))
++		return NULL;
++	xdr->p = q;
++	return p;
++}
++
++/**
++ * exp_xdr_reserve_qwords - Reserve buffer space for sending
++ * @xdr: pointer to exp_xdr_stream
++ * @qwords: number of quad words (u32's) to reserve
++ */
++static inline __be32 *
++exp_xdr_reserve_qwords(struct exp_xdr_stream *xdr, size_t qwords)
++{
++	return exp_xdr_reserve_space(xdr, exp_xdr_qbytes(qwords));
++}
++
++/**
++ * exp_xdr_encode_u32 - Encode an unsigned 32-bit value onto a xdr stream
++ * @p: pointer to encoding destination
++ * @val: value to encode
++ */
++static inline __be32 *
++exp_xdr_encode_u32(__be32 *p, __u32 val)
++{
++	*p = cpu_to_be32(val);
++	return p + 1;
++}
++
++/**
++ * exp_xdr_encode_u64 - Encode an unsigned 64-bit value onto a xdr stream
++ * @p: pointer to encoding destination
++ * @val: value to encode
++ */
++static inline __be32 *
++exp_xdr_encode_u64(__be32 *p, __u64 val)
++{
++	put_unaligned_be64(val, p);
++	return p + 2;
++}
++
++/**
++ * exp_xdr_encode_bytes - Encode an array of bytes onto a xdr stream
++ * @p: pointer to encoding destination
++ * @ptr: pointer to the array of bytes
++ * @nbytes: number of bytes to encode
++ */
++static inline __be32 *
++exp_xdr_encode_bytes(__be32 *p, const void *ptr, __u32 nbytes)
++{
++	if (likely(nbytes != 0)) {
++		unsigned int qwords = exp_xdr_qwords(nbytes);
++		unsigned int padding = exp_xdr_qbytes(qwords) - nbytes;
++
++		memcpy(p, ptr, nbytes);
++		if (padding != 0)
++			memset((char *)p + nbytes, 0, padding);
++		p += qwords;
++	}
++	return p;
++}
++
++/**
++ * exp_xdr_encode_opaque - Encode an opaque type onto a xdr stream
++ * @p: pointer to encoding destination
++ * @ptr: pointer to the opaque array
++ * @nbytes: number of bytes to encode
++ *
++ * Encodes the 32-bit opaque size in bytes followed by the opaque value.
++ */
++static inline __be32 *
++exp_xdr_encode_opaque(__be32 *p, const void *ptr, __u32 nbytes)
++{
++	p = exp_xdr_encode_u32(p, nbytes);
++	return exp_xdr_encode_bytes(p, ptr, nbytes);
++}
++
++/**
++ * exp_xdr_encode_opaque_len - Encode the opaque length onto a xdr stream
++ * @lenp: pointer to the opaque length destination
++ * @endp: pointer to the end of the opaque array (data starts at lenp + 1)
++ *
++ * Stores (endp - (lenp + 1)) at @lenp; returns the 4-byte-aligned end pointer
++ */
++static inline __be32 *
++exp_xdr_encode_opaque_len(__be32 *lenp, const void *endp)
++{
++	size_t nbytes = (char *)endp - (char *)(lenp + 1);
++
++	exp_xdr_encode_u32(lenp, nbytes);
++	return lenp + 1 + exp_xdr_qwords(nbytes);
++}
++#endif /* _LINUX_EXP_XDR_H */
+diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include/linux/fs.h
+--- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-08-31 20:41:19.120034834 -0400
++++ linux-2.6.34.noarch/include/linux/fs.h	2010-08-31 20:42:05.579212604 -0400
+@@ -387,6 +387,7 @@ struct inodes_stat_t {
+ #include <asm/byteorder.h>
+ 
+ struct export_operations;
++struct pnfs_export_operations;
+ struct hd_geometry;
+ struct iovec;
+ struct nameidata;
+@@ -1329,6 +1330,7 @@ struct super_block {
+ 	const struct dquot_operations	*dq_op;
+ 	const struct quotactl_ops	*s_qcop;
+ 	const struct export_operations *s_export_op;
++	const struct pnfs_export_operations *s_pnfs_op;
+ 	unsigned long		s_flags;
+ 	unsigned long		s_magic;
+ 	struct dentry		*s_root;
+diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/include/linux/nfs4.h
+--- linux-2.6.34.noarch/include/linux/nfs4.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-08-31 20:42:05.581035627 -0400
+@@ -17,7 +17,10 @@
+ 
+ #define NFS4_BITMAP_SIZE	2
+ #define NFS4_VERIFIER_SIZE	8
+-#define NFS4_STATEID_SIZE	16
++#define NFS4_CLIENTID_SIZE	8
++#define NFS4_STATEID_SEQID_SIZE 4
++#define NFS4_STATEID_OTHER_SIZE 12
++#define NFS4_STATEID_SIZE	(NFS4_STATEID_SEQID_SIZE + NFS4_STATEID_OTHER_SIZE)
+ #define NFS4_FHSIZE		128
+ #define NFS4_MAXPATHLEN		PATH_MAX
+ #define NFS4_MAXNAMLEN		NAME_MAX
+@@ -119,6 +122,13 @@
+ #define EXCHGID4_FLAG_MASK_A			0x40070003
+ #define EXCHGID4_FLAG_MASK_R			0x80070003
+ 
++static inline bool
++is_ds_only_session(u32 exchange_flags)
++{
++	u32 mask = EXCHGID4_FLAG_USE_PNFS_DS | EXCHGID4_FLAG_USE_PNFS_MDS;
++	return (exchange_flags & mask) == EXCHGID4_FLAG_USE_PNFS_DS;
++}
++
+ #define SEQ4_STATUS_CB_PATH_DOWN		0x00000001
+ #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING	0x00000002
+ #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED	0x00000004
+@@ -166,8 +176,25 @@ struct nfs4_acl {
+ 	struct nfs4_ace	aces[0];
+ };
+ 
++struct nfs4_fsid {
++	u64	major;
++	u64	minor;
++};
++
+ typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier;
+-typedef struct { char data[NFS4_STATEID_SIZE]; } nfs4_stateid;
++typedef struct { char data[NFS4_CLIENTID_SIZE]; } nfs4_clientid;
++
++struct nfs41_stateid {
++	__be32 seqid;
++	char other[NFS4_STATEID_OTHER_SIZE];
++} __attribute__ ((packed));
++
++typedef struct {
++	union {
++		char data[NFS4_STATEID_SIZE];
++		struct nfs41_stateid stateid;
++	} u;
++} nfs4_stateid;
+ 
+ enum nfs_opnum4 {
+ 	OP_ACCESS = 3,
+@@ -471,6 +498,8 @@ enum lock_type4 {
+ #define FATTR4_WORD1_TIME_MODIFY        (1UL << 21)
+ #define FATTR4_WORD1_TIME_MODIFY_SET    (1UL << 22)
+ #define FATTR4_WORD1_MOUNTED_ON_FILEID  (1UL << 23)
++#define FATTR4_WORD1_FS_LAYOUT_TYPES    (1UL << 30)
++#define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
+ 
+ #define NFSPROC4_NULL 0
+ #define NFSPROC4_COMPOUND 1
+@@ -523,6 +552,7 @@ enum {
+ 	NFSPROC4_CLNT_GETACL,
+ 	NFSPROC4_CLNT_SETACL,
+ 	NFSPROC4_CLNT_FS_LOCATIONS,
++	NFSPROC4_CLNT_RELEASE_LOCKOWNER,
+ 
+ 	/* nfs41 */
+ 	NFSPROC4_CLNT_EXCHANGE_ID,
+@@ -531,6 +561,13 @@ enum {
+ 	NFSPROC4_CLNT_SEQUENCE,
+ 	NFSPROC4_CLNT_GET_LEASE_TIME,
+ 	NFSPROC4_CLNT_RECLAIM_COMPLETE,
++	NFSPROC4_CLNT_LAYOUTGET,
++	NFSPROC4_CLNT_LAYOUTCOMMIT,
++	NFSPROC4_CLNT_LAYOUTRETURN,
++	NFSPROC4_CLNT_GETDEVICELIST,
++	NFSPROC4_CLNT_GETDEVICEINFO,
++	NFSPROC4_CLNT_PNFS_WRITE,
++	NFSPROC4_CLNT_PNFS_COMMIT,
+ };
+ 
+ /* nfs41 types */
+@@ -549,6 +586,43 @@ enum state_protect_how4 {
+ 	SP4_SSV		= 2
+ };
+ 
++enum pnfs_layouttype {
++	LAYOUT_NFSV4_1_FILES  = 1,
++	LAYOUT_OSD2_OBJECTS = 2,
++	LAYOUT_BLOCK_VOLUME = 3,
++};
++
++/* used for both layout return and recall */
++enum pnfs_layoutreturn_type {
++	RETURN_FILE = 1,
++	RETURN_FSID = 2,
++	RETURN_ALL  = 3
++};
++
++enum pnfs_iomode {
++	IOMODE_READ = 1,
++	IOMODE_RW = 2,
++	IOMODE_ANY = 3,
++};
++
++enum pnfs_notify_deviceid_type4 {
++	NOTIFY_DEVICEID4_CHANGE = 1 << 1,
++	NOTIFY_DEVICEID4_DELETE = 1 << 2,
++};
++
++#define NFL4_UFLG_MASK			0x0000003F
++#define NFL4_UFLG_DENSE			0x00000001
++#define NFL4_UFLG_COMMIT_THRU_MDS	0x00000002
++#define NFL4_UFLG_STRIPE_UNIT_SIZE_MASK	0xFFFFFFC0
++
++/* Encoded in the loh_body field of type layouthint4 */
++enum filelayout_hint_care4 {
++	NFLH4_CARE_DENSE		= NFL4_UFLG_DENSE,
++	NFLH4_CARE_COMMIT_THRU_MDS	= NFL4_UFLG_COMMIT_THRU_MDS,
++	NFLH4_CARE_STRIPE_UNIT_SIZE	= 0x00000040,
++	NFLH4_CARE_STRIPE_COUNT		= 0x00000080
++};
++
+ #endif
+ #endif
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfs4_pnfs.h
+--- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-08-31 20:42:05.583087731 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-08-31 20:42:05.583087731 -0400
+@@ -0,0 +1,329 @@
++/*
++ *  include/linux/nfs4_pnfs.h
++ *
++ *  Common data structures needed by the pnfs client and pnfs layout driver.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef LINUX_NFS4_PNFS_H
++#define LINUX_NFS4_PNFS_H
++
++#include <linux/nfs_page.h>
++
++enum pnfs_try_status {
++	PNFS_ATTEMPTED     = 0,
++	PNFS_NOT_ATTEMPTED = 1,
++};
++
++#define NFS4_PNFS_GETDEVLIST_MAXNUM 16
++
++/* Per-layout driver specific registration structure */
++struct pnfs_layoutdriver_type {
++	const u32 id;
++	const char *name;
++	struct layoutdriver_io_operations *ld_io_ops;
++	struct layoutdriver_policy_operations *ld_policy_ops;
++};
++
++struct pnfs_fsdata {
++	int bypass_eof;
++	struct pnfs_layout_segment *lseg;
++	void *private;
++};
++
++#if defined(CONFIG_NFS_V4_1)
++
++static inline struct nfs_inode *
++PNFS_NFS_INODE(struct pnfs_layout_hdr *lo)
++{
++	return NFS_I(lo->inode);
++}
++
++static inline struct inode *
++PNFS_INODE(struct pnfs_layout_hdr *lo)
++{
++	return lo->inode;
++}
++
++static inline struct nfs_server *
++PNFS_NFS_SERVER(struct pnfs_layout_hdr *lo)
++{
++	return NFS_SERVER(PNFS_INODE(lo));
++}
++
++static inline struct pnfs_layoutdriver_type *
++PNFS_LD(struct pnfs_layout_hdr *lo)
++{
++	return NFS_SERVER(PNFS_INODE(lo))->pnfs_curr_ld;
++}
++
++static inline struct layoutdriver_io_operations *
++PNFS_LD_IO_OPS(struct pnfs_layout_hdr *lo)
++{
++	return PNFS_LD(lo)->ld_io_ops;
++}
++
++static inline struct layoutdriver_policy_operations *
++PNFS_LD_POLICY_OPS(struct pnfs_layout_hdr *lo)
++{
++	return PNFS_LD(lo)->ld_policy_ops;
++}
++
++static inline bool
++has_layout(struct nfs_inode *nfsi)
++{
++	return nfsi->layout != NULL;
++}
++
++static inline bool
++layoutcommit_needed(struct nfs_inode *nfsi)
++{
++	return has_layout(nfsi) &&
++	       test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->state);
++}
++
++extern void put_lseg(struct pnfs_layout_segment *lseg);
++extern void get_lseg(struct pnfs_layout_segment *lseg);
++
++#else /* CONFIG_NFS_V4_1 */
++
++static inline bool
++has_layout(struct nfs_inode *nfsi)
++{
++	return false;
++}
++
++static inline bool
++layoutcommit_needed(struct nfs_inode *nfsi)
++{
++	return false;	/* !CONFIG_NFS_V4_1 stub: matches has_layout() above */
++}
++
++#endif /* CONFIG_NFS_V4_1 */
++
++struct pnfs_layout_segment {
++	struct list_head fi_list;
++	struct pnfs_layout_range range;
++	struct kref kref;
++	bool valid;
++	struct pnfs_layout_hdr *layout;
++	struct nfs4_deviceid *deviceid;
++	u8 ld_data[];			/* layout driver private data */
++};
++
++static inline void *
++LSEG_LD_DATA(struct pnfs_layout_segment *lseg)
++{
++	return lseg->ld_data;
++}
++
++/* Layout driver I/O operations.
++ * Either the pagecache or non-pagecache read/write operations must be implemented
++ */
++struct layoutdriver_io_operations {
++	/* Functions that use the pagecache.
++	 * If use_pagecache == 1, then these functions must be implemented.
++	 */
++	/* read and write pagelist should return just PNFS_ATTEMPTED (0, the
++	 * layout code has taken control) or PNFS_NOT_ATTEMPTED (1, the
++	 * layout code wishes to fall back to normal nfs.)  If PNFS_ATTEMPTED
++	 * is returned, information can be passed back through nfs_data->res
++	 * and nfs_data->task.tk_status, and the appropriate pnfs done function
++	 * MUST be called.
++	 */
++	enum pnfs_try_status
++	(*read_pagelist) (struct nfs_read_data *nfs_data, unsigned nr_pages);
++	enum pnfs_try_status
++	(*write_pagelist) (struct nfs_write_data *nfs_data, unsigned nr_pages, int how);
++	int (*write_begin) (struct pnfs_layout_segment *lseg, struct page *page,
++			    loff_t pos, unsigned count,
++			    struct pnfs_fsdata *fsdata);
++	int (*write_end)(struct inode *inode, struct page *page, loff_t pos,
++			 unsigned count, unsigned copied,
++			 struct pnfs_layout_segment *lseg);
++	void (*write_end_cleanup)(struct file *filp,
++				  struct pnfs_fsdata *fsdata);
++
++	/* Consistency ops */
++	/* 2 problems:
++	 * 1) the page list contains nfs_pages, NOT pages
++	 * 2) currently the NFS code doesn't create a page array (as it does with read/write)
++	 */
++	enum pnfs_try_status
++	(*commit) (struct nfs_write_data *nfs_data, int how);
++
++	/* Layout information. For each inode, alloc_layout is executed once to retrieve an
++	 * inode specific layout structure.  Each subsequent layoutget result is presumably
++	 * handed to alloc_lseg (takes nfs4_layoutget_res); no set_layout op exists here. */
++	struct pnfs_layout_hdr * (*alloc_layout) (struct inode *inode);
++	void (*free_layout) (struct pnfs_layout_hdr *);
++	struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
++	void (*free_lseg) (struct pnfs_layout_segment *lseg);
++
++	int (*setup_layoutcommit) (struct pnfs_layout_hdr *layoutid,
++				   struct nfs4_layoutcommit_args *args);
++	void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
++				     struct xdr_stream *xdr,
++				     const struct nfs4_layoutcommit_args *args);
++	void (*cleanup_layoutcommit) (struct pnfs_layout_hdr *layoutid,
++				      struct nfs4_layoutcommit_args *args,
++				      int status);
++	void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid,
++				struct xdr_stream *xdr,
++				const struct nfs4_layoutreturn_args *args);
++
++	/* Registration information for a new mounted file system
++	 */
++	int (*initialize_mountpoint) (struct nfs_server *,
++				      const struct nfs_fh * mntfh);
++	int (*uninitialize_mountpoint) (struct nfs_server *server);
++};
++
++enum layoutdriver_policy_flags {
++	/* Should the full nfs rpc cleanup code be used after io */
++	PNFS_USE_RPC_CODE		= 1 << 0,
++
++	/* Should the NFS req. gather algorithm cross stripe boundaries? */
++	PNFS_GATHER_ACROSS_STRIPES	= 1 << 1,
++
++	/* Should the pNFS client commit and return the layout upon a setattr */
++	PNFS_LAYOUTRET_ON_SETATTR	= 1 << 3,
++};
++
++struct layoutdriver_policy_operations {
++	unsigned flags;
++
++	/* The stripe size of the file system */
++	ssize_t (*get_stripesize) (struct pnfs_layout_hdr *layoutid);
++
++	/* test for nfs page cache coalescing */
++	int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
++
++	/* Retrieve the block size of the file system.
++	 * If gather_across_stripes == 1 (presumably PNFS_GATHER_ACROSS_STRIPES
++	 * set in flags -- confirm), requests are gathered into the block size.
++	 * TODO: Where will the layout driver get this info?  It is hard
++	 * coded in PVFS2.
++	 */
++	ssize_t (*get_blocksize) (void);
++};
++
++/* Should the full nfs rpc cleanup code be used after io */
++static inline int
++pnfs_ld_use_rpc_code(struct pnfs_layoutdriver_type *ld)
++{
++	return ld->ld_policy_ops->flags & PNFS_USE_RPC_CODE;
++}
++
++/* Should the NFS req. gather algorithm cross stripe boundaries? */
++static inline int
++pnfs_ld_gather_across_stripes(struct pnfs_layoutdriver_type *ld)
++{
++	return ld->ld_policy_ops->flags & PNFS_GATHER_ACROSS_STRIPES;
++}
++
++struct pnfs_device {
++	struct pnfs_deviceid dev_id;
++	unsigned int  layout_type;
++	unsigned int  mincount;
++	struct page **pages;
++	void          *area;
++	unsigned int  pgbase;
++	unsigned int  pglen;
++	unsigned int  dev_notify_types;
++};
++
++struct pnfs_devicelist {
++	unsigned int		eof;
++	unsigned int		num_devs;
++	struct pnfs_deviceid	dev_id[NFS4_PNFS_GETDEVLIST_MAXNUM];
++};
++
++/*
++ * Device ID RCU cache. A device ID is unique per client ID and layout type.
++ */
++#define NFS4_DEVICE_ID_HASH_BITS	5
++#define NFS4_DEVICE_ID_HASH_SIZE	(1 << NFS4_DEVICE_ID_HASH_BITS)
++#define NFS4_DEVICE_ID_HASH_MASK	(NFS4_DEVICE_ID_HASH_SIZE - 1)
++
++static inline u32
++nfs4_deviceid_hash(struct pnfs_deviceid *id)
++{
++	/* x37 multiplicative hash over NFS4_PNFS_DEVICEID4_SIZE bytes of id->data */
++	unsigned char *byte = (unsigned char *)id->data;
++	unsigned char *end = byte + NFS4_PNFS_DEVICEID4_SIZE;
++	u32 hash = 0;
++
++	for (; byte != end; byte++)
++		hash = hash * 37 + *byte;
++	/* mask down to the NFS4_DEVICE_ID_HASH_SIZE bucket range */
++	return hash & NFS4_DEVICE_ID_HASH_MASK;
++}
++
++struct nfs4_deviceid_cache {
++	spinlock_t		dc_lock;
++	struct kref		dc_kref;
++	void			(*dc_free_callback)(struct kref *);
++	struct hlist_head	dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
++};
++
++/* Device ID cache node */
++struct nfs4_deviceid {
++	struct hlist_node	de_node;
++	struct pnfs_deviceid	de_id;
++	struct kref		de_kref;
++};
++
++extern int nfs4_alloc_init_deviceid_cache(struct nfs_client *,
++				void (*free_callback)(struct kref *));
++extern void nfs4_put_deviceid_cache(struct nfs_client *);
++extern void nfs4_init_deviceid_node(struct nfs4_deviceid *);
++extern struct nfs4_deviceid *nfs4_find_get_deviceid(
++				struct nfs4_deviceid_cache *,
++				struct pnfs_deviceid *);
++extern struct nfs4_deviceid *nfs4_add_get_deviceid(struct nfs4_deviceid_cache *,
++				struct nfs4_deviceid *);
++extern void nfs4_set_layout_deviceid(struct pnfs_layout_segment *,
++				struct nfs4_deviceid *);
++extern void nfs4_put_unset_layout_deviceid(struct pnfs_layout_segment *,
++				struct nfs4_deviceid *,
++				void (*free_callback)(struct kref *));
++extern void nfs4_delete_device(struct nfs4_deviceid_cache *,
++				struct pnfs_deviceid *);
++
++/* pNFS client callback functions.
++ * These operations allow the layout driver to access pNFS client
++ * specific information or call pNFS client->server operations.
++ * E.g., getdeviceinfo, I/O callbacks, etc
++ */
++struct pnfs_client_operations {
++	int (*nfs_getdevicelist) (struct nfs_server *,
++				  const struct nfs_fh *fh,
++				  struct pnfs_devicelist *devlist);
++	int (*nfs_getdeviceinfo) (struct nfs_server *,
++				  struct pnfs_device *dev);
++
++	/* Post read callback. */
++	void (*nfs_readlist_complete) (struct nfs_read_data *nfs_data);
++
++	/* Post write callback. */
++	void (*nfs_writelist_complete) (struct nfs_write_data *nfs_data);
++
++	/* Post commit callback. */
++	void (*nfs_commit_complete) (struct nfs_write_data *nfs_data);
++	void (*nfs_return_layout) (struct inode *);
++};
++
++extern struct pnfs_client_operations pnfs_ops;
++
++extern struct pnfs_client_operations *pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
++extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
++
++#define NFS4_PNFS_MAX_LAYOUTS 4
++#define NFS4_PNFS_PRIVATE_LAYOUT 0x80000000
++
++#endif /* LINUX_NFS4_PNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarch/include/linux/nfsd4_block.h
+--- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-08-31 20:42:05.596098115 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-08-31 20:42:05.596098115 -0400
+@@ -0,0 +1,101 @@
++#ifndef NFSD4_BLOCK
++#define NFSD4_BLOCK
++
++#include <linux/sunrpc/svc.h>
++#include <linux/sunrpc/svcauth.h>
++#include <linux/nfsd/nfsfh.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++#define PNFS_BLOCK_SUCCESS		1
++#define PNFS_BLOCK_FAILURE		0
++
++#define PNFS_BLOCK_CTL_START		1
++#define PNFS_BLOCK_CTL_STOP		2
++#define PNFS_BLOCK_CTL_VERS		3 /* Allows daemon to request current
++					   * version from kernel via an upcall.
++					   */
++
++#define PNFS_UPCALL_MSG_STOP	0
++#define PNFS_UPCALL_MSG_GETSIG	1
++#define PNFS_UPCALL_MSG_GETSLICE	2
++#define PNFS_UPCALL_MSG_DMCHK	3	// See if dev_t is a DM volume
++#define PNFS_UPCALL_MSG_DMGET	4
++#define PNFS_UPCALL_MSG_VERS	5
++
++#define PNFS_UPCALL_VERS		8
++
++typedef struct stripe_dev {
++	int	major,
++		minor,
++		offset;
++} stripe_dev_t;
++
++typedef struct bl_comm_res {
++	int				res_status;
++	union {
++		struct {
++			long long	start,
++					length;
++		} slice;
++		struct {
++			int		num_stripes,
++					stripe_size;
++			stripe_dev_t	devs[];
++		} stripe;
++		struct {
++			long long	sector;
++			int		offset,
++					len;
++			char		sig[];
++		} sig;
++		int			vers,
++					dm_vol;
++	} u;
++} bl_comm_res_t;
++
++typedef struct bl_comm_msg {
++	int		msg_type,
++			msg_status;
++	union {
++		dev_t	msg_dev;
++		int	msg_vers;
++	} u;
++	bl_comm_res_t	*msg_res;
++} bl_comm_msg_t;
++
++#ifdef __KERNEL__
++
++typedef struct bl_comm {
++	/* ---- protects access to this structure ---- */
++	struct mutex		lock;
++	/* ---- protects access to rpc pipe ---- */
++	struct mutex		pipe_lock;
++	struct dentry		*pipe_dentry;
++	wait_queue_head_t	pipe_wq;
++	bl_comm_msg_t		msg;
++} bl_comm_t;
++
++int pnfs_block_enabled(struct inode *, int);
++int bl_layout_type(struct super_block *sb);
++int bl_getdeviceiter(struct super_block *, u32 layout_type,
++		     struct nfsd4_pnfs_dev_iter_res *);
++int bl_getdeviceinfo(struct super_block *, struct exp_xdr_stream *,
++		     u32 layout_type,
++		     const struct nfsd4_pnfs_deviceid *);
++enum nfsstat4 bl_layoutget(struct inode *, struct exp_xdr_stream *,
++			   const struct nfsd4_pnfs_layoutget_arg *,
++			   struct nfsd4_pnfs_layoutget_res *);
++int bl_layoutcommit(struct inode *,
++		    const struct nfsd4_pnfs_layoutcommit_arg *,
++		    struct nfsd4_pnfs_layoutcommit_res *);
++int bl_layoutreturn(struct inode *,
++		    const struct nfsd4_pnfs_layoutreturn_arg *);
++int bl_layoutrecall(struct inode *inode, int type, u64 offset, u64 len);
++int bl_init_proc(void);
++int bl_upcall(bl_comm_t *, bl_comm_msg_t *, bl_comm_res_t **);
++
++extern bl_comm_t	*bl_comm_global;	// Ugly...
++#endif /* __KERNEL__ */
++
++#endif /* NFSD4_BLOCK */
++
+diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h
+--- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-08-31 20:42:05.597097942 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-08-31 20:42:05.597097942 -0400
+@@ -0,0 +1,345 @@
++/*
++ * include/linux/nfsd4_spnfs.h
++ *
++ * spNFS - simple pNFS implementation with userspace daemon
++ *
++ */
++
++/******************************************************************************
++
++(c) 2007 Network Appliance, Inc.  All Rights Reserved.
++
++Network Appliance provides this source code under the GPL v2 License.
++The GPL v2 license is available at
++http://opensource.org/licenses/gpl-license.php.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++******************************************************************************/
++
++#ifndef NFS_SPNFS_H
++#define NFS_SPNFS_H
++
++
++#ifdef __KERNEL__
++#include "exportfs.h"
++#include "sunrpc/svc.h"
++#include "nfsd/nfsfh.h"
++#else
++#include <sys/types.h>
++#endif /* __KERNEL__ */
++
++#define SPNFS_STATUS_INVALIDMSG		0x01
++#define SPNFS_STATUS_AGAIN		0x02
++#define SPNFS_STATUS_FAIL		0x04
++#define SPNFS_STATUS_SUCCESS		0x08
++
++#define SPNFS_TYPE_LAYOUTGET		0x01
++#define SPNFS_TYPE_LAYOUTCOMMIT		0x02
++#define SPNFS_TYPE_LAYOUTRETURN		0x03
++#define SPNFS_TYPE_GETDEVICEITER	0x04
++#define SPNFS_TYPE_GETDEVICEINFO	0x05
++#define SPNFS_TYPE_SETATTR		0x06
++#define SPNFS_TYPE_OPEN			0x07
++#define	SPNFS_TYPE_CLOSE		0x08
++#define SPNFS_TYPE_CREATE		0x09
++#define SPNFS_TYPE_REMOVE		0x0a
++#define SPNFS_TYPE_COMMIT		0x0b
++#define SPNFS_TYPE_READ			0x0c
++#define SPNFS_TYPE_WRITE		0x0d
++
++#define	SPNFS_MAX_DEVICES		1
++#define	SPNFS_MAX_DATA_SERVERS		16
++#define SPNFS_MAX_IO			512
++
++/* layout */
++struct spnfs_msg_layoutget_args {
++	unsigned long inode;
++	unsigned long generation;
++};
++
++struct spnfs_filelayout_list {
++	u_int32_t       fh_len;
++	unsigned char   fh_val[128]; /* DMXXX fix this const */
++};
++
++struct spnfs_msg_layoutget_res {
++	int status;
++	u_int64_t devid;
++	u_int64_t stripe_size;
++	u_int32_t stripe_type;
++	u_int32_t stripe_count;
++	struct spnfs_filelayout_list flist[SPNFS_MAX_DATA_SERVERS];
++};
++
++/* layoutcommit */
++struct spnfs_msg_layoutcommit_args {
++	unsigned long inode;
++	unsigned long generation;
++	u_int64_t file_size;
++};
++
++struct spnfs_msg_layoutcommit_res {
++	int status;
++};
++
++/* layoutreturn */
++/* No op for the daemon */
++/*
++struct spnfs_msg_layoutreturn_args {
++};
++
++struct spnfs_msg_layoutreturn_res {
++};
++*/
++
++/* getdeviceiter */
++struct spnfs_msg_getdeviceiter_args {
++	unsigned long inode;
++	u_int64_t cookie;
++	u_int64_t verf;
++};
++
++struct spnfs_msg_getdeviceiter_res {
++	int status;
++	u_int64_t devid;
++	u_int64_t cookie;
++	u_int64_t verf;
++	u_int32_t eof;
++};
++
++/* getdeviceinfo */
++struct spnfs_data_server {
++	u_int32_t dsid;
++	char netid[5];
++	char addr[29];
++};
++
++struct spnfs_device {
++	u_int64_t devid;
++	int dscount;
++	struct spnfs_data_server dslist[SPNFS_MAX_DATA_SERVERS];
++};
++
++struct spnfs_msg_getdeviceinfo_args {
++	u_int64_t devid;
++};
++
++struct spnfs_msg_getdeviceinfo_res {
++	int status;
++	struct spnfs_device devinfo;
++};
++
++/* setattr */
++struct spnfs_msg_setattr_args {
++	unsigned long inode;
++	unsigned long generation;
++	int file_size;
++};
++
++struct spnfs_msg_setattr_res {
++	int status;
++};
++
++/* open */
++struct spnfs_msg_open_args {
++	unsigned long inode;
++	unsigned long generation;
++	int create;
++	int createmode;
++	int truncate;
++};
++
++struct spnfs_msg_open_res {
++	int status;
++};
++
++/* close */
++/* No op for daemon */
++struct spnfs_msg_close_args {
++	int x;
++};
++
++struct spnfs_msg_close_res {
++	int y;
++};
++
++/* create */
++/*
++struct spnfs_msg_create_args {
++	int x;
++};
++
++struct spnfs_msg_create_res {
++	int y;
++};
++*/
++
++/* remove */
++struct spnfs_msg_remove_args {
++	unsigned long inode;
++	unsigned long generation;
++};
++
++struct spnfs_msg_remove_res {
++	int status;
++};
++
++/* commit */
++/*
++struct spnfs_msg_commit_args {
++	int x;
++};
++
++struct spnfs_msg_commit_res {
++	int y;
++};
++*/
++
++/* read */
++struct spnfs_msg_read_args {
++	unsigned long inode;
++	unsigned long generation;
++	loff_t offset;
++	unsigned long len;
++};
++
++struct spnfs_msg_read_res {
++	int status;
++	char data[SPNFS_MAX_IO];
++};
++
++/* write */
++struct spnfs_msg_write_args {
++	unsigned long inode;
++	unsigned long generation;
++	loff_t offset;
++	unsigned long len;
++	char data[SPNFS_MAX_IO];
++};
++
++struct spnfs_msg_write_res {
++	int status;
++};
++
++/* bundle args and responses */
++union spnfs_msg_args {
++	struct spnfs_msg_layoutget_args		layoutget_args;
++	struct spnfs_msg_layoutcommit_args	layoutcommit_args;
++/*
++	struct spnfs_msg_layoutreturn_args	layoutreturn_args;
++*/
++	struct spnfs_msg_getdeviceiter_args     getdeviceiter_args;
++	struct spnfs_msg_getdeviceinfo_args     getdeviceinfo_args;
++	struct spnfs_msg_setattr_args		setattr_args;
++	struct spnfs_msg_open_args		open_args;
++	struct spnfs_msg_close_args		close_args;
++/*
++	struct spnfs_msg_create_args		create_args;
++*/
++	struct spnfs_msg_remove_args		remove_args;
++/*
++	struct spnfs_msg_commit_args		commit_args;
++*/
++	struct spnfs_msg_read_args		read_args;
++	struct spnfs_msg_write_args		write_args;
++};
++
++union spnfs_msg_res {
++	struct spnfs_msg_layoutget_res		layoutget_res;
++	struct spnfs_msg_layoutcommit_res	layoutcommit_res;
++/*
++	struct spnfs_msg_layoutreturn_res	layoutreturn_res;
++*/
++	struct spnfs_msg_getdeviceiter_res      getdeviceiter_res;
++	struct spnfs_msg_getdeviceinfo_res      getdeviceinfo_res;
++	struct spnfs_msg_setattr_res		setattr_res;
++	struct spnfs_msg_open_res		open_res;
++	struct spnfs_msg_close_res		close_res;
++/*
++	struct spnfs_msg_create_res		create_res;
++*/
++	struct spnfs_msg_remove_res		remove_res;
++/*
++	struct spnfs_msg_commit_res		commit_res;
++*/
++	struct spnfs_msg_read_res		read_res;
++	struct spnfs_msg_write_res		write_res;
++};
++
++/* a spnfs message, args and response */
++struct spnfs_msg {
++	unsigned char		im_type;
++	unsigned char		im_status;
++	union spnfs_msg_args	im_args;
++	union spnfs_msg_res	im_res;
++};
++
++/* spnfs configuration info */
++struct spnfs_config {
++	unsigned char		dense_striping;
++	int			stripe_size;
++	int			num_ds;
++	char			ds_dir[SPNFS_MAX_DATA_SERVERS][80];  /* XXX */
++};
++
++#if defined(__KERNEL__) && defined(CONFIG_SPNFS)
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++/* pipe mgmt structure.  messages flow through here */
++struct spnfs {
++	struct dentry		*spnfs_dentry;    /* dentry for pipe */
++	wait_queue_head_t	spnfs_wq;
++	struct spnfs_msg	spnfs_im;         /* spnfs message */
++	struct mutex		spnfs_lock;       /* Serializes upcalls */
++	struct mutex		spnfs_plock;
++};
++
++struct nfsd4_open;
++
++int spnfs_layout_type(struct super_block *);
++enum nfsstat4 spnfs_layoutget(struct inode *, struct exp_xdr_stream *xdr,
++			      const struct nfsd4_pnfs_layoutget_arg *,
++			      struct nfsd4_pnfs_layoutget_res *);
++int spnfs_layoutcommit(void);
++int spnfs_layoutreturn(struct inode *,
++		       const struct nfsd4_pnfs_layoutreturn_arg *);
++int spnfs_getdeviceiter(struct super_block *,
++			u32 layout_type,
++			struct nfsd4_pnfs_dev_iter_res *);
++int spnfs_getdeviceinfo(struct super_block *, struct exp_xdr_stream *,
++			u32 layout_type,
++			const struct nfsd4_pnfs_deviceid *);
++int spnfs_setattr(void);
++int spnfs_open(struct inode *, struct nfsd4_open *);
++int spnfs_get_state(struct inode *, struct knfsd_fh *, struct pnfs_get_state *);
++int spnfs_remove(unsigned long, unsigned long);
++__be32 spnfs_read(struct inode *, loff_t, unsigned long *,
++		  int, struct svc_rqst *);
++__be32 spnfs_write(struct inode *, loff_t, size_t, int, struct svc_rqst *);
++int spnfs_getfh(int, struct nfs_fh *);
++int spnfs_test_layoutrecall(char *, u64, u64);
++int spnfs_layoutrecall(struct inode *, int, u64, u64);
++
++int nfsd_spnfs_new(void);
++void nfsd_spnfs_delete(void);
++int spnfs_upcall(struct spnfs *, struct spnfs_msg *, union spnfs_msg_res *);
++int spnfs_enabled(void);
++int spnfs_init_proc(void);
++
++extern struct spnfs_config *spnfs_config;
++
++#endif /* __KERNEL__ && CONFIG_SPNFS */
++
++#endif /* NFS_SPNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch/include/linux/nfsd/const.h
+--- linux-2.6.34.noarch/include/linux/nfsd/const.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-08-31 20:42:05.591097762 -0400
+@@ -29,6 +29,7 @@
+ #ifdef __KERNEL__
+ 
+ #include <linux/sunrpc/msg_prot.h>
++#include <linux/sunrpc/svc.h>
+ 
+ /*
+  * Largest number of bytes we need to allocate for an NFS
+diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch/include/linux/nfsd/debug.h
+--- linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-08-31 20:42:05.591097762 -0400
+@@ -32,6 +32,8 @@
+ #define NFSDDBG_REPCACHE	0x0080
+ #define NFSDDBG_XDR		0x0100
+ #define NFSDDBG_LOCKD		0x0200
++#define NFSDDBG_PNFS		0x0400
++#define NFSDDBG_FILELAYOUT	0x0800
+ #define NFSDDBG_ALL		0x7FFF
+ #define NFSDDBG_NOCHANGE	0xFFFF
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarch/include/linux/nfsd/export.h
+--- linux-2.6.34.noarch/include/linux/nfsd/export.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-08-31 20:42:05.592118086 -0400
+@@ -100,6 +100,7 @@ struct svc_export {
+ 	uid_t			ex_anon_uid;
+ 	gid_t			ex_anon_gid;
+ 	int			ex_fsid;
++	int			ex_pnfs;
+ 	unsigned char *		ex_uuid; /* 16 byte fsid */
+ 	struct nfsd4_fs_locations ex_fslocs;
+ 	int			ex_nflavors;
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-08-31 20:42:05.592118086 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-08-31 20:42:05.592118086 -0400
+@@ -0,0 +1,132 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef NFSD_NFS4LAYOUTXDR_H
++#define NFSD_NFS4LAYOUTXDR_H
++
++#include <linux/sunrpc/xdr.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++/* the nfsd4_pnfs_devlist dev_addr for the file layout type */
++struct pnfs_filelayout_devaddr {
++	struct xdr_netobj	r_netid;
++	struct xdr_netobj	r_addr;
++};
++
++/* list of multipath servers */
++struct pnfs_filelayout_multipath {
++	u32				fl_multipath_length;
++	struct pnfs_filelayout_devaddr 	*fl_multipath_list;
++};
++
++struct pnfs_filelayout_device {
++	u32					fl_stripeindices_length;
++	u32       		 		*fl_stripeindices_list;
++	u32					fl_device_length;
++	struct pnfs_filelayout_multipath 	*fl_device_list;
++};
++
++struct pnfs_filelayout_layout {
++	u32                             lg_layout_type; /* response */
++	u32                             lg_stripe_type; /* response */
++	u32                             lg_commit_through_mds; /* response */
++	u64                             lg_stripe_unit; /* response */
++	u64                             lg_pattern_offset; /* response */
++	u32                             lg_first_stripe_index;	/* response */
++	struct nfsd4_pnfs_deviceid	device_id;		/* response */
++	u32                             lg_fh_length;		/* response */
++	struct knfsd_fh                 *lg_fh_list;		/* response */
++};
++
++enum stripetype4 {
++	STRIPE_SPARSE = 1,
++	STRIPE_DENSE = 2
++};
++
++enum pnfs_block_extent_state4 {
++        PNFS_BLOCK_READWRITE_DATA       = 0,
++        PNFS_BLOCK_READ_DATA            = 1,
++        PNFS_BLOCK_INVALID_DATA         = 2,
++        PNFS_BLOCK_NONE_DATA            = 3
++};
++
++enum pnfs_block_volume_type4 {
++        PNFS_BLOCK_VOLUME_SIMPLE = 0,
++        PNFS_BLOCK_VOLUME_SLICE = 1,
++        PNFS_BLOCK_VOLUME_CONCAT = 2,
++        PNFS_BLOCK_VOLUME_STRIPE = 3,
++};
++typedef enum pnfs_block_volume_type4 pnfs_block_volume_type4;
++
++enum bl_cache_state {
++	BLOCK_LAYOUT_NEW	= 0,
++	BLOCK_LAYOUT_CACHE	= 1,
++	BLOCK_LAYOUT_UPDATE	= 2,
++};
++
++typedef struct pnfs_blocklayout_layout {
++        struct list_head                bll_list;
++        struct nfsd4_pnfs_deviceid      bll_vol_id;
++        u64                             bll_foff;	// file offset
++        u64                             bll_len;
++        u64                             bll_soff;	// storage offset
++	int				bll_recalled;
++        enum pnfs_block_extent_state4   bll_es;
++	enum bl_cache_state		bll_cache_state;
++} pnfs_blocklayout_layout_t;
++
++typedef struct pnfs_blocklayout_devinfo {
++        struct list_head                bld_list;
++        pnfs_block_volume_type4         bld_type;
++        struct nfsd4_pnfs_deviceid      bld_devid;
++        int                             bld_index_loc;
++        union {
++                struct {
++                        u64             bld_offset;
++                        u32             bld_sig_len,
++                                        *bld_sig;
++                } simple;
++                struct {
++                        u64             bld_start,
++                                        bld_len;
++                        u32             bld_index;      /* Index of Simple Volume */
++                } slice;
++                struct {
++                        u32             bld_stripes;
++                        u64             bld_chunk_size;
++                        u32             *bld_stripe_indexs;
++                } stripe;
++        } u;
++} pnfs_blocklayout_devinfo_t;
++
++#endif /* NFSD_NFS4LAYOUTXDR_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-08-31 20:42:05.593020723 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-08-31 20:42:05.593020723 -0400
+@@ -0,0 +1,54 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ ******************************************************************************/
++#include <linux/genhd.h>
++
++/*
++ * Length of comma separated pnfs data server IPv4 addresses. Enough room for
++ * 32 addresses.
++ */
++#define NFSD_DLM_DS_LIST_MAX   512
++/*
++ * Length of colon separated pnfs dlm device of the form
++ * disk_name:comma separated data server IPv4 address
++ */
++#define NFSD_PNFS_DLM_DEVICE_MAX (NFSD_DLM_DS_LIST_MAX + DISK_NAME_LEN + 1)
++
++#ifdef CONFIG_PNFSD
++
++/* For use by DLM cluster file systems exported by pNFSD */
++extern const struct pnfs_export_operations pnfs_dlm_export_ops;
++
++int nfsd4_set_pnfs_dlm_device(char *pnfs_dlm_device, int len);
++
++void nfsd4_pnfs_dlm_shutdown(void);
++
++ssize_t nfsd4_get_pnfs_dlm_device_list(char *buf, ssize_t buflen);
++
++#else /* CONFIG_PNFSD */
++
++static inline void nfsd4_pnfs_dlm_shutdown(void)
++{
++	return;	/* no-op stub for builds without CONFIG_PNFSD */
++}
++
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-08-31 20:42:05.594107962 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-08-31 20:42:05.594107962 -0400
+@@ -0,0 +1,271 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef _LINUX_NFSD_NFSD4_PNFS_H
++#define _LINUX_NFSD_NFSD4_PNFS_H
++
++#include <linux/exportfs.h>
++#include <linux/exp_xdr.h>
++#include <linux/nfs_xdr.h>
++
++struct nfsd4_pnfs_deviceid {
++	u64	sbid;			/* per-superblock unique ID */
++	u64	devid;			/* filesystem-wide unique device ID */
++};
++
++struct nfsd4_pnfs_dev_iter_res {
++	u64		gd_cookie;	/* request/response */
++	u64		gd_verf;	/* request/response */
++	u64		gd_devid;	/* response */
++	u32		gd_eof;		/* response */
++};
++
++/* Arguments for set_device_notify */
++struct pnfs_devnotify_arg {
++	struct nfsd4_pnfs_deviceid dn_devid;	/* request */
++	u32 dn_layout_type;			/* request */
++	u32 dn_notify_types;			/* request/response */
++};
++
++struct nfsd4_layout_seg {
++	u64	clientid;
++	u32	layout_type;
++	u32	iomode;
++	u64	offset;
++	u64	length;
++};
++
++/* Used by layout_get to encode layout (loc_body var in spec)
++ * Args:
++ * minlength - min number of accessible bytes given by layout
++ * fsid - Major part of struct pnfs_deviceid.  File system uses this
++ * to build the deviceid returned in the layout.
++ * fh - fs can modify the file handle for use on data servers
++ * seg - layout info requested and layout info returned
++ * xdr - xdr info
++ * return_on_close - true if layout to be returned on file close
++ */
++
++struct nfsd4_pnfs_layoutget_arg {
++	u64			lg_minlength;
++	u64			lg_sbid;
++	const struct knfsd_fh	*lg_fh;
++};
++
++struct nfsd4_pnfs_layoutget_res {
++	struct nfsd4_layout_seg	lg_seg;	/* request/response */
++	u32			lg_return_on_close;
++};
++
++struct nfsd4_pnfs_layoutcommit_arg {
++	struct nfsd4_layout_seg	lc_seg;		/* request */
++	u32			lc_reclaim;	/* request */
++	u32			lc_newoffset;	/* request */
++	u64			lc_last_wr;	/* request */
++	struct nfstime4		lc_mtime;	/* request */
++	u32			lc_up_len;	/* layout length */
++	void			*lc_up_layout;	/* decoded by callback */
++};
++
++struct nfsd4_pnfs_layoutcommit_res {
++	u32			lc_size_chg;	/* boolean for response */
++	u64			lc_newsize;	/* response */
++};
++
++#define PNFS_LAST_LAYOUT_NO_RECALLS ((void *)-1) /* used with lr_cookie below */
++
++struct nfsd4_pnfs_layoutreturn_arg {
++	u32			lr_return_type;	/* request */
++	struct nfsd4_layout_seg	lr_seg;		/* request */
++	u32			lr_reclaim;	/* request */
++	u32			lrf_body_len;	/* request */
++	void			*lrf_body;	/* request */
++	void			*lr_cookie;	/* fs private */
++};
++
++/* pNFS Metadata to Data server state communication */
++struct pnfs_get_state {
++	u32			dsid;    /* request */
++	u64			ino;      /* request */
++	nfs4_stateid		stid;     /* request;response */
++	nfs4_clientid		clid;     /* response */
++	u32			access;    /* response */
++	u32			stid_gen;    /* response */
++	u32			verifier[2]; /* response */
++};
++
++/*
++ * pNFS export operations vector.
++ *
++ * The filesystem must implement the following methods:
++ *   layout_type
++ *   get_device_info
++ *   layout_get
++ *
++ * All other methods are optional and can be set to NULL if not implemented.
++ */
++struct pnfs_export_operations {
++	/* Returns the supported pnfs_layouttype4. */
++	int (*layout_type) (struct super_block *);
++
++	/* Encode device info onto the xdr stream. */
++	int (*get_device_info) (struct super_block *,
++				struct exp_xdr_stream *,
++				u32 layout_type,
++				const struct nfsd4_pnfs_deviceid *);
++
++	/* Retrieve all available devices via an iterator.
++	 * res->gd_cookie == 0 indicates the beginning of the list,
++	 * otherwise res->gd_verf is used to verify that the list hasn't
++	 * changed while retrieved.
++	 *
++	 * On output, the filesystem sets res->gd_devid based on the current
++	 * cookie and sets res->gd_cookie and res->gd_verf for the next entry.
++	 * When the last entry is retrieved, res->gd_eof is set to 1.
++	 */
++	int (*get_device_iter) (struct super_block *,
++				u32 layout_type,
++				struct nfsd4_pnfs_dev_iter_res *);
++
++	int (*set_device_notify) (struct super_block *,
++				  struct pnfs_devnotify_arg *);
++
++	/* Retrieve and encode a layout for inode onto the xdr stream.
++	 * arg->minlength is the minimum number of accessible bytes required
++	 *   by the client.
++	 * The maximum number of bytes to encode the layout is given by
++	 *   the xdr stream end pointer.
++	 * arg->fsid contains the major part of struct pnfs_deviceid.
++	 *   The file system uses this to build the deviceid returned
++	 *   in the layout.
++	 * res->seg - layout segment requested and layout info returned.
++	 * res->fh - the file handle; can be modified for use on data servers
++	 * res->return_on_close - true if layout to be returned on file close
++	 *
++	 * return one of the following nfs errors:
++	 * NFS_OK			Success
++	 * NFS4ERR_ACCESS		Permission error
++	 * NFS4ERR_BADIOMODE		Server does not support requested iomode
++	 * NFS4ERR_BADLAYOUT		No layout matching loga_minlength rules
++	 * NFS4ERR_INVAL		Parameter other than layout is invalid
++	 * NFS4ERR_IO			I/O error
++	 * NFS4ERR_LAYOUTTRYLATER	Layout may be retrieved later
++	 * NFS4ERR_LAYOUTUNAVAILABLE	Layout unavailable for this file
++	 * NFS4ERR_LOCKED		Lock conflict
++	 * NFS4ERR_NOSPC		Out-of-space error occurred
++	 * NFS4ERR_RECALLCONFLICT	Layout currently unavailable due to
++	 *				a conflicting CB_LAYOUTRECALL
++	 * NFS4ERR_SERVERFAULT		Server went berserk
++	 * NFS4ERR_TOOSMALL		loga_maxcount too small to fit layout
++	 * NFS4ERR_WRONG_TYPE		Wrong file type (not a regular file)
++	 */
++	enum nfsstat4 (*layout_get) (struct inode *,
++				     struct exp_xdr_stream *xdr,
++				     const struct nfsd4_pnfs_layoutget_arg *,
++				     struct nfsd4_pnfs_layoutget_res *);
++
++	/* Commit changes to layout */
++	int (*layout_commit) (struct inode *,
++			      const struct nfsd4_pnfs_layoutcommit_arg *,
++			      struct nfsd4_pnfs_layoutcommit_res *);
++
++	/* Returns the layout */
++	int (*layout_return) (struct inode *,
++			      const struct nfsd4_pnfs_layoutreturn_arg *);
++
++	/* Can layout segments be merged for this layout type? */
++	int (*can_merge_layouts) (u32 layout_type);
++
++	/* pNFS Files layout specific operations */
++
++	/* Get the write verifier for DS (called on MDS only) */
++	void (*get_verifier) (struct super_block *, u32 *p);
++	/* Call fs on DS only */
++	int (*get_state) (struct inode *, struct knfsd_fh *,
++			  struct pnfs_get_state *);
++};
++
++struct nfsd4_pnfs_cb_layout {
++	u32			cbl_recall_type;	/* request */
++	struct nfsd4_layout_seg cbl_seg;		/* request */
++	u32			cbl_layoutchanged;	/* request */
++	nfs4_stateid		cbl_sid;		/* request */
++	struct nfs4_fsid	cbl_fsid;
++	void			*cbl_cookie;		/* fs private */
++};
++
++/* layoutrecall request (from exported filesystem) */
++struct nfs4_layoutrecall {
++	struct kref			clr_ref;
++	struct nfsd4_pnfs_cb_layout	cb;	/* request */
++	struct list_head		clr_perclnt; /* on cl_layoutrecalls */
++	struct nfs4_client	       *clr_client;
++	struct nfs4_file	       *clr_file;
++	struct timespec			clr_time;	/* last activity */
++	struct super_block 		*clr_sb; /* We might not have a file */
++	struct nfs4_layoutrecall	*parent; /* The initiating recall */
++
++	void				*clr_args;	/* nfsd internal */
++};
++
++struct nfsd4_pnfs_cb_dev_item {
++	u32			cbd_notify_type;	/* request */
++	u32			cbd_layout_type;	/* request */
++	struct nfsd4_pnfs_deviceid cbd_devid;		/* request */
++	u32			cbd_immediate;		/* request */
++};
++
++struct nfsd4_pnfs_cb_dev_list {
++	u32				cbd_len;  /* request */
++	struct nfsd4_pnfs_cb_dev_item  *cbd_list; /* request */
++};
++
++/*
++ * callbacks provided by the nfsd
++ */
++struct pnfsd_cb_operations {
++	/* Generic callbacks */
++	int (*cb_layout_recall) (struct super_block *, struct inode *,
++				 struct nfsd4_pnfs_cb_layout *);
++	int (*cb_device_notify) (struct super_block *,
++				 struct nfsd4_pnfs_cb_dev_list *);
++
++	/* pNFS Files layout specific callbacks */
++
++	/* Callback from fs on MDS only */
++	int (*cb_get_state) (struct super_block *, struct pnfs_get_state *);
++	/* Callback from fs on DS only */
++	int (*cb_change_state) (struct pnfs_get_state *);
++};
++
++#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noarch/include/linux/nfsd/syscall.h
+--- linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-08-31 20:42:05.594107962 -0400
+@@ -29,6 +29,7 @@
+ /*#define NFSCTL_GETFH		6	/ * get an fh by ino DISCARDED */
+ #define NFSCTL_GETFD		7	/* get an fh by path (used by mountd) */
+ #define	NFSCTL_GETFS		8	/* get an fh by path with max FH len */
++#define	NFSCTL_FD2FH		9	/* get a fh from a fd */
+ 
+ /* SVC */
+ struct nfsctl_svc {
+@@ -71,6 +72,11 @@ struct nfsctl_fsparm {
+ 	int			gd_maxlen;
+ };
+ 
++/* FD2FH */
++struct nfsctl_fd2fh {
++	int			fd;
++};
++
+ /*
+  * This is the argument union.
+  */
+@@ -82,6 +88,7 @@ struct nfsctl_arg {
+ 		struct nfsctl_export	u_export;
+ 		struct nfsctl_fdparm	u_getfd;
+ 		struct nfsctl_fsparm	u_getfs;
++		struct nfsctl_fd2fh	u_fd2fh;
+ 		/*
+ 		 * The following dummy member is needed to preserve binary compatibility
+ 		 * on platforms where alignof(void*)>alignof(int).  It's needed because
+@@ -95,6 +102,7 @@ struct nfsctl_arg {
+ #define ca_export	u.u_export
+ #define ca_getfd	u.u_getfd
+ #define	ca_getfs	u.u_getfs
++#define	ca_fd2fh	u.u_fd2fh
+ };
+ 
+ union nfsctl_res {
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/include/linux/nfs_fs.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-31 20:41:19.168160480 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-31 20:42:05.584098019 -0400
+@@ -72,13 +72,20 @@ struct nfs_access_entry {
+ 	int			mask;
+ };
+ 
++struct nfs_lock_context {
++	atomic_t count;
++	struct list_head list;
++	struct nfs_open_context *open_context;
++	fl_owner_t lockowner;
++	pid_t pid;
++};
++
+ struct nfs4_state;
+ struct nfs_open_context {
+-	atomic_t count;
++	struct nfs_lock_context lock_context;
+ 	struct path path;
+ 	struct rpc_cred *cred;
+ 	struct nfs4_state *state;
+-	fl_owner_t lockowner;
+ 	fmode_t mode;
+ 
+ 	unsigned long flags;
+@@ -97,6 +104,27 @@ struct nfs_delegation;
+ 
+ struct posix_acl;
+ 
++struct pnfs_layout_hdr {
++	int			refcount;
++	struct list_head	layouts;   /* other client layouts */
++	struct list_head	segs;      /* layout segments list */
++	int			roc_iomode;/* return on close iomode, 0=none */
++	seqlock_t		seqlock;   /* Protects the stateid */
++	nfs4_stateid		stateid;
++	unsigned long		state;
++#define NFS_INO_RO_LAYOUT_FAILED 0         /* ro layoutget failed stop trying */
++#define NFS_INO_RW_LAYOUT_FAILED 1         /* rw layoutget failed stop trying */
++#define NFS_INO_LAYOUTCOMMIT     2         /* LAYOUTCOMMIT needed */
++
++	struct rpc_cred		*cred;     /* layoutcommit credential */
++	/* DH: These vars keep track of the maximum write range
++	 * so the values can be used for layoutcommit.
++	 */
++	loff_t			write_begin_pos;
++	loff_t			write_end_pos;
++	struct inode		*inode;
++};
++
+ /*
+  * nfs fs inode data in memory
+  */
+@@ -181,6 +209,13 @@ struct nfs_inode {
+ 	struct nfs_delegation	*delegation;
+ 	fmode_t			 delegation_state;
+ 	struct rw_semaphore	rwsem;
++
++	/* pNFS layout information */
++#if defined(CONFIG_NFS_V4_1)
++	wait_queue_head_t lo_waitq;
++	struct pnfs_layout_hdr *layout;
++	time_t pnfs_layout_suspend;
++#endif /* CONFIG_NFS_V4_1 */
+ #endif /* CONFIG_NFS_V4*/
+ #ifdef CONFIG_NFS_FSCACHE
+ 	struct fscache_cookie	*fscache;
+@@ -353,6 +388,8 @@ extern void nfs_setattr_update_inode(str
+ extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
+ extern void put_nfs_open_context(struct nfs_open_context *ctx);
+ extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode);
++extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx);
++extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx);
+ extern u64 nfs_compat_user_ino64(u64 fileid);
+ extern void nfs_fattr_init(struct nfs_fattr *fattr);
+ 
+@@ -481,8 +518,12 @@ extern void nfs_unblock_sillyrename(stru
+ extern int  nfs_congestion_kb;
+ extern int  nfs_writepage(struct page *page, struct writeback_control *wbc);
+ extern int  nfs_writepages(struct address_space *, struct writeback_control *);
+-extern int  nfs_flush_incompatible(struct file *file, struct page *page);
+-extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
++struct pnfs_layout_segment;
++extern int  nfs_flush_incompatible(struct file *file, struct page *page,
++				   struct pnfs_layout_segment *lseg);
++extern int  nfs_updatepage(struct file *, struct page *,
++			   unsigned int offset, unsigned int count,
++			   struct pnfs_layout_segment *lseg, void *fsdata);
+ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
+ 
+ /*
+@@ -604,6 +645,8 @@ extern void * nfs_root_data(void);
+ #define NFSDBG_CLIENT		0x0200
+ #define NFSDBG_MOUNT		0x0400
+ #define NFSDBG_FSCACHE		0x0800
++#define NFSDBG_PNFS		0x1000
++#define NFSDBG_PNFS_LD		0x2000
+ #define NFSDBG_ALL		0xFFFF
+ 
+ #ifdef __KERNEL__
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/include/linux/nfs_fs_sb.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-08-31 20:41:19.168160480 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-31 20:42:05.586087719 -0400
+@@ -15,6 +15,7 @@ struct nlm_host;
+ struct nfs4_sequence_args;
+ struct nfs4_sequence_res;
+ struct nfs_server;
++struct nfs4_minor_version_ops;
+ 
+ /*
+  * The nfs_client identifies our client state to the server.
+@@ -70,11 +71,7 @@ struct nfs_client {
+ 	 */
+ 	char			cl_ipaddr[48];
+ 	unsigned char		cl_id_uniquifier;
+-	int		     (* cl_call_sync)(struct nfs_server *server,
+-					      struct rpc_message *msg,
+-					      struct nfs4_sequence_args *args,
+-					      struct nfs4_sequence_res *res,
+-					      int cache_reply);
++	const struct nfs4_minor_version_ops *cl_mvops;
+ #endif /* CONFIG_NFS_V4 */
+ 
+ #ifdef CONFIG_NFS_V4_1
+@@ -85,6 +82,8 @@ struct nfs_client {
+ 	/* The flags used for obtaining the clientid during EXCHANGE_ID */
+ 	u32			cl_exchange_flags;
+ 	struct nfs4_session	*cl_session; 	/* sharred session */
++	struct list_head	cl_layouts;
++	struct nfs4_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ #ifdef CONFIG_NFS_FSCACHE
+@@ -92,6 +91,16 @@ struct nfs_client {
+ #endif
+ };
+ 
++static inline bool
++is_ds_only_client(struct nfs_client *clp)
++{
++#ifdef CONFIG_NFS_V4_1
++	return is_ds_only_session(clp->cl_exchange_flags);
++#else /* !CONFIG_NFS_V4_1 */
++	return false;	/* cl_exchange_flags exists only with NFSv4.1 */
++#endif /* CONFIG_NFS_V4_1 */
++}
++
+ /*
+  * NFS client parameters stored in the superblock.
+  */
+@@ -136,7 +145,7 @@ struct nfs_server {
+ #endif
+ 
+ #ifdef CONFIG_NFS_V4
+-	u32			attr_bitmask[2];/* V4 bitmask representing the set
++	u32			attr_bitmask[3];/* V4 bitmask representing the set
+ 						   of attributes supported on this
+ 						   filesystem */
+ 	u32			cache_consistency_bitmask[2];
+@@ -148,6 +157,15 @@ struct nfs_server {
+ 						   that are supported on this
+ 						   filesystem */
+ #endif
++
++#ifdef CONFIG_NFS_V4_1
++	u32				pnfs_blksize; /* layout_blksize attr */
++	struct pnfs_layoutdriver_type  *pnfs_curr_ld; /* Active layout driver */
++	void			       *pnfs_ld_data; /* Per-mount data */
++	unsigned int			ds_rsize;  /* Data server read size */
++	unsigned int			ds_wsize;  /* Data server write size */
++#endif /* CONFIG_NFS_V4_1 */
++
+ 	void (*destroy)(struct nfs_server *);
+ 
+ 	atomic_t active; /* Keep trace of any activity to this server */
+diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch/include/linux/nfs_iostat.h
+--- linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-08-31 20:42:05.587097913 -0400
+@@ -113,6 +113,9 @@ enum nfs_stat_eventcounters {
+ 	NFSIOS_SHORTREAD,
+ 	NFSIOS_SHORTWRITE,
+ 	NFSIOS_DELAY,
++	NFSIOS_PNFS_READ,
++	NFSIOS_PNFS_WRITE,
++	NFSIOS_PNFS_COMMIT,
+ 	__NFSIOS_COUNTSMAX,
+ };
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/include/linux/nfs_page.h
+--- linux-2.6.34.noarch/include/linux/nfs_page.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-08-31 20:42:05.588097898 -0400
+@@ -39,6 +39,7 @@ struct nfs_page {
+ 	struct list_head	wb_list;	/* Defines state of page: */
+ 	struct page		*wb_page;	/* page to read in/write out */
+ 	struct nfs_open_context	*wb_context;	/* File state context info */
++	struct nfs_lock_context	*wb_lock_context;	/* lock context info */
+ 	atomic_t		wb_complete;	/* i/os we're waiting for */
+ 	pgoff_t			wb_index;	/* Offset >> PAGE_CACHE_SHIFT */
+ 	unsigned int		wb_offset,	/* Offset & ~PAGE_CACHE_MASK */
+@@ -47,6 +48,7 @@ struct nfs_page {
+ 	struct kref		wb_kref;	/* reference count */
+ 	unsigned long		wb_flags;
+ 	struct nfs_writeverf	wb_verf;	/* Commit cookie */
++	struct pnfs_layout_segment *wb_lseg;	/* Pnfs layout info */
+ };
+ 
+ struct nfs_pageio_descriptor {
+@@ -60,6 +62,12 @@ struct nfs_pageio_descriptor {
+ 	int			(*pg_doio)(struct inode *, struct list_head *, unsigned int, size_t, int);
+ 	int 			pg_ioflags;
+ 	int			pg_error;
++	struct pnfs_layout_segment *pg_lseg;
++#ifdef CONFIG_NFS_V4_1
++	int			pg_iswrite;
++	int			pg_boundary;
++	int			(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
++#endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+ #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
+@@ -68,13 +76,15 @@ extern	struct nfs_page *nfs_create_reque
+ 					    struct inode *inode,
+ 					    struct page *page,
+ 					    unsigned int offset,
+-					    unsigned int count);
++					    unsigned int count,
++					    struct pnfs_layout_segment *lseg);
+ extern	void nfs_clear_request(struct nfs_page *req);
+ extern	void nfs_release_request(struct nfs_page *req);
+ 
+ 
+ extern	int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
+-			  pgoff_t idx_start, unsigned int npages, int tag);
++			  pgoff_t idx_start, unsigned int npages, int tag,
++			  int *use_pnfs);
+ extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
+ 			     struct inode *inode,
+ 			     int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
+diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/include/linux/nfs_xdr.h
+--- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-08-31 20:41:19.169171911 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-31 20:42:05.590087729 -0400
+@@ -3,6 +3,8 @@
+ 
+ #include <linux/nfsacl.h>
+ #include <linux/nfs3.h>
++#include <linux/nfs4.h>
++#include <linux/sunrpc/sched.h>
+ 
+ /*
+  * To change the maximum rsize and wsize supported by the NFS client, adjust
+@@ -10,7 +12,7 @@
+  * support a megabyte or more.  The default is left at 4096 bytes, which is
+  * reasonable for NFS over UDP.
+  */
+-#define NFS_MAX_FILE_IO_SIZE	(1048576U)
++#define NFS_MAX_FILE_IO_SIZE	(4U * 1048576U)
+ #define NFS_DEF_FILE_IO_SIZE	(4096U)
+ #define NFS_MIN_FILE_IO_SIZE	(1024U)
+ 
+@@ -113,6 +115,10 @@ struct nfs_fsinfo {
+ 	__u32			dtpref;	/* pref. readdir transfer size */
+ 	__u64			maxfilesize;
+ 	__u32			lease_time; /* in seconds */
++#if defined(CONFIG_NFS_V4_1)
++	__u32			layouttype; /* supported pnfs layout driver */
++	__u32			blksize; /* preferred pnfs io block size */
++#endif
+ };
+ 
+ struct nfs_fsstat {
+@@ -185,6 +191,125 @@ struct nfs4_get_lease_time_res {
+ 	struct nfs4_sequence_res	lr_seq_res;
+ };
+ 
++#define PNFS_LAYOUT_MAXSIZE 4096
++#define NFS4_PNFS_DEVICEID4_SIZE 16
++
++struct pnfs_deviceid {
++	char data[NFS4_PNFS_DEVICEID4_SIZE];
++};
++
++struct nfs4_layoutdriver_data {
++	__u32 len;
++	void *buf;
++};
++
++struct pnfs_layout_range {
++	u32 iomode;
++	u64 offset;
++	u64 length;
++};
++
++struct nfs4_layoutget_args {
++	__u32 type;
++	struct pnfs_layout_range range;
++	__u64 minlength;
++	__u32 maxcount;
++	struct inode *inode;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_layoutget_res {
++	__u32 return_on_close;
++	struct pnfs_layout_range range;
++	__u32 type;
++	nfs4_stateid stateid;
++	struct nfs4_layoutdriver_data layout;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct nfs4_layoutget {
++	struct nfs4_layoutget_args args;
++	struct nfs4_layoutget_res res;
++	struct pnfs_layout_segment **lsegpp;
++	int status;
++};
++
++struct nfs4_layoutcommit_args {
++	nfs4_stateid stateid;
++	__u64 lastbytewritten;
++	__u32 time_modify_changed;
++	struct timespec time_modify;
++	const u32 *bitmask;
++	struct nfs_fh *fh;
++	struct inode *inode;
++
++	/* Values set by layout driver */
++	struct pnfs_layout_range range;
++	__u32 layout_type;
++	void *layoutdriver_data;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_layoutcommit_res {
++	__u32 sizechanged;
++	__u64 newsize;
++	struct nfs_fattr *fattr;
++	const struct nfs_server *server;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct nfs4_layoutcommit_data {
++	struct rpc_task task;
++	struct rpc_cred *cred;
++	struct nfs_fattr fattr;
++	struct nfs4_layoutcommit_args args;
++	struct nfs4_layoutcommit_res res;
++	int status;
++};
++
++struct nfs4_layoutreturn_args {
++	__u32   reclaim;
++	__u32   layout_type;
++	__u32   return_type;
++	struct pnfs_layout_range range;
++	struct inode *inode;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_layoutreturn_res {
++	struct nfs4_sequence_res seq_res;
++	u32 lrs_present;
++	nfs4_stateid stateid;
++};
++
++struct nfs4_layoutreturn {
++	struct nfs4_layoutreturn_args args;
++	struct nfs4_layoutreturn_res res;
++	struct rpc_cred *cred;
++	int rpc_status;
++};
++
++struct nfs4_getdevicelist_args {
++	const struct nfs_fh *fh;
++	u32 layoutclass;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_getdevicelist_res {
++	struct pnfs_devicelist *devlist;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct nfs4_getdeviceinfo_args {
++	struct pnfs_device *pdev;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_getdeviceinfo_res {
++	struct pnfs_device *pdev;
++	struct nfs4_sequence_res seq_res;
++};
++
+ /*
+  * Arguments to the open call.
+  */
+@@ -196,8 +321,10 @@ struct nfs_openargs {
+ 	__u64                   clientid;
+ 	__u64                   id;
+ 	union {
+-		struct iattr *  attrs;    /* UNCHECKED, GUARDED */
+-		nfs4_verifier   verifier; /* EXCLUSIVE */
++		struct {
++			struct iattr *  attrs;    /* UNCHECKED, GUARDED */
++			nfs4_verifier   verifier; /* EXCLUSIVE */
++		};
+ 		nfs4_stateid	delegation;		/* CLAIM_DELEGATE_CUR */
+ 		fmode_t		delegation_type;	/* CLAIM_PREVIOUS */
+ 	} u;
+@@ -313,6 +440,10 @@ struct nfs_lockt_res {
+ 	struct nfs4_sequence_res	seq_res;
+ };
+ 
++struct nfs_release_lockowner_args {
++	struct nfs_lowner	lock_owner;
++};
++
+ struct nfs4_delegreturnargs {
+ 	const struct nfs_fh *fhandle;
+ 	const nfs4_stateid *stateid;
+@@ -332,6 +463,7 @@ struct nfs4_delegreturnres {
+ struct nfs_readargs {
+ 	struct nfs_fh *		fh;
+ 	struct nfs_open_context *context;
++	struct nfs_lock_context *lock_context;
+ 	__u64			offset;
+ 	__u32			count;
+ 	unsigned int		pgbase;
+@@ -352,6 +484,7 @@ struct nfs_readres {
+ struct nfs_writeargs {
+ 	struct nfs_fh *		fh;
+ 	struct nfs_open_context *context;
++	struct nfs_lock_context *lock_context;
+ 	__u64			offset;
+ 	__u32			count;
+ 	enum nfs3_stable_how	stable;
+@@ -846,7 +979,7 @@ struct nfs4_server_caps_arg {
+ };
+ 
+ struct nfs4_server_caps_res {
+-	u32				attr_bitmask[2];
++	u32				attr_bitmask[3];
+ 	u32				acl_bitmask;
+ 	u32				has_links;
+ 	u32				has_symlinks;
+@@ -961,6 +1094,27 @@ struct nfs_page;
+ 
+ #define NFS_PAGEVEC_SIZE	(8U)
+ 
++#if defined(CONFIG_NFS_V4_1)
++/* pnfsflag values */
++#define PNFS_NO_RPC		0x0001   /* non rpc result callback switch */
++
++/* pnfs-specific data needed for read, write, and commit calls */
++struct pnfs_call_data {
++	struct pnfs_layout_segment *lseg;
++	const struct rpc_call_ops *call_ops;
++	u32			orig_count;	/* for retry via MDS */
++	int			pnfs_error;
++	u8			pnfsflags;
++	u8			how;		/* for FLUSH_STABLE */
++};
++
++/* files layout-type specific data for read, write, and commit */
++struct pnfs_fl_call_data {
++	struct nfs_client	*ds_nfs_client;
++	__u64			orig_offset;
++};
++#endif /* CONFIG_NFS_V4_1 */
++
+ struct nfs_read_data {
+ 	int			flags;
+ 	struct rpc_task		task;
+@@ -976,10 +1130,16 @@ struct nfs_read_data {
+ #ifdef CONFIG_NFS_V4
+ 	unsigned long		timestamp;	/* For lease renewal */
+ #endif
++#if defined(CONFIG_NFS_V4_1)
++	struct pnfs_call_data	pdata;
++	struct pnfs_fl_call_data fldata;
++#endif /* CONFIG_NFS_V4_1 */
+ 	struct page		*page_array[NFS_PAGEVEC_SIZE];
+ };
+ 
+ struct nfs_write_data {
++	struct kref		refcount;	/* For pnfs commit splitting */
++	struct nfs_write_data	*parent;	/* For pnfs commit splitting */
+ 	int			flags;
+ 	struct rpc_task		task;
+ 	struct inode		*inode;
+@@ -995,6 +1155,10 @@ struct nfs_write_data {
+ #ifdef CONFIG_NFS_V4
+ 	unsigned long		timestamp;	/* For lease renewal */
+ #endif
++#if defined(CONFIG_NFS_V4_1)
++	struct pnfs_call_data	pdata;
++	struct pnfs_fl_call_data fldata;
++#endif /* CONFIG_NFS_V4_1 */
+ 	struct page		*page_array[NFS_PAGEVEC_SIZE];
+ };
+ 
+@@ -1008,6 +1172,7 @@ struct nfs_rpc_ops {
+ 	const struct dentry_operations *dentry_ops;
+ 	const struct inode_operations *dir_inode_ops;
+ 	const struct inode_operations *file_inode_ops;
++	const struct file_operations *file_ops;
+ 
+ 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
+ 			    struct nfs_fsinfo *);
+@@ -1072,6 +1237,7 @@ struct nfs_rpc_ops {
+ extern const struct nfs_rpc_ops	nfs_v2_clientops;
+ extern const struct nfs_rpc_ops	nfs_v3_clientops;
+ extern const struct nfs_rpc_ops	nfs_v4_clientops;
++extern const struct nfs_rpc_ops	pnfs_v4_clientops;
+ extern struct rpc_version	nfs_version2;
+ extern struct rpc_version	nfs_version3;
+ extern struct rpc_version	nfs_version4;
+diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.noarch/include/linux/panfs_shim_api.h
+--- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-08-31 20:42:05.598087997 -0400
++++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-08-31 20:42:05.599087710 -0400
+@@ -0,0 +1,57 @@
++#ifndef _PANFS_SHIM_API_H
++#define _PANFS_SHIM_API_H
++
++/*
++ * imported panfs functions
++ */
++struct panfs_export_operations {
++	int (*convert_rc)(pan_status_t rc);
++
++	int (*sm_sec_t_get_size_otw)(
++		pan_sm_sec_otw_t *var,
++		pan_size_t *core_sizep,
++		pan_size_t *wire_size,
++		void *buf_end);
++
++	int (*sm_sec_t_unmarshall)(
++		pan_sm_sec_otw_t *in,
++		pan_sm_sec_t *out,
++		void *buf,
++		pan_size_t size,
++		pan_size_t *otw_consumed,
++		pan_size_t *in_core_consumed);
++
++	int (*ucreds_get)(void **ucreds_pp);
++
++	void (*ucreds_put)(void *ucreds);
++
++	int (*sam_read)(
++		pan_sam_access_flags_t  flags,
++		pan_sam_read_args_t    *args_p,
++		pan_sam_obj_sec_t      *obj_sec_p,
++		pan_sg_entry_t         *data_p,
++		void                   *ucreds,
++		pan_sam_read_cb_t       closure,
++		void                   *user_arg1,
++		void                   *user_arg2,
++		pan_sam_read_res_t     *res_p);
++
++	int (*sam_write)(
++		pan_sam_access_flags_t  flags,
++		pan_sam_write_args_t   *args_p,
++		pan_sam_obj_sec_t      *obj_sec_p,
++		pan_sg_entry_t         *data_p,
++		void                   *ucreds,
++		pan_sam_write_cb_t      closure,
++		void                   *user_arg1,
++		void                   *user_arg2,
++		pan_sam_write_res_t    *res_p);
++};
++
++extern int
++panfs_shim_register(struct panfs_export_operations *ops);
++
++extern int
++panfs_shim_unregister(void);
++
++#endif /* _PANFS_SHIM_API_H */
+diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h
+--- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-08-31 20:42:05.600025088 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-08-31 20:42:05.600025088 -0400
+@@ -0,0 +1,439 @@
++/*
++ *  pnfs_osd_xdr.h
++ *
++ *  pNFS-osd on-the-wire data structures
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#ifndef __PNFS_OSD_XDR_H__
++#define __PNFS_OSD_XDR_H__
++
++#include <linux/nfs_fs.h>
++#include <linux/nfs_page.h>
++#include <linux/exp_xdr.h>
++#include <scsi/osd_protocol.h>
++
++#define PNFS_OSD_OSDNAME_MAXSIZE 256
++
++/*
++ * START OF "GENERIC" DECODE ROUTINES.
++ *   These may look a little ugly since they are imported from a "generic"
++ * set of XDR encode/decode routines which are intended to be shared by
++ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
++ *
++ * If the pain of reading these is too great, it should be a straightforward
++ * task to translate them into Linux-specific versions which are more
++ * consistent with the style used in NFSv2/v3...
++ */
++#define READ32(x)         (x) = ntohl(*p++)
++#define READ64(x)         do {			\
++	(x) = (u64)ntohl(*p++) << 32;		\
++	(x) |= ntohl(*p++);			\
++} while (0)
++#define COPYMEM(x, nbytes) do {			\
++	memcpy((x), p, nbytes);			\
++	p += XDR_QUADLEN(nbytes);		\
++} while (0)
++
++/*
++ * draft-ietf-nfsv4-minorversion-22
++ * draft-ietf-nfsv4-pnfs-obj-12
++ */
++
++/* Layout Structure */
++
++enum pnfs_osd_raid_algorithm4 {
++	PNFS_OSD_RAID_0		= 1,
++	PNFS_OSD_RAID_4		= 2,
++	PNFS_OSD_RAID_5		= 3,
++	PNFS_OSD_RAID_PQ	= 4     /* Reed-Solomon P+Q */
++};
++
++/*   struct pnfs_osd_data_map4 {
++ *       uint32_t                    odm_num_comps;
++ *       length4                     odm_stripe_unit;
++ *       uint32_t                    odm_group_width;
++ *       uint32_t                    odm_group_depth;
++ *       uint32_t                    odm_mirror_cnt;
++ *       pnfs_osd_raid_algorithm4    odm_raid_algorithm;
++ *   };
++ */
++struct pnfs_osd_data_map {
++	u32	odm_num_comps;
++	u64	odm_stripe_unit;
++	u32	odm_group_width;
++	u32	odm_group_depth;
++	u32	odm_mirror_cnt;
++	u32	odm_raid_algorithm;
++};
++
++static inline int
++pnfs_osd_data_map_xdr_sz(void)
++{
++	return 1 + 2 + 1 + 1 + 1 + 1;
++}
++
++static inline size_t
++pnfs_osd_data_map_incore_sz(void)
++{
++	return sizeof(struct pnfs_osd_data_map);
++}
++
++/*   struct pnfs_osd_objid4 {
++ *       deviceid4       oid_device_id;
++ *       uint64_t        oid_partition_id;
++ *       uint64_t        oid_object_id;
++ *   };
++ */
++struct pnfs_osd_objid {
++	struct pnfs_deviceid	oid_device_id;
++	u64			oid_partition_id;
++	u64			oid_object_id;
++};
++
++/* For printout. I use "dev(%llx:%llx)", _DEVID_LO(), _DEVID_HI BE style */
++#define _DEVID_LO(oid_device_id) \
++	((unsigned long long)be64_to_cpup((__be64 *)(oid_device_id).data))
++
++#define _DEVID_HI(oid_device_id) \
++	((unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id).data) + 1))
++
++static inline int
++pnfs_osd_objid_xdr_sz(void)
++{
++	return (NFS4_PNFS_DEVICEID4_SIZE / 4) + 2 + 2;
++}
++
++static inline size_t
++pnfs_osd_objid_incore_sz(void)
++{
++	return sizeof(struct pnfs_osd_objid);
++}
++
++enum pnfs_osd_version {
++	PNFS_OSD_MISSING              = 0,
++	PNFS_OSD_VERSION_1            = 1,
++	PNFS_OSD_VERSION_2            = 2
++};
++
++struct pnfs_osd_opaque_cred {
++	u32 cred_len;
++	u8 *cred;
++};
++
++static inline int
++pnfs_osd_opaque_cred_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++	u32 n;
++
++	READ32(n);
++	p += XDR_QUADLEN(n);
++	return p - start;
++}
++
++static inline size_t
++pnfs_osd_opaque_cred_incore_sz(u32 *p)
++{
++	u32 n;
++
++	READ32(n);
++	return XDR_QUADLEN(n) * 4;
++}
++
++enum pnfs_osd_cap_key_sec {
++	PNFS_OSD_CAP_KEY_SEC_NONE     = 0,
++	PNFS_OSD_CAP_KEY_SEC_SSV      = 1,
++};
++
++/*   struct pnfs_osd_object_cred4 {
++ *       pnfs_osd_objid4         oc_object_id;
++ *       pnfs_osd_version4       oc_osd_version;
++ *       pnfs_osd_cap_key_sec4   oc_cap_key_sec;
++ *       opaque                  oc_capability_key<>;
++ *       opaque                  oc_capability<>;
++ *   };
++ */
++struct pnfs_osd_object_cred {
++	struct pnfs_osd_objid		oc_object_id;
++	u32				oc_osd_version;
++	u32				oc_cap_key_sec;
++	struct pnfs_osd_opaque_cred	oc_cap_key;
++	struct pnfs_osd_opaque_cred	oc_cap;
++};
++
++static inline int
++pnfs_osd_object_cred_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++
++	p += pnfs_osd_objid_xdr_sz() + 2;
++	p += pnfs_osd_opaque_cred_xdr_sz(p);
++	p += pnfs_osd_opaque_cred_xdr_sz(p);
++	return p - start;
++}
++
++static inline size_t
++pnfs_osd_object_cred_incore_sz(u32 *p)
++{
++	size_t sz = sizeof(struct pnfs_osd_object_cred);
++
++	p += pnfs_osd_objid_xdr_sz() + 2;
++	sz += pnfs_osd_opaque_cred_incore_sz(p);
++	p += pnfs_osd_opaque_cred_xdr_sz(p);
++	sz += pnfs_osd_opaque_cred_incore_sz(p);
++	return sz;
++}
++
++/*   struct pnfs_osd_layout4 {
++ *       pnfs_osd_data_map4      olo_map;
++ *       uint32_t                olo_comps_index;
++ *       pnfs_osd_object_cred4   olo_components<>;
++ *   };
++ */
++struct pnfs_osd_layout {
++	struct pnfs_osd_data_map	olo_map;
++	u32				olo_comps_index;
++	u32				olo_num_comps;
++	struct pnfs_osd_object_cred	*olo_comps;
++};
++
++static inline int
++pnfs_osd_layout_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++	u32 n;
++
++	p += pnfs_osd_data_map_xdr_sz() + 1;
++	READ32(n);
++	while ((int)(n--) > 0)
++		p += pnfs_osd_object_cred_xdr_sz(p);
++	return p - start;
++}
++
++static inline size_t
++pnfs_osd_layout_incore_sz(u32 *p)
++{
++	u32 n;
++	size_t sz;
++
++	p += pnfs_osd_data_map_xdr_sz() + 1;
++	READ32(n);
++	sz = sizeof(struct pnfs_osd_layout);
++	while ((int)(n--) > 0) {
++		sz += pnfs_osd_object_cred_incore_sz(p);
++		p += pnfs_osd_object_cred_xdr_sz(p);
++	}
++	return sz;
++}
++
++/* Device Address */
++
++enum pnfs_osd_targetid_type {
++	OBJ_TARGET_ANON = 1,
++	OBJ_TARGET_SCSI_NAME = 2,
++	OBJ_TARGET_SCSI_DEVICE_ID = 3,
++};
++
++/*   union pnfs_osd_targetid4 switch (pnfs_osd_targetid_type4 oti_type) {
++ *       case OBJ_TARGET_SCSI_NAME:
++ *           string              oti_scsi_name<>;
++ *
++ *       case OBJ_TARGET_SCSI_DEVICE_ID:
++ *           opaque              oti_scsi_device_id<>;
++ *
++ *       default:
++ *           void;
++ *   };
++ *
++ *   union pnfs_osd_targetaddr4 switch (bool ota_available) {
++ *       case TRUE:
++ *           netaddr4            ota_netaddr;
++ *       case FALSE:
++ *           void;
++ *   };
++ *
++ *   struct pnfs_osd_deviceaddr4 {
++ *       pnfs_osd_targetid4      oda_targetid;
++ *       pnfs_osd_targetaddr4    oda_targetaddr;
++ *       uint64_t                oda_lun;
++ *       opaque                  oda_systemid<>;
++ *       pnfs_osd_object_cred4   oda_root_obj_cred;
++ *       opaque                  oda_osdname<>;
++ *   };
++ */
++struct pnfs_osd_targetid {
++	u32				oti_type;
++	struct nfs4_string		oti_scsi_device_id;
++};
++
++enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 };
++
++/*   struct netaddr4 {
++ *       // see struct rpcb in RFC1833
++ *       string r_netid<>;    // network id
++ *       string r_addr<>;     // universal address
++ *   };
++ */
++struct pnfs_osd_net_addr {
++	struct nfs4_string	r_netid;
++	struct nfs4_string	r_addr;
++};
++
++struct pnfs_osd_targetaddr {
++	u32				ota_available;
++	struct pnfs_osd_net_addr	ota_netaddr;
++};
++
++enum {
++	NETWORK_ID_MAX = 16 / 4,
++	UNIVERSAL_ADDRESS_MAX = 64 / 4,
++	PNFS_OSD_TARGETADDR_MAX = 3 +  NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX,
++};
++
++struct pnfs_osd_deviceaddr {
++	struct pnfs_osd_targetid	oda_targetid;
++	struct pnfs_osd_targetaddr	oda_targetaddr;
++	u8				oda_lun[8];
++	struct nfs4_string		oda_systemid;
++	struct pnfs_osd_object_cred	oda_root_obj_cred;
++	struct nfs4_string		oda_osdname;
++};
++
++enum {
++	ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4,
++	PNFS_OSD_DEVICEADDR_MAX =
++		PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX +
++		2 /*oda_lun*/ +
++		1 + OSD_SYSTEMID_LEN +
++		1 + ODA_OSDNAME_MAX,
++};
++
++/* LAYOUTCOMMIT: layoutupdate */
++
++/*   union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) {
++ *       case TRUE:
++ *           int64_t     dsu_delta;
++ *       case FALSE:
++ *           void;
++ *   };
++ *
++ *   struct pnfs_osd_layoutupdate4 {
++ *       pnfs_osd_deltaspaceused4    olu_delta_space_used;
++ *       bool                        olu_ioerr_flag;
++ *   };
++ */
++struct pnfs_osd_layoutupdate {
++	u32	dsu_valid;
++	s64	dsu_delta;
++	u32	olu_ioerr_flag;
++};
++
++/* LAYOUTRETURN: I/O Error Report */
++
++enum pnfs_osd_errno {
++	PNFS_OSD_ERR_EIO		= 1,
++	PNFS_OSD_ERR_NOT_FOUND		= 2,
++	PNFS_OSD_ERR_NO_SPACE		= 3,
++	PNFS_OSD_ERR_BAD_CRED		= 4,
++	PNFS_OSD_ERR_NO_ACCESS		= 5,
++	PNFS_OSD_ERR_UNREACHABLE	= 6,
++	PNFS_OSD_ERR_RESOURCE		= 7
++};
++
++/*   struct pnfs_osd_ioerr4 {
++ *       pnfs_osd_objid4     oer_component;
++ *       length4             oer_comp_offset;
++ *       length4             oer_comp_length;
++ *       bool                oer_iswrite;
++ *       pnfs_osd_errno4     oer_errno;
++ *   };
++ */
++struct pnfs_osd_ioerr {
++	struct pnfs_osd_objid	oer_component;
++	u64			oer_comp_offset;
++	u64			oer_comp_length;
++	u32			oer_iswrite;
++	u32			oer_errno;
++};
++
++static inline unsigned
++pnfs_osd_ioerr_xdr_sz(void)
++{
++	return pnfs_osd_objid_xdr_sz() + 2 + 2 + 1 + 1;
++}
++
++/* OSD XDR API */
++
++/* Layout helpers */
++extern struct pnfs_osd_layout *pnfs_osd_xdr_decode_layout(
++	struct pnfs_osd_layout *layout, u32 *p);
++
++extern int pnfs_osd_xdr_encode_layout(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_layout *layout);
++
++/* Device Info helpers */
++
++/* First pass calculate total size for space needed */
++extern size_t pnfs_osd_xdr_deviceaddr_incore_sz(u32 *p);
++
++/* Note: some strings pointed to inside @deviceaddr might point
++ * to space inside @p. @p should stay valid while @deviceaddr
++ * is in use.
++ * It is assumed that @deviceaddr points to bigger memory of size
++ * calculated in first pass by pnfs_osd_xdr_deviceaddr_incore_sz()
++ */
++extern void pnfs_osd_xdr_decode_deviceaddr(
++	struct pnfs_osd_deviceaddr *deviceaddr, u32 *p);
++
++/* For Servers */
++extern int pnfs_osd_xdr_encode_deviceaddr(
++	struct exp_xdr_stream *xdr, struct pnfs_osd_deviceaddr *devaddr);
++
++/* layoutupdate (layout_commit) xdr helpers */
++extern int
++pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
++				 struct pnfs_osd_layoutupdate *lou);
++extern __be32 *
++pnfs_osd_xdr_decode_layoutupdate(struct pnfs_osd_layoutupdate *lou, __be32 *p);
++
++/* osd_ioerror encoding/decoding (layout_return) */
++extern int
++pnfs_osd_xdr_encode_ioerr(struct xdr_stream *xdr, struct pnfs_osd_ioerr *ioerr);
++extern __be32 *
++pnfs_osd_xdr_decode_ioerr(struct pnfs_osd_ioerr *ioerr, __be32 *p);
++
++#endif /* __PNFS_OSD_XDR_H__ */
+diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/include/linux/posix_acl.h
+--- linux-2.6.34.noarch/include/linux/posix_acl.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-08-31 20:42:05.601087875 -0400
+@@ -8,6 +8,7 @@
+ #ifndef __LINUX_POSIX_ACL_H
+ #define __LINUX_POSIX_ACL_H
+ 
++#include <linux/fs.h>
+ #include <linux/slab.h>
+ 
+ #define ACL_UNDEFINED_ID	(-1)
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-08-31 20:42:05.602100892 -0400
+@@ -14,6 +14,8 @@
+ /* size of an XDR encoding unit in bytes, i.e. 32bit */
+ #define XDR_UNIT	(4)
+ 
++#include <linux/types.h>
++
+ /* spec defines authentication flavor as an unsigned 32 bit integer */
+ typedef u32	rpc_authflavor_t;
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-08-31 20:42:05.603108001 -0400
+@@ -3,6 +3,7 @@
+ 
+ #ifdef __KERNEL__
+ 
++#include <linux/fs.h>
+ #include <linux/workqueue.h>
+ 
+ struct rpc_pipe_msg {
+@@ -11,6 +12,10 @@ struct rpc_pipe_msg {
+ 	size_t len;
+ 	size_t copied;
+ 	int errno;
++#define PIPEFS_AUTOFREE_RPCMSG       0x01 /* frees rpc_pipe_msg */
++#define PIPEFS_AUTOFREE_RPCMSG_DATA  0x02 /* frees rpc_pipe_msg->data */
++#define PIPEFS_AUTOFREE_UPCALL_MSG   PIPEFS_AUTOFREE_RPCMSG_DATA
++	u8 flags;
+ };
+ 
+ struct rpc_pipe_ops {
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-08-31 20:42:05.603108001 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-08-31 20:42:05.603108001 -0400
+@@ -0,0 +1,111 @@
++/*
++ *  Copyright (c) 2008 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  David M. Richter <richterd@citi.umich.edu>
++ *
++ *  Drawing on work done by Andy Adamson <andros@citi.umich.edu> and
++ *  Marius Eriksen <marius@monkey.org>.  Thanks for the help over the
++ *  years, guys.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *  With thanks to CITI's project sponsor and partner, IBM.
++ */
++
++#ifndef _SIMPLE_RPC_PIPEFS_H_
++#define _SIMPLE_RPC_PIPEFS_H_
++
++#include <linux/fs.h>
++#include <linux/list.h>
++#include <linux/mount.h>
++#include <linux/sched.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++
++
++#define payload_of(headerp)  ((void *)((headerp) + 1))
++
++/*
++ * struct pipefs_hdr -- the generic message format for simple_rpc_pipefs.
++ * Messages may simply be the header itself, although having an optional
++ * data payload follow the header allows much more flexibility.
++ *
++ * Messages are created using pipefs_alloc_init_msg() and
++ * pipefs_alloc_init_msg_padded(), both of which accept a pointer to an
++ * (optional) data payload.
++ *
++ * Given a struct pipefs_hdr *msg that has a struct foo payload, the data
++ * can be accessed using: struct foo *foop = payload_of(msg)
++ */
++struct pipefs_hdr {
++	u32 msgid;
++	u8  type;
++	u8  flags;
++	u16 totallen; /* length of entire message, including hdr itself */
++	u32 status;
++};
++
++/*
++ * struct pipefs_list -- a type of list used for tracking callers who've made an
++ * upcall and are blocked waiting for a reply.
++ *
++ * See pipefs_queue_upcall_waitreply() and pipefs_assign_upcall_reply().
++ */
++struct pipefs_list {
++	struct list_head list;
++	spinlock_t list_lock;
++};
++
++
++/* See net/sunrpc/simple_rpc_pipefs.c for more info on using these functions. */
++extern struct dentry *pipefs_mkpipe(const char *name,
++				    const struct rpc_pipe_ops *ops,
++				    int wait_for_open);
++extern void pipefs_closepipe(struct dentry *pipe);
++extern void pipefs_init_list(struct pipefs_list *list);
++extern struct pipefs_hdr *pipefs_alloc_init_msg(u32 msgid, u8 type, u8 flags,
++						void *data, u16 datalen);
++extern struct pipefs_hdr *pipefs_alloc_init_msg_padded(u32 msgid, u8 type,
++						       u8 flags, void *data,
++						       u16 datalen, u16 padlen);
++extern struct pipefs_hdr *pipefs_queue_upcall_waitreply(struct dentry *pipe,
++							struct pipefs_hdr *msg,
++							struct pipefs_list
++							*uplist, u8 upflags,
++							u32 timeout);
++extern int pipefs_queue_upcall_noreply(struct dentry *pipe,
++				       struct pipefs_hdr *msg, u8 upflags);
++extern int pipefs_assign_upcall_reply(struct pipefs_hdr *reply,
++				      struct pipefs_list *uplist);
++extern struct pipefs_hdr *pipefs_readmsg(struct file *filp,
++					 const char __user *src, size_t len);
++extern ssize_t pipefs_generic_upcall(struct file *filp,
++				     struct rpc_pipe_msg *rpcmsg,
++				     char __user *dst, size_t buflen);
++extern void pipefs_generic_destroy_msg(struct rpc_pipe_msg *rpcmsg);
++
++#endif /* _SIMPLE_RPC_PIPEFS_H_ */
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-08-31 20:42:05.604049784 -0400
+@@ -166,4 +166,41 @@ static inline char *__svc_print_addr(con
+ 
+ 	return buf;
+ }
++
++/*
++ * Print a network address in a universal format (see rfc1833 and nfsv4.1)
++ */
++static inline int __svc_print_netaddr(struct sockaddr *addr,
++				      struct xdr_netobj *na)
++{
++	u16 port;
++	ssize_t len;
++
++	switch (addr->sa_family) {
++	case AF_INET: {
++		struct sockaddr_in *sin = (struct sockaddr_in *)addr;
++		port = ntohs(sin->sin_port);
++
++		len = snprintf(na->data, na->len, "%pI4.%u.%u",
++				&sin->sin_addr,
++				port >> 8, port & 0xff);
++		break;
++	}
++	case AF_INET6: {
++		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
++		port = ntohs(sin6->sin6_port);
++
++		len = snprintf(na->data, na->len, "%pI6.%u.%u",
++				&sin6->sin6_addr,
++				port >> 8, port & 0xff);
++		break;
++	}
++	default:
++		snprintf(na->data, na->len, "unknown address type: %d",
++			 addr->sa_family);
++		len = -EINVAL;
++		break;
++	}
++	return len;
++}
+ #endif /* SUNRPC_SVC_XPRT_H */
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xdr.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-08-31 20:41:19.173118431 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-31 20:42:05.605107904 -0400
+@@ -131,6 +131,13 @@ xdr_decode_hyper(__be32 *p, __u64 *valp)
+ 	return p + 2;
+ }
+ 
++static inline __be32 *
++xdr_decode_opaque_fixed(__be32 *p, void *ptr, unsigned int len)
++{
++	memcpy(ptr, p, len);
++	return p + XDR_QUADLEN(len);
++}
++
+ /*
+  * Adjust kvec to reflect end of xdr'ed data (RPC client XDR)
+  */
+@@ -197,6 +204,7 @@ struct xdr_stream {
+ 
+ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
++extern __be32 *xdr_rewind_stream(struct xdr_stream *xdr, __be32 *q);
+ extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
+ 		unsigned int base, unsigned int len);
+ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+diff -up linux-2.6.34.noarch/localversion-pnfs.orig linux-2.6.34.noarch/localversion-pnfs
+--- linux-2.6.34.noarch/localversion-pnfs.orig	2010-08-31 20:42:05.605107904 -0400
++++ linux-2.6.34.noarch/localversion-pnfs	2010-08-31 20:42:05.605107904 -0400
+@@ -0,0 +1 @@
++-pnfs
+diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/sunrpc/Makefile
+--- linux-2.6.34.noarch/net/sunrpc/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-08-31 20:42:05.606020148 -0400
+@@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprt
+ 	    svc.o svcsock.o svcauth.o svcauth_unix.o \
+ 	    addr.o rpcb_clnt.o timer.o xdr.o \
+ 	    sunrpc_syms.o cache.o rpc_pipe.o \
+-	    svc_xprt.o
++	    svc_xprt.o simple_rpc_pipefs.o
+ sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o
+ sunrpc-$(CONFIG_PROC_FS) += stats.o
+ sunrpc-$(CONFIG_SYSCTL) += sysctl.o
+diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c
+--- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-08-31 20:42:05.606020148 -0400
++++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-08-31 20:42:05.607108065 -0400
+@@ -0,0 +1,424 @@
++/*
++ *  net/sunrpc/simple_rpc_pipefs.c
++ *
++ *  Copyright (c) 2008 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  David M. Richter <richterd@citi.umich.edu>
++ *
++ *  Drawing on work done by Andy Adamson <andros@citi.umich.edu> and
++ *  Marius Eriksen <marius@monkey.org>.  Thanks for the help over the
++ *  years, guys.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *  With thanks to CITI's project sponsor and partner, IBM.
++ */
++
++#include <linux/completion.h>
++#include <linux/uaccess.h>
++#include <linux/module.h>
++#include <linux/sunrpc/simple_rpc_pipefs.h>
++
++
++/*
++ * Make an rpc_pipefs pipe named @name at the root of the mounted rpc_pipefs
++ * filesystem.
++ *
++ * If @wait_for_open is non-zero and an upcall is later queued but the userland
++ * end of the pipe has not yet been opened, the upcall will remain queued until
++ * the pipe is opened; otherwise, the upcall queueing will return with -EPIPE.
++ */
++struct dentry *pipefs_mkpipe(const char *name, const struct rpc_pipe_ops *ops,
++			     int wait_for_open)
++{
++	struct dentry *dir, *pipe;
++	struct vfsmount *mnt;
++
++	mnt = rpc_get_mount();
++	if (IS_ERR(mnt))
++		return ERR_CAST(mnt);
++
++	dir = mnt->mnt_root;
++	if (dir)
++		pipe = rpc_mkpipe(dir, name, NULL, ops,
++				  wait_for_open ? RPC_PIPE_WAIT_FOR_OPEN : 0);
++	else
++		pipe = ERR_PTR(-ENOENT);
++	/* A failed pipe is never passed to pipefs_closepipe(): unpin here. */
++	if (IS_ERR(pipe))
++		rpc_put_mount();
++	return pipe;
++}
++EXPORT_SYMBOL(pipefs_mkpipe);
++
++/*
++ * Shutdown a pipe made by pipefs_mkpipe().
++ * XXX: do we need to retain an extra reference on the mount?
++ */
++void pipefs_closepipe(struct dentry *pipe)
++{
++	rpc_unlink(pipe);
++	rpc_put_mount();
++}
++EXPORT_SYMBOL(pipefs_closepipe);
++
++/*
++ * Initialize a struct pipefs_list -- a way to keep track of callers that
++ * have made an upcall and are blocked awaiting a reply.
++ *
++ * See pipefs_queue_upcall_waitreply() and pipefs_assign_upcall_reply() for
++ * how to use one.
++ */
++inline void pipefs_init_list(struct pipefs_list *list)
++{
++	INIT_LIST_HEAD(&list->list);
++	spin_lock_init(&list->list_lock);
++}
++EXPORT_SYMBOL(pipefs_init_list);
++
++/*
++ * Alloc/init a generic pipefs message header and copy into its message body
++ * an arbitrary data payload.
++ *
++ * struct pipefs_hdr's are meant to serve as generic, general-purpose message
++ * headers for easy rpc_pipefs I/O.  When an upcall is made, the
++ * struct pipefs_hdr is assigned to a struct rpc_pipe_msg and delivered
++ * therein.  --And yes, the naming can seem a little confusing at first:
++ *
++ * When one thinks of an upcall "message", in simple_rpc_pipefs that's a
++ * struct pipefs_hdr (possibly with an attached message body).  A
++ * struct rpc_pipe_msg is actually only the -vehicle- by which the "real"
++ * message is delivered and processed.
++ */
++struct pipefs_hdr *pipefs_alloc_init_msg_padded(u32 msgid, u8 type, u8 flags,
++					   void *data, u16 datalen, u16 padlen)
++{
++	size_t totallen;	/* wider than u16 so the sum below cannot wrap */
++	struct pipefs_hdr *msg = NULL;
++
++	totallen = sizeof(*msg) + datalen + padlen;
++	if (totallen > PAGE_SIZE || totallen > (u16)~0) { /* hdr field is u16 */
++		msg = ERR_PTR(-E2BIG);
++		goto out;
++	}
++
++	msg = kzalloc(totallen, GFP_KERNEL);
++	if (!msg) {
++		msg = ERR_PTR(-ENOMEM);
++		goto out;
++	}
++
++	msg->msgid = msgid;
++	msg->type = type;
++	msg->flags = flags;
++	msg->totallen = totallen;
++	memcpy(payload_of(msg), data, datalen);
++out:
++	return msg;
++}
++EXPORT_SYMBOL(pipefs_alloc_init_msg_padded);
++
++/*
++ * See the description of pipefs_alloc_init_msg_padded().
++ */
++struct pipefs_hdr *pipefs_alloc_init_msg(u32 msgid, u8 type, u8 flags,
++				    void *data, u16 datalen)
++{
++	return pipefs_alloc_init_msg_padded(msgid, type, flags, data,
++					    datalen, 0);
++}
++EXPORT_SYMBOL(pipefs_alloc_init_msg);
++
++
++static void pipefs_init_rpcmsg(struct rpc_pipe_msg *rpcmsg,
++			       struct pipefs_hdr *msg, u8 upflags)
++{
++	memset(rpcmsg, 0, sizeof(*rpcmsg));
++	rpcmsg->data = msg;
++	rpcmsg->len = msg->totallen;
++	rpcmsg->flags = upflags;
++}
++
++static struct rpc_pipe_msg *pipefs_alloc_init_rpcmsg(struct pipefs_hdr *msg,
++						     u8 upflags)
++{
++	struct rpc_pipe_msg *rpcmsg;
++
++	rpcmsg = kmalloc(sizeof(*rpcmsg), GFP_KERNEL);
++	if (!rpcmsg)
++		return ERR_PTR(-ENOMEM);
++
++	pipefs_init_rpcmsg(rpcmsg, msg, upflags);
++	return rpcmsg;
++}
++
++
++/* represents an upcall that'll block and wait for a reply */
++struct pipefs_upcall {
++	u32 msgid;
++	struct rpc_pipe_msg rpcmsg;
++	struct list_head list;
++	wait_queue_head_t waitq;
++	struct pipefs_hdr *reply;
++};
++
++
++static void pipefs_init_upcall_waitreply(struct pipefs_upcall *upcall,
++					 struct pipefs_hdr *msg, u8 upflags)
++{
++	upcall->reply = NULL;
++	upcall->msgid = msg->msgid;
++	INIT_LIST_HEAD(&upcall->list);
++	init_waitqueue_head(&upcall->waitq);
++	pipefs_init_rpcmsg(&upcall->rpcmsg, msg, upflags);
++}
++
++static int __pipefs_queue_upcall_waitreply(struct dentry *pipe,
++					   struct pipefs_upcall *upcall,
++					   struct pipefs_list *uplist,
++					   u32 timeout)
++{
++	int err = 0;
++	DECLARE_WAITQUEUE(wq, current);
++	/* Join the upcall's waitqueue and @uplist before queueing the upcall. */
++	add_wait_queue(&upcall->waitq, &wq);
++	spin_lock(&uplist->list_lock);
++	list_add(&upcall->list, &uplist->list);
++	spin_unlock(&uplist->list_lock);
++
++	err = rpc_queue_upcall(pipe->d_inode, &upcall->rpcmsg);
++	if (err < 0)
++		goto out;
++	/* NOTE(review): sleep state is set after queueing; early wakeup lost? */
++	if (timeout) {
++		/* retval of 0 means timer expired; nonzero is returned as-is */
++		err = schedule_timeout_uninterruptible(timeout);
++		if (err == 0 && upcall->reply == NULL)
++			err = -ETIMEDOUT;
++	} else {
++		set_current_state(TASK_UNINTERRUPTIBLE);
++		schedule();
++		__set_current_state(TASK_RUNNING);
++	}
++	/* Success and failure both unlink from @uplist and the waitqueue. */
++out:
++	spin_lock(&uplist->list_lock);
++	list_del_init(&upcall->list);
++	spin_unlock(&uplist->list_lock);
++	remove_wait_queue(&upcall->waitq, &wq);
++	return err;
++}
++
++/*
++ * Queue a pipefs msg for an upcall to userspace, place the calling thread
++ * on @uplist, and block the thread to wait for a reply.  If @timeout is
++ * nonzero, the thread will be blocked for at most @timeout jiffies.
++ *
++ * (To convert time units into jiffies, consider the functions
++ *  msecs_to_jiffies(), usecs_to_jiffies(), timeval_to_jiffies(), and
++ *  timespec_to_jiffies().)
++ *
++ * Once a reply is received by your downcall handler, call
++ * pipefs_assign_upcall_reply() with @uplist to find the corresponding upcall,
++ * assign the reply, and wake the waiting thread.
++ *
++ * The return value may be an error (e.g. -ETIMEDOUT when @timeout expires with
++ * no reply): check it with IS_ERR() before accessing the reply message.
++ *
++ * Callers are responsible for freeing @msg, unless pipefs_generic_destroy_msg()
++ * is used as the ->destroy_msg() callback and the PIPEFS_AUTOFREE_UPCALL_MSG
++ * flag is set in @upflags.  See also rpc_pipe_fs.h.
++ */
++struct pipefs_hdr *pipefs_queue_upcall_waitreply(struct dentry *pipe,
++					    struct pipefs_hdr *msg,
++					    struct pipefs_list *uplist,
++					    u8 upflags, u32 timeout)
++{
++	int err = 0;
++	struct pipefs_upcall upcall;	/* on-stack: the helper unlinks it from @uplist */
++
++	pipefs_init_upcall_waitreply(&upcall, msg, upflags);
++	err = __pipefs_queue_upcall_waitreply(pipe, &upcall, uplist, timeout);
++	if (err < 0) {
++		kfree(upcall.reply);	/* kfree(NULL) is a no-op if no reply was set */
++		upcall.reply = ERR_PTR(err);
++	}
++	/* NOTE(review): confirm no downcall can still hold &upcall once we return */
++	return upcall.reply;
++}
++EXPORT_SYMBOL(pipefs_queue_upcall_waitreply);
++
++/*
++ * Queue a pipefs msg for an upcall to userspace and immediately return (i.e.,
++ * no reply is expected).  Returns 0 or a negative errno; on error nothing was
++ * queued, the internal rpc_pipe_msg is released and @msg is left untouched.
++ * Callers are responsible for freeing @msg, unless pipefs_generic_destroy_msg()
++ * is used as the ->destroy_msg() callback and the PIPEFS_AUTOFREE_UPCALL_MSG
++ * flag is set in @upflags.  See also rpc_pipe_fs.h.
++ */
++int pipefs_queue_upcall_noreply(struct dentry *pipe, struct pipefs_hdr *msg,
++				u8 upflags)
++{
++	int err;
++	struct rpc_pipe_msg *rpcmsg;
++
++	upflags |= PIPEFS_AUTOFREE_RPCMSG;	/* no waiter exists to free it later */
++	rpcmsg = pipefs_alloc_init_rpcmsg(msg, upflags);
++	if (IS_ERR(rpcmsg))
++		return PTR_ERR(rpcmsg);
++
++	err = rpc_queue_upcall(pipe->d_inode, rpcmsg);
++	if (err < 0)
++		kfree(rpcmsg);	/* never queued, so ->destroy_msg() won't free it */
++	return err;
++}
++EXPORT_SYMBOL(pipefs_queue_upcall_noreply);
++
++/* Return the upcall on @uplist whose msgid equals @msgid, or NULL if none. */
++static struct pipefs_upcall *pipefs_find_upcall_msgid(u32 msgid,
++						 struct pipefs_list *uplist)
++{
++	struct pipefs_upcall *upcall;
++
++	spin_lock(&uplist->list_lock);	/* the list is only walked under its lock */
++	list_for_each_entry(upcall, &uplist->list, list)
++		if (upcall->msgid == msgid)
++			goto out;
++	upcall = NULL;	/* walked the whole list without a match */
++out:
++	spin_unlock(&uplist->list_lock);
++	return upcall;	/* NOTE(review): handed back after the lock is dropped */
++}
++
++/*
++ * In your rpc_pipe_ops->downcall() handler, once you've read in a downcall
++ * message and have determined that it is a reply to a waiting upcall,
++ * you can use this function to find the appropriate upcall, assign the result,
++ * and wake the upcall thread.
++ *
++ * The reply message must have the same msgid as the original upcall message's.
++ * Returns 0 on success, or -ENOENT when no upcall on @uplist has that msgid.
++ * See also pipefs_queue_upcall_waitreply() and pipefs_readmsg().
++ */
++int pipefs_assign_upcall_reply(struct pipefs_hdr *reply,
++			       struct pipefs_list *uplist)
++{
++	int err = 0;
++	struct pipefs_upcall *upcall;
++
++	upcall = pipefs_find_upcall_msgid(reply->msgid, uplist);
++	if (!upcall) {
++		printk(KERN_ERR "%s: ERROR: have reply but no matching upcall "
++			"for msgid %d\n", __func__, reply->msgid);
++		err = -ENOENT;	/* @reply is neither stored nor freed on this path */
++		goto out;
++	}
++	upcall->reply = reply;	/* must be visible before the waiter is woken */
++	wake_up(&upcall->waitq);
++out:
++	return err;
++}
++EXPORT_SYMBOL(pipefs_assign_upcall_reply);
++
++/*
++ * Generic method to read-in and return a newly-allocated message which begins
++ * with a struct pipefs_hdr.  Returns the kzalloc()ed copy, or an ERR_PTR().
++ */
++struct pipefs_hdr *pipefs_readmsg(struct file *filp, const char __user *src,
++			     size_t len)	/* @filp is not used here */
++{
++	int err = 0, hdrsize;
++	struct pipefs_hdr *msg = NULL;
++
++	hdrsize = sizeof(*msg);
++	if (len < hdrsize) {	/* too short to even hold the header */
++		printk(KERN_ERR "%s: ERROR: header is too short (%d vs %d)\n",
++		       __func__, (int) len, hdrsize);
++		err = -EINVAL;
++		goto out;
++	}
++	/* NOTE(review): @len has no upper bound here - confirm callers cap it */
++	msg = kzalloc(len, GFP_KERNEL);
++	if (!msg) {
++		err = -ENOMEM;
++		goto out;
++	}
++	if (copy_from_user(msg, src, len))	/* nonzero: some bytes were not copied */
++		err = -EFAULT;
++out:
++	if (err) {
++		kfree(msg);	/* msg is still NULL on the -EINVAL/-ENOMEM paths */
++		msg = ERR_PTR(err);
++	}
++	return msg;
++}
++EXPORT_SYMBOL(pipefs_readmsg);
++
++/*
++ * Generic rpc_pipe_ops->upcall() handler: copies the unread part of @rpcmsg to
++ * @dst (at most @buflen bytes); returns bytes copied, or -EFAULT if none could.
++ * Don't call this directly: to make an upcall, use
++ * pipefs_queue_upcall_waitreply() or pipefs_queue_upcall_noreply().
++ */
++ssize_t pipefs_generic_upcall(struct file *filp, struct rpc_pipe_msg *rpcmsg,
++			      char __user *dst, size_t buflen)
++{
++	char *data;
++	ssize_t len, left;
++
++	data = (char *)rpcmsg->data + rpcmsg->copied;	/* resume after a short read */
++	len = rpcmsg->len - rpcmsg->copied;
++	if (len > buflen)
++		len = buflen;
++
++	left = copy_to_user(dst, data, len);	/* bytes NOT copied, never < 0 */
++	if (len && left == len) {
++		rpcmsg->errno = -EFAULT;
++		return -EFAULT;
++	}
++
++	len -= left;
++	rpcmsg->copied += len;
++	rpcmsg->errno = 0;
++	return len;
++}
++EXPORT_SYMBOL(pipefs_generic_upcall);
++
++/*
++ * Generic rpc_pipe_ops->destroy_msg() handler implementation.
++ *
++ * Items are only freed if @rpcmsg->flags has been set appropriately.
++ * See pipefs_queue_upcall_noreply() and rpc_pipe_fs.h.
++ */
++void pipefs_generic_destroy_msg(struct rpc_pipe_msg *rpcmsg)
++{
++	if (rpcmsg->flags & PIPEFS_AUTOFREE_UPCALL_MSG)
++		kfree(rpcmsg->data);	/* the message buffer sent to userspace */
++	if (rpcmsg->flags & PIPEFS_AUTOFREE_RPCMSG)
++		kfree(rpcmsg);	/* the wrapper itself; freed last, after ->data */
++}
++EXPORT_SYMBOL(pipefs_generic_destroy_msg);
+diff -up linux-2.6.34.noarch/net/sunrpc/xdr.c.orig linux-2.6.34.noarch/net/sunrpc/xdr.c
+--- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-08-31 20:41:19.188144022 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-31 20:42:05.607108065 -0400
+@@ -395,24 +395,29 @@ xdr_shrink_pagelen(struct xdr_buf *buf, 
+ {
+ 	struct kvec *tail;
+ 	size_t copy;
+-	char *p;
+ 	unsigned int pglen = buf->page_len;
++	unsigned int tailbuf_len;
+ 
+ 	tail = buf->tail;
+ 	BUG_ON (len > pglen);
+ 
++	tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len;
++
+ 	/* Shift the tail first */
+-	if (tail->iov_len != 0) {
+-		p = (char *)tail->iov_base + len;
++	if (tailbuf_len != 0) {
++		unsigned int free_space = tailbuf_len - tail->iov_len;
++
++		if (len < free_space)
++			free_space = len;
++		tail->iov_len += free_space;
++
++		copy = len;
+ 		if (tail->iov_len > len) {
+-			copy = tail->iov_len - len;
+-			memmove(p, tail->iov_base, copy);
++			char *p = (char *)tail->iov_base + len;
++			memmove(p, tail->iov_base, tail->iov_len - len);
+ 		} else
+-			buf->buflen -= len;
+-		/* Copy from the inlined pages into the tail */
+-		copy = len;
+-		if (copy > tail->iov_len)
+ 			copy = tail->iov_len;
++		/* Copy from the inlined pages into the tail */
+ 		_copy_from_pages((char *)tail->iov_base,
+ 				buf->pages, buf->page_base + pglen - len,
+ 				copy);
+@@ -496,6 +501,27 @@ __be32 * xdr_reserve_space(struct xdr_st
+ EXPORT_SYMBOL_GPL(xdr_reserve_space);
+ 
+ /**
++ * xdr_rewind_stream - rewind a stream back to some checkpoint
++ * @xdr: pointer to xdr_stream
++ * @q: some checkpoint at historical place of @xdr
++ *
++ * Restores an xdr stream to some historical point. @q must be
++ * a logical xdr point in the past that was sampled by @q = @xdr->p.
++ */
++__be32 *xdr_rewind_stream(struct xdr_stream *xdr, __be32 *q)
++{
++	size_t nbytes = (xdr->p - q) << 2;	/* 32-bit words -> bytes */
++
++	BUG_ON(xdr->p < q);	/* @q must not lie ahead of the current position */
++	BUG_ON(nbytes > xdr->iov->iov_len || nbytes > xdr->buf->len);
++	xdr->p = q;
++	xdr->iov->iov_len -= nbytes;	/* give the rewound bytes back */
++	xdr->buf->len -= nbytes;
++	return q;
++}
++EXPORT_SYMBOL_GPL(xdr_rewind_stream);
++
++/**
+  * xdr_write_pages - Insert a list of pages into an XDR buffer for sending
+  * @xdr: pointer to xdr_stream
+  * @pages: list of pages

From c368aef481d0efe35e12a274d914417e1beb972e Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Sat, 4 Sep 2010 09:23:12 -0400
Subject: [PATCH 10/20] Removed localversion-nfs file

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 pnfs-all-2.6.35-2010-08-24-f13.patch | 393 +++++++++++++--------------
 1 file changed, 194 insertions(+), 199 deletions(-)

diff --git a/pnfs-all-2.6.35-2010-08-24-f13.patch b/pnfs-all-2.6.35-2010-08-24-f13.patch
index 17d1c844d..7d82d9fa4 100644
--- a/pnfs-all-2.6.35-2010-08-24-f13.patch
+++ b/pnfs-all-2.6.35-2010-08-24-f13.patch
@@ -1,6 +1,6 @@
 diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arch/um/os-Linux/mem.c
---- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-08-31 20:41:16.924243041 -0400
-+++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-08-31 20:42:05.486160576 -0400
+--- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-09-04 09:20:04.110038647 -0400
++++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-09-04 09:21:44.875202803 -0400
 @@ -13,6 +13,7 @@
  #include <sys/stat.h>
  #include <sys/mman.h>
@@ -11,7 +11,7 @@ diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arc
  #include "os.h"
 diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.c
 --- linux-2.6.34.noarch/block/genhd.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/block/genhd.c	2010-08-31 20:42:05.487160201 -0400
++++ linux-2.6.34.noarch/block/genhd.c	2010-09-04 09:21:44.875202803 -0400
 @@ -1009,6 +1009,7 @@ static void disk_release(struct device *
  struct class block_class = {
  	.name		= "block",
@@ -21,8 +21,8 @@ diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.
  static char *block_devnode(struct device *dev, mode_t *mode)
  {
 diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt
---- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-08-31 20:42:05.486160576 -0400
-+++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-08-31 20:42:05.486160576 -0400
+--- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-09-04 09:21:44.876222743 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-09-04 09:21:44.876222743 -0400
 @@ -0,0 +1,211 @@
 +(c) 2007 Network Appliance Inc.
 +
@@ -236,8 +236,8 @@ diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.
 +
 +
 diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/drivers/md/dm-ioctl.c
---- linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig	2010-08-31 20:41:17.063232968 -0400
-+++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-08-31 20:42:05.488160560 -0400
+--- linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig	2010-09-04 09:20:04.252180557 -0400
++++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-09-04 09:21:44.877242928 -0400
 @@ -657,6 +657,12 @@ static int dev_create(struct dm_ioctl *p
  	return r;
  }
@@ -292,7 +292,7 @@ diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/driv
  	int r;
 diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drivers/scsi/hosts.c
 --- linux-2.6.34.noarch/drivers/scsi/hosts.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-08-31 20:42:05.489160594 -0400
++++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-09-04 09:21:44.879035601 -0400
 @@ -49,7 +49,7 @@ static void scsi_host_cls_release(struct
  	put_device(&class_to_shost(dev)->shost_gendev);
  }
@@ -304,7 +304,7 @@ diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drive
  };
 diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/exofs.h
 --- linux-2.6.34.noarch/fs/exofs/exofs.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-08-31 20:42:05.492243039 -0400
++++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-09-04 09:21:44.879035601 -0400
 @@ -36,13 +36,9 @@
  #include <linux/fs.h>
  #include <linux/time.h>
@@ -360,8 +360,8 @@ diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/
 +
  #endif
 diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs/export.c
---- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-08-31 20:42:05.493222759 -0400
-+++ linux-2.6.34.noarch/fs/exofs/export.c	2010-08-31 20:42:05.493222759 -0400
+--- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-09-04 09:21:44.880171068 -0400
++++ linux-2.6.34.noarch/fs/exofs/export.c	2010-09-04 09:21:44.880171068 -0400
 @@ -0,0 +1,396 @@
 +/*
 + * export.c - Implementation of the pnfs_export_operations
@@ -761,7 +761,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs
 +}
 diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/inode.c
 --- linux-2.6.34.noarch/fs/exofs/inode.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-08-31 20:42:05.494222756 -0400
++++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-09-04 09:21:44.881160952 -0400
 @@ -833,7 +833,7 @@ void exofs_truncate(struct inode *inode)
  	if (unlikely(wait_obj_created(oi)))
  		goto fail;
@@ -781,7 +781,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/
   * Fill in an inode read from the OSD and set it up for use
 diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/Kbuild
 --- linux-2.6.34.noarch/fs/exofs/Kbuild.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-08-31 20:42:05.490222933 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-09-04 09:21:44.882160660 -0400
 @@ -13,4 +13,5 @@
  #
  
@@ -790,7 +790,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/K
  obj-$(CONFIG_EXOFS_FS) += exofs.o
 diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/Kconfig
 --- linux-2.6.34.noarch/fs/exofs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-08-31 20:42:05.491232880 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-09-04 09:21:44.883039027 -0400
 @@ -1,6 +1,7 @@
  config EXOFS_FS
  	tristate "exofs: OSD based file system support"
@@ -801,7 +801,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/
  	  as its backing storage.
 diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/super.c
 --- linux-2.6.34.noarch/fs/exofs/super.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/super.c	2010-08-31 20:42:05.496073173 -0400
++++ linux-2.6.34.noarch/fs/exofs/super.c	2010-09-04 09:21:44.883039027 -0400
 @@ -621,6 +621,7 @@ static int exofs_fill_super(struct super
  	sb->s_fs_info = sbi;
  	sb->s_op = &exofs_sops;
@@ -812,7 +812,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/
  		EXOFS_ERR("ERROR: exofs_iget failed\n");
 diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exportfs/expfs.c
 --- linux-2.6.34.noarch/fs/exportfs/expfs.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-08-31 20:42:05.497212975 -0400
++++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-09-04 09:21:44.884180594 -0400
 @@ -16,6 +16,13 @@
  #include <linux/namei.h>
  #include <linux/sched.h>
@@ -829,7 +829,7 @@ diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exp
  
 diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/exportfs/Makefile
 --- linux-2.6.34.noarch/fs/exportfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-08-31 20:42:05.496073173 -0400
++++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-09-04 09:21:44.885160697 -0400
 @@ -3,4 +3,7 @@
  
  obj-$(CONFIG_EXPORTFS) += exportfs.o
@@ -840,8 +840,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/ex
 +exportfs-$(CONFIG_EXPORTFS_OSD_LAYOUT)	+= pnfs_osd_xdr_srv.o
 +exportfs-$(CONFIG_EXPORTFS_BLOCK_LAYOUT) += nfs4blocklayoutxdr.o
 diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c
---- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-08-31 20:42:05.497212975 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-08-31 20:42:05.498113655 -0400
+--- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-09-04 09:21:44.885160697 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-09-04 09:21:44.885160697 -0400
 @@ -0,0 +1,158 @@
 +/*
 + *  linux/fs/nfsd/nfs4blocklayoutxdr.c
@@ -1002,8 +1002,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.
 +}
 +EXPORT_SYMBOL_GPL(blocklayout_encode_layout);
 diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c
---- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-08-31 20:42:05.498113655 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-08-31 20:42:05.498113655 -0400
+--- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-09-04 09:21:44.886051895 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-09-04 09:21:44.886051895 -0400
 @@ -0,0 +1,218 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -1224,8 +1224,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.n
 +}
 +EXPORT_SYMBOL(filelayout_encode_layout);
 diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c
---- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-08-31 20:42:05.499125509 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-08-31 20:42:05.499125509 -0400
+--- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-09-04 09:21:44.887054758 -0400
++++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-09-04 09:21:44.887054758 -0400
 @@ -0,0 +1,289 @@
 +/*
 + *  pnfs_osd_xdr_enc.c
@@ -1518,7 +1518,7 @@ diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.no
 +EXPORT_SYMBOL(pnfs_osd_xdr_decode_ioerr);
 diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gfs2/ops_fstype.c
 --- linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-08-31 20:42:05.500123860 -0400
++++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-09-04 09:21:44.888035389 -0400
 @@ -19,6 +19,7 @@
  #include <linux/gfs2_ondisk.h>
  #include <linux/slow-work.h>
@@ -1539,7 +1539,7 @@ diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gf
  	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
 diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
 --- linux-2.6.34.noarch/fs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/Kconfig	2010-08-31 20:42:05.490222933 -0400
++++ linux-2.6.34.noarch/fs/Kconfig	2010-09-04 09:21:44.889035490 -0400
 @@ -224,6 +224,31 @@ config LOCKD_V4
  config EXPORTFS
  	tristate
@@ -1573,8 +1573,8 @@ diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
  	tristate
  	select FS_POSIX_ACL
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-08-31 20:42:05.503222878 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-08-31 20:42:05.503222878 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-09-04 09:21:44.890035431 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-09-04 09:21:44.890035431 -0400
 @@ -0,0 +1,66 @@
 +#include <linux/module.h>
 +#include <linux/uaccess.h>
@@ -1643,8 +1643,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.or
 +	return;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-08-31 20:42:05.504232855 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-08-31 20:42:05.504232855 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-09-04 09:21:44.891045310 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-09-04 09:21:44.891045310 -0400
 @@ -0,0 +1,1160 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.c
@@ -2807,8 +2807,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.
 +module_init(nfs4blocklayout_init);
 +module_exit(nfs4blocklayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-08-31 20:42:05.506119071 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-08-31 20:42:05.506119071 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-09-04 09:21:44.892025716 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-09-04 09:21:44.892025716 -0400
 @@ -0,0 +1,335 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayoutdev.c
@@ -3146,8 +3146,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.
 +	goto out;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-08-31 20:42:05.506119071 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-08-31 20:42:05.506119071 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-09-04 09:21:44.893035500 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-09-04 09:21:44.893035500 -0400
 @@ -0,0 +1,120 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayoutdm.c
@@ -3270,8 +3270,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.3
 +	}
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-08-31 20:42:05.505169618 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-08-31 20:42:05.505169618 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-09-04 09:21:44.894045279 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-09-04 09:21:44.894045279 -0400
 @@ -0,0 +1,302 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.h
@@ -3576,8 +3576,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.
 +
 +#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-08-31 20:42:05.507113260 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-08-31 20:42:05.508119925 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-09-04 09:21:44.895035248 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-09-04 09:21:44.895035248 -0400
 @@ -0,0 +1,948 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.h
@@ -4528,8 +4528,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noar
 +	}
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile
---- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-08-31 20:42:05.502212803 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-08-31 20:42:05.502212803 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-09-04 09:21:44.895035248 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-09-04 09:21:44.896025369 -0400
 @@ -0,0 +1,6 @@
 +#
 +# Makefile for the pNFS block layout driver kernel module
@@ -4539,7 +4539,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarc
 +			extents.o block-device-discovery-pipe.o
 diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/callback.h
 --- linux-2.6.34.noarch/fs/nfs/callback.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-08-31 20:42:05.508119925 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-09-04 09:21:44.896025369 -0400
 @@ -111,6 +111,13 @@ extern int nfs41_validate_delegation_sta
  
  #define RCA4_TYPE_MASK_RDATA_DLG	0
@@ -4596,7 +4596,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/c
  extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
 diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/nfs/callback_proc.c
 --- linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-08-31 20:42:05.509093330 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-09-04 09:21:44.897056128 -0400
 @@ -8,10 +8,15 @@
  #include <linux/nfs4.h>
  #include <linux/nfs_fs.h>
@@ -5079,7 +5079,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/
  	return status;
 diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/nfs/callback_xdr.c
 --- linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-08-31 20:42:05.510143651 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-09-04 09:21:44.898072186 -0400
 @@ -22,6 +22,8 @@
  #define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
  
@@ -5281,8 +5281,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/n
  		.process_op = (callback_process_op_t)nfs4_callback_sequence,
  		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
 diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/client.c
---- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-08-31 20:41:19.144140225 -0400
-+++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-31 20:42:05.511222861 -0400
+--- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-09-04 09:20:05.988202702 -0400
++++ linux-2.6.34.noarch/fs/nfs/client.c	2010-09-04 09:21:44.900025165 -0400
 @@ -39,6 +39,7 @@
  #include <net/ipv6.h>
  #include <linux/nfs_xdr.h>
@@ -5491,8 +5491,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/cli
  		goto error;
  
 diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/bl_com.c
---- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-08-31 20:42:05.550110844 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-08-31 20:42:05.550110844 -0400
+--- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-09-04 09:21:44.900025165 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-09-04 09:21:44.901035455 -0400
 @@ -0,0 +1,292 @@
 +#if defined(CONFIG_SPNFS_BLOCK)
 +
@@ -5787,8 +5787,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/b
 +}
 +#endif /* CONFIG_SPNFS_BLOCK */
 diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/bl_ops.c
---- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-08-31 20:42:05.551222888 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-08-31 20:42:05.551222888 -0400
+--- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-09-04 09:21:44.902035254 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-09-04 09:21:44.902035254 -0400
 @@ -0,0 +1,1672 @@
 +/*
 + *  bl_ops.c
@@ -7463,8 +7463,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/b
 +
 +#endif /* CONFIG_SPNFS_BLOCK */
 diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs/delegation.c
---- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-08-31 20:41:19.144140225 -0400
-+++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-31 20:42:05.512106042 -0400
+--- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-09-04 09:20:05.988202702 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-09-04 09:21:44.903025737 -0400
 @@ -104,7 +104,8 @@ again:
  			continue;
  		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
@@ -7541,7 +7541,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs
  	rcu_read_unlock();
 diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs/delegation.h
 --- linux-2.6.34.noarch/fs/nfs/delegation.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-08-31 20:42:05.513114811 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-09-04 09:21:44.904035627 -0400
 @@ -34,9 +34,7 @@ enum {
  int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
  void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
@@ -7554,8 +7554,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs
  
  struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
 diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
---- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-08-31 20:41:19.196140434 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-31 20:42:05.553222784 -0400
+--- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-09-04 09:20:06.039203080 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-09-04 09:21:44.905045348 -0400
 @@ -17,11 +17,19 @@
  #include <linux/module.h>
  #include <linux/exportfs.h>
@@ -7733,7 +7733,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/e
  	svcauth_unix_purge();
 diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/direct.c
 --- linux-2.6.34.noarch/fs/nfs/direct.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-08-31 20:42:05.514196343 -0400
++++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-09-04 09:21:44.906025356 -0400
 @@ -267,6 +267,38 @@ static const struct rpc_call_ops nfs_rea
  	.rpc_release = nfs_direct_read_release,
  };
@@ -7979,7 +7979,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/dir
  		user_addr += bytes;
 diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kconfig
 --- linux-2.6.34.noarch/fs/nfsd/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-08-31 20:42:05.549222922 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-09-04 09:21:44.907035472 -0400
 @@ -79,3 +79,52 @@ config NFSD_V4
  	  available from http://linux-nfs.org/.
  
@@ -8035,7 +8035,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kc
 +	  If unsure, say N.
 diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/Makefile
 --- linux-2.6.34.noarch/fs/nfsd/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-08-31 20:42:05.549222922 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-09-04 09:21:44.907035472 -0400
 @@ -11,3 +11,7 @@ nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs
  nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
  nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
@@ -8045,8 +8045,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/M
 +nfsd-$(CONFIG_SPNFS)	+= spnfs_com.o spnfs_ops.o
 +nfsd-$(CONFIG_SPNFS_BLOCK) += bl_com.o bl_ops.o
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-08-31 20:41:19.197150385 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-31 20:42:05.554114789 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-09-04 09:20:06.040212867 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-09-04 09:21:44.908055511 -0400
 @@ -40,7 +40,6 @@
  
  #define NFSPROC4_CB_NULL 0
@@ -8586,8 +8586,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/
 +}
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-08-31 20:42:05.556172071 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-08-31 20:42:05.556172071 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-09-04 09:21:44.910025108 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-09-04 09:21:44.910025108 -0400
 @@ -0,0 +1,1679 @@
 +/******************************************************************************
 + *
@@ -10269,8 +10269,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfs
 +	return status;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-08-31 20:42:05.557222774 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-08-31 20:42:05.557222774 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-09-04 09:21:44.911025728 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-09-04 09:21:44.911025728 -0400
 @@ -0,0 +1,461 @@
 +/******************************************************************************
 + *
@@ -10734,8 +10734,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/n
 +};
 +EXPORT_SYMBOL(pnfs_dlm_export_ops);
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-08-31 20:42:05.558141620 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-08-31 20:42:05.558141620 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-09-04 09:21:44.912035398 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-09-04 09:21:44.912035398 -0400
 @@ -0,0 +1,620 @@
 +/*
 +*  linux/fs/nfsd/nfs4pnfsds.c
@@ -11358,8 +11358,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nf
 +
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-08-31 20:41:19.198160463 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-31 20:42:05.559129617 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-09-04 09:20:06.041223204 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-09-04 09:21:44.913035888 -0400
 @@ -34,10 +34,14 @@
   */
  #include <linux/file.h>
@@ -11834,8 +11834,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd
  
  static const char *nfsd4_op_name(unsigned opnum)
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-08-31 20:41:19.200150153 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-31 20:42:05.561202607 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-09-04 09:20:06.043212709 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-09-04 09:21:44.916015197 -0400
 @@ -42,6 +42,8 @@
  #include "xdr4.h"
  #include "vfs.h"
@@ -12351,8 +12351,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs
  }
  
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-08-31 20:41:19.202150173 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-31 20:42:05.563232916 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-09-04 09:20:06.045212665 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-09-04 09:21:44.918025318 -0400
 @@ -47,9 +47,14 @@
  #include <linux/nfsd_idmap.h>
  #include <linux/nfs4_acl.h>
@@ -12971,8 +12971,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/
  	[OP_SEQUENCE]		= (nfsd4_enc)nfsd4_encode_sequence,
  	[OP_SET_SSV]		= (nfsd4_enc)nfsd4_encode_noop,
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
---- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-08-31 20:41:19.203150982 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-31 20:42:05.565212801 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-09-04 09:20:06.047233081 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-09-04 09:21:44.920025397 -0400
 @@ -13,10 +13,15 @@
  #include <linux/nfsd/syscall.h>
  #include <linux/lockd/lockd.h>
@@ -13149,8 +13149,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/n
  	remove_proc_entry("fs/nfs/exports", NULL);
  	remove_proc_entry("fs/nfs", NULL);
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
---- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-08-31 20:41:19.204160960 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-31 20:42:05.565212801 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-09-04 09:20:06.047233081 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-09-04 09:21:44.920025397 -0400
 @@ -285,11 +285,17 @@ extern time_t nfsd4_grace;
  #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
  	NFSD4_SUPPORTED_ATTRS_WORD0
@@ -13172,7 +13172,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfs
  {
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.c
 --- linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-08-31 20:42:05.566222921 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-09-04 09:21:44.921045937 -0400
 @@ -10,6 +10,7 @@
  #include <linux/exportfs.h>
  
@@ -13210,7 +13210,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nf
  		__u32 tfh[2];
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.h
 --- linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-08-31 20:42:05.567233002 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-09-04 09:21:44.922035547 -0400
 @@ -14,6 +14,7 @@ enum nfsd_fsid {
  	FSID_UUID8,
  	FSID_UUID16,
@@ -13263,8 +13263,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nf
 +
  #endif /* _LINUX_NFSD_FH_INT_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/nfssvc.c
---- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-08-31 20:41:17.274232911 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-08-31 20:42:05.568144414 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-09-04 09:20:04.514160362 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-09-04 09:21:44.923045353 -0400
 @@ -115,7 +115,7 @@ struct svc_program		nfsd_program = {
  
  };
@@ -13275,8 +13275,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/n
  int nfsd_vers(int vers, enum vers_op change)
  {
 diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pnfsd.h
---- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-08-31 20:42:05.569090615 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-08-31 20:42:05.569090615 -0400
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-09-04 09:21:44.923045353 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-09-04 09:21:44.923045353 -0400
 @@ -0,0 +1,143 @@
 +/*
 + *  Copyright (c) 2005 The Regents of the University of Michigan.
@@ -13422,8 +13422,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pn
 +
 +#endif /* LINUX_NFSD_PNFSD_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c
---- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-08-31 20:42:05.569090615 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-08-31 20:42:05.569090615 -0400
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-09-04 09:21:44.924046083 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-09-04 09:21:44.924046083 -0400
 @@ -0,0 +1,225 @@
 +/*
 + * linux/fs/nfsd/pnfs_lexp.c
@@ -13651,8 +13651,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nf
 +	inode->i_sb->s_pnfs_op = &pnfsd_lexp_ops;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_com.c
---- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-08-31 20:42:05.570119170 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-08-31 20:42:05.570119170 -0400
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-09-04 09:21:44.925035828 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-09-04 09:21:44.925035828 -0400
 @@ -0,0 +1,535 @@
 +/*
 + * fs/nfsd/spnfs_com.c
@@ -14190,8 +14190,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfs
 +}
 +#endif /* CONFIG_PROC_FS */
 diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c
---- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-08-31 20:42:05.571097807 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-08-31 20:42:05.572091128 -0400
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-09-04 09:21:44.926030099 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-09-04 09:21:44.926030099 -0400
 @@ -0,0 +1,878 @@
 +/*
 + * fs/nfsd/spnfs_ops.c
@@ -15072,8 +15072,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfs
 +	return 0;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
---- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-08-31 20:41:19.205016844 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-31 20:42:05.572091128 -0400
+--- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-09-04 09:20:06.048233523 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-09-04 09:21:44.927025219 -0400
 @@ -242,6 +242,12 @@ struct nfs4_client {
  	u32			cl_cb_seq_nr;
  	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
@@ -15190,8 +15190,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/st
 +
  #endif   /* NFSD4_STATE_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.c
---- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-08-31 20:41:17.275233561 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-08-31 20:42:05.573121119 -0400
+--- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-09-04 09:20:04.515160297 -0400
++++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-09-04 09:21:44.929025356 -0400
 @@ -37,7 +37,12 @@
  #ifdef CONFIG_NFSD_V4
  #include <linux/nfs4_acl.h>
@@ -15318,8 +15318,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.
  out_nfserr:
  	err = nfserrno(host_err);
 diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
---- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-08-31 20:41:19.206170424 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-31 20:42:05.575139084 -0400
+--- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-09-04 09:20:06.049232898 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-09-04 09:21:44.930035442 -0400
 @@ -37,6 +37,8 @@
  #ifndef _LINUX_NFSD_XDR4_H
  #define _LINUX_NFSD_XDR4_H
@@ -15396,8 +15396,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr
  	struct nfs4_replay *			replay;
  };
 diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.c
---- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-31 20:41:19.146161064 -0400
-+++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-31 20:42:05.515139585 -0400
+--- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-09-04 09:20:05.990223533 -0400
++++ linux-2.6.34.noarch/fs/nfs/file.c	2010-09-04 09:21:44.930035442 -0400
 @@ -36,6 +36,7 @@
  #include "internal.h"
  #include "iostat.h"
@@ -15515,8 +15515,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.
  	if (!ret)
  		return VM_FAULT_LOCKED;
 diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inode.c
---- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-08-31 20:41:19.149170418 -0400
-+++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-31 20:42:05.516222809 -0400
+--- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-09-04 09:20:05.993222927 -0400
++++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-09-04 09:21:44.932035441 -0400
 @@ -48,6 +48,7 @@
  #include "internal.h"
  #include "fscache.h"
@@ -15730,8 +15730,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inod
  	nfs_fs_proc_exit();
  	nfsiod_stop();
 diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/internal.h
---- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-08-31 20:41:19.149170418 -0400
-+++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-31 20:42:05.517099944 -0400
+--- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-09-04 09:20:05.993222927 -0400
++++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-09-04 09:21:44.933035332 -0400
 @@ -139,6 +139,16 @@ extern struct nfs_server *nfs_clone_serv
  					   struct nfs_fattr *);
  extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
@@ -15792,7 +15792,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/i
  		struct page *, struct page *);
 diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kconfig
 --- linux-2.6.34.noarch/fs/nfs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-08-31 20:42:05.500123860 -0400
++++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-09-04 09:21:44.933035332 -0400
 @@ -79,10 +79,48 @@ config NFS_V4_1
  	depends on NFS_V4 && EXPERIMENTAL
  	help
@@ -15845,7 +15845,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kcon
  	depends on NFS_FS=y && IP_PNP
 diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Makefile
 --- linux-2.6.34.noarch/fs/nfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-08-31 20:42:05.501268752 -0400
++++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-09-04 09:21:44.934046035 -0400
 @@ -15,5 +15,12 @@ nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4x
  			   delegation.o idmap.o \
  			   callback.o callback_xdr.o callback_proc.o \
@@ -15860,8 +15860,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Mak
 +obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
 +obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
 diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs3proc.c
---- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-08-31 20:41:19.152180625 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-31 20:42:05.518232887 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-09-04 09:20:05.996242985 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-09-04 09:21:44.935035426 -0400
 @@ -833,6 +833,7 @@ const struct nfs_rpc_ops nfs_v3_clientop
  	.dentry_ops	= &nfs_dentry_operations,
  	.dir_inode_ops	= &nfs3_dir_inode_operations,
@@ -15871,8 +15871,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/n
  	.getattr	= nfs3_proc_getattr,
  	.setattr	= nfs3_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-08-31 20:42:05.519163219 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-08-31 20:42:05.520222923 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-09-04 09:21:44.936035595 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-09-04 09:21:44.936035595 -0400
 @@ -0,0 +1,768 @@
 +/*
 + *  linux/fs/nfs/nfs4filelayout.c
@@ -16643,8 +16643,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs
 +module_init(nfs4filelayout_init);
 +module_exit(nfs4filelayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-08-31 20:42:05.521233147 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-08-31 20:42:05.521233147 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-09-04 09:21:44.937035580 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-09-04 09:21:44.937035580 -0400
 @@ -0,0 +1,635 @@
 +/*
 + *  linux/fs/nfs/nfs4filelayoutdev.c
@@ -17282,8 +17282,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch
 +}
 +
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-08-31 20:42:05.520222923 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-08-31 20:42:05.520222923 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-09-04 09:21:44.938035519 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-09-04 09:21:44.938035519 -0400
 @@ -0,0 +1,96 @@
 +/*
 + *  pnfs_nfs4filelayout.h
@@ -17382,8 +17382,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs
 +
 +#endif /* FS_NFS_NFS4FILELAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nfs4_fs.h
---- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-08-31 20:41:19.154160465 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-31 20:42:05.519163219 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-09-04 09:20:05.998222938 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-09-04 09:21:44.939035693 -0400
 @@ -45,8 +45,28 @@ enum nfs4_client_state {
  	NFS4CLNT_RECLAIM_NOGRACE,
  	NFS4CLNT_DELEGRETURN,
@@ -17532,8 +17532,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nf
  
  /* nfs4xdr.c */
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs4proc.c
---- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-08-31 20:41:19.157140145 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-31 20:42:05.524099925 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-09-04 09:20:06.001202714 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-09-04 09:21:44.942015100 -0400
 @@ -49,12 +49,14 @@
  #include <linux/mount.h>
  #include <linux/module.h>
@@ -19198,7 +19198,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/n
  	.setattr	= nfs4_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs/nfs4renewd.c
 --- linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-08-31 20:42:05.526213255 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-09-04 09:21:44.944045456 -0400
 @@ -54,17 +54,17 @@
  void
  nfs4_renew_state(struct work_struct *work)
@@ -19221,8 +19221,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs
  	spin_lock(&clp->cl_lock);
  	lease = clp->cl_lease_time;
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/nfs4state.c
---- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-08-31 20:41:19.158078621 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-31 20:42:05.527232994 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-09-04 09:20:06.002213222 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-09-04 09:21:44.945035417 -0400
 @@ -48,11 +48,13 @@
  #include <linux/random.h>
  #include <linux/workqueue.h>
@@ -19545,8 +19545,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/
  			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
  			    test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs4xdr.c
---- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-31 20:41:19.160150207 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-31 20:42:05.530092192 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-09-04 09:20:06.004212730 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-09-04 09:21:44.948015074 -0400
 @@ -50,8 +50,10 @@
  #include <linux/nfs4.h>
  #include <linux/nfs_fs.h>
@@ -21056,8 +21056,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nf
  };
  
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild
---- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-08-31 20:42:05.532213157 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-08-31 20:42:05.532213157 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-09-04 09:21:44.950025182 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-09-04 09:21:44.950025182 -0400
 @@ -0,0 +1,11 @@
 +#
 +# Makefile for the pNFS Objects Layout Driver kernel module
@@ -21071,8 +21071,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs
 +panlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o panfs_shim.o
 +obj-$(CONFIG_PNFS_PANLAYOUT) += panlayoutdriver.o
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-08-31 20:42:05.533243491 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-08-31 20:42:05.534105468 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-09-04 09:21:44.951035482 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-09-04 09:21:44.951035482 -0400
 @@ -0,0 +1,1087 @@
 +/*
 + *  objio_osd.c
@@ -22162,8 +22162,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noar
 +module_init(objlayout_init);
 +module_exit(objlayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-08-31 20:42:05.535059115 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-08-31 20:42:05.535059115 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-09-04 09:21:44.952035857 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-09-04 09:21:44.952035857 -0400
 @@ -0,0 +1,790 @@
 +/*
 + *  objlayout.c
@@ -22956,8 +22956,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noar
 +	.uninitialize_mountpoint = objlayout_uninitialize_mountpoint,
 +};
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h
---- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-08-31 20:42:05.535059115 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-08-31 20:42:05.535059115 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-09-04 09:21:44.953025191 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-09-04 09:21:44.953025191 -0400
 @@ -0,0 +1,171 @@
 +/*
 + *  objlayout.h
@@ -23131,8 +23131,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noar
 +
 +#endif /* _OBJLAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-08-31 20:42:05.536110535 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-08-31 20:42:05.536110535 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-09-04 09:21:44.954045432 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-09-04 09:21:44.954045432 -0400
 @@ -0,0 +1,734 @@
 +/*
 + *  panfs_shim.c
@@ -23869,8 +23869,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noa
 +module_init(panlayout_init);
 +module_exit(panlayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h
---- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-08-31 20:42:05.537124598 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-08-31 20:42:05.537124598 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-09-04 09:21:44.955035904 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-09-04 09:21:44.955035904 -0400
 @@ -0,0 +1,482 @@
 +/*
 + *  panfs_shim.h
@@ -24355,8 +24355,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noa
 +
 +#endif /* _PANLAYOUT_PANFS_SHIM_H */
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-08-31 20:42:05.538121971 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-08-31 20:42:05.538121971 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-09-04 09:21:44.956036011 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-09-04 09:21:44.956036011 -0400
 @@ -0,0 +1,435 @@
 +/*
 + *  pnfs_osd_xdr.c
@@ -24794,8 +24794,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.
 +	return 0;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/pagelist.c
---- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-08-31 20:41:19.162150222 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-31 20:42:05.539131687 -0400
+--- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-09-04 09:20:06.006202442 -0400
++++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-09-04 09:21:44.957035861 -0400
 @@ -20,6 +20,7 @@
  #include <linux/nfs_mount.h>
  
@@ -24918,8 +24918,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/p
  				if (res == INT_MAX)
  					goto out;
 diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.c
---- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-08-31 20:42:05.541150301 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-08-31 20:42:05.541150301 -0400
+--- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-09-04 09:21:44.959025145 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-09-04 09:21:44.959025145 -0400
 @@ -0,0 +1,2037 @@
 +/*
 + *  linux/fs/nfs/pnfs.c
@@ -26959,8 +26959,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.
 +}
 +EXPORT_SYMBOL(nfs4_put_deviceid_cache);
 diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.h
---- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-08-31 20:42:05.542222767 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-08-31 20:42:05.542222767 -0400
+--- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-09-04 09:21:44.960025819 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-09-04 09:21:44.960025819 -0400
 @@ -0,0 +1,354 @@
 +/*
 + *  fs/nfs/pnfs.h
@@ -27317,8 +27317,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.
 +
 +#endif /* FS_NFS_PNFS_H */
 diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.c
---- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-08-31 20:41:19.163155499 -0400
-+++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-31 20:42:05.543103394 -0400
+--- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-09-04 09:20:06.007232858 -0400
++++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-09-04 09:21:44.961035556 -0400
 @@ -443,7 +443,7 @@ nfs_proc_symlink(struct inode *dir, stru
  	fattr = nfs_alloc_fattr();
  	status = -ENOMEM;
@@ -27346,8 +27346,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.
  	.getattr	= nfs_proc_getattr,
  	.setattr	= nfs_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.c
---- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-08-31 20:41:19.164160482 -0400
-+++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-31 20:42:05.544233042 -0400
+--- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-09-04 09:20:06.008232903 -0400
++++ linux-2.6.34.noarch/fs/nfs/read.c	2010-09-04 09:21:44.962035703 -0400
 @@ -18,8 +18,12 @@
  #include <linux/sunrpc/clnt.h>
  #include <linux/nfs_fs.h>
@@ -27562,8 +27562,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.
  	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
  read_complete:
 diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/super.c
---- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-31 20:41:19.165170508 -0400
-+++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-31 20:42:05.545114737 -0400
+--- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-09-04 09:20:06.009232934 -0400
++++ linux-2.6.34.noarch/fs/nfs/super.c	2010-09-04 09:21:44.963035469 -0400
 @@ -64,6 +64,7 @@
  #include "iostat.h"
  #include "internal.h"
@@ -27611,8 +27611,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/supe
  #endif
  
 diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unlink.c
---- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-08-31 20:41:19.166151095 -0400
-+++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-31 20:42:05.546131839 -0400
+--- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-09-04 09:20:06.010203248 -0400
++++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-09-04 09:21:44.964036069 -0400
 @@ -110,7 +110,7 @@ void nfs_unlink_prepare(struct rpc_task 
  	struct nfs_unlinkdata *data = calldata;
  	struct nfs_server *server = NFS_SERVER(data->dir);
@@ -27623,8 +27623,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unl
  		return;
  	rpc_call_start(task);
 diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/write.c
---- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-08-31 20:41:17.273213379 -0400
-+++ linux-2.6.34.noarch/fs/nfs/write.c	2010-08-31 20:42:05.548212682 -0400
+--- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-09-04 09:20:04.513160311 -0400
++++ linux-2.6.34.noarch/fs/nfs/write.c	2010-09-04 09:21:44.966025174 -0400
 @@ -20,6 +20,7 @@
  #include <linux/nfs_mount.h>
  #include <linux/nfs_page.h>
@@ -28313,7 +28313,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/writ
  int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/include/linux/exportfs.h
 --- linux-2.6.34.noarch/include/linux/exportfs.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-08-31 20:42:05.577222704 -0400
++++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-09-04 09:21:44.967035352 -0400
 @@ -2,6 +2,7 @@
  #define LINUX_EXPORTFS_H 1
  
@@ -28386,8 +28386,8 @@ diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/i
 +#endif /* CONFIG_PNFSD */
  #endif /* LINUX_EXPORTFS_H */
 diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/include/linux/exp_xdr.h
---- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-08-31 20:42:05.576053304 -0400
-+++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-08-31 20:42:05.576053304 -0400
+--- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-09-04 09:21:44.969025737 -0400
++++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-09-04 09:21:44.969025737 -0400
 @@ -0,0 +1,141 @@
 +#ifndef _LINUX_EXP_XDR_H
 +#define _LINUX_EXP_XDR_H
@@ -28531,8 +28531,8 @@ diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/in
 +}
 +#endif /* _LINUX_EXP_XDR_H */
 diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include/linux/fs.h
---- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-08-31 20:41:19.120034834 -0400
-+++ linux-2.6.34.noarch/include/linux/fs.h	2010-08-31 20:42:05.579212604 -0400
+--- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-09-04 09:20:05.965243003 -0400
++++ linux-2.6.34.noarch/include/linux/fs.h	2010-09-04 09:21:44.971015113 -0400
 @@ -387,6 +387,7 @@ struct inodes_stat_t {
  #include <asm/byteorder.h>
  
@@ -28551,7 +28551,7 @@ diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include
  	struct dentry		*s_root;
 diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/include/linux/nfs4.h
 --- linux-2.6.34.noarch/include/linux/nfs4.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-08-31 20:42:05.581035627 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-09-04 09:21:44.973025301 -0400
 @@ -17,7 +17,10 @@
  
  #define NFS4_BITMAP_SIZE	2
@@ -28681,8 +28681,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/inclu
  #endif
  
 diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfs4_pnfs.h
---- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-08-31 20:42:05.583087731 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-08-31 20:42:05.583087731 -0400
+--- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-09-04 09:21:44.974035325 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-09-04 09:21:44.974035325 -0400
 @@ -0,0 +1,329 @@
 +/*
 + *  include/linux/nfs4_pnfs.h
@@ -29014,8 +29014,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/
 +
 +#endif /* LINUX_NFS4_PNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarch/include/linux/nfsd4_block.h
---- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-08-31 20:42:05.596098115 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-08-31 20:42:05.596098115 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-09-04 09:21:44.976025566 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-09-04 09:21:44.976025566 -0400
 @@ -0,0 +1,101 @@
 +#ifndef NFSD4_BLOCK
 +#define NFSD4_BLOCK
@@ -29119,8 +29119,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarc
 +#endif /* NFSD4_BLOCK */
 +
 diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h
---- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-08-31 20:42:05.597097942 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-08-31 20:42:05.597097942 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-09-04 09:21:44.977035317 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-09-04 09:21:44.977035317 -0400
 @@ -0,0 +1,345 @@
 +/*
 + * include/linux/nfsd4_spnfs.h
@@ -29469,7 +29469,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarc
 +#endif /* NFS_SPNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch/include/linux/nfsd/const.h
 --- linux-2.6.34.noarch/include/linux/nfsd/const.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-08-31 20:42:05.591097762 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-09-04 09:21:44.978015841 -0400
 @@ -29,6 +29,7 @@
  #ifdef __KERNEL__
  
@@ -29480,7 +29480,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch
   * Largest number of bytes we need to allocate for an NFS
 diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch/include/linux/nfsd/debug.h
 --- linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-08-31 20:42:05.591097762 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-09-04 09:21:44.978015841 -0400
 @@ -32,6 +32,8 @@
  #define NFSDDBG_REPCACHE	0x0080
  #define NFSDDBG_XDR		0x0100
@@ -29492,7 +29492,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch
  
 diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarch/include/linux/nfsd/export.h
 --- linux-2.6.34.noarch/include/linux/nfsd/export.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-08-31 20:42:05.592118086 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-09-04 09:21:44.979055116 -0400
 @@ -100,6 +100,7 @@ struct svc_export {
  	uid_t			ex_anon_uid;
  	gid_t			ex_anon_gid;
@@ -29502,8 +29502,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarc
  	struct nfsd4_fs_locations ex_fslocs;
  	int			ex_nflavors;
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h
---- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-08-31 20:42:05.592118086 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-08-31 20:42:05.592118086 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-09-04 09:21:44.979055116 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-09-04 09:21:44.980035474 -0400
 @@ -0,0 +1,132 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -29638,8 +29638,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.3
 +
 +#endif /* NFSD_NFS4LAYOUTXDR_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h
---- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-08-31 20:42:05.593020723 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-08-31 20:42:05.593020723 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-09-04 09:21:44.980035474 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-09-04 09:21:44.980035474 -0400
 @@ -0,0 +1,54 @@
 +/******************************************************************************
 + *
@@ -29696,8 +29696,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.
 +
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h
---- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-08-31 20:42:05.594107962 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-08-31 20:42:05.594107962 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-09-04 09:21:44.981055721 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-09-04 09:21:44.981055721 -0400
 @@ -0,0 +1,271 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -29972,7 +29972,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.n
 +#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noarch/include/linux/nfsd/syscall.h
 --- linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-08-31 20:42:05.594107962 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-09-04 09:21:44.982035422 -0400
 @@ -29,6 +29,7 @@
  /*#define NFSCTL_GETFH		6	/ * get an fh by ino DISCARDED */
  #define NFSCTL_GETFD		7	/* get an fh by path (used by mountd) */
@@ -30010,8 +30010,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noar
  
  union nfsctl_res {
 diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/include/linux/nfs_fs.h
---- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-31 20:41:19.168160480 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-31 20:42:05.584098019 -0400
+--- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-09-04 09:20:06.012232950 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-09-04 09:21:44.983045467 -0400
 @@ -72,13 +72,20 @@ struct nfs_access_entry {
  	int			mask;
  };
@@ -30111,8 +30111,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/inc
  
  #ifdef __KERNEL__
 diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/include/linux/nfs_fs_sb.h
---- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-08-31 20:41:19.168160480 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-31 20:42:05.586087719 -0400
+--- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-09-04 09:20:06.012232950 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-09-04 09:21:44.985025570 -0400
 @@ -15,6 +15,7 @@ struct nlm_host;
  struct nfs4_sequence_args;
  struct nfs4_sequence_res;
@@ -30187,7 +30187,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/
  	atomic_t active; /* Keep trace of any activity to this server */
 diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch/include/linux/nfs_iostat.h
 --- linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-08-31 20:42:05.587097913 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-09-04 09:21:44.986035288 -0400
 @@ -113,6 +113,9 @@ enum nfs_stat_eventcounters {
  	NFSIOS_SHORTREAD,
  	NFSIOS_SHORTWRITE,
@@ -30200,7 +30200,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch
  
 diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/include/linux/nfs_page.h
 --- linux-2.6.34.noarch/include/linux/nfs_page.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-08-31 20:42:05.588097898 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-09-04 09:21:44.987025532 -0400
 @@ -39,6 +39,7 @@ struct nfs_page {
  	struct list_head	wb_list;	/* Defines state of page: */
  	struct page		*wb_page;	/* page to read in/write out */
@@ -30249,8 +30249,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/i
  			     struct inode *inode,
  			     int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
 diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/include/linux/nfs_xdr.h
---- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-08-31 20:41:19.169171911 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-31 20:42:05.590087729 -0400
+--- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-09-04 09:20:06.013233555 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-09-04 09:21:44.989035583 -0400
 @@ -3,6 +3,8 @@
  
  #include <linux/nfsacl.h>
@@ -30528,8 +30528,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/in
  extern struct rpc_version	nfs_version3;
  extern struct rpc_version	nfs_version4;
 diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.noarch/include/linux/panfs_shim_api.h
---- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-08-31 20:42:05.598087997 -0400
-+++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-08-31 20:42:05.599087710 -0400
+--- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-09-04 09:21:44.990025422 -0400
++++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-09-04 09:21:44.991025218 -0400
 @@ -0,0 +1,57 @@
 +#ifndef _PANFS_SHIM_API_H
 +#define _PANFS_SHIM_API_H
@@ -30589,8 +30589,8 @@ diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.no
 +
 +#endif /* _PANFS_SHIM_API_H */
 diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h
---- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-08-31 20:42:05.600025088 -0400
-+++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-08-31 20:42:05.600025088 -0400
+--- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-09-04 09:21:44.992035338 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-09-04 09:21:44.992035338 -0400
 @@ -0,0 +1,439 @@
 +/*
 + *  pnfs_osd_xdr.h
@@ -31033,7 +31033,7 @@ diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noar
 +#endif /* __PNFS_OSD_XDR_H__ */
 diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/include/linux/posix_acl.h
 --- linux-2.6.34.noarch/include/linux/posix_acl.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-08-31 20:42:05.601087875 -0400
++++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-09-04 09:21:44.993025468 -0400
 @@ -8,6 +8,7 @@
  #ifndef __LINUX_POSIX_ACL_H
  #define __LINUX_POSIX_ACL_H
@@ -31044,7 +31044,7 @@ diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/
  #define ACL_UNDEFINED_ID	(-1)
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-08-31 20:42:05.602100892 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-09-04 09:21:44.994025129 -0400
 @@ -14,6 +14,8 @@
  /* size of an XDR encoding unit in bytes, i.e. 32bit */
  #define XDR_UNIT	(4)
@@ -31056,7 +31056,7 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.n
  
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-08-31 20:42:05.603108001 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-09-04 09:21:44.995045529 -0400
 @@ -3,6 +3,7 @@
  
  #ifdef __KERNEL__
@@ -31077,8 +31077,8 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.3
  
  struct rpc_pipe_ops {
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h
---- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-08-31 20:42:05.603108001 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-08-31 20:42:05.603108001 -0400
+--- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-09-04 09:21:44.995045529 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-09-04 09:21:44.995045529 -0400
 @@ -0,0 +1,111 @@
 +/*
 + *  Copyright (c) 2008 The Regents of the University of Michigan.
@@ -31193,7 +31193,7 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux
 +#endif /* _SIMPLE_RPC_PIPEFS_H_ */
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-08-31 20:42:05.604049784 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-09-04 09:21:44.996061803 -0400
 @@ -166,4 +166,41 @@ static inline char *__svc_print_addr(con
  
  	return buf;
@@ -31237,8 +31237,8 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.n
 +}
  #endif /* SUNRPC_SVC_XPRT_H */
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xdr.h
---- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-08-31 20:41:19.173118431 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-31 20:42:05.605107904 -0400
+--- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-09-04 09:20:06.017243774 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-09-04 09:21:44.997045653 -0400
 @@ -131,6 +131,13 @@ xdr_decode_hyper(__be32 *p, __u64 *valp)
  	return p + 2;
  }
@@ -31261,14 +31261,9 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch
  extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
  		unsigned int base, unsigned int len);
  extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
-diff -up linux-2.6.34.noarch/localversion-pnfs.orig linux-2.6.34.noarch/localversion-pnfs
---- linux-2.6.34.noarch/localversion-pnfs.orig	2010-08-31 20:42:05.605107904 -0400
-+++ linux-2.6.34.noarch/localversion-pnfs	2010-08-31 20:42:05.605107904 -0400
-@@ -0,0 +1 @@
-+-pnfs
 diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/sunrpc/Makefile
 --- linux-2.6.34.noarch/net/sunrpc/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-08-31 20:42:05.606020148 -0400
++++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-09-04 09:21:44.998058968 -0400
 @@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprt
  	    svc.o svcsock.o svcauth.o svcauth_unix.o \
  	    addr.o rpcb_clnt.o timer.o xdr.o \
@@ -31279,8 +31274,8 @@ diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/su
  sunrpc-$(CONFIG_PROC_FS) += stats.o
  sunrpc-$(CONFIG_SYSCTL) += sysctl.o
 diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c
---- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-08-31 20:42:05.606020148 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-08-31 20:42:05.607108065 -0400
+--- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-09-04 09:21:44.999045582 -0400
++++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-09-04 09:21:44.999045582 -0400
 @@ -0,0 +1,424 @@
 +/*
 + *  net/sunrpc/simple_rpc_pipefs.c
@@ -31707,8 +31702,8 @@ diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.no
 +}
 +EXPORT_SYMBOL(pipefs_generic_destroy_msg);
 diff -up linux-2.6.34.noarch/net/sunrpc/xdr.c.orig linux-2.6.34.noarch/net/sunrpc/xdr.c
---- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-08-31 20:41:19.188144022 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-31 20:42:05.607108065 -0400
+--- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-09-04 09:20:06.031222775 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-09-04 09:21:45.000045387 -0400
 @@ -395,24 +395,29 @@ xdr_shrink_pagelen(struct xdr_buf *buf, 
  {
  	struct kvec *tail;

From e13651e22a981ffcd6c27e1c6ab9c30704a69bbc Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Mon, 23 Aug 2010 12:20:57 -0400
Subject: [PATCH 11/20] Updated to the latest pNFS tag:
 pnfs-all-2.6.35-2010-08-19

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 config-generic                       |    12 +
 kernel.spec                          |    15 +-
 linux-2.6-pnfs-compile.patch         |    13 +
 linux-2.6.35-inline.patch            |    11 +
 nfs-35-fc.patch                      |  7235 ++++++
 nfsd-35-fc.patch                     |  1808 ++
 pnfs-all-2.6.35-2010-08-19-f13.patch | 31788 +++++++++++++++++++++++++
 7 files changed, 40880 insertions(+), 2 deletions(-)
 create mode 100644 linux-2.6-pnfs-compile.patch
 create mode 100644 linux-2.6.35-inline.patch
 create mode 100644 nfs-35-fc.patch
 create mode 100644 nfsd-35-fc.patch
 create mode 100644 pnfs-all-2.6.35-2010-08-19-f13.patch

diff --git a/config-generic b/config-generic
index a25e79f7e..898e7a3d7 100644
--- a/config-generic
+++ b/config-generic
@@ -3322,6 +3322,18 @@ CONFIG_NFSD_V3=y
 CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFS_FSCACHE=y
+# Enable pNFS
+CONFIG_PNFS=y
+CONFIG_PNFSD=y
+CONFIG_PNFSD_LOCAL_EXPORT=y
+CONFIG_SPNFS=y
+CONFIG_SPNFS_LAYOUTSEGMENTS=y
+CONFIG_SPNFS_BLOCK=y
+CONFIG_PNFS_OBJLAYOUT=m
+CONFIG_PNFS_BLOCK=m
+CONFIG_PNFS_PANLAYOUT=m
+CONFIG_PNFS_FILE_LAYOUT=m
+#
 CONFIG_LOCKD=m
 CONFIG_LOCKD_V4=y
 CONFIG_EXPORTFS=m
diff --git a/kernel.spec b/kernel.spec
index e2e43696d..6e4442efc 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -23,7 +23,7 @@ Summary: The Linux kernel
 #
 # (Uncomment the '#' and both spaces below to set the buildid.)
 #
-# % define buildid .local
+%define buildid .pnfs_all_2.6.35_2010_08_19
 ###################################################################
 
 # The buildid can also be specified on the rpmbuild command line
@@ -107,7 +107,7 @@ Summary: The Linux kernel
 # kernel-headers
 %define with_headers   %{?_without_headers:   0} %{?!_without_headers:   1}
 # kernel-firmware
-%define with_firmware  %{?_with_firmware:     1} %{?!_with_firmware:     0}
+%define with_firmware  %{?_with_firmware:     1} %{?!_with_firmware:     1}
 # tools/perf
 %define with_perftool  %{?_without_perftool:  0} %{?!_without_perftool:  1}
 # perf noarch subpkg
@@ -816,6 +816,12 @@ Patch12570: sched-00-fix-user-time-incorrectly-accounted-as-system-time-on-32-bi
 Patch12580: xen-handle-events-as-edge-triggered.patch
 Patch12581: xen-use-percpu-interrupts-for-ipis-and-virqs.patch
 
+Patch30000: nfs-35-fc.patch
+Patch30001: nfsd-35-fc.patch
+Patch30002: pnfs-all-2.6.35-2010-08-19-f13.patch
+Patch30003: linux-2.6-pnfs-compile.patch
+Patch30004: linux-2.6.35-inline.patch
+
 %endif
 
 BuildRoot: %{_tmppath}/kernel-%{KVERREL}-root
@@ -1543,6 +1549,11 @@ ApplyPatch sched-00-fix-user-time-incorrectly-accounted-as-system-time-on-32-bit
 ApplyPatch xen-handle-events-as-edge-triggered.patch
 ApplyPatch xen-use-percpu-interrupts-for-ipis-and-virqs.patch
 
+ApplyPatch nfs-35-fc.patch  
+ApplyPatch nfsd-35-fc.patch  
+ApplyPatch pnfs-all-2.6.35-2010-08-19-f13.patch
+ApplyPatch linux-2.6-pnfs-compile.patch
+ApplyPatch linux-2.6.35-inline.patch
 # END OF PATCH APPLICATIONS
 
 %endif
diff --git a/linux-2.6-pnfs-compile.patch b/linux-2.6-pnfs-compile.patch
new file mode 100644
index 000000000..7c8cc4248
--- /dev/null
+++ b/linux-2.6-pnfs-compile.patch
@@ -0,0 +1,13 @@
+diff -up linux-2.6.32.x86_64/fs/nfs/objlayout/pnfs_osd_xdr.h.orig linux-2.6.32.x86_64/fs/nfs/objlayout/pnfs_osd_xdr.h
+diff -up linux-2.6.32.x86_64/include/net/inet_connection_sock.h.orig linux-2.6.32.x86_64/include/net/inet_connection_sock.h
+--- linux-2.6.32.x86_64/include/net/inet_connection_sock.h.orig	2009-12-02 22:51:21.000000000 -0500
++++ linux-2.6.32.x86_64/include/net/inet_connection_sock.h	2010-04-21 14:26:24.475659551 -0400
+@@ -23,7 +23,7 @@
+ #include <net/inet_sock.h>
+ #include <net/request_sock.h>
+ 
+-#define INET_CSK_DEBUG 1
++//#define INET_CSK_DEBUG 1
+ 
+ /* Cancel timers, when they are not required. */
+ #undef INET_CSK_CLEAR_TIMERS
diff --git a/linux-2.6.35-inline.patch b/linux-2.6.35-inline.patch
new file mode 100644
index 000000000..c56d8da5e
--- /dev/null
+++ b/linux-2.6.35-inline.patch
@@ -0,0 +1,11 @@
+diff -up linux-2.6.34.noarch/arch/x86/Makefile.orig linux-2.6.34.noarch/arch/x86/Makefile
+--- linux-2.6.34.noarch/arch/x86/Makefile.orig	2010-07-01 13:33:21.859627499 -0400
++++ linux-2.6.34.noarch/arch/x86/Makefile	2010-07-01 13:36:26.751576450 -0400
+@@ -81,6 +81,7 @@ ifdef CONFIG_CC_STACKPROTECTOR
+                 $(warning stack protector enabled but no compiler support)
+         endif
+ endif
++KBUILD_CFLAGS += -fno-inline-functions-called-once
+ 
+ # Don't unroll struct assignments with kmemcheck enabled
+ ifeq ($(CONFIG_KMEMCHECK),y)
diff --git a/nfs-35-fc.patch b/nfs-35-fc.patch
new file mode 100644
index 000000000..c3ad25f65
--- /dev/null
+++ b/nfs-35-fc.patch
@@ -0,0 +1,7235 @@
+diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/client.c
+--- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-23 11:01:00.352376393 -0400
+@@ -934,7 +934,6 @@ static int nfs_probe_fsinfo(struct nfs_s
+ 	}
+ 
+ 	fsinfo.fattr = fattr;
+-	nfs_fattr_init(fattr);
+ 	error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
+ 	if (error < 0)
+ 		goto out_error;
+@@ -1047,13 +1046,18 @@ struct nfs_server *nfs_create_server(con
+ 				     struct nfs_fh *mntfh)
+ {
+ 	struct nfs_server *server;
+-	struct nfs_fattr fattr;
++	struct nfs_fattr *fattr;
+ 	int error;
+ 
+ 	server = nfs_alloc_server();
+ 	if (!server)
+ 		return ERR_PTR(-ENOMEM);
+ 
++	error = -ENOMEM;
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto error;
++
+ 	/* Get a client representation */
+ 	error = nfs_init_server(server, data);
+ 	if (error < 0)
+@@ -1064,7 +1068,7 @@ struct nfs_server *nfs_create_server(con
+ 	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+ 
+ 	/* Probe the root fh to retrieve its FSID */
+-	error = nfs_probe_fsinfo(server, mntfh, &fattr);
++	error = nfs_probe_fsinfo(server, mntfh, fattr);
+ 	if (error < 0)
+ 		goto error;
+ 	if (server->nfs_client->rpc_ops->version == 3) {
+@@ -1077,14 +1081,14 @@ struct nfs_server *nfs_create_server(con
+ 			server->namelen = NFS2_MAXNAMLEN;
+ 	}
+ 
+-	if (!(fattr.valid & NFS_ATTR_FATTR)) {
+-		error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
++	if (!(fattr->valid & NFS_ATTR_FATTR)) {
++		error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
+ 		if (error < 0) {
+ 			dprintk("nfs_create_server: getattr error = %d\n", -error);
+ 			goto error;
+ 		}
+ 	}
+-	memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
++	memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
+ 
+ 	dprintk("Server FSID: %llx:%llx\n",
+ 		(unsigned long long) server->fsid.major,
+@@ -1096,9 +1100,11 @@ struct nfs_server *nfs_create_server(con
+ 	spin_unlock(&nfs_client_lock);
+ 
+ 	server->mount_time = jiffies;
++	nfs_free_fattr(fattr);
+ 	return server;
+ 
+ error:
++	nfs_free_fattr(fattr);
+ 	nfs_free_server(server);
+ 	return ERR_PTR(error);
+ }
+@@ -1340,7 +1346,7 @@ error:
+ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
+ 				      struct nfs_fh *mntfh)
+ {
+-	struct nfs_fattr fattr;
++	struct nfs_fattr *fattr;
+ 	struct nfs_server *server;
+ 	int error;
+ 
+@@ -1350,6 +1356,11 @@ struct nfs_server *nfs4_create_server(co
+ 	if (!server)
+ 		return ERR_PTR(-ENOMEM);
+ 
++	error = -ENOMEM;
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto error;
++
+ 	/* set up the general RPC client */
+ 	error = nfs4_init_server(server, data);
+ 	if (error < 0)
+@@ -1364,7 +1375,7 @@ struct nfs_server *nfs4_create_server(co
+ 		goto error;
+ 
+ 	/* Probe the root fh to retrieve its FSID */
+-	error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path);
++	error = nfs4_get_rootfh(server, mntfh);
+ 	if (error < 0)
+ 		goto error;
+ 
+@@ -1375,7 +1386,7 @@ struct nfs_server *nfs4_create_server(co
+ 
+ 	nfs4_session_set_rwsize(server);
+ 
+-	error = nfs_probe_fsinfo(server, mntfh, &fattr);
++	error = nfs_probe_fsinfo(server, mntfh, fattr);
+ 	if (error < 0)
+ 		goto error;
+ 
+@@ -1389,9 +1400,11 @@ struct nfs_server *nfs4_create_server(co
+ 
+ 	server->mount_time = jiffies;
+ 	dprintk("<-- nfs4_create_server() = %p\n", server);
++	nfs_free_fattr(fattr);
+ 	return server;
+ 
+ error:
++	nfs_free_fattr(fattr);
+ 	nfs_free_server(server);
+ 	dprintk("<-- nfs4_create_server() = error %d\n", error);
+ 	return ERR_PTR(error);
+@@ -1405,7 +1418,7 @@ struct nfs_server *nfs4_create_referral_
+ {
+ 	struct nfs_client *parent_client;
+ 	struct nfs_server *server, *parent_server;
+-	struct nfs_fattr fattr;
++	struct nfs_fattr *fattr;
+ 	int error;
+ 
+ 	dprintk("--> nfs4_create_referral_server()\n");
+@@ -1414,6 +1427,11 @@ struct nfs_server *nfs4_create_referral_
+ 	if (!server)
+ 		return ERR_PTR(-ENOMEM);
+ 
++	error = -ENOMEM;
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto error;
++
+ 	parent_server = NFS_SB(data->sb);
+ 	parent_client = parent_server->nfs_client;
+ 
+@@ -1443,12 +1461,12 @@ struct nfs_server *nfs4_create_referral_
+ 	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+ 
+ 	/* Probe the root fh to retrieve its FSID and filehandle */
+-	error = nfs4_path_walk(server, mntfh, data->mnt_path);
++	error = nfs4_get_rootfh(server, mntfh);
+ 	if (error < 0)
+ 		goto error;
+ 
+ 	/* probe the filesystem info for this server filesystem */
+-	error = nfs_probe_fsinfo(server, mntfh, &fattr);
++	error = nfs_probe_fsinfo(server, mntfh, fattr);
+ 	if (error < 0)
+ 		goto error;
+ 
+@@ -1466,10 +1484,12 @@ struct nfs_server *nfs4_create_referral_
+ 
+ 	server->mount_time = jiffies;
+ 
++	nfs_free_fattr(fattr);
+ 	dprintk("<-- nfs_create_referral_server() = %p\n", server);
+ 	return server;
+ 
+ error:
++	nfs_free_fattr(fattr);
+ 	nfs_free_server(server);
+ 	dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
+ 	return ERR_PTR(error);
+@@ -1485,7 +1505,7 @@ struct nfs_server *nfs_clone_server(stru
+ 				    struct nfs_fattr *fattr)
+ {
+ 	struct nfs_server *server;
+-	struct nfs_fattr fattr_fsinfo;
++	struct nfs_fattr *fattr_fsinfo;
+ 	int error;
+ 
+ 	dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
+@@ -1496,6 +1516,11 @@ struct nfs_server *nfs_clone_server(stru
+ 	if (!server)
+ 		return ERR_PTR(-ENOMEM);
+ 
++	error = -ENOMEM;
++	fattr_fsinfo = nfs_alloc_fattr();
++	if (fattr_fsinfo == NULL)
++		goto out_free_server;
++
+ 	/* Copy data from the source */
+ 	server->nfs_client = source->nfs_client;
+ 	atomic_inc(&server->nfs_client->cl_count);
+@@ -1512,7 +1537,7 @@ struct nfs_server *nfs_clone_server(stru
+ 		nfs_init_server_aclclient(server);
+ 
+ 	/* probe the filesystem info for this server filesystem */
+-	error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo);
++	error = nfs_probe_fsinfo(server, fh, fattr_fsinfo);
+ 	if (error < 0)
+ 		goto out_free_server;
+ 
+@@ -1534,10 +1559,12 @@ struct nfs_server *nfs_clone_server(stru
+ 
+ 	server->mount_time = jiffies;
+ 
++	nfs_free_fattr(fattr_fsinfo);
+ 	dprintk("<-- nfs_clone_server() = %p\n", server);
+ 	return server;
+ 
+ out_free_server:
++	nfs_free_fattr(fattr_fsinfo);
+ 	nfs_free_server(server);
+ 	dprintk("<-- nfs_clone_server() = error %d\n", error);
+ 	return ERR_PTR(error);
+diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs/delegation.c
+--- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-23 11:01:00.352376393 -0400
+@@ -213,7 +213,7 @@ int nfs_inode_set_delegation(struct inod
+ 	struct nfs_delegation *freeme = NULL;
+ 	int status = 0;
+ 
+-	delegation = kmalloc(sizeof(*delegation), GFP_KERNEL);
++	delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
+ 	if (delegation == NULL)
+ 		return -ENOMEM;
+ 	memcpy(delegation->stateid.data, res->delegation.data,
+diff -up linux-2.6.34.noarch/fs/nfs/dir.c.orig linux-2.6.34.noarch/fs/nfs/dir.c
+--- linux-2.6.34.noarch/fs/nfs/dir.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/dir.c	2010-08-23 11:01:00.353376419 -0400
+@@ -530,9 +530,7 @@ static int nfs_readdir(struct file *filp
+ 	nfs_readdir_descriptor_t my_desc,
+ 			*desc = &my_desc;
+ 	struct nfs_entry my_entry;
+-	struct nfs_fh	 fh;
+-	struct nfs_fattr fattr;
+-	long		res;
++	int res = -ENOMEM;
+ 
+ 	dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
+ 			dentry->d_parent->d_name.name, dentry->d_name.name,
+@@ -554,9 +552,11 @@ static int nfs_readdir(struct file *filp
+ 
+ 	my_entry.cookie = my_entry.prev_cookie = 0;
+ 	my_entry.eof = 0;
+-	my_entry.fh = &fh;
+-	my_entry.fattr = &fattr;
+-	nfs_fattr_init(&fattr);
++	my_entry.fh = nfs_alloc_fhandle();
++	my_entry.fattr = nfs_alloc_fattr();
++	if (my_entry.fh == NULL || my_entry.fattr == NULL)
++		goto out_alloc_failed;
++
+ 	desc->entry = &my_entry;
+ 
+ 	nfs_block_sillyrename(dentry);
+@@ -598,7 +598,10 @@ out:
+ 	nfs_unblock_sillyrename(dentry);
+ 	if (res > 0)
+ 		res = 0;
+-	dfprintk(FILE, "NFS: readdir(%s/%s) returns %ld\n",
++out_alloc_failed:
++	nfs_free_fattr(my_entry.fattr);
++	nfs_free_fhandle(my_entry.fh);
++	dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n",
+ 			dentry->d_parent->d_name.name, dentry->d_name.name,
+ 			res);
+ 	return res;
+@@ -776,9 +779,9 @@ static int nfs_lookup_revalidate(struct 
+ 	struct inode *dir;
+ 	struct inode *inode;
+ 	struct dentry *parent;
++	struct nfs_fh *fhandle = NULL;
++	struct nfs_fattr *fattr = NULL;
+ 	int error;
+-	struct nfs_fh fhandle;
+-	struct nfs_fattr fattr;
+ 
+ 	parent = dget_parent(dentry);
+ 	dir = parent->d_inode;
+@@ -811,14 +814,22 @@ static int nfs_lookup_revalidate(struct 
+ 	if (NFS_STALE(inode))
+ 		goto out_bad;
+ 
+-	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
++	error = -ENOMEM;
++	fhandle = nfs_alloc_fhandle();
++	fattr = nfs_alloc_fattr();
++	if (fhandle == NULL || fattr == NULL)
++		goto out_error;
++
++	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
+ 	if (error)
+ 		goto out_bad;
+-	if (nfs_compare_fh(NFS_FH(inode), &fhandle))
++	if (nfs_compare_fh(NFS_FH(inode), fhandle))
+ 		goto out_bad;
+-	if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
++	if ((error = nfs_refresh_inode(inode, fattr)) != 0)
+ 		goto out_bad;
+ 
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fhandle);
+ out_set_verifier:
+ 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+  out_valid:
+@@ -842,11 +853,21 @@ out_zap_parent:
+ 		shrink_dcache_parent(dentry);
+ 	}
+ 	d_drop(dentry);
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fhandle);
+ 	dput(parent);
+ 	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
+ 			__func__, dentry->d_parent->d_name.name,
+ 			dentry->d_name.name);
+ 	return 0;
++out_error:
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fhandle);
++	dput(parent);
++	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n",
++			__func__, dentry->d_parent->d_name.name,
++			dentry->d_name.name, error);
++	return error;
+ }
+ 
+ /*
+@@ -911,9 +932,9 @@ static struct dentry *nfs_lookup(struct 
+ 	struct dentry *res;
+ 	struct dentry *parent;
+ 	struct inode *inode = NULL;
++	struct nfs_fh *fhandle = NULL;
++	struct nfs_fattr *fattr = NULL;
+ 	int error;
+-	struct nfs_fh fhandle;
+-	struct nfs_fattr fattr;
+ 
+ 	dfprintk(VFS, "NFS: lookup(%s/%s)\n",
+ 		dentry->d_parent->d_name.name, dentry->d_name.name);
+@@ -923,7 +944,6 @@ static struct dentry *nfs_lookup(struct 
+ 	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
+ 		goto out;
+ 
+-	res = ERR_PTR(-ENOMEM);
+ 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+ 
+ 	/*
+@@ -936,17 +956,23 @@ static struct dentry *nfs_lookup(struct 
+ 		goto out;
+ 	}
+ 
++	res = ERR_PTR(-ENOMEM);
++	fhandle = nfs_alloc_fhandle();
++	fattr = nfs_alloc_fattr();
++	if (fhandle == NULL || fattr == NULL)
++		goto out;
++
+ 	parent = dentry->d_parent;
+ 	/* Protect against concurrent sillydeletes */
+ 	nfs_block_sillyrename(parent);
+-	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
++	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
+ 	if (error == -ENOENT)
+ 		goto no_entry;
+ 	if (error < 0) {
+ 		res = ERR_PTR(error);
+ 		goto out_unblock_sillyrename;
+ 	}
+-	inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
++	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
+ 	res = (struct dentry *)inode;
+ 	if (IS_ERR(res))
+ 		goto out_unblock_sillyrename;
+@@ -962,6 +988,8 @@ no_entry:
+ out_unblock_sillyrename:
+ 	nfs_unblock_sillyrename(parent);
+ out:
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fhandle);
+ 	return res;
+ }
+ 
+@@ -1669,28 +1697,33 @@ static void nfs_access_free_entry(struct
+ 	smp_mb__after_atomic_dec();
+ }
+ 
++static void nfs_access_free_list(struct list_head *head)
++{
++	struct nfs_access_entry *cache;
++
++	while (!list_empty(head)) {
++		cache = list_entry(head->next, struct nfs_access_entry, lru);
++		list_del(&cache->lru);
++		nfs_access_free_entry(cache);
++	}
++}
++
+ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+ {
+ 	LIST_HEAD(head);
+ 	struct nfs_inode *nfsi;
+ 	struct nfs_access_entry *cache;
+ 
+-restart:
++	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
++		return (nr_to_scan == 0) ? 0 : -1;
++
+ 	spin_lock(&nfs_access_lru_lock);
+ 	list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
+-		struct rw_semaphore *s_umount;
+ 		struct inode *inode;
+ 
+ 		if (nr_to_scan-- == 0)
+ 			break;
+-		s_umount = &nfsi->vfs_inode.i_sb->s_umount;
+-		if (!down_read_trylock(s_umount))
+-			continue;
+-		inode = igrab(&nfsi->vfs_inode);
+-		if (inode == NULL) {
+-			up_read(s_umount);
+-			continue;
+-		}
++		inode = &nfsi->vfs_inode;
+ 		spin_lock(&inode->i_lock);
+ 		if (list_empty(&nfsi->access_cache_entry_lru))
+ 			goto remove_lru_entry;
+@@ -1704,61 +1737,47 @@ restart:
+ 		else {
+ remove_lru_entry:
+ 			list_del_init(&nfsi->access_cache_inode_lru);
++			smp_mb__before_clear_bit();
+ 			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
++			smp_mb__after_clear_bit();
+ 		}
+-		spin_unlock(&inode->i_lock);
+-		spin_unlock(&nfs_access_lru_lock);
+-		iput(inode);
+-		up_read(s_umount);
+-		goto restart;
+ 	}
+ 	spin_unlock(&nfs_access_lru_lock);
+-	while (!list_empty(&head)) {
+-		cache = list_entry(head.next, struct nfs_access_entry, lru);
+-		list_del(&cache->lru);
+-		nfs_access_free_entry(cache);
+-	}
++	nfs_access_free_list(&head);
+ 	return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
+ }
+ 
+-static void __nfs_access_zap_cache(struct inode *inode)
++static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
+ {
+-	struct nfs_inode *nfsi = NFS_I(inode);
+ 	struct rb_root *root_node = &nfsi->access_cache;
+-	struct rb_node *n, *dispose = NULL;
++	struct rb_node *n;
+ 	struct nfs_access_entry *entry;
+ 
+ 	/* Unhook entries from the cache */
+ 	while ((n = rb_first(root_node)) != NULL) {
+ 		entry = rb_entry(n, struct nfs_access_entry, rb_node);
+ 		rb_erase(n, root_node);
+-		list_del(&entry->lru);
+-		n->rb_left = dispose;
+-		dispose = n;
++		list_move(&entry->lru, head);
+ 	}
+ 	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+-	spin_unlock(&inode->i_lock);
+-
+-	/* Now kill them all! */
+-	while (dispose != NULL) {
+-		n = dispose;
+-		dispose = n->rb_left;
+-		nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node));
+-	}
+ }
+ 
+ void nfs_access_zap_cache(struct inode *inode)
+ {
++	LIST_HEAD(head);
++
++	if (test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags) == 0)
++		return;
+ 	/* Remove from global LRU init */
+-	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
+-		spin_lock(&nfs_access_lru_lock);
++	spin_lock(&nfs_access_lru_lock);
++	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
+ 		list_del_init(&NFS_I(inode)->access_cache_inode_lru);
+-		spin_unlock(&nfs_access_lru_lock);
+-	}
+ 
+ 	spin_lock(&inode->i_lock);
+-	/* This will release the spinlock */
+-	__nfs_access_zap_cache(inode);
++	__nfs_access_zap_cache(NFS_I(inode), &head);
++	spin_unlock(&inode->i_lock);
++	spin_unlock(&nfs_access_lru_lock);
++	nfs_access_free_list(&head);
+ }
+ 
+ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
+@@ -1809,8 +1828,8 @@ out_stale:
+ 	nfs_access_free_entry(cache);
+ 	return -ENOENT;
+ out_zap:
+-	/* This will release the spinlock */
+-	__nfs_access_zap_cache(inode);
++	spin_unlock(&inode->i_lock);
++	nfs_access_zap_cache(inode);
+ 	return -ENOENT;
+ }
+ 
+@@ -1865,9 +1884,11 @@ static void nfs_access_add_cache(struct 
+ 	smp_mb__after_atomic_inc();
+ 
+ 	/* Add inode to global LRU list */
+-	if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
++	if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
+ 		spin_lock(&nfs_access_lru_lock);
+-		list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list);
++		if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
++			list_add_tail(&NFS_I(inode)->access_cache_inode_lru,
++					&nfs_access_lru_list);
+ 		spin_unlock(&nfs_access_lru_lock);
+ 	}
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.c
+--- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-23 11:00:23.790502081 -0400
++++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-23 11:01:00.354376416 -0400
+@@ -162,14 +162,17 @@ static int nfs_revalidate_file_size(stru
+ 	struct nfs_server *server = NFS_SERVER(inode);
+ 	struct nfs_inode *nfsi = NFS_I(inode);
+ 
+-	if (server->flags & NFS_MOUNT_NOAC)
+-		goto force_reval;
++	if (nfs_have_delegated_attributes(inode))
++		goto out_noreval;
++
+ 	if (filp->f_flags & O_DIRECT)
+ 		goto force_reval;
+-	if (nfsi->npages != 0)
+-		return 0;
+-	if (!(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode))
+-		return 0;
++	if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
++		goto force_reval;
++	if (nfs_attribute_timeout(inode))
++		goto force_reval;
++out_noreval:
++	return 0;
+ force_reval:
+ 	return __nfs_revalidate_inode(server, inode);
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/fscache.c.orig linux-2.6.34.noarch/fs/nfs/fscache.c
+--- linux-2.6.34.noarch/fs/nfs/fscache.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/fscache.c	2010-08-23 11:01:00.355376416 -0400
+@@ -467,7 +467,8 @@ int __nfs_readpages_from_fscache(struct 
+ 				 struct list_head *pages,
+ 				 unsigned *nr_pages)
+ {
+-	int ret, npages = *nr_pages;
++	unsigned npages = *nr_pages;
++	int ret;
+ 
+ 	dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n",
+ 		 NFS_I(inode)->fscache, npages, inode);
+diff -up linux-2.6.34.noarch/fs/nfs/getroot.c.orig linux-2.6.34.noarch/fs/nfs/getroot.c
+--- linux-2.6.34.noarch/fs/nfs/getroot.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/getroot.c	2010-08-23 11:01:00.356376417 -0400
+@@ -78,159 +78,94 @@ struct dentry *nfs_get_root(struct super
+ {
+ 	struct nfs_server *server = NFS_SB(sb);
+ 	struct nfs_fsinfo fsinfo;
+-	struct nfs_fattr fattr;
+-	struct dentry *mntroot;
++	struct dentry *ret;
+ 	struct inode *inode;
+ 	int error;
+ 
+ 	/* get the actual root for this mount */
+-	fsinfo.fattr = &fattr;
++	fsinfo.fattr = nfs_alloc_fattr();
++	if (fsinfo.fattr == NULL)
++		return ERR_PTR(-ENOMEM);
+ 
+ 	error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+ 	if (error < 0) {
+ 		dprintk("nfs_get_root: getattr error = %d\n", -error);
+-		return ERR_PTR(error);
++		ret = ERR_PTR(error);
++		goto out;
+ 	}
+ 
+ 	inode = nfs_fhget(sb, mntfh, fsinfo.fattr);
+ 	if (IS_ERR(inode)) {
+ 		dprintk("nfs_get_root: get root inode failed\n");
+-		return ERR_CAST(inode);
++		ret = ERR_CAST(inode);
++		goto out;
+ 	}
+ 
+ 	error = nfs_superblock_set_dummy_root(sb, inode);
+-	if (error != 0)
+-		return ERR_PTR(error);
++	if (error != 0) {
++		ret = ERR_PTR(error);
++		goto out;
++	}
+ 
+ 	/* root dentries normally start off anonymous and get spliced in later
+ 	 * if the dentry tree reaches them; however if the dentry already
+ 	 * exists, we'll pick it up at this point and use it as the root
+ 	 */
+-	mntroot = d_obtain_alias(inode);
+-	if (IS_ERR(mntroot)) {
++	ret = d_obtain_alias(inode);
++	if (IS_ERR(ret)) {
+ 		dprintk("nfs_get_root: get root dentry failed\n");
+-		return mntroot;
++		goto out;
+ 	}
+ 
+-	security_d_instantiate(mntroot, inode);
+-
+-	if (!mntroot->d_op)
+-		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
++	security_d_instantiate(ret, inode);
+ 
+-	return mntroot;
++	if (ret->d_op == NULL)
++		ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
++out:
++	nfs_free_fattr(fsinfo.fattr);
++	return ret;
+ }
+ 
+ #ifdef CONFIG_NFS_V4
+ 
+-/*
+- * Do a simple pathwalk from the root FH of the server to the nominated target
+- * of the mountpoint
+- * - give error on symlinks
+- * - give error on ".." occurring in the path
+- * - follow traversals
+- */
+-int nfs4_path_walk(struct nfs_server *server,
+-		   struct nfs_fh *mntfh,
+-		   const char *path)
++int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
+ {
+ 	struct nfs_fsinfo fsinfo;
+-	struct nfs_fattr fattr;
+-	struct nfs_fh lastfh;
+-	struct qstr name;
+-	int ret;
+-
+-	dprintk("--> nfs4_path_walk(,,%s)\n", path);
+-
+-	fsinfo.fattr = &fattr;
+-	nfs_fattr_init(&fattr);
+-
+-	/* Eat leading slashes */
+-	while (*path == '/')
+-		path++;
++	int ret = -ENOMEM;
++
++	dprintk("--> nfs4_get_rootfh()\n");
++
++	fsinfo.fattr = nfs_alloc_fattr();
++	if (fsinfo.fattr == NULL)
++		goto out;
+ 
+ 	/* Start by getting the root filehandle from the server */
+ 	ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+ 	if (ret < 0) {
+-		dprintk("nfs4_get_root: getroot error = %d\n", -ret);
+-		return ret;
++		dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
++		goto out;
+ 	}
+ 
+-	if (!S_ISDIR(fattr.mode)) {
+-		printk(KERN_ERR "nfs4_get_root:"
++	if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_MODE)
++			|| !S_ISDIR(fsinfo.fattr->mode)) {
++		printk(KERN_ERR "nfs4_get_rootfh:"
+ 		       " getroot encountered non-directory\n");
+-		return -ENOTDIR;
++		ret = -ENOTDIR;
++		goto out;
+ 	}
+ 
+-	/* FIXME: It is quite valid for the server to return a referral here */
+-	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+-		printk(KERN_ERR "nfs4_get_root:"
++	if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
++		printk(KERN_ERR "nfs4_get_rootfh:"
+ 		       " getroot obtained referral\n");
+-		return -EREMOTE;
++		ret = -EREMOTE;
++		goto out;
+ 	}
+ 
+-next_component:
+-	dprintk("Next: %s\n", path);
+-
+-	/* extract the next bit of the path */
+-	if (!*path)
+-		goto path_walk_complete;
+-
+-	name.name = path;
+-	while (*path && *path != '/')
+-		path++;
+-	name.len = path - (const char *) name.name;
+-
+-	if (name.len > NFS4_MAXNAMLEN)
+-		return -ENAMETOOLONG;
+-
+-eat_dot_dir:
+-	while (*path == '/')
+-		path++;
+-
+-	if (path[0] == '.' && (path[1] == '/' || !path[1])) {
+-		path += 2;
+-		goto eat_dot_dir;
+-	}
+-
+-	/* FIXME: Why shouldn't the user be able to use ".." in the path? */
+-	if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2])
+-	    ) {
+-		printk(KERN_ERR "nfs4_get_root:"
+-		       " Mount path contains reference to \"..\"\n");
+-		return -EINVAL;
+-	}
+-
+-	/* lookup the next FH in the sequence */
+-	memcpy(&lastfh, mntfh, sizeof(lastfh));
+-
+-	dprintk("LookupFH: %*.*s [%s]\n", name.len, name.len, name.name, path);
+-
+-	ret = server->nfs_client->rpc_ops->lookupfh(server, &lastfh, &name,
+-						    mntfh, &fattr);
+-	if (ret < 0) {
+-		dprintk("nfs4_get_root: getroot error = %d\n", -ret);
+-		return ret;
+-	}
+-
+-	if (!S_ISDIR(fattr.mode)) {
+-		printk(KERN_ERR "nfs4_get_root:"
+-		       " lookupfh encountered non-directory\n");
+-		return -ENOTDIR;
+-	}
+-
+-	/* FIXME: Referrals are quite valid here too */
+-	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+-		printk(KERN_ERR "nfs4_get_root:"
+-		       " lookupfh obtained referral\n");
+-		return -EREMOTE;
+-	}
+-
+-	goto next_component;
+-
+-path_walk_complete:
+-	memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
+-	dprintk("<-- nfs4_path_walk() = 0\n");
+-	return 0;
++	memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
++out:
++	nfs_free_fattr(fsinfo.fattr);
++	dprintk("<-- nfs4_get_rootfh() = %d\n", ret);
++	return ret;
+ }
+ 
+ /*
+@@ -239,8 +174,8 @@ path_walk_complete:
+ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
+ {
+ 	struct nfs_server *server = NFS_SB(sb);
+-	struct nfs_fattr fattr;
+-	struct dentry *mntroot;
++	struct nfs_fattr *fattr = NULL;
++	struct dentry *ret;
+ 	struct inode *inode;
+ 	int error;
+ 
+@@ -254,40 +189,50 @@ struct dentry *nfs4_get_root(struct supe
+ 		return ERR_PTR(error);
+ 	}
+ 
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		return ERR_PTR(-ENOMEM);
++
+ 	/* get the actual root for this mount */
+-	error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
++	error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
+ 	if (error < 0) {
+ 		dprintk("nfs_get_root: getattr error = %d\n", -error);
+-		return ERR_PTR(error);
++		ret = ERR_PTR(error);
++		goto out;
+ 	}
+ 
+-	inode = nfs_fhget(sb, mntfh, &fattr);
++	inode = nfs_fhget(sb, mntfh, fattr);
+ 	if (IS_ERR(inode)) {
+ 		dprintk("nfs_get_root: get root inode failed\n");
+-		return ERR_CAST(inode);
++		ret = ERR_CAST(inode);
++		goto out;
+ 	}
+ 
+ 	error = nfs_superblock_set_dummy_root(sb, inode);
+-	if (error != 0)
+-		return ERR_PTR(error);
++	if (error != 0) {
++		ret = ERR_PTR(error);
++		goto out;
++	}
+ 
+ 	/* root dentries normally start off anonymous and get spliced in later
+ 	 * if the dentry tree reaches them; however if the dentry already
+ 	 * exists, we'll pick it up at this point and use it as the root
+ 	 */
+-	mntroot = d_obtain_alias(inode);
+-	if (IS_ERR(mntroot)) {
++	ret = d_obtain_alias(inode);
++	if (IS_ERR(ret)) {
+ 		dprintk("nfs_get_root: get root dentry failed\n");
+-		return mntroot;
++		goto out;
+ 	}
+ 
+-	security_d_instantiate(mntroot, inode);
++	security_d_instantiate(ret, inode);
+ 
+-	if (!mntroot->d_op)
+-		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
++	if (ret->d_op == NULL)
++		ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
+ 
++out:
++	nfs_free_fattr(fattr);
+ 	dprintk("<-- nfs4_get_root()\n");
+-	return mntroot;
++	return ret;
+ }
+ 
+ #endif /* CONFIG_NFS_V4 */
+ 
+ #endif /* CONFIG_NFS_V4 */
+diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inode.c
+--- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-23 11:01:00.357376378 -0400
+@@ -393,8 +393,8 @@ int
+ nfs_setattr(struct dentry *dentry, struct iattr *attr)
+ {
+ 	struct inode *inode = dentry->d_inode;
+-	struct nfs_fattr fattr;
+-	int error;
++	struct nfs_fattr *fattr;
++	int error = -ENOMEM;
+ 
+ 	nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
+ 
+@@ -417,14 +417,20 @@ nfs_setattr(struct dentry *dentry, struc
+ 		filemap_write_and_wait(inode->i_mapping);
+ 		nfs_wb_all(inode);
+ 	}
++
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto out;
+ 	/*
+ 	 * Return any delegations if we're going to change ACLs
+ 	 */
+ 	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
+ 		nfs_inode_return_delegation(inode);
+-	error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
++	error = NFS_PROTO(inode)->setattr(dentry, fattr, attr);
+ 	if (error == 0)
+-		nfs_refresh_inode(inode, &fattr);
++		nfs_refresh_inode(inode, fattr);
++	nfs_free_fattr(fattr);
++out:
+ 	return error;
+ }
+ 
+@@ -682,7 +688,7 @@ int
+ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
+ {
+ 	int		 status = -ESTALE;
+-	struct nfs_fattr fattr;
++	struct nfs_fattr *fattr = NULL;
+ 	struct nfs_inode *nfsi = NFS_I(inode);
+ 
+ 	dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
+@@ -693,8 +699,13 @@ __nfs_revalidate_inode(struct nfs_server
+ 	if (NFS_STALE(inode))
+ 		goto out;
+ 
++	status = -ENOMEM;
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto out;
++
+ 	nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
+-	status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
++	status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr);
+ 	if (status != 0) {
+ 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
+ 			 inode->i_sb->s_id,
+@@ -707,7 +718,7 @@ __nfs_revalidate_inode(struct nfs_server
+ 		goto out;
+ 	}
+ 
+-	status = nfs_refresh_inode(inode, &fattr);
++	status = nfs_refresh_inode(inode, fattr);
+ 	if (status) {
+ 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
+ 			 inode->i_sb->s_id,
+@@ -723,6 +734,7 @@ __nfs_revalidate_inode(struct nfs_server
+ 		(long long)NFS_FILEID(inode));
+ 
+  out:
++	nfs_free_fattr(fattr);
+ 	return status;
+ }
+ 
+@@ -730,9 +742,14 @@ int nfs_attribute_timeout(struct inode *
+ {
+ 	struct nfs_inode *nfsi = NFS_I(inode);
+ 
++	return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
++}
++
++static int nfs_attribute_cache_expired(struct inode *inode)
++{
+ 	if (nfs_have_delegated_attributes(inode))
+ 		return 0;
+-	return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
++	return nfs_attribute_timeout(inode);
+ }
+ 
+ /**
+@@ -745,7 +762,7 @@ int nfs_attribute_timeout(struct inode *
+ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
+ {
+ 	if (!(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR)
+-			&& !nfs_attribute_timeout(inode))
++			&& !nfs_attribute_cache_expired(inode))
+ 		return NFS_STALE(inode) ? -ESTALE : 0;
+ 	return __nfs_revalidate_inode(server, inode);
+ }
+@@ -782,7 +799,8 @@ int nfs_revalidate_mapping(struct inode 
+ 	int ret = 0;
+ 
+ 	if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
+-			|| nfs_attribute_timeout(inode) || NFS_STALE(inode)) {
++			|| nfs_attribute_cache_expired(inode)
++			|| NFS_STALE(inode)) {
+ 		ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ 		if (ret < 0)
+ 			goto out;
+@@ -916,6 +934,26 @@ void nfs_fattr_init(struct nfs_fattr *fa
+ 	fattr->gencount = nfs_inc_attr_generation_counter();
+ }
+ 
++struct nfs_fattr *nfs_alloc_fattr(void)
++{
++	struct nfs_fattr *fattr;
++
++	fattr = kmalloc(sizeof(*fattr), GFP_NOFS);
++	if (fattr != NULL)
++		nfs_fattr_init(fattr);
++	return fattr;
++}
++
++struct nfs_fh *nfs_alloc_fhandle(void)
++{
++	struct nfs_fh *fh;
++
++	fh = kmalloc(sizeof(struct nfs_fh), GFP_NOFS);
++	if (fh != NULL)
++		fh->size = 0;
++	return fh;
++}
++
+ /**
+  * nfs_inode_attrs_need_update - check if the inode attributes need updating
+  * @inode - pointer to inode
+diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/internal.h
+--- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-23 11:01:00.358564151 -0400
+@@ -244,9 +244,7 @@ extern struct dentry *nfs_get_root(struc
+ #ifdef CONFIG_NFS_V4
+ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *);
+ 
+-extern int nfs4_path_walk(struct nfs_server *server,
+-			  struct nfs_fh *mntfh,
+-			  const char *path);
++extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
+ #endif
+ 
+ /* read.c */
+diff -up linux-2.6.34.noarch/fs/nfs/iostat.h.orig linux-2.6.34.noarch/fs/nfs/iostat.h
+--- linux-2.6.34.noarch/fs/nfs/iostat.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/iostat.h	2010-08-23 11:01:00.358564151 -0400
+@@ -36,14 +36,14 @@ static inline void nfs_inc_stats(const s
+ 
+ static inline void nfs_add_server_stats(const struct nfs_server *server,
+ 					enum nfs_stat_bytecounters stat,
+-					unsigned long addend)
++					long addend)
+ {
+ 	this_cpu_add(server->io_stats->bytes[stat], addend);
+ }
+ 
+ static inline void nfs_add_stats(const struct inode *inode,
+ 				 enum nfs_stat_bytecounters stat,
+-				 unsigned long addend)
++				 long addend)
+ {
+ 	nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
+ }
+@@ -51,7 +51,7 @@ static inline void nfs_add_stats(const s
+ #ifdef CONFIG_NFS_FSCACHE
+ static inline void nfs_add_fscache_stats(struct inode *inode,
+ 					 enum nfs_stat_fscachecounters stat,
+-					 unsigned long addend)
++					 long addend)
+ {
+ 	this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend);
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/namespace.c.orig linux-2.6.34.noarch/fs/nfs/namespace.c
+--- linux-2.6.34.noarch/fs/nfs/namespace.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/namespace.c	2010-08-23 11:01:00.359420147 -0400
+@@ -105,8 +105,8 @@ static void * nfs_follow_mountpoint(stru
+ 	struct vfsmount *mnt;
+ 	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+ 	struct dentry *parent;
+-	struct nfs_fh fh;
+-	struct nfs_fattr fattr;
++	struct nfs_fh *fh = NULL;
++	struct nfs_fattr *fattr = NULL;
+ 	int err;
+ 
+ 	dprintk("--> nfs_follow_mountpoint()\n");
+@@ -115,6 +115,12 @@ static void * nfs_follow_mountpoint(stru
+ 	if (IS_ROOT(dentry))
+ 		goto out_err;
+ 
++	err = -ENOMEM;
++	fh = nfs_alloc_fhandle();
++	fattr = nfs_alloc_fattr();
++	if (fh == NULL || fattr == NULL)
++		goto out_err;
++
+ 	dprintk("%s: enter\n", __func__);
+ 	dput(nd->path.dentry);
+ 	nd->path.dentry = dget(dentry);
+@@ -123,16 +129,16 @@ static void * nfs_follow_mountpoint(stru
+ 	parent = dget_parent(nd->path.dentry);
+ 	err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
+ 						  &nd->path.dentry->d_name,
+-						  &fh, &fattr);
++						  fh, fattr);
+ 	dput(parent);
+ 	if (err != 0)
+ 		goto out_err;
+ 
+-	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL)
++	if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
+ 		mnt = nfs_do_refmount(nd->path.mnt, nd->path.dentry);
+ 	else
+-		mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, &fh,
+-				      &fattr);
++		mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, fh,
++				      fattr);
+ 	err = PTR_ERR(mnt);
+ 	if (IS_ERR(mnt))
+ 		goto out_err;
+@@ -151,6 +157,8 @@ static void * nfs_follow_mountpoint(stru
+ 	nd->path.dentry = dget(mnt->mnt_root);
+ 	schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
+ out:
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fh);
+ 	dprintk("%s: done, returned %d\n", __func__, err);
+ 
+ 	dprintk("<-- nfs_follow_mountpoint() = %d\n", err);
+diff -up linux-2.6.34.noarch/fs/nfs/nfs3acl.c.orig linux-2.6.34.noarch/fs/nfs/nfs3acl.c
+--- linux-2.6.34.noarch/fs/nfs/nfs3acl.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3acl.c	2010-08-23 11:01:00.359420147 -0400
+@@ -185,7 +185,6 @@ static void nfs3_cache_acls(struct inode
+ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
+ {
+ 	struct nfs_server *server = NFS_SERVER(inode);
+-	struct nfs_fattr fattr;
+ 	struct page *pages[NFSACL_MAXPAGES] = { };
+ 	struct nfs3_getaclargs args = {
+ 		.fh = NFS_FH(inode),
+@@ -193,7 +192,7 @@ struct posix_acl *nfs3_proc_getacl(struc
+ 		.pages = pages,
+ 	};
+ 	struct nfs3_getaclres res = {
+-		.fattr =	&fattr,
++		0
+ 	};
+ 	struct rpc_message msg = {
+ 		.rpc_argp	= &args,
+@@ -228,7 +227,10 @@ struct posix_acl *nfs3_proc_getacl(struc
+ 
+ 	dprintk("NFS call getacl\n");
+ 	msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL];
+-	nfs_fattr_init(&fattr);
++	res.fattr = nfs_alloc_fattr();
++	if (res.fattr == NULL)
++		return ERR_PTR(-ENOMEM);
++
+ 	status = rpc_call_sync(server->client_acl, &msg, 0);
+ 	dprintk("NFS reply getacl: %d\n", status);
+ 
+@@ -238,7 +240,7 @@ struct posix_acl *nfs3_proc_getacl(struc
+ 
+ 	switch (status) {
+ 		case 0:
+-			status = nfs_refresh_inode(inode, &fattr);
++			status = nfs_refresh_inode(inode, res.fattr);
+ 			break;
+ 		case -EPFNOSUPPORT:
+ 		case -EPROTONOSUPPORT:
+@@ -278,6 +280,7 @@ struct posix_acl *nfs3_proc_getacl(struc
+ getout:
+ 	posix_acl_release(res.acl_access);
+ 	posix_acl_release(res.acl_default);
++	nfs_free_fattr(res.fattr);
+ 
+ 	if (status != 0) {
+ 		posix_acl_release(acl);
+@@ -290,7 +293,7 @@ static int nfs3_proc_setacls(struct inod
+ 		  struct posix_acl *dfacl)
+ {
+ 	struct nfs_server *server = NFS_SERVER(inode);
+-	struct nfs_fattr fattr;
++	struct nfs_fattr *fattr;
+ 	struct page *pages[NFSACL_MAXPAGES];
+ 	struct nfs3_setaclargs args = {
+ 		.inode = inode,
+@@ -335,8 +338,13 @@ static int nfs3_proc_setacls(struct inod
+ 	}
+ 
+ 	dprintk("NFS call setacl\n");
++	status = -ENOMEM;
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto out_freepages;
++
+ 	msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
+-	nfs_fattr_init(&fattr);
++	msg.rpc_resp = fattr;
+ 	status = rpc_call_sync(server->client_acl, &msg, 0);
+ 	nfs_access_zap_cache(inode);
+ 	nfs_zap_acl_cache(inode);
+@@ -344,7 +352,7 @@ static int nfs3_proc_setacls(struct inod
+ 
+ 	switch (status) {
+ 		case 0:
+-			status = nfs_refresh_inode(inode, &fattr);
++			status = nfs_refresh_inode(inode, fattr);
+ 			nfs3_cache_acls(inode, acl, dfacl);
+ 			break;
+ 		case -EPFNOSUPPORT:
+@@ -355,6 +363,7 @@ static int nfs3_proc_setacls(struct inod
+ 		case -ENOTSUPP:
+ 			status = -EOPNOTSUPP;
+ 	}
++	nfs_free_fattr(fattr);
+ out_freepages:
+ 	while (args.npages != 0) {
+ 		args.npages--;
+diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs3proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-23 11:01:00.360574301 -0400
+@@ -144,14 +144,12 @@ static int
+ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
+ 		 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+ {
+-	struct nfs_fattr	dir_attr;
+ 	struct nfs3_diropargs	arg = {
+ 		.fh		= NFS_FH(dir),
+ 		.name		= name->name,
+ 		.len		= name->len
+ 	};
+ 	struct nfs3_diropres	res = {
+-		.dir_attr	= &dir_attr,
+ 		.fh		= fhandle,
+ 		.fattr		= fattr
+ 	};
+@@ -163,29 +161,30 @@ nfs3_proc_lookup(struct inode *dir, stru
+ 	int			status;
+ 
+ 	dprintk("NFS call  lookup %s\n", name->name);
+-	nfs_fattr_init(&dir_attr);
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.dir_attr == NULL)
++		return -ENOMEM;
++
+ 	nfs_fattr_init(fattr);
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+-	nfs_refresh_inode(dir, &dir_attr);
++	nfs_refresh_inode(dir, res.dir_attr);
+ 	if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
+ 		msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+ 		msg.rpc_argp = fhandle;
+ 		msg.rpc_resp = fattr;
+ 		status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	}
++	nfs_free_fattr(res.dir_attr);
+ 	dprintk("NFS reply lookup: %d\n", status);
+ 	return status;
+ }
+ 
+ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+ {
+-	struct nfs_fattr	fattr;
+ 	struct nfs3_accessargs	arg = {
+ 		.fh		= NFS_FH(inode),
+ 	};
+-	struct nfs3_accessres	res = {
+-		.fattr		= &fattr,
+-	};
++	struct nfs3_accessres	res;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs3_procedures[NFS3PROC_ACCESS],
+ 		.rpc_argp	= &arg,
+@@ -193,7 +192,7 @@ static int nfs3_proc_access(struct inode
+ 		.rpc_cred	= entry->cred,
+ 	};
+ 	int mode = entry->mask;
+-	int status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  access\n");
+ 
+@@ -210,9 +209,13 @@ static int nfs3_proc_access(struct inode
+ 		if (mode & MAY_EXEC)
+ 			arg.access |= NFS3_ACCESS_EXECUTE;
+ 	}
+-	nfs_fattr_init(&fattr);
++
++	res.fattr = nfs_alloc_fattr();
++	if (res.fattr == NULL)
++		goto out;
++
+ 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+-	nfs_refresh_inode(inode, &fattr);
++	nfs_refresh_inode(inode, res.fattr);
+ 	if (status == 0) {
+ 		entry->mask = 0;
+ 		if (res.access & NFS3_ACCESS_READ)
+@@ -222,6 +225,8 @@ static int nfs3_proc_access(struct inode
+ 		if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
+ 			entry->mask |= MAY_EXEC;
+ 	}
++	nfs_free_fattr(res.fattr);
++out:
+ 	dprintk("NFS reply access: %d\n", status);
+ 	return status;
+ }
+@@ -229,7 +234,7 @@ static int nfs3_proc_access(struct inode
+ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
+ 		unsigned int pgbase, unsigned int pglen)
+ {
+-	struct nfs_fattr	fattr;
++	struct nfs_fattr	*fattr;
+ 	struct nfs3_readlinkargs args = {
+ 		.fh		= NFS_FH(inode),
+ 		.pgbase		= pgbase,
+@@ -239,14 +244,19 @@ static int nfs3_proc_readlink(struct ino
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs3_procedures[NFS3PROC_READLINK],
+ 		.rpc_argp	= &args,
+-		.rpc_resp	= &fattr,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  readlink\n");
+-	nfs_fattr_init(&fattr);
++	fattr = nfs_alloc_fattr();
++	if (fattr == NULL)
++		goto out;
++	msg.rpc_resp = fattr;
++
+ 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+-	nfs_refresh_inode(inode, &fattr);
++	nfs_refresh_inode(inode, fattr);
++	nfs_free_fattr(fattr);
++out:
+ 	dprintk("NFS reply readlink: %d\n", status);
+ 	return status;
+ }
+@@ -396,12 +406,17 @@ nfs3_proc_remove(struct inode *dir, stru
+ 		.rpc_argp = &arg,
+ 		.rpc_resp = &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  remove %s\n", name->name);
+-	nfs_fattr_init(&res.dir_attr);
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.dir_attr == NULL)
++		goto out;
++
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+-	nfs_post_op_update_inode(dir, &res.dir_attr);
++	nfs_post_op_update_inode(dir, res.dir_attr);
++	nfs_free_fattr(res.dir_attr);
++out:
+ 	dprintk("NFS reply remove: %d\n", status);
+ 	return status;
+ }
+@@ -419,7 +434,7 @@ nfs3_proc_unlink_done(struct rpc_task *t
+ 	if (nfs3_async_handle_jukebox(task, dir))
+ 		return 0;
+ 	res = task->tk_msg.rpc_resp;
+-	nfs_post_op_update_inode(dir, &res->dir_attr);
++	nfs_post_op_update_inode(dir, res->dir_attr);
+ 	return 1;
+ }
+ 
+@@ -427,7 +442,6 @@ static int
+ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
+ 		 struct inode *new_dir, struct qstr *new_name)
+ {
+-	struct nfs_fattr	old_dir_attr, new_dir_attr;
+ 	struct nfs3_renameargs	arg = {
+ 		.fromfh		= NFS_FH(old_dir),
+ 		.fromname	= old_name->name,
+@@ -436,23 +450,27 @@ nfs3_proc_rename(struct inode *old_dir, 
+ 		.toname		= new_name->name,
+ 		.tolen		= new_name->len
+ 	};
+-	struct nfs3_renameres	res = {
+-		.fromattr	= &old_dir_attr,
+-		.toattr		= &new_dir_attr
+-	};
++	struct nfs3_renameres res;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs3_procedures[NFS3PROC_RENAME],
+ 		.rpc_argp	= &arg,
+ 		.rpc_resp	= &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
+-	nfs_fattr_init(&old_dir_attr);
+-	nfs_fattr_init(&new_dir_attr);
++
++	res.fromattr = nfs_alloc_fattr();
++	res.toattr = nfs_alloc_fattr();
++	if (res.fromattr == NULL || res.toattr == NULL)
++		goto out;
++
+ 	status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
+-	nfs_post_op_update_inode(old_dir, &old_dir_attr);
+-	nfs_post_op_update_inode(new_dir, &new_dir_attr);
++	nfs_post_op_update_inode(old_dir, res.fromattr);
++	nfs_post_op_update_inode(new_dir, res.toattr);
++out:
++	nfs_free_fattr(res.toattr);
++	nfs_free_fattr(res.fromattr);
+ 	dprintk("NFS reply rename: %d\n", status);
+ 	return status;
+ }
+@@ -460,30 +478,32 @@ nfs3_proc_rename(struct inode *old_dir, 
+ static int
+ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
+ {
+-	struct nfs_fattr	dir_attr, fattr;
+ 	struct nfs3_linkargs	arg = {
+ 		.fromfh		= NFS_FH(inode),
+ 		.tofh		= NFS_FH(dir),
+ 		.toname		= name->name,
+ 		.tolen		= name->len
+ 	};
+-	struct nfs3_linkres	res = {
+-		.dir_attr	= &dir_attr,
+-		.fattr		= &fattr
+-	};
++	struct nfs3_linkres	res;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs3_procedures[NFS3PROC_LINK],
+ 		.rpc_argp	= &arg,
+ 		.rpc_resp	= &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  link %s\n", name->name);
+-	nfs_fattr_init(&dir_attr);
+-	nfs_fattr_init(&fattr);
++	res.fattr = nfs_alloc_fattr();
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.fattr == NULL || res.dir_attr == NULL)
++		goto out;
++
+ 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+-	nfs_post_op_update_inode(dir, &dir_attr);
+-	nfs_post_op_update_inode(inode, &fattr);
++	nfs_post_op_update_inode(dir, res.dir_attr);
++	nfs_post_op_update_inode(inode, res.fattr);
++out:
++	nfs_free_fattr(res.dir_attr);
++	nfs_free_fattr(res.fattr);
+ 	dprintk("NFS reply link: %d\n", status);
+ 	return status;
+ }
+@@ -554,7 +574,7 @@ out:
+ static int
+ nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
+ {
+-	struct nfs_fattr	dir_attr;
++	struct nfs_fattr	*dir_attr;
+ 	struct nfs3_diropargs	arg = {
+ 		.fh		= NFS_FH(dir),
+ 		.name		= name->name,
+@@ -563,14 +583,19 @@ nfs3_proc_rmdir(struct inode *dir, struc
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs3_procedures[NFS3PROC_RMDIR],
+ 		.rpc_argp	= &arg,
+-		.rpc_resp	= &dir_attr,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  rmdir %s\n", name->name);
+-	nfs_fattr_init(&dir_attr);
++	dir_attr = nfs_alloc_fattr();
++	if (dir_attr == NULL)
++		goto out;
++
++	msg.rpc_resp = dir_attr;
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+-	nfs_post_op_update_inode(dir, &dir_attr);
++	nfs_post_op_update_inode(dir, dir_attr);
++	nfs_free_fattr(dir_attr);
++out:
+ 	dprintk("NFS reply rmdir: %d\n", status);
+ 	return status;
+ }
+@@ -589,7 +614,6 @@ nfs3_proc_readdir(struct dentry *dentry,
+ 		  u64 cookie, struct page *page, unsigned int count, int plus)
+ {
+ 	struct inode		*dir = dentry->d_inode;
+-	struct nfs_fattr	dir_attr;
+ 	__be32			*verf = NFS_COOKIEVERF(dir);
+ 	struct nfs3_readdirargs	arg = {
+ 		.fh		= NFS_FH(dir),
+@@ -600,7 +624,6 @@ nfs3_proc_readdir(struct dentry *dentry,
+ 		.pages		= &page
+ 	};
+ 	struct nfs3_readdirres	res = {
+-		.dir_attr	= &dir_attr,
+ 		.verf		= verf,
+ 		.plus		= plus
+ 	};
+@@ -610,7 +633,7 @@ nfs3_proc_readdir(struct dentry *dentry,
+ 		.rpc_resp	= &res,
+ 		.rpc_cred	= cred
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	if (plus)
+ 		msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS];
+@@ -618,12 +641,17 @@ nfs3_proc_readdir(struct dentry *dentry,
+ 	dprintk("NFS call  readdir%s %d\n",
+ 			plus? "plus" : "", (unsigned int) cookie);
+ 
+-	nfs_fattr_init(&dir_attr);
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.dir_attr == NULL)
++		goto out;
++
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 
+ 	nfs_invalidate_atime(dir);
++	nfs_refresh_inode(dir, res.dir_attr);
+ 
+-	nfs_refresh_inode(dir, &dir_attr);
++	nfs_free_fattr(res.dir_attr);
++out:
+ 	dprintk("NFS reply readdir: %d\n", status);
+ 	return status;
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/nfs3xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs3xdr.c
+--- linux-2.6.34.noarch/fs/nfs/nfs3xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3xdr.c	2010-08-23 11:01:00.361593802 -0400
+@@ -762,7 +762,7 @@ nfs3_xdr_wccstat(struct rpc_rqst *req, _
+ static int
+ nfs3_xdr_removeres(struct rpc_rqst *req, __be32 *p, struct nfs_removeres *res)
+ {
+-	return nfs3_xdr_wccstat(req, p, &res->dir_attr);
++	return nfs3_xdr_wccstat(req, p, res->dir_attr);
+ }
+ 
+ /*
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nfs4_fs.h
+--- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-23 11:01:00.362574935 -0400
+@@ -206,14 +206,14 @@ extern ssize_t nfs4_listxattr(struct den
+ 
+ 
+ /* nfs4proc.c */
+-extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *);
+-extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
++extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
++extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
+ extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
+ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
+ extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
+-extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait);
++extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait);
+ extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
+ extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
+ extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
+@@ -286,7 +286,7 @@ extern void nfs4_put_lock_state(struct n
+ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
+ extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
+ 
+-extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter);
++extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
+ extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
+ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
+ extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4namespace.c.orig linux-2.6.34.noarch/fs/nfs/nfs4namespace.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4namespace.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4namespace.c	2010-08-23 11:01:00.363574219 -0400
+@@ -115,6 +115,7 @@ static struct vfsmount *try_location(str
+ 				     char *page, char *page2,
+ 				     const struct nfs4_fs_location *location)
+ {
++	const size_t addr_bufsize = sizeof(struct sockaddr_storage);
+ 	struct vfsmount *mnt = ERR_PTR(-ENOENT);
+ 	char *mnt_path;
+ 	unsigned int maxbuflen;
+@@ -126,9 +127,12 @@ static struct vfsmount *try_location(str
+ 	mountdata->mnt_path = mnt_path;
+ 	maxbuflen = mnt_path - 1 - page2;
+ 
++	mountdata->addr = kmalloc(addr_bufsize, GFP_KERNEL);
++	if (mountdata->addr == NULL)
++		return ERR_PTR(-ENOMEM);
++
+ 	for (s = 0; s < location->nservers; s++) {
+ 		const struct nfs4_string *buf = &location->servers[s];
+-		struct sockaddr_storage addr;
+ 
+ 		if (buf->len <= 0 || buf->len >= maxbuflen)
+ 			continue;
+@@ -137,11 +141,10 @@ static struct vfsmount *try_location(str
+ 			continue;
+ 
+ 		mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len,
+-				(struct sockaddr *)&addr, sizeof(addr));
++				mountdata->addr, addr_bufsize);
+ 		if (mountdata->addrlen == 0)
+ 			continue;
+ 
+-		mountdata->addr = (struct sockaddr *)&addr;
+ 		rpc_set_port(mountdata->addr, NFS_PORT);
+ 
+ 		memcpy(page2, buf->data, buf->len);
+@@ -156,6 +159,7 @@ static struct vfsmount *try_location(str
+ 		if (!IS_ERR(mnt))
+ 			break;
+ 	}
++	kfree(mountdata->addr);
+ 	return mnt;
+ }
+ 
+@@ -221,8 +225,8 @@ out:
+ 
+ /*
+  * nfs_do_refmount - handle crossing a referral on server
++ * @mnt_parent - mountpoint of referral
+  * @dentry - dentry of referral
+- * @nd - nameidata info
+  *
+  */
+ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry)
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-23 11:01:00.365544029 -0400
+@@ -70,6 +70,9 @@ static int nfs4_do_fsinfo(struct nfs_ser
+ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
+ static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
+ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
++static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
++			    struct nfs_fattr *fattr, struct iattr *sattr,
++			    struct nfs4_state *state);
+ 
+ /* Prevent leaks of NFSv4 errors into userland */
+ static int nfs4_map_errors(int err)
+@@ -714,17 +717,18 @@ static void nfs4_init_opendata_res(struc
+ 
+ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
+ 		struct nfs4_state_owner *sp, fmode_t fmode, int flags,
+-		const struct iattr *attrs)
++		const struct iattr *attrs,
++		gfp_t gfp_mask)
+ {
+ 	struct dentry *parent = dget_parent(path->dentry);
+ 	struct inode *dir = parent->d_inode;
+ 	struct nfs_server *server = NFS_SERVER(dir);
+ 	struct nfs4_opendata *p;
+ 
+-	p = kzalloc(sizeof(*p), GFP_KERNEL);
++	p = kzalloc(sizeof(*p), gfp_mask);
+ 	if (p == NULL)
+ 		goto err;
+-	p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
++	p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask);
+ 	if (p->o_arg.seqid == NULL)
+ 		goto err_free;
+ 	path_get(path);
+@@ -1060,7 +1064,7 @@ static struct nfs4_opendata *nfs4_open_r
+ {
+ 	struct nfs4_opendata *opendata;
+ 
+-	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL);
++	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL, GFP_NOFS);
+ 	if (opendata == NULL)
+ 		return ERR_PTR(-ENOMEM);
+ 	opendata->state = state;
+@@ -1648,7 +1652,7 @@ static int _nfs4_do_open(struct inode *d
+ 	if (path->dentry->d_inode != NULL)
+ 		nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode);
+ 	status = -ENOMEM;
+-	opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr);
++	opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr, GFP_KERNEL);
+ 	if (opendata == NULL)
+ 		goto err_put_state_owner;
+ 
+@@ -1659,15 +1663,24 @@ static int _nfs4_do_open(struct inode *d
+ 	if (status != 0)
+ 		goto err_opendata_put;
+ 
+-	if (opendata->o_arg.open_flags & O_EXCL)
+-		nfs4_exclusive_attrset(opendata, sattr);
+-
+ 	state = nfs4_opendata_to_nfs4_state(opendata);
+ 	status = PTR_ERR(state);
+ 	if (IS_ERR(state))
+ 		goto err_opendata_put;
+ 	if (server->caps & NFS_CAP_POSIX_LOCK)
+ 		set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
++
++	if (opendata->o_arg.open_flags & O_EXCL) {
++		nfs4_exclusive_attrset(opendata, sattr);
++
++		nfs_fattr_init(opendata->o_res.f_attr);
++		status = nfs4_do_setattr(state->inode, cred,
++				opendata->o_res.f_attr, sattr,
++				state);
++		if (status == 0)
++			nfs_setattr_update_inode(state->inode, sattr);
++		nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
++	}
+ 	nfs4_opendata_put(opendata);
+ 	nfs4_put_state_owner(sp);
+ 	*res = state;
+@@ -1914,7 +1927,7 @@ static const struct rpc_call_ops nfs4_cl
+  *
+  * NOTE: Caller must be holding the sp->so_owner semaphore!
+  */
+-int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
++int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait)
+ {
+ 	struct nfs_server *server = NFS_SERVER(state->inode);
+ 	struct nfs4_closedata *calldata;
+@@ -1933,7 +1946,7 @@ int nfs4_do_close(struct path *path, str
+ 	};
+ 	int status = -ENOMEM;
+ 
+-	calldata = kzalloc(sizeof(*calldata), GFP_KERNEL);
++	calldata = kzalloc(sizeof(*calldata), gfp_mask);
+ 	if (calldata == NULL)
+ 		goto out;
+ 	calldata->inode = state->inode;
+@@ -1941,7 +1954,7 @@ int nfs4_do_close(struct path *path, str
+ 	calldata->arg.fh = NFS_FH(state->inode);
+ 	calldata->arg.stateid = &state->open_stateid;
+ 	/* Serialization for the sequence id */
+-	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
++	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid, gfp_mask);
+ 	if (calldata->arg.seqid == NULL)
+ 		goto out_free_calldata;
+ 	calldata->arg.fmode = 0;
+@@ -2404,14 +2417,12 @@ static int nfs4_proc_lookup(struct inode
+ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+ {
+ 	struct nfs_server *server = NFS_SERVER(inode);
+-	struct nfs_fattr fattr;
+ 	struct nfs4_accessargs args = {
+ 		.fh = NFS_FH(inode),
+ 		.bitmask = server->attr_bitmask,
+ 	};
+ 	struct nfs4_accessres res = {
+ 		.server = server,
+-		.fattr = &fattr,
+ 	};
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS],
+@@ -2438,7 +2449,11 @@ static int _nfs4_proc_access(struct inod
+ 		if (mode & MAY_EXEC)
+ 			args.access |= NFS4_ACCESS_EXECUTE;
+ 	}
+-	nfs_fattr_init(&fattr);
++
++	res.fattr = nfs_alloc_fattr();
++	if (res.fattr == NULL)
++		return -ENOMEM;
++
+ 	status = nfs4_call_sync(server, &msg, &args, &res, 0);
+ 	if (!status) {
+ 		entry->mask = 0;
+@@ -2448,8 +2463,9 @@ static int _nfs4_proc_access(struct inod
+ 			entry->mask |= MAY_WRITE;
+ 		if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE))
+ 			entry->mask |= MAY_EXEC;
+-		nfs_refresh_inode(inode, &fattr);
++		nfs_refresh_inode(inode, res.fattr);
+ 	}
++	nfs_free_fattr(res.fattr);
+ 	return status;
+ }
+ 
+@@ -2562,13 +2578,6 @@ nfs4_proc_create(struct inode *dir, stru
+ 	}
+ 	d_add(dentry, igrab(state->inode));
+ 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+-	if (flags & O_EXCL) {
+-		struct nfs_fattr fattr;
+-		status = nfs4_do_setattr(state->inode, cred, &fattr, sattr, state);
+-		if (status == 0)
+-			nfs_setattr_update_inode(state->inode, sattr);
+-		nfs_post_op_update_inode(state->inode, &fattr);
+-	}
+ 	if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
+ 		status = nfs4_intent_set_file(nd, &path, state, fmode);
+ 	else
+@@ -2596,14 +2605,19 @@ static int _nfs4_proc_remove(struct inod
+ 		.rpc_argp = &args,
+ 		.rpc_resp = &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
++
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.dir_attr == NULL)
++		goto out;
+ 
+-	nfs_fattr_init(&res.dir_attr);
+ 	status = nfs4_call_sync(server, &msg, &args, &res, 1);
+ 	if (status == 0) {
+ 		update_changeattr(dir, &res.cinfo);
+-		nfs_post_op_update_inode(dir, &res.dir_attr);
++		nfs_post_op_update_inode(dir, res.dir_attr);
+ 	}
++	nfs_free_fattr(res.dir_attr);
++out:
+ 	return status;
+ }
+ 
+@@ -2638,7 +2652,7 @@ static int nfs4_proc_unlink_done(struct 
+ 	if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
+ 		return 0;
+ 	update_changeattr(dir, &res->cinfo);
+-	nfs_post_op_update_inode(dir, &res->dir_attr);
++	nfs_post_op_update_inode(dir, res->dir_attr);
+ 	return 1;
+ }
+ 
+@@ -2653,29 +2667,31 @@ static int _nfs4_proc_rename(struct inod
+ 		.new_name = new_name,
+ 		.bitmask = server->attr_bitmask,
+ 	};
+-	struct nfs_fattr old_fattr, new_fattr;
+ 	struct nfs4_rename_res res = {
+ 		.server = server,
+-		.old_fattr = &old_fattr,
+-		.new_fattr = &new_fattr,
+ 	};
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME],
+ 		.rpc_argp = &arg,
+ 		.rpc_resp = &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 	
+-	nfs_fattr_init(res.old_fattr);
+-	nfs_fattr_init(res.new_fattr);
+-	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
++	res.old_fattr = nfs_alloc_fattr();
++	res.new_fattr = nfs_alloc_fattr();
++	if (res.old_fattr == NULL || res.new_fattr == NULL)
++		goto out;
+ 
++	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
+ 	if (!status) {
+ 		update_changeattr(old_dir, &res.old_cinfo);
+ 		nfs_post_op_update_inode(old_dir, res.old_fattr);
+ 		update_changeattr(new_dir, &res.new_cinfo);
+ 		nfs_post_op_update_inode(new_dir, res.new_fattr);
+ 	}
++out:
++	nfs_free_fattr(res.new_fattr);
++	nfs_free_fattr(res.old_fattr);
+ 	return status;
+ }
+ 
+@@ -2702,28 +2718,30 @@ static int _nfs4_proc_link(struct inode 
+ 		.name   = name,
+ 		.bitmask = server->attr_bitmask,
+ 	};
+-	struct nfs_fattr fattr, dir_attr;
+ 	struct nfs4_link_res res = {
+ 		.server = server,
+-		.fattr = &fattr,
+-		.dir_attr = &dir_attr,
+ 	};
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK],
+ 		.rpc_argp = &arg,
+ 		.rpc_resp = &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
++
++	res.fattr = nfs_alloc_fattr();
++	res.dir_attr = nfs_alloc_fattr();
++	if (res.fattr == NULL || res.dir_attr == NULL)
++		goto out;
+ 
+-	nfs_fattr_init(res.fattr);
+-	nfs_fattr_init(res.dir_attr);
+ 	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
+ 	if (!status) {
+ 		update_changeattr(dir, &res.cinfo);
+ 		nfs_post_op_update_inode(dir, res.dir_attr);
+ 		nfs_post_op_update_inode(inode, res.fattr);
+ 	}
+-
++out:
++	nfs_free_fattr(res.dir_attr);
++	nfs_free_fattr(res.fattr);
+ 	return status;
+ }
+ 
+@@ -3146,23 +3164,31 @@ static void nfs4_proc_commit_setup(struc
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
+ }
+ 
++struct nfs4_renewdata {
++	struct nfs_client	*client;
++	unsigned long		timestamp;
++};
++
+ /*
+  * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special
+  * standalone procedure for queueing an asynchronous RENEW.
+  */
+-static void nfs4_renew_release(void *data)
++static void nfs4_renew_release(void *calldata)
+ {
+-	struct nfs_client *clp = data;
++	struct nfs4_renewdata *data = calldata;
++	struct nfs_client *clp = data->client;
+ 
+ 	if (atomic_read(&clp->cl_count) > 1)
+ 		nfs4_schedule_state_renewal(clp);
+ 	nfs_put_client(clp);
++	kfree(data);
+ }
+ 
+-static void nfs4_renew_done(struct rpc_task *task, void *data)
++static void nfs4_renew_done(struct rpc_task *task, void *calldata)
+ {
+-	struct nfs_client *clp = data;
+-	unsigned long timestamp = task->tk_start;
++	struct nfs4_renewdata *data = calldata;
++	struct nfs_client *clp = data->client;
++	unsigned long timestamp = data->timestamp;
+ 
+ 	if (task->tk_status < 0) {
+ 		/* Unless we're shutting down, schedule state recovery! */
+@@ -3188,11 +3214,17 @@ int nfs4_proc_async_renew(struct nfs_cli
+ 		.rpc_argp	= clp,
+ 		.rpc_cred	= cred,
+ 	};
++	struct nfs4_renewdata *data;
+ 
+ 	if (!atomic_inc_not_zero(&clp->cl_count))
+ 		return -EIO;
++	data = kmalloc(sizeof(*data), GFP_KERNEL);
++	if (data == NULL)
++		return -ENOMEM;
++	data->client = clp;
++	data->timestamp = jiffies;
+ 	return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
+-			&nfs4_renew_ops, clp);
++			&nfs4_renew_ops, data);
+ }
+ 
+ int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
+@@ -3494,7 +3526,9 @@ nfs4_async_handle_error(struct rpc_task 
+ 	return _nfs4_async_handle_error(task, server, server->nfs_client, state);
+ }
+ 
+-int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
++int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
++		unsigned short port, struct rpc_cred *cred,
++		struct nfs4_setclientid_res *res)
+ {
+ 	nfs4_verifier sc_verifier;
+ 	struct nfs4_setclientid setclientid = {
+@@ -3504,7 +3538,7 @@ int nfs4_proc_setclientid(struct nfs_cli
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
+ 		.rpc_argp = &setclientid,
+-		.rpc_resp = clp,
++		.rpc_resp = res,
+ 		.rpc_cred = cred,
+ 	};
+ 	__be32 *p;
+@@ -3547,12 +3581,14 @@ int nfs4_proc_setclientid(struct nfs_cli
+ 	return status;
+ }
+ 
+-static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
++static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp,
++		struct nfs4_setclientid_res *arg,
++		struct rpc_cred *cred)
+ {
+ 	struct nfs_fsinfo fsinfo;
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM],
+-		.rpc_argp = clp,
++		.rpc_argp = arg,
+ 		.rpc_resp = &fsinfo,
+ 		.rpc_cred = cred,
+ 	};
+@@ -3570,12 +3606,14 @@ static int _nfs4_proc_setclientid_confir
+ 	return status;
+ }
+ 
+-int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
++int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
++		struct nfs4_setclientid_res *arg,
++		struct rpc_cred *cred)
+ {
+ 	long timeout = 0;
+ 	int err;
+ 	do {
+-		err = _nfs4_proc_setclientid_confirm(clp, cred);
++		err = _nfs4_proc_setclientid_confirm(clp, arg, cred);
+ 		switch (err) {
+ 			case 0:
+ 				return err;
+@@ -3667,7 +3705,7 @@ static int _nfs4_proc_delegreturn(struct
+ 	};
+ 	int status = 0;
+ 
+-	data = kzalloc(sizeof(*data), GFP_KERNEL);
++	data = kzalloc(sizeof(*data), GFP_NOFS);
+ 	if (data == NULL)
+ 		return -ENOMEM;
+ 	data->args.fhandle = &data->fh;
+@@ -3823,7 +3861,7 @@ static struct nfs4_unlockdata *nfs4_allo
+ 	struct nfs4_unlockdata *p;
+ 	struct inode *inode = lsp->ls_state->inode;
+ 
+-	p = kzalloc(sizeof(*p), GFP_KERNEL);
++	p = kzalloc(sizeof(*p), GFP_NOFS);
+ 	if (p == NULL)
+ 		return NULL;
+ 	p->arg.fh = NFS_FH(inode);
+@@ -3961,7 +3999,7 @@ static int nfs4_proc_unlck(struct nfs4_s
+ 	if (test_bit(NFS_DELEGATED_STATE, &state->flags))
+ 		goto out;
+ 	lsp = request->fl_u.nfs4_fl.owner;
+-	seqid = nfs_alloc_seqid(&lsp->ls_seqid);
++	seqid = nfs_alloc_seqid(&lsp->ls_seqid, GFP_KERNEL);
+ 	status = -ENOMEM;
+ 	if (seqid == NULL)
+ 		goto out;
+@@ -3989,22 +4027,23 @@ struct nfs4_lockdata {
+ };
+ 
+ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
+-		struct nfs_open_context *ctx, struct nfs4_lock_state *lsp)
++		struct nfs_open_context *ctx, struct nfs4_lock_state *lsp,
++		gfp_t gfp_mask)
+ {
+ 	struct nfs4_lockdata *p;
+ 	struct inode *inode = lsp->ls_state->inode;
+ 	struct nfs_server *server = NFS_SERVER(inode);
+ 
+-	p = kzalloc(sizeof(*p), GFP_KERNEL);
++	p = kzalloc(sizeof(*p), gfp_mask);
+ 	if (p == NULL)
+ 		return NULL;
+ 
+ 	p->arg.fh = NFS_FH(inode);
+ 	p->arg.fl = &p->fl;
+-	p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid);
++	p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid, gfp_mask);
+ 	if (p->arg.open_seqid == NULL)
+ 		goto out_free;
+-	p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
++	p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid, gfp_mask);
+ 	if (p->arg.lock_seqid == NULL)
+ 		goto out_free_seqid;
+ 	p->arg.lock_stateid = &lsp->ls_stateid;
+@@ -4158,7 +4197,8 @@ static int _nfs4_do_setlk(struct nfs4_st
+ 
+ 	dprintk("%s: begin!\n", __func__);
+ 	data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
+-			fl->fl_u.nfs4_fl.owner);
++			fl->fl_u.nfs4_fl.owner,
++			recovery_type == NFS_LOCK_NEW ? GFP_KERNEL : GFP_NOFS);
+ 	if (data == NULL)
+ 		return -ENOMEM;
+ 	if (IS_SETLKW(cmd))
+@@ -4647,7 +4687,7 @@ static int nfs4_reset_slot_table(struct 
+ 	if (max_reqs != tbl->max_slots) {
+ 		ret = -ENOMEM;
+ 		new = kmalloc(max_reqs * sizeof(struct nfs4_slot),
+-			      GFP_KERNEL);
++			      GFP_NOFS);
+ 		if (!new)
+ 			goto out;
+ 		ret = 0;
+@@ -4712,7 +4752,7 @@ static int nfs4_init_slot_table(struct n
+ 
+ 	dprintk("--> %s: max_reqs=%u\n", __func__, max_slots);
+ 
+-	slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_KERNEL);
++	slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_NOFS);
+ 	if (!slot)
+ 		goto out;
+ 	ret = 0;
+@@ -4761,7 +4801,7 @@ struct nfs4_session *nfs4_alloc_session(
+ 	struct nfs4_session *session;
+ 	struct nfs4_slot_table *tbl;
+ 
+-	session = kzalloc(sizeof(struct nfs4_session), GFP_KERNEL);
++	session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
+ 	if (!session)
+ 		return NULL;
+ 
+@@ -5105,8 +5145,8 @@ static int nfs41_proc_async_sequence(str
+ 
+ 	if (!atomic_inc_not_zero(&clp->cl_count))
+ 		return -EIO;
+-	args = kzalloc(sizeof(*args), GFP_KERNEL);
+-	res = kzalloc(sizeof(*res), GFP_KERNEL);
++	args = kzalloc(sizeof(*args), GFP_NOFS);
++	res = kzalloc(sizeof(*res), GFP_NOFS);
+ 	if (!args || !res) {
+ 		kfree(args);
+ 		kfree(res);
+@@ -5207,7 +5247,7 @@ static int nfs41_proc_reclaim_complete(s
+ 	int status = -ENOMEM;
+ 
+ 	dprintk("--> %s\n", __func__);
+-	calldata = kzalloc(sizeof(*calldata), GFP_KERNEL);
++	calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
+ 	if (calldata == NULL)
+ 		goto out;
+ 	calldata->clp = clp;
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-23 11:01:00.367574218 -0400
+@@ -62,6 +62,7 @@ static LIST_HEAD(nfs4_clientid_list);
+ 
+ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
+ {
++	struct nfs4_setclientid_res clid;
+ 	unsigned short port;
+ 	int status;
+ 
+@@ -69,11 +70,15 @@ int nfs4_init_clientid(struct nfs_client
+ 	if (clp->cl_addr.ss_family == AF_INET6)
+ 		port = nfs_callback_tcpport6;
+ 
+-	status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred);
+-	if (status == 0)
+-		status = nfs4_proc_setclientid_confirm(clp, cred);
+-	if (status == 0)
+-		nfs4_schedule_state_renewal(clp);
++	status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
++	if (status != 0)
++		goto out;
++	status = nfs4_proc_setclientid_confirm(clp, &clid, cred);
++	if (status != 0)
++		goto out;
++	clp->cl_clientid = clid.clientid;
++	nfs4_schedule_state_renewal(clp);
++out:
+ 	return status;
+ }
+ 
+@@ -361,7 +366,7 @@ nfs4_alloc_state_owner(void)
+ {
+ 	struct nfs4_state_owner *sp;
+ 
+-	sp = kzalloc(sizeof(*sp),GFP_KERNEL);
++	sp = kzalloc(sizeof(*sp),GFP_NOFS);
+ 	if (!sp)
+ 		return NULL;
+ 	spin_lock_init(&sp->so_lock);
+@@ -435,7 +440,7 @@ nfs4_alloc_open_state(void)
+ {
+ 	struct nfs4_state *state;
+ 
+-	state = kzalloc(sizeof(*state), GFP_KERNEL);
++	state = kzalloc(sizeof(*state), GFP_NOFS);
+ 	if (!state)
+ 		return NULL;
+ 	atomic_set(&state->count, 1);
+@@ -537,7 +542,8 @@ void nfs4_put_open_state(struct nfs4_sta
+ /*
+  * Close the current file.
+  */
+-static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fmode, int wait)
++static void __nfs4_close(struct path *path, struct nfs4_state *state,
++		fmode_t fmode, gfp_t gfp_mask, int wait)
+ {
+ 	struct nfs4_state_owner *owner = state->owner;
+ 	int call_close = 0;
+@@ -578,17 +584,17 @@ static void __nfs4_close(struct path *pa
+ 		nfs4_put_open_state(state);
+ 		nfs4_put_state_owner(owner);
+ 	} else
+-		nfs4_do_close(path, state, wait);
++		nfs4_do_close(path, state, gfp_mask, wait);
+ }
+ 
+ void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
+ {
+-	__nfs4_close(path, state, fmode, 0);
++	__nfs4_close(path, state, fmode, GFP_NOFS, 0);
+ }
+ 
+ void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode)
+ {
+-	__nfs4_close(path, state, fmode, 1);
++	__nfs4_close(path, state, fmode, GFP_KERNEL, 1);
+ }
+ 
+ /*
+@@ -618,7 +624,7 @@ static struct nfs4_lock_state *nfs4_allo
+ 	struct nfs4_lock_state *lsp;
+ 	struct nfs_client *clp = state->owner->so_client;
+ 
+-	lsp = kzalloc(sizeof(*lsp), GFP_KERNEL);
++	lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
+ 	if (lsp == NULL)
+ 		return NULL;
+ 	rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue");
+@@ -754,11 +760,11 @@ void nfs4_copy_stateid(nfs4_stateid *dst
+ 	nfs4_put_lock_state(lsp);
+ }
+ 
+-struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
++struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask)
+ {
+ 	struct nfs_seqid *new;
+ 
+-	new = kmalloc(sizeof(*new), GFP_KERNEL);
++	new = kmalloc(sizeof(*new), gfp_mask);
+ 	if (new != NULL) {
+ 		new->sequence = counter;
+ 		INIT_LIST_HEAD(&new->list);
+@@ -1347,7 +1353,7 @@ static int nfs4_recall_slot(struct nfs_c
+ 
+ 	nfs4_begin_drain_session(clp);
+ 	new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot),
+-		      GFP_KERNEL);
++		      GFP_NOFS);
+         if (!new)
+ 		return -ENOMEM;
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-23 11:00:23.792491380 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-23 11:01:00.369544055 -0400
+@@ -1504,14 +1504,14 @@ static void encode_setclientid(struct xd
+ 	hdr->replen += decode_setclientid_maxsz;
+ }
+ 
+-static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state, struct compound_hdr *hdr)
++static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr)
+ {
+ 	__be32 *p;
+ 
+ 	p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE);
+ 	*p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM);
+-	p = xdr_encode_hyper(p, client_state->cl_clientid);
+-	xdr_encode_opaque_fixed(p, client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
++	p = xdr_encode_hyper(p, arg->clientid);
++	xdr_encode_opaque_fixed(p, arg->confirm.data, NFS4_VERIFIER_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_setclientid_confirm_maxsz;
+ }
+@@ -2324,7 +2324,7 @@ static int nfs4_xdr_enc_setclientid(stru
+ /*
+  * a SETCLIENTID_CONFIRM request
+  */
+-static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp)
++static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid_res *arg)
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+@@ -2334,7 +2334,7 @@ static int nfs4_xdr_enc_setclientid_conf
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ 	encode_compound_hdr(&xdr, req, &hdr);
+-	encode_setclientid_confirm(&xdr, clp, &hdr);
++	encode_setclientid_confirm(&xdr, arg, &hdr);
+ 	encode_putrootfh(&xdr, &hdr);
+ 	encode_fsinfo(&xdr, lease_bitmap, &hdr);
+ 	encode_nops(&hdr);
+@@ -4397,7 +4397,7 @@ out_overflow:
+ 	return -EIO;
+ }
+ 
+-static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
++static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_res *res)
+ {
+ 	__be32 *p;
+ 	uint32_t opnum;
+@@ -4417,8 +4417,8 @@ static int decode_setclientid(struct xdr
+ 		p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE);
+ 		if (unlikely(!p))
+ 			goto out_overflow;
+-		p = xdr_decode_hyper(p, &clp->cl_clientid);
+-		memcpy(clp->cl_confirm.data, p, NFS4_VERIFIER_SIZE);
++		p = xdr_decode_hyper(p, &res->clientid);
++		memcpy(res->confirm.data, p, NFS4_VERIFIER_SIZE);
+ 	} else if (nfserr == NFSERR_CLID_INUSE) {
+ 		uint32_t len;
+ 
+@@ -4815,7 +4815,7 @@ static int nfs4_xdr_dec_remove(struct rp
+ 		goto out;
+ 	if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
+ 		goto out;
+-	decode_getfattr(&xdr, &res->dir_attr, res->server,
++	decode_getfattr(&xdr, res->dir_attr, res->server,
+ 			!RPC_IS_ASYNC(rqstp->rq_task));
+ out:
+ 	return status;
+@@ -5498,7 +5498,7 @@ static int nfs4_xdr_dec_renew(struct rpc
+  * Decode SETCLIENTID response
+  */
+ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
+-		struct nfs_client *clp)
++		struct nfs4_setclientid_res *res)
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr;
+@@ -5507,7 +5507,7 @@ static int nfs4_xdr_dec_setclientid(stru
+ 	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+ 	status = decode_compound_hdr(&xdr, &hdr);
+ 	if (!status)
+-		status = decode_setclientid(&xdr, clp);
++		status = decode_setclientid(&xdr, res);
+ 	return status;
+ }
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/nfsroot.c.orig linux-2.6.34.noarch/fs/nfs/nfsroot.c
+--- linux-2.6.34.noarch/fs/nfs/nfsroot.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfsroot.c	2010-08-23 11:01:00.371574358 -0400
+@@ -488,7 +488,6 @@ static int __init root_nfs_ports(void)
+  */
+ static int __init root_nfs_get_handle(void)
+ {
+-	struct nfs_fh fh;
+ 	struct sockaddr_in sin;
+ 	unsigned int auth_flav_len = 0;
+ 	struct nfs_mount_request request = {
+@@ -499,21 +498,24 @@ static int __init root_nfs_get_handle(vo
+ 					NFS_MNT3_VERSION : NFS_MNT_VERSION,
+ 		.protocol	= (nfs_data.flags & NFS_MOUNT_TCP) ?
+ 					XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP,
+-		.fh		= &fh,
+ 		.auth_flav_len	= &auth_flav_len,
+ 	};
+-	int status;
++	int status = -ENOMEM;
+ 
++	request.fh = nfs_alloc_fhandle();
++	if (!request.fh)
++		goto out;
+ 	set_sockaddr(&sin, servaddr, htons(mount_port));
+ 	status = nfs_mount(&request);
+ 	if (status < 0)
+ 		printk(KERN_ERR "Root-NFS: Server returned error %d "
+ 				"while mounting %s\n", status, nfs_export_path);
+ 	else {
+-		nfs_data.root.size = fh.size;
+-		memcpy(nfs_data.root.data, fh.data, fh.size);
++		nfs_data.root.size = request.fh->size;
++		memcpy(&nfs_data.root.data, request.fh->data, request.fh->size);
+ 	}
+-
++	nfs_free_fhandle(request.fh);
++out:
+ 	return status;
+ }
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/pagelist.c
+--- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-23 11:01:00.371574358 -0400
+@@ -60,16 +60,10 @@ nfs_create_request(struct nfs_open_conte
+ {
+ 	struct nfs_page		*req;
+ 
+-	for (;;) {
+-		/* try to allocate the request struct */
+-		req = nfs_page_alloc();
+-		if (req != NULL)
+-			break;
+-
+-		if (fatal_signal_pending(current))
+-			return ERR_PTR(-ERESTARTSYS);
+-		yield();
+-	}
++	/* try to allocate the request struct */
++	req = nfs_page_alloc();
++	if (req == NULL)
++		return ERR_PTR(-ENOMEM);
+ 
+ 	/* Initialize the request struct. Initially, we assume a
+ 	 * long write-back delay. This will be adjusted in
+diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.c
+--- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-23 11:01:00.372574292 -0400
+@@ -224,35 +224,60 @@ static int nfs_proc_readlink(struct inod
+ 	return status;
+ }
+ 
++struct nfs_createdata {
++	struct nfs_createargs arg;
++	struct nfs_diropok res;
++	struct nfs_fh fhandle;
++	struct nfs_fattr fattr;
++};
++
++static struct nfs_createdata *nfs_alloc_createdata(struct inode *dir,
++		struct dentry *dentry, struct iattr *sattr)
++{
++	struct nfs_createdata *data;
++
++	data = kmalloc(sizeof(*data), GFP_KERNEL);
++
++	if (data != NULL) {
++		data->arg.fh = NFS_FH(dir);
++		data->arg.name = dentry->d_name.name;
++		data->arg.len = dentry->d_name.len;
++		data->arg.sattr = sattr;
++		nfs_fattr_init(&data->fattr);
++		data->fhandle.size = 0;
++		data->res.fh = &data->fhandle;
++		data->res.fattr = &data->fattr;
++	}
++	return data;
++};
++
++static void nfs_free_createdata(const struct nfs_createdata *data)
++{
++	kfree(data);
++}
++
+ static int
+ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+ 		int flags, struct nameidata *nd)
+ {
+-	struct nfs_fh		fhandle;
+-	struct nfs_fattr	fattr;
+-	struct nfs_createargs	arg = {
+-		.fh		= NFS_FH(dir),
+-		.name		= dentry->d_name.name,
+-		.len		= dentry->d_name.len,
+-		.sattr		= sattr
+-	};
+-	struct nfs_diropok	res = {
+-		.fh		= &fhandle,
+-		.fattr		= &fattr
+-	};
++	struct nfs_createdata *data;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs_procedures[NFSPROC_CREATE],
+-		.rpc_argp	= &arg,
+-		.rpc_resp	= &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+-	nfs_fattr_init(&fattr);
+ 	dprintk("NFS call  create %s\n", dentry->d_name.name);
++	data = nfs_alloc_createdata(dir, dentry, sattr);
++	if (data == NULL)
++		goto out;
++	msg.rpc_argp = &data->arg;
++	msg.rpc_resp = &data->res;
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+ 	if (status == 0)
+-		status = nfs_instantiate(dentry, &fhandle, &fattr);
++		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
++	nfs_free_createdata(data);
++out:
+ 	dprintk("NFS reply create: %d\n", status);
+ 	return status;
+ }
+@@ -264,24 +289,12 @@ static int
+ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+ 	       dev_t rdev)
+ {
+-	struct nfs_fh fhandle;
+-	struct nfs_fattr fattr;
+-	struct nfs_createargs	arg = {
+-		.fh		= NFS_FH(dir),
+-		.name		= dentry->d_name.name,
+-		.len		= dentry->d_name.len,
+-		.sattr		= sattr
+-	};
+-	struct nfs_diropok	res = {
+-		.fh		= &fhandle,
+-		.fattr		= &fattr
+-	};
++	struct nfs_createdata *data;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs_procedures[NFSPROC_CREATE],
+-		.rpc_argp	= &arg,
+-		.rpc_resp	= &res,
+ 	};
+-	int status, mode;
++	umode_t mode;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  mknod %s\n", dentry->d_name.name);
+ 
+@@ -294,17 +307,24 @@ nfs_proc_mknod(struct inode *dir, struct
+ 		sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */
+ 	}
+ 
+-	nfs_fattr_init(&fattr);
++	data = nfs_alloc_createdata(dir, dentry, sattr);
++	if (data == NULL)
++		goto out;
++	msg.rpc_argp = &data->arg;
++	msg.rpc_resp = &data->res;
++
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+ 
+ 	if (status == -EINVAL && S_ISFIFO(mode)) {
+ 		sattr->ia_mode = mode;
+-		nfs_fattr_init(&fattr);
++		nfs_fattr_init(data->res.fattr);
+ 		status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	}
+ 	if (status == 0)
+-		status = nfs_instantiate(dentry, &fhandle, &fattr);
++		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
++	nfs_free_createdata(data);
++out:
+ 	dprintk("NFS reply mknod: %d\n", status);
+ 	return status;
+ }
+@@ -398,8 +418,8 @@ static int
+ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
+ 		 unsigned int len, struct iattr *sattr)
+ {
+-	struct nfs_fh fhandle;
+-	struct nfs_fattr fattr;
++	struct nfs_fh *fh;
++	struct nfs_fattr *fattr;
+ 	struct nfs_symlinkargs	arg = {
+ 		.fromfh		= NFS_FH(dir),
+ 		.fromname	= dentry->d_name.name,
+@@ -412,12 +432,18 @@ nfs_proc_symlink(struct inode *dir, stru
+ 		.rpc_proc	= &nfs_procedures[NFSPROC_SYMLINK],
+ 		.rpc_argp	= &arg,
+ 	};
+-	int			status;
++	int status = -ENAMETOOLONG;
++
++	dprintk("NFS call  symlink %s\n", dentry->d_name.name);
+ 
+ 	if (len > NFS2_MAXPATHLEN)
+-		return -ENAMETOOLONG;
++		goto out;
+ 
+-	dprintk("NFS call  symlink %s\n", dentry->d_name.name);
++	fh = nfs_alloc_fhandle();
++	fattr = nfs_alloc_fattr();
++	status = -ENOMEM;
++	if (fh == NULL || fattr == NULL)
++		goto out;
+ 
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+@@ -427,12 +453,12 @@ nfs_proc_symlink(struct inode *dir, stru
+ 	 * filehandle size to zero indicates to nfs_instantiate that it
+ 	 * should fill in the data with a LOOKUP call on the wire.
+ 	 */
+-	if (status == 0) {
+-		nfs_fattr_init(&fattr);
+-		fhandle.size = 0;
+-		status = nfs_instantiate(dentry, &fhandle, &fattr);
+-	}
++	if (status == 0)
++		status = nfs_instantiate(dentry, fh, fattr);
+ 
++	nfs_free_fattr(fattr);
++	nfs_free_fhandle(fh);
++out:
+ 	dprintk("NFS reply symlink: %d\n", status);
+ 	return status;
+ }
+@@ -440,31 +466,25 @@ nfs_proc_symlink(struct inode *dir, stru
+ static int
+ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
+ {
+-	struct nfs_fh fhandle;
+-	struct nfs_fattr fattr;
+-	struct nfs_createargs	arg = {
+-		.fh		= NFS_FH(dir),
+-		.name		= dentry->d_name.name,
+-		.len		= dentry->d_name.len,
+-		.sattr		= sattr
+-	};
+-	struct nfs_diropok	res = {
+-		.fh		= &fhandle,
+-		.fattr		= &fattr
+-	};
++	struct nfs_createdata *data;
+ 	struct rpc_message msg = {
+ 		.rpc_proc	= &nfs_procedures[NFSPROC_MKDIR],
+-		.rpc_argp	= &arg,
+-		.rpc_resp	= &res,
+ 	};
+-	int			status;
++	int status = -ENOMEM;
+ 
+ 	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
+-	nfs_fattr_init(&fattr);
++	data = nfs_alloc_createdata(dir, dentry, sattr);
++	if (data == NULL)
++		goto out;
++	msg.rpc_argp = &data->arg;
++	msg.rpc_resp = &data->res;
++
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+ 	if (status == 0)
+-		status = nfs_instantiate(dentry, &fhandle, &fattr);
++		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
++	nfs_free_createdata(data);
++out:
+ 	dprintk("NFS reply mkdir: %d\n", status);
+ 	return status;
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.c
+--- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-23 11:01:00.373574317 -0400
+@@ -40,7 +40,7 @@ static mempool_t *nfs_rdata_mempool;
+ 
+ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+ {
+-	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);
++	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_KERNEL);
+ 
+ 	if (p) {
+ 		memset(p, 0, sizeof(*p));
+@@ -50,7 +50,7 @@ struct nfs_read_data *nfs_readdata_alloc
+ 		if (pagecount <= ARRAY_SIZE(p->page_array))
+ 			p->pagevec = p->page_array;
+ 		else {
+-			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
++			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+ 			if (!p->pagevec) {
+ 				mempool_free(p, nfs_rdata_mempool);
+ 				p = NULL;
+diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/super.c
+--- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-23 11:00:23.794511661 -0400
++++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-23 11:01:00.374564179 -0400
+@@ -141,7 +141,6 @@ static const match_table_t nfs_mount_opt
+ 	{ Opt_resvport, "resvport" },
+ 	{ Opt_noresvport, "noresvport" },
+ 	{ Opt_fscache, "fsc" },
+-	{ Opt_fscache_uniq, "fsc=%s" },
+ 	{ Opt_nofscache, "nofsc" },
+ 
+ 	{ Opt_port, "port=%s" },
+@@ -171,6 +170,7 @@ static const match_table_t nfs_mount_opt
+ 	{ Opt_mountaddr, "mountaddr=%s" },
+ 
+ 	{ Opt_lookupcache, "lookupcache=%s" },
++	{ Opt_fscache_uniq, "fsc=%s" },
+ 
+ 	{ Opt_err, NULL }
+ };
+@@ -423,15 +423,19 @@ static int nfs_statfs(struct dentry *den
+ 	unsigned char blockbits;
+ 	unsigned long blockres;
+ 	struct nfs_fh *fh = NFS_FH(dentry->d_inode);
+-	struct nfs_fattr fattr;
+-	struct nfs_fsstat res = {
+-			.fattr = &fattr,
+-	};
+-	int error;
++	struct nfs_fsstat res;
++	int error = -ENOMEM;
++
++	res.fattr = nfs_alloc_fattr();
++	if (res.fattr == NULL)
++		goto out_err;
+ 
+ 	error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
++
++	nfs_free_fattr(res.fattr);
+ 	if (error < 0)
+ 		goto out_err;
++
+ 	buf->f_type = NFS_SUPER_MAGIC;
+ 
+ 	/*
+@@ -1060,14 +1064,6 @@ static int nfs_parse_mount_options(char 
+ 			kfree(mnt->fscache_uniq);
+ 			mnt->fscache_uniq = NULL;
+ 			break;
+-		case Opt_fscache_uniq:
+-			string = match_strdup(args);
+-			if (!string)
+-				goto out_nomem;
+-			kfree(mnt->fscache_uniq);
+-			mnt->fscache_uniq = string;
+-			mnt->options |= NFS_OPTION_FSCACHE;
+-			break;
+ 
+ 		/*
+ 		 * options that take numeric values
+@@ -1398,6 +1394,14 @@ static int nfs_parse_mount_options(char 
+ 					return 0;
+ 			};
+ 			break;
++		case Opt_fscache_uniq:
++			string = match_strdup(args);
++			if (string == NULL)
++				goto out_nomem;
++			kfree(mnt->fscache_uniq);
++			mnt->fscache_uniq = string;
++			mnt->options |= NFS_OPTION_FSCACHE;
++			break;
+ 
+ 		/*
+ 		 * Special options
+@@ -2186,7 +2190,7 @@ static int nfs_get_sb(struct file_system
+ 	int error = -ENOMEM;
+ 
+ 	data = nfs_alloc_parsed_mount_data(3);
+-	mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
++	mntfh = nfs_alloc_fhandle();
+ 	if (data == NULL || mntfh == NULL)
+ 		goto out_free_fh;
+ 
+@@ -2261,7 +2265,7 @@ out:
+ 	kfree(data->fscache_uniq);
+ 	security_free_mnt_opts(&data->lsm_opts);
+ out_free_fh:
+-	kfree(mntfh);
++	nfs_free_fhandle(mntfh);
+ 	kfree(data);
+ 	return error;
+ 
+@@ -2570,7 +2574,7 @@ static int nfs4_remote_get_sb(struct fil
+ 	};
+ 	int error = -ENOMEM;
+ 
+-	mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
++	mntfh = nfs_alloc_fhandle();
+ 	if (data == NULL || mntfh == NULL)
+ 		goto out_free_fh;
+ 
+@@ -2628,7 +2632,7 @@ static int nfs4_remote_get_sb(struct fil
+ out:
+ 	security_free_mnt_opts(&data->lsm_opts);
+ out_free_fh:
+-	kfree(mntfh);
++	nfs_free_fhandle(mntfh);
+ 	return error;
+ 
+ out_free:
+@@ -2683,41 +2687,120 @@ out_freepage:
+ 	free_page((unsigned long)page);
+ }
+ 
++struct nfs_referral_count {
++	struct list_head list;
++	const struct task_struct *task;
++	unsigned int referral_count;
++};
++
++static LIST_HEAD(nfs_referral_count_list);
++static DEFINE_SPINLOCK(nfs_referral_count_list_lock);
++
++static struct nfs_referral_count *nfs_find_referral_count(void)
++{
++	struct nfs_referral_count *p;
++
++	list_for_each_entry(p, &nfs_referral_count_list, list) {
++		if (p->task == current)
++			return p;
++	}
++	return NULL;
++}
++
++#define NFS_MAX_NESTED_REFERRALS 2
++
++static int nfs_referral_loop_protect(void)
++{
++	struct nfs_referral_count *p, *new;
++	int ret = -ENOMEM;
++
++	new = kmalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		goto out;
++	new->task = current;
++	new->referral_count = 1;
++
++	ret = 0;
++	spin_lock(&nfs_referral_count_list_lock);
++	p = nfs_find_referral_count();
++	if (p != NULL) {
++		if (p->referral_count >= NFS_MAX_NESTED_REFERRALS)
++			ret = -ELOOP;
++		else
++			p->referral_count++;
++	} else {
++		list_add(&new->list, &nfs_referral_count_list);
++		new = NULL;
++	}
++	spin_unlock(&nfs_referral_count_list_lock);
++	kfree(new);
++out:
++	return ret;
++}
++
++static void nfs_referral_loop_unprotect(void)
++{
++	struct nfs_referral_count *p;
++
++	spin_lock(&nfs_referral_count_list_lock);
++	p = nfs_find_referral_count();
++	p->referral_count--;
++	if (p->referral_count == 0)
++		list_del(&p->list);
++	else
++		p = NULL;
++	spin_unlock(&nfs_referral_count_list_lock);
++	kfree(p);
++}
++
+ static int nfs_follow_remote_path(struct vfsmount *root_mnt,
+ 		const char *export_path, struct vfsmount *mnt_target)
+ {
++	struct nameidata *nd = NULL;
+ 	struct mnt_namespace *ns_private;
+-	struct nameidata nd;
+ 	struct super_block *s;
+ 	int ret;
+ 
++	nd = kmalloc(sizeof(*nd), GFP_KERNEL);
++	if (nd == NULL)
++		return -ENOMEM;
++
+ 	ns_private = create_mnt_ns(root_mnt);
+ 	ret = PTR_ERR(ns_private);
+ 	if (IS_ERR(ns_private))
+ 		goto out_mntput;
+ 
++	ret = nfs_referral_loop_protect();
++	if (ret != 0)
++		goto out_put_mnt_ns;
++
+ 	ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
+-			export_path, LOOKUP_FOLLOW, &nd);
++			export_path, LOOKUP_FOLLOW, nd);
+ 
++	nfs_referral_loop_unprotect();
+ 	put_mnt_ns(ns_private);
+ 
+ 	if (ret != 0)
+ 		goto out_err;
+ 
+-	s = nd.path.mnt->mnt_sb;
++	s = nd->path.mnt->mnt_sb;
+ 	atomic_inc(&s->s_active);
+ 	mnt_target->mnt_sb = s;
+-	mnt_target->mnt_root = dget(nd.path.dentry);
++	mnt_target->mnt_root = dget(nd->path.dentry);
+ 
+ 	/* Correct the device pathname */
+-	nfs_fix_devname(&nd.path, mnt_target);
++	nfs_fix_devname(&nd->path, mnt_target);
+ 
+-	path_put(&nd.path);
++	path_put(&nd->path);
++	kfree(nd);
+ 	down_write(&s->s_umount);
+ 	return 0;
++out_put_mnt_ns:
++	put_mnt_ns(ns_private);
+ out_mntput:
+ 	mntput(root_mnt);
+ out_err:
++	kfree(nd);
+ 	return ret;
+ }
+ 
+@@ -2888,17 +2971,21 @@ static int nfs4_remote_referral_get_sb(s
+ 	struct super_block *s;
+ 	struct nfs_server *server;
+ 	struct dentry *mntroot;
+-	struct nfs_fh mntfh;
++	struct nfs_fh *mntfh;
+ 	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
+ 	struct nfs_sb_mountdata sb_mntdata = {
+ 		.mntflags = flags,
+ 	};
+-	int error;
++	int error = -ENOMEM;
+ 
+ 	dprintk("--> nfs4_referral_get_sb()\n");
+ 
++	mntfh = nfs_alloc_fhandle();
++	if (mntfh == NULL)
++		goto out_err_nofh;
++
+ 	/* create a new volume representation */
+-	server = nfs4_create_referral_server(data, &mntfh);
++	server = nfs4_create_referral_server(data, mntfh);
+ 	if (IS_ERR(server)) {
+ 		error = PTR_ERR(server);
+ 		goto out_err_noserver;
+@@ -2930,7 +3017,7 @@ static int nfs4_remote_referral_get_sb(s
+ 		nfs_fscache_get_super_cookie(s, NULL, data);
+ 	}
+ 
+-	mntroot = nfs4_get_root(s, &mntfh);
++	mntroot = nfs4_get_root(s, mntfh);
+ 	if (IS_ERR(mntroot)) {
+ 		error = PTR_ERR(mntroot);
+ 		goto error_splat_super;
+@@ -2947,12 +3034,15 @@ static int nfs4_remote_referral_get_sb(s
+ 
+ 	security_sb_clone_mnt_opts(data->sb, s);
+ 
++	nfs_free_fhandle(mntfh);
+ 	dprintk("<-- nfs4_referral_get_sb() = 0\n");
+ 	return 0;
+ 
+ out_err_nosb:
+ 	nfs_free_server(server);
+ out_err_noserver:
++	nfs_free_fhandle(mntfh);
++out_err_nofh:
+ 	dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
+ 	return error;
+ 
+@@ -2961,6 +3051,7 @@ error_splat_super:
+ 		bdi_unregister(&server->backing_dev_info);
+ error_splat_bdi:
+ 	deactivate_locked_super(s);
++	nfs_free_fhandle(mntfh);
+ 	dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
+ 	return error;
+ }
+diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unlink.c
+--- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-23 11:01:00.375554592 -0400
+@@ -23,6 +23,7 @@ struct nfs_unlinkdata {
+ 	struct nfs_removeres res;
+ 	struct inode *dir;
+ 	struct rpc_cred	*cred;
++	struct nfs_fattr dir_attr;
+ };
+ 
+ /**
+@@ -169,7 +170,7 @@ static int nfs_do_call_unlink(struct den
+ 	}
+ 	nfs_sb_active(dir->i_sb);
+ 	data->args.fh = NFS_FH(dir);
+-	nfs_fattr_init(&data->res.dir_attr);
++	nfs_fattr_init(data->res.dir_attr);
+ 
+ 	NFS_PROTO(dir)->unlink_setup(&msg, dir);
+ 
+@@ -259,6 +260,7 @@ nfs_async_unlink(struct inode *dir, stru
+ 		goto out_free;
+ 	}
+ 	data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	data->res.dir_attr = &data->dir_attr;
+ 
+ 	status = -EBUSY;
+ 	spin_lock(&dentry->d_lock);
+diff -up linux-2.6.34.noarch/include/linux/ktime.h.orig linux-2.6.34.noarch/include/linux/ktime.h
+--- linux-2.6.34.noarch/include/linux/ktime.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/ktime.h	2010-08-23 11:01:00.377554285 -0400
+@@ -130,7 +130,7 @@ static inline ktime_t timeval_to_ktime(s
+ /* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */
+ #define ktime_to_ns(kt)			((kt).tv64)
+ 
+-#else
++#else	/* !((BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR)) */
+ 
+ /*
+  * Helper macros/inlines to get the ktime_t math right in the timespec
+@@ -275,7 +275,7 @@ static inline s64 ktime_to_ns(const ktim
+ 	return (s64) kt.tv.sec * NSEC_PER_SEC + kt.tv.nsec;
+ }
+ 
+-#endif
++#endif	/* !((BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR)) */
+ 
+ /**
+  * ktime_equal - Compares two ktime_t variables to see if they are equal
+@@ -295,6 +295,12 @@ static inline s64 ktime_to_us(const ktim
+ 	return (s64) tv.tv_sec * USEC_PER_SEC + tv.tv_usec;
+ }
+ 
++static inline s64 ktime_to_ms(const ktime_t kt)
++{
++	struct timeval tv = ktime_to_timeval(kt);
++	return (s64) tv.tv_sec * MSEC_PER_SEC + tv.tv_usec / USEC_PER_MSEC;
++}
++
+ static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier)
+ {
+        return ktime_to_us(ktime_sub(later, earlier));
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/include/linux/nfs_fs.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-23 11:00:23.822502111 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-23 11:01:00.378563926 -0400
+@@ -356,6 +356,20 @@ extern struct nfs_open_context *nfs_find
+ extern u64 nfs_compat_user_ino64(u64 fileid);
+ extern void nfs_fattr_init(struct nfs_fattr *fattr);
+ 
++extern struct nfs_fattr *nfs_alloc_fattr(void);
++
++static inline void nfs_free_fattr(const struct nfs_fattr *fattr)
++{
++	kfree(fattr);
++}
++
++extern struct nfs_fh *nfs_alloc_fhandle(void);
++
++static inline void nfs_free_fhandle(const struct nfs_fh *fh)
++{
++	kfree(fh);
++}
++
+ /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
+ extern __be32 root_nfs_parse_addr(char *name); /*__init*/
+ extern unsigned long nfs_inc_attr_generation_counter(void);
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/include/linux/nfs_fs_sb.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-23 11:01:00.380553887 -0400
+@@ -44,7 +44,6 @@ struct nfs_client {
+ 
+ #ifdef CONFIG_NFS_V4
+ 	u64			cl_clientid;	/* constant */
+-	nfs4_verifier		cl_confirm;
+ 	unsigned long		cl_state;
+ 
+ 	struct rb_root		cl_openowner_id;
+diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/include/linux/nfs_xdr.h
+--- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-23 11:01:00.381564072 -0400
+@@ -386,8 +386,8 @@ struct nfs_removeargs {
+ 
+ struct nfs_removeres {
+ 	const struct nfs_server *server;
++	struct nfs_fattr	*dir_attr;
+ 	struct nfs4_change_info	cinfo;
+-	struct nfs_fattr	dir_attr;
+ 	struct nfs4_sequence_res 	seq_res;
+ };
+ 
+@@ -824,6 +824,11 @@ struct nfs4_setclientid {
+ 	u32				sc_cb_ident;
+ };
+ 
++struct nfs4_setclientid_res {
++	u64				clientid;
++	nfs4_verifier			confirm;
++};
++
+ struct nfs4_statfs_arg {
+ 	const struct nfs_fh *		fh;
+ 	const u32 *			bitmask;
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/auth_gss.h.orig linux-2.6.34.noarch/include/linux/sunrpc/auth_gss.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/auth_gss.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/auth_gss.h	2010-08-23 11:01:00.382564026 -0400
+@@ -82,6 +82,7 @@ struct gss_cred {
+ 	enum rpc_gss_svc	gc_service;
+ 	struct gss_cl_ctx	*gc_ctx;
+ 	struct gss_upcall_msg	*gc_upcall;
++	unsigned long		gc_upcall_timestamp;
+ 	unsigned char		gc_machine_cred : 1;
+ };
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/auth.h.orig linux-2.6.34.noarch/include/linux/sunrpc/auth.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/auth.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/auth.h	2010-08-23 11:01:00.382564026 -0400
+@@ -54,6 +54,7 @@ struct rpc_cred {
+ #define RPCAUTH_CRED_NEW	0
+ #define RPCAUTH_CRED_UPTODATE	1
+ #define RPCAUTH_CRED_HASHED	2
++#define RPCAUTH_CRED_NEGATIVE	3
+ 
+ #define RPCAUTH_CRED_MAGIC	0x0f4aa4f0
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/gss_api.h.orig linux-2.6.34.noarch/include/linux/sunrpc/gss_api.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/gss_api.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/gss_api.h	2010-08-23 11:01:00.383574314 -0400
+@@ -35,7 +35,8 @@ int gss_import_sec_context(
+ 		const void*		input_token,
+ 		size_t			bufsize,
+ 		struct gss_api_mech	*mech,
+-		struct gss_ctx		**ctx_id);
++		struct gss_ctx		**ctx_id,
++		gfp_t			gfp_mask);
+ u32 gss_get_mic(
+ 		struct gss_ctx		*ctx_id,
+ 		struct xdr_buf		*message,
+@@ -80,6 +81,8 @@ struct gss_api_mech {
+ 	/* pseudoflavors supported by this mechanism: */
+ 	int			gm_pf_num;
+ 	struct pf_desc *	gm_pfs;
++	/* Should the following be a callback operation instead? */
++	const char		*gm_upcall_enctypes;
+ };
+ 
+ /* and must provide the following operations: */
+@@ -87,7 +90,8 @@ struct gss_api_ops {
+ 	int (*gss_import_sec_context)(
+ 			const void		*input_token,
+ 			size_t			bufsize,
+-			struct gss_ctx		*ctx_id);
++			struct gss_ctx		*ctx_id,
++			gfp_t			gfp_mask);
+ 	u32 (*gss_get_mic)(
+ 			struct gss_ctx		*ctx_id,
+ 			struct xdr_buf		*message,
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/gss_krb5.h.orig linux-2.6.34.noarch/include/linux/sunrpc/gss_krb5.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/gss_krb5.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/gss_krb5.h	2010-08-23 11:01:00.383574314 -0400
+@@ -4,7 +4,7 @@
+  *  Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h,
+  *  lib/gssapi/krb5/gssapiP_krb5.h, and others
+  *
+- *  Copyright (c) 2000 The Regents of the University of Michigan.
++ *  Copyright (c) 2000-2008 The Regents of the University of Michigan.
+  *  All rights reserved.
+  *
+  *  Andy Adamson   <andros@umich.edu>
+@@ -36,17 +36,86 @@
+  *
+  */
+ 
++#include <linux/crypto.h>
+ #include <linux/sunrpc/auth_gss.h>
+ #include <linux/sunrpc/gss_err.h>
+ #include <linux/sunrpc/gss_asn1.h>
+ 
++/* Length of constant used in key derivation */
++#define GSS_KRB5_K5CLENGTH (5)
++
++/* Maximum key length (in bytes) for the supported crypto algorithms*/
++#define GSS_KRB5_MAX_KEYLEN (32)
++
++/* Maximum checksum function output for the supported crypto algorithms */
++#define GSS_KRB5_MAX_CKSUM_LEN  (20)
++
++/* Maximum blocksize for the supported crypto algorithms */
++#define GSS_KRB5_MAX_BLOCKSIZE  (16)
++
++struct krb5_ctx;
++
++struct gss_krb5_enctype {
++	const u32		etype;		/* encryption (key) type */
++	const u32		ctype;		/* checksum type */
++	const char		*name;		/* "friendly" name */
++	const char		*encrypt_name;	/* crypto encrypt name */
++	const char		*cksum_name;	/* crypto checksum name */
++	const u16		signalg;	/* signing algorithm */
++	const u16		sealalg;	/* sealing algorithm */
++	const u32		blocksize;	/* encryption blocksize */
++	const u32		conflen;	/* confounder length
++						   (normally the same as
++						   the blocksize) */
++	const u32		cksumlength;	/* checksum length */
++	const u32		keyed_cksum;	/* is it a keyed cksum? */
++	const u32		keybytes;	/* raw key len, in bytes */
++	const u32		keylength;	/* final key len, in bytes */
++	u32 (*encrypt) (struct crypto_blkcipher *tfm,
++			void *iv, void *in, void *out,
++			int length);		/* encryption function */
++	u32 (*decrypt) (struct crypto_blkcipher *tfm,
++			void *iv, void *in, void *out,
++			int length);		/* decryption function */
++	u32 (*mk_key) (const struct gss_krb5_enctype *gk5e,
++		       struct xdr_netobj *in,
++		       struct xdr_netobj *out);	/* complete key generation */
++	u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset,
++			   struct xdr_buf *buf, int ec,
++			   struct page **pages); /* v2 encryption function */
++	u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset,
++			   struct xdr_buf *buf, u32 *headskip,
++			   u32 *tailskip);	/* v2 decryption function */
++};
++
++/* krb5_ctx flags definitions */
++#define KRB5_CTX_FLAG_INITIATOR         0x00000001
++#define KRB5_CTX_FLAG_CFX               0x00000002
++#define KRB5_CTX_FLAG_ACCEPTOR_SUBKEY   0x00000004
++
+ struct krb5_ctx {
+ 	int			initiate; /* 1 = initiating, 0 = accepting */
++	u32			enctype;
++	u32			flags;
++	const struct gss_krb5_enctype *gk5e; /* enctype-specific info */
+ 	struct crypto_blkcipher	*enc;
+ 	struct crypto_blkcipher	*seq;
++	struct crypto_blkcipher *acceptor_enc;
++	struct crypto_blkcipher *initiator_enc;
++	struct crypto_blkcipher *acceptor_enc_aux;
++	struct crypto_blkcipher *initiator_enc_aux;
++	u8			Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */
++	u8			cksum[GSS_KRB5_MAX_KEYLEN];
+ 	s32			endtime;
+ 	u32			seq_send;
++	u64			seq_send64;
+ 	struct xdr_netobj	mech_used;
++	u8			initiator_sign[GSS_KRB5_MAX_KEYLEN];
++	u8			acceptor_sign[GSS_KRB5_MAX_KEYLEN];
++	u8			initiator_seal[GSS_KRB5_MAX_KEYLEN];
++	u8			acceptor_seal[GSS_KRB5_MAX_KEYLEN];
++	u8			initiator_integ[GSS_KRB5_MAX_KEYLEN];
++	u8			acceptor_integ[GSS_KRB5_MAX_KEYLEN];
+ };
+ 
+ extern spinlock_t krb5_seq_lock;
+@@ -57,6 +126,18 @@ extern spinlock_t krb5_seq_lock;
+ #define KG_TOK_MIC_MSG    0x0101
+ #define KG_TOK_WRAP_MSG   0x0201
+ 
++#define KG2_TOK_INITIAL     0x0101
++#define KG2_TOK_RESPONSE    0x0202
++#define KG2_TOK_MIC         0x0404
++#define KG2_TOK_WRAP        0x0504
++
++#define KG2_TOKEN_FLAG_SENTBYACCEPTOR   0x01
++#define KG2_TOKEN_FLAG_SEALED           0x02
++#define KG2_TOKEN_FLAG_ACCEPTORSUBKEY   0x04
++
++#define KG2_RESP_FLAG_ERROR             0x0001
++#define KG2_RESP_FLAG_DELEG_OK          0x0002
++
+ enum sgn_alg {
+ 	SGN_ALG_DES_MAC_MD5 = 0x0000,
+ 	SGN_ALG_MD2_5 = 0x0001,
+@@ -81,6 +162,9 @@ enum seal_alg {
+ #define CKSUMTYPE_RSA_MD5_DES		0x0008
+ #define CKSUMTYPE_NIST_SHA		0x0009
+ #define CKSUMTYPE_HMAC_SHA1_DES3	0x000c
++#define CKSUMTYPE_HMAC_SHA1_96_AES128   0x000f
++#define CKSUMTYPE_HMAC_SHA1_96_AES256   0x0010
++#define CKSUMTYPE_HMAC_MD5_ARCFOUR      -138 /* Microsoft md5 hmac cksumtype */
+ 
+ /* from gssapi_err_krb5.h */
+ #define KG_CCACHE_NOMATCH                        (39756032L)
+@@ -111,11 +195,56 @@ enum seal_alg {
+ #define ENCTYPE_DES3_CBC_RAW    0x0006	/* DES-3 cbc mode raw */
+ #define ENCTYPE_DES_HMAC_SHA1   0x0008
+ #define ENCTYPE_DES3_CBC_SHA1   0x0010
++#define ENCTYPE_AES128_CTS_HMAC_SHA1_96 0x0011
++#define ENCTYPE_AES256_CTS_HMAC_SHA1_96 0x0012
++#define ENCTYPE_ARCFOUR_HMAC            0x0017
++#define ENCTYPE_ARCFOUR_HMAC_EXP        0x0018
+ #define ENCTYPE_UNKNOWN         0x01ff
+ 
+-s32
+-make_checksum(char *, char *header, int hdrlen, struct xdr_buf *body,
+-		   int body_offset, struct xdr_netobj *cksum);
++/*
++ * Constants used for key derivation
++ */
++/* for 3DES */
++#define KG_USAGE_SEAL (22)
++#define KG_USAGE_SIGN (23)
++#define KG_USAGE_SEQ  (24)
++
++/* from rfc3961 */
++#define KEY_USAGE_SEED_CHECKSUM         (0x99)
++#define KEY_USAGE_SEED_ENCRYPTION       (0xAA)
++#define KEY_USAGE_SEED_INTEGRITY        (0x55)
++
++/* from rfc4121 */
++#define KG_USAGE_ACCEPTOR_SEAL  (22)
++#define KG_USAGE_ACCEPTOR_SIGN  (23)
++#define KG_USAGE_INITIATOR_SEAL (24)
++#define KG_USAGE_INITIATOR_SIGN (25)
++
++/*
++ * This compile-time check verifies that we will not exceed the
++ * slack space allotted by the client and server auth_gss code
++ * before they call gss_wrap().
++ */
++#define GSS_KRB5_MAX_SLACK_NEEDED \
++	(GSS_KRB5_TOK_HDR_LEN     /* gss token header */         \
++	+ GSS_KRB5_MAX_CKSUM_LEN  /* gss token checksum */       \
++	+ GSS_KRB5_MAX_BLOCKSIZE  /* confounder */               \
++	+ GSS_KRB5_MAX_BLOCKSIZE  /* possible padding */         \
++	+ GSS_KRB5_TOK_HDR_LEN    /* encrypted hdr in v2 token */\
++	+ GSS_KRB5_MAX_CKSUM_LEN  /* encryption hmac */          \
++	+ 4 + 4                   /* RPC verifier */             \
++	+ GSS_KRB5_TOK_HDR_LEN                                   \
++	+ GSS_KRB5_MAX_CKSUM_LEN)
++
++u32
++make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
++		struct xdr_buf *body, int body_offset, u8 *cksumkey,
++		unsigned int usage, struct xdr_netobj *cksumout);
++
++u32
++make_checksum_v2(struct krb5_ctx *, char *header, int hdrlen,
++		 struct xdr_buf *body, int body_offset, u8 *key,
++		 unsigned int usage, struct xdr_netobj *cksum);
+ 
+ u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *,
+ 		struct xdr_netobj *);
+@@ -149,11 +278,54 @@ gss_decrypt_xdr_buf(struct crypto_blkcip
+ 		    int offset);
+ 
+ s32
+-krb5_make_seq_num(struct crypto_blkcipher *key,
++krb5_make_seq_num(struct krb5_ctx *kctx,
++		struct crypto_blkcipher *key,
+ 		int direction,
+ 		u32 seqnum, unsigned char *cksum, unsigned char *buf);
+ 
+ s32
+-krb5_get_seq_num(struct crypto_blkcipher *key,
++krb5_get_seq_num(struct krb5_ctx *kctx,
+ 	       unsigned char *cksum,
+ 	       unsigned char *buf, int *direction, u32 *seqnum);
++
++int
++xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen);
++
++u32
++krb5_derive_key(const struct gss_krb5_enctype *gk5e,
++		const struct xdr_netobj *inkey,
++		struct xdr_netobj *outkey,
++		const struct xdr_netobj *in_constant,
++		gfp_t gfp_mask);
++
++u32
++gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e,
++		       struct xdr_netobj *randombits,
++		       struct xdr_netobj *key);
++
++u32
++gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e,
++		      struct xdr_netobj *randombits,
++		      struct xdr_netobj *key);
++
++u32
++gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
++		     struct xdr_buf *buf, int ec,
++		     struct page **pages);
++
++u32
++gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset,
++		     struct xdr_buf *buf, u32 *plainoffset,
++		     u32 *plainlen);
++
++int
++krb5_rc4_setup_seq_key(struct krb5_ctx *kctx,
++		       struct crypto_blkcipher *cipher,
++		       unsigned char *cksum);
++
++int
++krb5_rc4_setup_enc_key(struct krb5_ctx *kctx,
++		       struct crypto_blkcipher *cipher,
++		       s32 seqnum);
++void
++gss_krb5_make_confounder(char *p, u32 conflen);
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/metrics.h.orig linux-2.6.34.noarch/include/linux/sunrpc/metrics.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/metrics.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/metrics.h	2010-08-23 11:01:00.384611889 -0400
+@@ -26,6 +26,7 @@
+ #define _LINUX_SUNRPC_METRICS_H
+ 
+ #include <linux/seq_file.h>
++#include <linux/ktime.h>
+ 
+ #define RPC_IOSTATS_VERS	"1.0"
+ 
+@@ -58,9 +59,9 @@ struct rpc_iostats {
+ 	 * and the total time the request spent from init to release
+ 	 * are measured.
+ 	 */
+-	unsigned long long	om_queue,	/* jiffies queued for xmit */
+-				om_rtt,		/* jiffies for RPC RTT */
+-				om_execute;	/* jiffies for RPC execution */
++	ktime_t			om_queue,	/* queued for xmit */
++				om_rtt,		/* RPC RTT */
++				om_execute;	/* RPC execution */
+ } ____cacheline_aligned;
+ 
+ struct rpc_task;
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/sched.h.orig linux-2.6.34.noarch/include/linux/sunrpc/sched.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/sched.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/sched.h	2010-08-23 11:01:00.385361873 -0400
+@@ -10,6 +10,7 @@
+ #define _LINUX_SUNRPC_SCHED_H_
+ 
+ #include <linux/timer.h>
++#include <linux/ktime.h>
+ #include <linux/sunrpc/types.h>
+ #include <linux/spinlock.h>
+ #include <linux/wait.h>
+@@ -40,21 +41,15 @@ struct rpc_wait {
+  * This is the RPC task struct
+  */
+ struct rpc_task {
+-#ifdef RPC_DEBUG
+-	unsigned long		tk_magic;	/* 0xf00baa */
+-#endif
+ 	atomic_t		tk_count;	/* Reference count */
+ 	struct list_head	tk_task;	/* global list of tasks */
+ 	struct rpc_clnt *	tk_client;	/* RPC client */
+ 	struct rpc_rqst *	tk_rqstp;	/* RPC request */
+-	int			tk_status;	/* result of last operation */
+ 
+ 	/*
+ 	 * RPC call state
+ 	 */
+ 	struct rpc_message	tk_msg;		/* RPC call info */
+-	__u8			tk_garb_retry;
+-	__u8			tk_cred_retry;
+ 
+ 	/*
+ 	 * callback	to be executed after waking up
+@@ -67,7 +62,6 @@ struct rpc_task {
+ 	void *			tk_calldata;
+ 
+ 	unsigned long		tk_timeout;	/* timeout for rpc_sleep() */
+-	unsigned short		tk_flags;	/* misc flags */
+ 	unsigned long		tk_runstate;	/* Task run status */
+ 	struct workqueue_struct	*tk_workqueue;	/* Normally rpciod, but could
+ 						 * be any workqueue
+@@ -78,17 +72,19 @@ struct rpc_task {
+ 		struct rpc_wait		tk_wait;	/* RPC wait */
+ 	} u;
+ 
+-	unsigned short		tk_timeouts;	/* maj timeouts */
+-	size_t			tk_bytes_sent;	/* total bytes sent */
+-	unsigned long		tk_start;	/* RPC task init timestamp */
+-	long			tk_rtt;		/* round-trip time (jiffies) */
++	ktime_t			tk_start;	/* RPC task init timestamp */
+ 
+ 	pid_t			tk_owner;	/* Process id for batching tasks */
+-	unsigned char		tk_priority : 2;/* Task priority */
++	int			tk_status;	/* result of last operation */
++	unsigned short		tk_flags;	/* misc flags */
++	unsigned short		tk_timeouts;	/* maj timeouts */
+ 
+ #ifdef RPC_DEBUG
+ 	unsigned short		tk_pid;		/* debugging aid */
+ #endif
++	unsigned char		tk_priority : 2,/* Task priority */
++				tk_garb_retry : 2,
++				tk_cred_retry : 2;
+ };
+ #define tk_xprt			tk_client->cl_xprt
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xdr.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-23 11:01:00.385361873 -0400
+@@ -1,7 +1,10 @@
+ /*
+- * include/linux/sunrpc/xdr.h
++ * XDR standard data types and function declarations
+  *
+  * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
++ *
++ * Based on:
++ *   RFC 4506 "XDR: External Data Representation Standard", May 2006
+  */
+ 
+ #ifndef _SUNRPC_XDR_H_
+@@ -62,7 +65,6 @@ struct xdr_buf {
+ 
+ 	unsigned int	buflen,		/* Total length of storage buffer */
+ 			len;		/* Length of XDR encoded message */
+-
+ };
+ 
+ /*
+@@ -178,7 +180,7 @@ struct xdr_array2_desc {
+ };
+ 
+ extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
+-                             struct xdr_array2_desc *desc);
++			     struct xdr_array2_desc *desc);
+ extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
+ 			     struct xdr_array2_desc *desc);
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/xprt.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xprt.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/xprt.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xprt.h	2010-08-23 11:01:00.386574704 -0400
+@@ -13,6 +13,7 @@
+ #include <linux/socket.h>
+ #include <linux/in.h>
+ #include <linux/kref.h>
++#include <linux/ktime.h>
+ #include <linux/sunrpc/sched.h>
+ #include <linux/sunrpc/xdr.h>
+ #include <linux/sunrpc/msg_prot.h>
+@@ -65,8 +66,6 @@ struct rpc_rqst {
+ 	struct rpc_task *	rq_task;	/* RPC task data */
+ 	__be32			rq_xid;		/* request XID */
+ 	int			rq_cong;	/* has incremented xprt->cong */
+-	int			rq_reply_bytes_recvd;	/* number of reply */
+-							/* bytes received */
+ 	u32			rq_seqno;	/* gss seq no. used on req. */
+ 	int			rq_enc_pages_num;
+ 	struct page		**rq_enc_pages;	/* scratch pages for use by
+@@ -77,12 +76,16 @@ struct rpc_rqst {
+ 	__u32 *			rq_buffer;	/* XDR encode buffer */
+ 	size_t			rq_callsize,
+ 				rq_rcvsize;
++	size_t			rq_xmit_bytes_sent;	/* total bytes sent */
++	size_t			rq_reply_bytes_recvd;	/* total reply bytes */
++							/* received */
+ 
+ 	struct xdr_buf		rq_private_buf;		/* The receive buffer
+ 							 * used in the softirq.
+ 							 */
+ 	unsigned long		rq_majortimeo;	/* major timeout alarm */
+ 	unsigned long		rq_timeout;	/* Current timeout value */
++	ktime_t			rq_rtt;		/* round-trip time */
+ 	unsigned int		rq_retries;	/* # of retries */
+ 	unsigned int		rq_connect_cookie;
+ 						/* A cookie used to track the
+@@ -94,7 +97,7 @@ struct rpc_rqst {
+ 	 */
+ 	u32			rq_bytes_sent;	/* Bytes we have sent */
+ 
+-	unsigned long		rq_xtime;	/* when transmitted */
++	ktime_t			rq_xtime;	/* transmit time stamp */
+ 	int			rq_ntrans;
+ 
+ #if defined(CONFIG_NFS_V4_1)
+@@ -174,8 +177,7 @@ struct rpc_xprt {
+ 	/*
+ 	 * Connection of transports
+ 	 */
+-	unsigned long		connect_timeout,
+-				bind_timeout,
++	unsigned long		bind_timeout,
+ 				reestablish_timeout;
+ 	unsigned int		connect_cookie;	/* A cookie that gets bumped
+ 						   every time the transport
+@@ -294,7 +296,6 @@ void			xprt_set_retrans_timeout_rtt(stru
+ void			xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
+ void			xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action);
+ void			xprt_write_space(struct rpc_xprt *xprt);
+-void			xprt_update_rtt(struct rpc_task *task);
+ void			xprt_adjust_cwnd(struct rpc_task *task, int result);
+ struct rpc_rqst *	xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid);
+ void			xprt_complete_rqst(struct rpc_task *task, int copied);
+diff -up linux-2.6.34.noarch/net/sunrpc/auth.c.orig linux-2.6.34.noarch/net/sunrpc/auth.c
+--- linux-2.6.34.noarch/net/sunrpc/auth.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth.c	2010-08-23 11:01:00.387574079 -0400
+@@ -236,10 +236,15 @@ rpcauth_prune_expired(struct list_head *
+ 
+ 	list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
+ 
+-		/* Enforce a 60 second garbage collection moratorium */
++		if (nr_to_scan-- == 0)
++			break;
++		/*
++		 * Enforce a 60 second garbage collection moratorium
++		 * Note that the cred_unused list must be time-ordered.
++		 */
+ 		if (time_in_range(cred->cr_expire, expired, jiffies) &&
+ 		    test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
+-			continue;
++			return 0;
+ 
+ 		list_del_init(&cred->cr_lru);
+ 		number_cred_unused--;
+@@ -252,13 +257,10 @@ rpcauth_prune_expired(struct list_head *
+ 			get_rpccred(cred);
+ 			list_add_tail(&cred->cr_lru, free);
+ 			rpcauth_unhash_cred_locked(cred);
+-			nr_to_scan--;
+ 		}
+ 		spin_unlock(cache_lock);
+-		if (nr_to_scan == 0)
+-			break;
+ 	}
+-	return nr_to_scan;
++	return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
+ }
+ 
+ /*
+@@ -270,11 +272,12 @@ rpcauth_cache_shrinker(int nr_to_scan, g
+ 	LIST_HEAD(free);
+ 	int res;
+ 
++	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
++		return (nr_to_scan == 0) ? 0 : -1;
+ 	if (list_empty(&cred_unused))
+ 		return 0;
+ 	spin_lock(&rpc_credcache_lock);
+-	nr_to_scan = rpcauth_prune_expired(&free, nr_to_scan);
+-	res = (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
++	res = rpcauth_prune_expired(&free, nr_to_scan);
+ 	spin_unlock(&rpc_credcache_lock);
+ 	rpcauth_destroy_credlist(&free);
+ 	return res;
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/auth_gss.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/auth_gss.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/auth_gss.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/auth_gss.c	2010-08-23 11:01:00.388574680 -0400
+@@ -57,11 +57,14 @@ static const struct rpc_authops authgss_
+ static const struct rpc_credops gss_credops;
+ static const struct rpc_credops gss_nullops;
+ 
++#define GSS_RETRY_EXPIRED 5
++static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED;
++
+ #ifdef RPC_DEBUG
+ # define RPCDBG_FACILITY	RPCDBG_AUTH
+ #endif
+ 
+-#define GSS_CRED_SLACK		1024
++#define GSS_CRED_SLACK		(RPC_MAX_AUTH_SIZE * 2)
+ /* length of a krb5 verifier (48), plus data added before arguments when
+  * using integrity (two 4-byte integers): */
+ #define GSS_VERF_SLACK		100
+@@ -229,7 +232,7 @@ gss_fill_context(const void *p, const vo
+ 		p = ERR_PTR(-EFAULT);
+ 		goto err;
+ 	}
+-	ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx);
++	ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, GFP_NOFS);
+ 	if (ret < 0) {
+ 		p = ERR_PTR(ret);
+ 		goto err;
+@@ -350,6 +353,24 @@ gss_unhash_msg(struct gss_upcall_msg *gs
+ }
+ 
+ static void
++gss_handle_downcall_result(struct gss_cred *gss_cred, struct gss_upcall_msg *gss_msg)
++{
++	switch (gss_msg->msg.errno) {
++	case 0:
++		if (gss_msg->ctx == NULL)
++			break;
++		clear_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags);
++		gss_cred_set_ctx(&gss_cred->gc_base, gss_msg->ctx);
++		break;
++	case -EKEYEXPIRED:
++		set_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags);
++	}
++	gss_cred->gc_upcall_timestamp = jiffies;
++	gss_cred->gc_upcall = NULL;
++	rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
++}
++
++static void
+ gss_upcall_callback(struct rpc_task *task)
+ {
+ 	struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred,
+@@ -358,13 +379,9 @@ gss_upcall_callback(struct rpc_task *tas
+ 	struct inode *inode = &gss_msg->inode->vfs_inode;
+ 
+ 	spin_lock(&inode->i_lock);
+-	if (gss_msg->ctx)
+-		gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx);
+-	else
+-		task->tk_status = gss_msg->msg.errno;
+-	gss_cred->gc_upcall = NULL;
+-	rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
++	gss_handle_downcall_result(gss_cred, gss_msg);
+ 	spin_unlock(&inode->i_lock);
++	task->tk_status = gss_msg->msg.errno;
+ 	gss_release_msg(gss_msg);
+ }
+ 
+@@ -377,11 +394,12 @@ static void gss_encode_v0_msg(struct gss
+ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
+ 				struct rpc_clnt *clnt, int machine_cred)
+ {
++	struct gss_api_mech *mech = gss_msg->auth->mech;
+ 	char *p = gss_msg->databuf;
+ 	int len = 0;
+ 
+ 	gss_msg->msg.len = sprintf(gss_msg->databuf, "mech=%s uid=%d ",
+-				   gss_msg->auth->mech->gm_name,
++				   mech->gm_name,
+ 				   gss_msg->uid);
+ 	p += gss_msg->msg.len;
+ 	if (clnt->cl_principal) {
+@@ -398,6 +416,11 @@ static void gss_encode_v1_msg(struct gss
+ 		p += len;
+ 		gss_msg->msg.len += len;
+ 	}
++	if (mech->gm_upcall_enctypes) {
++		len = sprintf(p, mech->gm_upcall_enctypes);
++		p += len;
++		gss_msg->msg.len += len;
++	}
+ 	len = sprintf(p, "\n");
+ 	gss_msg->msg.len += len;
+ 
+@@ -507,18 +530,16 @@ gss_refresh_upcall(struct rpc_task *task
+ 	spin_lock(&inode->i_lock);
+ 	if (gss_cred->gc_upcall != NULL)
+ 		rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL);
+-	else if (gss_msg->ctx != NULL) {
+-		gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx);
+-		gss_cred->gc_upcall = NULL;
+-		rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
+-	} else if (gss_msg->msg.errno >= 0) {
++	else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
+ 		task->tk_timeout = 0;
+ 		gss_cred->gc_upcall = gss_msg;
+ 		/* gss_upcall_callback will release the reference to gss_upcall_msg */
+ 		atomic_inc(&gss_msg->count);
+ 		rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback);
+-	} else
++	} else {
++		gss_handle_downcall_result(gss_cred, gss_msg);
+ 		err = gss_msg->msg.errno;
++	}
+ 	spin_unlock(&inode->i_lock);
+ 	gss_release_msg(gss_msg);
+ out:
+@@ -1117,6 +1138,23 @@ static int gss_renew_cred(struct rpc_tas
+ 	return 0;
+ }
+ 
++static int gss_cred_is_negative_entry(struct rpc_cred *cred)
++{
++	if (test_bit(RPCAUTH_CRED_NEGATIVE, &cred->cr_flags)) {
++		unsigned long now = jiffies;
++		unsigned long begin, expire;
++		struct gss_cred *gss_cred; 
++
++		gss_cred = container_of(cred, struct gss_cred, gc_base);
++		begin = gss_cred->gc_upcall_timestamp;
++		expire = begin + gss_expired_cred_retry_delay * HZ;
++
++		if (time_in_range_open(now, begin, expire))
++			return 1;
++	}
++	return 0;
++}
++
+ /*
+ * Refresh credentials. XXX - finish
+ */
+@@ -1126,6 +1164,9 @@ gss_refresh(struct rpc_task *task)
+ 	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ 	int ret = 0;
+ 
++	if (gss_cred_is_negative_entry(cred))
++		return -EKEYEXPIRED;
++
+ 	if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) &&
+ 			!test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) {
+ 		ret = gss_renew_cred(task);
+@@ -1316,15 +1357,21 @@ gss_wrap_req_priv(struct rpc_cred *cred,
+ 	inpages = snd_buf->pages + first;
+ 	snd_buf->pages = rqstp->rq_enc_pages;
+ 	snd_buf->page_base -= first << PAGE_CACHE_SHIFT;
+-	/* Give the tail its own page, in case we need extra space in the
+-	 * head when wrapping: */
++	/*
++	 * Give the tail its own page, in case we need extra space in the
++	 * head when wrapping:
++	 *
++	 * call_allocate() allocates twice the slack space required
++	 * by the authentication flavor to rq_callsize.
++	 * For GSS, slack is GSS_CRED_SLACK.
++	 */
+ 	if (snd_buf->page_len || snd_buf->tail[0].iov_len) {
+ 		tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]);
+ 		memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
+ 		snd_buf->tail[0].iov_base = tmp;
+ 	}
+ 	maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
+-	/* RPC_SLACK_SPACE should prevent this ever happening: */
++	/* slack space should prevent this ever happening: */
+ 	BUG_ON(snd_buf->len > snd_buf->buflen);
+ 	status = -EIO;
+ 	/* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
+@@ -1573,5 +1620,11 @@ static void __exit exit_rpcsec_gss(void)
+ }
+ 
+ MODULE_LICENSE("GPL");
++module_param_named(expired_cred_retry_delay,
++		   gss_expired_cred_retry_delay,
++		   uint, 0644);
++MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until "
++		"the RPC engine retries an expired credential");
++
+ module_init(init_rpcsec_gss)
+ module_exit(exit_rpcsec_gss)
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_crypto.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_crypto.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_crypto.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_crypto.c	2010-08-23 11:01:00.390553891 -0400
+@@ -1,7 +1,7 @@
+ /*
+  *  linux/net/sunrpc/gss_krb5_crypto.c
+  *
+- *  Copyright (c) 2000 The Regents of the University of Michigan.
++ *  Copyright (c) 2000-2008 The Regents of the University of Michigan.
+  *  All rights reserved.
+  *
+  *  Andy Adamson   <andros@umich.edu>
+@@ -41,6 +41,7 @@
+ #include <linux/crypto.h>
+ #include <linux/highmem.h>
+ #include <linux/pagemap.h>
++#include <linux/random.h>
+ #include <linux/sunrpc/gss_krb5.h>
+ #include <linux/sunrpc/xdr.h>
+ 
+@@ -58,13 +59,13 @@ krb5_encrypt(
+ {
+ 	u32 ret = -EINVAL;
+ 	struct scatterlist sg[1];
+-	u8 local_iv[16] = {0};
++	u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
+ 	struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
+ 
+ 	if (length % crypto_blkcipher_blocksize(tfm) != 0)
+ 		goto out;
+ 
+-	if (crypto_blkcipher_ivsize(tfm) > 16) {
++	if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
+ 		dprintk("RPC:       gss_k5encrypt: tfm iv size too large %d\n",
+ 			crypto_blkcipher_ivsize(tfm));
+ 		goto out;
+@@ -92,13 +93,13 @@ krb5_decrypt(
+ {
+ 	u32 ret = -EINVAL;
+ 	struct scatterlist sg[1];
+-	u8 local_iv[16] = {0};
++	u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
+ 	struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
+ 
+ 	if (length % crypto_blkcipher_blocksize(tfm) != 0)
+ 		goto out;
+ 
+-	if (crypto_blkcipher_ivsize(tfm) > 16) {
++	if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
+ 		dprintk("RPC:       gss_k5decrypt: tfm iv size too large %d\n",
+ 			crypto_blkcipher_ivsize(tfm));
+ 		goto out;
+@@ -123,21 +124,155 @@ checksummer(struct scatterlist *sg, void
+ 	return crypto_hash_update(desc, sg, sg->length);
+ }
+ 
+-/* checksum the plaintext data and hdrlen bytes of the token header */
+-s32
+-make_checksum(char *cksumname, char *header, int hdrlen, struct xdr_buf *body,
+-		   int body_offset, struct xdr_netobj *cksum)
++static int
++arcfour_hmac_md5_usage_to_salt(unsigned int usage, u8 salt[4])
++{
++	unsigned int ms_usage;	/* usage number that gets encoded into salt[] */
++
++	switch (usage) {
++	case KG_USAGE_SIGN:
++		ms_usage = 15;
++		break;
++	case KG_USAGE_SEAL:
++		ms_usage = 13;
++		break;
++	default:
++		return -EINVAL;	/* only the SIGN and SEAL usages map to a salt */
++	}
++	salt[0] = (ms_usage >> 0) & 0xff;	/* stored low byte first */
++	salt[1] = (ms_usage >> 8) & 0xff;
++	salt[2] = (ms_usage >> 16) & 0xff;
++	salt[3] = (ms_usage >> 24) & 0xff;
++
++	return 0;	/* 0 on success, negative errno on an unsupported usage */
++}
++
++static u32
++make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
++		       struct xdr_buf *body, int body_offset, u8 *cksumkey,
++		       unsigned int usage, struct xdr_netobj *cksumout)
+ {
+-	struct hash_desc                desc; /* XXX add to ctx? */
++	struct hash_desc                desc;
+ 	struct scatterlist              sg[1];
+ 	int err;
++	u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	u8 rc4salt[4];
++	struct crypto_hash *md5;
++	struct crypto_hash *hmac_md5;
++
++	if (cksumkey == NULL)
++		return GSS_S_FAILURE;
++
++	if (cksumout->len < kctx->gk5e->cksumlength) {
++		dprintk("%s: checksum buffer length, %u, too small for %s\n",
++			__func__, cksumout->len, kctx->gk5e->name);
++		return GSS_S_FAILURE;
++	}
++
++	if (arcfour_hmac_md5_usage_to_salt(usage, rc4salt)) {
++		dprintk("%s: invalid usage value %u\n", __func__, usage);
++		return GSS_S_FAILURE;
++	}
++
++	md5 = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
++	if (IS_ERR(md5))
++		return GSS_S_FAILURE;
++
++	hmac_md5 = crypto_alloc_hash(kctx->gk5e->cksum_name, 0,
++				     CRYPTO_ALG_ASYNC);
++	if (IS_ERR(hmac_md5)) {
++		crypto_free_hash(md5);
++		return GSS_S_FAILURE;
++	}
++
++	desc.tfm = md5;
++	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
++
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out;
++	sg_init_one(sg, rc4salt, 4);
++	err = crypto_hash_update(&desc, sg, 4);
++	if (err)
++		goto out;
++
++	sg_init_one(sg, header, hdrlen);
++	err = crypto_hash_update(&desc, sg, hdrlen);
++	if (err)
++		goto out;
++	err = xdr_process_buf(body, body_offset, body->len - body_offset,
++			      checksummer, &desc);
++	if (err)
++		goto out;
++	err = crypto_hash_final(&desc, checksumdata);
++	if (err)
++		goto out;
++
++	desc.tfm = hmac_md5;
++	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
++
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out;
++	err = crypto_hash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength);
++	if (err)
++		goto out;
++
++	sg_init_one(sg, checksumdata, crypto_hash_digestsize(md5));
++	err = crypto_hash_digest(&desc, sg, crypto_hash_digestsize(md5),
++				 checksumdata);
++	if (err)
++		goto out;
++
++	memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
++	cksumout->len = kctx->gk5e->cksumlength;
++out:
++	crypto_free_hash(md5);
++	crypto_free_hash(hmac_md5);
++	return err ? GSS_S_FAILURE : 0;
++}
++
++/*
++ * checksum the plaintext data and hdrlen bytes of the token header
++ * The checksum is performed over the first 8 bytes of the
++ * gss token header and then over the data body
++ */
++u32
++make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
++	      struct xdr_buf *body, int body_offset, u8 *cksumkey,
++	      unsigned int usage, struct xdr_netobj *cksumout)
++{
++	struct hash_desc                desc;
++	struct scatterlist              sg[1];
++	int err;
++	u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	unsigned int checksumlen;
++
++	if (kctx->gk5e->ctype == CKSUMTYPE_HMAC_MD5_ARCFOUR)
++		return make_checksum_hmac_md5(kctx, header, hdrlen,
++					      body, body_offset,
++					      cksumkey, usage, cksumout);
++
++	if (cksumout->len < kctx->gk5e->cksumlength) {
++		dprintk("%s: checksum buffer length, %u, too small for %s\n",
++			__func__, cksumout->len, kctx->gk5e->name);
++		return GSS_S_FAILURE;
++	}
+ 
+-	desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
++	desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+ 	if (IS_ERR(desc.tfm))
+ 		return GSS_S_FAILURE;
+-	cksum->len = crypto_hash_digestsize(desc.tfm);
+ 	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ 
++	checksumlen = crypto_hash_digestsize(desc.tfm);
++
++	if (cksumkey != NULL) {
++		err = crypto_hash_setkey(desc.tfm, cksumkey,
++					 kctx->gk5e->keylength);
++		if (err)
++			goto out;
++	}
++
+ 	err = crypto_hash_init(&desc);
+ 	if (err)
+ 		goto out;
+@@ -149,15 +284,109 @@ make_checksum(char *cksumname, char *hea
+ 			      checksummer, &desc);
+ 	if (err)
+ 		goto out;
+-	err = crypto_hash_final(&desc, cksum->data);
++	err = crypto_hash_final(&desc, checksumdata);
++	if (err)
++		goto out;
+ 
++	switch (kctx->gk5e->ctype) {
++	case CKSUMTYPE_RSA_MD5:
++		err = kctx->gk5e->encrypt(kctx->seq, NULL, checksumdata,
++					  checksumdata, checksumlen);
++		if (err)
++			goto out;
++		memcpy(cksumout->data,
++		       checksumdata + checksumlen - kctx->gk5e->cksumlength,
++		       kctx->gk5e->cksumlength);
++		break;
++	case CKSUMTYPE_HMAC_SHA1_DES3:
++		memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
++		break;
++	default:
++		BUG();
++		break;
++	}
++	cksumout->len = kctx->gk5e->cksumlength;
++out:
++	crypto_free_hash(desc.tfm);
++	return err ? GSS_S_FAILURE : 0;
++}
++
++/*
++ * checksum the plaintext data and hdrlen bytes of the token header
++ * Per rfc4121, sec. 4.2.4, the checksum is performed over the data
++ * body then over the first 16 octets of the MIC token
++ * Inclusion of the header data in the calculation of the
++ * checksum is optional.
++ */
++u32
++make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
++		 struct xdr_buf *body, int body_offset, u8 *cksumkey,
++		 unsigned int usage, struct xdr_netobj *cksumout)
++{
++	struct hash_desc desc;
++	struct scatterlist sg[1];
++	int err;
++	u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	unsigned int checksumlen;
++
++	if (kctx->gk5e->keyed_cksum == 0) {
++		dprintk("%s: expected keyed hash for %s\n",
++			__func__, kctx->gk5e->name);
++		return GSS_S_FAILURE;
++	}
++	if (cksumkey == NULL) {
++		dprintk("%s: no key supplied for %s\n",
++			__func__, kctx->gk5e->name);
++		return GSS_S_FAILURE;
++	}
++
++	desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0,
++							CRYPTO_ALG_ASYNC);
++	if (IS_ERR(desc.tfm))
++		return GSS_S_FAILURE;
++	checksumlen = crypto_hash_digestsize(desc.tfm);
++	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
++
++	err = crypto_hash_setkey(desc.tfm, cksumkey, kctx->gk5e->keylength);
++	if (err)
++		goto out;
++
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out;
++	err = xdr_process_buf(body, body_offset, body->len - body_offset,
++			      checksummer, &desc);
++	if (err)
++		goto out;
++	if (header != NULL) {
++		sg_init_one(sg, header, hdrlen);
++		err = crypto_hash_update(&desc, sg, hdrlen);
++		if (err)
++			goto out;
++	}
++	err = crypto_hash_final(&desc, checksumdata);
++	if (err)
++		goto out;
++
++	cksumout->len = kctx->gk5e->cksumlength;
++
++	switch (kctx->gk5e->ctype) {
++	case CKSUMTYPE_HMAC_SHA1_96_AES128:
++	case CKSUMTYPE_HMAC_SHA1_96_AES256:
++		/* note that this truncates the hash */
++		memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
++		break;
++	default:
++		BUG();
++		break;
++	}
+ out:
+ 	crypto_free_hash(desc.tfm);
+ 	return err ? GSS_S_FAILURE : 0;
+ }
+ 
+ struct encryptor_desc {
+-	u8 iv[8]; /* XXX hard-coded blocksize */
++	u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
+ 	struct blkcipher_desc desc;
+ 	int pos;
+ 	struct xdr_buf *outbuf;
+@@ -198,7 +427,7 @@ encryptor(struct scatterlist *sg, void *
+ 	desc->fraglen += sg->length;
+ 	desc->pos += sg->length;
+ 
+-	fraglen = thislen & 7; /* XXX hardcoded blocksize */
++	fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1);
+ 	thislen -= fraglen;
+ 
+ 	if (thislen == 0)
+@@ -256,7 +485,7 @@ gss_encrypt_xdr_buf(struct crypto_blkcip
+ }
+ 
+ struct decryptor_desc {
+-	u8 iv[8]; /* XXX hard-coded blocksize */
++	u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
+ 	struct blkcipher_desc desc;
+ 	struct scatterlist frags[4];
+ 	int fragno;
+@@ -278,7 +507,7 @@ decryptor(struct scatterlist *sg, void *
+ 	desc->fragno++;
+ 	desc->fraglen += sg->length;
+ 
+-	fraglen = thislen & 7; /* XXX hardcoded blocksize */
++	fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1);
+ 	thislen -= fraglen;
+ 
+ 	if (thislen == 0)
+@@ -325,3 +554,437 @@ gss_decrypt_xdr_buf(struct crypto_blkcip
+ 
+ 	return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
+ }
++
++/*
++ * This function makes the assumption that it was ultimately called
++ * from gss_wrap().
++ *
++ * The client auth_gss code moves any existing tail data into a
++ * separate page before calling gss_wrap.
++ * The server svcauth_gss code ensures that both the head and the
++ * tail have slack space of RPC_MAX_AUTH_SIZE before calling gss_wrap.
++ *
++ * Even with that guarantee, this function may be called more than
++ * once in the processing of gss_wrap().  The best we can do is
++ * verify at compile-time (see GSS_KRB5_MAX_SLACK_NEEDED) that the
++ * largest expected shift will fit within RPC_MAX_AUTH_SIZE.
++ * At run-time we can verify that a single invocation of this
++ * function doesn't attempt to use more than RPC_MAX_AUTH_SIZE.
++ */
++
++int
++xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen)
++{
++	u8 *p;
++
++	if (shiftlen == 0)
++		return 0;
++
++	BUILD_BUG_ON(GSS_KRB5_MAX_SLACK_NEEDED > RPC_MAX_AUTH_SIZE);
++	BUG_ON(shiftlen > RPC_MAX_AUTH_SIZE);
++
++	p = buf->head[0].iov_base + base;
++
++	memmove(p + shiftlen, p, buf->head[0].iov_len - base);
++
++	buf->head[0].iov_len += shiftlen;
++	buf->len += shiftlen;
++
++	return 0;
++}
++
++static u32
++gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf,
++		   u32 offset, u8 *iv, struct page **pages, int encrypt)
++{
++	u32 ret;
++	struct scatterlist sg[1];
++	struct blkcipher_desc desc = { .tfm = cipher, .info = iv };
++	u8 data[crypto_blkcipher_blocksize(cipher) * 2];
++	struct page **save_pages;
++	u32 len = buf->len - offset;
++
++	BUG_ON(len > crypto_blkcipher_blocksize(cipher) * 2);
++
++	/*
++	 * For encryption, we want to read from the cleartext
++	 * page cache pages, and write the encrypted data to
++	 * the supplied xdr_buf pages.
++	 */
++	save_pages = buf->pages;
++	if (encrypt)
++		buf->pages = pages;
++
++	ret = read_bytes_from_xdr_buf(buf, offset, data, len);
++	buf->pages = save_pages;
++	if (ret)
++		goto out;
++
++	sg_init_one(sg, data, len);
++
++	if (encrypt)
++		ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, len);
++	else
++		ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, len);
++
++	if (ret)
++		goto out;
++
++	ret = write_bytes_to_xdr_buf(buf, offset, data, len);
++
++out:
++	return ret;
++}
++
++u32
++gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
++		     struct xdr_buf *buf, int ec, struct page **pages)
++{
++	u32 err;
++	struct xdr_netobj hmac;
++	u8 *cksumkey;
++	u8 *ecptr;
++	struct crypto_blkcipher *cipher, *aux_cipher;
++	int blocksize;
++	struct page **save_pages;
++	int nblocks, nbytes;
++	struct encryptor_desc desc;
++	u32 cbcbytes;
++	unsigned int usage;
++
++	if (kctx->initiate) {
++		cipher = kctx->initiator_enc;
++		aux_cipher = kctx->initiator_enc_aux;
++		cksumkey = kctx->initiator_integ;
++		usage = KG_USAGE_INITIATOR_SEAL;
++	} else {
++		cipher = kctx->acceptor_enc;
++		aux_cipher = kctx->acceptor_enc_aux;
++		cksumkey = kctx->acceptor_integ;
++		usage = KG_USAGE_ACCEPTOR_SEAL;
++	}
++	blocksize = crypto_blkcipher_blocksize(cipher);
++
++	/* hide the gss token header and insert the confounder */
++	offset += GSS_KRB5_TOK_HDR_LEN;
++	if (xdr_extend_head(buf, offset, kctx->gk5e->conflen))
++		return GSS_S_FAILURE;
++	gss_krb5_make_confounder(buf->head[0].iov_base + offset, kctx->gk5e->conflen);
++	offset -= GSS_KRB5_TOK_HDR_LEN;
++
++	if (buf->tail[0].iov_base != NULL) {
++		ecptr = buf->tail[0].iov_base + buf->tail[0].iov_len;
++	} else {
++		buf->tail[0].iov_base = buf->head[0].iov_base
++							+ buf->head[0].iov_len;
++		buf->tail[0].iov_len = 0;
++		ecptr = buf->tail[0].iov_base;
++	}
++
++	memset(ecptr, 'X', ec);
++	buf->tail[0].iov_len += ec;
++	buf->len += ec;
++
++	/* copy plaintext gss token header after filler (if any) */
++	memcpy(ecptr + ec, buf->head[0].iov_base + offset,
++						GSS_KRB5_TOK_HDR_LEN);
++	buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN;
++	buf->len += GSS_KRB5_TOK_HDR_LEN;
++
++	/* Do the HMAC */
++	hmac.len = GSS_KRB5_MAX_CKSUM_LEN;
++	hmac.data = buf->tail[0].iov_base + buf->tail[0].iov_len;
++
++	/*
++	 * When we are called, pages points to the real page cache
++	 * data -- which we can't go and encrypt!  buf->pages points
++	 * to scratch pages which we are going to send off to the
++	 * client/server.  Swap in the plaintext pages to calculate
++	 * the hmac.
++	 */
++	save_pages = buf->pages;
++	buf->pages = pages;
++
++	err = make_checksum_v2(kctx, NULL, 0, buf,
++			       offset + GSS_KRB5_TOK_HDR_LEN,
++			       cksumkey, usage, &hmac);
++	buf->pages = save_pages;
++	if (err)
++		return GSS_S_FAILURE;
++
++	nbytes = buf->len - offset - GSS_KRB5_TOK_HDR_LEN;
++	nblocks = (nbytes + blocksize - 1) / blocksize;
++	cbcbytes = 0;
++	if (nblocks > 2)
++		cbcbytes = (nblocks - 2) * blocksize;
++
++	memset(desc.iv, 0, sizeof(desc.iv));
++
++	if (cbcbytes) {
++		desc.pos = offset + GSS_KRB5_TOK_HDR_LEN;
++		desc.fragno = 0;
++		desc.fraglen = 0;
++		desc.pages = pages;
++		desc.outbuf = buf;
++		desc.desc.info = desc.iv;
++		desc.desc.flags = 0;
++		desc.desc.tfm = aux_cipher;
++
++		sg_init_table(desc.infrags, 4);
++		sg_init_table(desc.outfrags, 4);
++
++		err = xdr_process_buf(buf, offset + GSS_KRB5_TOK_HDR_LEN,
++				      cbcbytes, encryptor, &desc);
++		if (err)
++			goto out_err;
++	}
++
++	/* Make sure IV carries forward from any CBC results. */
++	err = gss_krb5_cts_crypt(cipher, buf,
++				 offset + GSS_KRB5_TOK_HDR_LEN + cbcbytes,
++				 desc.iv, pages, 1);
++	if (err) {
++		err = GSS_S_FAILURE;
++		goto out_err;
++	}
++
++	/* Now update buf to account for HMAC */
++	buf->tail[0].iov_len += kctx->gk5e->cksumlength;
++	buf->len += kctx->gk5e->cksumlength;
++
++out_err:
++	if (err)
++		err = GSS_S_FAILURE;
++	return err;
++}
++
++u32
++gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
++		     u32 *headskip, u32 *tailskip)
++{
++	struct xdr_buf subbuf;
++	u32 ret = 0;
++	u8 *cksum_key;
++	struct crypto_blkcipher *cipher, *aux_cipher;
++	struct xdr_netobj our_hmac_obj;
++	u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
++	u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
++	int nblocks, blocksize, cbcbytes;
++	struct decryptor_desc desc;
++	unsigned int usage;
++
++	if (kctx->initiate) {
++		cipher = kctx->acceptor_enc;
++		aux_cipher = kctx->acceptor_enc_aux;
++		cksum_key = kctx->acceptor_integ;
++		usage = KG_USAGE_ACCEPTOR_SEAL;
++	} else {
++		cipher = kctx->initiator_enc;
++		aux_cipher = kctx->initiator_enc_aux;
++		cksum_key = kctx->initiator_integ;
++		usage = KG_USAGE_INITIATOR_SEAL;
++	}
++	blocksize = crypto_blkcipher_blocksize(cipher);
++
++
++	/* create a segment skipping the header and leaving out the checksum */
++	xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN,
++				    (buf->len - offset - GSS_KRB5_TOK_HDR_LEN -
++				     kctx->gk5e->cksumlength));
++
++	nblocks = (subbuf.len + blocksize - 1) / blocksize;
++
++	cbcbytes = 0;
++	if (nblocks > 2)
++		cbcbytes = (nblocks - 2) * blocksize;
++
++	memset(desc.iv, 0, sizeof(desc.iv));
++
++	if (cbcbytes) {
++		desc.fragno = 0;
++		desc.fraglen = 0;
++		desc.desc.info = desc.iv;
++		desc.desc.flags = 0;
++		desc.desc.tfm = aux_cipher;
++
++		sg_init_table(desc.frags, 4);
++
++		ret = xdr_process_buf(&subbuf, 0, cbcbytes, decryptor, &desc);
++		if (ret)
++			goto out_err;
++	}
++
++	/* Make sure IV carries forward from any CBC results. */
++	ret = gss_krb5_cts_crypt(cipher, &subbuf, cbcbytes, desc.iv, NULL, 0);
++	if (ret)
++		goto out_err;
++
++
++	/* Calculate our hmac over the plaintext data */
++	our_hmac_obj.len = sizeof(our_hmac);
++	our_hmac_obj.data = our_hmac;
++
++	ret = make_checksum_v2(kctx, NULL, 0, &subbuf, 0,
++			       cksum_key, usage, &our_hmac_obj);
++	if (ret)
++		goto out_err;
++
++	/* Get the packet's hmac value */
++	ret = read_bytes_from_xdr_buf(buf, buf->len - kctx->gk5e->cksumlength,
++				      pkt_hmac, kctx->gk5e->cksumlength);
++	if (ret)
++		goto out_err;
++
++	if (memcmp(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
++		ret = GSS_S_BAD_SIG;
++		goto out_err;
++	}
++	*headskip = kctx->gk5e->conflen;
++	*tailskip = kctx->gk5e->cksumlength;
++out_err:
++	if (ret && ret != GSS_S_BAD_SIG)
++		ret = GSS_S_FAILURE;
++	return ret;
++}
++
++/*
++ * Compute Kseq given the initial session key and the checksum.
++ * Set the key of the given cipher.
++ */
++int
++krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
++		       unsigned char *cksum)
++{
++	struct crypto_hash *hmac;
++	struct hash_desc desc;
++	struct scatterlist sg[1];
++	u8 Kseq[GSS_KRB5_MAX_KEYLEN];
++	u32 zeroconstant = 0;
++	int err;
++
++	dprintk("%s: entered\n", __func__);
++
++	hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
++	if (IS_ERR(hmac)) {
++		dprintk("%s: error %ld, allocating hash '%s'\n",
++			__func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
++		return PTR_ERR(hmac);
++	}
++
++	desc.tfm = hmac;
++	desc.flags = 0;
++
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out_err;
++
++	/* Compute intermediate Kseq from session key */
++	err = crypto_hash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	sg_init_table(sg, 1);
++	sg_set_buf(sg, &zeroconstant, 4);
++
++	err = crypto_hash_digest(&desc, sg, 4, Kseq);
++	if (err)
++		goto out_err;
++
++	/* Compute final Kseq from the checksum and intermediate Kseq */
++	err = crypto_hash_setkey(hmac, Kseq, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	sg_set_buf(sg, cksum, 8);
++
++	err = crypto_hash_digest(&desc, sg, 8, Kseq);
++	if (err)
++		goto out_err;
++
++	err = crypto_blkcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	err = 0;
++
++out_err:
++	crypto_free_hash(hmac);
++	dprintk("%s: returning %d\n", __func__, err);
++	return err;
++}
++
++/*
++ * Compute Kcrypt given the initial session key and the plaintext seqnum.
++ * Set the key of the given cipher.
++ */
++int
++krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
++		       s32 seqnum)
++{
++	struct crypto_hash *hmac;
++	struct hash_desc desc;
++	struct scatterlist sg[1];
++	u8 Kcrypt[GSS_KRB5_MAX_KEYLEN];
++	u8 zeroconstant[4] = {0};
++	u8 seqnumarray[4];
++	int err, i;
++
++	dprintk("%s: entered, seqnum %u\n", __func__, seqnum);
++
++	hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
++	if (IS_ERR(hmac)) {
++		dprintk("%s: error %ld, allocating hash '%s'\n",
++			__func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
++		return PTR_ERR(hmac);
++	}
++
++	desc.tfm = hmac;
++	desc.flags = 0;
++
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out_err;
++
++	/* Compute intermediate Kcrypt from session key */
++	for (i = 0; i < kctx->gk5e->keylength; i++)
++		Kcrypt[i] = kctx->Ksess[i] ^ 0xf0;
++
++	err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	sg_init_table(sg, 1);
++	sg_set_buf(sg, zeroconstant, 4);
++
++	err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
++	if (err)
++		goto out_err;
++
++	/* Compute final Kcrypt from the seqnum and intermediate Kcrypt */
++	err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	seqnumarray[0] = (unsigned char) ((seqnum >> 24) & 0xff);
++	seqnumarray[1] = (unsigned char) ((seqnum >> 16) & 0xff);
++	seqnumarray[2] = (unsigned char) ((seqnum >> 8) & 0xff);
++	seqnumarray[3] = (unsigned char) ((seqnum >> 0) & 0xff);
++
++	sg_set_buf(sg, seqnumarray, 4);
++
++	err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
++	if (err)
++		goto out_err;
++
++	err = crypto_blkcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength);
++	if (err)
++		goto out_err;
++
++	err = 0;
++
++out_err:
++	crypto_free_hash(hmac);
++	dprintk("%s: returning %d\n", __func__, err);
++	return err;
++}
++
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_keys.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_keys.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_keys.c.orig	2010-08-23 11:01:00.390553891 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_keys.c	2010-08-23 11:01:00.391564137 -0400
+@@ -0,0 +1,336 @@
++/*
++ * COPYRIGHT (c) 2008
++ * The Regents of the University of Michigan
++ * ALL RIGHTS RESERVED
++ *
++ * Permission is granted to use, copy, create derivative works
++ * and redistribute this software and such derivative works
++ * for any purpose, so long as the name of The University of
++ * Michigan is not used in any advertising or publicity
++ * pertaining to the use of distribution of this software
++ * without specific, written prior authorization.  If the
++ * above copyright notice or any other identification of the
++ * University of Michigan is included in any copy of any
++ * portion of this software, then the disclaimer below must
++ * also be included.
++ *
++ * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
++ * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
++ * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
++ * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
++ * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
++ * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
++ * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
++ * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
++ * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
++ * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGES.
++ */
++
++/*
++ * Copyright (C) 1998 by the FundsXpress, INC.
++ *
++ * All rights reserved.
++ *
++ * Export of this software from the United States of America may require
++ * a specific license from the United States Government.  It is the
++ * responsibility of any person or organization contemplating export to
++ * obtain such a license before exporting.
++ *
++ * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
++ * distribute this software and its documentation for any purpose and
++ * without fee is hereby granted, provided that the above copyright
++ * notice appear in all copies and that both that copyright notice and
++ * this permission notice appear in supporting documentation, and that
++ * the name of FundsXpress. not be used in advertising or publicity pertaining
++ * to distribution of the software without specific, written prior
++ * permission.  FundsXpress makes no representations about the suitability of
++ * this software for any purpose.  It is provided "as is" without express
++ * or implied warranty.
++ *
++ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
++ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
++ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
++ */
++
++#include <linux/err.h>
++#include <linux/types.h>
++#include <linux/crypto.h>
++#include <linux/sunrpc/gss_krb5.h>
++#include <linux/sunrpc/xdr.h>
++
++#ifdef RPC_DEBUG
++# define RPCDBG_FACILITY        RPCDBG_AUTH
++#endif
++
++/*
++ * This is the n-fold function as described in rfc3961, sec 5.1
++ * Taken from MIT Kerberos and modified.
++ */
++
++static void krb5_nfold(u32 inbits, const u8 *in,
++		       u32 outbits, u8 *out)
++{
++	int a, b, c, lcm;
++	int byte, i, msbit;
++
++	/* the code below is more readable if I make these bytes
++	   instead of bits */
++
++	inbits >>= 3;
++	outbits >>= 3;
++
++	/* first compute lcm(n,k) */
++
++	a = outbits;
++	b = inbits;
++
++	while (b != 0) {
++		c = b;
++		b = a%b;
++		a = c;
++	}
++
++	lcm = outbits*inbits/a;
++
++	/* now do the real work */
++
++	memset(out, 0, outbits);
++	byte = 0;
++
++	/* this will end up cycling through k lcm(k,n)/k times, which
++	   is correct */
++	for (i = lcm-1; i >= 0; i--) {
++		/* compute the msbit in k which gets added into this byte */
++		msbit = (
++			/* first, start with the msbit in the first,
++			 * unrotated byte */
++			 ((inbits << 3) - 1)
++			 /* then, for each byte, shift to the right
++			  * for each repetition */
++			 + (((inbits << 3) + 13) * (i/inbits))
++			 /* last, pick out the correct byte within
++			  * that shifted repetition */
++			 + ((inbits - (i % inbits)) << 3)
++			 ) % (inbits << 3);
++
++		/* pull out the byte value itself */
++		byte += (((in[((inbits - 1) - (msbit >> 3)) % inbits] << 8)|
++				  (in[((inbits) - (msbit >> 3)) % inbits]))
++				 >> ((msbit & 7) + 1)) & 0xff;
++
++		/* do the addition */
++		byte += out[i % outbits];
++		out[i % outbits] = byte & 0xff;
++
++		/* keep around the carry bit, if any */
++		byte >>= 8;
++
++	}
++
++	/* if there's a carry bit left over, add it back in */
++	if (byte) {
++		for (i = outbits - 1; i >= 0; i--) {
++			/* do the addition */
++			byte += out[i];
++			out[i] = byte & 0xff;
++
++			/* keep around the carry bit, if any */
++			byte >>= 8;
++		}
++	}
++}
++
++/*
++ * This is the DK (derive_key) function as described in rfc3961, sec 5.1
++ * Taken from MIT Kerberos and modified.
++ */
++
++u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
++		    const struct xdr_netobj *inkey,
++		    struct xdr_netobj *outkey,
++		    const struct xdr_netobj *in_constant,
++		    gfp_t gfp_mask)
++{
++	size_t blocksize, keybytes, keylength, n;
++	unsigned char *inblockdata, *outblockdata, *rawkey;
++	struct xdr_netobj inblock, outblock;
++	struct crypto_blkcipher *cipher;
++	u32 ret = EINVAL;
++
++	blocksize = gk5e->blocksize;
++	keybytes = gk5e->keybytes;
++	keylength = gk5e->keylength;
++
++	if ((inkey->len != keylength) || (outkey->len != keylength))
++		goto err_return;
++
++	cipher = crypto_alloc_blkcipher(gk5e->encrypt_name, 0,
++					CRYPTO_ALG_ASYNC);
++	if (IS_ERR(cipher))
++		goto err_return;
++	if (crypto_blkcipher_setkey(cipher, inkey->data, inkey->len))
++		goto err_free_cipher;	/* cipher is allocated here; don't leak it */
++
++	/* allocate and set up buffers */
++
++	ret = ENOMEM;
++	inblockdata = kmalloc(blocksize, gfp_mask);
++	if (inblockdata == NULL)
++		goto err_free_cipher;
++
++	outblockdata = kmalloc(blocksize, gfp_mask);
++	if (outblockdata == NULL)
++		goto err_free_in;
++
++	rawkey = kmalloc(keybytes, gfp_mask);
++	if (rawkey == NULL)
++		goto err_free_out;
++
++	inblock.data = (char *) inblockdata;
++	inblock.len = blocksize;
++
++	outblock.data = (char *) outblockdata;
++	outblock.len = blocksize;
++
++	/* initialize the input block */
++
++	if (in_constant->len == inblock.len) {
++		memcpy(inblock.data, in_constant->data, inblock.len);
++	} else {
++		krb5_nfold(in_constant->len * 8, in_constant->data,
++			   inblock.len * 8, inblock.data);
++	}
++
++	/* loop encrypting the blocks until enough key bytes are generated */
++
++	n = 0;
++	while (n < keybytes) {
++		(*(gk5e->encrypt))(cipher, NULL, inblock.data,
++				   outblock.data, inblock.len);
++
++		if ((keybytes - n) <= outblock.len) {
++			memcpy(rawkey + n, outblock.data, (keybytes - n));
++			break;
++		}
++
++		memcpy(rawkey + n, outblock.data, outblock.len);
++		memcpy(inblock.data, outblock.data, outblock.len);
++		n += outblock.len;
++	}
++
++	/* postprocess the key */
++
++	inblock.data = (char *) rawkey;
++	inblock.len = keybytes;
++
++	BUG_ON(gk5e->mk_key == NULL);
++	ret = (*(gk5e->mk_key))(gk5e, &inblock, outkey);
++	if (ret) {
++		dprintk("%s: got %d from mk_key function for '%s'\n",
++			__func__, ret, gk5e->encrypt_name);
++		goto err_free_raw;
++	}
++
++	/* clean memory, free resources and exit */
++
++	ret = 0;
++
++err_free_raw:
++	memset(rawkey, 0, keybytes);
++	kfree(rawkey);
++err_free_out:
++	memset(outblockdata, 0, blocksize);
++	kfree(outblockdata);
++err_free_in:
++	memset(inblockdata, 0, blocksize);
++	kfree(inblockdata);
++err_free_cipher:
++	crypto_free_blkcipher(cipher);
++err_return:
++	return ret;
++}
++
++#define smask(step) ((1<<step)-1)
++#define pstep(x, step) (((x)&smask(step))^(((x)>>step)&smask(step)))
++#define parity_char(x) pstep(pstep(pstep((x), 4), 2), 1)
++
++static void mit_des_fixup_key_parity(u8 key[8])
++{
++	int i;
++	for (i = 0; i < 8; i++) {
++		key[i] &= 0xfe;
++		key[i] |= 1^parity_char(key[i]);
++	}
++}
++
++/*
++ * This is the des3 key derivation postprocess function
++ */
++u32 gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e,
++			   struct xdr_netobj *randombits,
++			   struct xdr_netobj *key)
++{
++	int i;
++	u32 ret = EINVAL;
++
++	if (key->len != 24) {
++		dprintk("%s: key->len is %d\n", __func__, key->len);
++		goto err_out;
++	}
++	if (randombits->len != 21) {
++		dprintk("%s: randombits->len is %d\n",
++			__func__, randombits->len);
++		goto err_out;
++	}
++
++	/* take the seven bytes, move them around into the top 7 bits of the
++	   8 key bytes, then compute the parity bits.  Do this three times. */
++
++	for (i = 0; i < 3; i++) {
++		memcpy(key->data + i*8, randombits->data + i*7, 7);
++		key->data[i*8+7] = (((key->data[i*8]&1)<<1) |
++				    ((key->data[i*8+1]&1)<<2) |
++				    ((key->data[i*8+2]&1)<<3) |
++				    ((key->data[i*8+3]&1)<<4) |
++				    ((key->data[i*8+4]&1)<<5) |
++				    ((key->data[i*8+5]&1)<<6) |
++				    ((key->data[i*8+6]&1)<<7));
++
++		mit_des_fixup_key_parity(key->data + i*8);
++	}
++	ret = 0;
++err_out:
++	return ret;
++}
++
++/*
++ * This is the aes key derivation postprocess function
++ */
++u32 gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e,
++			  struct xdr_netobj *randombits,
++			  struct xdr_netobj *key)
++{
++	u32 ret = EINVAL;
++
++	if (key->len != 16 && key->len != 32) {
++		dprintk("%s: key->len is %d\n", __func__, key->len);
++		goto err_out;
++	}
++	if (randombits->len != 16 && randombits->len != 32) {
++		dprintk("%s: randombits->len is %d\n",
++			__func__, randombits->len);
++		goto err_out;
++	}
++	if (randombits->len != key->len) {
++		dprintk("%s: randombits->len is %d, key->len is %d\n",
++			__func__, randombits->len, key->len);
++		goto err_out;
++	}
++	memcpy(key->data, randombits->data, key->len);
++	ret = 0;
++err_out:
++	return ret;
++}
++
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_mech.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_mech.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_mech.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_mech.c	2010-08-23 11:01:00.392564136 -0400
+@@ -1,7 +1,7 @@
+ /*
+  *  linux/net/sunrpc/gss_krb5_mech.c
+  *
+- *  Copyright (c) 2001 The Regents of the University of Michigan.
++ *  Copyright (c) 2001-2008 The Regents of the University of Michigan.
+  *  All rights reserved.
+  *
+  *  Andy Adamson <andros@umich.edu>
+@@ -48,6 +48,143 @@
+ # define RPCDBG_FACILITY	RPCDBG_AUTH
+ #endif
+ 
++static struct gss_api_mech gss_kerberos_mech;	/* forward declaration */
++
++static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
++	/*
++	 * DES (All DES enctypes are mapped to the same gss functionality)
++	 */
++	{
++	  .etype = ENCTYPE_DES_CBC_RAW,
++	  .ctype = CKSUMTYPE_RSA_MD5,
++	  .name = "des-cbc-crc",
++	  .encrypt_name = "cbc(des)",
++	  .cksum_name = "md5",
++	  .encrypt = krb5_encrypt,
++	  .decrypt = krb5_decrypt,
++	  .mk_key = NULL,
++	  .signalg = SGN_ALG_DES_MAC_MD5,
++	  .sealalg = SEAL_ALG_DES,
++	  .keybytes = 7,
++	  .keylength = 8,
++	  .blocksize = 8,
++	  .conflen = 8,
++	  .cksumlength = 8,
++	  .keyed_cksum = 0,
++	},
++	/*
++	 * RC4-HMAC
++	 */
++	{
++	  .etype = ENCTYPE_ARCFOUR_HMAC,
++	  .ctype = CKSUMTYPE_HMAC_MD5_ARCFOUR,
++	  .name = "rc4-hmac",
++	  .encrypt_name = "ecb(arc4)",
++	  .cksum_name = "hmac(md5)",
++	  .encrypt = krb5_encrypt,
++	  .decrypt = krb5_decrypt,
++	  .mk_key = NULL,
++	  .signalg = SGN_ALG_HMAC_MD5,
++	  .sealalg = SEAL_ALG_MICROSOFT_RC4,
++	  .keybytes = 16,
++	  .keylength = 16,
++	  .blocksize = 1,
++	  .conflen = 8,
++	  .cksumlength = 8,
++	  .keyed_cksum = 1,
++	},
++	/*
++	 * 3DES
++	 */
++	{
++	  .etype = ENCTYPE_DES3_CBC_RAW,
++	  .ctype = CKSUMTYPE_HMAC_SHA1_DES3,
++	  .name = "des3-hmac-sha1",
++	  .encrypt_name = "cbc(des3_ede)",
++	  .cksum_name = "hmac(sha1)",
++	  .encrypt = krb5_encrypt,
++	  .decrypt = krb5_decrypt,
++	  .mk_key = gss_krb5_des3_make_key,
++	  .signalg = SGN_ALG_HMAC_SHA1_DES3_KD,
++	  .sealalg = SEAL_ALG_DES3KD,
++	  .keybytes = 21,
++	  .keylength = 24,
++	  .blocksize = 8,
++	  .conflen = 8,
++	  .cksumlength = 20,
++	  .keyed_cksum = 1,
++	},
++	/*
++	 * AES128
++	 */
++	{
++	  .etype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
++	  .ctype = CKSUMTYPE_HMAC_SHA1_96_AES128,
++	  .name = "aes128-cts",
++	  .encrypt_name = "cts(cbc(aes))",
++	  .cksum_name = "hmac(sha1)",
++	  .encrypt = krb5_encrypt,
++	  .decrypt = krb5_decrypt,
++	  .mk_key = gss_krb5_aes_make_key,
++	  .encrypt_v2 = gss_krb5_aes_encrypt,
++	  .decrypt_v2 = gss_krb5_aes_decrypt,
++	  .signalg = -1,
++	  .sealalg = -1,
++	  .keybytes = 16,
++	  .keylength = 16,
++	  .blocksize = 16,
++	  .conflen = 16,
++	  .cksumlength = 12,
++	  .keyed_cksum = 1,
++	},
++	/*
++	 * AES256
++	 */
++	{
++	  .etype = ENCTYPE_AES256_CTS_HMAC_SHA1_96,
++	  .ctype = CKSUMTYPE_HMAC_SHA1_96_AES256,
++	  .name = "aes256-cts",
++	  .encrypt_name = "cts(cbc(aes))",
++	  .cksum_name = "hmac(sha1)",
++	  .encrypt = krb5_encrypt,
++	  .decrypt = krb5_decrypt,
++	  .mk_key = gss_krb5_aes_make_key,
++	  .encrypt_v2 = gss_krb5_aes_encrypt,
++	  .decrypt_v2 = gss_krb5_aes_decrypt,
++	  .signalg = -1,
++	  .sealalg = -1,
++	  .keybytes = 32,
++	  .keylength = 32,
++	  .blocksize = 16,
++	  .conflen = 16,
++	  .cksumlength = 12,
++	  .keyed_cksum = 1,
++	},
++};
++
++static const int num_supported_enctypes =
++	ARRAY_SIZE(supported_gss_krb5_enctypes);
++
++static int
++supported_gss_krb5_enctype(int etype)
++{
++	int i;
++	for (i = 0; i < num_supported_enctypes; i++)
++		if (supported_gss_krb5_enctypes[i].etype == etype)
++			return 1;
++	return 0;
++}
++
++static const struct gss_krb5_enctype *
++get_gss_krb5_enctype(int etype)
++{
++	int i;
++	for (i = 0; i < num_supported_enctypes; i++)
++		if (supported_gss_krb5_enctypes[i].etype == etype)
++			return &supported_gss_krb5_enctypes[i];
++	return NULL;
++}
++
+ static const void *
+ simple_get_bytes(const void *p, const void *end, void *res, int len)
+ {
+@@ -78,35 +215,45 @@ simple_get_netobj(const void *p, const v
+ }
+ 
+ static inline const void *
+-get_key(const void *p, const void *end, struct crypto_blkcipher **res)
++get_key(const void *p, const void *end,
++	struct krb5_ctx *ctx, struct crypto_blkcipher **res)
+ {
+ 	struct xdr_netobj	key;
+ 	int			alg;
+-	char			*alg_name;
+ 
+ 	p = simple_get_bytes(p, end, &alg, sizeof(alg));
+ 	if (IS_ERR(p))
+ 		goto out_err;
++
++	switch (alg) {
++	case ENCTYPE_DES_CBC_CRC:
++	case ENCTYPE_DES_CBC_MD4:
++	case ENCTYPE_DES_CBC_MD5:
++		/* Map all these key types to ENCTYPE_DES_CBC_RAW */
++		alg = ENCTYPE_DES_CBC_RAW;
++		break;
++	}
++
++	if (!supported_gss_krb5_enctype(alg)) {
++		printk(KERN_WARNING "gss_kerberos_mech: unsupported "
++			"encryption key algorithm %d\n", alg);
++		return ERR_PTR(-EINVAL); /* nothing allocated yet; p is not an error */
++	}
+ 	p = simple_get_netobj(p, end, &key);
+ 	if (IS_ERR(p))
+ 		goto out_err;
+ 
+-	switch (alg) {
+-		case ENCTYPE_DES_CBC_RAW:
+-			alg_name = "cbc(des)";
+-			break;
+-		default:
+-			printk("gss_kerberos_mech: unsupported algorithm %d\n", alg);
+-			goto out_err_free_key;
+-	}
+-	*res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC);
++	*res = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
++							CRYPTO_ALG_ASYNC);
+ 	if (IS_ERR(*res)) {
+-		printk("gss_kerberos_mech: unable to initialize crypto algorithm %s\n", alg_name);
++		printk(KERN_WARNING "gss_kerberos_mech: unable to initialize "
++			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
+ 		*res = NULL;
+ 		goto out_err_free_key;
+ 	}
+ 	if (crypto_blkcipher_setkey(*res, key.data, key.len)) {
+-		printk("gss_kerberos_mech: error setting key for crypto algorithm %s\n", alg_name);
++		printk(KERN_WARNING "gss_kerberos_mech: error setting key for "
++			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
+ 		goto out_err_free_tfm;
+ 	}
+ 
+@@ -123,56 +270,55 @@ out_err:
+ }
+ 
+ static int
+-gss_import_sec_context_kerberos(const void *p,
+-				size_t len,
+-				struct gss_ctx *ctx_id)
++gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
+ {
+-	const void *end = (const void *)((const char *)p + len);
+-	struct	krb5_ctx *ctx;
+ 	int tmp;
+ 
+-	if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) {
+-		p = ERR_PTR(-ENOMEM);
+-		goto out_err;
+-	}
+-
+ 	p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
++		goto out_err;
++
++	/* Old format supports only DES!  Any other enctype uses new format */
++	ctx->enctype = ENCTYPE_DES_CBC_RAW;
++
++	ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
++	if (ctx->gk5e == NULL)
++		return -EINVAL;	/* p is a valid pointer; PTR_ERR(p) would be garbage */
++
+ 	/* The downcall format was designed before we completely understood
+ 	 * the uses of the context fields; so it includes some stuff we
+ 	 * just give some minimal sanity-checking, and some we ignore
+ 	 * completely (like the next twenty bytes): */
+ 	if (unlikely(p + 20 > end || p + 20 < p))
+-		goto out_err_free_ctx;
++		return -EFAULT;	/* short buffer; p is not an ERR_PTR here */
+ 	p += 20;
+ 	p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	if (tmp != SGN_ALG_DES_MAC_MD5) {
+ 		p = ERR_PTR(-ENOSYS);
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	}
+ 	p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	if (tmp != SEAL_ALG_DES) {
+ 		p = ERR_PTR(-ENOSYS);
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	}
+ 	p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	p = simple_get_bytes(p, end, &ctx->seq_send, sizeof(ctx->seq_send));
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
++		goto out_err;
+ 	p = simple_get_netobj(p, end, &ctx->mech_used);
+ 	if (IS_ERR(p))
+-		goto out_err_free_ctx;
+-	p = get_key(p, end, &ctx->enc);
++		goto out_err;
++	p = get_key(p, end, ctx, &ctx->enc);
+ 	if (IS_ERR(p))
+ 		goto out_err_free_mech;
+-	p = get_key(p, end, &ctx->seq);
++	p = get_key(p, end, ctx, &ctx->seq);
+ 	if (IS_ERR(p))
+ 		goto out_err_free_key1;
+ 	if (p != end) {
+@@ -180,9 +326,6 @@ gss_import_sec_context_kerberos(const vo
+ 		goto out_err_free_key2;
+ 	}
+ 
+-	ctx_id->internal_ctx_id = ctx;
+-
+-	dprintk("RPC:       Successfully imported new context.\n");
+ 	return 0;
+ 
+ out_err_free_key2:
+@@ -191,18 +334,378 @@ out_err_free_key1:
+ 	crypto_free_blkcipher(ctx->enc);
+ out_err_free_mech:
+ 	kfree(ctx->mech_used.data);
+-out_err_free_ctx:
+-	kfree(ctx);
+ out_err:
+ 	return PTR_ERR(p);
+ }
+ 
++struct crypto_blkcipher *
++context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key)
++{
++	struct crypto_blkcipher *cp;
++
++	cp = crypto_alloc_blkcipher(cname, 0, CRYPTO_ALG_ASYNC);
++	if (IS_ERR(cp)) {
++		dprintk("gss_kerberos_mech: unable to initialize "
++			"crypto algorithm %s\n", cname);
++		return NULL;
++	}
++	if (crypto_blkcipher_setkey(cp, key, ctx->gk5e->keylength)) {
++		dprintk("gss_kerberos_mech: error setting key for "
++			"crypto algorithm %s\n", cname);
++		crypto_free_blkcipher(cp);
++		return NULL;
++	}
++	return cp;
++}
++
++static inline void
++set_cdata(u8 cdata[GSS_KRB5_K5CLENGTH], u32 usage, u8 seed)
++{
++	cdata[0] = (usage>>24)&0xff;
++	cdata[1] = (usage>>16)&0xff;
++	cdata[2] = (usage>>8)&0xff;
++	cdata[3] = usage&0xff;
++	cdata[4] = seed;
++}
++
++static int
++context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask)
++{
++	struct xdr_netobj c, keyin, keyout;
++	u8 cdata[GSS_KRB5_K5CLENGTH];
++	u32 err;
++
++	c.len = GSS_KRB5_K5CLENGTH;
++	c.data = cdata;
++
++	keyin.data = ctx->Ksess;
++	keyin.len = ctx->gk5e->keylength;
++	keyout.len = ctx->gk5e->keylength;
++
++	/* seq uses the raw key */
++	ctx->seq = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
++					   ctx->Ksess);
++	if (ctx->seq == NULL)
++		goto out_err;
++
++	ctx->enc = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
++					   ctx->Ksess);
++	if (ctx->enc == NULL)
++		goto out_free_seq;
++
++	/* derive cksum */
++	set_cdata(cdata, KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM);
++	keyout.data = ctx->cksum;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving cksum key\n",
++			__func__, err);
++		goto out_free_enc;
++	}
++
++	return 0;
++
++out_free_enc:
++	crypto_free_blkcipher(ctx->enc);
++out_free_seq:
++	crypto_free_blkcipher(ctx->seq);
++out_err:
++	return -EINVAL;
++}
++
++/*
++ * Note that RC4 depends on deriving keys using the sequence
++ * number or the checksum of a token.  Therefore, the final keys
++ * cannot be calculated until the token is being constructed!
++ */
++static int
++context_derive_keys_rc4(struct krb5_ctx *ctx)
++{
++	struct crypto_hash *hmac;
++	char sigkeyconstant[] = "signaturekey";
++	int slen = strlen(sigkeyconstant) + 1;	/* include null terminator */
++	struct hash_desc desc;
++	struct scatterlist sg[1];
++	int err;
++
++	dprintk("RPC:       %s: entered\n", __func__);
++	/*
++	 * derive cksum (aka Ksign) key
++	 */
++	hmac = crypto_alloc_hash(ctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
++	if (IS_ERR(hmac)) {
++		dprintk("%s: error %ld allocating hash '%s'\n",
++			__func__, PTR_ERR(hmac), ctx->gk5e->cksum_name);
++		err = PTR_ERR(hmac);
++		goto out_err;
++	}
++
++	err = crypto_hash_setkey(hmac, ctx->Ksess, ctx->gk5e->keylength);
++	if (err)
++		goto out_err_free_hmac;
++
++	sg_init_table(sg, 1);
++	sg_set_buf(sg, sigkeyconstant, slen);
++
++	desc.tfm = hmac;
++	desc.flags = 0;
++
++	err = crypto_hash_init(&desc);
++	if (err)
++		goto out_err_free_hmac;
++
++	err = crypto_hash_digest(&desc, sg, slen, ctx->cksum);
++	if (err)
++		goto out_err_free_hmac;
++	/*
++	 * allocate hash, and blkciphers for data and seqnum encryption
++	 */
++	ctx->enc = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
++					  CRYPTO_ALG_ASYNC);
++	if (IS_ERR(ctx->enc)) {
++		err = PTR_ERR(ctx->enc);
++		goto out_err_free_hmac;
++	}
++
++	ctx->seq = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
++					  CRYPTO_ALG_ASYNC);
++	if (IS_ERR(ctx->seq)) {
++		crypto_free_blkcipher(ctx->enc);
++		err = PTR_ERR(ctx->seq);
++		goto out_err_free_hmac;
++	}
++
++	dprintk("RPC:       %s: returning success\n", __func__);
++
++	err = 0;
++
++out_err_free_hmac:
++	crypto_free_hash(hmac);
++out_err:
++	dprintk("RPC:       %s: returning %d\n", __func__, err);
++	return err;
++}
++
++static int
++context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
++{
++	struct xdr_netobj c, keyin, keyout;
++	u8 cdata[GSS_KRB5_K5CLENGTH];
++	u32 err;
++
++	c.len = GSS_KRB5_K5CLENGTH;
++	c.data = cdata;
++
++	keyin.data = ctx->Ksess;
++	keyin.len = ctx->gk5e->keylength;
++	keyout.len = ctx->gk5e->keylength;
++
++	/* initiator seal encryption */
++	set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
++	keyout.data = ctx->initiator_seal;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving initiator_seal key\n",
++			__func__, err);
++		goto out_err;
++	}
++	ctx->initiator_enc = context_v2_alloc_cipher(ctx,
++						     ctx->gk5e->encrypt_name,
++						     ctx->initiator_seal);
++	if (ctx->initiator_enc == NULL)
++		goto out_err;
++
++	/* acceptor seal encryption */
++	set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
++	keyout.data = ctx->acceptor_seal;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving acceptor_seal key\n",
++			__func__, err);
++		goto out_free_initiator_enc;
++	}
++	ctx->acceptor_enc = context_v2_alloc_cipher(ctx,
++						    ctx->gk5e->encrypt_name,
++						    ctx->acceptor_seal);
++	if (ctx->acceptor_enc == NULL)
++		goto out_free_initiator_enc;
++
++	/* initiator sign checksum */
++	set_cdata(cdata, KG_USAGE_INITIATOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
++	keyout.data = ctx->initiator_sign;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving initiator_sign key\n",
++			__func__, err);
++		goto out_free_acceptor_enc;
++	}
++
++	/* acceptor sign checksum */
++	set_cdata(cdata, KG_USAGE_ACCEPTOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
++	keyout.data = ctx->acceptor_sign;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving acceptor_sign key\n",
++			__func__, err);
++		goto out_free_acceptor_enc;
++	}
++
++	/* initiator seal integrity */
++	set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
++	keyout.data = ctx->initiator_integ;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving initiator_integ key\n",
++			__func__, err);
++		goto out_free_acceptor_enc;
++	}
++
++	/* acceptor seal integrity */
++	set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
++	keyout.data = ctx->acceptor_integ;
++	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
++	if (err) {
++		dprintk("%s: Error %d deriving acceptor_integ key\n",
++			__func__, err);
++		goto out_free_acceptor_enc;
++	}
++
++	switch (ctx->enctype) {
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		ctx->initiator_enc_aux =
++			context_v2_alloc_cipher(ctx, "cbc(aes)",
++						ctx->initiator_seal);
++		if (ctx->initiator_enc_aux == NULL)
++			goto out_free_acceptor_enc;
++		ctx->acceptor_enc_aux =
++			context_v2_alloc_cipher(ctx, "cbc(aes)",
++						ctx->acceptor_seal);
++		if (ctx->acceptor_enc_aux == NULL) {
++			crypto_free_blkcipher(ctx->initiator_enc_aux);
++			goto out_free_acceptor_enc;
++		}
++	}
++
++	return 0;
++
++out_free_acceptor_enc:
++	crypto_free_blkcipher(ctx->acceptor_enc);
++out_free_initiator_enc:
++	crypto_free_blkcipher(ctx->initiator_enc);
++out_err:
++	return -EINVAL;
++}
++
++static int
++gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
++		gfp_t gfp_mask)
++{
++	int ret;
++	/* Parse a v2 context blob into ctx, then derive the per-enctype keys. */
++	p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
++	if (IS_ERR(p))
++		goto out_err;
++	ctx->initiate = ctx->flags & KRB5_CTX_FLAG_INITIATOR;
++
++	p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
++	if (IS_ERR(p))
++		goto out_err;
++	p = simple_get_bytes(p, end, &ctx->seq_send64, sizeof(ctx->seq_send64));
++	if (IS_ERR(p))
++		goto out_err;
++	ctx->seq_send = ctx->seq_send64;	/* for "older" enctypes */
++	if (ctx->seq_send64 != ctx->seq_send) {
++		dprintk("%s: seq_send64 %lx, seq_send %x overflow?\n", __func__,
++			(long unsigned)ctx->seq_send64, ctx->seq_send);
++		goto out_inval;	/* p is a valid pointer, not an ERR_PTR */
++	}
++	p = simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype));
++	if (IS_ERR(p))
++		goto out_err;
++	if (ctx->enctype == ENCTYPE_DES3_CBC_SHA1)	/* alias: map to _RAW */
++		ctx->enctype = ENCTYPE_DES3_CBC_RAW;
++	ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
++	if (ctx->gk5e == NULL) {
++		dprintk("gss_kerberos_mech: unsupported krb5 enctype %u\n",
++			ctx->enctype);
++		goto out_inval;
++	}
++	p = simple_get_bytes(p, end, ctx->Ksess, ctx->gk5e->keylength);
++	if (IS_ERR(p))
++		goto out_err;
++	if (p != end)	/* leftover bytes: reject the blob */
++		goto out_inval;
++
++	ctx->mech_used.data = kmemdup(gss_kerberos_mech.gm_oid.data,
++				      gss_kerberos_mech.gm_oid.len, gfp_mask);
++	if (unlikely(ctx->mech_used.data == NULL)) {
++		p = ERR_PTR(-ENOMEM);
++		goto out_err;
++	}
++	ctx->mech_used.len = gss_kerberos_mech.gm_oid.len;
++
++	switch (ctx->enctype) {
++	case ENCTYPE_DES3_CBC_RAW:
++		ret = context_derive_keys_des3(ctx, gfp_mask);
++		break;
++	case ENCTYPE_ARCFOUR_HMAC:
++		ret = context_derive_keys_rc4(ctx);
++		break;
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		ret = context_derive_keys_new(ctx, gfp_mask);
++		break;
++	default:
++		ret = -EINVAL;
++	}
++	if (ret)	/* the caller only kfree()s ctx, so drop the OID copy */
++		kfree(ctx->mech_used.data);
++	return ret;
++
++out_inval:
++	p = ERR_PTR(-EINVAL);
++out_err:
++	return PTR_ERR(p);
++}
++
++static int
++gss_import_sec_context_kerberos(const void *p, size_t len,
++				struct gss_ctx *ctx_id,
++				gfp_t gfp_mask)
++{
++	const void *end = (const void *)((const char *)p + len);
++	struct  krb5_ctx *ctx;
++	int ret;
++	/* Allocate a zeroed krb5_ctx and fill it from the len-byte blob at p. */
++	ctx = kzalloc(sizeof(*ctx), gfp_mask);
++	if (ctx == NULL)
++		return -ENOMEM;
++
++	if (len == 85)	/* a blob of exactly 85 bytes goes to the v1 parser */
++		ret = gss_import_v1_context(p, end, ctx);
++	else
++		ret = gss_import_v2_context(p, end, ctx, gfp_mask);
++
++	if (ret == 0)
++		ctx_id->internal_ctx_id = ctx;	/* ctx_id now holds the ctx */
++	else
++		kfree(ctx);	/* frees the struct only, nothing it points to */
++
++	dprintk("RPC:       %s: returning %d\n", __func__, ret);
++	return ret;
++}
++
+ static void
+ gss_delete_sec_context_kerberos(void *internal_ctx) {
+ 	struct krb5_ctx *kctx = internal_ctx;
+ 
+ 	crypto_free_blkcipher(kctx->seq);
+ 	crypto_free_blkcipher(kctx->enc);
++	crypto_free_blkcipher(kctx->acceptor_enc);
++	crypto_free_blkcipher(kctx->initiator_enc);
++	crypto_free_blkcipher(kctx->acceptor_enc_aux);
++	crypto_free_blkcipher(kctx->initiator_enc_aux);
+ 	kfree(kctx->mech_used.data);
+ 	kfree(kctx);
+ }
+@@ -241,6 +744,7 @@ static struct gss_api_mech gss_kerberos_
+ 	.gm_ops		= &gss_kerberos_ops,
+ 	.gm_pf_num	= ARRAY_SIZE(gss_kerberos_pfs),
+ 	.gm_pfs		= gss_kerberos_pfs,
++	.gm_upcall_enctypes = "enctypes=18,17,16,23,3,1,2 ",
+ };
+ 
+ static int __init init_kerberos_module(void)
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seal.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seal.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seal.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seal.c	2010-08-23 11:01:00.392564136 -0400
+@@ -3,7 +3,7 @@
+  *
+  *  Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5seal.c
+  *
+- *  Copyright (c) 2000 The Regents of the University of Michigan.
++ *  Copyright (c) 2000-2008 The Regents of the University of Michigan.
+  *  All rights reserved.
+  *
+  *  Andy Adamson	<andros@umich.edu>
+@@ -70,53 +70,154 @@
+ 
+ DEFINE_SPINLOCK(krb5_seq_lock);
+ 
+-u32
+-gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
++static char *
++setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
++{
++	__be16 *ptr, *krb5_hdr;
++	int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
++	/* Set token->len and write the v1 MIC token header into token->data. */
++	token->len = g_token_size(&ctx->mech_used, body_size);
++
++	ptr = (__be16 *)token->data;
++	g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr);
++
++	/* ptr now at start of header described in rfc 1964, section 1.2.1: */
++	krb5_hdr = ptr;
++	*ptr++ = KG_TOK_MIC_MSG;	/* token id */
++	*ptr++ = cpu_to_le16(ctx->gk5e->signalg);	/* stored little-endian */
++	*ptr++ = SEAL_ALG_NONE;	/* seal algorithm field */
++	*ptr++ = 0xffff;	/* filler */
++
++	return (char *)krb5_hdr;	/* the krb5 header, not token->data */
++}
++
++static void *
++setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
++{
++	__be16 *ptr, *krb5_hdr;
++	u8 *p, flags = 0x00;
++	/* Write the first 8 bytes of a v2 MIC token and set token->len. */
++	if ((ctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
++		flags |= 0x01;	/* presumably KG2_TOKEN_FLAG_SENTBYACCEPTOR */
++	if (ctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY)
++		flags |= 0x04;	/* presumably KG2_TOKEN_FLAG_ACCEPTORSUBKEY */
++
++	/* Per rfc 4121, sec 4.2.6.1, there is no header,
++	 * just start the token */
++	krb5_hdr = ptr = (__be16 *)token->data;
++
++	*ptr++ = KG2_TOK_MIC;	/* bytes 0-1: token id */
++	p = (u8 *)ptr;
++	*p++ = flags;	/* byte 2 */
++	*p++ = 0xff;	/* byte 3: filler */
++	ptr = (__be16 *)p;
++	*ptr++ = 0xffff;	/* bytes 4-7: filler */
++	*ptr++ = 0xffff;
++
++	token->len = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
++	return krb5_hdr;	/* same address as token->data */
++}
++
++static u32
++gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
+ 		struct xdr_netobj *token)
+ {
+-	struct krb5_ctx		*ctx = gss_ctx->internal_ctx_id;
+-	char			cksumdata[16];
+-	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
+-	unsigned char		*ptr, *msg_start;
++	char			cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj	md5cksum = {.len = sizeof(cksumdata),
++					    .data = cksumdata};
++	void			*ptr;
+ 	s32			now;
+ 	u32			seq_send;
++	u8			*cksumkey;
+ 
+-	dprintk("RPC:       gss_krb5_seal\n");
++	dprintk("RPC:       %s\n", __func__);
+ 	BUG_ON(ctx == NULL);
+ 
+ 	now = get_seconds();
+ 
+-	token->len = g_token_size(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8);
++	ptr = setup_token(ctx, token);
+ 
+-	ptr = token->data;
+-	g_make_token_header(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8, &ptr);
++	if (ctx->gk5e->keyed_cksum)
++		cksumkey = ctx->cksum;
++	else
++		cksumkey = NULL;
+ 
+-	/* ptr now at header described in rfc 1964, section 1.2.1: */
+-	ptr[0] = (unsigned char) ((KG_TOK_MIC_MSG >> 8) & 0xff);
+-	ptr[1] = (unsigned char) (KG_TOK_MIC_MSG & 0xff);
++	if (make_checksum(ctx, ptr, 8, text, 0, cksumkey,
++			  KG_USAGE_SIGN, &md5cksum))
++		return GSS_S_FAILURE;
+ 
+-	msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8;
++	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
+ 
+-	*(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
+-	memset(ptr + 4, 0xff, 4);
++	spin_lock(&krb5_seq_lock);
++	seq_send = ctx->seq_send++;
++	spin_unlock(&krb5_seq_lock);
+ 
+-	if (make_checksum("md5", ptr, 8, text, 0, &md5cksum))
++	if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
++			      seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
+ 		return GSS_S_FAILURE;
+ 
+-	if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
+-			  md5cksum.data, md5cksum.len))
+-		return GSS_S_FAILURE;
++	return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
++}
++
++u32
++gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
++		struct xdr_netobj *token)
++{
++	char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj cksumobj = { .len = sizeof(cksumdata),
++				       .data = cksumdata};
++	void *krb5_hdr;
++	s32 now;
++	u64 seq_send;
++	u8 *cksumkey;
++	unsigned int cksum_usage;
++
++	dprintk("RPC:       %s\n", __func__);
+ 
+-	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
++	krb5_hdr = setup_token_v2(ctx, token);
+ 
++	/* Set up the sequence number. Now 64-bits in clear
++	 * text and w/o direction indicator */
+ 	spin_lock(&krb5_seq_lock);
+-	seq_send = ctx->seq_send++;
++	seq_send = ctx->seq_send64++;
+ 	spin_unlock(&krb5_seq_lock);
++	*((u64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send);
+ 
+-	if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
+-			      seq_send, ptr + GSS_KRB5_TOK_HDR_LEN,
+-			      ptr + 8))
++	if (ctx->initiate) {
++		cksumkey = ctx->initiator_sign;
++		cksum_usage = KG_USAGE_INITIATOR_SIGN;
++	} else {
++		cksumkey = ctx->acceptor_sign;
++		cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
++	}
++
++	if (make_checksum_v2(ctx, krb5_hdr, GSS_KRB5_TOK_HDR_LEN,
++			     text, 0, cksumkey, cksum_usage, &cksumobj))
+ 		return GSS_S_FAILURE;
+ 
++	memcpy(krb5_hdr + GSS_KRB5_TOK_HDR_LEN, cksumobj.data, cksumobj.len);
++
++	now = get_seconds();
++
+ 	return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
+ }
++
++u32
++gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
++		     struct xdr_netobj *token)
++{
++	struct krb5_ctx		*ctx = gss_ctx->internal_ctx_id;
++	/* Pick the MIC routine by enctype: DES/DES3/RC4 -> v1, AES -> v2. */
++	switch (ctx->enctype) {
++	default:
++		BUG();	/* any enctype not listed below */
++	case ENCTYPE_DES_CBC_RAW:
++	case ENCTYPE_DES3_CBC_RAW:
++	case ENCTYPE_ARCFOUR_HMAC:
++		return gss_get_mic_v1(ctx, text, token);
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		return gss_get_mic_v2(ctx, text, token);
++	}
++}
++
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seqnum.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seqnum.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_seqnum.c	2010-08-23 11:01:00.393496180 -0400
+@@ -39,14 +39,51 @@
+ # define RPCDBG_FACILITY        RPCDBG_AUTH
+ #endif
+ 
++static s32
++krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
++		      unsigned char *cksum, unsigned char *buf)
++{
++	struct crypto_blkcipher *cipher;
++	unsigned char plain[8];
++	s32 code;
++	/* Encrypt {seqnum, direction x4} into buf with a per-call cipher. */
++	dprintk("RPC:       %s:\n", __func__);
++	cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
++					CRYPTO_ALG_ASYNC);
++	if (IS_ERR(cipher))
++		return PTR_ERR(cipher);
++
++	plain[0] = (unsigned char) ((seqnum >> 24) & 0xff);	/* MSB first */
++	plain[1] = (unsigned char) ((seqnum >> 16) & 0xff);
++	plain[2] = (unsigned char) ((seqnum >> 8) & 0xff);
++	plain[3] = (unsigned char) ((seqnum >> 0) & 0xff);
++	plain[4] = direction;	/* direction byte repeated four times */
++	plain[5] = direction;
++	plain[6] = direction;
++	plain[7] = direction;
++
++	code = krb5_rc4_setup_seq_key(kctx, cipher, cksum);
++	if (code)
++		goto out;
++
++	code = krb5_encrypt(cipher, cksum, plain, buf, 8);
++out:
++	crypto_free_blkcipher(cipher);	/* released on every path past alloc */
++	return code;
++}
+ s32
+-krb5_make_seq_num(struct crypto_blkcipher *key,
++krb5_make_seq_num(struct krb5_ctx *kctx,
++		struct crypto_blkcipher *key,
+ 		int direction,
+ 		u32 seqnum,
+ 		unsigned char *cksum, unsigned char *buf)
+ {
+ 	unsigned char plain[8];
+ 
++	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
++		return krb5_make_rc4_seq_num(kctx, direction, seqnum,
++					     cksum, buf);
++
+ 	plain[0] = (unsigned char) (seqnum & 0xff);
+ 	plain[1] = (unsigned char) ((seqnum >> 8) & 0xff);
+ 	plain[2] = (unsigned char) ((seqnum >> 16) & 0xff);
+@@ -60,17 +97,59 @@ krb5_make_seq_num(struct crypto_blkciphe
+ 	return krb5_encrypt(key, cksum, plain, buf, 8);
+ }
+ 
++static s32
++krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
++		     unsigned char *buf, int *direction, s32 *seqnum)
++{
++	struct crypto_blkcipher *cipher;
++	unsigned char plain[8];
++	s32 code;
++	/* Decrypt 8 bytes at buf and split them into seqnum and direction. */
++	dprintk("RPC:       %s:\n", __func__);
++	cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
++					CRYPTO_ALG_ASYNC);
++	if (IS_ERR(cipher))
++		return PTR_ERR(cipher);
++
++	code = krb5_rc4_setup_seq_key(kctx, cipher, cksum);
++	if (code)
++		goto out;
++
++	code = krb5_decrypt(cipher, cksum, buf, plain, 8);
++	if (code)
++		goto out;
++
++	if ((plain[4] != plain[5]) || (plain[4] != plain[6])
++				   || (plain[4] != plain[7])) {
++		code = (s32)KG_BAD_SEQ;	/* the four direction bytes differ */
++		goto out;
++	}
++
++	*direction = plain[4];
++
++	*seqnum = ((plain[0] << 24) | (plain[1] << 16) |
++					(plain[2] << 8) | (plain[3]));
++out:
++	crypto_free_blkcipher(cipher);	/* released on every path past alloc */
++	return code;	/* outputs are written only when this is 0 */
++}
++
+ s32
+-krb5_get_seq_num(struct crypto_blkcipher *key,
++krb5_get_seq_num(struct krb5_ctx *kctx,
+ 	       unsigned char *cksum,
+ 	       unsigned char *buf,
+ 	       int *direction, u32 *seqnum)
+ {
+ 	s32 code;
+ 	unsigned char plain[8];
++	struct crypto_blkcipher *key = kctx->seq;
+ 
+ 	dprintk("RPC:       krb5_get_seq_num:\n");
+ 
++	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
++		return krb5_get_rc4_seq_num(kctx, cksum, buf,
++					    direction, seqnum);
++
+ 	if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
+ 		return code;
+ 
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_unseal.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_unseal.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_unseal.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_unseal.c	2010-08-23 11:01:00.393496180 -0400
+@@ -3,7 +3,7 @@
+  *
+  *  Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5unseal.c
+  *
+- *  Copyright (c) 2000 The Regents of the University of Michigan.
++ *  Copyright (c) 2000-2008 The Regents of the University of Michigan.
+  *  All rights reserved.
+  *
+  *  Andy Adamson   <andros@umich.edu>
+@@ -70,20 +70,21 @@
+ /* read_token is a mic token, and message_buffer is the data that the mic was
+  * supposedly taken over. */
+ 
+-u32
+-gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
++static u32
++gss_verify_mic_v1(struct krb5_ctx *ctx,
+ 		struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
+ {
+-	struct krb5_ctx		*ctx = gss_ctx->internal_ctx_id;
+ 	int			signalg;
+ 	int			sealalg;
+-	char			cksumdata[16];
+-	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
++	char			cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj	md5cksum = {.len = sizeof(cksumdata),
++					    .data = cksumdata};
+ 	s32			now;
+ 	int			direction;
+ 	u32			seqnum;
+ 	unsigned char		*ptr = (unsigned char *)read_token->data;
+ 	int			bodysize;
++	u8			*cksumkey;
+ 
+ 	dprintk("RPC:       krb5_read_token\n");
+ 
+@@ -98,7 +99,7 @@ gss_verify_mic_kerberos(struct gss_ctx *
+ 	/* XXX sanity-check bodysize?? */
+ 
+ 	signalg = ptr[2] + (ptr[3] << 8);
+-	if (signalg != SGN_ALG_DES_MAC_MD5)
++	if (signalg != ctx->gk5e->signalg)
+ 		return GSS_S_DEFECTIVE_TOKEN;
+ 
+ 	sealalg = ptr[4] + (ptr[5] << 8);
+@@ -108,13 +109,17 @@ gss_verify_mic_kerberos(struct gss_ctx *
+ 	if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
+ 		return GSS_S_DEFECTIVE_TOKEN;
+ 
+-	if (make_checksum("md5", ptr, 8, message_buffer, 0, &md5cksum))
+-		return GSS_S_FAILURE;
++	if (ctx->gk5e->keyed_cksum)
++		cksumkey = ctx->cksum;
++	else
++		cksumkey = NULL;
+ 
+-	if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16))
++	if (make_checksum(ctx, ptr, 8, message_buffer, 0,
++			  cksumkey, KG_USAGE_SIGN, &md5cksum))
+ 		return GSS_S_FAILURE;
+ 
+-	if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
++	if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
++					ctx->gk5e->cksumlength))
+ 		return GSS_S_BAD_SIG;
+ 
+ 	/* it got through unscathed.  Make sure the context is unexpired */
+@@ -126,7 +131,8 @@ gss_verify_mic_kerberos(struct gss_ctx *
+ 
+ 	/* do sequencing checks */
+ 
+-	if (krb5_get_seq_num(ctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, &direction, &seqnum))
++	if (krb5_get_seq_num(ctx, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
++			     &direction, &seqnum))
+ 		return GSS_S_FAILURE;
+ 
+ 	if ((ctx->initiate && direction != 0xff) ||
+@@ -135,3 +141,86 @@ gss_verify_mic_kerberos(struct gss_ctx *
+ 
+ 	return GSS_S_COMPLETE;
+ }
++
++static u32
++gss_verify_mic_v2(struct krb5_ctx *ctx,
++		struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
++{
++	char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj cksumobj = {.len = sizeof(cksumdata),
++				      .data = cksumdata};
++	s32 now;
++	u64 seqnum;
++	u8 *ptr = read_token->data;
++	u8 *cksumkey;
++	u8 flags;
++	int i;
++	unsigned int cksum_usage;
++
++	dprintk("RPC:       %s\n", __func__);
++	/* header checks: token id, direction flag, no seal flag, 0xff filler */
++	if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_MIC)
++		return GSS_S_DEFECTIVE_TOKEN;
++
++	flags = ptr[2];
++	if ((!ctx->initiate && (flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)) ||
++	    (ctx->initiate && !(flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)))
++		return GSS_S_BAD_SIG;	/* token came from our own side */
++
++	if (flags & KG2_TOKEN_FLAG_SEALED) {
++		dprintk("%s: token has unexpected sealed flag\n", __func__);
++		return GSS_S_FAILURE;
++	}
++
++	for (i = 3; i < 8; i++)
++		if (ptr[i] != 0xff)
++			return GSS_S_DEFECTIVE_TOKEN;
++
++	if (ctx->initiate) {	/* verify with the peer's signing key */
++		cksumkey = ctx->acceptor_sign;
++		cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
++	} else {
++		cksumkey = ctx->initiator_sign;
++		cksum_usage = KG_USAGE_INITIATOR_SIGN;
++	}
++
++	if (make_checksum_v2(ctx, ptr, GSS_KRB5_TOK_HDR_LEN, message_buffer, 0,
++			     cksumkey, cksum_usage, &cksumobj))
++		return GSS_S_FAILURE;
++
++	if (memcmp(cksumobj.data, ptr + GSS_KRB5_TOK_HDR_LEN,
++				ctx->gk5e->cksumlength))
++		return GSS_S_BAD_SIG;
++
++	/* it got through unscathed.  Make sure the context is unexpired */
++	now = get_seconds();
++	if (now > ctx->endtime)
++		return GSS_S_CONTEXT_EXPIRED;
++
++	/* sequencing: the number is at byte offset 8; it is not validated yet */
++
++	seqnum = be64_to_cpup((__be64 *)(ptr + 8));	/* offset in bytes */
++
++	return GSS_S_COMPLETE;
++}
++
++u32
++gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
++			struct xdr_buf *message_buffer,
++			struct xdr_netobj *read_token)
++{
++	struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
++	/* Pick the verify routine by enctype: DES/DES3/RC4 -> v1, AES -> v2. */
++	switch (ctx->enctype) {
++	default:
++		BUG();	/* any enctype not listed below */
++	case ENCTYPE_DES_CBC_RAW:
++	case ENCTYPE_DES3_CBC_RAW:
++	case ENCTYPE_ARCFOUR_HMAC:
++		return gss_verify_mic_v1(ctx, message_buffer, read_token);
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		return gss_verify_mic_v2(ctx, message_buffer, read_token);
++	}
++}
++
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_wrap.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_wrap.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_wrap.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_krb5_wrap.c	2010-08-23 11:01:00.394576083 -0400
+@@ -1,3 +1,33 @@
++/*
++ * COPYRIGHT (c) 2008
++ * The Regents of the University of Michigan
++ * ALL RIGHTS RESERVED
++ *
++ * Permission is granted to use, copy, create derivative works
++ * and redistribute this software and such derivative works
++ * for any purpose, so long as the name of The University of
++ * Michigan is not used in any advertising or publicity
++ * pertaining to the use of distribution of this software
++ * without specific, written prior authorization.  If the
++ * above copyright notice or any other identification of the
++ * University of Michigan is included in any copy of any
++ * portion of this software, then the disclaimer below must
++ * also be included.
++ *
++ * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
++ * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
++ * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
++ * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
++ * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
++ * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
++ * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
++ * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
++ * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
++ * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGES.
++ */
++
+ #include <linux/types.h>
+ #include <linux/jiffies.h>
+ #include <linux/sunrpc/gss_krb5.h>
+@@ -12,10 +42,7 @@
+ static inline int
+ gss_krb5_padding(int blocksize, int length)
+ {
+-	/* Most of the code is block-size independent but currently we
+-	 * use only 8: */
+-	BUG_ON(blocksize != 8);
+-	return 8 - (length & 7);
++	return blocksize - (length % blocksize);
+ }
+ 
+ static inline void
+@@ -86,8 +113,8 @@ out:
+ 	return 0;
+ }
+ 
+-static void
+-make_confounder(char *p, u32 conflen)
++void
++gss_krb5_make_confounder(char *p, u32 conflen)
+ {
+ 	static u64 i = 0;
+ 	u64 *q = (u64 *)p;
+@@ -127,69 +154,73 @@ make_confounder(char *p, u32 conflen)
+ 
+ /* XXX factor out common code with seal/unseal. */
+ 
+-u32
+-gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
++static u32
++gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
+ 		struct xdr_buf *buf, struct page **pages)
+ {
+-	struct krb5_ctx		*kctx = ctx->internal_ctx_id;
+-	char			cksumdata[16];
+-	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
++	char			cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj	md5cksum = {.len = sizeof(cksumdata),
++					    .data = cksumdata};
+ 	int			blocksize = 0, plainlen;
+ 	unsigned char		*ptr, *msg_start;
+ 	s32			now;
+ 	int			headlen;
+ 	struct page		**tmp_pages;
+ 	u32			seq_send;
++	u8			*cksumkey;
++	u32			conflen = kctx->gk5e->conflen;
+ 
+-	dprintk("RPC:       gss_wrap_kerberos\n");
++	dprintk("RPC:       %s\n", __func__);
+ 
+ 	now = get_seconds();
+ 
+ 	blocksize = crypto_blkcipher_blocksize(kctx->enc);
+ 	gss_krb5_add_padding(buf, offset, blocksize);
+ 	BUG_ON((buf->len - offset) % blocksize);
+-	plainlen = blocksize + buf->len - offset;
++	plainlen = conflen + buf->len - offset;
+ 
+-	headlen = g_token_size(&kctx->mech_used, 24 + plainlen) -
+-						(buf->len - offset);
++	headlen = g_token_size(&kctx->mech_used,
++		GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength + plainlen) -
++		(buf->len - offset);
+ 
+ 	ptr = buf->head[0].iov_base + offset;
+ 	/* shift data to make room for header. */
++	xdr_extend_head(buf, offset, headlen);
++
+ 	/* XXX Would be cleverer to encrypt while copying. */
+-	/* XXX bounds checking, slack, etc. */
+-	memmove(ptr + headlen, ptr, buf->head[0].iov_len - offset);
+-	buf->head[0].iov_len += headlen;
+-	buf->len += headlen;
+ 	BUG_ON((buf->len - offset - headlen) % blocksize);
+ 
+ 	g_make_token_header(&kctx->mech_used,
+-				GSS_KRB5_TOK_HDR_LEN + 8 + plainlen, &ptr);
++				GSS_KRB5_TOK_HDR_LEN +
++				kctx->gk5e->cksumlength + plainlen, &ptr);
+ 
+ 
+ 	/* ptr now at header described in rfc 1964, section 1.2.1: */
+ 	ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff);
+ 	ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff);
+ 
+-	msg_start = ptr + 24;
++	msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength;
+ 
+-	*(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
++	*(__be16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg);
+ 	memset(ptr + 4, 0xff, 4);
+-	*(__be16 *)(ptr + 4) = htons(SEAL_ALG_DES);
++	*(__be16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg);
+ 
+-	make_confounder(msg_start, blocksize);
++	gss_krb5_make_confounder(msg_start, conflen);
++
++	if (kctx->gk5e->keyed_cksum)
++		cksumkey = kctx->cksum;
++	else
++		cksumkey = NULL;
+ 
+ 	/* XXXJBF: UGH!: */
+ 	tmp_pages = buf->pages;
+ 	buf->pages = pages;
+-	if (make_checksum("md5", ptr, 8, buf,
+-				offset + headlen - blocksize, &md5cksum))
++	if (make_checksum(kctx, ptr, 8, buf, offset + headlen - conflen,
++					cksumkey, KG_USAGE_SEAL, &md5cksum))
+ 		return GSS_S_FAILURE;
+ 	buf->pages = tmp_pages;
+ 
+-	if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
+-			  md5cksum.data, md5cksum.len))
+-		return GSS_S_FAILURE;
+-	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
++	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
+ 
+ 	spin_lock(&krb5_seq_lock);
+ 	seq_send = kctx->seq_send++;
+@@ -197,25 +228,42 @@ gss_wrap_kerberos(struct gss_ctx *ctx, i
+ 
+ 	/* XXX would probably be more efficient to compute checksum
+ 	 * and encrypt at the same time: */
+-	if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
++	if ((krb5_make_seq_num(kctx, kctx->seq, kctx->initiate ? 0 : 0xff,
+ 			       seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
+ 		return GSS_S_FAILURE;
+ 
+-	if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
+-									pages))
+-		return GSS_S_FAILURE;
++	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
++		struct crypto_blkcipher *cipher;
++		int err;
++		cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
++						CRYPTO_ALG_ASYNC);
++		if (IS_ERR(cipher))
++			return GSS_S_FAILURE;
++
++		krb5_rc4_setup_enc_key(kctx, cipher, seq_send);
++
++		err = gss_encrypt_xdr_buf(cipher, buf,
++					  offset + headlen - conflen, pages);
++		crypto_free_blkcipher(cipher);
++		if (err)
++			return GSS_S_FAILURE;
++	} else {
++		if (gss_encrypt_xdr_buf(kctx->enc, buf,
++					offset + headlen - conflen, pages))
++			return GSS_S_FAILURE;
++	}
+ 
+ 	return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
+ }
+ 
+-u32
+-gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
++static u32
++gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
+ {
+-	struct krb5_ctx		*kctx = ctx->internal_ctx_id;
+ 	int			signalg;
+ 	int			sealalg;
+-	char			cksumdata[16];
+-	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
++	char			cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
++	struct xdr_netobj	md5cksum = {.len = sizeof(cksumdata),
++					    .data = cksumdata};
+ 	s32			now;
+ 	int			direction;
+ 	s32			seqnum;
+@@ -224,6 +272,9 @@ gss_unwrap_kerberos(struct gss_ctx *ctx,
+ 	void			*data_start, *orig_start;
+ 	int			data_len;
+ 	int			blocksize;
++	u32			conflen = kctx->gk5e->conflen;
++	int			crypt_offset;
++	u8			*cksumkey;
+ 
+ 	dprintk("RPC:       gss_unwrap_kerberos\n");
+ 
+@@ -241,29 +292,65 @@ gss_unwrap_kerberos(struct gss_ctx *ctx,
+ 	/* get the sign and seal algorithms */
+ 
+ 	signalg = ptr[2] + (ptr[3] << 8);
+-	if (signalg != SGN_ALG_DES_MAC_MD5)
++	if (signalg != kctx->gk5e->signalg)
+ 		return GSS_S_DEFECTIVE_TOKEN;
+ 
+ 	sealalg = ptr[4] + (ptr[5] << 8);
+-	if (sealalg != SEAL_ALG_DES)
++	if (sealalg != kctx->gk5e->sealalg)
+ 		return GSS_S_DEFECTIVE_TOKEN;
+ 
+ 	if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
+ 		return GSS_S_DEFECTIVE_TOKEN;
+ 
+-	if (gss_decrypt_xdr_buf(kctx->enc, buf,
+-			ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base))
+-		return GSS_S_DEFECTIVE_TOKEN;
++	/*
++	 * Data starts after token header and checksum.  ptr points
++	 * to the beginning of the token header
++	 */
++	crypt_offset = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) -
++					(unsigned char *)buf->head[0].iov_base;
++
++	/*
++	 * Need plaintext seqnum to derive encryption key for arcfour-hmac
++	 */
++	if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN,
++			     ptr + 8, &direction, &seqnum))
++		return GSS_S_BAD_SIG;
+ 
+-	if (make_checksum("md5", ptr, 8, buf,
+-		 ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base, &md5cksum))
+-		return GSS_S_FAILURE;
++	if ((kctx->initiate && direction != 0xff) ||
++	    (!kctx->initiate && direction != 0))
++		return GSS_S_BAD_SIG;
++
++	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
++		struct crypto_blkcipher *cipher;
++		int err;
++
++		cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
++						CRYPTO_ALG_ASYNC);
++		if (IS_ERR(cipher))
++			return GSS_S_FAILURE;
++
++		krb5_rc4_setup_enc_key(kctx, cipher, seqnum);
++
++		err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset);
++		crypto_free_blkcipher(cipher);
++		if (err)
++			return GSS_S_DEFECTIVE_TOKEN;
++	} else {
++		if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset))
++			return GSS_S_DEFECTIVE_TOKEN;
++	}
+ 
+-	if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
+-			   md5cksum.data, md5cksum.len))
++	if (kctx->gk5e->keyed_cksum)
++		cksumkey = kctx->cksum;
++	else
++		cksumkey = NULL;
++
++	if (make_checksum(kctx, ptr, 8, buf, crypt_offset,
++					cksumkey, KG_USAGE_SEAL, &md5cksum))
+ 		return GSS_S_FAILURE;
+ 
+-	if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
++	if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
++						kctx->gk5e->cksumlength))
+ 		return GSS_S_BAD_SIG;
+ 
+ 	/* it got through unscathed.  Make sure the context is unexpired */
+@@ -275,19 +362,12 @@ gss_unwrap_kerberos(struct gss_ctx *ctx,
+ 
+ 	/* do sequencing checks */
+ 
+-	if (krb5_get_seq_num(kctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
+-				    &direction, &seqnum))
+-		return GSS_S_BAD_SIG;
+-
+-	if ((kctx->initiate && direction != 0xff) ||
+-	    (!kctx->initiate && direction != 0))
+-		return GSS_S_BAD_SIG;
+-
+ 	/* Copy the data back to the right position.  XXX: Would probably be
+ 	 * better to copy and encrypt at the same time. */
+ 
+ 	blocksize = crypto_blkcipher_blocksize(kctx->enc);
+-	data_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8 + blocksize;
++	data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
++					conflen;
+ 	orig_start = buf->head[0].iov_base + offset;
+ 	data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
+ 	memmove(orig_start, data_start, data_len);
+@@ -299,3 +379,209 @@ gss_unwrap_kerberos(struct gss_ctx *ctx,
+ 
+ 	return GSS_S_COMPLETE;
+ }
++
++/*
++ * Tokens with rotated data are not handled yet (that needs an in-place
++ * rotate).  Returns 0 if the effective rotation is zero, 1 otherwise; a
++ * token with nothing after its header is rejected, not divided by.
++ */
++static u32
++rotate_left(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, u16 rrc)
++{
++	unsigned int len = buf->len - offset - GSS_KRB5_TOK_HDR_LEN;
++	unsigned int realrrc = len ? rrc % len : rrc;	/* len 0: no modulus */
++
++	if (realrrc == 0)
++		return 0;
++	dprintk("%s: cannot process token with rotated data: "
++		"rrc %u, realrrc %u\n", __func__, rrc, realrrc);
++	return 1;
++}
++
++static u32
++gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
++		     struct xdr_buf *buf, struct page **pages)
++{
++	int		blocksize;	/* assigned below but never read */
++	u8		*ptr, *plainhdr;
++	s32		now;
++	u8		flags = 0x00;
++	__be16		*be16ptr, ec = 0;
++	__be64		*be64ptr;
++	u32		err;
++	/* Insert a v2 wrap token header at offset, then call encrypt_v2. */
++	dprintk("RPC:       %s\n", __func__);
++
++	if (kctx->gk5e->encrypt_v2 == NULL)
++		return GSS_S_FAILURE;	/* enctype has no v2 encrypt hook */
++
++	/* make room for gss token header */
++	if (xdr_extend_head(buf, offset, GSS_KRB5_TOK_HDR_LEN))
++		return GSS_S_FAILURE;
++
++	/* construct gss token header */
++	ptr = plainhdr = buf->head[0].iov_base + offset;
++	*ptr++ = (unsigned char) ((KG2_TOK_WRAP>>8) & 0xff);
++	*ptr++ = (unsigned char) (KG2_TOK_WRAP & 0xff);
++
++	if ((kctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
++		flags |= KG2_TOKEN_FLAG_SENTBYACCEPTOR;
++	if ((kctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY) != 0)
++		flags |= KG2_TOKEN_FLAG_ACCEPTORSUBKEY;
++	/* We always do confidentiality in wrap tokens */
++	flags |= KG2_TOKEN_FLAG_SEALED;
++
++	*ptr++ = flags;
++	*ptr++ = 0xff;	/* filler byte */
++	be16ptr = (__be16 *)ptr;
++
++	blocksize = crypto_blkcipher_blocksize(kctx->acceptor_enc);
++	*be16ptr++ = cpu_to_be16(ec);	/* ec is still 0 at this point */
++	/* "inner" token header always uses 0 for RRC */
++	*be16ptr++ = cpu_to_be16(0);
++
++	be64ptr = (__be64 *)be16ptr;
++	spin_lock(&krb5_seq_lock);	/* held across the post-increment */
++	*be64ptr = cpu_to_be64(kctx->seq_send64++);
++	spin_unlock(&krb5_seq_lock);
++
++	err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, ec, pages);
++	if (err)
++		return err;	/* the hook's error is returned unchanged */
++
++	now = get_seconds();
++	return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
++}
++
++/*
++ * Unwrap a v2 token at @offset in @buf: validate the clear-text header,
++ * decrypt via decrypt_v2, compare the decrypted header copy against the
++ * original, then shift the remaining head data over header + headskip.
++ */
++static u32
++gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
++{
++	s32		now;
++	u8		*ptr;
++	u8		flags = 0x00;
++	u16		rrc;
++	int		err;
++	u32		headskip, tailskip;
++	u8		decrypted_hdr[GSS_KRB5_TOK_HDR_LEN];
++	unsigned int	movelen;
++
++	dprintk("RPC:       %s\n", __func__);
++
++	if (kctx->gk5e->decrypt_v2 == NULL)
++		return GSS_S_FAILURE;
++
++	ptr = buf->head[0].iov_base + offset;
++
++	if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_WRAP)
++		return GSS_S_DEFECTIVE_TOKEN;
++
++	flags = ptr[2];
++	if ((!kctx->initiate && (flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)) ||
++	    (kctx->initiate && !(flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)))
++		return GSS_S_BAD_SIG;
++
++	if ((flags & KG2_TOKEN_FLAG_SEALED) == 0) {
++		dprintk("%s: token missing expected sealed flag\n", __func__);
++		return GSS_S_DEFECTIVE_TOKEN;
++	}
++
++	if (ptr[3] != 0xff)
++		return GSS_S_DEFECTIVE_TOKEN;
++
++	rrc = be16_to_cpup((__be16 *)(ptr + 6));
++
++	if (rrc != 0) {
++		err = rotate_left(kctx, offset, buf, rrc);
++		if (err)
++			return GSS_S_FAILURE;
++	}
++
++	err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf,
++					&headskip, &tailskip);
++	if (err)
++		return GSS_S_FAILURE;
++
++	/*
++	 * Retrieve the decrypted gss token header and verify
++	 * it against the original
++	 */
++	err = read_bytes_from_xdr_buf(buf,
++				buf->len - GSS_KRB5_TOK_HDR_LEN - tailskip,
++				decrypted_hdr, GSS_KRB5_TOK_HDR_LEN);
++	if (err) {
++		dprintk("%s: error %d getting decrypted_hdr\n", __func__, err);
++		return GSS_S_FAILURE;
++	}
++	if (memcmp(ptr, decrypted_hdr, 6)
++				|| memcmp(ptr + 8, decrypted_hdr + 8, 8)) {
++		dprintk("%s: token hdr, plaintext hdr mismatch!\n", __func__);
++		return GSS_S_FAILURE;
++	}
++
++	/* do sequencing checks (sequence number is the be64 at ptr + 8) */
++
++	/* it got through unscathed.  Make sure the context is unexpired */
++	now = get_seconds();
++	if (now > kctx->endtime)
++		return GSS_S_CONTEXT_EXPIRED;
++
++	/*
++	 * Move the head data back to the right position in xdr_buf.
++	 * We ignore any "ec" data since it might be in the head or
++	 * the tail, and we really don't need to deal with it.
++	 * Note that buf->head[0].iov_len may indicate the available
++	 * head buffer space rather than that actually occupied.
++	 */
++	movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len);
++	if (movelen < offset + GSS_KRB5_TOK_HDR_LEN + headskip)
++		return GSS_S_FAILURE;	/* subtracting would wrap movelen */
++	movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip;
++	memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
++	buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
++	buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
++
++	return GSS_S_COMPLETE;
++}
++
++u32
++gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
++		  struct xdr_buf *buf, struct page **pages)
++{
++	struct krb5_ctx	*kctx = gctx->internal_ctx_id;
++
++	switch (kctx->enctype) {	/* pick the wrap routine by enctype */
++	default:
++		BUG();			/* enctype not listed below */
++	case ENCTYPE_DES_CBC_RAW:
++	case ENCTYPE_DES3_CBC_RAW:
++	case ENCTYPE_ARCFOUR_HMAC:
++		return gss_wrap_kerberos_v1(kctx, offset, buf, pages);
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		return gss_wrap_kerberos_v2(kctx, offset, buf, pages);
++	}
++}
++
++u32
++gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
++{
++	struct krb5_ctx	*kctx = gctx->internal_ctx_id;
++
++	switch (kctx->enctype) {	/* pick the unwrap routine by enctype */
++	default:
++		BUG();			/* enctype not listed below */
++	case ENCTYPE_DES_CBC_RAW:
++	case ENCTYPE_DES3_CBC_RAW:
++	case ENCTYPE_ARCFOUR_HMAC:
++		return gss_unwrap_kerberos_v1(kctx, offset, buf);
++	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
++	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
++		return gss_unwrap_kerberos_v2(kctx, offset, buf);
++	}
++}
++
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_mech_switch.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_mech_switch.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_mech_switch.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_mech_switch.c	2010-08-23 11:01:00.395574706 -0400
+@@ -249,14 +249,15 @@ EXPORT_SYMBOL_GPL(gss_mech_put);
+ int
+ gss_import_sec_context(const void *input_token, size_t bufsize,
+ 		       struct gss_api_mech	*mech,
+-		       struct gss_ctx		**ctx_id)
++		       struct gss_ctx		**ctx_id,
++		       gfp_t gfp_mask)
+ {
+-	if (!(*ctx_id = kzalloc(sizeof(**ctx_id), GFP_KERNEL)))
++	if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask)))
+ 		return -ENOMEM;
+ 	(*ctx_id)->mech_type = gss_mech_get(mech);
+ 
+ 	return mech->gm_ops
+-		->gss_import_sec_context(input_token, bufsize, *ctx_id);
++		->gss_import_sec_context(input_token, bufsize, *ctx_id, gfp_mask);
+ }
+ 
+ /* gss_get_mic: compute a mic over message and return mic_token. */
+@@ -285,6 +286,20 @@ gss_verify_mic(struct gss_ctx		*context_
+ 				 mic_token);
+ }
+ 
++/*
++ * This function is called from both the client and server code.
++ * Each makes guarantees about how much "slack" space is available
++ * for the underlying function in "buf"'s head and tail while
++ * performing the wrap.
++ *
++ * The client and server code allocate RPC_MAX_AUTH_SIZE extra
++ * space in both the head and tail which is available for use by
++ * the wrap function.
++ *
++ * Underlying functions should verify they do not use more than
++ * RPC_MAX_AUTH_SIZE of extra space in either the head or tail
++ * when performing the wrap.
++ */
+ u32
+ gss_wrap(struct gss_ctx	*ctx_id,
+ 	 int		offset,
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_spkm3_mech.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_spkm3_mech.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_spkm3_mech.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/gss_spkm3_mech.c	2010-08-23 11:01:00.396574085 -0400
+@@ -84,13 +84,14 @@ simple_get_netobj(const void *p, const v
+ 
+ static int
+ gss_import_sec_context_spkm3(const void *p, size_t len,
+-				struct gss_ctx *ctx_id)
++				struct gss_ctx *ctx_id,
++				gfp_t gfp_mask)
+ {
+ 	const void *end = (const void *)((const char *)p + len);
+ 	struct	spkm3_ctx *ctx;
+ 	int	version;
+ 
+-	if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS)))
++	if (!(ctx = kzalloc(sizeof(*ctx), gfp_mask)))
+ 		goto out_err;
+ 
+ 	p = simple_get_bytes(p, end, &version, sizeof(version));
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/Makefile.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/Makefile
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/Makefile	2010-08-23 11:01:00.387574079 -0400
+@@ -10,7 +10,7 @@ auth_rpcgss-objs := auth_gss.o gss_gener
+ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
+ 
+ rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
+-	gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o
++	gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
+ 
+ obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
+ 
+diff -up linux-2.6.34.noarch/net/sunrpc/auth_gss/svcauth_gss.c.orig linux-2.6.34.noarch/net/sunrpc/auth_gss/svcauth_gss.c
+--- linux-2.6.34.noarch/net/sunrpc/auth_gss/svcauth_gss.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/auth_gss/svcauth_gss.c	2010-08-23 11:01:00.396574085 -0400
+@@ -494,7 +494,7 @@ static int rsc_parse(struct cache_detail
+ 		len = qword_get(&mesg, buf, mlen);
+ 		if (len < 0)
+ 			goto out;
+-		status = gss_import_sec_context(buf, len, gm, &rsci.mechctx);
++		status = gss_import_sec_context(buf, len, gm, &rsci.mechctx, GFP_KERNEL);
+ 		if (status)
+ 			goto out;
+ 
+@@ -1315,6 +1315,14 @@ svcauth_gss_wrap_resp_priv(struct svc_rq
+ 	inpages = resbuf->pages;
+ 	/* XXX: Would be better to write some xdr helper functions for
+ 	 * nfs{2,3,4}xdr.c that place the data right, instead of copying: */
++
++	/*
++	 * If there is currently tail data, make sure there is
++	 * room for the head, tail, and 2 * RPC_MAX_AUTH_SIZE in
++	 * the page, and move the current tail data such that
++	 * there is RPC_MAX_AUTH_SIZE slack space available in
++	 * both the head and tail.
++	 */
+ 	if (resbuf->tail[0].iov_base) {
+ 		BUG_ON(resbuf->tail[0].iov_base >= resbuf->head[0].iov_base
+ 							+ PAGE_SIZE);
+@@ -1327,6 +1335,13 @@ svcauth_gss_wrap_resp_priv(struct svc_rq
+ 			resbuf->tail[0].iov_len);
+ 		resbuf->tail[0].iov_base += RPC_MAX_AUTH_SIZE;
+ 	}
++	/*
++	 * If there is no current tail data, make sure there is
++	 * room for the head data, and 2 * RPC_MAX_AUTH_SIZE in the
++	 * allotted page, and set up tail information such that there
++	 * is RPC_MAX_AUTH_SIZE slack space available in both the
++	 * head and tail.
++	 */
+ 	if (resbuf->tail[0].iov_base == NULL) {
+ 		if (resbuf->head[0].iov_len + 2*RPC_MAX_AUTH_SIZE > PAGE_SIZE)
+ 			return -ENOMEM;
+diff -up linux-2.6.34.noarch/net/sunrpc/clnt.c.orig linux-2.6.34.noarch/net/sunrpc/clnt.c
+--- linux-2.6.34.noarch/net/sunrpc/clnt.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/clnt.c	2010-08-23 11:01:00.397622347 -0400
+@@ -556,26 +556,16 @@ static const struct rpc_call_ops rpc_def
+  */
+ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
+ {
+-	struct rpc_task *task, *ret;
++	struct rpc_task *task;
+ 
+ 	task = rpc_new_task(task_setup_data);
+-	if (task == NULL) {
+-		rpc_release_calldata(task_setup_data->callback_ops,
+-				task_setup_data->callback_data);
+-		ret = ERR_PTR(-ENOMEM);
++	if (IS_ERR(task))
+ 		goto out;
+-	}
+ 
+-	if (task->tk_status != 0) {
+-		ret = ERR_PTR(task->tk_status);
+-		rpc_put_task(task);
+-		goto out;
+-	}
+ 	atomic_inc(&task->tk_count);
+ 	rpc_execute(task);
+-	ret = task;
+ out:
+-	return ret;
++	return task;
+ }
+ EXPORT_SYMBOL_GPL(rpc_run_task);
+ 
+@@ -657,9 +647,8 @@ struct rpc_task *rpc_run_bc_task(struct 
+ 	 * Create an rpc_task to send the data
+ 	 */
+ 	task = rpc_new_task(&task_setup_data);
+-	if (!task) {
++	if (IS_ERR(task)) {
+ 		xprt_free_bc_request(req);
+-		task = ERR_PTR(-ENOMEM);
+ 		goto out;
+ 	}
+ 	task->tk_rqstp = req;
+diff -up linux-2.6.34.noarch/net/sunrpc/sched.c.orig linux-2.6.34.noarch/net/sunrpc/sched.c
+--- linux-2.6.34.noarch/net/sunrpc/sched.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/sched.c	2010-08-23 11:01:00.398564598 -0400
+@@ -25,7 +25,6 @@
+ 
+ #ifdef RPC_DEBUG
+ #define RPCDBG_FACILITY		RPCDBG_SCHED
+-#define RPC_TASK_MAGIC_ID	0xf00baa
+ #endif
+ 
+ /*
+@@ -237,7 +236,6 @@ static void rpc_task_set_debuginfo(struc
+ {
+ 	static atomic_t rpc_pid;
+ 
+-	task->tk_magic = RPC_TASK_MAGIC_ID;
+ 	task->tk_pid = atomic_inc_return(&rpc_pid);
+ }
+ #else
+@@ -360,9 +358,6 @@ static void __rpc_do_wake_up_task(struct
+ 	dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n",
+ 			task->tk_pid, jiffies);
+ 
+-#ifdef RPC_DEBUG
+-	BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
+-#endif
+ 	/* Has the task been executed yet? If not, we cannot wake it up! */
+ 	if (!RPC_IS_ACTIVATED(task)) {
+ 		printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
+@@ -834,7 +829,7 @@ static void rpc_init_task(struct rpc_tas
+ 	}
+ 
+ 	/* starting timestamp */
+-	task->tk_start = jiffies;
++	task->tk_start = ktime_get();
+ 
+ 	dprintk("RPC:       new task initialized, procpid %u\n",
+ 				task_pid_nr(current));
+@@ -856,16 +851,23 @@ struct rpc_task *rpc_new_task(const stru
+ 
+ 	if (task == NULL) {
+ 		task = rpc_alloc_task();
+-		if (task == NULL)
+-			goto out;
++		if (task == NULL) {
++			rpc_release_calldata(setup_data->callback_ops,
++					setup_data->callback_data);
++			return ERR_PTR(-ENOMEM);
++		}
+ 		flags = RPC_TASK_DYNAMIC;
+ 	}
+ 
+ 	rpc_init_task(task, setup_data);
++	if (task->tk_status < 0) {
++		int err = task->tk_status;
++		rpc_put_task(task);
++		return ERR_PTR(err);
++	}
+ 
+ 	task->tk_flags |= flags;
+ 	dprintk("RPC:       allocated task %p\n", task);
+-out:
+ 	return task;
+ }
+ 
+@@ -909,9 +911,6 @@ EXPORT_SYMBOL_GPL(rpc_put_task);
+ 
+ static void rpc_release_task(struct rpc_task *task)
+ {
+-#ifdef RPC_DEBUG
+-	BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
+-#endif
+ 	dprintk("RPC: %5u release task\n", task->tk_pid);
+ 
+ 	if (!list_empty(&task->tk_task)) {
+@@ -923,9 +922,6 @@ static void rpc_release_task(struct rpc_
+ 	}
+ 	BUG_ON (RPC_IS_QUEUED(task));
+ 
+-#ifdef RPC_DEBUG
+-	task->tk_magic = 0;
+-#endif
+ 	/* Wake up anyone who is waiting for task completion */
+ 	rpc_mark_complete_task(task);
+ 
+diff -up linux-2.6.34.noarch/net/sunrpc/stats.c.orig linux-2.6.34.noarch/net/sunrpc/stats.c
+--- linux-2.6.34.noarch/net/sunrpc/stats.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/stats.c	2010-08-23 11:01:00.399574225 -0400
+@@ -144,7 +144,7 @@ void rpc_count_iostats(struct rpc_task *
+ 	struct rpc_rqst *req = task->tk_rqstp;
+ 	struct rpc_iostats *stats;
+ 	struct rpc_iostats *op_metrics;
+-	long rtt, execute, queue;
++	ktime_t delta;
+ 
+ 	if (!task->tk_client || !task->tk_client->cl_metrics || !req)
+ 		return;
+@@ -156,23 +156,16 @@ void rpc_count_iostats(struct rpc_task *
+ 	op_metrics->om_ntrans += req->rq_ntrans;
+ 	op_metrics->om_timeouts += task->tk_timeouts;
+ 
+-	op_metrics->om_bytes_sent += task->tk_bytes_sent;
++	op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent;
+ 	op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
+ 
+-	queue = (long)req->rq_xtime - task->tk_start;
+-	if (queue < 0)
+-		queue = -queue;
+-	op_metrics->om_queue += queue;
+-
+-	rtt = task->tk_rtt;
+-	if (rtt < 0)
+-		rtt = -rtt;
+-	op_metrics->om_rtt += rtt;
+-
+-	execute = (long)jiffies - task->tk_start;
+-	if (execute < 0)
+-		execute = -execute;
+-	op_metrics->om_execute += execute;
++	delta = ktime_sub(req->rq_xtime, task->tk_start);
++	op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
++
++	op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt);
++
++	delta = ktime_sub(ktime_get(), task->tk_start);
++	op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta);
+ }
+ 
+ static void _print_name(struct seq_file *seq, unsigned int op,
+@@ -186,8 +179,6 @@ static void _print_name(struct seq_file 
+ 		seq_printf(seq, "\t%12u: ", op);
+ }
+ 
+-#define MILLISECS_PER_JIFFY	(1000 / HZ)
+-
+ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
+ {
+ 	struct rpc_iostats *stats = clnt->cl_metrics;
+@@ -214,9 +205,9 @@ void rpc_print_iostats(struct seq_file *
+ 				metrics->om_timeouts,
+ 				metrics->om_bytes_sent,
+ 				metrics->om_bytes_recv,
+-				metrics->om_queue * MILLISECS_PER_JIFFY,
+-				metrics->om_rtt * MILLISECS_PER_JIFFY,
+-				metrics->om_execute * MILLISECS_PER_JIFFY);
++				ktime_to_ms(metrics->om_queue),
++				ktime_to_ms(metrics->om_rtt),
++				ktime_to_ms(metrics->om_execute));
+ 	}
+ }
+ EXPORT_SYMBOL_GPL(rpc_print_iostats);
+diff -up linux-2.6.34.noarch/net/sunrpc/xdr.c.orig linux-2.6.34.noarch/net/sunrpc/xdr.c
+--- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-23 11:01:00.400574086 -0400
+@@ -762,6 +762,7 @@ int write_bytes_to_xdr_buf(struct xdr_bu
+ 	__write_bytes_to_xdr_buf(&subbuf, obj, len);
+ 	return 0;
+ }
++EXPORT_SYMBOL_GPL(write_bytes_to_xdr_buf);
+ 
+ int
+ xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj)
+diff -up linux-2.6.34.noarch/net/sunrpc/xprt.c.orig linux-2.6.34.noarch/net/sunrpc/xprt.c
+--- linux-2.6.34.noarch/net/sunrpc/xprt.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xprt.c	2010-08-23 11:01:00.401372963 -0400
+@@ -43,6 +43,7 @@
+ #include <linux/interrupt.h>
+ #include <linux/workqueue.h>
+ #include <linux/net.h>
++#include <linux/ktime.h>
+ 
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/sunrpc/metrics.h>
+@@ -62,7 +63,6 @@
+  * Local functions
+  */
+ static void	xprt_request_init(struct rpc_task *, struct rpc_xprt *);
+-static inline void	do_xprt_reserve(struct rpc_task *);
+ static void	xprt_connect_status(struct rpc_task *task);
+ static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
+ 
+@@ -711,12 +711,16 @@ void xprt_connect(struct rpc_task *task)
+ 		if (task->tk_rqstp)
+ 			task->tk_rqstp->rq_bytes_sent = 0;
+ 
+-		task->tk_timeout = xprt->connect_timeout;
++		task->tk_timeout = task->tk_rqstp->rq_timeout;
+ 		rpc_sleep_on(&xprt->pending, task, xprt_connect_status);
++
++		if (test_bit(XPRT_CLOSING, &xprt->state))
++			return;
++		if (xprt_test_and_set_connecting(xprt))
++			return;
+ 		xprt->stat.connect_start = jiffies;
+ 		xprt->ops->connect(task);
+ 	}
+-	return;
+ }
+ 
+ static void xprt_connect_status(struct rpc_task *task)
+@@ -771,25 +775,19 @@ struct rpc_rqst *xprt_lookup_rqst(struct
+ }
+ EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
+ 
+-/**
+- * xprt_update_rtt - update an RPC client's RTT state after receiving a reply
+- * @task: RPC request that recently completed
+- *
+- */
+-void xprt_update_rtt(struct rpc_task *task)
++static void xprt_update_rtt(struct rpc_task *task)
+ {
+ 	struct rpc_rqst *req = task->tk_rqstp;
+ 	struct rpc_rtt *rtt = task->tk_client->cl_rtt;
+ 	unsigned timer = task->tk_msg.rpc_proc->p_timer;
++	long m = usecs_to_jiffies(ktime_to_us(req->rq_rtt));
+ 
+ 	if (timer) {
+ 		if (req->rq_ntrans == 1)
+-			rpc_update_rtt(rtt, timer,
+-					(long)jiffies - req->rq_xtime);
++			rpc_update_rtt(rtt, timer, m);
+ 		rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
+ 	}
+ }
+-EXPORT_SYMBOL_GPL(xprt_update_rtt);
+ 
+ /**
+  * xprt_complete_rqst - called when reply processing is complete
+@@ -807,7 +805,9 @@ void xprt_complete_rqst(struct rpc_task 
+ 			task->tk_pid, ntohl(req->rq_xid), copied);
+ 
+ 	xprt->stat.recvs++;
+-	task->tk_rtt = (long)jiffies - req->rq_xtime;
++	req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime);
++	if (xprt->ops->timer != NULL)
++		xprt_update_rtt(task);
+ 
+ 	list_del_init(&req->rq_list);
+ 	req->rq_private_buf.len = copied;
+@@ -906,7 +906,7 @@ void xprt_transmit(struct rpc_task *task
+ 		return;
+ 
+ 	req->rq_connect_cookie = xprt->connect_cookie;
+-	req->rq_xtime = jiffies;
++	req->rq_xtime = ktime_get();
+ 	status = xprt->ops->send_request(task);
+ 	if (status != 0) {
+ 		task->tk_status = status;
+@@ -935,7 +935,7 @@ void xprt_transmit(struct rpc_task *task
+ 	spin_unlock_bh(&xprt->transport_lock);
+ }
+ 
+-static inline void do_xprt_reserve(struct rpc_task *task)
++static void xprt_alloc_slot(struct rpc_task *task)
+ {
+ 	struct rpc_xprt	*xprt = task->tk_xprt;
+ 
+@@ -955,6 +955,16 @@ static inline void do_xprt_reserve(struc
+ 	rpc_sleep_on(&xprt->backlog, task, NULL);
+ }
+ 
++static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
++{
++	memset(req, 0, sizeof(*req));	/* mark unused */
++
++	spin_lock(&xprt->reserve_lock);	/* guards xprt->free and the wake-up */
++	list_add(&req->rq_list, &xprt->free);	/* slot goes back on the free list */
++	rpc_wake_up_next(&xprt->backlog);	/* presumably lets a task queued on backlog retry */
++	spin_unlock(&xprt->reserve_lock);
++}
++
+ /**
+  * xprt_reserve - allocate an RPC request slot
+  * @task: RPC task requesting a slot allocation
+@@ -968,7 +978,7 @@ void xprt_reserve(struct rpc_task *task)
+ 
+ 	task->tk_status = -EIO;
+ 	spin_lock(&xprt->reserve_lock);
+-	do_xprt_reserve(task);
++	xprt_alloc_slot(task);
+ 	spin_unlock(&xprt->reserve_lock);
+ }
+ 
+@@ -1006,14 +1016,10 @@ void xprt_release(struct rpc_task *task)
+ {
+ 	struct rpc_xprt	*xprt;
+ 	struct rpc_rqst	*req;
+-	int is_bc_request;
+ 
+ 	if (!(req = task->tk_rqstp))
+ 		return;
+ 
+-	/* Preallocated backchannel request? */
+-	is_bc_request = bc_prealloc(req);
+-
+ 	xprt = req->rq_xprt;
+ 	rpc_count_iostats(task);
+ 	spin_lock_bh(&xprt->transport_lock);
+@@ -1027,21 +1033,16 @@ void xprt_release(struct rpc_task *task)
+ 		mod_timer(&xprt->timer,
+ 				xprt->last_used + xprt->idle_timeout);
+ 	spin_unlock_bh(&xprt->transport_lock);
+-	if (!bc_prealloc(req))
++	if (req->rq_buffer)
+ 		xprt->ops->buf_free(req->rq_buffer);
+ 	task->tk_rqstp = NULL;
+ 	if (req->rq_release_snd_buf)
+ 		req->rq_release_snd_buf(req);
+ 
+ 	dprintk("RPC: %5u release request %p\n", task->tk_pid, req);
+-	if (likely(!is_bc_request)) {
+-		memset(req, 0, sizeof(*req));	/* mark unused */
+-
+-		spin_lock(&xprt->reserve_lock);
+-		list_add(&req->rq_list, &xprt->free);
+-		rpc_wake_up_next(&xprt->backlog);
+-		spin_unlock(&xprt->reserve_lock);
+-	} else
++	if (likely(!bc_prealloc(req)))
++		xprt_free_slot(xprt, req);
++	else
+ 		xprt_free_bc_request(req);
+ }
+ 
+diff -up linux-2.6.34.noarch/net/sunrpc/xprtrdma/transport.c.orig linux-2.6.34.noarch/net/sunrpc/xprtrdma/transport.c
+--- linux-2.6.34.noarch/net/sunrpc/xprtrdma/transport.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xprtrdma/transport.c	2010-08-23 11:01:00.402563985 -0400
+@@ -305,7 +305,6 @@ xprt_setup_rdma(struct xprt_create *args
+ 	/* 60 second timeout, no retries */
+ 	xprt->timeout = &xprt_rdma_default_timeout;
+ 	xprt->bind_timeout = (60U * HZ);
+-	xprt->connect_timeout = (60U * HZ);
+ 	xprt->reestablish_timeout = (5U * HZ);
+ 	xprt->idle_timeout = (5U * 60 * HZ);
+ 
+@@ -449,21 +448,19 @@ xprt_rdma_connect(struct rpc_task *task)
+ 	struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt;
+ 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ 
+-	if (!xprt_test_and_set_connecting(xprt)) {
+-		if (r_xprt->rx_ep.rep_connected != 0) {
+-			/* Reconnect */
+-			schedule_delayed_work(&r_xprt->rdma_connect,
+-				xprt->reestablish_timeout);
+-			xprt->reestablish_timeout <<= 1;
+-			if (xprt->reestablish_timeout > (30 * HZ))
+-				xprt->reestablish_timeout = (30 * HZ);
+-			else if (xprt->reestablish_timeout < (5 * HZ))
+-				xprt->reestablish_timeout = (5 * HZ);
+-		} else {
+-			schedule_delayed_work(&r_xprt->rdma_connect, 0);
+-			if (!RPC_IS_ASYNC(task))
+-				flush_scheduled_work();
+-		}
++	if (r_xprt->rx_ep.rep_connected != 0) {
++		/* Reconnect */
++		schedule_delayed_work(&r_xprt->rdma_connect,
++			xprt->reestablish_timeout);
++		xprt->reestablish_timeout <<= 1;
++		if (xprt->reestablish_timeout > (30 * HZ))
++			xprt->reestablish_timeout = (30 * HZ);
++		else if (xprt->reestablish_timeout < (5 * HZ))
++			xprt->reestablish_timeout = (5 * HZ);
++	} else {
++		schedule_delayed_work(&r_xprt->rdma_connect, 0);
++		if (!RPC_IS_ASYNC(task))
++			flush_scheduled_work();
+ 	}
+ }
+ 
+@@ -677,7 +674,7 @@ xprt_rdma_send_request(struct rpc_task *
+ 	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
+ 		goto drop_connection;
+ 
+-	task->tk_bytes_sent += rqst->rq_snd_buf.len;
++	rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
+ 	rqst->rq_bytes_sent = 0;
+ 	return 0;
+ 
+diff -up linux-2.6.34.noarch/net/sunrpc/xprtsock.c.orig linux-2.6.34.noarch/net/sunrpc/xprtsock.c
+--- linux-2.6.34.noarch/net/sunrpc/xprtsock.c.orig	2010-08-23 11:00:23.890501549 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xprtsock.c	2010-08-23 11:01:00.403564023 -0400
+@@ -138,20 +138,6 @@ static ctl_table sunrpc_table[] = {
+ #endif
+ 
+ /*
+- * Time out for an RPC UDP socket connect.  UDP socket connects are
+- * synchronous, but we set a timeout anyway in case of resource
+- * exhaustion on the local host.
+- */
+-#define XS_UDP_CONN_TO		(5U * HZ)
+-
+-/*
+- * Wait duration for an RPC TCP connection to be established.  Solaris
+- * NFS over TCP uses 60 seconds, for example, which is in line with how
+- * long a server takes to reboot.
+- */
+-#define XS_TCP_CONN_TO		(60U * HZ)
+-
+-/*
+  * Wait duration for a reply from the RPC portmapper.
+  */
+ #define XS_BIND_TO		(60U * HZ)
+@@ -543,7 +529,7 @@ static int xs_udp_send_request(struct rp
+ 			xdr->len - req->rq_bytes_sent, status);
+ 
+ 	if (status >= 0) {
+-		task->tk_bytes_sent += status;
++		req->rq_xmit_bytes_sent += status;
+ 		if (status >= req->rq_slen)
+ 			return 0;
+ 		/* Still some bytes left; set up for a retry later. */
+@@ -639,7 +625,7 @@ static int xs_tcp_send_request(struct rp
+ 		/* If we've sent the entire packet, immediately
+ 		 * reset the count of bytes sent. */
+ 		req->rq_bytes_sent += status;
+-		task->tk_bytes_sent += status;
++		req->rq_xmit_bytes_sent += status;
+ 		if (likely(req->rq_bytes_sent >= req->rq_slen)) {
+ 			req->rq_bytes_sent = 0;
+ 			return 0;
+@@ -859,7 +845,6 @@ static void xs_udp_data_ready(struct soc
+ 	dst_confirm(skb_dst(skb));
+ 
+ 	xprt_adjust_cwnd(task, copied);
+-	xprt_update_rtt(task);
+ 	xprt_complete_rqst(task, copied);
+ 
+  out_unlock:
+@@ -2022,9 +2007,6 @@ static void xs_connect(struct rpc_task *
+ 	struct rpc_xprt *xprt = task->tk_xprt;
+ 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ 
+-	if (xprt_test_and_set_connecting(xprt))
+-		return;
+-
+ 	if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) {
+ 		dprintk("RPC:       xs_connect delayed xprt %p for %lu "
+ 				"seconds\n",
+@@ -2044,16 +2026,6 @@ static void xs_connect(struct rpc_task *
+ 	}
+ }
+ 
+-static void xs_tcp_connect(struct rpc_task *task)
+-{
+-	struct rpc_xprt *xprt = task->tk_xprt;
+-
+-	/* Exit if we need to wait for socket shutdown to complete */
+-	if (test_bit(XPRT_CLOSING, &xprt->state))
+-		return;
+-	xs_connect(task);
+-}
+-
+ /**
+  * xs_udp_print_stats - display UDP socket-specifc stats
+  * @xprt: rpc_xprt struct containing statistics
+@@ -2252,7 +2224,7 @@ static struct rpc_xprt_ops xs_tcp_ops = 
+ 	.release_xprt		= xs_tcp_release_xprt,
+ 	.rpcbind		= rpcb_getport_async,
+ 	.set_port		= xs_set_port,
+-	.connect		= xs_tcp_connect,
++	.connect		= xs_connect,
+ 	.buf_alloc		= rpc_malloc,
+ 	.buf_free		= rpc_free,
+ 	.send_request		= xs_tcp_send_request,
+@@ -2343,7 +2315,6 @@ static struct rpc_xprt *xs_setup_udp(str
+ 	xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
+ 
+ 	xprt->bind_timeout = XS_BIND_TO;
+-	xprt->connect_timeout = XS_UDP_CONN_TO;
+ 	xprt->reestablish_timeout = XS_UDP_REEST_TO;
+ 	xprt->idle_timeout = XS_IDLE_DISC_TO;
+ 
+@@ -2418,7 +2389,6 @@ static struct rpc_xprt *xs_setup_tcp(str
+ 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
+ 
+ 	xprt->bind_timeout = XS_BIND_TO;
+-	xprt->connect_timeout = XS_TCP_CONN_TO;
+ 	xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+ 	xprt->idle_timeout = XS_IDLE_DISC_TO;
+ 
+@@ -2478,9 +2448,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(
+ 	struct sock_xprt *transport;
+ 	struct svc_sock *bc_sock;
+ 
+-	if (!args->bc_xprt)
+-		ERR_PTR(-EINVAL);
+-
+ 	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+ 	if (IS_ERR(xprt))
+ 		return xprt;
+@@ -2494,7 +2461,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(
+ 	/* backchannel */
+ 	xprt_set_bound(xprt);
+ 	xprt->bind_timeout = 0;
+-	xprt->connect_timeout = 0;
+ 	xprt->reestablish_timeout = 0;
+ 	xprt->idle_timeout = 0;
+ 
diff --git a/nfsd-35-fc.patch b/nfsd-35-fc.patch
new file mode 100644
index 000000000..ef99b4995
--- /dev/null
+++ b/nfsd-35-fc.patch
@@ -0,0 +1,1808 @@
+diff -up linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt
+--- linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt	2010-08-23 09:57:18.233564439 -0400
+@@ -137,7 +137,7 @@ NS*| OPENATTR             | OPT        |
+    | READ                 | REQ        |              | Section 18.22  |
+    | READDIR              | REQ        |              | Section 18.23  |
+    | READLINK             | OPT        |              | Section 18.24  |
+-NS | RECLAIM_COMPLETE     | REQ        |              | Section 18.51  |
++   | RECLAIM_COMPLETE     | REQ        |              | Section 18.51  |
+    | RELEASE_LOCKOWNER    | MNI        |              | N/A            |
+    | REMOVE               | REQ        |              | Section 18.25  |
+    | RENAME               | REQ        |              | Section 18.26  |
+diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
+--- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 09:57:18.234564075 -0400
+@@ -259,10 +259,9 @@ static struct cache_detail svc_expkey_ca
+ 	.alloc		= expkey_alloc,
+ };
+ 
+-static struct svc_expkey *
+-svc_expkey_lookup(struct svc_expkey *item)
++static int
++svc_expkey_hash(struct svc_expkey *item)
+ {
+-	struct cache_head *ch;
+ 	int hash = item->ek_fsidtype;
+ 	char * cp = (char*)item->ek_fsid;
+ 	int len = key_len(item->ek_fsidtype);
+@@ -270,6 +269,14 @@ svc_expkey_lookup(struct svc_expkey *ite
+ 	hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
+ 	hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS);
+ 	hash &= EXPKEY_HASHMASK;
++	return hash;
++}
++
++static struct svc_expkey *
++svc_expkey_lookup(struct svc_expkey *item)
++{
++	struct cache_head *ch;
++	int hash = svc_expkey_hash(item);
+ 
+ 	ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h,
+ 				 hash);
+@@ -283,13 +290,7 @@ static struct svc_expkey *
+ svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
+ {
+ 	struct cache_head *ch;
+-	int hash = new->ek_fsidtype;
+-	char * cp = (char*)new->ek_fsid;
+-	int len = key_len(new->ek_fsidtype);
+-
+-	hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
+-	hash ^= hash_ptr(new->ek_client, EXPKEY_HASHBITS);
+-	hash &= EXPKEY_HASHMASK;
++	int hash = svc_expkey_hash(new);
+ 
+ 	ch = sunrpc_cache_update(&svc_expkey_cache, &new->h,
+ 				 &old->h, hash);
+@@ -738,14 +739,22 @@ struct cache_detail svc_export_cache = {
+ 	.alloc		= svc_export_alloc,
+ };
+ 
+-static struct svc_export *
+-svc_export_lookup(struct svc_export *exp)
++static int
++svc_export_hash(struct svc_export *exp)
+ {
+-	struct cache_head *ch;
+ 	int hash;
++
+ 	hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS);
+ 	hash ^= hash_ptr(exp->ex_path.dentry, EXPORT_HASHBITS);
+ 	hash ^= hash_ptr(exp->ex_path.mnt, EXPORT_HASHBITS);
++	return hash;
++}
++
++static struct svc_export *
++svc_export_lookup(struct svc_export *exp)
++{
++	struct cache_head *ch;
++	int hash = svc_export_hash(exp);
+ 
+ 	ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h,
+ 				 hash);
+@@ -759,10 +768,7 @@ static struct svc_export *
+ svc_export_update(struct svc_export *new, struct svc_export *old)
+ {
+ 	struct cache_head *ch;
+-	int hash;
+-	hash = hash_ptr(old->ex_client, EXPORT_HASHBITS);
+-	hash ^= hash_ptr(old->ex_path.dentry, EXPORT_HASHBITS);
+-	hash ^= hash_ptr(old->ex_path.mnt, EXPORT_HASHBITS);
++	int hash = svc_export_hash(old);
+ 
+ 	ch = sunrpc_cache_update(&svc_export_cache, &new->h,
+ 				 &old->h,
+@@ -1071,9 +1077,9 @@ exp_export(struct nfsctl_export *nxp)
+ 		err = 0;
+ finish:
+ 	kfree(new.ex_pathname);
+-	if (exp)
++	if (!IS_ERR_OR_NULL(exp))
+ 		exp_put(exp);
+-	if (fsid_key && !IS_ERR(fsid_key))
++	if (!IS_ERR_OR_NULL(fsid_key))
+ 		cache_put(&fsid_key->h, &svc_expkey_cache);
+ 	path_put(&path);
+ out_put_clp:
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 10:00:37.257414684 -0400
+@@ -79,11 +79,6 @@ enum nfs_cb_opnum4 {
+ 					cb_sequence_dec_sz +            \
+ 					op_dec_sz)
+ 
+-struct nfs4_rpc_args {
+-	void				*args_op;
+-	struct nfsd4_cb_sequence	args_seq;
+-};
+-
+ /*
+ * Generic encode routines from fs/nfs/nfs4xdr.c
+ */
+@@ -428,13 +423,19 @@ static struct rpc_procinfo     nfs4_cb_p
+ };
+ 
+ static struct rpc_version       nfs_cb_version4 = {
++/*
++ * Note on the callback rpc program version number: despite language in rfc
++ * 5661 section 18.36.3 requiring servers to use 4 in this field, the
++ * official xdr descriptions for both 4.0 and 4.1 specify version 1, and
++ * in practice that appears to be what implementations use.  The section
++ * 18.36.3 language is expected to be fixed in an erratum.
++ */
+         .number                 = 1,
+         .nrprocs                = ARRAY_SIZE(nfs4_cb_procedures),
+         .procs                  = nfs4_cb_procedures
+ };
+ 
+ static struct rpc_version *	nfs_cb_version[] = {
+-	NULL,
+ 	&nfs_cb_version4,
+ };
+ 
+@@ -456,15 +457,14 @@ static struct rpc_program cb_program = {
+ 
+ static int max_cb_time(void)
+ {
+-	return max(NFSD_LEASE_TIME/10, (time_t)1) * HZ;
++	return max(nfsd4_lease/10, (time_t)1) * HZ;
+ }
+ 
+ /* Reference counting, callback cleanup, etc., all look racy as heck.
+- * And why is cb_set an atomic? */
++ * And why is cl_cb_set an atomic? */
+ 
+-int setup_callback_client(struct nfs4_client *clp)
++int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
+ {
+-	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
+ 	struct rpc_timeout	timeparms = {
+ 		.to_initval	= max_cb_time(),
+ 		.to_retries	= 0,
+@@ -476,7 +476,7 @@ int setup_callback_client(struct nfs4_cl
+ 		.timeout	= &timeparms,
+ 		.program	= &cb_program,
+ 		.prognumber	= cb->cb_prog,
+-		.version	= nfs_cb_version[1]->number,
++		.version	= 0,
+ 		.authflavor	= clp->cl_flavor,
+ 		.flags		= (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
+ 		.client_name    = clp->cl_principal,
+@@ -486,7 +486,7 @@ int setup_callback_client(struct nfs4_cl
+ 	if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
+ 		return -EINVAL;
+ 	if (cb->cb_minorversion) {
+-		args.bc_xprt = clp->cl_cb_xprt;
++		args.bc_xprt = cb->cb_xprt;
+ 		args.protocol = XPRT_TRANSPORT_BC_TCP;
+ 	}
+ 	/* Create RPC client */
+@@ -496,7 +496,7 @@ int setup_callback_client(struct nfs4_cl
+ 			PTR_ERR(client));
+ 		return PTR_ERR(client);
+ 	}
+-	cb->cb_client = client;
++	nfsd4_set_callback_client(clp, client);
+ 	return 0;
+ 
+ }
+@@ -514,8 +514,7 @@ static void nfsd4_cb_probe_done(struct r
+ 	if (task->tk_status)
+ 		warn_no_callback_path(clp, task->tk_status);
+ 	else
+-		atomic_set(&clp->cl_cb_conn.cb_set, 1);
+-	put_nfs4_client(clp);
++		atomic_set(&clp->cl_cb_set, 1);
+ }
+ 
+ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
+@@ -537,7 +536,6 @@ int set_callback_cred(void)
+ 
+ void do_probe_callback(struct nfs4_client *clp)
+ {
+-	struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
+ 	struct rpc_message msg = {
+ 		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
+ 		.rpc_argp       = clp,
+@@ -545,34 +543,28 @@ void do_probe_callback(struct nfs4_clien
+ 	};
+ 	int status;
+ 
+-	status = rpc_call_async(cb->cb_client, &msg,
++	status = rpc_call_async(clp->cl_cb_client, &msg,
+ 				RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
+ 				&nfsd4_cb_probe_ops, (void *)clp);
+-	if (status) {
++	if (status)
+ 		warn_no_callback_path(clp, status);
+-		put_nfs4_client(clp);
+-	}
+ }
+ 
+ /*
+  * Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
+  */
+-void
+-nfsd4_probe_callback(struct nfs4_client *clp)
++void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
+ {
+ 	int status;
+ 
+-	BUG_ON(atomic_read(&clp->cl_cb_conn.cb_set));
++	BUG_ON(atomic_read(&clp->cl_cb_set));
+ 
+-	status = setup_callback_client(clp);
++	status = setup_callback_client(clp, cb);
+ 	if (status) {
+ 		warn_no_callback_path(clp, status);
+ 		return;
+ 	}
+ 
+-	/* the task holds a reference to the nfs4_client struct */
+-	atomic_inc(&clp->cl_count);
+-
+ 	do_probe_callback(clp);
+ }
+ 
+@@ -658,18 +650,32 @@ static void nfsd4_cb_done(struct rpc_tas
+ 	}
+ }
+ 
++
+ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs4_delegation *dp = calldata;
+ 	struct nfs4_client *clp = dp->dl_client;
++	struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
+ 
+ 	nfsd4_cb_done(task, calldata);
+ 
++	if (current_rpc_client == NULL) {
++		/* We're shutting down; give up. */
++		/* XXX: err, or is it ok just to fall through
++		 * and rpc_restart_call? */
++		return;
++	}
++
+ 	switch (task->tk_status) {
+ 	case -EIO:
+ 		/* Network partition? */
+-		atomic_set(&clp->cl_cb_conn.cb_set, 0);
++		atomic_set(&clp->cl_cb_set, 0);
+ 		warn_no_callback_path(clp, task->tk_status);
++		if (current_rpc_client != task->tk_client) {
++			/* queue a callback on the new connection: */
++			nfsd4_cb_recall(dp);
++			return;
++		}
+ 	case -EBADHANDLE:
+ 	case -NFS4ERR_BAD_STATEID:
+ 		/* Race: client probably got cb_recall
+@@ -677,7 +683,7 @@ static void nfsd4_cb_recall_done(struct 
+ 		break;
+ 	default:
+ 		/* success, or error we can't handle */
+-		goto done;
++		return;
+ 	}
+ 	if (dp->dl_retries--) {
+ 		rpc_delay(task, 2*HZ);
+@@ -685,20 +691,16 @@ static void nfsd4_cb_recall_done(struct 
+ 		rpc_restart_call(task);
+ 		return;
+ 	} else {
+-		atomic_set(&clp->cl_cb_conn.cb_set, 0);
++		atomic_set(&clp->cl_cb_set, 0);
+ 		warn_no_callback_path(clp, task->tk_status);
+ 	}
+-done:
+-	kfree(task->tk_msg.rpc_argp);
+ }
+ 
+ static void nfsd4_cb_recall_release(void *calldata)
+ {
+ 	struct nfs4_delegation *dp = calldata;
+-	struct nfs4_client *clp = dp->dl_client;
+ 
+ 	nfs4_put_delegation(dp);
+-	put_nfs4_client(clp);
+ }
+ 
+ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
+@@ -707,33 +709,75 @@ static const struct rpc_call_ops nfsd4_c
+ 	.rpc_release = nfsd4_cb_recall_release,
+ };
+ 
++static struct workqueue_struct *callback_wq;
++
++int nfsd4_create_callback_queue(void)
++{
++	callback_wq = create_singlethread_workqueue("nfsd4_callbacks");
++	if (!callback_wq)
++		return -ENOMEM;
++	return 0;
++}
++
++void nfsd4_destroy_callback_queue(void)
++{
++	destroy_workqueue(callback_wq);
++}
++
++/* must be called under the state lock */
++void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new)
++{
++	struct rpc_clnt *old = clp->cl_cb_client;
++
++	clp->cl_cb_client = new;
++	/*
++	 * After this, any work that saw the old value of cl_cb_client will
++	 * be gone:
++	 */
++	flush_workqueue(callback_wq);
++	/* So we can safely shut it down: */
++	if (old)
++		rpc_shutdown_client(old);
++}
++
+ /*
+  * called with dp->dl_count inc'ed.
+  */
+-void
+-nfsd4_cb_recall(struct nfs4_delegation *dp)
++static void _nfsd4_cb_recall(struct nfs4_delegation *dp)
+ {
+ 	struct nfs4_client *clp = dp->dl_client;
+-	struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
+-	struct nfs4_rpc_args *args;
++	struct rpc_clnt *clnt = clp->cl_cb_client;
++	struct nfs4_rpc_args *args = &dp->dl_recall.cb_args;
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
+ 		.rpc_cred = callback_cred
+ 	};
+-	int status = -ENOMEM;
++	int status;
++
++	if (clnt == NULL)
++		return; /* Client is shutting down; give up. */
+ 
+-	args = kzalloc(sizeof(*args), GFP_KERNEL);
+-	if (!args)
+-		goto out;
+ 	args->args_op = dp;
+ 	msg.rpc_argp = args;
+ 	dp->dl_retries = 1;
+ 	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
+ 				&nfsd4_cb_recall_ops, dp);
+-out:
+-	if (status) {
+-		kfree(args);
+-		put_nfs4_client(clp);
++	if (status)
+ 		nfs4_put_delegation(dp);
+-	}
++}
++
++void nfsd4_do_callback_rpc(struct work_struct *w)
++{
++	/* XXX: for now, just send off delegation recall. */
++	/* In future, generalize to handle any sort of callback. */
++	struct nfsd4_callback *c = container_of(w, struct nfsd4_callback, cb_work);
++	struct nfs4_delegation *dp = container_of(c, struct nfs4_delegation, dl_recall);
++
++	_nfsd4_cb_recall(dp);
++}
++
++
++void nfsd4_cb_recall(struct nfs4_delegation *dp)
++{
++	queue_work(callback_wq, &dp->dl_recall.cb_work);
+ }
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 09:57:18.237376763 -0400
+@@ -969,20 +969,36 @@ static struct nfsd4_operation nfsd4_ops[
+ static const char *nfsd4_op_name(unsigned opnum);
+ 
+ /*
+- * Enforce NFSv4.1 COMPOUND ordering rules.
++ * Enforce NFSv4.1 COMPOUND ordering rules:
+  *
+- * TODO:
+- * - enforce NFS4ERR_NOT_ONLY_OP,
+- * - DESTROY_SESSION MUST be the final operation in the COMPOUND request.
++ * Also note, enforced elsewhere:
++ *	- SEQUENCE other than as first op results in
++ *	  NFS4ERR_SEQUENCE_POS. (Enforced in nfsd4_sequence().)
++ *	- BIND_CONN_TO_SESSION must be the only op in its compound
++ *	  (Will be enforced in nfsd4_bind_conn_to_session().)
++ *	- DESTROY_SESSION must be the final operation in a compound, if
++ *	  sessionid's in SEQUENCE and DESTROY_SESSION are the same.
++ *	  (Enforced in nfsd4_destroy_session().)
+  */
+-static bool nfs41_op_ordering_ok(struct nfsd4_compoundargs *args)
++static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
+ {
+-	if (args->minorversion && args->opcnt > 0) {
+-		struct nfsd4_op *op = &args->ops[0];
+-		return (op->status == nfserr_op_illegal) ||
+-		       (nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP);
+-	}
+-	return true;
++	struct nfsd4_op *op = &args->ops[0];
++
++	/* These ordering requirements don't apply to NFSv4.0: */
++	if (args->minorversion == 0)
++		return nfs_ok;
++	/* This is weird, but OK, not our problem: */
++	if (args->opcnt == 0)
++		return nfs_ok;
++	if (op->status == nfserr_op_illegal)
++		return nfs_ok;
++	if (!(nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP))
++		return nfserr_op_not_in_session;
++	if (op->opnum == OP_SEQUENCE)
++		return nfs_ok;
++	if (args->opcnt != 1)
++		return nfserr_not_only_op;
++	return nfs_ok;
+ }
+ 
+ /*
+@@ -1012,6 +1028,7 @@ nfsd4_proc_compound(struct svc_rqst *rqs
+ 	resp->rqstp = rqstp;
+ 	resp->cstate.minorversion = args->minorversion;
+ 	resp->cstate.replay_owner = NULL;
++	resp->cstate.session = NULL;
+ 	fh_init(&resp->cstate.current_fh, NFS4_FHSIZE);
+ 	fh_init(&resp->cstate.save_fh, NFS4_FHSIZE);
+ 	/* Use the deferral mechanism only for NFSv4.0 compounds */
+@@ -1024,13 +1041,13 @@ nfsd4_proc_compound(struct svc_rqst *rqs
+ 	if (args->minorversion > nfsd_supported_minorversion)
+ 		goto out;
+ 
+-	if (!nfs41_op_ordering_ok(args)) {
++	status = nfs41_check_op_ordering(args);
++	if (status) {
+ 		op = &args->ops[0];
+-		op->status = nfserr_sequence_pos;
++		op->status = status;
+ 		goto encode_op;
+ 	}
+ 
+-	status = nfs_ok;
+ 	while (!status && resp->opcnt < args->opcnt) {
+ 		op = &args->ops[resp->opcnt++];
+ 
+@@ -1295,6 +1312,11 @@ static struct nfsd4_operation nfsd4_ops[
+ 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
+ 		.op_name = "OP_SEQUENCE",
+ 	},
++	[OP_RECLAIM_COMPLETE] = {
++		.op_func = (nfsd4op_func)nfsd4_reclaim_complete,
++		.op_flags = ALLOWED_WITHOUT_FH,
++		.op_name = "OP_RECLAIM_COMPLETE",
++	},
+ };
+ 
+ static const char *nfsd4_op_name(unsigned opnum)
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 09:57:18.240356512 -0400
+@@ -45,8 +45,8 @@
+ #define NFSDDBG_FACILITY                NFSDDBG_PROC
+ 
+ /* Globals */
+-static time_t lease_time = 90;     /* default lease time */
+-static time_t user_lease_time = 90;
++time_t nfsd4_lease = 90;     /* default lease time */
++time_t nfsd4_grace = 90;
+ static time_t boot_time;
+ static u32 current_ownerid = 1;
+ static u32 current_fileid = 1;
+@@ -190,7 +190,7 @@ alloc_init_deleg(struct nfs4_client *clp
+ 	dp->dl_vfs_file = stp->st_vfs_file;
+ 	dp->dl_type = type;
+ 	dp->dl_ident = cb->cb_ident;
+-	dp->dl_stateid.si_boot = get_seconds();
++	dp->dl_stateid.si_boot = boot_time;
+ 	dp->dl_stateid.si_stateownerid = current_delegid++;
+ 	dp->dl_stateid.si_fileid = 0;
+ 	dp->dl_stateid.si_generation = 0;
+@@ -199,6 +199,7 @@ alloc_init_deleg(struct nfs4_client *clp
+ 	atomic_set(&dp->dl_count, 1);
+ 	list_add(&dp->dl_perfile, &fp->fi_delegations);
+ 	list_add(&dp->dl_perclnt, &clp->cl_delegations);
++	INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
+ 	return dp;
+ }
+ 
+@@ -249,6 +250,9 @@ unhash_delegation(struct nfs4_delegation
+  * SETCLIENTID state 
+  */
+ 
++/* client_lock protects the client lru list and session hash table */
++static DEFINE_SPINLOCK(client_lock);
++
+ /* Hash tables for nfs4_clientid state */
+ #define CLIENT_HASH_BITS                 4
+ #define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
+@@ -367,7 +371,6 @@ static void release_openowner(struct nfs
+ 	nfs4_put_stateowner(sop);
+ }
+ 
+-static DEFINE_SPINLOCK(sessionid_lock);
+ #define SESSION_HASH_SIZE	512
+ static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE];
+ 
+@@ -565,10 +568,10 @@ alloc_init_session(struct svc_rqst *rqst
+ 
+ 	new->se_flags = cses->flags;
+ 	kref_init(&new->se_ref);
+-	spin_lock(&sessionid_lock);
++	spin_lock(&client_lock);
+ 	list_add(&new->se_hash, &sessionid_hashtbl[idx]);
+ 	list_add(&new->se_perclnt, &clp->cl_sessions);
+-	spin_unlock(&sessionid_lock);
++	spin_unlock(&client_lock);
+ 
+ 	status = nfs_ok;
+ out:
+@@ -579,7 +582,7 @@ out_free:
+ 	goto out;
+ }
+ 
+-/* caller must hold sessionid_lock */
++/* caller must hold client_lock */
+ static struct nfsd4_session *
+ find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
+ {
+@@ -602,7 +605,7 @@ find_in_sessionid_hashtbl(struct nfs4_se
+ 	return NULL;
+ }
+ 
+-/* caller must hold sessionid_lock */
++/* caller must hold client_lock */
+ static void
+ unhash_session(struct nfsd4_session *ses)
+ {
+@@ -610,15 +613,6 @@ unhash_session(struct nfsd4_session *ses
+ 	list_del(&ses->se_perclnt);
+ }
+ 
+-static void
+-release_session(struct nfsd4_session *ses)
+-{
+-	spin_lock(&sessionid_lock);
+-	unhash_session(ses);
+-	spin_unlock(&sessionid_lock);
+-	nfsd4_put_session(ses);
+-}
+-
+ void
+ free_session(struct kref *kref)
+ {
+@@ -634,9 +628,18 @@ free_session(struct kref *kref)
+ 	kfree(ses);
+ }
+ 
++/* must be called under the client_lock */
+ static inline void
+-renew_client(struct nfs4_client *clp)
++renew_client_locked(struct nfs4_client *clp)
+ {
++	if (is_client_expired(clp)) {
++		dprintk("%s: client (clientid %08x/%08x) already expired\n",
++			__func__,
++			clp->cl_clientid.cl_boot,
++			clp->cl_clientid.cl_id);
++		return;
++	}
++
+ 	/*
+ 	* Move client to the end to the LRU list.
+ 	*/
+@@ -647,6 +650,14 @@ renew_client(struct nfs4_client *clp)
+ 	clp->cl_time = get_seconds();
+ }
+ 
++static inline void
++renew_client(struct nfs4_client *clp)
++{
++	spin_lock(&client_lock);
++	renew_client_locked(clp);
++	spin_unlock(&client_lock);
++}
++
+ /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
+ static int
+ STALE_CLIENTID(clientid_t *clid)
+@@ -680,27 +691,9 @@ static struct nfs4_client *alloc_client(
+ 	return clp;
+ }
+ 
+-static void
+-shutdown_callback_client(struct nfs4_client *clp)
+-{
+-	struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
+-
+-	if (clnt) {
+-		/*
+-		 * Callback threads take a reference on the client, so there
+-		 * should be no outstanding callbacks at this point.
+-		 */
+-		clp->cl_cb_conn.cb_client = NULL;
+-		rpc_shutdown_client(clnt);
+-	}
+-}
+-
+ static inline void
+ free_client(struct nfs4_client *clp)
+ {
+-	shutdown_callback_client(clp);
+-	if (clp->cl_cb_xprt)
+-		svc_xprt_put(clp->cl_cb_xprt);
+ 	if (clp->cl_cred.cr_group_info)
+ 		put_group_info(clp->cl_cred.cr_group_info);
+ 	kfree(clp->cl_principal);
+@@ -709,10 +702,34 @@ free_client(struct nfs4_client *clp)
+ }
+ 
+ void
+-put_nfs4_client(struct nfs4_client *clp)
++release_session_client(struct nfsd4_session *session)
+ {
+-	if (atomic_dec_and_test(&clp->cl_count))
++	struct nfs4_client *clp = session->se_client;
++
++	if (!atomic_dec_and_lock(&clp->cl_refcount, &client_lock))
++		return;
++	if (is_client_expired(clp)) {
+ 		free_client(clp);
++		session->se_client = NULL;
++	} else
++		renew_client_locked(clp);
++	spin_unlock(&client_lock);
++	nfsd4_put_session(session);
++}
++
++/* must be called under the client_lock */
++static inline void
++unhash_client_locked(struct nfs4_client *clp)
++{
++	mark_client_expired(clp);
++	list_del(&clp->cl_lru);
++	while (!list_empty(&clp->cl_sessions)) {
++		struct nfsd4_session  *ses;
++		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
++				 se_perclnt);
++		unhash_session(ses);
++		nfsd4_put_session(ses);
++	}
+ }
+ 
+ static void
+@@ -722,9 +739,6 @@ expire_client(struct nfs4_client *clp)
+ 	struct nfs4_delegation *dp;
+ 	struct list_head reaplist;
+ 
+-	dprintk("NFSD: expire_client cl_count %d\n",
+-	                    atomic_read(&clp->cl_count));
+-
+ 	INIT_LIST_HEAD(&reaplist);
+ 	spin_lock(&recall_lock);
+ 	while (!list_empty(&clp->cl_delegations)) {
+@@ -740,20 +754,20 @@ expire_client(struct nfs4_client *clp)
+ 		list_del_init(&dp->dl_recall_lru);
+ 		unhash_delegation(dp);
+ 	}
+-	list_del(&clp->cl_idhash);
+-	list_del(&clp->cl_strhash);
+-	list_del(&clp->cl_lru);
+ 	while (!list_empty(&clp->cl_openowners)) {
+ 		sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
+ 		release_openowner(sop);
+ 	}
+-	while (!list_empty(&clp->cl_sessions)) {
+-		struct nfsd4_session  *ses;
+-		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
+-				 se_perclnt);
+-		release_session(ses);
+-	}
+-	put_nfs4_client(clp);
++	nfsd4_set_callback_client(clp, NULL);
++	if (clp->cl_cb_conn.cb_xprt)
++		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
++	list_del(&clp->cl_idhash);
++	list_del(&clp->cl_strhash);
++	spin_lock(&client_lock);
++	unhash_client_locked(clp);
++	if (atomic_read(&clp->cl_refcount) == 0)
++		free_client(clp);
++	spin_unlock(&client_lock);
+ }
+ 
+ static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
+@@ -839,14 +853,15 @@ static struct nfs4_client *create_client
+ 	}
+ 
+ 	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
+-	atomic_set(&clp->cl_count, 1);
+-	atomic_set(&clp->cl_cb_conn.cb_set, 0);
++	atomic_set(&clp->cl_refcount, 0);
++	atomic_set(&clp->cl_cb_set, 0);
+ 	INIT_LIST_HEAD(&clp->cl_idhash);
+ 	INIT_LIST_HEAD(&clp->cl_strhash);
+ 	INIT_LIST_HEAD(&clp->cl_openowners);
+ 	INIT_LIST_HEAD(&clp->cl_delegations);
+ 	INIT_LIST_HEAD(&clp->cl_sessions);
+ 	INIT_LIST_HEAD(&clp->cl_lru);
++	clp->cl_time = get_seconds();
+ 	clear_bit(0, &clp->cl_cb_slot_busy);
+ 	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
+ 	copy_verf(clp, verf);
+@@ -877,8 +892,7 @@ add_to_unconfirmed(struct nfs4_client *c
+ 	list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]);
+ 	idhashval = clientid_hashval(clp->cl_clientid.cl_id);
+ 	list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]);
+-	list_add_tail(&clp->cl_lru, &client_lru);
+-	clp->cl_time = get_seconds();
++	renew_client(clp);
+ }
+ 
+ static void
+@@ -888,10 +902,9 @@ move_to_confirmed(struct nfs4_client *cl
+ 	unsigned int strhashval;
+ 
+ 	dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
+-	list_del_init(&clp->cl_strhash);
+ 	list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
+ 	strhashval = clientstr_hashval(clp->cl_recdir);
+-	list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
++	list_move(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
+ 	renew_client(clp);
+ }
+ 
+@@ -1327,15 +1340,9 @@ nfsd4_create_session(struct svc_rqst *rq
+ 		cs_slot->sl_seqid++; /* from 0 to 1 */
+ 		move_to_confirmed(unconf);
+ 
+-		/*
+-		 * We do not support RDMA or persistent sessions
+-		 */
+-		cr_ses->flags &= ~SESSION4_PERSIST;
+-		cr_ses->flags &= ~SESSION4_RDMA;
+-
+ 		if (cr_ses->flags & SESSION4_BACK_CHAN) {
+-			unconf->cl_cb_xprt = rqstp->rq_xprt;
+-			svc_xprt_get(unconf->cl_cb_xprt);
++			unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
++			svc_xprt_get(rqstp->rq_xprt);
+ 			rpc_copy_addr(
+ 				(struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
+ 				sa);
+@@ -1344,7 +1351,7 @@ nfsd4_create_session(struct svc_rqst *rq
+ 				cstate->minorversion;
+ 			unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
+ 			unconf->cl_cb_seq_nr = 1;
+-			nfsd4_probe_callback(unconf);
++			nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
+ 		}
+ 		conf = unconf;
+ 	} else {
+@@ -1352,6 +1359,12 @@ nfsd4_create_session(struct svc_rqst *rq
+ 		goto out;
+ 	}
+ 
++	/*
++	 * We do not support RDMA or persistent sessions
++	 */
++	cr_ses->flags &= ~SESSION4_PERSIST;
++	cr_ses->flags &= ~SESSION4_RDMA;
++
+ 	status = alloc_init_session(rqstp, conf, cr_ses);
+ 	if (status)
+ 		goto out;
+@@ -1369,6 +1382,21 @@ out:
+ 	return status;
+ }
+ 
++static bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
++{
++	struct nfsd4_compoundres *resp = rqstp->rq_resp;
++	struct nfsd4_compoundargs *argp = rqstp->rq_argp;
++
++	return argp->opcnt == resp->opcnt;
++}
++
++static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
++{
++	if (!session)
++		return 0;
++	return !memcmp(sid, &session->se_sessionid, sizeof(*sid));
++}
++
+ __be32
+ nfsd4_destroy_session(struct svc_rqst *r,
+ 		      struct nfsd4_compound_state *cstate,
+@@ -1384,19 +1412,25 @@ nfsd4_destroy_session(struct svc_rqst *r
+ 	 * - Do we need to clear any callback info from previous session?
+ 	 */
+ 
++	if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) {
++		if (!nfsd4_last_compound_op(r))
++			return nfserr_not_only_op;
++	}
+ 	dump_sessionid(__func__, &sessionid->sessionid);
+-	spin_lock(&sessionid_lock);
++	spin_lock(&client_lock);
+ 	ses = find_in_sessionid_hashtbl(&sessionid->sessionid);
+ 	if (!ses) {
+-		spin_unlock(&sessionid_lock);
++		spin_unlock(&client_lock);
+ 		goto out;
+ 	}
+ 
+ 	unhash_session(ses);
+-	spin_unlock(&sessionid_lock);
++	spin_unlock(&client_lock);
+ 
++	nfs4_lock_state();
+ 	/* wait for callbacks */
+-	shutdown_callback_client(ses->se_client);
++	nfsd4_set_callback_client(ses->se_client, NULL);
++	nfs4_unlock_state();
+ 	nfsd4_put_session(ses);
+ 	status = nfs_ok;
+ out:
+@@ -1417,7 +1451,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
+ 	if (resp->opcnt != 1)
+ 		return nfserr_sequence_pos;
+ 
+-	spin_lock(&sessionid_lock);
++	spin_lock(&client_lock);
+ 	status = nfserr_badsession;
+ 	session = find_in_sessionid_hashtbl(&seq->sessionid);
+ 	if (!session)
+@@ -1456,23 +1490,47 @@ nfsd4_sequence(struct svc_rqst *rqstp,
+ 	cstate->slot = slot;
+ 	cstate->session = session;
+ 
+-	/* Hold a session reference until done processing the compound:
+-	 * nfsd4_put_session called only if the cstate slot is set.
+-	 */
+-	nfsd4_get_session(session);
+ out:
+-	spin_unlock(&sessionid_lock);
+-	/* Renew the clientid on success and on replay */
++	/* Hold a session reference until done processing the compound. */
+ 	if (cstate->session) {
+-		nfs4_lock_state();
+-		renew_client(session->se_client);
+-		nfs4_unlock_state();
++		nfsd4_get_session(cstate->session);
++		atomic_inc(&session->se_client->cl_refcount);
+ 	}
++	spin_unlock(&client_lock);
+ 	dprintk("%s: return %d\n", __func__, ntohl(status));
+ 	return status;
+ }
+ 
+ __be32
++nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc)
++{
++	if (rc->rca_one_fs) {
++		if (!cstate->current_fh.fh_dentry)
++			return nfserr_nofilehandle;
++		/*
++		 * We don't take advantage of the rca_one_fs case.
++		 * That's OK, it's optional, we can safely ignore it.
++		 */
++		 return nfs_ok;
++	}
++	nfs4_lock_state();
++	if (is_client_expired(cstate->session->se_client)) {
++		nfs4_unlock_state();
++		/*
++		 * The following error isn't really legal.
++		 * But we only get here if the client just explicitly
++		 * destroyed the client.  Surely it no longer cares what
++		 * error it gets back on an operation for the dead
++		 * client.
++		 */
++		return nfserr_stale_clientid;
++	}
++	nfsd4_create_clid_dir(cstate->session->se_client);
++	nfs4_unlock_state();
++	return nfs_ok;
++}
++
++__be32
+ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ 		  struct nfsd4_setclientid *setclid)
+ {
+@@ -1631,9 +1689,8 @@ nfsd4_setclientid_confirm(struct svc_rqs
+ 		if (!same_creds(&conf->cl_cred, &unconf->cl_cred))
+ 			status = nfserr_clid_inuse;
+ 		else {
+-			/* XXX: We just turn off callbacks until we can handle
+-			  * change request correctly. */
+-			atomic_set(&conf->cl_cb_conn.cb_set, 0);
++			atomic_set(&conf->cl_cb_set, 0);
++			nfsd4_probe_callback(conf, &unconf->cl_cb_conn);
+ 			expire_client(unconf);
+ 			status = nfs_ok;
+ 
+@@ -1667,7 +1724,7 @@ nfsd4_setclientid_confirm(struct svc_rqs
+ 			}
+ 			move_to_confirmed(unconf);
+ 			conf = unconf;
+-			nfsd4_probe_callback(conf);
++			nfsd4_probe_callback(conf, &conf->cl_cb_conn);
+ 			status = nfs_ok;
+ 		}
+ 	} else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
+@@ -1700,12 +1757,12 @@ alloc_init_file(struct inode *ino)
+ 		INIT_LIST_HEAD(&fp->fi_hash);
+ 		INIT_LIST_HEAD(&fp->fi_stateids);
+ 		INIT_LIST_HEAD(&fp->fi_delegations);
+-		spin_lock(&recall_lock);
+-		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
+-		spin_unlock(&recall_lock);
+ 		fp->fi_inode = igrab(ino);
+ 		fp->fi_id = current_fileid++;
+ 		fp->fi_had_conflict = false;
++		spin_lock(&recall_lock);
++		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
++		spin_unlock(&recall_lock);
+ 		return fp;
+ 	}
+ 	return NULL;
+@@ -1827,7 +1884,7 @@ init_stateid(struct nfs4_stateid *stp, s
+ 	stp->st_stateowner = sop;
+ 	get_nfs4_file(fp);
+ 	stp->st_file = fp;
+-	stp->st_stateid.si_boot = get_seconds();
++	stp->st_stateid.si_boot = boot_time;
+ 	stp->st_stateid.si_stateownerid = sop->so_id;
+ 	stp->st_stateid.si_fileid = fp->fi_id;
+ 	stp->st_stateid.si_generation = 0;
+@@ -2028,7 +2085,6 @@ void nfsd_break_deleg_cb(struct file_loc
+ 	 * lock) we know the server hasn't removed the lease yet, we know
+ 	 * it's safe to take a reference: */
+ 	atomic_inc(&dp->dl_count);
+-	atomic_inc(&dp->dl_client->cl_count);
+ 
+ 	spin_lock(&recall_lock);
+ 	list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
+@@ -2347,7 +2403,7 @@ nfs4_open_delegation(struct svc_fh *fh, 
+ {
+ 	struct nfs4_delegation *dp;
+ 	struct nfs4_stateowner *sop = stp->st_stateowner;
+-	struct nfs4_cb_conn *cb = &sop->so_client->cl_cb_conn;
++	int cb_up = atomic_read(&sop->so_client->cl_cb_set);
+ 	struct file_lock fl, *flp = &fl;
+ 	int status, flag = 0;
+ 
+@@ -2355,7 +2411,7 @@ nfs4_open_delegation(struct svc_fh *fh, 
+ 	open->op_recall = 0;
+ 	switch (open->op_claim_type) {
+ 		case NFS4_OPEN_CLAIM_PREVIOUS:
+-			if (!atomic_read(&cb->cb_set))
++			if (!cb_up)
+ 				open->op_recall = 1;
+ 			flag = open->op_delegate_type;
+ 			if (flag == NFS4_OPEN_DELEGATE_NONE)
+@@ -2366,7 +2422,7 @@ nfs4_open_delegation(struct svc_fh *fh, 
+ 			 * had the chance to reclaim theirs.... */
+ 			if (locks_in_grace())
+ 				goto out;
+-			if (!atomic_read(&cb->cb_set) || !sop->so_confirmed)
++			if (!cb_up || !sop->so_confirmed)
+ 				goto out;
+ 			if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+ 				flag = NFS4_OPEN_DELEGATE_WRITE;
+@@ -2483,10 +2539,8 @@ nfsd4_process_open2(struct svc_rqst *rqs
+ 	}
+ 	memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t));
+ 
+-	if (nfsd4_has_session(&resp->cstate)) {
++	if (nfsd4_has_session(&resp->cstate))
+ 		open->op_stateowner->so_confirmed = 1;
+-		nfsd4_create_clid_dir(open->op_stateowner->so_client);
+-	}
+ 
+ 	/*
+ 	* Attempt to hand out a delegation. No error return, because the
+@@ -2537,7 +2591,7 @@ nfsd4_renew(struct svc_rqst *rqstp, stru
+ 	renew_client(clp);
+ 	status = nfserr_cb_path_down;
+ 	if (!list_empty(&clp->cl_delegations)
+-			&& !atomic_read(&clp->cl_cb_conn.cb_set))
++			&& !atomic_read(&clp->cl_cb_set))
+ 		goto out;
+ 	status = nfs_ok;
+ out:
+@@ -2554,6 +2608,12 @@ nfsd4_end_grace(void)
+ 	dprintk("NFSD: end of grace period\n");
+ 	nfsd4_recdir_purge_old();
+ 	locks_end_grace(&nfsd4_manager);
++	/*
++	 * Now that every NFSv4 client has had the chance to recover and
++	 * to see the (possibly new, possibly shorter) lease time, we
++	 * can safely set the next grace time to the current lease time:
++	 */
++	nfsd4_grace = nfsd4_lease;
+ }
+ 
+ static time_t
+@@ -2563,15 +2623,17 @@ nfs4_laundromat(void)
+ 	struct nfs4_stateowner *sop;
+ 	struct nfs4_delegation *dp;
+ 	struct list_head *pos, *next, reaplist;
+-	time_t cutoff = get_seconds() - NFSD_LEASE_TIME;
+-	time_t t, clientid_val = NFSD_LEASE_TIME;
+-	time_t u, test_val = NFSD_LEASE_TIME;
++	time_t cutoff = get_seconds() - nfsd4_lease;
++	time_t t, clientid_val = nfsd4_lease;
++	time_t u, test_val = nfsd4_lease;
+ 
+ 	nfs4_lock_state();
+ 
+ 	dprintk("NFSD: laundromat service - starting\n");
+ 	if (locks_in_grace())
+ 		nfsd4_end_grace();
++	INIT_LIST_HEAD(&reaplist);
++	spin_lock(&client_lock);
+ 	list_for_each_safe(pos, next, &client_lru) {
+ 		clp = list_entry(pos, struct nfs4_client, cl_lru);
+ 		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
+@@ -2580,12 +2642,22 @@ nfs4_laundromat(void)
+ 				clientid_val = t;
+ 			break;
+ 		}
++		if (atomic_read(&clp->cl_refcount)) {
++			dprintk("NFSD: client in use (clientid %08x)\n",
++				clp->cl_clientid.cl_id);
++			continue;
++		}
++		unhash_client_locked(clp);
++		list_add(&clp->cl_lru, &reaplist);
++	}
++	spin_unlock(&client_lock);
++	list_for_each_safe(pos, next, &reaplist) {
++		clp = list_entry(pos, struct nfs4_client, cl_lru);
+ 		dprintk("NFSD: purging unused client (clientid %08x)\n",
+ 			clp->cl_clientid.cl_id);
+ 		nfsd4_remove_clid_dir(clp);
+ 		expire_client(clp);
+ 	}
+-	INIT_LIST_HEAD(&reaplist);
+ 	spin_lock(&recall_lock);
+ 	list_for_each_safe(pos, next, &del_recall_lru) {
+ 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+@@ -2605,7 +2677,7 @@ nfs4_laundromat(void)
+ 		list_del_init(&dp->dl_recall_lru);
+ 		unhash_delegation(dp);
+ 	}
+-	test_val = NFSD_LEASE_TIME;
++	test_val = nfsd4_lease;
+ 	list_for_each_safe(pos, next, &close_lru) {
+ 		sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
+ 		if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
+@@ -2661,39 +2733,11 @@ nfs4_check_fh(struct svc_fh *fhp, struct
+ static int
+ STALE_STATEID(stateid_t *stateid)
+ {
+-	if (time_after((unsigned long)boot_time,
+-			(unsigned long)stateid->si_boot)) {
+-		dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
+-			STATEID_VAL(stateid));
+-		return 1;
+-	}
+-	return 0;
+-}
+-
+-static int
+-EXPIRED_STATEID(stateid_t *stateid)
+-{
+-	if (time_before((unsigned long)boot_time,
+-			((unsigned long)stateid->si_boot)) &&
+-	    time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
+-		dprintk("NFSD: expired stateid " STATEID_FMT "!\n",
+-			STATEID_VAL(stateid));
+-		return 1;
+-	}
+-	return 0;
+-}
+-
+-static __be32
+-stateid_error_map(stateid_t *stateid)
+-{
+-	if (STALE_STATEID(stateid))
+-		return nfserr_stale_stateid;
+-	if (EXPIRED_STATEID(stateid))
+-		return nfserr_expired;
+-
+-	dprintk("NFSD: bad stateid " STATEID_FMT "!\n",
++	if (stateid->si_boot == boot_time)
++		return 0;
++	dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
+ 		STATEID_VAL(stateid));
+-	return nfserr_bad_stateid;
++	return 1;
+ }
+ 
+ static inline int
+@@ -2817,10 +2861,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_
+ 	status = nfserr_bad_stateid;
+ 	if (is_delegation_stateid(stateid)) {
+ 		dp = find_delegation_stateid(ino, stateid);
+-		if (!dp) {
+-			status = stateid_error_map(stateid);
++		if (!dp)
+ 			goto out;
+-		}
+ 		status = check_stateid_generation(stateid, &dp->dl_stateid,
+ 						  flags);
+ 		if (status)
+@@ -2833,10 +2875,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_
+ 			*filpp = dp->dl_vfs_file;
+ 	} else { /* open or lock stateid */
+ 		stp = find_stateid(stateid, flags);
+-		if (!stp) {
+-			status = stateid_error_map(stateid);
++		if (!stp)
+ 			goto out;
+-		}
+ 		if (nfs4_check_fh(current_fh, stp))
+ 			goto out;
+ 		if (!stp->st_stateowner->so_confirmed)
+@@ -2908,7 +2948,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_co
+ 		 */
+ 		sop = search_close_lru(stateid->si_stateownerid, flags);
+ 		if (sop == NULL)
+-			return stateid_error_map(stateid);
++			return nfserr_bad_stateid;
+ 		*sopp = sop;
+ 		goto check_replay;
+ 	}
+@@ -3175,10 +3215,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp
+ 	if (!is_delegation_stateid(stateid))
+ 		goto out;
+ 	dp = find_delegation_stateid(inode, stateid);
+-	if (!dp) {
+-		status = stateid_error_map(stateid);
++	if (!dp)
+ 		goto out;
+-	}
+ 	status = check_stateid_generation(stateid, &dp->dl_stateid, flags);
+ 	if (status)
+ 		goto out;
+@@ -3404,7 +3442,7 @@ alloc_init_lock_stateid(struct nfs4_stat
+ 	stp->st_stateowner = sop;
+ 	get_nfs4_file(fp);
+ 	stp->st_file = fp;
+-	stp->st_stateid.si_boot = get_seconds();
++	stp->st_stateid.si_boot = boot_time;
+ 	stp->st_stateid.si_stateownerid = sop->so_id;
+ 	stp->st_stateid.si_fileid = fp->fi_id;
+ 	stp->st_stateid.si_generation = 0;
+@@ -3976,12 +4014,6 @@ nfsd4_load_reboot_recovery_data(void)
+ 		printk("NFSD: Failure reading reboot recovery data\n");
+ }
+ 
+-unsigned long
+-get_nfs4_grace_period(void)
+-{
+-	return max(user_lease_time, lease_time) * HZ;
+-}
+-
+ /*
+  * Since the lifetime of a delegation isn't limited to that of an open, a
+  * client may quite reasonably hang on to a delegation as long as it has
+@@ -4008,20 +4040,27 @@ set_max_delegations(void)
+ static int
+ __nfs4_state_start(void)
+ {
+-	unsigned long grace_time;
++	int ret;
+ 
+ 	boot_time = get_seconds();
+-	grace_time = get_nfs4_grace_period();
+-	lease_time = user_lease_time;
+ 	locks_start_grace(&nfsd4_manager);
+ 	printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
+-	       grace_time/HZ);
++	       nfsd4_grace);
++	ret = set_callback_cred();
++	if (ret)
++		return -ENOMEM;
+ 	laundry_wq = create_singlethread_workqueue("nfsd4");
+ 	if (laundry_wq == NULL)
+ 		return -ENOMEM;
+-	queue_delayed_work(laundry_wq, &laundromat_work, grace_time);
++	ret = nfsd4_create_callback_queue();
++	if (ret)
++		goto out_free_laundry;
++	queue_delayed_work(laundry_wq, &laundromat_work, nfsd4_grace * HZ);
+ 	set_max_delegations();
+-	return set_callback_cred();
++	return 0;
++out_free_laundry:
++	destroy_workqueue(laundry_wq);
++	return ret;
+ }
+ 
+ int
+@@ -4039,12 +4078,6 @@ nfs4_state_start(void)
+ 	return 0;
+ }
+ 
+-time_t
+-nfs4_lease_time(void)
+-{
+-	return lease_time;
+-}
+-
+ static void
+ __nfs4_state_shutdown(void)
+ {
+@@ -4089,6 +4122,7 @@ nfs4_state_shutdown(void)
+ 	nfs4_lock_state();
+ 	nfs4_release_reclaim();
+ 	__nfs4_state_shutdown();
++	nfsd4_destroy_callback_queue();
+ 	nfs4_unlock_state();
+ }
+ 
+@@ -4128,21 +4162,3 @@ nfs4_recoverydir(void)
+ {
+ 	return user_recovery_dirname;
+ }
+-
+-/*
+- * Called when leasetime is changed.
+- *
+- * The only way the protocol gives us to handle on-the-fly lease changes is to
+- * simulate a reboot.  Instead of doing that, we just wait till the next time
+- * we start to register any changes in lease time.  If the administrator
+- * really wants to change the lease time *now*, they can go ahead and bring
+- * nfsd down and then back up again after changing the lease time.
+- *
+- * user_lease_time is protected by nfsd_mutex since it's only really accessed
+- * when nfsd is starting
+- */
+-void
+-nfs4_reset_lease(time_t leasetime)
+-{
+-	user_lease_time = leasetime;
+-}
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
+--- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 09:57:20.629370282 -0400
+@@ -46,6 +46,7 @@ enum {
+ 	 */
+ #ifdef CONFIG_NFSD_V4
+ 	NFSD_Leasetime,
++	NFSD_Gracetime,
+ 	NFSD_RecoveryDir,
+ #endif
+ };
+@@ -70,6 +71,7 @@ static ssize_t write_ports(struct file *
+ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
+ #ifdef CONFIG_NFSD_V4
+ static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
++static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
+ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
+ #endif
+ 
+@@ -91,6 +93,7 @@ static ssize_t (*write_op[])(struct file
+ 	[NFSD_MaxBlkSize] = write_maxblksize,
+ #ifdef CONFIG_NFSD_V4
+ 	[NFSD_Leasetime] = write_leasetime,
++	[NFSD_Gracetime] = write_gracetime,
+ 	[NFSD_RecoveryDir] = write_recoverydir,
+ #endif
+ };
+@@ -1204,29 +1207,45 @@ static ssize_t write_maxblksize(struct f
+ }
+ 
+ #ifdef CONFIG_NFSD_V4
+-extern time_t nfs4_leasetime(void);
+-
+-static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
++static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
+ {
+-	/* if size > 10 seconds, call
+-	 * nfs4_reset_lease() then write out the new lease (seconds) as reply
+-	 */
+ 	char *mesg = buf;
+-	int rv, lease;
++	int rv, i;
+ 
+ 	if (size > 0) {
+ 		if (nfsd_serv)
+ 			return -EBUSY;
+-		rv = get_int(&mesg, &lease);
++		rv = get_int(&mesg, &i);
+ 		if (rv)
+ 			return rv;
+-		if (lease < 10 || lease > 3600)
++		/*
++		 * Some sanity checking.  We don't have a reason for
++		 * these particular numbers, but problems with the
++		 * extremes are:
++		 *	- Too short: the briefest network outage may
++		 *	  cause clients to lose all their locks.  Also,
++		 *	  the frequent polling may be wasteful.
++		 *	- Too long: do you really want reboot recovery
++		 *	  to take more than an hour?  Or to make other
++		 *	  clients wait an hour before being able to
++		 *	  revoke a dead client's locks?
++		 */
++		if (i < 10 || i > 3600)
+ 			return -EINVAL;
+-		nfs4_reset_lease(lease);
++		*time = i;
+ 	}
+ 
+-	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n",
+-							nfs4_lease_time());
++	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time);
++}
++
++static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
++{
++	ssize_t rv;
++
++	mutex_lock(&nfsd_mutex);
++	rv = __nfsd4_write_time(file, buf, size, time);
++	mutex_unlock(&nfsd_mutex);
++	return rv;
+ }
+ 
+ /**
+@@ -1252,12 +1271,22 @@ static ssize_t __write_leasetime(struct 
+  */
+ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
+ {
+-	ssize_t rv;
++	return nfsd4_write_time(file, buf, size, &nfsd4_lease);
++}
+ 
+-	mutex_lock(&nfsd_mutex);
+-	rv = __write_leasetime(file, buf, size);
+-	mutex_unlock(&nfsd_mutex);
+-	return rv;
++/**
++ * write_gracetime - Set or report current NFSv4 grace period time
++ *
++ * As above, but sets the time of the NFSv4 grace period.
++ *
++ * Note this should never be set to less than the *previous*
++ * lease-period time, but we don't try to enforce this.  (In the common
++ * case (a new boot), we don't know what the previous lease time was
++ * anyway.)
++ */
++static ssize_t write_gracetime(struct file *file, char *buf, size_t size)
++{
++	return nfsd4_write_time(file, buf, size, &nfsd4_grace);
+ }
+ 
+ extern char *nfs4_recoverydir(void);
+@@ -1351,6 +1380,7 @@ static int nfsd_fill_super(struct super_
+ 		[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
+ #ifdef CONFIG_NFSD_V4
+ 		[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
++		[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
+ 		[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
+ #endif
+ 		/* last one */ {""}
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
+--- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 09:57:20.629370282 -0400
+@@ -82,7 +82,6 @@ int nfs4_state_init(void);
+ void nfsd4_free_slabs(void);
+ int nfs4_state_start(void);
+ void nfs4_state_shutdown(void);
+-time_t nfs4_lease_time(void);
+ void nfs4_reset_lease(time_t leasetime);
+ int nfs4_reset_recoverydir(char *recdir);
+ #else
+@@ -90,7 +89,6 @@ static inline int nfs4_state_init(void) 
+ static inline void nfsd4_free_slabs(void) { }
+ static inline int nfs4_state_start(void) { return 0; }
+ static inline void nfs4_state_shutdown(void) { }
+-static inline time_t nfs4_lease_time(void) { return 0; }
+ static inline void nfs4_reset_lease(time_t leasetime) { }
+ static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
+ #endif
+@@ -229,6 +227,9 @@ extern struct timeval	nfssvc_boot;
+ 
+ #ifdef CONFIG_NFSD_V4
+ 
++extern time_t nfsd4_lease;
++extern time_t nfsd4_grace;
++
+ /* before processing a COMPOUND operation, we have to check that there
+  * is enough space in the buffer for XDR encode to succeed.  otherwise,
+  * we might process an operation with side effects, and be unable to
+@@ -247,7 +248,6 @@ extern struct timeval	nfssvc_boot;
+ #define	COMPOUND_SLACK_SPACE		140    /* OP_GETFH */
+ #define COMPOUND_ERR_SLACK_SPACE	12     /* OP_SETATTR */
+ 
+-#define NFSD_LEASE_TIME                 (nfs4_lease_time())
+ #define NFSD_LAUNDROMAT_MINTIMEOUT      10   /* seconds */
+ 
+ /*
+diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
+--- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 09:57:21.807501619 -0400
+@@ -70,6 +70,16 @@ struct nfsd4_cb_sequence {
+ 	struct nfs4_client	*cbs_clp;
+ };
+ 
++struct nfs4_rpc_args {
++	void				*args_op;
++	struct nfsd4_cb_sequence	args_seq;
++};
++
++struct nfsd4_callback {
++	struct nfs4_rpc_args cb_args;
++	struct work_struct cb_work;
++};
++
+ struct nfs4_delegation {
+ 	struct list_head	dl_perfile;
+ 	struct list_head	dl_perclnt;
+@@ -86,6 +96,7 @@ struct nfs4_delegation {
+ 	stateid_t		dl_stateid;
+ 	struct knfsd_fh		dl_fh;
+ 	int			dl_retries;
++	struct nfsd4_callback	dl_recall;
+ };
+ 
+ /* client delegation callback info */
+@@ -96,9 +107,7 @@ struct nfs4_cb_conn {
+ 	u32                     cb_prog;
+ 	u32			cb_minorversion;
+ 	u32                     cb_ident;	/* minorversion 0 only */
+-	/* RPC client info */
+-	atomic_t		cb_set;     /* successful CB_NULL call */
+-	struct rpc_clnt *       cb_client;
++	struct svc_xprt		*cb_xprt;	/* minorversion 1 only */
+ };
+ 
+ /* Maximum number of slots per session. 160 is useful for long haul TCP */
+@@ -157,7 +166,7 @@ struct nfsd4_session {
+ 	struct list_head	se_hash;	/* hash by sessionid */
+ 	struct list_head	se_perclnt;
+ 	u32			se_flags;
+-	struct nfs4_client	*se_client;	/* for expire_client */
++	struct nfs4_client	*se_client;
+ 	struct nfs4_sessionid	se_sessionid;
+ 	struct nfsd4_channel_attrs se_fchannel;
+ 	struct nfsd4_channel_attrs se_bchannel;
+@@ -212,25 +221,41 @@ struct nfs4_client {
+ 	struct svc_cred		cl_cred; 	/* setclientid principal */
+ 	clientid_t		cl_clientid;	/* generated by server */
+ 	nfs4_verifier		cl_confirm;	/* generated by server */
+-	struct nfs4_cb_conn	cl_cb_conn;     /* callback info */
+-	atomic_t		cl_count;	/* ref count */
+ 	u32			cl_firststate;	/* recovery dir creation */
+ 
++	/* for v4.0 and v4.1 callbacks: */
++	struct nfs4_cb_conn	cl_cb_conn;
++	struct rpc_clnt		*cl_cb_client;
++	atomic_t		cl_cb_set;
++
+ 	/* for nfs41 */
+ 	struct list_head	cl_sessions;
+ 	struct nfsd4_clid_slot	cl_cs_slot;	/* create_session slot */
+ 	u32			cl_exchange_flags;
+ 	struct nfs4_sessionid	cl_sessionid;
++	/* number of RPCs in progress over an associated session: */
++	atomic_t		cl_refcount;
+ 
+ 	/* for nfs41 callbacks */
+ 	/* We currently support a single back channel with a single slot */
+ 	unsigned long		cl_cb_slot_busy;
+ 	u32			cl_cb_seq_nr;
+-	struct svc_xprt		*cl_cb_xprt;	/* 4.1 callback transport */
+ 	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
+ 						/* wait here for slots */
+ };
+ 
++static inline void
++mark_client_expired(struct nfs4_client *clp)
++{
++	clp->cl_time = 0;
++}
++
++static inline bool
++is_client_expired(struct nfs4_client *clp)
++{
++	return clp->cl_time == 0;
++}
++
+ /* struct nfs4_client_reset
+  * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl
+  * upon lease reset, or from upcall to state_daemon (to read in state
+@@ -377,11 +402,14 @@ extern void nfs4_lock_state(void);
+ extern void nfs4_unlock_state(void);
+ extern int nfs4_in_grace(void);
+ extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
+-extern void put_nfs4_client(struct nfs4_client *clp);
+ extern void nfs4_free_stateowner(struct kref *kref);
+ extern int set_callback_cred(void);
+-extern void nfsd4_probe_callback(struct nfs4_client *clp);
++extern void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
++extern void nfsd4_do_callback_rpc(struct work_struct *);
+ extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
++extern int nfsd4_create_callback_queue(void);
++extern void nfsd4_destroy_callback_queue(void);
++extern void nfsd4_set_callback_client(struct nfs4_client *, struct rpc_clnt *);
+ extern void nfs4_put_delegation(struct nfs4_delegation *dp);
+ extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
+ extern void nfsd4_init_recdir(char *recdir_name);
+@@ -392,6 +420,7 @@ extern int nfs4_has_reclaimed_state(cons
+ extern void nfsd4_recdir_purge_old(void);
+ extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
+ extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
++extern void release_session_client(struct nfsd4_session *);
+ 
+ static inline void
+ nfs4_put_stateowner(struct nfs4_stateowner *so)
+diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
+--- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 09:57:23.994379831 -0400
+@@ -381,6 +381,10 @@ struct nfsd4_destroy_session {
+ 	struct nfs4_sessionid	sessionid;
+ };
+ 
++struct nfsd4_reclaim_complete {
++	u32 rca_one_fs;
++};
++
+ struct nfsd4_op {
+ 	int					opnum;
+ 	__be32					status;
+@@ -421,6 +425,7 @@ struct nfsd4_op {
+ 		struct nfsd4_create_session	create_session;
+ 		struct nfsd4_destroy_session	destroy_session;
+ 		struct nfsd4_sequence		sequence;
++		struct nfsd4_reclaim_complete	reclaim_complete;
+ 	} u;
+ 	struct nfs4_replay *			replay;
+ };
+@@ -513,9 +518,8 @@ extern void nfsd4_store_cache_entry(stru
+ extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
+ 		struct nfsd4_sequence *seq);
+ extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
+-		struct nfsd4_compound_state *,
+-struct nfsd4_exchange_id *);
+-		extern __be32 nfsd4_create_session(struct svc_rqst *,
++		struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
++extern __be32 nfsd4_create_session(struct svc_rqst *,
+ 		struct nfsd4_compound_state *,
+ 		struct nfsd4_create_session *);
+ extern __be32 nfsd4_sequence(struct svc_rqst *,
+@@ -524,6 +528,7 @@ extern __be32 nfsd4_sequence(struct svc_
+ extern __be32 nfsd4_destroy_session(struct svc_rqst *,
+ 		struct nfsd4_compound_state *,
+ 		struct nfsd4_destroy_session *);
++__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *);
+ extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
+ 		struct nfsd4_open *open);
+ extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h	2010-08-23 09:57:23.994379831 -0400
+@@ -40,12 +40,12 @@ struct nfs_fhbase_old {
+  * This is the new flexible, extensible style NFSv2/v3 file handle.
+  * by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
+  *
+- * The file handle is seens as a list of 4byte words.
+- * The first word contains a version number (1) and four descriptor bytes
++ * The file handle starts with a sequence of four-byte words.
++ * The first word contains a version number (1) and three descriptor bytes
+  * that tell how the remaining 3 variable length fields should be handled.
+  * These three bytes are auth_type, fsid_type and fileid_type.
+  *
+- * All 4byte values are in host-byte-order.
++ * All four-byte values are in host-byte-order.
+  *
+  * The auth_type field specifies how the filehandle can be authenticated
+  * This might allow a file to be confirmed to be in a writable part of a
+diff -up linux-2.6.34.noarch/net/sunrpc/cache.c.orig linux-2.6.34.noarch/net/sunrpc/cache.c
+--- linux-2.6.34.noarch/net/sunrpc/cache.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/cache.c	2010-08-23 09:57:23.995376793 -0400
+@@ -49,11 +49,17 @@ static void cache_init(struct cache_head
+ 	h->last_refresh = now;
+ }
+ 
++static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
++{
++	return  (h->expiry_time < get_seconds()) ||
++		(detail->flush_time > h->last_refresh);
++}
++
+ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
+ 				       struct cache_head *key, int hash)
+ {
+ 	struct cache_head **head,  **hp;
+-	struct cache_head *new = NULL;
++	struct cache_head *new = NULL, *freeme = NULL;
+ 
+ 	head = &detail->hash_table[hash];
+ 
+@@ -62,6 +68,9 @@ struct cache_head *sunrpc_cache_lookup(s
+ 	for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
+ 		struct cache_head *tmp = *hp;
+ 		if (detail->match(tmp, key)) {
++			if (cache_is_expired(detail, tmp))
++				/* This entry has expired; we will discard it. */
++				break;
+ 			cache_get(tmp);
+ 			read_unlock(&detail->hash_lock);
+ 			return tmp;
+@@ -86,6 +95,13 @@ struct cache_head *sunrpc_cache_lookup(s
+ 	for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
+ 		struct cache_head *tmp = *hp;
+ 		if (detail->match(tmp, key)) {
++			if (cache_is_expired(detail, tmp)) {
++				*hp = tmp->next;
++				tmp->next = NULL;
++				detail->entries --;
++				freeme = tmp;
++				break;
++			}
+ 			cache_get(tmp);
+ 			write_unlock(&detail->hash_lock);
+ 			cache_put(new, detail);
+@@ -98,6 +114,8 @@ struct cache_head *sunrpc_cache_lookup(s
+ 	cache_get(new);
+ 	write_unlock(&detail->hash_lock);
+ 
++	if (freeme)
++		cache_put(freeme, detail);
+ 	return new;
+ }
+ EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
+@@ -183,10 +201,7 @@ static int cache_make_upcall(struct cach
+ 
+ static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h)
+ {
+-	if (!test_bit(CACHE_VALID, &h->flags) ||
+-	    h->expiry_time < get_seconds())
+-		return -EAGAIN;
+-	else if (detail->flush_time > h->last_refresh)
++	if (!test_bit(CACHE_VALID, &h->flags))
+ 		return -EAGAIN;
+ 	else {
+ 		/* entry is valid */
+diff -up linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig linux-2.6.34.noarch/net/sunrpc/svcsock.c
+--- linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/svcsock.c	2010-08-23 09:57:23.997368707 -0400
+@@ -547,7 +547,6 @@ static int svc_udp_recvfrom(struct svc_r
+ 			dprintk("svc: recvfrom returned error %d\n", -err);
+ 			set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
+ 		}
+-		svc_xprt_received(&svsk->sk_xprt);
+ 		return -EAGAIN;
+ 	}
+ 	len = svc_addr_len(svc_addr(rqstp));
+@@ -562,11 +561,6 @@ static int svc_udp_recvfrom(struct svc_r
+ 	svsk->sk_sk->sk_stamp = skb->tstamp;
+ 	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
+ 
+-	/*
+-	 * Maybe more packets - kick another thread ASAP.
+-	 */
+-	svc_xprt_received(&svsk->sk_xprt);
+-
+ 	len  = skb->len - sizeof(struct udphdr);
+ 	rqstp->rq_arg.len = len;
+ 
+@@ -917,7 +911,6 @@ static int svc_tcp_recv_record(struct sv
+ 		if (len < want) {
+ 			dprintk("svc: short recvfrom while reading record "
+ 				"length (%d of %d)\n", len, want);
+-			svc_xprt_received(&svsk->sk_xprt);
+ 			goto err_again; /* record header not complete */
+ 		}
+ 
+@@ -953,7 +946,6 @@ static int svc_tcp_recv_record(struct sv
+ 	if (len < svsk->sk_reclen) {
+ 		dprintk("svc: incomplete TCP record (%d of %d)\n",
+ 			len, svsk->sk_reclen);
+-		svc_xprt_received(&svsk->sk_xprt);
+ 		goto err_again;	/* record not complete */
+ 	}
+ 	len = svsk->sk_reclen;
+@@ -961,10 +953,8 @@ static int svc_tcp_recv_record(struct sv
+ 
+ 	return len;
+  error:
+-	if (len == -EAGAIN) {
++	if (len == -EAGAIN)
+ 		dprintk("RPC: TCP recv_record got EAGAIN\n");
+-		svc_xprt_received(&svsk->sk_xprt);
+-	}
+ 	return len;
+  err_delete:
+ 	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+@@ -1110,7 +1100,6 @@ out:
+ 	svsk->sk_tcplen = 0;
+ 
+ 	svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
+-	svc_xprt_received(&svsk->sk_xprt);
+ 	if (serv->sv_stats)
+ 		serv->sv_stats->nettcpcnt++;
+ 
+@@ -1119,7 +1108,6 @@ out:
+ err_again:
+ 	if (len == -EAGAIN) {
+ 		dprintk("RPC: TCP recvfrom got EAGAIN\n");
+-		svc_xprt_received(&svsk->sk_xprt);
+ 		return len;
+ 	}
+ error:
+diff -up linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig linux-2.6.34.noarch/net/sunrpc/svc_xprt.c
+--- linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/svc_xprt.c	2010-08-23 09:57:23.996377209 -0400
+@@ -744,8 +744,10 @@ int svc_recv(struct svc_rqst *rqstp, lon
+ 		if (rqstp->rq_deferred) {
+ 			svc_xprt_received(xprt);
+ 			len = svc_deferred_recv(rqstp);
+-		} else
++		} else {
+ 			len = xprt->xpt_ops->xpo_recvfrom(rqstp);
++			svc_xprt_received(xprt);
++		}
+ 		dprintk("svc: got len=%d\n", len);
+ 	}
+ 
+@@ -893,12 +895,12 @@ void svc_delete_xprt(struct svc_xprt *xp
+ 	 */
+ 	if (test_bit(XPT_TEMP, &xprt->xpt_flags))
+ 		serv->sv_tmpcnt--;
++	spin_unlock_bh(&serv->sv_lock);
+ 
+ 	while ((dr = svc_deferred_dequeue(xprt)) != NULL)
+ 		kfree(dr);
+ 
+ 	svc_xprt_put(xprt);
+-	spin_unlock_bh(&serv->sv_lock);
+ }
+ 
+ void svc_close_xprt(struct svc_xprt *xprt)
+diff -up linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c.orig linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+--- linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	2010-08-23 09:57:23.998377481 -0400
+@@ -566,7 +566,6 @@ static int rdma_read_complete(struct svc
+ 		ret, rqstp->rq_arg.len,	rqstp->rq_arg.head[0].iov_base,
+ 		rqstp->rq_arg.head[0].iov_len);
+ 
+-	svc_xprt_received(rqstp->rq_xprt);
+ 	return ret;
+ }
+ 
+@@ -665,7 +664,6 @@ int svc_rdma_recvfrom(struct svc_rqst *r
+ 		rqstp->rq_arg.head[0].iov_len);
+ 	rqstp->rq_prot = IPPROTO_MAX;
+ 	svc_xprt_copy_addrs(rqstp, xprt);
+-	svc_xprt_received(xprt);
+ 	return ret;
+ 
+  close_out:
+@@ -678,6 +676,5 @@ int svc_rdma_recvfrom(struct svc_rqst *r
+ 	 */
+ 	set_bit(XPT_CLOSE, &xprt->xpt_flags);
+ defer:
+-	svc_xprt_received(xprt);
+ 	return 0;
+ }
diff --git a/pnfs-all-2.6.35-2010-08-19-f13.patch b/pnfs-all-2.6.35-2010-08-19-f13.patch
new file mode 100644
index 000000000..a9d78ba0e
--- /dev/null
+++ b/pnfs-all-2.6.35-2010-08-19-f13.patch
@@ -0,0 +1,31788 @@
+diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arch/um/os-Linux/mem.c
+--- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-08-23 12:08:27.310584826 -0400
++++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-08-23 12:09:03.273553977 -0400
+@@ -13,6 +13,7 @@
+ #include <sys/stat.h>
+ #include <sys/mman.h>
+ #include <sys/param.h>
++#include <sys/stat.h>
+ #include "init.h"
+ #include "kern_constants.h"
+ #include "os.h"
+diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.c
+--- linux-2.6.34.noarch/block/genhd.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/block/genhd.c	2010-08-23 12:09:03.273553977 -0400
+@@ -1009,6 +1009,7 @@ static void disk_release(struct device *
+ struct class block_class = {
+ 	.name		= "block",
+ };
++EXPORT_SYMBOL(block_class);
+ 
+ static char *block_devnode(struct device *dev, mode_t *mode)
+ {
+diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt
+--- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-08-23 12:09:03.274563927 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-08-23 12:09:03.274563927 -0400
+@@ -0,0 +1,211 @@
++(c) 2007 Network Appliance Inc.
++
++spNFS
++-----
++
++An spNFS system consists of a Meta Data Server (MDS), a number of Client machines (C) and a number of Data Servers (DS).
++
++A file system is mounted by the clients from the MDS, and all file data
++is striped across the DSs.
++
++Identify the machines that will be filling each of these roles.
++
++The spnfs kernel will be installed on all machines: clients, the MDS and DSs.
++
++
++Building and installing the spNFS kernel
++----------------------------------------
++
++Get the spNFS kernel from:
++
++	git://linux-nfs.org/~bhalevy/linux-pnfs.git
++
++Use the pnfs-all-latest branch and add these options to your .config file
++
++	CONFIG_NETWORK_FILESYSTEMS=y
++	CONFIG_NFS_FS=m
++	CONFIG_NFS_V4=y
++	CONFIG_NFS_V4_1=y
++	CONFIG_PNFS=y
++	CONFIG_NFSD=m
++	CONFIG_PNFSD=y
++	# CONFIG_PNFSD_LOCAL_EXPORT is not set
++	CONFIG_SPNFS=y
++
++By default, spNFS uses whole-file layouts.  Layout segments can be enabled
++by adding:
++
++	CONFIG_SPNFS_LAYOUTSEGMENTS=y
++
++to your .config file.
++
++Building and installation of kernel+modules is as usual.
++This kernel should be installed and booted on the client, MDS and DSs.
++
++Note that CONFIG_PNFSD_LOCAL_EXPORT must be disabled for spnfs as it
++takes over the pnfs export interface.
++
++Building nfs-utils
++------------------
++
++Get the nfs-utils package containing spnfsd from:
++
++	git://linux-nfs.org/~bhalevy/pnfs-nfs-utils.git
++
++Follow the standard instructions for building nfs-utils.
++
++After building, the spnfsd daemon will be located in utils/spnfsd.  The spnfsd
++daemon will only be needed on the MDS.
++
++
++Installation
++------------
++
++The nfs-utils package contains a default spnfsd.conf file in
++utils/spnfsd/spnfsd.conf.  Copy this file to /etc/spnfsd.conf.
++
++By default, the DS-Mount-Directory is set to /spnfs (see spnfsd.conf).  Under
++this directory, mount points must be created for each DS to
++be used for pNFS data stripes.  These mount points are named by the ip address
++of the corresponding DS.  In the sample spnfsd.conf, there are two
++DSs defined (172.16.28.134 and 172.16.28.141).
++
++Following the sample spnfsd.conf,
++
++	mkdir /spnfs
++
++on the MDS (corresponding to DS-Mount-Directory).  Then
++
++	mkdir /spnfs/172.16.28.134
++	mkdir /spnfs/172.16.28.141
++
++to create the mount points for the DSs.
++
++On the DSs, choose a directory where data stripes will be created by the MDS.
++For the sample file, this directory is /pnfs, so on each DS execute:
++
++	mkdir /pnfs
++
++This directory is specified in the spnfsd.conf file by the DS*_ROOT option
++(where * is replaced by the DS number).  DS_ROOT is specified relative to
++the directory being exported by the DSs.  In our example, our DSs are exporting
++the root directory (/) and therefore our DS_ROOT is /pnfs.  On the DSs, we have
++the following entry in /etc/exports:
++
++	/ *(rw,fsid=0,insecure,no_root_squash,sync,no_subtree_check)
++
++N.B. If we had created a /exports directory and a /pnfs directory under
++/exports, and if we were exporting /exports, then DS_ROOT would still be /pnfs
++(not /exports/pnfs).
++
++It may be useful to add entries to /etc/fstab on the MDS to automatically
++mount the DS_ROOT file systems.  For this example, our MDS fstab would
++contain:
++
++	172.16.28.134:/pnfs /spnfs/172.16.28.134 nfs    defaults        1 2
++	172.16.28.141:/pnfs /spnfs/172.16.28.141 nfs    defaults        1 2
++
++The DS mounts must be performed manually or via fstab at this time (automatic
++mounting, directory creation, etc. are on the todo list).  To perform I/O
++through the MDS, the DS mounts MUST use NFSv3 at this time (this restriction
++will eventually be removed).
++
++
++On the MDS, choose a file system to use with spNFS and export it, e.g.:
++
++	/ *(rw,fsid=0,insecure,no_root_squash,sync,no_subtree_check,pnfs)
++
++Make sure nfsd and all supporting processes are running on the MDS and DSs.
++
++
++Running
++-------
++
++If rpc_pipefs is not already mounted (if you're running idmapd it probably is),
++you may want to add the following line to /etc/fstab:
++
++	rpc_pipefs    /var/lib/nfs/rpc_pipefs rpc_pipefs defaults     0 0
++
++to automatically mount rpc_pipefs.
++
++With spnfsd.conf configured for your environment and the mounts mounted as
++described above, spnfsd can now be started.
++
++On the MDS, execute spnfsd:
++
++	spnfsd
++
++The executable is located in the directory where it was built, and
++may also have been installed elsewhere depending on how you built nfs-utils.
++It will run in the foreground by default, and in fact will do so despite
++any options suggesting the contrary (it's still a debugging build).
++
++On the client, make sure the nfslayoutdriver module is loaded:
++
++	modprobe nfslayoutdriver
++
++Then mount the file system from the MDS:
++
++	mount -t nfs4 -o minorversion=1 mds:/ /mnt
++
++I/O through the MDS is now supported.  To use it, do not load the
++nfslayoutdriver on the client, and mount the MDS using NFSv4 or 4.1
++(NFSv2 and v3 are not yet supported).
++
++You may now use spNFS by performing file system activities in /mnt.
++If you create files in /mnt, you should see stripe files corresponding to
++new files being created on the DSs.  The current implementation names the
++stripe files based on the inode number of the file on the MDS.  For example,
++if you create a file foo in /mnt and do an 'ls -li /mnt/foo':
++
++	# ls -li foo
++	1233 -rw-r--r-- 1 root root 0 Nov 29 15:54 foo
++
++You should see stripe files on each DS under /pnfs (per the sample) named
++1233.  The file /pnfs/1233 on DS1 will contain the first <stripe size> bytes
++of data written to foo, DS2 will contain the next <stripe size> bytes, etc.
++Removing /mnt/foo will remove the corresponding stripe files on the DSs.
++Other file system operations should behave (mostly :-) as expected.
++
++
++Layout Segments
++---------------
++
++If the kernel is compiled to support layout segments, there will
++be two files created under /proc/fs/spnfs for controlling layout
++segment functionality.
++
++To enable layout segments, write a '1' to /proc/fs/spnfs/layoutseg, e.g.:
++
++	echo 1 > /proc/fs/spnfs/layoutseg
++
++Layout segments can be disabled (returning to whole-file layouts) by
++writing a '0' to /proc/fs/spnfs/layoutseg:
++
++	echo 0 > /proc/fs/spnfs/layoutseg
++
++When layout segments are enabled, the size of the layouts returned can
++be specified by writing a decimal number (ascii representation) to
++/proc/fs/spnfs/layoutsegsize:
++
++	echo 1024 > /proc/fs/spnfs/layoutsegsize
++
++The value '0' has a special meaning--it causes the server to return a
++layout that is exactly the size requested by the client:
++
++	echo 0 > /proc/fs/spnfs/layoutsegsize
++
++
++Troubleshooting
++---------------
++
++If you see data being written to the files on the MDS rather than
++the stripe files, make sure the nfslayoutdriver is loaded on the client
++(see above).
++
++If you get a "permission denied" error, make sure mountd is running on the MDS
++(it occasionally fails to start).
++
++Bugs, enhancements, compliments, complaints to: dmuntz@netapp.com
++
++
+diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/drivers/md/dm-ioctl.c
+--- linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-08-23 12:09:03.275584050 -0400
+@@ -651,6 +651,12 @@ static int dev_create(struct dm_ioctl *p
+ 	return r;
+ }
+ 
++int dm_dev_create(struct dm_ioctl *param)
++{
++	return dev_create(param, sizeof(*param));
++}
++EXPORT_SYMBOL(dm_dev_create);
++
+ /*
+  * Always use UUID for lookups if it's present, otherwise use name or dev.
+  */
+@@ -745,6 +751,12 @@ static int dev_remove(struct dm_ioctl *p
+ 	return 0;
+ }
+ 
++int dm_dev_remove(struct dm_ioctl *param)
++{
++	return dev_remove(param, sizeof(*param));
++}
++EXPORT_SYMBOL(dm_dev_remove);
++
+ /*
+  * Check a string doesn't overrun the chunk of
+  * memory we copied from userland.
+@@ -917,6 +929,12 @@ static int do_resume(struct dm_ioctl *pa
+ 	return r;
+ }
+ 
++int dm_do_resume(struct dm_ioctl *param)
++{
++	return do_resume(param);
++}
++EXPORT_SYMBOL(dm_do_resume);
++
+ /*
+  * Set or unset the suspension state of a device.
+  * If the device already is in the requested state we just return its status.
+@@ -1194,6 +1212,12 @@ out:
+ 	return r;
+ }
+ 
++int dm_table_load(struct dm_ioctl *param, size_t param_size)
++{
++	return table_load(param, param_size);
++}
++EXPORT_SYMBOL(dm_table_load);
++
+ static int table_clear(struct dm_ioctl *param, size_t param_size)
+ {
+ 	int r;
+diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drivers/scsi/hosts.c
+--- linux-2.6.34.noarch/drivers/scsi/hosts.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-08-23 12:09:03.276563906 -0400
+@@ -49,7 +49,7 @@ static void scsi_host_cls_release(struct
+ 	put_device(&class_to_shost(dev)->shost_gendev);
+ }
+ 
+-static struct class shost_class = {
++struct class shost_class = {
+ 	.name		= "scsi_host",
+ 	.dev_release	= scsi_host_cls_release,
+ };
+diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/exofs.h
+--- linux-2.6.34.noarch/fs/exofs/exofs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-08-23 12:09:03.277563890 -0400
+@@ -36,13 +36,9 @@
+ #include <linux/fs.h>
+ #include <linux/time.h>
+ #include <linux/backing-dev.h>
++#include <linux/pnfs_osd_xdr.h>
+ #include "common.h"
+ 
+-/* FIXME: Remove once pnfs hits mainline
+- * #include <linux/exportfs/pnfs_osd_xdr.h>
+- */
+-#include "pnfs.h"
+-
+ #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
+ 
+ #ifdef CONFIG_EXOFS_DEBUG
+@@ -103,6 +99,7 @@ struct exofs_sb_info {
+ struct exofs_i_info {
+ 	struct inode   vfs_inode;          /* normal in-memory inode          */
+ 	wait_queue_head_t i_wq;            /* wait queue for inode            */
++	spinlock_t     i_layout_lock;      /* lock for layout/return/recall   */
+ 	unsigned long  i_flags;            /* various atomic flags            */
+ 	uint32_t       i_data[EXOFS_IDATA];/*short symlink names and device #s*/
+ 	uint32_t       i_dir_start_lookup; /* which page to start lookup      */
+@@ -166,6 +163,9 @@ static inline unsigned exofs_io_state_si
+  */
+ #define OBJ_2BCREATED	0	/* object will be created soon*/
+ #define OBJ_CREATED	1	/* object has been created on the osd*/
++/* Below are set/cleared non-atomically under i_layout_lock; same i_flags */
++#define OBJ_LAYOUT_IS_GIVEN  2  /* inode has given layouts to clients*/
++#define OBJ_IN_LAYOUT_RECALL 3  /* inode is in the middle of a layout recall*/
+ 
+ static inline int obj_2bcreated(struct exofs_i_info *oi)
+ {
+@@ -304,4 +304,20 @@ extern const struct inode_operations exo
+ extern const struct inode_operations exofs_symlink_inode_operations;
+ extern const struct inode_operations exofs_fast_symlink_inode_operations;
+ 
++/* export.c */
++typedef int (exofs_recall_fn)(struct inode *inode);
++#ifdef CONFIG_PNFSD
++int exofs_inode_recall_layout(struct inode *inode, enum pnfs_iomode iomode,
++			      exofs_recall_fn todo);
++void exofs_init_export(struct super_block *sb);
++#else
++static inline int exofs_inode_recall_layout(struct inode *inode,
++				enum pnfs_iomode iomode, exofs_recall_fn todo)
++{
++	return todo(inode);
++}
++
++static inline void exofs_init_export(struct super_block *sb) {}
++#endif
++
+ #endif
+diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs/export.c
+--- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-08-23 12:09:03.278386746 -0400
++++ linux-2.6.34.noarch/fs/exofs/export.c	2010-08-23 12:09:03.278386746 -0400
+@@ -0,0 +1,396 @@
++/*
++ * export.c - Implementation of the pnfs_export_operations
++ *
++ * Copyright (C) 2009 Panasas Inc.
++ * All rights reserved.
++ *
++ * Boaz Harrosh <bharrosh@panasas.com>
++ *
++ * This file is part of exofs.
++ *
++ * exofs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation.  Since it is based on ext2, and the only
++ * valid version of GPL for the Linux kernel is version 2, the only valid
++ * version of GPL for exofs is version 2.
++ *
++ * exofs is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with exofs; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
++ */
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include "exofs.h"
++
++static int exofs_layout_type(struct super_block *sb)
++{
++	return LAYOUT_OSD2_OBJECTS;
++}
++
++static void set_dev_id(struct pnfs_deviceid *pnfs_devid, u64 sbid, u64 devid)
++{
++	struct nfsd4_pnfs_deviceid *dev_id =
++		(struct nfsd4_pnfs_deviceid *)pnfs_devid;
++
++	dev_id->sbid  = sbid;
++	dev_id->devid = devid;
++}
++
++static int cb_layout_recall(struct inode *inode, enum pnfs_iomode iomode,
++			    u64 offset, u64 length, void *cookie)
++{
++	struct nfsd4_pnfs_cb_layout cbl;
++	struct pnfsd_cb_ctl cb_ctl;
++	int status;
++
++	memset(&cb_ctl, 0, sizeof(cb_ctl));
++	status = pnfsd_get_cb_op(&cb_ctl);
++	if (unlikely(status)) {
++		EXOFS_ERR("%s: nfsd unloaded!! inode (0x%lx) status=%d\n",
++			  __func__, inode->i_ino, status);
++		goto err;
++	}
++
++	memset(&cbl, 0, sizeof(cbl));
++	cbl.cbl_recall_type = RETURN_FILE;
++	cbl.cbl_seg.layout_type = LAYOUT_OSD2_OBJECTS;
++	cbl.cbl_seg.iomode = iomode;
++	cbl.cbl_seg.offset = offset;
++	cbl.cbl_seg.length = length;
++	cbl.cbl_cookie = cookie;
++
++	status = cb_ctl.cb_op->cb_layout_recall(inode->i_sb, inode, &cbl);
++	pnfsd_put_cb_op(&cb_ctl);
++
++err:
++	return status;
++}
++
++static enum nfsstat4 exofs_layout_get(
++	struct inode *inode,
++	struct exp_xdr_stream *xdr,
++	const struct nfsd4_pnfs_layoutget_arg *args,
++	struct nfsd4_pnfs_layoutget_res *res)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
++	struct exofs_layout *el = &sbi->layout;
++	struct pnfs_osd_object_cred *creds = NULL;
++	struct pnfs_osd_layout layout;
++	__be32 *start;
++	bool in_recall;
++	int i, err;
++	enum nfsstat4 nfserr;
++
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;
++	res->lg_seg.iomode = IOMODE_RW;
++	res->lg_return_on_close = true; /* TODO: unused but will be soon */
++
++	/* skip opaque size, will be filled-in later */
++	start = exp_xdr_reserve_qwords(xdr, 1);
++	if (!start) {
++		nfserr = NFS4ERR_TOOSMALL;
++		goto out;
++	}
++
++	creds = kcalloc(el->s_numdevs, sizeof(*creds), GFP_KERNEL);
++	if (!creds) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto out;
++	}
++
++	/* Fill in a pnfs_osd_layout struct */
++	layout.olo_map = sbi->data_map;
++
++	for (i = 0; i < el->s_numdevs; i++) {
++		struct pnfs_osd_object_cred *cred = &creds[i];
++		osd_id id = exofs_oi_objno(oi);
++		unsigned dev = exofs_layout_od_id(el, id, i);
++
++		set_dev_id(&cred->oc_object_id.oid_device_id, args->lg_sbid,
++			   dev);
++		cred->oc_object_id.oid_partition_id = el->s_pid;
++		cred->oc_object_id.oid_object_id = id;
++		cred->oc_osd_version = osd_dev_is_ver1(el->s_ods[dev]) ?
++						PNFS_OSD_VERSION_1 :
++						PNFS_OSD_VERSION_2;
++		cred->oc_cap_key_sec = PNFS_OSD_CAP_KEY_SEC_NONE;
++
++		cred->oc_cap_key.cred_len	= 0;
++		cred->oc_cap_key.cred		= NULL;
++
++		cred->oc_cap.cred_len	= OSD_CAP_LEN;
++		cred->oc_cap.cred	= oi->i_cred;
++	}
++
++	layout.olo_comps_index = 0;
++	layout.olo_num_comps = el->s_numdevs;
++	layout.olo_comps = creds;
++
++	err = pnfs_osd_xdr_encode_layout(xdr, &layout);
++	if (err) {
++		nfserr = NFS4ERR_TOOSMALL; /* FIXME: Change osd_xdr error codes */
++		goto out;
++	}
++
++	exp_xdr_encode_opaque_len(start, xdr->p);
++
++	spin_lock(&oi->i_layout_lock);
++	in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	if (!in_recall) {
++		__set_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++		nfserr = NFS4_OK;
++	} else {
++		nfserr = NFS4ERR_RECALLCONFLICT;
++	}
++	spin_unlock(&oi->i_layout_lock);
++
++out:
++	kfree(creds);
++	EXOFS_DBGMSG("(0x%lx) nfserr=%u xdr_bytes=%zu\n",
++		     inode->i_ino, nfserr, exp_xdr_qbytes(xdr->p - start));
++	return nfserr;
++}
++
++/* Apply a client's LAYOUTCOMMIT to the inode; i_mutex must NOT be held */
++static int exofs_layout_commit(
++	struct inode *inode,
++	const struct nfsd4_pnfs_layoutcommit_arg *args,
++	struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	struct timespec mtime;
++	loff_t i_size;
++	int in_recall;
++
++	/* In case of a recall we ignore the new size and mtime since they
++	 * are going to be changed again by truncate, and since we cannot take
++	 * the inode lock in that case.
++	 */
++	spin_lock(&oi->i_layout_lock);
++	in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++	if (in_recall) {
++		EXOFS_DBGMSG("(0x%lx) commit was called during recall\n",
++			     inode->i_ino);
++		return 0;
++	}
++
++	/* NOTE: I would love to call inode_setattr here
++	 *	 but i cannot since this will cause an eventual vmtruncate,
++	 *	 which will cause a layout_recall. So open code the i_size
++	 *	 and mtime/atime changes under i_mutex.
++	 */
++	mutex_lock_nested(&inode->i_mutex, I_MUTEX_NORMAL);
++
++	if (args->lc_mtime.seconds) {
++		mtime.tv_sec = args->lc_mtime.seconds;
++		mtime.tv_nsec = args->lc_mtime.nseconds;
++
++		/* layout commit may only make time bigger, since there might
++		 * be reordering of the notifications and it might arrive after
++		 * a local change, so an older commit time keeps i_mtime.
++		 * TODO: if mtime > ctime then we know set_attr did an mtime
++		 * in the future. and we can let this update through
++		 */
++		if (timespec_compare(&mtime, &inode->i_mtime) < 0)
++			mtime = inode->i_mtime;
++	} else {
++		mtime = current_fs_time(inode->i_sb);
++	}
++
++	/* TODO: Will below work? since mark_inode_dirty has its own
++	 *       time handling
++	 */
++	inode->i_atime = inode->i_mtime = mtime;
++
++	i_size = i_size_read(inode);
++	if (args->lc_newoffset) {
++		loff_t new_size = args->lc_last_wr + 1;
++
++		if (i_size < new_size) {
++			i_size_write(inode, i_size = new_size);
++			res->lc_size_chg = 1;
++			res->lc_newsize = new_size;
++		}
++	}
++	/* TODO: else { i_size = osd_get_object_length() } */
++
++/* TODO: exofs does not currently use the osd_xdr part of the layout_commit */
++
++	mark_inode_dirty_sync(inode);
++
++	mutex_unlock(&inode->i_mutex);
++	EXOFS_DBGMSG("(0x%lx) i_size=0x%llx lcp->off=0x%llx\n",
++		     inode->i_ino, i_size, args->lc_last_wr);
++	return 0;
++}
++
++static void exofs_handle_error(struct pnfs_osd_ioerr *ioerr)
++{
++	EXOFS_ERR("exofs_handle_error: errno=%d is_write=%d obj=0x%llx "
++		  "offset=0x%llx length=0x%llx\n",
++		  ioerr->oer_errno, ioerr->oer_iswrite,
++		  _LLU(ioerr->oer_component.oid_object_id),
++		  _LLU(ioerr->oer_comp_offset),
++		  _LLU(ioerr->oer_comp_length));
++}
++
++static int exofs_layout_return(
++	struct inode *inode,
++	const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	__be32 *p = args->lrf_body;
++	unsigned len = exp_xdr_qwords(args->lrf_body_len);
++
++	EXOFS_DBGMSG("(0x%lx) cookie %p xdr_len %d\n",
++		     inode->i_ino, args->lr_cookie, len);
++
++	while (len >= pnfs_osd_ioerr_xdr_sz()) {
++		struct pnfs_osd_ioerr ioerr;
++
++		p = pnfs_osd_xdr_decode_ioerr(&ioerr, p);
++		len -= pnfs_osd_ioerr_xdr_sz();
++		exofs_handle_error(&ioerr);
++	}
++
++	if (args->lr_cookie) {
++		struct exofs_i_info *oi = exofs_i(inode);
++		bool in_recall;
++
++		spin_lock(&oi->i_layout_lock);
++		in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++		__clear_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++		spin_unlock(&oi->i_layout_lock);
++
++		/* TODO: how to communicate cookie with the waiter */
++		if (in_recall)
++			wake_up(&oi->i_wq); /* wakeup any recalls */
++	}
++
++	return 0;
++}
++
++int exofs_get_device_info(struct super_block *sb, struct exp_xdr_stream *xdr,
++			  u32 layout_type,
++			  const struct nfsd4_pnfs_deviceid *devid)
++{
++	struct exofs_sb_info *sbi = sb->s_fs_info;
++	struct pnfs_osd_deviceaddr devaddr;
++	const struct osd_dev_info *odi;
++	u64 devno = devid->devid;
++	__be32 *start;
++	int err;
++
++	memset(&devaddr, 0, sizeof(devaddr));
++
++	if (unlikely(devno >= sbi->layout.s_numdevs))
++		return -ENODEV;
++
++	odi = osduld_device_info(sbi->layout.s_ods[devno]);
++
++	devaddr.oda_systemid.len = odi->systemid_len;
++	devaddr.oda_systemid.data = (void *)odi->systemid; /* !const cast */
++
++	devaddr.oda_osdname.len = odi->osdname_len ;
++	devaddr.oda_osdname.data = (void *)odi->osdname;/* !const cast */
++
++	/* skip opaque size, will be filled-in later */
++	start = exp_xdr_reserve_qwords(xdr, 1);
++	if (!start) {
++		err = -E2BIG;
++		goto err;
++	}
++
++	err = pnfs_osd_xdr_encode_deviceaddr(xdr, &devaddr);
++	if (err)
++		goto err;
++
++	exp_xdr_encode_opaque_len(start, xdr->p);
++
++	EXOFS_DBGMSG("xdr_bytes=%Zu devno=%lld osdname-%s\n",
++		     exp_xdr_qbytes(xdr->p - start), devno, odi->osdname);
++	return 0;
++
++err:
++	EXOFS_DBGMSG("Error: err=%d at_byte=%zu\n",
++		     err, exp_xdr_qbytes(xdr->p - start));
++	return err;
++}
++
++struct pnfs_export_operations exofs_pnfs_ops = {
++	.layout_type	= exofs_layout_type,
++	.layout_get	= exofs_layout_get,
++	.layout_commit	= exofs_layout_commit,
++	.layout_return	= exofs_layout_return,
++	.get_device_info = exofs_get_device_info,
++};
++
++static bool is_layout_returned(struct exofs_i_info *oi)
++{
++	bool layout_given;
++
++	spin_lock(&oi->i_layout_lock);
++	layout_given = test_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++
++	return !layout_given;
++}
++
++int exofs_inode_recall_layout(struct inode *inode, enum pnfs_iomode iomode,
++			      exofs_recall_fn todo)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	int layout_given;
++	int error = 0;
++	/* Recall any given layout; run todo(inode) once none is outstanding */
++	spin_lock(&oi->i_layout_lock);
++	layout_given = test_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++	__set_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags); /* blocks layout_get */
++	spin_unlock(&oi->i_layout_lock);
++	/* Without an outstanding layout there is nothing to recall */
++	if (!layout_given)
++		goto exec;
++	/* Recall and wait repeatedly; only -ENOENT or an error exits */
++	for (;;) {
++		EXOFS_DBGMSG("(0x%lx) has_layout issue a recall\n",
++			     inode->i_ino);
++		error = cb_layout_recall(inode, iomode, 0, NFS4_MAX_UINT64,
++					 &oi->i_wq);
++		switch (error) {
++		case 0:
++		case -EAGAIN:
++			break;	/* go on to wait for the layout return */
++		case -ENOENT:
++			goto exec;
++		default:
++			goto err;
++		}
++		/* exofs_layout_return() wakes i_wq when a layout comes back */
++		error = wait_event_interruptible(oi->i_wq,
++						 is_layout_returned(oi));
++		if (error)
++			goto err;
++	}
++
++exec:
++	error = todo(inode);
++	/* Reached on success and on failure: always end the recall state */
++err:
++	spin_lock(&oi->i_layout_lock);
++	__clear_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++	EXOFS_DBGMSG("(0x%lx) return=>%d\n", inode->i_ino, error);
++	return error;
++}
++
++void exofs_init_export(struct super_block *sb)
++{
++	sb->s_pnfs_op = &exofs_pnfs_ops;
++}
+diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/inode.c
+--- linux-2.6.34.noarch/fs/exofs/inode.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-08-23 12:09:03.279502002 -0400
+@@ -833,7 +833,7 @@ void exofs_truncate(struct inode *inode)
+ 	if (unlikely(wait_obj_created(oi)))
+ 		goto fail;
+ 
+-	ret = _do_truncate(inode);
++	ret = exofs_inode_recall_layout(inode, IOMODE_ANY, _do_truncate);
+ 	if (ret)
+ 		goto fail;
+ 
+@@ -964,6 +964,7 @@ static void __oi_init(struct exofs_i_inf
+ {
+ 	init_waitqueue_head(&oi->i_wq);
+ 	oi->i_flags = 0;
++	spin_lock_init(&oi->i_layout_lock);
+ }
+ /*
+  * Fill in an inode read from the OSD and set it up for use
+diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/Kbuild
+--- linux-2.6.34.noarch/fs/exofs/Kbuild.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-08-23 12:09:03.279502002 -0400
+@@ -13,4 +13,5 @@
+ #
+ 
+ exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o
++exofs-$(CONFIG_PNFSD) +=  export.o
+ obj-$(CONFIG_EXOFS_FS) += exofs.o
+diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/Kconfig
+--- linux-2.6.34.noarch/fs/exofs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-08-23 12:09:03.280553663 -0400
+@@ -1,6 +1,7 @@
+ config EXOFS_FS
+ 	tristate "exofs: OSD based file system support"
+ 	depends on SCSI_OSD_ULD
++	select EXPORTFS_OSD_LAYOUT if PNFSD
+ 	help
+ 	  EXOFS is a file system that uses an OSD storage device,
+ 	  as its backing storage.
+diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/super.c
+--- linux-2.6.34.noarch/fs/exofs/super.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/super.c	2010-08-23 12:09:03.281511951 -0400
+@@ -621,6 +621,7 @@ static int exofs_fill_super(struct super
+ 	sb->s_fs_info = sbi;
+ 	sb->s_op = &exofs_sops;
+ 	sb->s_export_op = &exofs_export_ops;
++	exofs_init_export(sb);
+ 	root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF);
+ 	if (IS_ERR(root)) {
+ 		EXOFS_ERR("ERROR: exofs_iget failed\n");
+diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exportfs/expfs.c
+--- linux-2.6.34.noarch/fs/exportfs/expfs.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-08-23 12:09:03.282511528 -0400
+@@ -16,6 +16,13 @@
+ #include <linux/namei.h>
+ #include <linux/sched.h>
+ 
++#if defined(CONFIG_PNFSD)
++struct pnfsd_cb_ctl pnfsd_cb_ctl = {
++	.lock = __SPIN_LOCK_UNLOCKED(pnfsd_cb_ctl.lock)
++};
++EXPORT_SYMBOL(pnfsd_cb_ctl);
++#endif /* CONFIG_PNFSD */
++
+ #define dprintk(fmt, args...) do{}while(0)
+ 
+ 
+diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/exportfs/Makefile
+--- linux-2.6.34.noarch/fs/exportfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-08-23 12:09:03.282511528 -0400
+@@ -3,4 +3,7 @@
+ 
+ obj-$(CONFIG_EXPORTFS) += exportfs.o
+ 
+-exportfs-objs := expfs.o
++exportfs-y				:= expfs.o
++exportfs-$(CONFIG_EXPORTFS_FILE_LAYOUT)	+= nfs4filelayoutxdr.o
++exportfs-$(CONFIG_EXPORTFS_OSD_LAYOUT)	+= pnfs_osd_xdr_srv.o
++exportfs-$(CONFIG_EXPORTFS_BLOCK_LAYOUT) += nfs4blocklayoutxdr.o
+diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c
+--- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-08-23 12:09:03.283511561 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-08-23 12:09:03.283511561 -0400
+@@ -0,0 +1,158 @@
++/*
++ *  linux/fs/exportfs/nfs4blocklayoutxdr.c
++ *
++ *
++ *  Created by Rick McNeal on 3/31/08.
++ *  Copyright 2008 __MyCompanyName__. All rights reserved.
++ *
++ */
++#include <linux/module.h>
++#include <linux/sunrpc/svc.h>
++#include <linux/nfs4.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++static int
++bl_encode_simple(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	__be32 *p = exp_xdr_reserve_space(xdr,
++					  12 + 4 + bld->u.simple.bld_sig_len);
++
++	if (!p)
++		return -ETOOSMALL;
++
++	p = exp_xdr_encode_u32(p, 1);
++	p = exp_xdr_encode_u64(p, bld->u.simple.bld_offset);
++	exp_xdr_encode_opaque(p, bld->u.simple.bld_sig,
++			      bld->u.simple.bld_sig_len);
++
++	return 0;
++}
++
++static int
++bl_encode_slice(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 2 + 2 + 1);
++
++	if (!p)
++		return -ETOOSMALL;
++
++	p = exp_xdr_encode_u64(p, bld->u.slice.bld_start);
++	p = exp_xdr_encode_u64(p, bld->u.slice.bld_len);
++	exp_xdr_encode_u32(p, bld->u.slice.bld_index);
++
++	return 0;
++}
++
++static int
++bl_encode_concat(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	return -ENOTSUPP;
++}
++
++static int
++bl_encode_stripe(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	int i;	/* qwords: 2 (u64 chunk size) + 1 (u32 count) + 1 per stripe */
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 3 + bld->u.stripe.bld_stripes);
++
++	if (!p)	/* stream cannot hold the stripe volume description */
++		return -ETOOSMALL;
++	p = exp_xdr_encode_u64(p, bld->u.stripe.bld_chunk_size);
++	p = exp_xdr_encode_u32(p, bld->u.stripe.bld_stripes);
++	for (i = 0; i < bld->u.stripe.bld_stripes; i++)
++		p = exp_xdr_encode_u32(p, bld->u.stripe.bld_stripe_indexs[i]);
++	return 0;
++}
++
++int
++blocklayout_encode_devinfo(struct exp_xdr_stream *xdr,
++			   const struct list_head *volumes)
++{
++	u32				num_vols	= 0,
++					*layoutlen_p	= xdr->p;
++	pnfs_blocklayout_devinfo_t	*bld;
++	int				status		= 0;
++	__be32 *p;
++
++	p = exp_xdr_reserve_qwords(xdr, 2);
++	if (!p)
++		return -ETOOSMALL;
++	p += 2;
++
++	/*
++	 * All simple volumes with their signature are required to be listed
++	 * first.
++	 */
++	list_for_each_entry(bld, volumes, bld_list) {
++		num_vols++;
++		p = exp_xdr_reserve_qwords(xdr, 1);
++		if (!p)
++			return -ETOOSMALL;
++		p = exp_xdr_encode_u32(p, bld->bld_type);
++		switch (bld->bld_type) {
++			case PNFS_BLOCK_VOLUME_SIMPLE:
++				status = bl_encode_simple(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_SLICE:
++				status = bl_encode_slice(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_CONCAT:
++				status = bl_encode_concat(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_STRIPE:
++				status = bl_encode_stripe(xdr, bld);
++				break;
++			default:
++				BUG();
++		}
++		if (status)
++			goto error;
++	}
++
++	/* ---- Fill in the overall length and number of volumes ---- */
++	p = exp_xdr_encode_u32(layoutlen_p, (xdr->p - layoutlen_p - 1) * 4);
++	exp_xdr_encode_u32(p, num_vols);
++
++error:
++	return status;
++}
++EXPORT_SYMBOL_GPL(blocklayout_encode_devinfo);
++
++enum nfsstat4
++blocklayout_encode_layout(struct exp_xdr_stream *xdr,
++			  const struct list_head *bl_head)
++{
++	struct pnfs_blocklayout_layout	*b;
++	u32				*layoutlen_p	= xdr->p,
++					extents		= 0;
++	__be32 *p;
++
++	/*
++	 * Save spot for opaque block layout length and number of extents,
++	 * fill-in later.
++	 */
++	p = exp_xdr_reserve_qwords(xdr, 2);
++	if (!p)
++		return NFS4ERR_TOOSMALL;
++	p += 2;
++
++	list_for_each_entry(b, bl_head, bll_list) {
++		extents++;
++		p = exp_xdr_reserve_qwords(xdr, 5 * 2 + 1);
++		if (!p)
++			return NFS4ERR_TOOSMALL;
++		p = exp_xdr_encode_u64(p, b->bll_vol_id.sbid);
++		p = exp_xdr_encode_u64(p, b->bll_vol_id.devid);
++		p = exp_xdr_encode_u64(p, b->bll_foff);
++		p = exp_xdr_encode_u64(p, b->bll_len);
++		p = exp_xdr_encode_u64(p, b->bll_soff);
++		p = exp_xdr_encode_u32(p, b->bll_es);
++	}
++
++	/* ---- Fill in the overall length and number of extents ---- */
++	p = exp_xdr_encode_u32(layoutlen_p, (p - layoutlen_p - 1) * 4);
++	exp_xdr_encode_u32(p, extents);
++
++	return NFS4_OK;
++}
++EXPORT_SYMBOL_GPL(blocklayout_encode_layout);
+diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c
+--- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-08-23 12:09:03.283511561 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-08-23 12:09:03.283511561 -0400
+@@ -0,0 +1,218 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#include <linux/exp_xdr.h>
++#include <linux/module.h>
++#include <linux/nfs4.h>
++#include <linux/nfsd/nfsfh.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++/* We do our-own dprintk so filesystems are not dependent on sunrpc */
++#ifdef dprintk
++#undef dprintk
++#endif
++#define dprintk(fmt, args, ...)	do { } while (0)
++
++/* Calculate the XDR length in 4-byte words of the GETDEVICEINFO4resok
++ * structure, excluding gdir_notification and the gdir_device_addr
++ * da_layout_type. Each multipath entry is sized individually because
++ * filelayout_encode_devinfo() reserves only this many words up front. */
++static int fl_devinfo_xdr_words(const struct pnfs_filelayout_device *fdev)
++{
++	struct pnfs_filelayout_devaddr *fl_addr;
++	struct pnfs_filelayout_multipath *mp;
++	int i, j, nwords;
++
++	/* da_addr_body len, indices len, indices, multipath_list4 len */
++	nwords = 1 + 1 + fdev->fl_stripeindices_length + 1;
++	for (i = 0; i < fdev->fl_device_length; i++) {
++		mp = &fdev->fl_device_list[i];
++		nwords++; /* multipath list length */
++		for (j = 0; j < mp->fl_multipath_length; j++) {
++			fl_addr = &mp->fl_multipath_list[j];
++			nwords += 1 + exp_xdr_qwords(fl_addr->r_netid.len);
++			nwords += 1 + exp_xdr_qwords(fl_addr->r_addr.len);
++		}
++	}
++	dprintk("<-- %s nwords %d\n", __func__, nwords);
++	return nwords;
++}
++
++/* Encodes the nfsv4_1_file_layout_ds_addr4 structure from draft 13
++ * on the response stream.
++ * Use linux error codes (not nfs) since these values are being
++ * returned to the file system.
++ */
++int
++filelayout_encode_devinfo(struct exp_xdr_stream *xdr,
++			  const struct pnfs_filelayout_device *fdev)
++{
++	unsigned int i, j, len = 0, opaque_words;
++	u32 *p_in;
++	u32 index_count = fdev->fl_stripeindices_length;
++	u32 dev_count = fdev->fl_device_length;
++	int error = 0;
++	__be32 *p;
++
++	opaque_words = fl_devinfo_xdr_words(fdev);
++	dprintk("%s: Begin indx_cnt: %u dev_cnt: %u total size %u\n",
++		__func__,
++		index_count,
++		dev_count,
++		opaque_words*4);
++
++	/* check space for opaque length */
++	p = p_in = exp_xdr_reserve_qwords(xdr, opaque_words);
++	if (!p) {
++		error =  -ETOOSMALL;
++		goto out;
++	}
++
++	/* Fill in length later */
++	p++;
++
++	/* encode device list indices */
++	p = exp_xdr_encode_u32(p, index_count);
++	for (i = 0; i < index_count; i++)
++		p = exp_xdr_encode_u32(p, fdev->fl_stripeindices_list[i]);
++
++	/* encode device list */
++	p = exp_xdr_encode_u32(p, dev_count);
++	for (i = 0; i < dev_count; i++) {
++		struct pnfs_filelayout_multipath *mp = &fdev->fl_device_list[i];
++
++		p = exp_xdr_encode_u32(p, mp->fl_multipath_length);
++		for (j = 0; j < mp->fl_multipath_length; j++) {
++			struct pnfs_filelayout_devaddr *da =
++						&mp->fl_multipath_list[j];
++
++			/* Encode device info */
++			p = exp_xdr_encode_opaque(p, da->r_netid.data,
++						     da->r_netid.len);
++			p = exp_xdr_encode_opaque(p, da->r_addr.data,
++						     da->r_addr.len);
++		}
++	}
++
++	/* backfill in length. Subtract 4 for da_addr_body size */
++	len = (char *)p - (char *)p_in;
++	exp_xdr_encode_u32(p_in, len - 4);
++
++	error = 0;
++out:
++	dprintk("%s: End err %d xdrlen %d\n",
++		__func__, error, len);
++	return error;
++}
++EXPORT_SYMBOL(filelayout_encode_devinfo);
++
++/* Encodes the loc_body structure from draft 13
++ * on the response stream.
++ * Returns an NFSv4 status code (enum nfsstat4): NFS4_OK on success,
++ * NFS4ERR_LAYOUTUNAVAILABLE or NFS4ERR_TOOSMALL on failure.
++ */
++enum nfsstat4
++filelayout_encode_layout(struct exp_xdr_stream *xdr,
++			 const struct pnfs_filelayout_layout *flp)
++{
++	u32 len = 0, nfl_util, fhlen, i;
++	u32 *layoutlen_p;
++	enum nfsstat4 nfserr;
++	__be32 *p;
++
++	dprintk("%s: device_id %llx:%llx fsi %u, numfh %u\n",
++		__func__,
++		flp->device_id.pnfs_fsid,
++		flp->device_id.pnfs_devid,
++		flp->lg_first_stripe_index,
++		flp->lg_fh_length);
++
++	/* Ensure file system added at least one file handle */
++	if (flp->lg_fh_length <= 0) {
++		dprintk("%s: File Layout has no file handles!!\n", __func__);
++		nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		goto out;
++	}
++
++	/* Ensure room for len, devid, util, first_stripe_index,
++	 * pattern_offset, number of filehandles */
++	p = layoutlen_p = exp_xdr_reserve_qwords(xdr, 1+2+2+1+1+2+1);
++	if (!p) {
++		nfserr = NFS4ERR_TOOSMALL;
++		goto out;
++	}
++
++	/* save spot for opaque file layout length, fill-in later*/
++	p++;
++
++	/* encode device id */
++	p = exp_xdr_encode_u64(p, flp->device_id.sbid);
++	p = exp_xdr_encode_u64(p, flp->device_id.devid);
++
++	/* set and encode flags */
++	nfl_util = flp->lg_stripe_unit;
++	if (flp->lg_commit_through_mds)
++		nfl_util |= NFL4_UFLG_COMMIT_THRU_MDS;
++	if (flp->lg_stripe_type == STRIPE_DENSE)
++		nfl_util |= NFL4_UFLG_DENSE;
++	p = exp_xdr_encode_u32(p, nfl_util);
++
++	/* encode first stripe index */
++	p = exp_xdr_encode_u32(p, flp->lg_first_stripe_index);
++
++	/* encode striping pattern start */
++	p = exp_xdr_encode_u64(p, flp->lg_pattern_offset);
++
++	/* encode number of file handles */
++	p = exp_xdr_encode_u32(p, flp->lg_fh_length);
++
++	/* encode file handles */
++	for (i = 0; i < flp->lg_fh_length; i++) {
++		fhlen = flp->lg_fh_list[i].fh_size;
++		p = exp_xdr_reserve_space(xdr, 4 + fhlen);
++		if (!p) {
++			nfserr = NFS4ERR_TOOSMALL;
++			goto out;
++		}
++		p = exp_xdr_encode_opaque(p, &flp->lg_fh_list[i].fh_base, fhlen);
++	}
++
++	/* Set number of bytes encoded =  total_bytes_encoded - length var */
++	len = (char *)p - (char *)layoutlen_p;
++	exp_xdr_encode_u32(layoutlen_p, len - 4);
++
++	nfserr = NFS4_OK;
++out:
++	dprintk("%s: End err %u xdrlen %d\n",
++		__func__, nfserr, len);
++	return nfserr;
++}
++EXPORT_SYMBOL(filelayout_encode_layout);
+diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c
+--- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-08-23 12:09:03.284511493 -0400
++++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-08-23 12:09:03.284511493 -0400
+@@ -0,0 +1,289 @@
++/*
++ *  pnfs_osd_xdr_srv.c
++ *
++ *  Object-Based pNFS Layout XDR layer
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/pnfs_osd_xdr.h>
++
++/*
++ * struct pnfs_osd_data_map {
++ * 	u32	odm_num_comps;
++ * 	u64	odm_stripe_unit;
++ * 	u32	odm_group_width;
++ * 	u32	odm_group_depth;
++ * 	u32	odm_mirror_cnt;
++ * 	u32	odm_raid_algorithm;
++ * };
++ */
++static int pnfs_osd_xdr_encode_data_map(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_data_map *data_map)
++{
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 1+2+1+1+1+1);
++
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u32(p, data_map->odm_num_comps);
++	p = exp_xdr_encode_u64(p, data_map->odm_stripe_unit);
++	p = exp_xdr_encode_u32(p, data_map->odm_group_width);
++	p = exp_xdr_encode_u32(p, data_map->odm_group_depth);
++	p = exp_xdr_encode_u32(p, data_map->odm_mirror_cnt);
++	p = exp_xdr_encode_u32(p, data_map->odm_raid_algorithm);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline int pnfs_osd_xdr_encode_objid(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_objid *object_id)
++{
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 2+2+2+2);
++	struct nfsd4_pnfs_deviceid *dev_id =
++		(struct nfsd4_pnfs_deviceid *)&object_id->oid_device_id;
++
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u64(p, dev_id->sbid);
++	p = exp_xdr_encode_u64(p, dev_id->devid);
++	p = exp_xdr_encode_u64(p, object_id->oid_partition_id);
++	p = exp_xdr_encode_u64(p, object_id->oid_object_id);
++
++	return 0;
++}
++
++/*
++ * enum pnfs_osd_cap_key_sec4 {
++ * 	PNFS_OSD_CAP_KEY_SEC_NONE = 0,
++ * 	PNFS_OSD_CAP_KEY_SEC_SSV  = 1
++ * };
++ *
++ * struct pnfs_osd_object_cred {
++ * 	struct pnfs_osd_objid		oc_object_id;
++ * 	u32				oc_osd_version;
++ * 	u32				oc_cap_key_sec;
++ * 	struct pnfs_osd_opaque_cred	oc_cap_key;
++ * 	struct pnfs_osd_opaque_cred	oc_cap;
++ * };
++ */
++static int pnfs_osd_xdr_encode_object_cred(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_object_cred *olo_comp)
++{
++	__be32 *p;
++	int err;
++
++	err = pnfs_osd_xdr_encode_objid(xdr, &olo_comp->oc_object_id);
++	if (err)
++		return err;
++
++	p = exp_xdr_reserve_space(xdr, 3*4 + 4+olo_comp->oc_cap.cred_len);
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u32(p, olo_comp->oc_osd_version);
++
++	/* No sec for now */
++	p = exp_xdr_encode_u32(p, PNFS_OSD_CAP_KEY_SEC_NONE);
++	p = exp_xdr_encode_u32(p, 0); /* opaque oc_capability_key<> */
++
++	exp_xdr_encode_opaque(p, olo_comp->oc_cap.cred,
++			      olo_comp->oc_cap.cred_len);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_layout {
++ * 	struct pnfs_osd_data_map	olo_map;
++ * 	u32				olo_comps_index;
++ * 	u32				olo_num_comps;
++ * 	struct pnfs_osd_object_cred	*olo_comps;
++ * };
++ */
++int pnfs_osd_xdr_encode_layout(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_layout *pol)
++{
++	__be32 *p;
++	u32 i;
++	int err;
++
++	err = pnfs_osd_xdr_encode_data_map(xdr, &pol->olo_map);
++	if (err)
++		return err;
++
++	p = exp_xdr_reserve_qwords(xdr, 2);
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u32(p, pol->olo_comps_index);
++	p = exp_xdr_encode_u32(p, pol->olo_num_comps);
++
++	for (i = 0; i < pol->olo_num_comps; i++) {
++		err = pnfs_osd_xdr_encode_object_cred(xdr, &pol->olo_comps[i]);
++		if (err)
++			return err;
++	}
++
++	return 0;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_encode_layout);
++
++/* Emit str as an XDR opaque; -E2BIG when the space reservation fails. */
++static int _encode_string(struct exp_xdr_stream *xdr,
++			  const struct nfs4_string *str)
++{
++	__be32 *dst = exp_xdr_reserve_space(xdr, 4 + str->len);
++
++	if (dst)
++		exp_xdr_encode_opaque(dst, str->data, str->len);
++	return dst ? 0 : -E2BIG;
++}
++
++/* struct pnfs_osd_deviceaddr {
++ * 	struct pnfs_osd_targetid	oda_targetid;
++ * 	struct pnfs_osd_targetaddr	oda_targetaddr;
++ * 	u8				oda_lun[8];
++ * 	struct nfs4_string		oda_systemid;
++ * 	struct pnfs_osd_object_cred	oda_root_obj_cred;
++ * 	struct nfs4_string		oda_osdname;
++ * };
++ */
++int pnfs_osd_xdr_encode_deviceaddr(
++	struct exp_xdr_stream *xdr, struct pnfs_osd_deviceaddr *devaddr)
++{
++	__be32 *p;
++	int err;
++
++	p = exp_xdr_reserve_space(xdr, 4 + 4 + sizeof(devaddr->oda_lun));
++	if (!p)
++		return -E2BIG;
++
++	/* Empty oda_targetid */
++	p = exp_xdr_encode_u32(p, OBJ_TARGET_ANON);
++
++	/* Empty oda_targetaddr for now */
++	p = exp_xdr_encode_u32(p, 0);
++
++	/* oda_lun */
++	exp_xdr_encode_bytes(p, devaddr->oda_lun, sizeof(devaddr->oda_lun));
++
++	err = _encode_string(xdr, &devaddr->oda_systemid);
++	if (err)
++		return err;
++
++	err = pnfs_osd_xdr_encode_object_cred(xdr,
++					      &devaddr->oda_root_obj_cred);
++	if (err)
++		return err;
++
++	err = _encode_string(xdr, &devaddr->oda_osdname);
++	if (err)
++		return err;
++
++	return 0;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_encode_deviceaddr);
++
++/*
++ * struct pnfs_osd_layoutupdate {
++ * 	u32	dsu_valid;
++ * 	s64	dsu_delta;
++ * 	u32	olu_ioerr_flag;
++ * };
++ */
++__be32 *
++pnfs_osd_xdr_decode_layoutupdate(struct pnfs_osd_layoutupdate *lou, __be32 *p)
++{
++	lou->dsu_valid = be32_to_cpu(*p++);
++	if (lou->dsu_valid)
++		p = xdr_decode_hyper(p, &lou->dsu_delta);
++	lou->olu_ioerr_flag = be32_to_cpu(*p++);
++	return p;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_decode_layoutupdate);
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline __be32 *
++pnfs_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid)
++{
++	/* FIXME: p = xdr_decode_fixed(...) */
++	memcpy(objid->oid_device_id.data, p, sizeof(objid->oid_device_id.data));
++	p += XDR_QUADLEN(sizeof(objid->oid_device_id.data));
++
++	p = xdr_decode_hyper(p, &objid->oid_partition_id);
++	p = xdr_decode_hyper(p, &objid->oid_object_id);
++	return p;
++}
++
++/*
++ * struct pnfs_osd_ioerr {
++ * 	struct pnfs_osd_objid	oer_component;
++ * 	u64			oer_comp_offset;
++ * 	u64			oer_comp_length;
++ * 	u32			oer_iswrite;
++ * 	u32			oer_errno;
++ * };
++ */
++__be32 *
++pnfs_osd_xdr_decode_ioerr(struct pnfs_osd_ioerr *ioerr, __be32 *p)
++{
++	p = pnfs_osd_xdr_decode_objid(p, &ioerr->oer_component);
++	p = xdr_decode_hyper(p, &ioerr->oer_comp_offset);
++	p = xdr_decode_hyper(p, &ioerr->oer_comp_length);
++	ioerr->oer_iswrite = be32_to_cpu(*p++);
++	ioerr->oer_errno = be32_to_cpu(*p++);
++	return p;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_decode_ioerr);
+diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gfs2/ops_fstype.c
+--- linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-08-23 12:09:03.285539075 -0400
+@@ -19,6 +19,7 @@
+ #include <linux/gfs2_ondisk.h>
+ #include <linux/slow-work.h>
+ #include <linux/quotaops.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
+ 
+ #include "gfs2.h"
+ #include "incore.h"
+@@ -1146,6 +1147,9 @@ static int fill_super(struct super_block
+ 	sb->s_magic = GFS2_MAGIC;
+ 	sb->s_op = &gfs2_super_ops;
+ 	sb->s_export_op = &gfs2_export_ops;
++#if defined(CONFIG_PNFSD)
++	sb->s_pnfs_op = &pnfs_dlm_export_ops;
++#endif /* CONFIG_PNFSD */
+ 	sb->s_xattr = gfs2_xattr_handlers;
+ 	sb->s_qcop = &gfs2_quotactl_ops;
+ 	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
+--- linux-2.6.34.noarch/fs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/Kconfig	2010-08-23 12:09:03.286512316 -0400
+@@ -224,6 +224,31 @@ config LOCKD_V4
+ config EXPORTFS
+ 	tristate
+ 
++config EXPORTFS_FILE_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 files layout type.
++	  Must be automatically selected by supporting filesystems.
++
++config EXPORTFS_OSD_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 objects layout type.
++	  Must be automatically selected by supporting osd
++	  filesystems.
++
++	  If unsure, say N.
++
++config EXPORTFS_BLOCK_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 blocks layout type.
++	  Must be automatically selected by supporting filesystems.
++
++
+ config NFS_ACL_SUPPORT
+ 	tristate
+ 	select FS_POSIX_ACL
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-08-23 12:09:03.287381619 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-08-23 12:09:03.287381619 -0400
+@@ -0,0 +1,66 @@
++#include <linux/module.h>
++#include <linux/uaccess.h>
++#include <linux/proc_fs.h>
++#include <linux/string.h>
++#include <linux/slab.h>
++#include <linux/ctype.h>
++#include <linux/sched.h>
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY NFSDBG_PNFS_LD
++
++struct pipefs_list bl_device_list;
++struct dentry *bl_device_pipe;
++
++ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, size_t len)
++{
++	int err;
++	struct pipefs_hdr *msg;
++
++	dprintk("Entering %s...\n", __func__);
++
++	msg = pipefs_readmsg(filp, src, len);
++	if (IS_ERR(msg)) {
++		dprintk("ERROR: unable to read pipefs message.\n");
++		return PTR_ERR(msg);
++	}
++
++	/* now assign the result, which wakes the blocked thread */
++	err = pipefs_assign_upcall_reply(msg, &bl_device_list);
++	if (err) {
++		dprintk("ERROR: failed to assign upcall with id %u\n",
++			msg->msgid);
++		kfree(msg);
++	}
++	return len;
++}
++
++static const struct rpc_pipe_ops bl_pipe_ops = {
++	.upcall         = pipefs_generic_upcall,
++	.downcall       = bl_pipe_downcall,
++	.destroy_msg    = pipefs_generic_destroy_msg,
++};
++
++/* Set up the pipe and bl_device_list; a failed pipe is logged, not fatal. */
++int bl_pipe_init(void)
++{
++	dprintk("%s: block_device pipefs registering...\n", __func__);
++	bl_device_pipe = pipefs_mkpipe("bl_device_pipe", &bl_pipe_ops, 1);
++	if (IS_ERR(bl_device_pipe))
++		dprintk("ERROR, unable to make block_device pipe\n");
++	else if (!bl_device_pipe)
++		dprintk("bl_device_pipe is NULL!\n");
++	else
++		dprintk("bl_device_pipe created!\n");
++	pipefs_init_list(&bl_device_list);
++	return 0;
++}
++
++void bl_pipe_exit(void)
++{
++	dprintk("%s: block_device pipefs unregistering...\n", __func__);
++	if (IS_ERR(bl_device_pipe))
++		return ;
++	pipefs_closepipe(bl_device_pipe);
++	return;
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-08-23 12:09:03.288501648 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-08-23 12:09:03.288501648 -0400
+@@ -0,0 +1,1160 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayout.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++
++#include <linux/buffer_head.h> /* various write calls */
++#include <linux/bio.h> /* struct bio */
++#include <linux/vmalloc.h>
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
++MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
++
++/* Callback operations to the pNFS client */
++static struct pnfs_client_operations *pnfs_block_callback_ops;
++
++static void print_page(struct page *page)
++{
++	dprintk("PRINTPAGE page %p\n", page);
++	dprintk("        PagePrivate %d\n", PagePrivate(page));
++	dprintk("        PageUptodate %d\n", PageUptodate(page));
++	dprintk("        PageError %d\n", PageError(page));
++	dprintk("        PageDirty %d\n", PageDirty(page));
++	dprintk("        PageReferenced %d\n", PageReferenced(page));
++	dprintk("        PageLocked %d\n", PageLocked(page));
++	dprintk("        PageWriteback %d\n", PageWriteback(page));
++	dprintk("        PageMappedToDisk %d\n", PageMappedToDisk(page));
++	dprintk("\n");
++}
++
++/* Decide whether the page data at isect needs to be initialized.  be is
++ * the extent associated with isect: NONE_DATA is always a hole, while
++ * INVALID_DATA is a hole until the sector has been initialized.
++ */
++static int is_hole(struct pnfs_block_extent *be, sector_t isect)
++{
++	if (be->be_state == PNFS_BLOCK_NONE_DATA)
++		return 1;
++	if (be->be_state != PNFS_BLOCK_INVALID_DATA)
++		return 0;
++	return !is_sector_initialized(be->be_inval, isect);
++}
++
++/* Decide whether page data at isect can be written to disk.  be is the
++ * extent associated with isect: READWRITE_DATA is always writable, while
++ * INVALID_DATA is writable only once the sector has been initialized.
++ */
++static int is_writable(struct pnfs_block_extent *be, sector_t isect)
++{
++	if (be->be_state == PNFS_BLOCK_READWRITE_DATA)
++		return 1;
++	if (be->be_state != PNFS_BLOCK_INVALID_DATA)
++		return 0;
++	return is_sector_initialized(be->be_inval, isect);
++}
++
++/*
++ * Report whether the request came from the _multi caller (nonzero
++ * wb_complete) as opposed to the _one caller (wb_complete of zero).
++ */
++static int
++dont_like_caller(struct nfs_page *req)
++{
++	int from_multi = atomic_read(&req->wb_complete) != 0;
++
++	return from_multi;
++}
++
++static enum pnfs_try_status
++bl_commit(struct nfs_write_data *nfs_data,
++	  int sync)
++{
++	dprintk("%s enter\n", __func__);
++	return PNFS_NOT_ATTEMPTED;
++}
++
++/* The data we are handed might be spread across several bios.  We need
++ * to track when the last one is finished.
++ */
++struct parallel_io {
++	struct kref refcnt;
++	struct rpc_call_ops call_ops;
++	void (*pnfs_callback) (void *data);
++	void *data;
++};
++
++static inline struct parallel_io *alloc_parallel(void *data)
++{
++	struct parallel_io *rv;
++
++	rv  = kmalloc(sizeof(*rv), GFP_KERNEL);
++	if (rv) {
++		rv->data = data;
++		kref_init(&rv->refcnt);
++	}
++	return rv;
++}
++
++static inline void get_parallel(struct parallel_io *p)
++{
++	kref_get(&p->refcnt);
++}
++
++static void destroy_parallel(struct kref *kref)
++{
++	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
++
++	dprintk("%s enter\n", __func__);
++	p->pnfs_callback(p->data);
++	kfree(p);
++}
++
++static inline void put_parallel(struct parallel_io *p)
++{
++	kref_put(&p->refcnt, destroy_parallel);
++}
++
++static struct bio *
++bl_submit_bio(int rw, struct bio *bio)
++{
++	if (bio) {
++		get_parallel(bio->bi_private);
++		dprintk("%s submitting %s bio %u@%llu\n", __func__,
++			rw == READ ? "read" : "write",
++			bio->bi_size, (u64)bio->bi_sector);
++		submit_bio(rw, bio);
++	}
++	return NULL;
++}
++
++static inline void
++bl_done_with_rpage(struct page *page, const int ok)
++{
++	if (ok) {
++		ClearPagePnfsErr(page);
++		SetPageUptodate(page);
++	} else {
++		ClearPageUptodate(page);
++		SetPageError(page);
++		SetPagePnfsErr(page);
++	}
++	/* Page is unlocked via rpc_release.  Should really be done here. */
++}
++
++/* This is basically copied from mpage_end_io_read */
++static void bl_end_io_read(struct bio *bio, int err)
++{
++	void *data = bio->bi_private;
++	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
++	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
++
++	do {
++		struct page *page = bvec->bv_page;
++
++		if (--bvec >= bio->bi_io_vec)
++			prefetchw(&bvec->bv_page->flags);
++		bl_done_with_rpage(page, uptodate);
++	} while (bvec >= bio->bi_io_vec);
++	bio_put(bio);
++	put_parallel(data);
++}
++
++static void bl_read_cleanup(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_read_data *rdata;
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	rdata = container_of(task, struct nfs_read_data, task);
++	pnfs_block_callback_ops->nfs_readlist_complete(rdata);
++}
++
++static void
++bl_end_par_io_read(void *data)
++{
++	struct nfs_read_data *rdata = data;
++
++	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
++	schedule_work(&rdata->task.u.tk_work);
++}
++
++/* We don't want normal .rpc_call_done callback used, so we replace it
++ * with this stub.
++ */
++static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata)
++{
++	return;
++}
++
++static enum pnfs_try_status
++bl_read_pagelist(struct nfs_read_data *rdata,
++		 unsigned nr_pages)
++{
++	int i, hole;
++	struct bio *bio = NULL;
++	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
++	sector_t isect, extent_length = 0;
++	struct parallel_io *par;
++	loff_t f_offset = rdata->args.offset;
++	size_t count = rdata->args.count;
++	struct page **pages = rdata->args.pages;
++	int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
++
++	dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__,
++	       nr_pages, f_offset, count);
++
++	if (dont_like_caller(rdata->req)) {
++		dprintk("%s dont_like_caller failed\n", __func__);
++		goto use_mds;
++	}
++	if ((nr_pages == 1) && PagePnfsErr(rdata->req->wb_page)) {
++		/* We want to fall back to mds in case of read_page
++		 * after error on read_pages.
++		 */
++		dprintk("%s PG_pnfserr set\n", __func__);
++		goto use_mds;
++	}
++	par = alloc_parallel(rdata);
++	if (!par)
++		goto use_mds;
++	par->call_ops = *rdata->pdata.call_ops;
++	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
++	par->pnfs_callback = bl_end_par_io_read;
++	/* At this point, we can no longer jump to use_mds */
++
++	isect = (sector_t) (f_offset >> 9);
++	/* Code assumes extents are page-aligned */
++	for (i = pg_index; i < nr_pages; i++) {
++		if (!extent_length) {
++			/* We've used up the previous extent */
++			put_extent(be);
++			put_extent(cow_read);
++			bio = bl_submit_bio(READ, bio);
++			/* Get the next one */
++			be = find_get_extent(BLK_LSEG2EXT(rdata->pdata.lseg),
++					     isect, &cow_read);
++			if (!be) {
++				/* Error out this page */
++				bl_done_with_rpage(pages[i], 0);
++				break;
++			}
++			extent_length = be->be_length -
++				(isect - be->be_f_offset);
++			if (cow_read) {
++				sector_t cow_length = cow_read->be_length -
++					(isect - cow_read->be_f_offset);
++				extent_length = min(extent_length, cow_length);
++			}
++		}
++		hole = is_hole(be, isect);
++		if (hole && !cow_read) {
++			bio = bl_submit_bio(READ, bio);
++			/* Fill hole w/ zeroes w/o accessing device */
++			dprintk("%s Zeroing page for hole\n", __func__);
++			zero_user(pages[i], 0,
++				  min_t(int, PAGE_CACHE_SIZE, count));
++			print_page(pages[i]);
++			bl_done_with_rpage(pages[i], 1);
++		} else {
++			struct pnfs_block_extent *be_read;
++
++			be_read = (hole && cow_read) ? cow_read : be;
++			for (;;) {
++				if (!bio) {
++					bio = bio_alloc(GFP_NOIO, nr_pages - i);
++					if (!bio) {
++						/* Error out this page */
++						bl_done_with_rpage(pages[i], 0);
++						break;
++					}
++					bio->bi_sector = isect -
++						be_read->be_f_offset +
++						be_read->be_v_offset;
++					bio->bi_bdev = be_read->be_mdev;
++					bio->bi_end_io = bl_end_io_read;
++					bio->bi_private = par;
++				}
++				if (bio_add_page(bio, pages[i], PAGE_SIZE, 0))
++					break;
++				bio = bl_submit_bio(READ, bio);
++			}
++		}
++		isect += PAGE_CACHE_SIZE >> 9;
++		extent_length -= PAGE_CACHE_SIZE >> 9;
++	}
++	if ((isect << 9) >= rdata->inode->i_size) {
++		rdata->res.eof = 1;
++		rdata->res.count = rdata->inode->i_size - f_offset;
++	} else {
++		rdata->res.count = (isect << 9) - f_offset;
++	}
++	put_extent(be);
++	put_extent(cow_read);
++	bl_submit_bio(READ, bio);
++	put_parallel(par);
++	return PNFS_ATTEMPTED;
++
++ use_mds:
++	dprintk("Giving up and using normal NFS\n");
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static void mark_extents_written(struct pnfs_block_layout *bl,
++				 __u64 offset, __u32 count)
++{
++	sector_t isect, end;
++	struct pnfs_block_extent *be;
++
++	dprintk("%s(%llu, %u)\n", __func__, offset, count);
++	if (count == 0)
++		return;
++	isect = (offset & (long)(PAGE_CACHE_MASK)) >> 9;
++	end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK);
++	end >>= 9;
++	while (isect < end) {
++		sector_t len;
++		be = find_get_extent(bl, isect, NULL);
++		BUG_ON(!be); /* FIXME */
++		len = min(end, be->be_f_offset + be->be_length) - isect;
++		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++			mark_for_commit(be, isect, len); /* What if fails? */
++		isect += len;
++		put_extent(be);
++	}
++}
++
++/* STUB - this needs thought */
++static inline void
++bl_done_with_wpage(struct page *page, const int ok)
++{
++	if (!ok) {
++		SetPageError(page);
++		SetPagePnfsErr(page);
++		/* This is an inline copy of nfs_zap_mapping */
++		/* This is oh so fishy, and needs deep thought */
++		if (page->mapping->nrpages != 0) {
++			struct inode *inode = page->mapping->host;
++			spin_lock(&inode->i_lock);
++			NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
++			spin_unlock(&inode->i_lock);
++		}
++	}
++	/* end_page_writeback called in rpc_release.  Should be done here. */
++}
++
++/* This is basically copied from mpage_end_io_write */
++static void bl_end_io_write(struct bio *bio, int err)
++{
++	void *data = bio->bi_private;
++	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
++	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
++
++	do {
++		struct page *page = bvec->bv_page;
++
++		if (--bvec >= bio->bi_io_vec)
++			prefetchw(&bvec->bv_page->flags);
++		bl_done_with_wpage(page, uptodate);
++	} while (bvec >= bio->bi_io_vec);
++	bio_put(bio);
++	put_parallel(data);
++}
++
++/* Function scheduled for call during bl_end_par_io_write,
++ * it marks sectors as written and extends the commitlist.
++ */
++static void bl_write_cleanup(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++	if (!wdata->task.tk_status) {
++		/* Marks for LAYOUTCOMMIT */
++		/* BUG - this should be called after each bio, not after
++		 * all finish, unless have some way of storing success/failure
++		 */
++		mark_extents_written(BLK_LSEG2EXT(wdata->pdata.lseg),
++				     wdata->args.offset, wdata->args.count);
++	}
++	pnfs_block_callback_ops->nfs_writelist_complete(wdata);
++}
++
++/* Called when last of bios associated with a bl_write_pagelist call finishes */
++static void
++bl_end_par_io_write(void *data)
++{
++	struct nfs_write_data *wdata = data;
++
++	/* STUB - ignoring error handling */
++	wdata->task.tk_status = 0;
++	wdata->verf.committed = NFS_FILE_SYNC;
++	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
++	schedule_work(&wdata->task.u.tk_work);
++}
++
++static enum pnfs_try_status
++bl_write_pagelist(struct nfs_write_data *wdata,
++		  unsigned nr_pages,
++		  int sync)
++{
++	int i;
++	struct bio *bio = NULL;
++	struct pnfs_block_extent *be = NULL;
++	sector_t isect, extent_length = 0;
++	struct parallel_io *par;
++	loff_t offset = wdata->args.offset;
++	size_t count = wdata->args.count;
++	struct page **pages = wdata->args.pages;
++	int pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
++
++	dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
++	if (!wdata->req->wb_lseg) {
++		dprintk("%s no lseg, falling back to MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	if (dont_like_caller(wdata->req)) {
++		dprintk("%s dont_like_caller failed\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	/* At this point, wdata->pages is a (sequential) list of nfs_pages.
++	 * We want to write each, and if there is an error remove it from
++	 * list and call
++	 * nfs_retry_request(req) to have it redone using nfs.
++	 * QUEST? Do as block or per req?  Think have to do per block
++	 * as part of end_bio
++	 */
++	par = alloc_parallel(wdata);
++	if (!par)
++		return PNFS_NOT_ATTEMPTED;
++	par->call_ops = *wdata->pdata.call_ops;
++	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
++	par->pnfs_callback = bl_end_par_io_write;
++	/* At this point, have to be more careful with error handling */
++
++	isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> 9);
++	for (i = pg_index; i < nr_pages; i++) {
++		if (!extent_length) {
++			/* We've used up the previous extent */
++			put_extent(be);
++			bio = bl_submit_bio(WRITE, bio);
++			/* Get the next one */
++			be = find_get_extent(BLK_LSEG2EXT(wdata->pdata.lseg),
++					     isect, NULL);
++			if (!be || !is_writable(be, isect)) {
++				/* FIXME */
++				bl_done_with_wpage(pages[i], 0);
++				break;
++			}
++			extent_length = be->be_length -
++				(isect - be->be_f_offset);
++		}
++		for (;;) {
++			if (!bio) {
++				bio = bio_alloc(GFP_NOIO, nr_pages - i);
++				if (!bio) {
++					/* Error out this page */
++					/* FIXME */
++					bl_done_with_wpage(pages[i], 0);
++					break;
++				}
++				bio->bi_sector = isect - be->be_f_offset +
++					be->be_v_offset;
++				bio->bi_bdev = be->be_mdev;
++				bio->bi_end_io = bl_end_io_write;
++				bio->bi_private = par;
++			}
++			if (bio_add_page(bio, pages[i], PAGE_SIZE, 0))
++				break;
++			bio = bl_submit_bio(WRITE, bio);
++		}
++		isect += PAGE_CACHE_SIZE >> 9;
++		extent_length -= PAGE_CACHE_SIZE >> 9;
++	}
++	wdata->res.count = (isect << 9) - (offset & (long)PAGE_CACHE_MASK);
++	put_extent(be);
++	bl_submit_bio(WRITE, bio);
++	put_parallel(par);
++	return PNFS_ATTEMPTED;
++}
++
++/* FIXME - range ignored */
++static void
++release_extents(struct pnfs_block_layout *bl,
++		struct nfs4_pnfs_layout_segment *range)
++{
++	int i;
++	struct pnfs_block_extent *be;
++
++	spin_lock(&bl->bl_ext_lock);
++	for (i = 0; i < EXTENT_LISTS; i++) {
++		while (!list_empty(&bl->bl_extents[i])) {
++			be = list_first_entry(&bl->bl_extents[i],
++					      struct pnfs_block_extent,
++					      be_node);
++			list_del(&be->be_node);
++			put_extent(be);
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++}
++
++static void
++release_inval_marks(struct pnfs_inval_markings *marks)
++{
++	struct pnfs_inval_tracking *pos, *temp;
++
++	list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
++		list_del(&pos->it_link);
++		kfree(pos);
++	}
++	return;
++}
++
++/* Note we are relying on caller locking to prevent nasty races. */
++static void
++bl_free_layout(struct pnfs_layout_type *lo)
++{
++	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
++
++	dprintk("%s enter\n", __func__);
++	release_extents(bl, NULL);
++	release_inval_marks(&bl->bl_inval);
++	kfree(bl);
++}
++
++static struct pnfs_layout_type *
++bl_alloc_layout(struct inode *inode)
++{
++	struct pnfs_block_layout	*bl;
++
++	dprintk("%s enter\n", __func__);
++	bl = kzalloc(sizeof(*bl), GFP_KERNEL);
++	if (!bl)
++		return NULL;
++	spin_lock_init(&bl->bl_ext_lock);
++	INIT_LIST_HEAD(&bl->bl_extents[0]);
++	INIT_LIST_HEAD(&bl->bl_extents[1]);
++	INIT_LIST_HEAD(&bl->bl_commit);
++	bl->bl_count = 0;
++	bl->bl_blocksize = NFS_SERVER(inode)->pnfs_blksize >> 9;
++	INIT_INVAL_MARKS(&bl->bl_inval, bl->bl_blocksize);
++	return &bl->bl_layout;
++}
++
++static void
++bl_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	dprintk("%s enter\n", __func__);
++	kfree(lseg);
++}
++
++/* Because the generic infrastructure does not correctly merge layouts,
++ * we pretty much ignore lseg, and store all data layout wide, so we
++ * can correctly merge.  Eventually we should push some correct merge
++ * behavior up to the generic code, as the current behavior tends to
++ * cause lots of unnecessary overlapping LAYOUTGET requests.
++ */
++static struct pnfs_layout_segment *
++bl_alloc_lseg(struct pnfs_layout_type *lo,
++	      struct nfs4_pnfs_layoutget_res *lgr)
++{
++	struct pnfs_layout_segment *lseg;
++	int status;
++
++	dprintk("%s enter\n", __func__);
++	lseg = kzalloc(sizeof(*lseg) + 0, GFP_KERNEL);
++	if (!lseg)
++		return NULL;
++	status = nfs4_blk_process_layoutget(lo, lgr);
++	if (status) {
++		/* We don't want to call the full-blown bl_free_lseg,
++		 * since on error extents were not touched.
++		 */
++		/* STUB - we really want to distinguish between 2 error
++		 * conditions here.  This lseg failed, but lo data structures
++		 * are OK, or we hosed the lo data structures.  The calling
++		 * code probably needs to distinguish this too.
++		 */
++		kfree(lseg);
++		return ERR_PTR(status);
++	}
++	return lseg;
++}
++
++static int
++bl_setup_layoutcommit(struct pnfs_layout_type *lo,
++		      struct pnfs_layoutcommit_arg *arg)
++{
++	struct nfs_server *nfss = PNFS_NFS_SERVER(lo);
++	struct bl_layoutupdate_data *layoutupdate_data;
++
++	dprintk("%s enter\n", __func__);
++	/* Need to ensure commit is block-size aligned */
++	if (nfss->pnfs_blksize) {
++		u64 mask = nfss->pnfs_blksize - 1;
++		u64 offset = arg->lseg.offset & mask;
++
++		arg->lseg.offset -= offset;
++		arg->lseg.length += offset + mask;
++		arg->lseg.length &= ~mask;
++	}
++
++	layoutupdate_data = kmalloc(sizeof(struct bl_layoutupdate_data),
++					 GFP_KERNEL);
++	if (unlikely(!layoutupdate_data))
++		return -ENOMEM;
++	INIT_LIST_HEAD(&layoutupdate_data->ranges);
++	arg->layoutdriver_data = layoutupdate_data;
++
++	return 0;
++}
++
++static void
++bl_encode_layoutcommit(struct pnfs_layout_type *lo, struct xdr_stream *xdr,
++		       const struct pnfs_layoutcommit_arg *arg)
++{
++	dprintk("%s enter\n", __func__);
++	encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg);
++}
++
++static void
++bl_cleanup_layoutcommit(struct pnfs_layout_type *lo,
++			struct pnfs_layoutcommit_arg *arg, int status)
++{
++	dprintk("%s enter\n", __func__);
++	clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), arg, status);
++	kfree(arg->layoutdriver_data);
++}
++
++static void free_blk_mountid(struct block_mount_id *mid)
++{
++	if (mid) {
++		struct pnfs_block_dev *dev;
++		spin_lock(&mid->bm_lock);
++		while (!list_empty(&mid->bm_devlist)) {
++			dev = list_first_entry(&mid->bm_devlist,
++					       struct pnfs_block_dev,
++					       bm_node);
++			list_del(&dev->bm_node);
++			free_block_dev(dev);
++		}
++		spin_unlock(&mid->bm_lock);
++		kfree(mid);
++	}
++}
++
++/* Fetch the GETDEVICEINFO reply for d_id into temporary vmapped pages and
++ * decode it into a pnfs_block_dev.  Mostly copied from the filelayout's
++ * get_device_info function; much of this should be at the generic pnfs
++ * level.  Returns NULL on any failure.
++ */
++static struct pnfs_block_dev *
++nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
++			struct pnfs_deviceid *d_id,
++			struct list_head *sdlist)
++{
++	struct pnfs_device *dev;
++	struct pnfs_block_dev *rv = NULL;
++	u32 max_resp_sz;
++	struct page **pages = NULL;
++	int max_pages, i, rc;
++
++	/* Use the session max response size as the basis for setting
++	 * GETDEVICEINFO's maxcount
++	 */
++	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
++	max_pages = max_resp_sz >> PAGE_SHIFT;
++	dprintk("%s max_resp_sz %u max_pages %d\n",
++		__func__, max_resp_sz, max_pages);
++	/* Zeroed so that out_free sees dev->area == NULL before vmap() */
++	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
++	if (!dev) {
++		dprintk("%s kmalloc failed\n", __func__);
++		return NULL;
++	}
++
++	pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
++	if (pages == NULL) {
++		kfree(dev);
++		return NULL;
++	}
++	for (i = 0; i < max_pages; i++) {
++		pages[i] = alloc_page(GFP_KERNEL);
++		if (!pages[i])
++			goto out_free;
++	}
++
++	dev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
++	if (!dev->area)
++		goto out_free;
++
++	memcpy(&dev->dev_id, d_id, sizeof(*d_id));
++	dev->layout_type = LAYOUT_BLOCK_VOLUME;
++	dev->dev_notify_types = 0;
++	dev->pages = pages;
++	dev->pgbase = 0;
++	dev->pglen = PAGE_SIZE * max_pages;
++	dev->mincount = 0;
++
++	dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
++	rc = pnfs_block_callback_ops->nfs_getdeviceinfo(server, dev);
++	dprintk("%s getdevice info returns %d\n", __func__, rc);
++	if (rc)
++		goto out_free;
++
++	rv = nfs4_blk_decode_device(server, dev, sdlist);
++ out_free:
++	if (dev->area != NULL)
++		vunmap(dev->area);
++	for (i = 0; i < max_pages; i++)
++		if (pages[i])	/* the allocation loop may have stopped early */
++			__free_page(pages[i]);
++	kfree(pages);
++	kfree(dev);
++	return rv;
++}
++
++
++/*
++ * Retrieve the list of available devices for the mountpoint.
++ * Returns 0 with server->pnfs_ld_data set, or a nonzero error status.
++ */
++static int
++bl_initialize_mountpoint(struct nfs_server *server, const struct nfs_fh *fh)
++{
++	struct block_mount_id *b_mt_id = NULL;
++	struct pnfs_devicelist *dlist = NULL;
++	struct pnfs_block_dev *bdev;
++	LIST_HEAD(block_disklist);
++	int status = -ENOMEM, i;	/* covers both allocations below */
++
++	dprintk("%s enter\n", __func__);
++
++	if (server->pnfs_blksize == 0) {
++		dprintk("%s Server did not return blksize\n", __func__);
++		return -EINVAL;
++	}
++	b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_KERNEL);
++	if (!b_mt_id)
++		goto out_error;
++	/* Initialize nfs4 block layout mount id */
++	spin_lock_init(&b_mt_id->bm_lock);
++	INIT_LIST_HEAD(&b_mt_id->bm_devlist);
++
++	dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_KERNEL);
++	if (!dlist)
++		goto out_error;
++	dlist->eof = 0;
++	while (!dlist->eof) {
++		status = pnfs_block_callback_ops->nfs_getdevicelist(
++							server, fh, dlist);
++		if (status)
++			goto out_error;
++		dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n",
++			__func__, dlist->num_devs, dlist->eof);
++		/* For each device returned in dlist, call GETDEVICEINFO, and
++		 * decode the opaque topology encoding to create a flat
++		 * volume topology, matching VOLUME_SIMPLE disk signatures
++		 * to disks in the visible block disk list.
++		 * Construct an LVM meta device from the flat volume topology.
++		 */
++		for (i = 0; i < dlist->num_devs; i++) {
++			bdev = nfs4_blk_get_deviceinfo(server, fh,
++						     &dlist->dev_id[i],
++						     &block_disklist);
++			if (!bdev) {
++				/* status is 0 here; do not report success */
++				status = -EIO;
++				goto out_error;
++			}
++			spin_lock(&b_mt_id->bm_lock);
++			list_add(&bdev->bm_node, &b_mt_id->bm_devlist);
++			spin_unlock(&b_mt_id->bm_lock);
++		}
++	}
++	dprintk("%s SUCCESS\n", __func__);
++	server->pnfs_ld_data = b_mt_id;
++
++ out_return:
++	kfree(dlist);
++	return status;
++
++ out_error:
++	free_blk_mountid(b_mt_id);
++	goto out_return;
++}
++
++static int
++bl_uninitialize_mountpoint(struct nfs_server *server)
++{
++	struct block_mount_id *b_mt_id = server->pnfs_ld_data;
++
++	dprintk("%s enter\n", __func__);
++	free_blk_mountid(b_mt_id);
++	dprintk("%s RETURNS\n", __func__);
++	return 0;
++}
++
++/* STUB - mark intersection of layout and page as bad, so is not
++ * used again.
++ */
++static void mark_bad_read(void)
++{
++	return;
++}
++
++/* Copied from buffer.c */
++static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
++{
++	if (uptodate) {
++		set_buffer_uptodate(bh);
++	} else {
++		/* This happens, due to failed READA attempts. */
++		clear_buffer_uptodate(bh);
++	}
++	unlock_buffer(bh);
++}
++
++/* Copied from buffer.c */
++static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
++{
++	__end_buffer_read_notouch(bh, uptodate);
++}
++
++/*
++ * map_block:  map a requested I/O block (isect) into an offset in the LVM
++ * meta block_device
++ */
++static void
++map_block(sector_t isect, struct pnfs_block_extent *be, struct buffer_head *bh)
++{
++	dprintk("%s enter be=%p\n", __func__, be);
++
++	set_buffer_mapped(bh);
++	bh->b_bdev = be->be_mdev;
++	bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >>
++		(be->be_mdev->bd_inode->i_blkbits - 9);
++
++	dprintk("%s isect %ld, bh->b_blocknr %ld, using bsize %Zd\n",
++				__func__, (long)isect,
++				(long)bh->b_blocknr,
++				bh->b_size);
++	return;
++}
++
++/* Given an unmapped page, zero it (or read in page for COW),
++ * and set appropriate flags/markings, but it is safe to not initialize
++ * the range given in [from, to).  Loosely based on nobh_write_begin.
++ * Returns 0 on success, or a nonzero error status.
++ */
++static int
++init_page_for_write(struct pnfs_block_layout *bl, struct page *page,
++		    unsigned from, unsigned to, sector_t **pages_to_mark)
++{
++	struct buffer_head *bh;
++	int inval, ret = -EIO;
++	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
++	sector_t isect;
++
++	dprintk("%s enter, %p\n", __func__, page);
++	bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0);
++	if (!bh) {
++		ret = -ENOMEM;
++		goto cleanup;
++	}
++
++	isect = (sector_t)page->index << (PAGE_CACHE_SHIFT - 9);
++	be = find_get_extent(bl, isect, &cow_read);
++	if (!be)
++		goto cleanup;
++	inval = is_hole(be, isect);
++	dprintk("%s inval=%i, from=%u, to=%u\n", __func__, inval, from, to);
++	if (inval) {
++		if (be->be_state == PNFS_BLOCK_NONE_DATA) {
++			dprintk("%s PANIC - got NONE_DATA extent %p\n",
++				__func__, be);
++			goto cleanup;
++		}
++		map_block(isect, be, bh);
++		unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
++	}
++	if (PageUptodate(page)) {
++		/* Do nothing */
++	} else if (inval && !cow_read) {
++		zero_user_segments(page, 0, from, to, PAGE_CACHE_SIZE);
++	} else if (0 < from || PAGE_CACHE_SIZE > to) {
++		struct pnfs_block_extent *read_extent;
++
++		read_extent = (inval && cow_read) ? cow_read : be;
++		map_block(isect, read_extent, bh);
++		lock_buffer(bh);
++		bh->b_end_io = end_buffer_read_nobh;
++		submit_bh(READ, bh);
++		dprintk("%s: Waiting for buffer read\n", __func__);
++		/* XXX Don't really want to hold layout lock here */
++		wait_on_buffer(bh);
++		if (!buffer_uptodate(bh))
++			goto cleanup;
++	}
++	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
++		/* There is a BUG here if is a short copy after write_begin,
++		 * but I think this is a generic fs bug.  The problem is that
++		 * we have marked the page as initialized, but it is possible
++		 * that the section not copied may never get copied.
++		 */
++		ret = mark_initialized_sectors(be->be_inval, isect,
++					       PAGE_CACHE_SECTORS,
++					       pages_to_mark);
++		/* Want to preallocate mem so above can't fail */
++		if (ret)
++			goto cleanup;
++	}
++	SetPageMappedToDisk(page);
++	ret = 0;
++
++cleanup:
++	/* free_buffer_head() dereferences bh, which is NULL on -ENOMEM */
++	if (bh)
++		free_buffer_head(bh);
++	put_extent(be);
++	put_extent(cow_read);
++	if (ret) {
++		/* Mark layout with bad read; should now use nfs4 for I/O */
++		mark_bad_read();
++	}
++	return ret;
++}
++
++static int
++bl_write_begin(struct pnfs_layout_segment *lseg, struct page *page, loff_t pos,
++	       unsigned count, struct pnfs_fsdata *fsdata)
++{
++	unsigned from, to;
++	int ret;
++	sector_t *pages_to_mark = NULL;
++	struct pnfs_block_layout *bl = BLK_LSEG2EXT(lseg);
++
++	dprintk("%s enter, %u@%lld\n", __func__, count, pos);
++	print_page(page);
++	/* The following code assumes blocksize >= PAGE_CACHE_SIZE */
++	if (bl->bl_blocksize < (PAGE_CACHE_SIZE >> 9)) {
++		dprintk("%s Can't handle blocksize %llu\n", __func__,
++			(u64)bl->bl_blocksize);
++		put_lseg(fsdata->lseg);
++		fsdata->lseg = NULL;
++		return 0;
++	}
++	if (PageMappedToDisk(page)) {
++		/* Basically, this is a flag that says we have
++		 * successfully called write_begin already on this page.
++		 */
++		/* NOTE - there are cache consistency issues here.
++		 * For example, what if the layout is recalled, then regained?
++		 * If the file is closed and reopened, will the page flags
++		 * be reset?  If not, we'll have to use layout info instead of
++		 * the page flag.
++		 */
++		return 0;
++	}
++	from = pos & (PAGE_CACHE_SIZE - 1);
++	to = from + count;
++	ret = init_page_for_write(bl, page, from, to, &pages_to_mark);
++	if (ret) {
++		dprintk("%s init page failed with %i", __func__, ret);
++		/* Revert back to plain NFS and just continue on with
++		 * write.  This assumes there is no request attached, which
++		 * should be true if we get here.
++		 */
++		BUG_ON(PagePrivate(page));
++		put_lseg(fsdata->lseg);
++		fsdata->lseg = NULL;
++		kfree(pages_to_mark);
++		ret = 0;
++	} else {
++		fsdata->private = pages_to_mark;
++	}
++	return ret;
++}
++
++/* CAREFUL - what happens if copied < count??? */
++static int
++bl_write_end(struct inode *inode, struct page *page, loff_t pos,
++	     unsigned count, unsigned copied, struct pnfs_layout_segment *lseg)
++{
++	dprintk("%s enter, %u@%lld, lseg=%p\n", __func__, count, pos, lseg);
++	print_page(page);
++	if (lseg)
++		SetPageUptodate(page);
++	return 0;
++}
++
++/* Return any memory allocated to fsdata->private, and take advantage
++ * of no page locks to mark pages noted in write_begin as needing
++ * initialization.
++ */
++static void
++bl_write_end_cleanup(struct file *filp, struct pnfs_fsdata *fsdata)
++{
++	struct page *page;
++	pgoff_t index;
++	sector_t *pos;
++	struct address_space *mapping = filp->f_mapping;
++	struct pnfs_fsdata *fake_data;
++	struct pnfs_layout_segment *lseg;
++
++	if (!fsdata || !fsdata->lseg || !fsdata->private)
++		return;
++	lseg = fsdata->lseg;
++	pos = fsdata->private;
++	dprintk("%s enter with pos=%llu\n", __func__, (u64)(*pos));
++	for (; *pos != ~0; pos++) {
++		index = *pos >> (PAGE_CACHE_SHIFT - 9);
++		/* XXX How do we properly deal with failures here??? */
++		/* On success the page is locked and we hold a reference */
++		page = grab_cache_page_write_begin(mapping, index, 0);
++		if (!page) {
++			printk(KERN_ERR "%s BUG BUG BUG NoMem\n", __func__);
++			continue;
++		}
++		dprintk("%s: Examining block page\n", __func__);
++		print_page(page);
++		if (!PageMappedToDisk(page)) {
++			/* XXX How do we properly deal with failures here??? */
++			dprintk("%s Marking block page\n", __func__);
++			init_page_for_write(BLK_LSEG2EXT(fsdata->lseg), page,
++					    PAGE_CACHE_SIZE, PAGE_CACHE_SIZE,
++					    NULL);
++			print_page(page);
++			fake_data = kzalloc(sizeof(*fake_data), GFP_KERNEL);
++			if (!fake_data) {
++				printk(KERN_ERR "%s BUG BUG BUG NoMem\n",
++				       __func__);
++				unlock_page(page);
++				page_cache_release(page);
++				continue;
++			}
++			get_lseg(lseg);
++			fake_data->lseg = lseg;
++			fake_data->bypass_eof = 1;
++			mapping->a_ops->write_end(filp, mapping,
++						  index << PAGE_CACHE_SHIFT,
++						  PAGE_CACHE_SIZE,
++						  PAGE_CACHE_SIZE,
++						  page, fake_data);
++			/* Note fake_data is freed by nfs_write_end */
++		} else {
++			unlock_page(page);
++			page_cache_release(page);
++		}
++	}
++	kfree(fsdata->private);
++	fsdata->private = NULL;
++}
++
++static ssize_t
++bl_get_stripesize(struct pnfs_layout_type *lo)
++{
++	dprintk("%s enter\n", __func__);
++	return 0;
++}
++
++/* This is called by nfs_can_coalesce_requests via nfs_pageio_do_add_request.
++ * Should return False if there is a reason requests can not be coalesced,
++ * otherwise, should default to returning True.
++ */
++static int
++bl_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
++	   struct nfs_page *req)
++{
++	dprintk("%s enter\n", __func__);
++	if (pgio->pg_iswrite)
++		return prev->wb_lseg == req->wb_lseg;
++	else
++		return 1;
++}
++
++static struct layoutdriver_io_operations blocklayout_io_operations = {
++	.commit				= bl_commit,
++	.read_pagelist			= bl_read_pagelist,
++	.write_pagelist			= bl_write_pagelist,
++	.write_begin			= bl_write_begin,
++	.write_end			= bl_write_end,
++	.write_end_cleanup		= bl_write_end_cleanup,
++	.alloc_layout			= bl_alloc_layout,
++	.free_layout			= bl_free_layout,
++	.alloc_lseg			= bl_alloc_lseg,
++	.free_lseg			= bl_free_lseg,
++	.setup_layoutcommit		= bl_setup_layoutcommit,
++	.encode_layoutcommit		= bl_encode_layoutcommit,
++	.cleanup_layoutcommit		= bl_cleanup_layoutcommit,
++	.initialize_mountpoint		= bl_initialize_mountpoint,
++	.uninitialize_mountpoint	= bl_uninitialize_mountpoint,
++};
++
++static struct layoutdriver_policy_operations blocklayout_policy_operations = {
++	.get_stripesize			= bl_get_stripesize,
++	.pg_test			= bl_pg_test,
++};
++
++static struct pnfs_layoutdriver_type blocklayout_type = {
++	.id = LAYOUT_BLOCK_VOLUME,
++	.name = "LAYOUT_BLOCK_VOLUME",
++	.ld_io_ops = &blocklayout_io_operations,
++	.ld_policy_ops = &blocklayout_policy_operations,
++};
++
++static int __init nfs4blocklayout_init(void)
++{
++	dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
++
++	pnfs_block_callback_ops = pnfs_register_layoutdriver(&blocklayout_type);
++	bl_pipe_init();
++	return 0;
++}
++
++static void __exit nfs4blocklayout_exit(void)
++{
++	dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
++	       __func__);
++
++	pnfs_unregister_layoutdriver(&blocklayout_type);
++	bl_pipe_exit();
++}
++
++module_init(nfs4blocklayout_init);
++module_exit(nfs4blocklayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-08-23 12:09:03.289501933 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-08-23 12:09:03.289501933 -0400
+@@ -0,0 +1,335 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayoutdev.c
++ *
++ *  Device operations for the pnfs nfs4 block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#include <linux/module.h>
++#include <linux/buffer_head.h> /* __bread */
++
++#include <linux/genhd.h>
++#include <linux/blkdev.h>
++#include <linux/hash.h>
++
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++uint32_t *blk_overflow(uint32_t *p, uint32_t *end, size_t nbytes)
++{
++	/* Compare word counts; never form a pointer beyond 'end' */
++	if (unlikely(p > end || XDR_QUADLEN(nbytes) > (size_t)(end - p)))
++		return NULL;
++	return p;
++}
++EXPORT_SYMBOL(blk_overflow);
++
++/*
++ * Open the block device numbered 'dev' with FMODE_READ.
++ * Returns the block_device, or NULL (never an ERR_PTR) on failure.
++ */
++struct block_device *nfs4_blkdev_get(dev_t dev)
++{
++	struct block_device *bdev;
++
++	dprintk("%s enter\n", __func__);
++	bdev = open_by_devnum(dev, FMODE_READ);
++	if (!IS_ERR(bdev))
++		return bdev;
++	dprintk("%s failed to open device : %ld\n", __func__, PTR_ERR(bdev));
++	return NULL;
++}
++
++/*
++ * Release the block device
++ */
++int nfs4_blkdev_put(struct block_device *bdev)
++{
++	dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev),
++			MINOR(bdev->bd_dev));
++	bd_release(bdev);
++	return blkdev_put(bdev, FMODE_READ);
++}
++
++/* Resolve the pnfs_block_deviceaddr4 (draft-8) XDR held in dev->area by
++ * sending it to the userspace daemon over pipefs; the reply payload holds
++ * the major and minor numbers of the block device to open.
++ * Returns a newly allocated pnfs_block_dev, or NULL on any failure.
++ */
++struct pnfs_block_dev *
++nfs4_blk_decode_device(struct nfs_server *server,
++		       struct pnfs_device *dev,
++		       struct list_head *sdlist)
++{
++	struct pnfs_block_dev *rv = NULL;
++	struct block_device *bd = NULL;
++	struct pipefs_hdr *msg = NULL, *reply = NULL;
++	uint32_t major, minor;
++
++	dprintk("%s enter\n", __func__);
++
++	if (IS_ERR(bl_device_pipe))
++		return NULL;
++	dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
++	dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
++		dev->mincount);
++	msg = pipefs_alloc_init_msg(0, BL_DEVICE_MOUNT, 0, dev->area,
++				    dev->mincount);
++	if (IS_ERR(msg)) {
++		dprintk("ERROR: couldn't make pipefs message.\n");
++		goto out_err;
++	}
++	msg->msgid = hash_ptr(&msg, sizeof(msg->msgid) * 8);
++	msg->status = BL_DEVICE_REQUEST_INIT;
++
++	dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
++	reply = pipefs_queue_upcall_waitreply(bl_device_pipe, msg,
++					      &bl_device_list, 0, 0);
++
++	if (IS_ERR(reply)) {
++		dprintk("ERROR: upcall_waitreply failed\n");
++		goto out_err;
++	}
++	if (reply->status != BL_DEVICE_REQUEST_PROC) {
++		dprintk("%s failed to open device\n", __func__);
++		goto out_err;
++	}
++	memcpy(&major, (uint32_t *)(payload_of(reply)), sizeof(uint32_t));
++	memcpy(&minor, (uint32_t *)(payload_of(reply) + sizeof(uint32_t)),
++		sizeof(uint32_t));
++	bd = nfs4_blkdev_get(MKDEV(major, minor));
++	if (!bd) {	/* nfs4_blkdev_get() returns NULL, not ERR_PTR */
++		dprintk("%s failed to open device\n", __func__);
++		goto out_err;
++	}
++
++	rv = kzalloc(sizeof(*rv), GFP_KERNEL);
++	if (!rv)
++		goto out_err;
++
++	rv->bm_mdev = bd;
++	memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct pnfs_deviceid));
++	dprintk("%s Created device %s with bd_block_size %u\n",
++		__func__, bd->bd_disk->disk_name, bd->bd_block_size);
++	kfree(reply);
++	kfree(msg);
++	return rv;
++
++out_err:
++	/* rv is still NULL here; only the opened device needs releasing */
++	if (bd)
++		blkdev_put(bd, FMODE_READ);
++	if (!IS_ERR(reply))
++		kfree(reply);
++	if (!IS_ERR(msg))
++		kfree(msg);
++	return NULL;
++}
++
++/* Look up the block_device constructed for a server-supplied deviceid by
++ * walking the mount's device list under bm_lock; NULL when none matches. */
++static struct block_device *translate_devid(struct pnfs_layout_type *lo,
++					    struct pnfs_deviceid *id)
++{
++	struct block_device *found = NULL;
++	struct block_mount_id *mount_id;
++	struct pnfs_block_dev *cur;
++
++	dprintk("%s enter, lo=%p, id=%p\n", __func__, lo, id);
++	mount_id = BLK_ID(lo);
++	spin_lock(&mount_id->bm_lock);
++	list_for_each_entry(cur, &mount_id->bm_devlist, bm_node) {
++		if (memcmp(id->data, cur->bm_mdevid.data,
++			   NFS4_PNFS_DEVICEID4_SIZE))
++			continue;
++		found = cur->bm_mdev;
++		break;
++	}
++	spin_unlock(&mount_id->bm_lock);
++	dprintk("%s returning %p\n", __func__, found);
++	return found;
++}
++
++/* Tracks info needed to ensure extents in layout obey constraints of spec */
++struct layout_verification {
++	u32 mode;	/* R or RW */
++	u64 start;	/* Expected start of next non-COW extent */
++	u64 inval;	/* Start of INVAL coverage */
++	u64 cowread;	/* End of COW read coverage */
++};
++
++/* Verify the extent meets the layout requirements of the pnfs-block draft,
++ * section 2.3.1.
++ */
++static int verify_extent(struct pnfs_block_extent *be,
++			 struct layout_verification *lv)
++{
++	if (lv->mode == IOMODE_READ) {
++		if (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
++		    be->be_state == PNFS_BLOCK_INVALID_DATA)
++			return -EIO;
++		if (be->be_f_offset != lv->start)
++			return -EIO;
++		lv->start += be->be_length;
++		return 0;
++	}
++	/* lv->mode == IOMODE_RW */
++	if (be->be_state == PNFS_BLOCK_READWRITE_DATA) {
++		if (be->be_f_offset != lv->start)
++			return -EIO;
++		if (lv->cowread > lv->start)
++			return -EIO;
++		lv->start += be->be_length;
++		lv->inval = lv->start;
++		return 0;
++	} else if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
++		if (be->be_f_offset != lv->start)
++			return -EIO;
++		lv->start += be->be_length;
++		return 0;
++	} else if (be->be_state == PNFS_BLOCK_READ_DATA) {
++		if (be->be_f_offset > lv->start)
++			return -EIO;
++		if (be->be_f_offset < lv->inval)
++			return -EIO;
++		if (be->be_f_offset < lv->cowread)
++			return -EIO;
++		/* It looks like you might want to min this with lv->start,
++		 * but you really don't.
++		 */
++		lv->inval = lv->inval + be->be_length;
++		lv->cowread = be->be_f_offset + be->be_length;
++		return 0;
++	} else
++		return -EIO;
++}
++
++/* XDR decode a pnfs_block_layout4 and merge its extents into the layout.
++ * Returns 0 on success and an error status otherwise; extents that are
++ * still in the local staging list are released on failure.
++ */
++int
++nfs4_blk_process_layoutget(struct pnfs_layout_type *lo,
++			   struct nfs4_pnfs_layoutget_res *lgr)
++{
++	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
++	uint32_t *p = (uint32_t *)lgr->layout.buf;
++	uint32_t *end = (uint32_t *)((char *)lgr->layout.buf + lgr->layout.len);
++	int i, status = -EIO;
++	uint32_t count;
++	struct pnfs_block_extent *be = NULL, *save;
++	uint64_t tmp; /* Used by READSECTOR */
++	struct layout_verification lv = {
++		.mode = lgr->lseg.iomode,
++		.start = lgr->lseg.offset >> 9,
++		.inval = lgr->lseg.offset >> 9,
++		.cowread = lgr->lseg.offset >> 9,
++	};
++
++	LIST_HEAD(extents);
++
++	BLK_READBUF(p, end, 4);
++	READ32(count);
++
++	dprintk("%s enter, number of extents %i\n", __func__, count);
++	/* Bound count by the bytes left; the total below could wrap */
++	if (count > (size_t)((char *)end - (char *)p) /
++		    (28 + NFS4_PNFS_DEVICEID4_SIZE))
++		goto out_err;
++	BLK_READBUF(p, end, (28 + NFS4_PNFS_DEVICEID4_SIZE) * count);
++
++	/* Stage extents on a private list so that errors are easy to undo */
++	for (i = 0; i < count; i++) {
++		be = alloc_extent();
++		if (!be) {
++			status = -ENOMEM;
++			goto out_err;
++		}
++		READ_DEVID(&be->be_devid);
++		be->be_mdev = translate_devid(lo, &be->be_devid);
++		if (!be->be_mdev)
++			goto out_err;
++		/* Next three are read as bytes, stored as 512-byte sectors */
++		READ_SECTOR(be->be_f_offset);
++		READ_SECTOR(be->be_length);
++		READ_SECTOR(be->be_v_offset);
++		READ32(be->be_state);
++		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++			be->be_inval = &bl->bl_inval;
++		if (verify_extent(be, &lv)) {
++			dprintk("%s verify failed\n", __func__);
++			goto out_err;
++		}
++		list_add_tail(&be->be_node, &extents);
++	}
++	if (p != end) {
++		dprintk("%s Undecoded cruft at end of opaque\n", __func__);
++		be = NULL;
++		goto out_err;
++	}
++	if (lgr->lseg.offset + lgr->lseg.length != lv.start << 9) {
++		dprintk("%s Final length mismatch\n", __func__);
++		be = NULL;
++		goto out_err;
++	}
++	if (lv.start < lv.cowread) {
++		dprintk("%s Final uncovered COW extent\n", __func__);
++		be = NULL;
++		goto out_err;
++	}
++	/* Everything decoded; merge into the existing layout extents */
++	spin_lock(&bl->bl_ext_lock);
++	list_for_each_entry_safe(be, save, &extents, be_node) {
++		list_del(&be->be_node);
++		status = add_and_merge_extent(bl, be);
++		if (status) {
++			spin_unlock(&bl->bl_ext_lock);
++			/* This is a fairly catastrophic error, as the
++			 * entire layout extent lists are now corrupted.
++			 * We should have some way to distinguish this.
++			 */
++			be = NULL;
++			goto out_err;
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++	status = 0;
++ out:
++	dprintk("%s returns %i\n", __func__, status);
++	return status;
++
++ out_err:
++	put_extent(be);
++	while (!list_empty(&extents)) {
++		be = list_first_entry(&extents, struct pnfs_block_extent,
++				      be_node);
++		list_del(&be->be_node);
++		put_extent(be);
++	}
++	goto out;
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-08-23 12:09:03.290395707 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-08-23 12:09:03.290395707 -0400
+@@ -0,0 +1,120 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayoutdm.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2007 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Fred Isaman <iisaman@umich.edu>
++ *  Andy Adamson <andros@citi.umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++
++#include <linux/genhd.h> /* gendisk - used in a dprintk*/
++#include <linux/sched.h>
++#include <linux/hash.h>
++
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/* Defines used for calculating memory usage in nfs4_blk_flatten() */
++#define ARGSIZE   24    /* Max bytes needed for linear target arg string */
++#define SPECSIZE (sizeof8(struct dm_target_spec) + ARGSIZE) /* spec + its arg */
++#define SPECS_PER_PAGE (PAGE_SIZE / SPECSIZE)
++#define SPEC_HEADER_ADJUST (SPECS_PER_PAGE - \
++			    (PAGE_SIZE - sizeof8(struct dm_ioctl)) / SPECSIZE)
++#define roundup8(x) (((x)+7) & ~7) /* round x up to a multiple of 8 */
++#define sizeof8(x) roundup8(sizeof(x)) /* sizeof(x) rounded up to 8 bytes */
++
++/* Upcall BL_DEVICE_UMOUNT for @dev via pipefs_queue_upcall_waitreply().
++ * Returns 0 when the reply status is BL_DEVICE_REQUEST_PROC, otherwise 1.
++ */
++static int dev_remove(dev_t dev)
++{
++	struct pipefs_hdr *msg = NULL, *reply = NULL;
++	uint32_t major = MAJOR(dev), minor = MINOR(dev);
++	uint64_t bl_dev;
++	int ret = 1;
++
++	dprintk("Entering %s\n", __func__);
++	if (IS_ERR(bl_device_pipe))
++		return ret;
++
++	/* Payload is major followed by minor, four bytes each */
++	memcpy((void *)&bl_dev, &major, sizeof(major));
++	memcpy((void *)&bl_dev + sizeof(major), &minor, sizeof(minor));
++	msg = pipefs_alloc_init_msg(0, BL_DEVICE_UMOUNT, 0, (void *)&bl_dev,
++				    sizeof(bl_dev));
++	if (IS_ERR(msg)) {
++		dprintk("ERROR: couldn't make pipefs message.\n");
++		goto out;
++	}
++	msg->msgid = hash_ptr(&msg, sizeof(msg->msgid) * 8);
++	msg->status = BL_DEVICE_REQUEST_INIT;
++	reply = pipefs_queue_upcall_waitreply(bl_device_pipe, msg,
++					      &bl_device_list, 0, 0);
++	if (IS_ERR(reply)) {
++		dprintk("ERROR: upcall_waitreply failed\n");
++		goto out;
++	}
++	ret = reply->status != BL_DEVICE_REQUEST_PROC; /*TODO: what to return*/
++out:
++	if (!IS_ERR(reply))
++		kfree(reply);
++	if (!IS_ERR(msg))
++		kfree(msg);
++	return ret;
++}
++
++/*
++ * Release the meta device of @bdev: put the block device, then have
++ * dev_remove() request its removal.  Returns dev_remove()'s result.
++ */
++static int nfs4_blk_metadev_release(struct pnfs_block_dev *bdev)
++{
++	dev_t dev = bdev->bm_mdev->bd_dev; /* read before the put below */
++	int rv;
++
++	dprintk("%s Releasing\n", __func__);
++	rv = nfs4_blkdev_put(bdev->bm_mdev); /* XXX Check return? */
++	dprintk("%s nfs4_blkdev_put returns %d\n", __func__, rv);
++	/* bm_mdev is not dereferenced again once its reference is dropped */
++	rv = dev_remove(dev);
++	dprintk("%s Returns %d\n", __func__, rv);
++	return rv;
++}
++
++/* Release bdev's meta device, if any, then free bdev.  NULL is a no-op. */
++void free_block_dev(struct pnfs_block_dev *bdev)
++{
++	if (!bdev)
++		return;
++	if (bdev->bm_mdev) {
++		dprintk("%s Removing DM device: %d:%d\n", __func__,
++			MAJOR(bdev->bm_mdev->bd_dev),
++			MINOR(bdev->bm_mdev->bd_dev));
++		/* XXX Check status ?? */
++		nfs4_blk_metadev_release(bdev);
++	}
++	kfree(bdev);
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-08-23 12:09:03.290395707 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-08-23 12:09:03.291501560 -0400
+@@ -0,0 +1,303 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayout.h
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#ifndef FS_NFS_NFS4BLOCKLAYOUT_H
++#define FS_NFS_NFS4BLOCKLAYOUT_H
++
++#include <linux/nfs_fs.h>
++#include <linux/pnfs_xdr.h> /* Needed by nfs4_pnfs.h */
++#include <linux/nfs4_pnfs.h>
++#include <linux/dm-ioctl.h> /* Needed for struct dm_ioctl*/
++
++#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> 9)
++
++#define PG_pnfserr PG_owner_priv_1
++#define PagePnfsErr(page)	test_bit(PG_pnfserr, &(page)->flags)
++#define SetPagePnfsErr(page)	set_bit(PG_pnfserr, &(page)->flags)
++#define ClearPagePnfsErr(page)	clear_bit(PG_pnfserr, &(page)->flags)
++
++extern int dm_dev_create(struct dm_ioctl *param); /* from dm-ioctl.c */
++extern int dm_dev_remove(struct dm_ioctl *param); /* from dm-ioctl.c */
++extern int dm_do_resume(struct dm_ioctl *param);
++extern int dm_table_load(struct dm_ioctl *param, size_t param_size);
++
++struct block_mount_id {
++	spinlock_t			bm_lock;    /* protects list */
++	struct list_head		bm_devlist; /* holds pnfs_block_dev */
++};
++
++struct pnfs_block_dev {
++	struct list_head		bm_node;
++	struct pnfs_deviceid		bm_mdevid;    /* associated devid */
++	struct block_device		*bm_mdev;     /* meta device itself */
++};
++
++/* holds visible disks that can be matched against VOLUME_SIMPLE signatures */
++struct visible_block_device {
++	struct list_head	vi_node;
++	struct block_device	*vi_bdev;
++	int			vi_mapped;
++	int			vi_put_done;
++};
++
++enum blk_vol_type {
++	PNFS_BLOCK_VOLUME_SIMPLE   = 0,	/* maps to a single LU */
++	PNFS_BLOCK_VOLUME_SLICE    = 1,	/* slice of another volume */
++	PNFS_BLOCK_VOLUME_CONCAT   = 2,	/* concatenation of multiple volumes */
++	PNFS_BLOCK_VOLUME_STRIPE   = 3	/* striped across multiple volumes */
++};
++
++/* All disk offset/lengths are stored in 512-byte sectors */
++struct pnfs_blk_volume {
++	uint32_t		bv_type;
++	sector_t 		bv_size;
++	struct pnfs_blk_volume 	**bv_vols;
++	int 			bv_vol_n;
++	union {
++		dev_t			bv_dev;
++		sector_t		bv_stripe_unit;
++		sector_t 		bv_offset;
++	};
++};
++
++/* Since components need not be aligned, cannot use sector_t */
++struct pnfs_blk_sig_comp {
++	int64_t 	bs_offset;  /* In bytes */
++	uint32_t   	bs_length;  /* In bytes */
++	char 		*bs_string;
++};
++
++/* Maximum number of signatures components in a simple volume */
++# define PNFS_BLOCK_MAX_SIG_COMP 16
++
++struct pnfs_blk_sig {
++	int 				si_num_comps;
++	struct pnfs_blk_sig_comp	si_comps[PNFS_BLOCK_MAX_SIG_COMP];
++};
++
++enum exstate4 {
++	PNFS_BLOCK_READWRITE_DATA	= 0,
++	PNFS_BLOCK_READ_DATA		= 1,
++	PNFS_BLOCK_INVALID_DATA		= 2, /* mapped, but data is invalid */
++	PNFS_BLOCK_NONE_DATA		= 3  /* unmapped, it's a hole */
++};
++
++#define MY_MAX_TAGS (15) /* tag bitnums used must be less than this */
++
++struct my_tree_t {
++	sector_t		mtt_step_size;	/* Internal sector alignment */
++	struct list_head	mtt_stub; /* Should be a radix tree */
++};
++
++struct pnfs_inval_markings {
++	spinlock_t	im_lock;
++	struct my_tree_t im_tree;	/* Sectors that need LAYOUTCOMMIT */
++	sector_t	im_block_size;	/* Server blocksize in sectors */
++};
++
++struct pnfs_inval_tracking {
++	struct list_head it_link;	/* entry in my_tree_t.mtt_stub */
++	u64		 it_sector;	/* u64 like the lookup keys; int truncated */
++	int		 it_tags;	/* bitmask of (1 << tag) */
++};
++
++/* sector_t fields are all in 512-byte sectors */
++struct pnfs_block_extent {
++	struct kref	be_refcnt;	/* see get_extent()/put_extent() */
++	struct list_head be_node;	/* link into lseg list */
++	struct pnfs_deviceid be_devid;  /* STUB - removable??? */
++	struct block_device *be_mdev;
++	sector_t	be_f_offset;	/* the starting offset in the file */
++	sector_t	be_length;	/* the size of the extent */
++	sector_t	be_v_offset;	/* the starting offset in the volume */
++	enum exstate4	be_state;	/* the state of this extent */
++	struct pnfs_inval_markings *be_inval; /* tracks INVAL->RW transition */
++};
++
++/* Shortened extent used by LAYOUTCOMMIT */
++struct pnfs_block_short_extent {
++	struct list_head bse_node;
++	struct pnfs_deviceid bse_devid;	/* STUB - removable??? */
++	struct block_device *bse_mdev;
++	sector_t	bse_f_offset;	/* the starting offset in the file */
++	sector_t	bse_length;	/* the size of the extent */
++};
++
++/* Set up empty markings; the tree step is min(page, block) size in sectors */
++static inline void
++INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
++{
++	marks->im_block_size = blocksize;
++	marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS, blocksize);
++	INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
++	spin_lock_init(&marks->im_lock);
++}
++
++enum extentclass4 {
++	RW_EXTENT	= 0, /* READWRITE and INVAL */
++	RO_EXTENT	= 1, /* READ and NONE */
++	EXTENT_LISTS	= 2, /* number of classes; sizes bl_extents[] */
++};
++
++/* READ and NONE states map to RO_EXTENT, every other state to RW_EXTENT */
++static inline int choose_list(enum exstate4 state)
++{
++	if (state != PNFS_BLOCK_READ_DATA && state != PNFS_BLOCK_NONE_DATA)
++		return RW_EXTENT;
++	return RO_EXTENT;
++}
++
++struct pnfs_block_layout {
++	struct pnfs_layout_type bl_layout;
++	struct pnfs_inval_markings bl_inval; /* tracks INVAL->RW transition */
++	spinlock_t		bl_ext_lock;   /* Protects list manipulation */
++	struct list_head	bl_extents[EXTENT_LISTS]; /* R and RW extents */
++	struct list_head	bl_commit;	/* Needs layout commit */
++	unsigned int		bl_count;	/* entries in bl_commit */
++	sector_t		bl_blocksize;  /* Server blocksize in sectors */
++};
++
++/* this struct is communicated between:
++ * bl_setup_layoutcommit && bl_encode_layoutcommit && bl_cleanup_layoutcommit
++ */
++struct bl_layoutupdate_data {
++	struct list_head ranges;
++};
++
++#define BLK_ID(lo) ((struct block_mount_id *)(PNFS_NFS_SERVER(lo)->pnfs_ld_data))
++
++static inline struct pnfs_block_layout *
++BLK_LO2EXT(struct pnfs_layout_type *lo)
++{
++	return container_of(lo, struct pnfs_block_layout, bl_layout);
++}
++
++static inline struct pnfs_block_layout *
++BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
++{
++	return BLK_LO2EXT(lseg->layout);
++}
++
++uint32_t *blk_overflow(uint32_t *p, uint32_t *end, size_t nbytes);
++
++#define BLK_READBUF(p, e, nbytes)  do { \
++	p = blk_overflow(p, e, nbytes); \
++	if (!p) { \
++		printk(KERN_WARNING \
++			"%s: reply buffer overflowed in line %d.\n", \
++			__func__, __LINE__); \
++		goto out_err; \
++	} \
++} while (0)
++
++#define READ32(x)         (x) = ntohl(*p++)
++#define READ64(x)         do {                  \
++	(x) = (uint64_t)ntohl(*p++) << 32;           \
++	(x) |= ntohl(*p++);                     \
++} while (0)
++#define COPYMEM(x, nbytes) do {                 \
++	memcpy((x), p, nbytes);                 \
++	p += XDR_QUADLEN(nbytes);               \
++} while (0)
++#define READ_DEVID(x)	COPYMEM((x)->data, NFS4_PNFS_DEVICEID4_SIZE)
++#define READ_SECTOR(x)     do { \
++	READ64(tmp); \
++	if (tmp & 0x1ff) { \
++		printk(KERN_WARNING \
++		       "%s Value not 512-byte aligned at line %d\n", \
++		       __func__, __LINE__);			     \
++		goto out_err; \
++	} \
++	(x) = tmp >> 9; \
++} while (0)
++
++#define WRITE32(n)               do { \
++	*p++ = htonl(n); \
++	} while (0)
++#define WRITE64(n)               do {                           \
++	*p++ = htonl((uint32_t)((n) >> 32));			\
++	*p++ = htonl((uint32_t)(n));				\
++} while (0)
++#define WRITEMEM(ptr, nbytes)     do {                          \
++	p = xdr_encode_opaque_fixed(p, ptr, nbytes);	\
++} while (0)
++#define WRITE_DEVID(x)  WRITEMEM((x)->data, NFS4_PNFS_DEVICEID4_SIZE)
++
++/* blocklayoutdev.c */
++struct block_device *nfs4_blkdev_get(dev_t dev);
++int nfs4_blkdev_put(struct block_device *bdev);
++struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server,
++					      struct pnfs_device *dev,
++					      struct list_head *sdlist);
++int nfs4_blk_process_layoutget(struct pnfs_layout_type *lo,
++			       struct nfs4_pnfs_layoutget_res *lgr);
++int nfs4_blk_create_block_disk_list(struct list_head *);
++void nfs4_blk_destroy_disk_list(struct list_head *);
++/* blocklayoutdm.c */
++int nfs4_blk_flatten(struct pnfs_blk_volume *, int, struct pnfs_block_dev *);
++void free_block_dev(struct pnfs_block_dev *bdev);
++/* extents.c */
++struct pnfs_block_extent *
++find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
++		struct pnfs_block_extent **cow_read);
++int mark_initialized_sectors(struct pnfs_inval_markings *marks,
++			     sector_t offset, sector_t length,
++			     sector_t **pages);
++void put_extent(struct pnfs_block_extent *be);
++struct pnfs_block_extent *alloc_extent(void);
++struct pnfs_block_extent *get_extent(struct pnfs_block_extent *be);
++int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect);
++int encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++				   struct xdr_stream *xdr,
++				   const struct pnfs_layoutcommit_arg *arg);
++void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++				   const struct pnfs_layoutcommit_arg *arg,
++				   int status);
++int add_and_merge_extent(struct pnfs_block_layout *bl,
++			 struct pnfs_block_extent *new);
++int mark_for_commit(struct pnfs_block_extent *be,
++		    sector_t offset, sector_t length);
++
++#include <linux/sunrpc/simple_rpc_pipefs.h>
++
++extern struct pipefs_list bl_device_list;
++extern struct dentry *bl_device_pipe;
++
++int bl_pipe_init(void);
++void bl_pipe_exit(void);
++
++#define BL_DEVICE_UMOUNT               0x0 /* Umount--delete devices */
++#define BL_DEVICE_MOUNT                0x1 /* Mount--create devices*/
++#define BL_DEVICE_REQUEST_INIT         0x0 /* Start request */
++#define BL_DEVICE_REQUEST_PROC         0x1 /* User level process succeeds */
++#define BL_DEVICE_REQUEST_ERR          0x2 /* User level process fails */
++
++#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-08-23 12:09:03.292511531 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-08-23 12:09:03.292511531 -0400
+@@ -0,0 +1,948 @@
++/*
++ *  linux/fs/nfs/blocklayout/extents.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++
++#include "blocklayout.h"
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/* Bit numbers */
++#define EXTENT_INITIALIZED 0
++#define EXTENT_WRITTEN     1
++#define EXTENT_IN_COMMIT   2
++#define INTERNAL_EXISTS    MY_MAX_TAGS
++#define INTERNAL_MASK      ((1 << INTERNAL_EXISTS) - 1)
++
++/* Round s down to a multiple of base (largest t <= s with t % base == 0) */
++static inline sector_t normalize(sector_t s, int base)
++{
++	sector_t quot = s; /* do_div() divides in place, returns remainder */
++	return s - do_div(quot, base);
++}
++
++static inline sector_t normalize_up(sector_t s, int base)
++{
++	return normalize(s + base - 1, base); /* smallest multiple of base >= s */
++}
++
++/* Complete stub using list while determine API wanted */
++
++/* Returns the tag bits (INTERNAL_MASK applied) stored for s, or -ENOENT */
++static int32_t _find_entry(struct my_tree_t *tree, u64 s)
++{
++	struct pnfs_inval_tracking *it;
++
++	dprintk("%s(%llu) enter\n", __func__, s);
++	/* Walk from the tail; stop at the first entry not above s */
++	list_for_each_entry_reverse(it, &tree->mtt_stub, it_link) {
++		if (it->it_sector > s)
++			continue;
++		if (it->it_sector != s)
++			break;
++		return it->it_tags & INTERNAL_MASK;
++	}
++	return -ENOENT;
++}
++
++/* Returns 1 if the entry for s (rounded down to the tree step) has tag */
++static inline
++int _has_tag(struct my_tree_t *tree, u64 s, int32_t tag)
++{
++	int32_t found;
++
++	dprintk("%s(%llu, %i) enter\n", __func__, s, tag);
++	s = normalize(s, tree->mtt_step_size);
++	found = _find_entry(tree, s);
++	if (found < 0)
++		return 0; /* no entry for this step */
++	return (found & (1 << tag)) ? 1 : 0;
++}
++
++/* Creates entry with tag, or if entry already exists, unions tag to it.
++ * If storage is not NULL, newly created entry will use it.
++ * Returns 1 if an entry was added, 0 if one existed, -ENOMEM on error.
++ */
++static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag,
++		      struct pnfs_inval_tracking *storage)
++{
++	int found = 0;
++	struct pnfs_inval_tracking *pos;
++
++	dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage);
++	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
++		if (pos->it_sector > s)
++			continue;
++		else if (pos->it_sector == s) {
++			found = 1;
++			break;
++		} else
++			break; /* pos is the nearest entry below s */
++	}
++	if (found) {
++		pos->it_tags |= (1 << tag);
++		return 0; /* nothing added; storage is left untouched */
++	} else {
++		struct pnfs_inval_tracking *new;
++		if (storage)
++			new = storage;
++		else {
++			new = kmalloc(sizeof(*new), GFP_KERNEL);
++			if (!new)
++				return -ENOMEM;
++		}
++		new->it_sector = s;
++		new->it_tags = (1 << tag);
++		list_add(&new->it_link, &pos->it_link); /* link right after pos */
++		return 1;
++	}
++}
++
++/* Over range, unions tag with existing entries, else creates entry with tag */
++/* XXXX Really want option to not create.  Returns 0, or -ENOMEM on failure. */
++static int _set_range(struct my_tree_t *tree, int32_t tag, u64 s, u64 length)
++{
++	u64 i;
++
++	dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length);
++	for (i = normalize(s, tree->mtt_step_size); i < s + length;
++	     i += tree->mtt_step_size)
++		if (_add_entry(tree, i, tag, NULL) < 0) /* 1 means created */
++			return -ENOMEM;
++	return 0;
++}
++
++/* Ensure that future operations on given range of tree will not malloc */
++static int _preload_range(struct my_tree_t *tree, u64 offset, u64 length)
++{
++	u64 start, end, s;
++	int count, i, used = 0, status = -ENOMEM;
++	struct pnfs_inval_tracking **storage;
++
++	dprintk("%s(%llu, %llu) enter\n", __func__, offset, length);
++	start = normalize(offset, tree->mtt_step_size);
++	end = normalize_up(offset + length, tree->mtt_step_size);
++	count = (int)(end - start) / (int)tree->mtt_step_size; /* steps in range */
++
++	/* Pre-malloc what memory we might need */
++	storage = kmalloc(sizeof(*storage) * count, GFP_KERNEL);
++	if (!storage)
++		return -ENOMEM;
++	for (i = 0; i < count; i++) {
++		storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking),
++				     GFP_KERNEL);
++		if (!storage[i])
++			goto out_cleanup;
++	}
++
++	/* Now need lock - HOW??? */
++	/* _add_entry() returns 1 only when it linked in storage[used] */
++	for (s = start; s < end; s += tree->mtt_step_size)
++		used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);
++
++	/* Unlock - HOW??? */
++	status = 0;
++
++ out_cleanup:
++	for (i = used; i < count; i++) { /* free the slots that were not used */
++		if (!storage[i])
++			break; /* slot whose kmalloc failed; later ones unset */
++		kfree(storage[i]);
++	}
++	kfree(storage);
++	return status; /* 0, or -ENOMEM if a preallocation failed */
++}
++
++static void set_needs_init(sector_t *array, sector_t offset)
++{
++	sector_t *p = array;
++	/* array is a ~0-terminated list; add offset before the first larger entry */
++	dprintk("%s enter\n", __func__);
++	if (!p)
++		return; /* no list to update */
++	while (*p < offset)
++		p++;
++	if (*p == offset)
++		return; /* already listed */
++	else if (*p == ~0) { /* reached the terminator: append */
++		*p++ = offset;
++		*p = ~0; /* NOTE(review): no capacity check; caller sizes array */
++		return;
++	} else {
++		sector_t *save = p;
++		dprintk("%s Adding %llu\n", __func__, (u64)offset);
++		while (*p != ~0)
++			p++;
++		p++; /* step past the terminator so it is moved as well */
++		memmove(save + 1, save, (char *)p - (char *)save);
++		*save = offset;
++		return;
++	}
++}
++
++/* Is isect tagged EXTENT_INITIALIZED?  Relies on page lock to serialize. */
++int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect)
++{
++	int tagged;
++
++	spin_lock(&marks->im_lock);
++	tagged = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
++	spin_unlock(&marks->im_lock);
++	return tagged;
++}
++
++/* Assume start, end already sector aligned.
++ * Returns 1 if every step of [start, end) has an entry carrying tag,
++ * and 0 as soon as a step is missing or lacks the tag.
++ */
++static int
++_range_has_tag(struct my_tree_t *tree, u64 start, u64 end, int32_t tag)
++{
++	struct pnfs_inval_tracking *pos;
++	u64 expect;
++
++	dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag);
++
++	/* Walk down from the last step of the range; expect is the sector
++	 * the next entry below end must have.
++	 */
++	expect = end - tree->mtt_step_size;
++	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
++		if (pos->it_sector >= end)
++			continue;
++		if (pos->it_sector != expect || !(pos->it_tags & (1 << tag)))
++			return 0;
++		/* Test before stepping down so that a range reaching
++		 * sector 0 cannot wrap expect around.
++		 */
++		if (expect < start + tree->mtt_step_size)
++			return 1;
++		expect -= tree->mtt_step_size;
++	}
++	return 0;
++}
++
++static int is_range_written(struct pnfs_inval_markings *marks,
++			    sector_t start, sector_t end)
++{
++	int rv; /* _range_has_tag() result for EXTENT_WRITTEN on [start, end) */
++
++	spin_lock(&marks->im_lock); /* im_lock is held across the list walk */
++	rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN);
++	spin_unlock(&marks->im_lock);
++	return rv;
++}
++
++/* Marks sectors in [offset, offset+length) as having been initialized.
++ * All lengths are step-aligned, where step is min(pagesize, blocksize).
++ * Notes where partial block is initialized, and helps prepare it for
++ * complete initialization later.
++ */
++/* Currently assumes offset is page-aligned.  Returns 0 or -ENOMEM. */
++int mark_initialized_sectors(struct pnfs_inval_markings *marks,
++			     sector_t offset, sector_t length,
++			     sector_t **pages)
++{
++	sector_t s, start, end;
++	sector_t *array = NULL; /* Pages to mark */
++
++	dprintk("%s(offset=%llu,len=%llu) enter\n",
++		__func__, (u64)offset, (u64)length);
++	s = max((sector_t) 3,
++		2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
++	dprintk("%s set max=%llu\n", __func__, (u64)s);
++	if (pages) {
++		array = kmalloc(s * sizeof(sector_t), GFP_KERNEL);
++		if (!array)
++			goto outerr;
++		array[0] = ~0; /* empty list: terminator only */
++	}
++	/* Widen the range to block boundaries and preallocate its entries */
++	start = normalize(offset, marks->im_block_size);
++	end = normalize_up(offset + length, marks->im_block_size);
++	if (_preload_range(&marks->im_tree, start, end - start))
++		goto outerr;
++
++	spin_lock(&marks->im_lock);
++
++	for (s = normalize_up(start, PAGE_CACHE_SECTORS);
++	     s < offset; s += PAGE_CACHE_SECTORS) {
++		dprintk("%s pre-area pages\n", __func__);
++		/* Portion of used block is not initialized */
++		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
++			set_needs_init(array, s);
++	}
++	if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
++		goto out_unlock;
++	for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS);
++	     s < end; s += PAGE_CACHE_SECTORS) {
++		dprintk("%s post-area pages\n", __func__);
++		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
++			set_needs_init(array, s);
++	}
++
++	spin_unlock(&marks->im_lock);
++
++	if (pages) {
++		if (array[0] == ~0) { /* nothing was recorded */
++			kfree(array);
++			*pages = NULL;
++		} else
++			*pages = array; /* hand the ~0-terminated list back */
++	}
++	return 0;
++
++ out_unlock:
++	spin_unlock(&marks->im_lock);
++ outerr:
++	if (pages) {
++		kfree(array);
++		*pages = NULL;
++	}
++	return -ENOMEM;
++}
++
++/* Marks sectors in [offset, offset+length) as having been written to disk.
++ * All lengths should be block aligned.  Returns _set_range()'s status.
++ */
++int mark_written_sectors(struct pnfs_inval_markings *marks,
++			 sector_t offset, sector_t length)
++{
++	int rc;
++
++	dprintk("%s(offset=%llu,len=%llu) enter\n", __func__,
++		(u64)offset, (u64)length);
++	spin_lock(&marks->im_lock);
++	rc = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length);
++	spin_unlock(&marks->im_lock);
++	return rc;
++}
++
++static void print_short_extent(struct pnfs_block_short_extent *be)
++{
++	dprintk("PRINT SHORT EXTENT extent %p\n", be);
++	if (!be)
++		return; /* only the pointer line for a NULL extent */
++	dprintk("        be_f_offset %llu\n", (u64)be->bse_f_offset);
++	dprintk("        be_length   %llu\n", (u64)be->bse_length);
++}
++
++void print_clist(struct list_head *list, unsigned int count)
++{
++	struct pnfs_block_short_extent *ext;
++	unsigned int seen = 0; /* entries actually found on the list */
++
++	dprintk("****************\n");
++	dprintk("Extent list looks like:\n");
++	list_for_each_entry(ext, list, bse_node) {
++		print_short_extent(ext);
++		seen++;
++	}
++	if (seen != count)
++		dprintk("\n\nExpected %u entries\n\n\n", count);
++	dprintk("****************\n");
++}
++
++/* Note: In theory, we should do more checking that devid's match between
++ * old and new, but if they don't, the lists are too corrupt to salvage anyway.
++ */
++/* Similar to add_and_merge_extent.  new is either linked in or freed. */
++static void add_to_commitlist(struct pnfs_block_layout *bl,
++			      struct pnfs_block_short_extent *new)
++{
++	struct list_head *clist = &bl->bl_commit;
++	struct pnfs_block_short_extent *old, *save;
++	sector_t end = new->bse_f_offset + new->bse_length;
++
++	dprintk("%s enter\n", __func__);
++	print_short_extent(new);
++	print_clist(clist, bl->bl_count);
++	bl->bl_count++; /* provisional; undone below if new is dropped */
++	/* Scan for proper place to insert, extending new to the left
++	 * as much as possible.
++	 */
++	list_for_each_entry_safe(old, save, clist, bse_node) {
++		if (new->bse_f_offset < old->bse_f_offset)
++			break;
++		if (end <= old->bse_f_offset + old->bse_length) {
++			/* Range is already in list */
++			bl->bl_count--;
++			kfree(new);
++			return;
++		} else if (new->bse_f_offset <=
++				old->bse_f_offset + old->bse_length) {
++			/* new overlaps or abuts existing be */
++			if (new->bse_mdev == old->bse_mdev) {
++				/* extend new to fully replace old */
++				new->bse_length += new->bse_f_offset -
++						old->bse_f_offset;
++				new->bse_f_offset = old->bse_f_offset;
++				list_del(&old->bse_node);
++				bl->bl_count--;
++				kfree(old);
++			}
++		}
++	}
++	/* Note that if we never hit the above break, old will not point to a
++	 * valid extent.  However, in that case &old->bse_node==list.
++	 */
++	list_add_tail(&new->bse_node, &old->bse_node); /* insert before old */
++	/* Scan forward for overlaps.  If we find any, extend new and
++	 * remove the overlapped extent.
++	 */
++	old = list_prepare_entry(new, clist, bse_node);
++	list_for_each_entry_safe_continue(old, save, clist, bse_node) {
++		if (end < old->bse_f_offset)
++			break;
++		/* new overlaps or abuts old */
++		if (new->bse_mdev == old->bse_mdev) {
++			if (end < old->bse_f_offset + old->bse_length) {
++				/* extend new to fully cover old */
++				end = old->bse_f_offset + old->bse_length;
++				new->bse_length = end - new->bse_f_offset;
++			}
++			list_del(&old->bse_node);
++			bl->bl_count--;
++			kfree(old);
++		}
++	}
++	dprintk("%s: after merging\n", __func__);
++	print_clist(clist, bl->bl_count);
++}
++
++/* Note the range described by offset, length is guaranteed to be contained
++ * within be.  Returns 0, or -ENOMEM if the list entry cannot be allocated.
++ */
++int mark_for_commit(struct pnfs_block_extent *be,
++		    sector_t offset, sector_t length)
++{
++	sector_t new_end, end = offset + length;
++	struct pnfs_block_short_extent *new;
++	struct pnfs_block_layout *bl = container_of(be->be_inval,
++						    struct pnfs_block_layout,
++						    bl_inval);
++	/* NOTE(review): bl is derived from be->be_inval; assumes it is non-NULL */
++	new = kmalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return -ENOMEM;
++
++	mark_written_sectors(be->be_inval, offset, length); /* status ignored */
++	/* We want to add the range to commit list, but it must be
++	 * block-normalized, and verified that the normalized range has
++	 * been entirely written to disk.
++	 */
++	new->bse_f_offset = offset;
++	offset = normalize(offset, bl->bl_blocksize);
++	if (offset < new->bse_f_offset) {
++		if (is_range_written(be->be_inval, offset, new->bse_f_offset))
++			new->bse_f_offset = offset;
++		else
++			new->bse_f_offset = offset + bl->bl_blocksize;
++	}
++	new_end = normalize_up(end, bl->bl_blocksize);
++	if (end < new_end) {
++		if (is_range_written(be->be_inval, end, new_end))
++			end = new_end;
++		else
++			end = new_end - bl->bl_blocksize;
++	}
++	if (end <= new->bse_f_offset) { /* no whole block left to commit */
++		kfree(new);
++		return 0;
++	}
++	new->bse_length = end - new->bse_f_offset;
++	new->bse_devid = be->be_devid;
++	new->bse_mdev = be->be_mdev;
++
++	spin_lock(&bl->bl_ext_lock);
++	/* new will be freed, either by add_to_commitlist if it decides not
++	 * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
++	 */
++	add_to_commitlist(bl, new);
++	spin_unlock(&bl->bl_ext_lock);
++	return 0;
++}
++
++static void print_bl_extent(struct pnfs_block_extent *be)
++{
++	dprintk("PRINT EXTENT extent %p\n", be);
++	if (be) {
++		dprintk("        be_f_offset %llu\n", (u64)be->be_f_offset);
++		dprintk("        be_length   %llu\n", (u64)be->be_length);
++		dprintk("        be_v_offset %llu\n", (u64)be->be_v_offset);
++		dprintk("        be_state    %d\n", be->be_state);
++	}
++}
++
++static void
++destroy_extent(struct kref *kref)
++{
++	struct pnfs_block_extent *be;
++
++	be = container_of(kref, struct pnfs_block_extent, be_refcnt);
++	dprintk("%s be=%p\n", __func__, be);
++	kfree(be);
++}
++
++void
++put_extent(struct pnfs_block_extent *be)
++{
++	if (be) {
++		dprintk("%s enter %p (%i)\n", __func__, be,
++			atomic_read(&be->be_refcnt.refcount));
++		kref_put(&be->be_refcnt, destroy_extent);
++	}
++}
++
++struct pnfs_block_extent *alloc_extent(void)
++{
++	struct pnfs_block_extent *be;
++
++	be = kmalloc(sizeof(struct pnfs_block_extent), GFP_KERNEL);
++	if (!be)
++		return NULL;
++	INIT_LIST_HEAD(&be->be_node);
++	kref_init(&be->be_refcnt);
++	be->be_inval = NULL;
++	return be;
++}
++
++struct pnfs_block_extent *
++get_extent(struct pnfs_block_extent *be)
++{
++	if (be)
++		kref_get(&be->be_refcnt);
++	return be;
++}
++
++void print_elist(struct list_head *list)
++{
++	struct pnfs_block_extent *be;
++	dprintk("****************\n");
++	dprintk("Extent list looks like:\n");
++	list_for_each_entry(be, list, be_node) {
++		print_bl_extent(be);
++	}
++	dprintk("****************\n");
++}
++
++static inline int
++extents_consistent(struct pnfs_block_extent *old, struct pnfs_block_extent *new)
++{
++	/* Note this assumes new->be_f_offset >= old->be_f_offset */
++	return (new->be_state == old->be_state) &&
++		((new->be_state == PNFS_BLOCK_NONE_DATA) ||
++		 ((new->be_v_offset - old->be_v_offset ==
++		   new->be_f_offset - old->be_f_offset) &&
++		  new->be_mdev == old->be_mdev));
++}
++
++/* Adds new to appropriate list in bl, modifying new and removing existing
++ * extents as appropriate to deal with overlaps.
++ *
++ * See find_get_extent for list constraints.
++ *
++ * Refcount on new is already set.  If we end up not using it, or error out,
++ * we need to put the reference.
++ *
++ * Lock is held by caller.
++ */
++int
++add_and_merge_extent(struct pnfs_block_layout *bl,
++		     struct pnfs_block_extent *new)
++{
++	struct pnfs_block_extent *be, *tmp;
++	sector_t end = new->be_f_offset + new->be_length;
++	struct list_head *list;
++
++	dprintk("%s enter with be=%p\n", __func__, new);
++	print_bl_extent(new);
++	list = &bl->bl_extents[choose_list(new->be_state)];
++	print_elist(list);
++
++	/* Scan for proper place to insert, extending new to the left
++	 * as much as possible.
++	 */
++	list_for_each_entry_safe_reverse(be, tmp, list, be_node) {
++		if (new->be_f_offset >= be->be_f_offset + be->be_length)
++			break;
++		if (new->be_f_offset >= be->be_f_offset) {
++			if (end <= be->be_f_offset + be->be_length) {
++				/* new is a subset of existing be*/
++				if (extents_consistent(be, new)) {
++					dprintk("%s: new is subset, ignoring\n",
++						__func__);
++					put_extent(new);
++					return 0;
++				} else {
++					goto out_err;
++				}
++			} else {
++				/* |<--   be   -->|
++				 *          |<--   new   -->| */
++				if (extents_consistent(be, new)) {
++					/* extend new to fully replace be */
++					new->be_length += new->be_f_offset -
++						be->be_f_offset;
++					new->be_f_offset = be->be_f_offset;
++					new->be_v_offset = be->be_v_offset;
++					dprintk("%s: removing %p\n", __func__, be);
++					list_del(&be->be_node);
++					put_extent(be);
++				} else {
++					goto out_err;
++				}
++			}
++		} else if (end >= be->be_f_offset + be->be_length) {
++			/* new extent overlap existing be */
++			if (extents_consistent(be, new)) {
++				/* extend new to fully replace be */
++				dprintk("%s: removing %p\n", __func__, be);
++				list_del(&be->be_node);
++				put_extent(be);
++			} else {
++				goto out_err;
++			}
++		} else if (end > be->be_f_offset) {
++			/*           |<--   be   -->|
++			 *|<--   new   -->| */
++			if (extents_consistent(new, be)) {
++				/* extend new to fully replace be */
++				new->be_length += be->be_f_offset + be->be_length -
++					new->be_f_offset - new->be_length;
++				dprintk("%s: removing %p\n", __func__, be);
++				list_del(&be->be_node);
++				put_extent(be);
++			} else {
++				goto out_err;
++			}
++		}
++	}
++	/* Note that if we never hit the above break, be will not point to a
++	 * valid extent.  However, in that case &be->be_node==list.
++	 */
++	list_add(&new->be_node, &be->be_node);
++	dprintk("%s: inserting new\n", __func__);
++	print_elist(list);
++	/* STUB - The per-list consistency checks have all been done,
++	 * should now check cross-list consistency.
++	 */
++	return 0;
++
++ out_err:
++	put_extent(new);
++	return -EIO;
++}
++
++/* Returns extent, or NULL.  If a second READ extent exists, it is returned
++ * in cow_read, if given.
++ *
++ * The extents are kept in two separate ordered lists, one for READ and NONE,
++ * one for READWRITE and INVALID.  Within each list, we assume:
++ * 1. Extents are ordered by file offset.
++ * 2. For any given isect, there is at most one extent that matches.
++ */
++struct pnfs_block_extent *
++find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
++	    struct pnfs_block_extent **cow_read)
++{
++	struct pnfs_block_extent *be, *cow, *ret;
++	int i;
++
++	dprintk("%s enter with isect %llu\n", __func__, (u64)isect);
++	cow = ret = NULL;
++	spin_lock(&bl->bl_ext_lock);
++	for (i = 0; i < EXTENT_LISTS; i++) {
++		if (ret &&
++		    (!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA))
++			break;
++		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
++			if (isect >= be->be_f_offset + be->be_length)
++				break;
++			if (isect >= be->be_f_offset) {
++				/* We have found an extent */
++				dprintk("%s Get %p (%i)\n", __func__, be,
++					atomic_read(&be->be_refcnt.refcount));
++				kref_get(&be->be_refcnt);
++				if (!ret)
++					ret = be;
++				else if (be->be_state != PNFS_BLOCK_READ_DATA)
++					put_extent(be);
++				else
++					cow = be;
++				break;
++			}
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++	if (cow_read)
++		*cow_read = cow;
++	print_bl_extent(ret);
++	return ret;
++}
++
++/* Similar to find_get_extent, but called with lock held, and ignores cow */
++static struct pnfs_block_extent *
++find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect)
++{
++	struct pnfs_block_extent *be, *ret = NULL;
++	int i;
++
++	dprintk("%s enter with isect %llu\n", __func__, (u64)isect);
++	for (i = 0; i < EXTENT_LISTS; i++) {
++		if (ret)
++			break;
++		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
++			if (isect >= be->be_f_offset + be->be_length)
++				break;
++			if (isect >= be->be_f_offset) {
++				/* We have found an extent */
++				dprintk("%s Get %p (%i)\n", __func__, be,
++					atomic_read(&be->be_refcnt.refcount));
++				kref_get(&be->be_refcnt);
++				ret = be;
++				break;
++			}
++		}
++	}
++	print_bl_extent(ret);
++	return ret;
++}
++
++int
++encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++			       struct xdr_stream *xdr,
++			       const struct pnfs_layoutcommit_arg *arg)
++{
++	sector_t start, end;
++	struct pnfs_block_short_extent *lce, *save;
++	unsigned int count = 0;
++	struct bl_layoutupdate_data *bld = arg->layoutdriver_data;
++	struct list_head *ranges = &bld->ranges;
++	__be32 *p, *xdr_start;
++
++	dprintk("%s enter\n", __func__);
++	start = arg->lseg.offset >> 9;
++	end = start + (arg->lseg.length >> 9);
++	dprintk("%s set start=%llu, end=%llu\n",
++		__func__, (u64)start, (u64)end);
++
++	/* BUG - creation of bl_commit is buggy - need to wait for
++	 * entire block to be marked WRITTEN before it can be added.
++	 */
++	spin_lock(&bl->bl_ext_lock);
++	/* Want to adjust for possible truncate */
++	/* We now want to adjust argument range */
++
++	/* XDR encode the ranges found */
++	xdr_start = xdr_reserve_space(xdr, 8);
++	if (!xdr_start)
++		goto out;
++	list_for_each_entry_safe(lce, save, &bl->bl_commit, bse_node) {
++		p = xdr_reserve_space(xdr, 7 * 4 + sizeof(lce->bse_devid.data));
++		if (!p)
++			break;
++		WRITE_DEVID(&lce->bse_devid);
++		WRITE64(lce->bse_f_offset << 9);
++		WRITE64(lce->bse_length << 9);
++		WRITE64(0LL);
++		WRITE32(PNFS_BLOCK_READWRITE_DATA);
++		list_del(&lce->bse_node);
++		list_add_tail(&lce->bse_node, ranges);
++		bl->bl_count--;
++		count++;
++	}
++	xdr_start[0] = cpu_to_be32((xdr->p - xdr_start - 1) * 4);
++	xdr_start[1] = cpu_to_be32(count);
++out:
++	spin_unlock(&bl->bl_ext_lock);
++	dprintk("%s found %i ranges\n", __func__, count);
++	return 0;
++}
++
++/* Helper function to set_to_rw that initialize a new extent */
++static void
++_prep_new_extent(struct pnfs_block_extent *new,
++		 struct pnfs_block_extent *orig,
++		 sector_t offset, sector_t length, int state)
++{
++	kref_init(&new->be_refcnt);
++	/* don't need to INIT_LIST_HEAD(&new->be_node) */
++	memcpy(&new->be_devid, &orig->be_devid, sizeof(struct pnfs_deviceid));
++	new->be_mdev = orig->be_mdev;
++	new->be_f_offset = offset;
++	new->be_length = length;
++	new->be_v_offset = orig->be_v_offset - orig->be_f_offset + offset;
++	new->be_state = state;
++	new->be_inval = orig->be_inval;
++}
++
++/* Tries to merge be with extent in front of it in list.
++ * Frees storage if not used.
++ */
++static struct pnfs_block_extent *
++_front_merge(struct pnfs_block_extent *be, struct list_head *head,
++	     struct pnfs_block_extent *storage)
++{
++	struct pnfs_block_extent *prev;
++
++	if (!storage)
++		goto no_merge;
++	if (&be->be_node == head || be->be_node.prev == head)
++		goto no_merge;
++	prev = list_entry(be->be_node.prev, struct pnfs_block_extent, be_node);
++	if ((prev->be_f_offset + prev->be_length != be->be_f_offset) ||
++	    !extents_consistent(prev, be))
++		goto no_merge;
++	_prep_new_extent(storage, prev, prev->be_f_offset,
++			 prev->be_length + be->be_length, prev->be_state);
++	list_replace(&prev->be_node, &storage->be_node);
++	put_extent(prev);
++	list_del(&be->be_node);
++	put_extent(be);
++	return storage;
++
++ no_merge:
++	kfree(storage);
++	return be;
++}
++
++/* If the extent covering offset is INVALID, split it so that up to length of
++ * it, starting at offset, becomes READWRITE.  Returns the end of the extent
++ * found, or offset + length if none was found or allocation failed.
++ */
++static u64
++set_to_rw(struct pnfs_block_layout *bl, u64 offset, u64 length)
++{
++	u64 rv = offset + length;
++	struct pnfs_block_extent *be, *e1, *e2, *e3, *new, *old;
++	struct pnfs_block_extent *children[3];
++	struct pnfs_block_extent *merge1 = NULL, *merge2 = NULL;
++	int i = 0, j;
++
++	dprintk("%s(%llu, %llu)\n", __func__, offset, length);
++	/* Create storage for up to three new extents e1, e2, e3 */
++	e1 = kmalloc(sizeof(*e1), GFP_KERNEL);
++	e2 = kmalloc(sizeof(*e2), GFP_KERNEL);
++	e3 = kmalloc(sizeof(*e3), GFP_KERNEL);
++	/* BUG - we are ignoring any failure */
++	if (!e1 || !e2 || !e3)
++		goto out_nosplit;
++
++	spin_lock(&bl->bl_ext_lock);
++	be = find_get_extent_locked(bl, offset);
++	if (be)
++		rv = be->be_f_offset + be->be_length;
++	if (!be || be->be_state != PNFS_BLOCK_INVALID_DATA) {
++		put_extent(be);	/* NULL-safe; drops the lookup reference */
++		spin_unlock(&bl->bl_ext_lock);
++		goto out_nosplit;
++	}
++	/* Add e* to children, bumping e*'s krefs */
++	if (be->be_f_offset != offset) {
++		_prep_new_extent(e1, be, be->be_f_offset,
++				 offset - be->be_f_offset,
++				 PNFS_BLOCK_INVALID_DATA);
++		children[i++] = e1;
++		print_bl_extent(e1);
++	} else
++		merge1 = e1;
++	_prep_new_extent(e2, be, offset,
++			 min(length, be->be_f_offset + be->be_length - offset),
++			 PNFS_BLOCK_READWRITE_DATA);
++	children[i++] = e2;
++	print_bl_extent(e2);
++	if (offset + length < be->be_f_offset + be->be_length) {
++		_prep_new_extent(e3, be, e2->be_f_offset + e2->be_length,
++				 be->be_f_offset + be->be_length -
++				 offset - length,
++				 PNFS_BLOCK_INVALID_DATA);
++		children[i++] = e3;
++		print_bl_extent(e3);
++	} else
++		merge2 = e3;
++	/* Swap be for the e* on the list.  No refs are taken on the e*: the
++	 * list owns the base reference set when they were init'ed. */
++	if (i < 3)
++		children[i] = NULL;
++	new = children[0];
++	list_replace(&be->be_node, &new->be_node);
++	put_extent(be);
++	new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge1);
++	for (j = 1; j < i; j++) {
++		old = new;
++		new = children[j];
++		list_add(&new->be_node, &old->be_node);
++	}
++	if (merge2) {
++		/* This is a HACK, should just create a _back_merge function */
++		new = list_entry(new->be_node.next,
++				 struct pnfs_block_extent, be_node);
++		new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge2);
++	}
++	spin_unlock(&bl->bl_ext_lock);
++	/* The base reference is gone; this put schedules be for destruction */
++	put_extent(be);
++	dprintk("%s returns %llu after split\n", __func__, rv);
++	return rv;
++
++ out_nosplit:
++	kfree(e1);
++	kfree(e2);
++	kfree(e3);
++	dprintk("%s returns %llu without splitting\n", __func__, rv);
++	return rv;
++}
++
++void
++clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++			      const struct pnfs_layoutcommit_arg *arg,
++			      int status)
++{
++	struct bl_layoutupdate_data *bld = arg->layoutdriver_data;
++	struct pnfs_block_short_extent *lce, *save;
++
++	dprintk("%s status %d\n", __func__, status);
++	list_for_each_entry_safe_reverse(lce, save, &bld->ranges, bse_node) {
++		if (likely(!status)) {
++			u64 offset = lce->bse_f_offset;
++			u64 end = offset + lce->bse_length;
++
++			do {
++				offset = set_to_rw(bl, offset, end - offset);
++			} while (offset < end);
++
++			kfree(lce);
++		} else {
++			spin_lock(&bl->bl_ext_lock);
++			add_to_commitlist(bl, lce);
++			spin_unlock(&bl->bl_ext_lock);
++		}
++	}
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-08-23 12:09:03.292511531 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-08-23 12:09:03.293491476 -0400
+@@ -0,0 +1,6 @@
++#
++# Makefile for the pNFS block layout driver kernel module
++#
++obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o
++blocklayoutdriver-objs := blocklayout.o blocklayoutdev.o blocklayoutdm.o \
++			extents.o block-device-discovery-pipe.o
+diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/callback.h
+--- linux-2.6.34.noarch/fs/nfs/callback.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-08-23 12:09:03.293491476 -0400
+@@ -8,6 +8,8 @@
+ #ifndef __LINUX_FS_NFS_CALLBACK_H
+ #define __LINUX_FS_NFS_CALLBACK_H
+ 
++#include <linux/pnfs_xdr.h>
++
+ #define NFS4_CALLBACK 0x40000000
+ #define NFS4_CALLBACK_XDRSIZE 2048
+ #define NFS4_CALLBACK_BUFSIZE (1024 + NFS4_CALLBACK_XDRSIZE)
+@@ -72,6 +74,8 @@ struct cb_recallargs {
+ 
+ #if defined(CONFIG_NFS_V4_1)
+ 
++#include <linux/pnfs_xdr.h>
++
+ struct referring_call {
+ 	uint32_t			rc_sequenceid;
+ 	uint32_t			rc_slotid;
+@@ -111,6 +115,13 @@ extern int nfs41_validate_delegation_sta
+ 
+ #define RCA4_TYPE_MASK_RDATA_DLG	0
+ #define RCA4_TYPE_MASK_WDATA_DLG	1
++#define RCA4_TYPE_MASK_DIR_DLG         2
++#define RCA4_TYPE_MASK_FILE_LAYOUT     3
++#define RCA4_TYPE_MASK_BLK_LAYOUT      4
++#define RCA4_TYPE_MASK_OBJ_LAYOUT_MIN  8
++#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX  9
++#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12
++#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15
+ 
+ struct cb_recallanyargs {
+ 	struct sockaddr	*craa_addr;
+@@ -127,6 +138,37 @@ struct cb_recallslotargs {
+ extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args,
+ 					  void *dummy);
+ 
++struct cb_pnfs_layoutrecallargs {
++	struct sockaddr		*cbl_addr;
++	struct nfs_fh		cbl_fh;
++	struct nfs4_pnfs_layout_segment cbl_seg;
++	struct nfs_fsid		cbl_fsid;
++	uint32_t		cbl_recall_type;
++	uint32_t		cbl_layout_type;
++	uint32_t		cbl_layoutchanged;
++	nfs4_stateid		cbl_stateid;
++};
++
++extern unsigned pnfs_cb_layoutrecall(struct cb_pnfs_layoutrecallargs *args,
++				     void *dummy);
++
++struct cb_pnfs_devicenotifyitem {
++	uint32_t		cbd_notify_type;
++	uint32_t		cbd_layout_type;
++	struct pnfs_deviceid	cbd_dev_id;
++	uint32_t		cbd_immediate;
++};
++
++/* XXX: Should be dynamic up to max compound size */
++#define NFS4_DEV_NOTIFY_MAXENTRIES 10
++struct cb_pnfs_devicenotifyargs {
++	struct sockaddr			*addr;
++	int				 ndevs;
++	struct cb_pnfs_devicenotifyitem	 devs[NFS4_DEV_NOTIFY_MAXENTRIES];
++};
++
++extern unsigned pnfs_cb_devicenotify(struct cb_pnfs_devicenotifyargs *args,
++				     void *dummy);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
+diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/nfs/callback_proc.c
+--- linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-08-23 12:09:03.294522414 -0400
+@@ -8,10 +8,15 @@
+ #include <linux/nfs4.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/slab.h>
++#include <linux/kthread.h>
++#include <linux/module.h>
++#include <linux/writeback.h>
++#include <linux/nfs4_pnfs.h>
+ #include "nfs4_fs.h"
+ #include "callback.h"
+ #include "delegation.h"
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ #ifdef NFS_DEBUG
+ #define NFSDBG_FACILITY NFSDBG_CALLBACK
+@@ -62,16 +67,6 @@ out:
+ 	return res->status;
+ }
+ 
+-static int (*nfs_validate_delegation_stateid(struct nfs_client *clp))(struct nfs_delegation *, const nfs4_stateid *)
+-{
+-#if defined(CONFIG_NFS_V4_1)
+-	if (clp->cl_minorversion > 0)
+-		return nfs41_validate_delegation_stateid;
+-#endif
+-	return nfs4_validate_delegation_stateid;
+-}
+-
+-
+ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
+ {
+ 	struct nfs_client *clp;
+@@ -92,8 +87,7 @@ __be32 nfs4_callback_recall(struct cb_re
+ 		inode = nfs_delegation_find_inode(clp, &args->fh);
+ 		if (inode != NULL) {
+ 			/* Set up a helper thread to actually return the delegation */
+-			switch (nfs_async_inode_return_delegation(inode, &args->stateid,
+-								  nfs_validate_delegation_stateid(clp))) {
++			switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
+ 				case 0:
+ 					res = 0;
+ 					break;
+@@ -116,24 +110,364 @@ out:
+ 
+ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
+ {
+-	if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data,
+-					 sizeof(delegation->stateid.data)) != 0)
++	if (delegation == NULL || memcmp(delegation->stateid.u.data,
++					 stateid->u.data,
++					 sizeof(delegation->stateid.u.data)))
+ 		return 0;
+ 	return 1;
+ }
+ 
+ #if defined(CONFIG_NFS_V4_1)
+ 
++static bool
++pnfs_is_next_layout_stateid(const struct pnfs_layout_type *lo,
++			    const nfs4_stateid stateid)
++{
++	int seqlock;
++	bool res;
++	u32 oldseqid, newseqid;
++
++	do {
++		seqlock = read_seqbegin(&lo->seqlock);
++		oldseqid = be32_to_cpu(lo->stateid.u.stateid.seqid);
++		newseqid = be32_to_cpu(stateid.u.stateid.seqid);
++		res = !memcmp(lo->stateid.u.stateid.other,
++			      stateid.u.stateid.other,
++			      NFS4_STATEID_OTHER_SIZE);
++		if (res) { /* comparing layout stateids */
++			if (oldseqid == ~0)
++				res = (newseqid == 1);
++			else
++				res = (newseqid == oldseqid + 1);
++		} else { /* open stateid */
++			res = !memcmp(lo->stateid.u.data,
++				      &zero_stateid,
++				      NFS4_STATEID_SIZE);
++			if (res)
++				res = (newseqid == 1);
++		}
++	} while (read_seqretry(&lo->seqlock, seqlock));
++
++	return res;
++}
++
++/*
++ * Retrieve the first inode on clp->cl_layouts that matches the layout recall
++ * parameters and still has a layout, or NULL if there is none.
++ * Note: caller must iput(inode) to dereference the inode.
++ */
++static struct inode *
++nfs_layoutrecall_find_inode(struct nfs_client *clp,
++			    const struct cb_pnfs_layoutrecallargs *args)
++{
++	struct nfs_inode *nfsi;
++	struct pnfs_layout_type *layout;
++	struct nfs_server *server;
++	struct inode *ino = NULL;
++
++	dprintk("%s: Begin recall_type=%d clp %p\n",
++		__func__, args->cbl_recall_type, clp);
++
++	spin_lock(&clp->cl_lock);
++	list_for_each_entry(layout, &clp->cl_layouts, lo_layouts) {
++		nfsi = PNFS_NFS_INODE(layout);
++		if (!nfsi)
++			continue;
++		dprintk("%s: Searching inode=%lu\n",
++			__func__, nfsi->vfs_inode.i_ino);
++
++		if (args->cbl_recall_type == RETURN_FILE) {
++			if (nfs_compare_fh(&args->cbl_fh, &nfsi->fh))
++				continue;
++		} else if (args->cbl_recall_type == RETURN_FSID) {
++			server = NFS_SERVER(&nfsi->vfs_inode);
++			if (server->fsid.major != args->cbl_fsid.major ||
++			    server->fsid.minor != args->cbl_fsid.minor)
++				continue;
++		}
++		/* Make sure client didn't clean up layout without
++		 * telling the server */
++		if (!has_layout(nfsi))
++			continue;
++
++		ino = igrab(&nfsi->vfs_inode);
++		if (!ino)	/* could not pin this inode; keep looking */
++			continue;
++		dprintk("%s: Found inode=%p\n", __func__, ino);
++		break;
++	}
++	spin_unlock(&clp->cl_lock);
++	return ino;
++}
++
++struct recall_layout_threadargs {
++	struct inode *inode;
++	struct nfs_client *clp;
++	struct completion started;
++	struct cb_pnfs_layoutrecallargs *rl;
++	int result;
++};
++
++/* Thread body for an asynchronous layout recall.  The recall status is
++ * reported through args->result before args->started is completed; args
++ * lives on the starter's stack and must not be touched after that.
++ */
++static int pnfs_recall_layout(void *data)
++{
++	struct inode *inode, *ino;
++	struct nfs_client *clp;
++	struct cb_pnfs_layoutrecallargs rl;
++	struct nfs4_pnfs_layoutreturn *lrp;
++	struct recall_layout_threadargs *args = data;
++	int status = 0;
++
++	daemonize("nfsv4-layoutreturn");
++	dprintk("%s: recall_type=%d fsid 0x%llx-0x%llx start\n",
++		__func__, args->rl->cbl_recall_type,
++		args->rl->cbl_fsid.major, args->rl->cbl_fsid.minor);
++
++	clp = args->clp;
++	inode = args->inode;
++	rl = *args->rl;
++	/* support whole file layouts only */
++	rl.cbl_seg.offset = 0;
++	rl.cbl_seg.length = NFS4_MAX_UINT64;
++
++	if (rl.cbl_recall_type == RETURN_FILE) {
++		if (pnfs_is_next_layout_stateid(NFS_I(inode)->layout,
++						rl.cbl_stateid))
++			status = pnfs_return_layout(inode, &rl.cbl_seg,
++						    &rl.cbl_stateid, RETURN_FILE,
++						    false);
++		else
++			status = cpu_to_be32(NFS4ERR_DELAY);
++		if (status)
++			dprintk("%s RETURN_FILE error: %d\n", __func__, status);
++		else
++			status = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
++		args->result = status;
++		complete(&args->started);
++		goto out;
++	}
++
++	status = cpu_to_be32(NFS4_OK);
++	args->result = status;
++	complete(&args->started);
++	args = NULL;
++
++	/* IMPROVEME: This loop is inefficient, running in O(|s_inodes|^2) */
++	while ((ino = nfs_layoutrecall_find_inode(clp, &rl)) != NULL) {
++		/* FIXME: need to check status on pnfs_return_layout */
++		pnfs_return_layout(ino, &rl.cbl_seg, NULL, RETURN_FILE, false);
++		iput(ino);
++	}
++
++	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
++	if (!lrp) {
++		dprintk("%s: allocation failed. Cannot send last LAYOUTRETURN\n",
++			__func__);
++		goto out;
++	}
++	/* send final layoutreturn; NOTE(review): no ref is held on inode here */
++	lrp->args.reclaim = 0;
++	lrp->args.layout_type = rl.cbl_layout_type;
++	lrp->args.return_type = rl.cbl_recall_type;
++	lrp->args.lseg = rl.cbl_seg;
++	lrp->args.inode = inode;
++	pnfs4_proc_layoutreturn(lrp, true);
++
++out:
++	clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
++	nfs_put_client(clp);
++	dprintk("%s: exit status %d\n", __func__, 0);
++	module_put_and_exit(0);	/* does not return */
++	return 0;
++}
++
++/*
++ * Asynchronous layout recall!
++ */
++static int pnfs_async_return_layout(struct nfs_client *clp, struct inode *inode,
++				    struct cb_pnfs_layoutrecallargs *rl)
++{
++	struct recall_layout_threadargs data = {
++		.clp = clp,
++		.inode = inode,
++		.rl = rl,
++	};
++	struct task_struct *t;
++	int status = -EAGAIN;
++
++	dprintk("%s: -->\n", __func__);
++
++	/* FIXME: do not allow two concurrent layout recalls */
++	if (test_and_set_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state))
++		return status;
++
++	init_completion(&data.started);
++	__module_get(THIS_MODULE);
++	if (!atomic_inc_not_zero(&clp->cl_count))
++		goto out_put_no_client;
++
++	t = kthread_run(pnfs_recall_layout, &data, "%s", "pnfs_recall_layout");
++	if (IS_ERR(t)) {
++		printk(KERN_INFO "NFS: Layout recall callback thread failed "
++			"for client (clientid %08x/%08x)\n",
++			(unsigned)(clp->cl_clientid >> 32),
++			(unsigned)(clp->cl_clientid));
++		status = PTR_ERR(t);
++		goto out_module_put;
++	}
++	wait_for_completion(&data.started);
++	return data.result;
++out_module_put:
++	nfs_put_client(clp);
++out_put_no_client:
++	clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
++	module_put(THIS_MODULE);
++	return status;
++}
++
++/* Kick off a RETURN_ALL recall of clp's layouts; returns 0 if it holds none */
++static int pnfs_recall_all_layouts(struct nfs_client *clp)
++{
++	/* Everything not named here (offset, layout type, fsid, ...) is zeroed:
++	 * the recall thread copies the whole structure and reads those too. */
++	struct cb_pnfs_layoutrecallargs rl = {
++		.cbl_recall_type = RETURN_ALL,
++		.cbl_seg.iomode = IOMODE_ANY,
++		.cbl_seg.length = NFS4_MAX_UINT64,
++	};
++	struct inode *inode;
++	int status;
++
++	/* we need the inode to get the nfs_server struct */
++	inode = nfs_layoutrecall_find_inode(clp, &rl);
++	if (!inode)
++		return 0;
++	status = pnfs_async_return_layout(clp, inode, &rl);
++	iput(inode);
++	return status;
++}
++
++__be32 pnfs_cb_layoutrecall(struct cb_pnfs_layoutrecallargs *args,
++			    void *dummy)
++{
++	struct nfs_client *clp;
++	struct inode *inode = NULL;
++	__be32 res;
++	int status;
++	unsigned int num_client = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
++	clp  = nfs_find_client(args->cbl_addr, 4);
++	if (clp == NULL) {
++		dprintk("%s: no client for addr %u.%u.%u.%u\n",
++			__func__, NIPQUAD(args->cbl_addr));
++		goto out;
++	}
++
++	res = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
++	do {
++		struct nfs_client *prev = clp;
++		num_client++;
++		/* the callback must come from the MDS personality */
++		if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
++			goto loop;
++		if (args->cbl_recall_type == RETURN_FILE) {
++			inode = nfs_layoutrecall_find_inode(clp, args);
++			if (inode != NULL) {
++				status = pnfs_async_return_layout(clp, inode,
++								  args);
++				if (status)
++					res = cpu_to_be32(NFS4ERR_DELAY);
++				iput(inode);
++			}
++		} else { /* _ALL or _FSID */
++			/* we need the inode to get the nfs_server struct */
++			inode = nfs_layoutrecall_find_inode(clp, args);
++			if (!inode)
++				goto loop;
++			status = pnfs_async_return_layout(clp, inode, args);
++			if (status)
++				res = cpu_to_be32(NFS4ERR_DELAY);
++			iput(inode);
++		}
++loop:
++		clp = nfs_find_client_next(prev);
++		nfs_put_client(prev);
++	} while (clp != NULL);
++
++out:
++	dprintk("%s: exit with status = %d numclient %u\n",
++		__func__, ntohl(res), num_client);
++	return res;
++}
++
++/* Remove the deviceid(s) from the nfs_client deviceid cache */
++static __be32 pnfs_devicenotify_client(struct nfs_client *clp,
++				       struct cb_pnfs_devicenotifyargs *args)
++{
++	uint32_t type;
++	int i;
++
++	dprintk("%s: --> clp %p\n", __func__, clp);
++
++	for (i = 0; i < args->ndevs; i++) {
++		struct cb_pnfs_devicenotifyitem *dev = &args->devs[i];
++		type = dev->cbd_notify_type;
++		if (type == NOTIFY_DEVICEID4_DELETE && clp->cl_devid_cache)
++			nfs4_delete_device(clp->cl_devid_cache,
++					   &dev->cbd_dev_id);
++		else if (type == NOTIFY_DEVICEID4_CHANGE)
++			printk(KERN_ERR "%s: NOTIFY_DEVICEID4_CHANGE "
++					"not supported\n", __func__);
++	}
++	return 0;
++}
++
++__be32 pnfs_cb_devicenotify(struct cb_pnfs_devicenotifyargs *args,
++			    void *dummy)
++{
++	struct nfs_client *clp;
++	__be32 res = 0;
++	unsigned int num_client = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	res = __constant_htonl(NFS4ERR_INVAL);
++	clp = nfs_find_client(args->addr, 4);
++	if (clp == NULL) {
++		dprintk("%s: no client for addr %u.%u.%u.%u\n",
++			__func__, NIPQUAD(args->addr));
++		goto out;
++	}
++
++	do {
++		struct nfs_client *prev = clp;
++		num_client++;
++		res = pnfs_devicenotify_client(clp, args);
++		clp = nfs_find_client_next(prev);
++		nfs_put_client(prev);
++	} while (clp != NULL);
++
++out:
++	dprintk("%s: exit with status = %d numclient %u\n",
++		__func__, ntohl(res), num_client);
++	return res;
++}
++
+ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
+ {
+ 	if (delegation == NULL)
+ 		return 0;
+ 
+-	/* seqid is 4-bytes long */
+-	if (((u32 *) &stateid->data)[0] != 0)
++	if (stateid->u.stateid.seqid != 0)
+ 		return 0;
+-	if (memcmp(&delegation->stateid.data[4], &stateid->data[4],
+-		   sizeof(stateid->data)-4))
++	if (memcmp(&delegation->stateid.u.stateid.other,
++		   &stateid->u.stateid.other,
++		   NFS4_STATEID_OTHER_SIZE))
+ 		return 0;
+ 
+ 	return 1;
+@@ -335,13 +669,37 @@ out:
+ 	return status;
+ }
+ 
++static inline bool
++validate_bitmap_values(const unsigned long *mask)
++{
++	int i;
++
++	/* Valid only if every set bit is a defined RCA4_TYPE_MASK_* value (an
++	 * empty mask passes).  NOTE(review): checks bits 0-31 only, assuming
++	 * the caller's mask is a 32-bit word - confirm. */
++	for (i = 0; i < 32; i++) {
++		if (!test_bit(i, mask))
++			continue;
++		if (i <= RCA4_TYPE_MASK_BLK_LAYOUT)	/* RDATA_DLG..BLK_LAYOUT */
++			continue;
++		if (i >= RCA4_TYPE_MASK_OBJ_LAYOUT_MIN &&
++		    i <= RCA4_TYPE_MASK_OBJ_LAYOUT_MAX)
++			continue;
++		if (i >= RCA4_TYPE_MASK_OTHER_LAYOUT_MIN &&
++		    i <= RCA4_TYPE_MASK_OTHER_LAYOUT_MAX)
++			continue;
++		return false;
++	}
++	return true;
++}
++
+ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
+ {
+ 	struct nfs_client *clp;
+ 	__be32 status;
+ 	fmode_t flags = 0;
+ 
+-	status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
++	status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
+ 	clp = nfs_find_client(args->craa_addr, 4);
+ 	if (clp == NULL)
+ 		goto out;
+@@ -349,16 +707,25 @@ __be32 nfs4_callback_recallany(struct cb
+ 	dprintk("NFS: RECALL_ANY callback request from %s\n",
+ 		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+ 
++	status = cpu_to_be32(NFS4ERR_INVAL);
++	if (!validate_bitmap_values((const unsigned long *)
++				    &args->craa_type_mask))
++		return status;
++
++	status = cpu_to_be32(NFS4_OK);
+ 	if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
+ 		     &args->craa_type_mask))
+ 		flags = FMODE_READ;
+ 	if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
+ 		     &args->craa_type_mask))
+ 		flags |= FMODE_WRITE;
++	if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
++		     &args->craa_type_mask))
++		if (pnfs_recall_all_layouts(clp) == -EAGAIN)
++			status = cpu_to_be32(NFS4ERR_DELAY);
+ 
+ 	if (flags)
+ 		nfs_expire_all_delegation_types(clp, flags);
+-	status = htonl(NFS4_OK);
+ out:
+ 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
+ 	return status;
+diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/nfs/callback_xdr.c
+--- linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-08-23 12:09:03.295502055 -0400
+@@ -22,6 +22,8 @@
+ #define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+ 
+ #if defined(CONFIG_NFS_V4_1)
++#define CB_OP_LAYOUTRECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
++#define CB_OP_DEVICENOTIFY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+ #define CB_OP_SEQUENCE_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ + \
+ 					4 + 1 + 3)
+ #define CB_OP_RECALLANY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+@@ -136,7 +138,7 @@ static __be32 decode_stateid(struct xdr_
+ 	p = read_buf(xdr, 16);
+ 	if (unlikely(p == NULL))
+ 		return htonl(NFS4ERR_RESOURCE);
+-	memcpy(stateid->data, p, 16);
++	memcpy(stateid->u.data, p, 16);
+ 	return 0;
+ }
+ 
+@@ -220,6 +222,148 @@ out:
+ 
+ #if defined(CONFIG_NFS_V4_1)
+ 
++static __be32 decode_pnfs_layoutrecall_args(struct svc_rqst *rqstp,
++					    struct xdr_stream *xdr,
++					    struct cb_pnfs_layoutrecallargs *args)
++{
++	__be32 *p;
++	__be32 status = 0;
++
++	args->cbl_addr = svc_addr(rqstp);
++	p = read_buf(xdr, 4 * sizeof(uint32_t));
++	if (unlikely(p == NULL)) {
++		status = htonl(NFS4ERR_BADXDR);
++		goto out;
++	}
++
++	args->cbl_layout_type = ntohl(*p++);
++	args->cbl_seg.iomode = ntohl(*p++);
++	args->cbl_layoutchanged = ntohl(*p++);
++	args->cbl_recall_type = ntohl(*p++);
++
++	if (likely(args->cbl_recall_type == RETURN_FILE)) {
++		status = decode_fh(xdr, &args->cbl_fh);
++		if (unlikely(status != 0))
++			goto out;
++
++		p = read_buf(xdr, 2 * sizeof(uint64_t));
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_BADXDR);
++			goto out;
++		}
++		p = xdr_decode_hyper(p, &args->cbl_seg.offset);
++		p = xdr_decode_hyper(p, &args->cbl_seg.length);
++		status = decode_stateid(xdr, &args->cbl_stateid);
++		if (unlikely(status != 0))
++			goto out;
++	} else if (args->cbl_recall_type == RETURN_FSID) {
++		p = read_buf(xdr, 2 * sizeof(uint64_t));
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_BADXDR);
++			goto out;
++		}
++		p = xdr_decode_hyper(p, &args->cbl_fsid.major);
++		p = xdr_decode_hyper(p, &args->cbl_fsid.minor);
++	}
++	dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d "
++		"fsid %llx-%llx fhsize %d\n", __func__,
++		args->cbl_layout_type, args->cbl_seg.iomode,
++		args->cbl_layoutchanged, args->cbl_recall_type,
++		args->cbl_fsid.major, args->cbl_fsid.minor,
++		args->cbl_fh.size);
++out:
++	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
++	return status;
++}
++
++static
++__be32 decode_pnfs_devicenotify_args(struct svc_rqst *rqstp,
++				     struct xdr_stream *xdr,
++				     struct cb_pnfs_devicenotifyargs *args)
++{
++	__be32 *p;
++	__be32 status = 0;
++	u32 tmp;
++	int n, i;
++	args->ndevs = 0;
++
++	args->addr = svc_addr(rqstp);
++
++	/* Num of device notifications */
++	p = read_buf(xdr, sizeof(uint32_t));
++	if (unlikely(p == NULL)) {
++		status = htonl(NFS4ERR_RESOURCE);
++		goto out;
++	}
++	n = ntohl(*p++);
++	if (n <= 0)
++		goto out;
++
++	/* XXX: need to possibly return error in this case */
++	if (n > NFS4_DEV_NOTIFY_MAXENTRIES) {
++		dprintk("%s: Processing (%d) notifications out of (%d)\n",
++			__func__, NFS4_DEV_NOTIFY_MAXENTRIES, n);
++		n = NFS4_DEV_NOTIFY_MAXENTRIES;
++	}
++
++	/* Decode each dev notification */
++	for (i = 0; i < n; i++) {
++		struct cb_pnfs_devicenotifyitem *dev = &args->devs[i];
++
++		p = read_buf(xdr, (4 * sizeof(uint32_t))
++			     + NFS4_PNFS_DEVICEID4_SIZE);
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_RESOURCE);
++			goto out;
++		}
++
++		tmp = ntohl(*p++);	/* bitmap size */
++		if (tmp != 1) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++		dev->cbd_notify_type = ntohl(*p++);
++		if (dev->cbd_notify_type != NOTIFY_DEVICEID4_CHANGE &&
++		    dev->cbd_notify_type != NOTIFY_DEVICEID4_DELETE) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++
++		tmp = ntohl(*p++);	/* opaque size */
++		if (((dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) &&
++		     (tmp != NFS4_PNFS_DEVICEID4_SIZE + 8)) ||
++		    ((dev->cbd_notify_type == NOTIFY_DEVICEID4_DELETE) &&
++		     (tmp != NFS4_PNFS_DEVICEID4_SIZE + 4))) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++		dev->cbd_layout_type = ntohl(*p++);
++		memcpy(dev->cbd_dev_id.data, p, NFS4_PNFS_DEVICEID4_SIZE);
++		p += XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE);
++		/* Only a CHANGE notification carries the immediate flag */
++		if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) {
++			p = read_buf(xdr, sizeof(uint32_t));
++			if (unlikely(p == NULL)) {
++				status = htonl(NFS4ERR_DELAY);
++				goto out;
++			}
++			dev->cbd_immediate = ntohl(*p++);
++		} else {
++			dev->cbd_immediate = 0;
++		}
++
++		args->ndevs++;
++
++		dprintk("%s: type %d layout 0x%x immediate %d\n",
++			__func__, dev->cbd_notify_type, dev->cbd_layout_type,
++			dev->cbd_immediate);
++	}
++out:
++	dprintk("%s: status %d ndevs %d\n",
++		__func__, ntohl(status), args->ndevs);
++	return status;
++}
++
+ static __be32 decode_sessionid(struct xdr_stream *xdr,
+ 				 struct nfs4_sessionid *sid)
+ {
+@@ -574,11 +718,11 @@ preprocess_nfs41_op(int nop, unsigned in
+ 	case OP_CB_SEQUENCE:
+ 	case OP_CB_RECALL_ANY:
+ 	case OP_CB_RECALL_SLOT:
++	case OP_CB_LAYOUTRECALL:
++	case OP_CB_NOTIFY_DEVICEID:
+ 		*op = &callback_ops[op_nr];
+ 		break;
+ 
+-	case OP_CB_LAYOUTRECALL:
+-	case OP_CB_NOTIFY_DEVICEID:
+ 	case OP_CB_NOTIFY:
+ 	case OP_CB_PUSH_DELEG:
+ 	case OP_CB_RECALLABLE_OBJ_AVAIL:
+@@ -739,6 +883,18 @@ static struct callback_op callback_ops[]
+ 		.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
+ 	},
+ #if defined(CONFIG_NFS_V4_1)
++	[OP_CB_LAYOUTRECALL] = {
++		.process_op = (callback_process_op_t)pnfs_cb_layoutrecall,
++		.decode_args =
++			(callback_decode_arg_t)decode_pnfs_layoutrecall_args,
++		.res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
++	},
++	[OP_CB_NOTIFY_DEVICEID] = {
++		.process_op = (callback_process_op_t)pnfs_cb_devicenotify,
++		.decode_args =
++			(callback_decode_arg_t)decode_pnfs_devicenotify_args,
++		.res_maxsize = CB_OP_DEVICENOTIFY_RES_MAXSZ,
++	},
+ 	[OP_CB_SEQUENCE] = {
+ 		.process_op = (callback_process_op_t)nfs4_callback_sequence,
+ 		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
+diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/client.c
+--- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-08-23 12:08:29.037481540 -0400
++++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-23 12:09:03.297501650 -0400
+@@ -39,6 +39,7 @@
+ #include <net/ipv6.h>
+ #include <linux/nfs_xdr.h>
+ #include <linux/sunrpc/bc_xprt.h>
++#include <linux/nfs4_pnfs.h>
+ 
+ #include <asm/system.h>
+ 
+@@ -48,6 +49,7 @@
+ #include "iostat.h"
+ #include "internal.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_CLIENT
+ 
+@@ -150,11 +152,14 @@ static struct nfs_client *nfs_alloc_clie
+ 	clp->cl_boot_time = CURRENT_TIME;
+ 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
+ 	clp->cl_minorversion = cl_init->minorversion;
++	clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
+ #endif
+ 	cred = rpc_lookup_machine_cred();
+ 	if (!IS_ERR(cred))
+ 		clp->cl_machine_cred = cred;
+-
++#if defined(CONFIG_NFS_V4_1)
++	INIT_LIST_HEAD(&clp->cl_layouts);
++#endif
+ 	nfs_fscache_get_client_cookie(clp);
+ 
+ 	return clp;
+@@ -178,7 +183,7 @@ static void nfs4_clear_client_minor_vers
+ 		clp->cl_session = NULL;
+ 	}
+ 
+-	clp->cl_call_sync = _nfs4_call_sync;
++	clp->cl_mvops = nfs_v4_minor_ops[0];
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
+@@ -188,7 +193,7 @@ static void nfs4_clear_client_minor_vers
+ static void nfs4_destroy_callback(struct nfs_client *clp)
+ {
+ 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+-		nfs_callback_down(clp->cl_minorversion);
++		nfs_callback_down(clp->cl_mvops->minor_version);
+ }
+ 
+ static void nfs4_shutdown_client(struct nfs_client *clp)
+@@ -251,6 +256,7 @@ void nfs_put_client(struct nfs_client *c
+ 		nfs_free_client(clp);
+ 	}
+ }
++EXPORT_SYMBOL(nfs_put_client);
+ 
+ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ /*
+@@ -343,7 +349,7 @@ static int nfs_sockaddr_match_ipaddr(con
+  * Test if two socket addresses represent the same actual socket,
+  * by comparing (only) relevant fields, including the port number.
+  */
+-static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
++int nfs_sockaddr_cmp(const struct sockaddr *sa1,
+ 			    const struct sockaddr *sa2)
+ {
+ 	if (sa1->sa_family != sa2->sa_family)
+@@ -357,6 +363,7 @@ static int nfs_sockaddr_cmp(const struct
+ 	}
+ 	return 0;
+ }
++EXPORT_SYMBOL(nfs_sockaddr_cmp);
+ 
+ /*
+  * Find a client by IP address and protocol version
+@@ -548,6 +555,7 @@ int nfs4_check_client_ready(struct nfs_c
+ 		return -EPROTONOSUPPORT;
+ 	return 0;
+ }
++EXPORT_SYMBOL(nfs4_check_client_ready);
+ 
+ /*
+  * Initialise the timeout values for a connection
+@@ -865,9 +873,34 @@ error:
+ }
+ 
+ /*
++ * Initialize the pNFS layout driver and setup pNFS related parameters
++ */
++static void nfs4_init_pnfs(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fsinfo *fsinfo)
++{
++#if defined(CONFIG_NFS_V4_1)
++	struct nfs_client *clp = server->nfs_client;
++
++	if (nfs4_has_session(clp) &&
++	    (clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS)) {
++		server->pnfs_blksize = fsinfo->blksize;
++		set_pnfs_layoutdriver(server, mntfh, fsinfo->layouttype);
++		pnfs_set_ds_iosize(server);
++	}
++#endif /* CONFIG_NFS_V4_1 */
++}
++
++static void nfs4_uninit_pnfs(struct nfs_server *server)
++{
++#if defined(CONFIG_NFS_V4_1)
++	if (server->nfs_client && nfs4_has_session(server->nfs_client))
++		unmount_pnfs_layoutdriver(server);
++#endif /* CONFIG_NFS_V4_1 */
++}
++
++/*
+  * Load up the server record from information gained in an fsinfo record
+  */
+-static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo)
++static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fsinfo *fsinfo)
+ {
+ 	unsigned long max_rpc_payload;
+ 
+@@ -897,6 +930,8 @@ static void nfs_server_set_fsinfo(struct
+ 	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
+ 		server->wsize = NFS_MAX_FILE_IO_SIZE;
+ 	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
++	nfs4_init_pnfs(server, mntfh, fsinfo);
++
+ 	server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
+ 
+ 	server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
+@@ -938,7 +973,7 @@ static int nfs_probe_fsinfo(struct nfs_s
+ 	if (error < 0)
+ 		goto out_error;
+ 
+-	nfs_server_set_fsinfo(server, &fsinfo);
++	nfs_server_set_fsinfo(server, mntfh, &fsinfo);
+ 
+ 	/* Get some general file system info */
+ 	if (server->namelen == 0) {
+@@ -1016,6 +1051,7 @@ void nfs_free_server(struct nfs_server *
+ {
+ 	dprintk("--> nfs_free_server()\n");
+ 
++	nfs4_uninit_pnfs(server);
+ 	spin_lock(&nfs_client_lock);
+ 	list_del(&server->client_link);
+ 	list_del(&server->master_link);
+@@ -1126,7 +1162,7 @@ static int nfs4_init_callback(struct nfs
+ 				return error;
+ 		}
+ 
+-		error = nfs_callback_up(clp->cl_minorversion,
++		error = nfs_callback_up(clp->cl_mvops->minor_version,
+ 					clp->cl_rpcclient->cl_xprt);
+ 		if (error < 0) {
+ 			dprintk("%s: failed to start callback. Error = %d\n",
+@@ -1143,10 +1179,8 @@ static int nfs4_init_callback(struct nfs
+  */
+ static int nfs4_init_client_minor_version(struct nfs_client *clp)
+ {
+-	clp->cl_call_sync = _nfs4_call_sync;
+-
+ #if defined(CONFIG_NFS_V4_1)
+-	if (clp->cl_minorversion) {
++	if (clp->cl_mvops->minor_version) {
+ 		struct nfs4_session *session = NULL;
+ 		/*
+ 		 * Create the session and mark it expired.
+@@ -1158,7 +1192,13 @@ static int nfs4_init_client_minor_versio
+ 			return -ENOMEM;
+ 
+ 		clp->cl_session = session;
+-		clp->cl_call_sync = _nfs4_call_sync_session;
++		/*
++		 * The create session reply races with the server back
++		 * channel probe. Mark the client NFS_CS_SESSION_INITING
++		 * so that the client back channel can find the
++		 * nfs_client struct
++		 */
++		clp->cl_cons_state = NFS_CS_SESSION_INITING;
+ 	}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+@@ -1216,7 +1256,7 @@ error:
+ /*
+  * Set up an NFS4 client
+  */
+-static int nfs4_set_client(struct nfs_server *server,
++int nfs4_set_client(struct nfs_server *server,
+ 		const char *hostname,
+ 		const struct sockaddr *addr,
+ 		const size_t addrlen,
+@@ -1259,6 +1299,7 @@ error:
+ 	dprintk("<-- nfs4_set_client() = xerror %d\n", error);
+ 	return error;
+ }
++EXPORT_SYMBOL(nfs4_set_client);
+ 
+ 
+ /*
+@@ -1448,7 +1489,7 @@ struct nfs_server *nfs4_create_referral_
+ 				data->authflavor,
+ 				parent_server->client->cl_xprt->prot,
+ 				parent_server->client->cl_timeout,
+-				parent_client->cl_minorversion);
++				parent_client->cl_mvops->minor_version);
+ 	if (error < 0)
+ 		goto error;
+ 
+diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/bl_com.c
+--- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-08-23 12:09:03.297501650 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-08-23 12:09:03.298501447 -0400
+@@ -0,0 +1,292 @@
++#if defined(CONFIG_SPNFS_BLOCK)
++
++#include <linux/module.h>
++#include <linux/mutex.h>
++#include <linux/init.h>
++#include <linux/types.h>
++#include <linux/slab.h>
++#include <linux/socket.h>
++#include <linux/in.h>
++#include <linux/sched.h>
++#include <linux/exportfs.h>
++#include <linux/namei.h>
++#include <linux/mount.h>
++#include <linux/path.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/workqueue.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++#include <linux/proc_fs.h>
++#include <linux/nfs_fs.h>
++
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd4_block.h>
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++static ssize_t bl_pipe_upcall(struct file *, struct rpc_pipe_msg *,
++    char __user *, size_t);
++static ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t);
++static void bl_pipe_destroy_msg(struct rpc_pipe_msg *);
++
++static struct rpc_pipe_ops bl_upcall_ops = {
++	.upcall		= bl_pipe_upcall,
++	.downcall	= bl_pipe_downcall,
++	.destroy_msg	= bl_pipe_destroy_msg,
++};
++
++bl_comm_t	*bl_comm_global;
++
++int
++nfsd_bl_start(void)
++{
++	bl_comm_t	*bl_comm = NULL;
++	struct path path;
++	struct nameidata nd;
++	int rc;
++
++	dprintk("%s: starting pipe\n", __func__);
++	if (bl_comm_global)
++		return -EEXIST;
++
++	path.mnt = rpc_get_mount();
++	if (IS_ERR(path.mnt))
++		return PTR_ERR(path.mnt);
++
++	/* FIXME: do not abuse rpc_pipefs/nfs */
++	rc = vfs_path_lookup(path.mnt->mnt_root, path.mnt, "/nfs", 0, &nd);
++	if (rc)
++		goto err;
++
++	bl_comm = kzalloc(sizeof (*bl_comm), GFP_KERNEL);
++	if (!bl_comm) {
++		rc = -ENOMEM;
++		goto err;
++	}
++
++	/* FIXME: rename to "spnfs_block" */
++	bl_comm->pipe_dentry = rpc_mkpipe(nd.path.dentry, "pnfs_block", bl_comm,
++					 &bl_upcall_ops, 0);
++	if (IS_ERR(bl_comm->pipe_dentry)) {
++		rc = -EPIPE;
++		goto err;
++	}
++	mutex_init(&bl_comm->lock);
++	mutex_init(&bl_comm->pipe_lock);
++	init_waitqueue_head(&bl_comm->pipe_wq);
++
++	bl_comm_global = bl_comm;
++	return 0;
++err:
++	rpc_put_mount();
++	kfree(bl_comm);
++	return rc;
++}
++
++void
++nfsd_bl_stop(void)
++{
++	bl_comm_t	*c = bl_comm_global;
++
++	dprintk("%s: stopping pipe\n", __func__);
++	if (!c)
++		return;
++	rpc_unlink(c->pipe_dentry);
++	rpc_put_mount();
++	bl_comm_global = NULL;
++	kfree(c);
++}
++
++static ssize_t
++bl_pipe_upcall(struct file *file, struct rpc_pipe_msg *msg, char __user *dst,
++    size_t buflen)
++{
++	char	*data	= (char *)msg->data + msg->copied;
++	ssize_t	mlen	= msg->len - msg->copied,
++		left;
++
++	if (mlen > buflen)
++		mlen = buflen;
++	/* copy_to_user() returns the byte count NOT copied, never < 0 */
++	left = copy_to_user(dst, data, mlen);
++	if (left == mlen) {	/* nothing reached user space */
++		msg->errno = -EFAULT;
++		return -EFAULT;
++	}
++	mlen		-= left;
++	msg->copied	+= mlen;
++	msg->errno	= 0;
++
++	return mlen;
++}
++
++static ssize_t
++bl_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
++{
++	struct rpc_inode	*rpci	= RPC_I(filp->f_dentry->d_inode);
++	bl_comm_t		*bc	= (bl_comm_t *)rpci->private;
++	bl_comm_msg_t		*im	= &bc->msg;
++	int			ret;
++	bl_comm_res_t		*res;
++	
++
++	if (mlen == 0) {
++		im->msg_status = PNFS_BLOCK_FAILURE;
++		im->msg_res = NULL;
++		wake_up(&bc->pipe_wq);
++		return -EFAULT;
++	}
++	
++	if ((res = kmalloc(mlen, GFP_KERNEL)) == NULL)
++		return -ENOMEM;
++	
++	if (copy_from_user(res, src, mlen)) {
++		kfree(res);
++		return -EFAULT;
++	}
++	
++	mutex_lock(&bc->pipe_lock);
++	
++	ret		= mlen;
++	im->msg_status	= res->res_status;
++	im->msg_res	= res;
++	
++	wake_up(&bc->pipe_wq);
++	mutex_unlock(&bc->pipe_lock);
++	return ret;
++}
++
++static void
++bl_pipe_destroy_msg(struct rpc_pipe_msg *msg)
++{
++	bl_comm_msg_t	*im = msg->data;
++	bl_comm_t	*bc = container_of(im, struct bl_comm, msg);
++	
++	if (msg->errno >= 0)
++		return;
++
++	mutex_lock(&bc->pipe_lock);
++	im->msg_status = PNFS_BLOCK_FAILURE;
++	wake_up(&bc->pipe_wq);
++	mutex_unlock(&bc->pipe_lock);
++}
++
++int
++bl_upcall(bl_comm_t *bc, bl_comm_msg_t *upmsg, bl_comm_res_t **res)
++{
++	struct rpc_pipe_msg	msg;
++	DECLARE_WAITQUEUE(wq, current);
++	int			rval	= 1;
++	bl_comm_msg_t		*m	= &bc->msg;
++	
++	if (bc == NULL) {
++		dprintk("%s: No pNFS block daemon available\n", __func__);
++		return 1;
++	}
++	
++	mutex_lock(&bc->lock);
++	mutex_lock(&bc->pipe_lock);
++	
++	memcpy(m, upmsg, sizeof (*m));
++	
++	memset(&msg, 0, sizeof (msg));
++	msg.data = m;
++	msg.len = sizeof (*m);
++	
++	add_wait_queue(&bc->pipe_wq, &wq);
++	rval = rpc_queue_upcall(bc->pipe_dentry->d_inode, &msg);
++	if (rval < 0) {
++		remove_wait_queue(&bc->pipe_wq, &wq);
++		goto out;
++	}
++	
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	mutex_unlock(&bc->pipe_lock);
++	schedule();
++	__set_current_state(TASK_RUNNING);
++	remove_wait_queue(&bc->pipe_wq, &wq);
++	mutex_lock(&bc->pipe_lock);
++	
++	if (m->msg_status == PNFS_BLOCK_SUCCESS) {
++		*res = m->msg_res;
++		rval = 0;
++	} else
++		rval = 1;
++	
++out:
++	mutex_unlock(&bc->pipe_lock);
++	mutex_unlock(&bc->lock);
++	return rval;
++}
++
++static ssize_t ctl_write(struct file *file, const char __user *buf, size_t len,
++    loff_t *offset)
++{
++	int		cmd,
++			rc;
++	bl_comm_t	*bc	= bl_comm_global;
++	bl_comm_msg_t	msg;
++	bl_comm_res_t	*res	= NULL;	/* kfree()d even if the upcall fails */
++
++	if (copy_from_user((int *)&cmd, (int *)buf, sizeof (int)))
++		return -EFAULT;
++	switch (cmd) {
++	case PNFS_BLOCK_CTL_STOP:
++		msg.msg_type = PNFS_UPCALL_MSG_STOP;
++		(void) bl_upcall(bc, &msg, &res);
++		kfree(res);
++		nfsd_bl_stop();
++		break;
++		
++	case PNFS_BLOCK_CTL_START:
++		rc = nfsd_bl_start();
++		if (rc != 0)
++			return rc;
++		break;
++		
++	case PNFS_BLOCK_CTL_VERS:
++		msg.msg_type = PNFS_UPCALL_MSG_VERS;
++		msg.u.msg_vers = PNFS_UPCALL_VERS;
++		if (bl_upcall(bc, &msg, &res)) {
++			dprintk("%s: Failed to contact pNFS block daemon\n",
++			    __func__);
++			return 0;
++		}
++		kfree(res);
++		break;
++		
++	default:
++		dprintk("%s: unknown ctl command %d\n", __func__, cmd);
++		break;
++	}
++	return len;
++}
++
++static struct file_operations ctl_ops = {
++	.write	= ctl_write,
++};
++
++/*
++ * bl_init_proc -- set up proc interfaces
++ *
++ * Creating a pnfs_block directory isn't really required at this point
++ * since we've only got a single node in that directory. If the need for
++ * more nodes doesn't present itself shortly this code should revert
++ * to a single top level node. McNeal 11-Aug-2008.
++ */
++int
++bl_init_proc(void)
++{
++	struct proc_dir_entry *e;
++
++	e = proc_mkdir("fs/pnfs_block", NULL);
++	if (!e)
++		return -ENOMEM;
++
++	e = create_proc_entry("fs/pnfs_block/ctl", 0, NULL);
++	if (!e)
++		return -ENOMEM;
++	e->proc_fops = &ctl_ops;
++
++	return 0;
++}
++#endif /* CONFIG_SPNFS_BLOCK */
+diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/bl_ops.c
+--- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-08-23 12:09:03.299501445 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-08-23 12:09:03.299501445 -0400
+@@ -0,0 +1,1672 @@
++/*
++ *  bl_ops.c
++ *  spNFS
++ *
++ *  Created by Rick McNeal on 4/1/08.
++ *  Copyright 2008 __MyCompanyName__. All rights reserved.
++ *
++ */
++
++/*
++ * Block layout operations.
++ *
++ * These functions, with the exception of pnfs_block_enabled, are assigned to
++ * the super block s_export_op structure.
++ */
++#if defined(CONFIG_SPNFS_BLOCK)
++
++#include <linux/module.h>
++#include <linux/genhd.h>
++#include <linux/fs.h>
++#include <linux/exportfs.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd/export.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/nfsd/debug.h>
++#include <linux/spinlock_types.h>
++#include <linux/dm-ioctl.h>
++#include <asm/uaccess.h>
++#include <linux/falloc.h>
++#include <linux/nfsd4_block.h>
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY	NFSDDBG_PNFS
++
++#define MIN(a, b) ((a) < (b) ? (a) : (b))
++
++#define BL_LAYOUT_HASH_BITS	4
++#define BL_LAYOUT_HASH_SIZE	(1 << BL_LAYOUT_HASH_BITS)
++#define BL_LAYOUT_HASH_MASK	(BL_LAYOUT_HASH_SIZE - 1)
++#define BL_LIST_REQ	(sizeof (struct dm_ioctl) + 256)
++
++#define bl_layout_hashval(id) \
++	((id) & BL_LAYOUT_HASH_MASK)
++
++#define BLL_F_END(p) ((p)->bll_foff + (p)->bll_len)
++#define BLL_S_END(p) ((p)->bll_soff + (p)->bll_len)
++#define _2SECTS(v) ((v) >> 9)
++
++#ifndef READ32
++#define READ32(x)	(x) = ntohl(*p++)
++#define READ64(x)	do {			\
++(x) = (u64)ntohl(*p++) << 32;	\
++(x) |= ntohl(*p++);		\
++} while (0)
++#endif
++
++
++typedef enum {True, False} boolean_t;	/* NB: True == 0 and False == 1 */
++/* ---- block layoutget and commit structure ---- */
++typedef struct bl_layout_rec {
++	struct list_head	blr_hash,
++				blr_layouts;
++	dev_t			blr_rdev;
++	struct inode		*blr_inode;
++	int			blr_recalled;	// debug
++	u64			blr_orig_size,
++				blr_commit_size,
++				blr_ext_size;
++	spinlock_t		blr_lock;	// Protects blr_layouts
++} bl_layout_rec_t;
++
++static struct list_head layout_hash;
++static struct list_head layout_hashtbl[BL_LAYOUT_HASH_SIZE];
++static spinlock_t layout_hashtbl_lock;
++
++/* ---- prototypes ---- */
++static boolean_t device_slice(dev_t devid);
++static boolean_t device_dm(dev_t devid);
++static boolean_t layout_inode_add(struct inode *i, bl_layout_rec_t **);
++static bl_layout_rec_t *layout_inode_find(struct inode *i);
++static void layout_inode_del(struct inode *i);
++static char *map_state2name(enum pnfs_block_extent_state4 s);
++static pnfs_blocklayout_devinfo_t *bld_alloc(struct list_head *volume, int type);
++static void bld_free(pnfs_blocklayout_devinfo_t *bld);
++static pnfs_blocklayout_devinfo_t *bld_simple(struct list_head *volumes,
++    dev_t devid, int local_index);
++static pnfs_blocklayout_devinfo_t *bld_slice(struct list_head *volumes,
++    dev_t devid, int my_loc, int idx);
++static int layout_cache_fill_from(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg);
++struct list_head *layout_cache_iter(bl_layout_rec_t *r,
++    struct list_head *bl_possible, struct nfsd4_layout_seg *seg);
++static void layout_cache_merge(bl_layout_rec_t *r, struct list_head *h);
++static int layout_cache_update(bl_layout_rec_t *r, struct list_head *h);
++static void layout_cache_del(bl_layout_rec_t *r, const struct nfsd4_layout_seg *seg);
++static void print_bll(pnfs_blocklayout_layout_t *b, char *);
++static inline boolean_t layout_cache_fill_from_list(bl_layout_rec_t *r,
++    struct list_head *h, struct nfsd4_layout_seg *seg);
++static inline void bll_collapse(bl_layout_rec_t *r,
++    pnfs_blocklayout_layout_t *c);
++static pnfs_blocklayout_layout_t *bll_alloc(u64 offset, u64 len,
++    enum bl_cache_state state, struct list_head *h);
++static pnfs_blocklayout_layout_t *bll_alloc_dup(pnfs_blocklayout_layout_t *b,
++    enum bl_cache_state c, struct list_head *h);
++static inline boolean_t layout_conflict(pnfs_blocklayout_layout_t *b, u32 iomode,
++    enum pnfs_block_extent_state4 *s);
++static void extents_setup(struct fiemap_extent_info *fei);
++static void extents_count(struct fiemap_extent_info *fei, struct inode *i,
++    u64 foff, u64 len);
++static boolean_t extents_get(struct fiemap_extent_info *fei, struct inode *i,
++    u64 foff, u64 len);
++static boolean_t extents_process(struct fiemap_extent_info *fei,
++    struct list_head *bl_candidates, struct nfsd4_layout_seg *, dev_t dev,
++    pnfs_blocklayout_layout_t *b);
++static void extents_cleanup(struct fiemap_extent_info *fei);
++
++void
++nfsd_bl_init(void)
++{
++	int	i;
++	dprintk("%s loaded\n", __func__);
++
++	spin_lock_init(&layout_hashtbl_lock);
++	INIT_LIST_HEAD(&layout_hash);
++	for (i = 0; i < BL_LAYOUT_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&layout_hashtbl[i]);
++	bl_init_proc();
++}
++
++/*
++ * pnfs_block_enabled -- check to see if this file system should be exported
++ * as block pnfs
++ */
++int
++pnfs_block_enabled(struct inode *inode, int ex_flags)
++{
++	bl_comm_msg_t	msg;
++	bl_comm_res_t	*res	= NULL;
++	static int bl_comm_once	= 0;
++	
++	dprintk("--> %s\n", __func__);
++	/*
++	 * FIXME: Figure out method to determine if this file system should
++	 * be exported. The following areas need to be checked.
++	 * (1) Validate that this file system was exported as a pNFS
++	 *     block-layout
++	 * (2) Has there been successful communication with the
++	 *     volume daemon?
++	 */
++	/* Check #1 */
++#ifdef notyet
++	if (!(ex_flags & NFSEXP_PNFS_BLOCK)) {
++		dprintk("%s: pnfs_block not set in export\n", __func__);
++		return 0;
++	}
++#endif
++	
++	/* Check #2 -- only performed until the first successful exchange */
++	if (!bl_comm_once) {
++		msg.msg_type = PNFS_UPCALL_MSG_VERS;
++		msg.u.msg_vers = PNFS_UPCALL_VERS;
++		if (bl_upcall(bl_comm_global, &msg, &res)) {
++			dprintk("%s: Failed to contact pNFS block daemon\n",
++				__func__);
++			return 0;
++		}
++		if (msg.u.msg_vers != res->u.vers) {
++			dprintk("%s: vers mismatch, kernel != daemon\n",
++				__func__);
++			kfree(res);
++			return 0;
++		}
++	}
++	bl_comm_once = 1;
++
++	kfree(res);
++	
++	dprintk("<-- %s okay\n", __func__);
++	return 1;
++}
++
++int
++bl_layout_type(struct super_block *sb)
++{
++	return LAYOUT_BLOCK_VOLUME;
++}
++
++int
++bl_getdeviceiter(struct super_block *sb,
++		 u32 layout_type,
++		 struct nfsd4_pnfs_dev_iter_res *res)
++{
++	res->gd_eof = 1;	
++	if (res->gd_cookie)
++		return -ENOENT;
++	res->gd_devid	= sb->s_dev;
++	res->gd_verf	= 1;
++	res->gd_cookie	= 1;
++	return 0;
++}
++
++static int
++bl_getdeviceinfo_slice(struct super_block *sb, struct exp_xdr_stream *xdr,
++		       const struct nfsd4_pnfs_deviceid *devid)
++{
++	pnfs_blocklayout_devinfo_t	*bld_slice_p,
++					*bld_simple_p,
++					*bld;
++	int				status		= -EIO,
++					location	= 0;
++	struct list_head		volumes;
++	
++	dprintk("--> %s\n", __func__);
++	INIT_LIST_HEAD(&volumes);
++
++	bld_simple_p = bld_simple(&volumes, devid->devid,
++				  location++);
++	if (!bld_simple_p)
++		goto out;
++	bld_slice_p = bld_slice(&volumes, devid->devid, location++,
++	    bld_simple_p->bld_index_loc);
++
++	if (!bld_slice_p)
++		goto out;
++	
++	status = blocklayout_encode_devinfo(xdr, &volumes);
++
++out:
++	while (!list_empty(&volumes)) {
++		bld = list_entry(volumes.next, pnfs_blocklayout_devinfo_t,
++		    bld_list);
++		if (bld->bld_type == PNFS_BLOCK_VOLUME_SIMPLE)
++			kfree(bld->u.simple.bld_sig);
++		bld_free(bld);
++	}
++	
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++static int
++bl_getdeviceinfo_dm(struct super_block *sb, struct exp_xdr_stream *xdr,
++		    const struct nfsd4_pnfs_deviceid *devid)
++{
++	pnfs_blocklayout_devinfo_t	*bld		= NULL;
++	int				status		= -EIO,	// default to error
++					i,
++					location	= 0;
++	struct list_head		volumes;
++	bl_comm_msg_t			msg;
++	bl_comm_res_t			*res		= NULL;	// kfree()d at out:
++	
++	dprintk("--> %s\n", __func__);
++	INIT_LIST_HEAD(&volumes);
++	
++	msg.msg_type = PNFS_UPCALL_MSG_DMGET;
++	msg.u.msg_dev = devid->devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("%s: upcall for DMGET failed\n", __func__);
++		goto out;
++	}
++		
++	/*
++	 * Don't use bld_alloc() here. If used this will be the first volume
++	 * type added to the list whereas the protocol requires it to be the
++	 * last.
++	 */
++	bld = kmalloc(sizeof (*bld), GFP_KERNEL);
++	if (!bld)
++		goto out;
++	memset(bld, 0, sizeof (*bld));
++	bld->bld_type			= PNFS_BLOCK_VOLUME_STRIPE;
++	bld->u.stripe.bld_stripes	= res->u.stripe.num_stripes;
++	bld->u.stripe.bld_chunk_size	= res->u.stripe.stripe_size * 512LL;
++	dprintk("%s: stripes %d, chunk_size %Lu\n", __func__,
++	    bld->u.stripe.bld_stripes, bld->u.stripe.bld_chunk_size / 512LL);
++	
++	bld->u.stripe.bld_stripe_indexs = kmalloc(bld->u.stripe.bld_stripes *
++						  sizeof (int), GFP_KERNEL);
++	if (!bld->u.stripe.bld_stripe_indexs)
++		goto out;
++
++	for (i = 0; i < bld->u.stripe.bld_stripes; i++) {
++		dev_t			dev;
++		pnfs_blocklayout_devinfo_t	*bldp;
++		
++		dev = MKDEV(res->u.stripe.devs[i].major,
++			    res->u.stripe.devs[i].minor);
++		if (dev == 0)
++			goto out;
++		
++		bldp = bld_simple(&volumes, dev, location++);
++		if (!bldp) {
++			dprintk("%s: bld_simple failed\n", __func__);
++			goto out;
++		}
++		bldp = bld_slice(&volumes, dev, location++, bldp->bld_index_loc);
++
++		if (!bldp) {
++			dprintk("%s: bld_slice failed\n", __func__);
++			goto out;
++		}
++		bld->u.stripe.bld_stripe_indexs[i] = bldp->bld_index_loc;
++
++	}
++	list_add_tail(&bld->bld_list, &volumes);
++	status = blocklayout_encode_devinfo(xdr, &volumes);
++	
++out:
++	while (!list_empty(&volumes)) {
++		bld = list_entry(volumes.next, pnfs_blocklayout_devinfo_t,
++		    bld_list);
++		switch (bld->bld_type) {
++			case PNFS_BLOCK_VOLUME_SLICE:
++			case PNFS_BLOCK_VOLUME_CONCAT:
++				// No memory to release for these
++				break;
++			case PNFS_BLOCK_VOLUME_SIMPLE:
++				kfree(bld->u.simple.bld_sig);
++				break;
++			case PNFS_BLOCK_VOLUME_STRIPE:
++				kfree(bld->u.stripe.bld_stripe_indexs);
++				break;
++		}
++		bld_free(bld);
++	}
++	kfree(res);
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++/*
++ * bl_getdeviceinfo -- determine device tree for requested devid
++ */
++int
++bl_getdeviceinfo(struct super_block *sb, struct exp_xdr_stream *xdr,
++		 u32 layout_type,
++		 const struct nfsd4_pnfs_deviceid *devid)
++{
++	if (device_slice(devid->devid) == True)
++		return bl_getdeviceinfo_slice(sb, xdr, devid);
++	else if (device_dm(devid->devid) == True)
++		return bl_getdeviceinfo_dm(sb, xdr, devid);
++	return -EINVAL;
++}
++
++enum nfsstat4
++bl_layoutget(struct inode *i, struct exp_xdr_stream *xdr,
++	     const struct nfsd4_pnfs_layoutget_arg *arg,
++	     struct nfsd4_pnfs_layoutget_res *res)
++{
++	pnfs_blocklayout_layout_t	*b;
++	bl_layout_rec_t			*r;
++	struct list_head		bl_possible,
++					*bl_candidates	= NULL;
++	boolean_t			del_on_error	= False;
++	int				adj;
++	enum nfsstat4			nfserr		= NFS4_OK;
++	
++	dprintk("--> %s (inode=[0x%x:%lu], offset=%Lu, len=%Lu, iomode=%d)\n",
++	    __func__, i->i_sb->s_dev, i->i_ino, _2SECTS(res->lg_seg.offset),
++	    _2SECTS(res->lg_seg.length), res->lg_seg.iomode);
++
++	if (res->lg_seg.length == 0) {
++		printk("%s: request length of 0, error condition\n", __func__);
++		return NFS4ERR_BADLAYOUT;
++	}
++	
++	/*
++	 * Adjust the length as required per spec.
++	 * - First case is where the length is set to (u64)-1. Cheap means to
++	 *   define the end of the file.
++	 * - Second case is where the I/O mode is read-only, but the request is
++	 *   past the end of the file so the request needs to be trimmed.
++	 */
++	if ((res->lg_seg.length == NFS4_MAX_UINT64) ||
++	    (((res->lg_seg.offset + res->lg_seg.length) > i->i_size) &&
++	     (res->lg_seg.iomode == IOMODE_READ)))
++		res->lg_seg.length = i->i_size - res->lg_seg.offset;
++	
++	adj = (res->lg_seg.offset & 511) ? res->lg_seg.offset & 511 : 0;
++	res->lg_seg.offset -= adj;
++	res->lg_seg.length = (res->lg_seg.length + adj + 511) & ~511;
++	
++	if (res->lg_seg.iomode != IOMODE_READ)
++		if (i->i_op->fallocate(i, FALLOC_FL_KEEP_SIZE,
++				       res->lg_seg.offset, res->lg_seg.length))
++			return NFS4ERR_IO;
++		
++	INIT_LIST_HEAD(&bl_possible);
++	
++	if ((r = layout_inode_find(i)) == NULL) {
++		if (layout_inode_add(i, &r) == False) {
++			printk("%s: layout_inode_add failed\n", __func__);
++			return NFS4ERR_IO;
++		}
++		del_on_error = True;
++	}
++	BUG_ON(!r);
++	
++	spin_lock(&r->blr_lock);
++	
++	if (layout_cache_fill_from(r, &bl_possible, &res->lg_seg)) {
++		/*
++		 * This will send NFS4ERR_LAYOUTTRYLATER to the client.
++		 */
++		dprintk("%s: layout_cache_fill_from() failed\n", __func__);
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	
++	res->lg_return_on_close	= 1;
++	res->lg_seg.length	= 0;
++	
++	bl_candidates = layout_cache_iter(r, &bl_possible, &res->lg_seg);
++	if (!bl_candidates) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	
++	layout_cache_merge(r, bl_candidates);
++	if (layout_cache_update(r, bl_candidates)) {
++		/* ---- Failed to allocate memory. ---- */
++		dprintk("%s: layout_cache_update() failed\n", __func__);
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	
++	nfserr = blocklayout_encode_layout(xdr, bl_candidates);
++	if (nfserr)
++		dprintk("%s: layoutget xdr routine failed\n", __func__);
++	
++layoutget_cleanup:
++	if (bl_candidates) {
++		while (!list_empty(bl_candidates)) {
++			b = list_entry(bl_candidates->next,
++			    struct pnfs_blocklayout_layout, bll_list);
++			list_del(&b->bll_list);
++			kfree(b);
++		}
++	}
++
++	spin_unlock(&r->blr_lock);
++	if (unlikely(nfserr)) {
++		if (del_on_error == True)
++			layout_inode_del(i);
++		res->lg_seg.length = 0;
++		res->lg_seg.offset = 0;
++	}
++	
++	dprintk("<-- %s (rval %u)\n", __func__, nfserr);
++	return nfserr;
++}
++
++/*
++ * bl_layoutcommit -- commit changes, especially size, to file system
++ *
++ * Currently this routine isn't called and everything is handled within
++ * nfsd4_layoutcommit(). By not calling this routine the server doesn't
++ * handle a partial return, a set of extents, of the layout. The extents
++ * are decoded here, but nothing is done with them. If this routine is to
++ * be called the interface must change to pass the 'dentry' pointer such
++ * that notify_change() can be called.
++ */
++int
++bl_layoutcommit(struct inode *i,
++		const struct nfsd4_pnfs_layoutcommit_arg *args,
++		struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	bl_layout_rec_t			*r;
++	int				status	= 0;
++	u64				lw_plus;
++
++	dprintk("--> %s (ino [0x%x:%lu])\n", __func__, i->i_sb->s_dev, i->i_ino);
++	r = layout_inode_find(i);
++	if (r) {
++		lw_plus = args->lc_last_wr + 1;
++		if (args->lc_newoffset) {
++			dprintk("  lc_last_wr %Lu\n", lw_plus);
++			if (r->blr_orig_size < lw_plus) {
++				r->blr_orig_size	= lw_plus;
++				res->lc_size_chg	= 1;
++				res->lc_newsize		= lw_plus;
++			}
++		}
++
++		if (args->lc_up_len) {
++			int	extents = -1, idx;
++			struct pnfs_blocklayout_layout *b;
++			__be32 *p = args->lc_up_layout;
++			/*
++			 * Client is returning extents (section 2.3.2 of
++			 * draft-ietf-nfsv4-pnfs-block-08). Bound the wire count by
++			 * lc_up_len (assumed bytes): 4 + 44 per encoded extent.
++			 */
++			if (args->lc_up_len >= 4)
++				READ32(extents);
++			dprintk("  Client returning %d extents: data size %d\n",
++			    extents, args->lc_up_len);
++			if (extents < 0 || extents > (args->lc_up_len - 4) / 44) {
++				status = -EINVAL;
++			} else if ((b = kcalloc(extents, sizeof (*b),
++			    GFP_KERNEL)) != NULL) {
++				for (idx = 0; idx < extents; idx++) {
++					READ64(b[idx].bll_vol_id.sbid);
++					READ64(b[idx].bll_vol_id.devid);
++					READ64(b[idx].bll_foff);
++					READ64(b[idx].bll_len);
++					READ64(b[idx].bll_soff);
++					READ32(b[idx].bll_es);
++					dprintk("  %d: foff %Lu, len %Lu, soff %Lu "
++					    "state %s\n",
++					    idx, _2SECTS(b[idx].bll_foff),
++					    _2SECTS(b[idx].bll_len),
++					    _2SECTS(b[idx].bll_soff),
++					    map_state2name(b[idx].bll_es));
++				}
++				kfree(b);
++			} else {
++				status = -ENOMEM;
++			}
++		}
++	} else
++		dprintk("%s: Unexpected commit to inode %p\n", __func__, i);
++
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++int
++bl_layoutreturn(struct inode *i,
++		const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	bl_layout_rec_t			*rec;
++
++	dprintk("--> %s (ino [0x%x:%lu])\n", __func__, i->i_sb->s_dev, i->i_ino);
++	/* Trim the returned segment out of this inode's cache, if any. */
++	rec = layout_inode_find(i);
++	if (rec != NULL) {
++		spin_lock(&rec->blr_lock);
++		layout_cache_del(rec, &args->lr_seg);
++		spin_unlock(&rec->blr_lock);
++		dprintk("    ext_size %Lu, i_size %Lu, orig_size %Lu\n",
++		    rec->blr_ext_size, i->i_size, rec->blr_orig_size);
++	}
++
++	/* Releases the record itself once its layout list is empty. */
++	layout_inode_del(i);
++	dprintk("<-- %s (rval %d)\n", __func__, 0);
++	return 0;
++}
++
++int
++bl_layoutrecall(struct inode *inode, int type, u64 offset, u64 len)
++{
++	struct super_block		*sb;
++	struct nfsd4_pnfs_cb_layout	lr;
++	bl_layout_rec_t			*r;
++	pnfs_blocklayout_layout_t	*b;
++	u64				adj;
++	/* Recall the cached entries covering [offset, offset + len). */
++	dprintk("--> %s\n", __func__);
++	BUG_ON(!len);
++	switch (type) {
++		case RETURN_FILE:
++			sb = inode->i_sb;
++			dprintk("  recalling layout [0x%x:%lu], %Lu:%Lu\n",
++			    inode->i_sb->s_dev, inode->i_ino,
++				_2SECTS(offset), _2SECTS(len));
++			break;
++		case RETURN_FSID:
++			sb = inode->i_sb;
++			dprintk("%s: recalling layout for fsid x (unimplemented)\n",
++				__func__);
++			return 0;
++		case RETURN_ALL:
++			/*
++			 * XXX figure out how to get a sb since there's no
++			 * inode ptr
++			 */
++			dprintk("%s: recalling all layouts (unimplemented)\n",
++				__func__);
++			return 0;
++		default:
++			return -EINVAL;
++	}
++
++restart:
++	r = layout_inode_find(inode);
++	if (r && len && !r->blr_recalled) {
++		spin_lock(&r->blr_lock);
++		list_for_each_entry(b, &r->blr_layouts, bll_list) {
++			if (!r->blr_recalled && !b->bll_recalled &&
++			    (offset >= b->bll_foff) && (offset < BLL_F_END(b))) {
++				b->bll_recalled		= 1;
++				lr.cbl_recall_type	= type;
++				lr.cbl_seg.layout_type	= LAYOUT_BLOCK_VOLUME;
++				lr.cbl_seg.clientid	= 0;
++				lr.cbl_seg.offset	= 0;
++				lr.cbl_seg.length	= NFS4_MAX_UINT64;
++				r->blr_recalled		= 1;
++				dprintk("  FULL LAYOUTRECALL\n");
++				lr.cbl_seg.iomode = IOMODE_ANY;
++
++				/*
++				 * Currently there are only two cases where the
++				 * layout is being returned.
++				 *    (1) Someone is issuing a NFS_WRITE operation
++				 *        to this layout.
++				 *    (2) The file has been truncated which means
++				 *        the layout is immediately made invalid.
++				 * In both cases the client must write any
++				 * uncommitted modifications to the server via
++				 * NFS_WRITE.
++				 */
++				lr.cbl_layoutchanged = 1;
++
++				/*
++				 * Work out how much of the range this entry
++				 * covers while the lock still protects 'b';
++				 * the layoutreturn triggered by the callback
++				 * may free both 'b' and 'r'.
++				 */
++				adj = MIN(b->bll_len - (offset - b->bll_foff),
++				    len);
++				/*
++				 * Need to drop the lock because we'll get a
++				 * layoutreturn which will block waiting for
++				 * the lock. The request will come in on the
++				 * same thread which will cause a deadlock.
++				 */
++				spin_unlock(&r->blr_lock);
++				nfsd_layout_recall_cb(sb, inode, &lr);
++				offset += adj;
++				len -= adj;
++				if (!len)
++					goto out;	/* 'r' may be gone: don't relock */
++				/* blr_layouts may have changed: rescan. */
++				goto restart;
++			}
++		}
++		spin_unlock(&r->blr_lock);
++	}
++out:
++	dprintk("<-- %s\n", __func__);
++	return 0;
++}
++
++/*
++ * []------------------------------------------------------------------[]
++ * | Support functions from here on down.				|
++ * []------------------------------------------------------------------[]
++ */
++
++/*
++ * bld_simple -- given a dev_t build a simple volume structure
++ *
++ * Simple volume contains the device signature and offset to that data in
++ * the storage volume. Returns NULL on failure; 'local_index' is unused.
++ */
++static pnfs_blocklayout_devinfo_t *
++bld_simple(struct list_head *volumes, dev_t devid, int local_index)
++{
++	pnfs_blocklayout_devinfo_t	*bld	= NULL;
++	bl_comm_msg_t			msg;
++	bl_comm_res_t			*res	= NULL;
++
++	msg.msg_type = PNFS_UPCALL_MSG_GETSIG;
++	msg.u.msg_dev = devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("%s: Failed to get signature information\n", __func__);
++		goto error;
++	}
++
++	bld = bld_alloc(volumes, PNFS_BLOCK_VOLUME_SIMPLE);
++	if (!bld)
++		goto error;	/* 'res' from the upcall must still be freed */
++
++	bld->u.simple.bld_offset = (res->u.sig.sector * 512LL) + res->u.sig.offset;
++	bld->u.simple.bld_sig_len = res->u.sig.len;
++	bld->u.simple.bld_sig = kmalloc(res->u.sig.len, GFP_KERNEL);
++	if (!bld->u.simple.bld_sig)
++		goto error;
++
++	memcpy(bld->u.simple.bld_sig, res->u.sig.sig, res->u.sig.len);
++	kfree(res);
++	return bld;
++
++error:
++	if (bld)
++		bld_free(bld);
++	/* kfree(NULL) is a no-op, so 'res' needs no guard. */
++	kfree(res);
++	dprintk("%s: error in bld_simple\n", __func__);
++	return NULL;
++}
++
++/*
++ * bld_slice -- describe the slice/partition behind a dev_t
++ *
++ * The slice record carries the partition's start and length within its
++ * storage volume (the upcall's values scaled by 512) plus the index of
++ * the "simple" volume which contains the slice.
++ */
++static pnfs_blocklayout_devinfo_t *
++bld_slice(struct list_head *volumes, dev_t devid, int my_loc, int simple_loc)
++{
++	pnfs_blocklayout_devinfo_t	*slice;
++	bl_comm_msg_t			query;
++	bl_comm_res_t			*reply;
++
++	dprintk("--> %s\n", __func__);
++	slice = bld_alloc(volumes, PNFS_BLOCK_VOLUME_SLICE);
++	if (slice == NULL)
++		return NULL;
++
++	/* Ask user space for the slice geometry. */
++	query.u.msg_dev	= devid;
++	query.msg_type	= PNFS_UPCALL_MSG_GETSLICE;
++	if (bl_upcall(bl_comm_global, &query, &reply) != 0) {
++		dprintk("Upcall to get slice info failed\n");
++		bld_free(slice);
++		return NULL;
++	}
++
++	slice->u.slice.bld_index	= simple_loc;
++	slice->u.slice.bld_len		= reply->u.slice.length * 512LL;
++	slice->u.slice.bld_start	= reply->u.slice.start * 512LL;
++	slice->bld_index_loc		= my_loc;
++	slice->bld_devid.devid		= devid;
++	kfree(reply);
++
++	dprintk("%s: start %Lu, len %Lu\n", __func__,
++		slice->u.slice.bld_start / 512LL, slice->u.slice.bld_len / 512LL);
++	dprintk("<-- %s (rval %p)\n", __func__, slice);
++	return slice;
++}
++
++static int
++layout_cache_fill_from(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*rest;
++
++	dprintk("--> %s\n", __func__);
++
++	/* Satisfy as much of the request as possible from cached layouts. */
++	if (!list_empty(&r->blr_layouts) &&
++	    (layout_cache_fill_from_list(r, h, seg) == False))
++		return -EIO;
++
++	/*
++	 * Whatever is left in 'seg' is not cached: either the cache was
++	 * empty to begin with, or the request runs past the end of the
++	 * cached layouts. Queue that remainder as a single new entry.
++	 */
++	if (seg->length != 0) {
++		rest = bll_alloc(seg->offset, seg->length, BLOCK_LAYOUT_NEW, h);
++		if (rest == NULL)
++			return -ENOMEM;
++		dprintk("  remaining at %Lu, len %Lu\n",
++			_2SECTS(rest->bll_foff), _2SECTS(rest->bll_len));
++	}
++
++	dprintk("<-- %s\n", __func__);
++	return 0;
++}
++
++/*
++ * layout_cache_iter -- build the candidate extent list for 'bl_possible'.
++ * Returns a kmalloc'd list head; on failure frees both lists, returns NULL.
++ */
++struct list_head *
++layout_cache_iter(bl_layout_rec_t *r, struct list_head *bl_possible,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*b, *n;
++	struct list_head		*bl_candidates;
++	struct fiemap_extent_info	fei;
++	struct inode			*i	= r->blr_inode;
++	dev_t				dev	= r->blr_rdev;
++
++	dprintk("--> %s\n", __func__);
++	bl_candidates = kmalloc(sizeof (*bl_candidates), GFP_KERNEL);
++	if (!bl_candidates)
++		return NULL;
++	INIT_LIST_HEAD(bl_candidates);
++	extents_setup(&fei);
++
++	list_for_each_entry(b, bl_possible, bll_list) {
++		if (b->bll_cache_state == BLOCK_LAYOUT_NEW) {
++			extents_count(&fei, i, b->bll_foff, b->bll_len);
++			if (fei.fi_extents_mapped) {
++				/*
++				 * Common case: the range has extents, so
++				 * fetch them and convert to pNFS extents.
++				 */
++				if (extents_get(&fei, i, b->bll_foff,
++				    b->bll_len) == False)
++					goto cleanup;
++				if (extents_process(&fei, bl_candidates,
++				    seg, dev, b) == False)
++					goto cleanup;
++				extents_cleanup(&fei);
++			} else if (seg->iomode == IOMODE_READ) {
++				/*
++				 * A hole while reading: hand the client an
++				 * extent that has no backing store.
++				 */
++				n = bll_alloc(b->bll_foff, b->bll_len,
++				    BLOCK_LAYOUT_NEW, bl_candidates);
++				if (!n)
++					goto cleanup;
++				n->bll_es = PNFS_BLOCK_NONE_DATA;
++				n->bll_vol_id.sbid = 0;
++				n->bll_vol_id.devid = dev;
++				seg->length += b->bll_len;
++			} else {
++				/*
++				 * Read/write with no extents: fallocate
++				 * should already have backed the range.
++				 */
++				dprintk("    Extent count for RW is 0\n");
++				goto cleanup;
++			}
++		} else {
++			n = bll_alloc_dup(b, b->bll_cache_state, bl_candidates);
++			if (!n)
++				goto cleanup;
++			seg->length += n->bll_len;
++		}
++
++		if (r->blr_ext_size < (b->bll_foff + b->bll_len))
++			r->blr_ext_size = b->bll_foff + b->bll_len;
++	}
++
++	while (!list_empty(bl_possible)) {
++		b = list_entry(bl_possible->next, typeof(*b), bll_list);
++		list_del(&b->bll_list);
++		kfree(b);
++	}
++
++	if (!list_empty(bl_candidates)) {	/* first_entry needs an element */
++		b = list_first_entry(bl_candidates, typeof(*b), bll_list);
++		seg->offset = b->bll_foff;
++	}
++	dprintk("<-- %s okay\n", __func__);
++	return bl_candidates;
++
++cleanup:
++	extents_cleanup(&fei);
++	/* Free every entry built so far plus the unconsumed input. */
++	list_splice_init(bl_possible, bl_candidates);
++	while (!list_empty(bl_candidates)) {
++		b = list_entry(bl_candidates->next, typeof(*b), bll_list);
++		list_del(&b->bll_list);
++		kfree(b);
++	}
++	kfree(bl_candidates);
++	dprintk("<-- %s, error occurred\n", __func__);
++	return NULL;
++}
++
++/*
++ * layout_cache_merge -- collapse layouts which make up a contiguous range.
++ *
++ * Data extents merge when they share a state and abut on storage; holes
++ * (PNFS_BLOCK_NONE_DATA) merge when they abut in the file.
++ */
++static void
++layout_cache_merge(bl_layout_rec_t *r, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*next,
++					*p	= NULL;
++
++	dprintk("--> %s\n", __func__);
++	/* _safe walk: 'b' may be freed; 'p' then gets to absorb the next one. */
++	list_for_each_entry_safe(b, next, h, bll_list) {
++		if (p && (BLL_S_END(p) == b->bll_soff) &&
++		    (p->bll_es == b->bll_es) &&
++		    (b->bll_es != PNFS_BLOCK_NONE_DATA)) {
++			/*
++			 * We've got a candidate.
++			 */
++#ifdef too_verbose
++			dprintk("  merge %Lu(f):%Lu(l):%Lu(s) into %Lu(f):%Lu(l):%Lu(s)\n",
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				_2SECTS(b->bll_soff),
++				_2SECTS(p->bll_foff), _2SECTS(p->bll_len),
++				_2SECTS(p->bll_soff));
++#endif
++			if (p->bll_cache_state == BLOCK_LAYOUT_CACHE)
++				p->bll_cache_state = BLOCK_LAYOUT_UPDATE;
++			p->bll_len += b->bll_len;
++			list_del(&b->bll_list);
++			kfree(b);
++		} else if (p && (BLL_F_END(p) == b->bll_foff) &&
++			   (p->bll_es == b->bll_es) &&
++			   (b->bll_es == PNFS_BLOCK_NONE_DATA)) {
++			p->bll_len += b->bll_len;
++			list_del(&b->bll_list);
++			kfree(b);
++		} else
++			p = b;
++	}
++	dprintk("<-- %s\n", __func__);
++}
++
++/*
++ * layout_cache_update -- fold the candidate list 'h' into the inode's cache.
++ *
++ * BLOCK_LAYOUT_UPDATE candidates extend an existing cache entry and
++ * BLOCK_LAYOUT_NEW candidates are inserted in file offset order.
++ * Returns 0, or -ENOMEM when a cache entry cannot be allocated.
++ */
++static int
++layout_cache_update(bl_layout_rec_t *r, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*c,
++					*n;
++	int				status = 0;
++
++	dprintk("--> %s\n", __func__);
++	if (list_empty(&r->blr_layouts)) {
++		/* ---- Just add entries and return ---- */
++		dprintk("  cache empty for inode 0x%x:%ld\n", r->blr_rdev,
++			r->blr_inode->i_ino);
++		list_for_each_entry(b, h, bll_list) {
++			c = bll_alloc_dup(b, BLOCK_LAYOUT_CACHE,
++					  &r->blr_layouts);
++			if (!c) {
++				status = -ENOMEM;
++				break;
++			}
++			dprintk("    adding %Lu(f):%Lu(l):%Lu(s):%d\n",
++				_2SECTS(c->bll_foff), _2SECTS(c->bll_len),
++				_2SECTS(c->bll_soff), c->bll_es);
++		}
++		return status;
++	}
++
++	list_for_each_entry(b, h, bll_list) {
++		BUG_ON(!b->bll_vol_id.devid);
++		if (b->bll_cache_state == BLOCK_LAYOUT_UPDATE) {
++			boolean_t found = False;
++			list_for_each_entry(c, &r->blr_layouts, bll_list) {
++				if ((b->bll_soff >= c->bll_soff) &&
++				    (b->bll_soff < BLL_S_END(c)) &&
++				    (b->bll_es != PNFS_BLOCK_NONE_DATA)) {
++					u64	u;
++
++					if ((b->bll_foff < c->bll_foff) ||
++					    (b->bll_foff > BLL_F_END(c)))
++						BUG();
++
++					u = BLL_S_END(b) - BLL_S_END(c);
++					/*
++					 * The updated cache entry has to be
++					 * different than the current.
++					 * Otherwise the cache state for 'b'
++					 * should be BLOCK_LAYOUT_CACHE.
++					 */
++					BUG_ON(BLL_S_END(b) < BLL_S_END(c));
++
++					dprintk("  "
++						"updating %Lu(f):%Lu(l):%Lu(s) to len %Lu\n",
++						_2SECTS(c->bll_foff),
++						_2SECTS(c->bll_len),
++						_2SECTS(c->bll_soff),
++						_2SECTS(c->bll_len + u));
++					c->bll_len += u;
++					bll_collapse(r, c);
++					found = True;
++					break;
++				}
++			}
++
++			if (found == False) {
++				dprintk("  ERROR Expected to find"
++				    " %Lu(f):%Lu(l):%Lu(s), but didn't\n",
++				    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				    _2SECTS(b->bll_soff));
++				list_for_each_entry(c, &r->blr_layouts, bll_list)
++					print_bll(c, "Cached");
++				BUG();
++			}
++		} else if (b->bll_cache_state == BLOCK_LAYOUT_NEW) {
++			c = list_first_entry(&r->blr_layouts,
++			    struct pnfs_blocklayout_layout, bll_list);
++			if (b->bll_foff < c->bll_foff) {
++				/*
++				 * Special case where new entry is before
++				 * first cached entry.
++				 */
++				c = bll_alloc_dup(b, BLOCK_LAYOUT_CACHE, NULL);
++				if (!c)
++					return -ENOMEM;
++				list_add(&c->bll_list, &r->blr_layouts);
++				dprintk("  new entry at head of list at %Lu, "
++					"len %Lu\n",
++					_2SECTS(c->bll_foff), _2SECTS(c->bll_len));
++			} else {
++				list_for_each_entry(c, &r->blr_layouts,
++				    bll_list) {
++					n = list_entry(c->bll_list.next,
++					    struct pnfs_blocklayout_layout,
++					    bll_list);
++					/*
++					 * Insert after 'c' when the new entry
++					 * starts at or beyond the end of 'c' and
++					 * either 'c' is the last cache entry or
++					 * the new entry starts before the next
++					 * one, e.g. adding 40 to a cache holding
++					 * 10, 30 and 50. At the tail 'n' is the
++					 * list head, not a real entry, so the
++					 * tail test must come first.
++					 */
++					if (((b->bll_foff >=
++					      BLL_F_END(c)) &&
++					     (c->bll_list.next == &r->blr_layouts)) ||
++					    ((b->bll_foff >=
++					      BLL_F_END(c)) &&
++					     (b->bll_foff < n->bll_foff))) {
++						n = bll_alloc_dup(b,
++								  BLOCK_LAYOUT_CACHE, NULL);
++						if (!n)
++							return -ENOMEM;
++						dprintk("  adding new %Lu:%Lu"
++							" after %Lu:%Lu\n",
++							_2SECTS(n->bll_foff),
++							_2SECTS(n->bll_len),
++							_2SECTS(c->bll_foff),
++							_2SECTS(c->bll_len));
++						list_add(&n->bll_list,
++							 &c->bll_list);
++						break;
++					}
++				}
++			}
++		}
++	}
++	dprintk("<-- %s\n", __func__);
++	return status;
++}
++
++/*
++ * layout_cache_del -- remove the returned range 'seg_in' from r's cache.
++ */
++static void
++layout_cache_del(bl_layout_rec_t *r, const struct nfsd4_layout_seg *seg_in)
++{
++	struct pnfs_blocklayout_layout	*b,
++					*n;
++	u64				len;
++	struct nfsd4_layout_seg		seg = *seg_in;
++
++	dprintk("--> %s\n", __func__);
++	if (seg.length == NFS4_MAX_UINT64) {
++		r->blr_recalled = 0;
++		dprintk("  Fast return of all layouts\n");
++		while (!list_empty(&r->blr_layouts)) {
++			b = list_entry(r->blr_layouts.next,
++				       struct pnfs_blocklayout_layout, bll_list);
++			dprintk("    foff %Lu, len %Lu, soff %Lu\n",
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				_2SECTS(b->bll_soff));
++			list_del(&b->bll_list);
++			kfree(b);
++		}
++		dprintk("<-- %s\n", __func__);
++		return;
++	}
++
++restart:
++	list_for_each_entry(b, &r->blr_layouts, bll_list) {
++		if (seg.offset == b->bll_foff) {
++			/*
++			 * The return starts where this cache line starts and
++			 * covers all of it, only its beginning, or all of it
++			 * and part of the following entries.
++			 */
++			dprintk("  match on offsets, %Lu:%Lu\n",
++				_2SECTS(seg.offset), _2SECTS(seg.length));
++			len = MIN(seg.length, b->bll_len);
++			b->bll_foff	+= len;
++			b->bll_soff	+= len;
++			b->bll_len	-= len;
++			seg.length	-= len;
++			seg.offset	+= len;
++			if (!b->bll_len) {
++				list_del(&b->bll_list);
++				kfree(b);
++				dprintk("    removing cache line\n");
++				if (!seg.length) {
++					dprintk("    also finished\n");
++					goto complete;
++				}
++				/*
++				 * 'b' was freed, so list_for_each_entry()
++				 * cannot step from it; rescan from the head.
++				 */
++				goto restart;
++			}
++			if (!seg.length) {
++				dprintk("    finished, but cache line not "
++					"empty\n");
++				goto complete;
++			}
++		} else if ((seg.offset >= b->bll_foff) &&
++		    (seg.offset < BLL_F_END(b))) {
++			/* layout being returned is within this cache line. */
++			dprintk("  layout %Lu:%Lu within cache line %Lu:%Lu\n",
++				_2SECTS(seg.offset), _2SECTS(seg.length),
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len));
++			BUG_ON(!seg.length);
++			if ((seg.offset + seg.length) >= BLL_F_END(b)) {
++				/*
++				 * The return runs from the middle of the cache
++				 * entry to (or past) its end: trim the entry.
++				 */
++				dprintk("    trim back cache line\n");
++				len = seg.offset - b->bll_foff;
++				seg.offset += b->bll_len - len;
++				seg.length -= b->bll_len - len;
++				b->bll_len = len;
++				if (!seg.length)
++					return;
++			} else {
++				/*
++				 * Need to split current cache layout because
++				 * chunk is being removed from the middle.
++				 */
++				dprintk("    split cache line\n");
++				len = seg.offset + seg.length;
++				n = bll_alloc(len,
++					      (b->bll_foff + b->bll_len) - len,
++					      BLOCK_LAYOUT_CACHE, NULL);
++				if (!n) {
++					dprintk("    split failed, line kept\n");
++					return;
++				}
++				n->bll_soff = b->bll_soff + (len - b->bll_foff);
++				n->bll_es = b->bll_es;	/* tail inherits b's state */
++				n->bll_vol_id.devid = b->bll_vol_id.devid;
++				list_add(&n->bll_list, &b->bll_list);
++				b->bll_len = seg.offset - b->bll_foff;
++				return;
++			}
++		}
++	}
++complete:
++	if (list_empty(&r->blr_layouts))
++		r->blr_recalled = 0;
++	dprintk("<-- %s\n", __func__);
++}
++
++/*
++ * layout_cache_fill_from_list -- fills from cache list
++ *
++ * NOTE: This routine was only separated out from layout_cache_fill_from()
++ * to reduce the indentation level which makes the code easier to read.
++ */
++static inline boolean_t
++layout_cache_fill_from_list(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*n;
++	enum pnfs_block_extent_state4	s;
++
++	list_for_each_entry(b, &r->blr_layouts, bll_list) {
++		if (seg->offset < b->bll_foff) {
++			n = bll_alloc(seg->offset,
++			    MIN(seg->length, b->bll_foff - seg->offset),
++			    BLOCK_LAYOUT_NEW, NULL);
++			if (!n)
++				return False;
++
++			list_add(&n->bll_list, h->prev);
++			dprintk("  new: %Lu:%Lu, added before %Lu:%Lu\n",
++			    _2SECTS(n->bll_foff), _2SECTS(n->bll_len),
++			    _2SECTS(b->bll_foff), _2SECTS(b->bll_len));
++			seg->offset += n->bll_len;
++			seg->length -= n->bll_len;
++			if (!seg->length)
++				break;
++		}
++
++		if ((seg->offset >= b->bll_foff) &&
++		    (seg->offset < BLL_F_END(b))) {
++			if (layout_conflict(b, seg->iomode, &s) == False) {
++				dprintk("  CONFLICT FOUND: "
++				    "%Lu(f):%Lu(l):%Lu(s) state %d, iomode %d\n",
++				    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				    _2SECTS(b->bll_soff), b->bll_es,
++				    seg->iomode);
++				return False;
++			}
++			n = bll_alloc(seg->offset,
++			    MIN(seg->length, BLL_F_END(b) - seg->offset),
++			    BLOCK_LAYOUT_CACHE, h);
++			if (!n)
++				return False;	/* before the dprintk dereferences it */
++			dprintk("  CACHE hit: Found %Lu(f):%Lu(l): "
++			    "in %Lu(f):%Lu(l):%Lu(s):%d\n",
++			    _2SECTS(n->bll_foff), _2SECTS(n->bll_len),
++			    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++			    _2SECTS(b->bll_soff), b->bll_es);
++
++			n->bll_soff = b->bll_soff + seg->offset - b->bll_foff;
++			n->bll_vol_id.sbid = 0;
++			n->bll_vol_id.devid = b->bll_vol_id.devid;
++			n->bll_es = s;
++			seg->offset += n->bll_len;
++			seg->length -= n->bll_len;
++			if (!seg->length)
++				break;
++		}
++	}
++	return True;
++}
++
++static u64
++bll_alloc_holey(struct list_head *bl_candidates, u64 offset, u64 length,
++    dev_t dev)
++{
++	pnfs_blocklayout_layout_t	*hole;
++
++	/* Queue an extent with no backing store; 0 signals failure. */
++	hole = bll_alloc(offset, length, BLOCK_LAYOUT_NEW, bl_candidates);
++	if (hole != NULL) {
++		hole->bll_vol_id.devid = dev;
++		hole->bll_vol_id.sbid = 0;
++		hole->bll_es = PNFS_BLOCK_NONE_DATA;
++	}
++	return hole ? hole->bll_len : 0;
++}
++
++static void
++extents_setup(struct fiemap_extent_info *fei)
++{
++	fei->fi_extents_start	= NULL;	/* extents_cleanup() kfree()s this pointer */
++}
++
++/*
++ * extents_count -- count the extents backing a file range
++ *
++ * The request is made with no extent buffer and fi_extents_max of 0, so
++ * the file system only counts into fi_extents_mapped; nothing is copied
++ * out and set_fs() is not needed. The fiemap return value is ignored.
++ */
++static void
++extents_count(struct fiemap_extent_info *fei, struct inode *i, u64 foff, u64 len)
++{
++	u64	span = len + (1 << i->i_sb->s_blocksize_bits) - 1;
++
++	dprintk("    Need fiemap of %Ld:%Ld\n", _2SECTS(foff), _2SECTS(len));
++	fei->fi_extents_mapped = fei->fi_extents_max = 0;
++	fei->fi_extents_start	= NULL;
++	fei->fi_flags		= FIEMAP_FLAG_SYNC;
++	i->i_op->fiemap(i, fei, foff, span);
++}
++
++/*
++ * extents_get -- Get list of extents for range
++ *
++ * extents_count() must have been called before this routine such that
++ * fi_extents_mapped is known. On True, fi_extents_start points at a
++ * kcalloc()'d array which extents_cleanup() releases; on False nothing
++ * is left allocated. kcalloc() zeroes the array and refuses a count *
++ * size product that would overflow.
++ */
++static boolean_t
++extents_get(struct fiemap_extent_info *fei, struct inode *i, u64 foff, u64 len)
++{
++	int			rval;
++	struct fiemap_extent	*fe;
++	mm_segment_t		old_fs = get_fs();
++
++	/*
++	 * Now allocate the correct amount of space
++	 * needed. It's possible for the file to have changed
++	 * between calls which would require more space for
++	 * the extents. NOTE(review): nothing visible here checks
++	 * FIEMAP_EXTENT_LAST, so extra extents presumably just
++	 * go unreported -- confirm.
++	 */
++	fe = kcalloc(fei->fi_extents_mapped, sizeof (*fe), GFP_KERNEL);
++	if (!fe)
++		return False;
++
++	fei->fi_extents_max	= fei->fi_extents_mapped;
++	fei->fi_extents_mapped	= 0;
++	fei->fi_extents_start	= fe;
++
++	set_fs(KERNEL_DS);
++	rval = i->i_op->fiemap(i, fei, foff, len +
++	    (1 << i->i_sb->s_blocksize_bits) - 1);
++	set_fs(old_fs);
++
++	if (rval || !fei->fi_extents_mapped) {
++		dprintk("    No extents. Wanted %d, got %d\n",
++			fei->fi_extents_max, fei->fi_extents_mapped);
++		kfree(fe);
++		fei->fi_extents_start = NULL;
++		return False;
++	} else
++		return True;
++}
++
++/*
++ * extents_process -- convert the extents fiemap returned into block layout
++ *	 entries, adding an explicit hole entry wherever two extents don't abut.
++ */
++static boolean_t
++extents_process(struct fiemap_extent_info *fei, struct list_head *bl_candidates,
++    struct nfsd4_layout_seg *seg, dev_t dev, pnfs_blocklayout_layout_t *b)
++{
++	struct fiemap_extent		*ext	= fei->fi_extents_start,
++					*prev	= NULL;
++	int				i;
++	pnfs_blocklayout_layout_t	*n;
++	u64				prev_end,
++					hole_len;
++
++	dprintk("--> %s\n", __func__);
++	for (i = 0; i < fei->fi_extents_mapped; i++, ext++) {
++		BUG_ON(!ext->fe_physical);
++
++		/*
++		 * Holey files: when the previous extent does not end
++		 * logically where this one begins, the gap is a hole
++		 * and gets a pNFS extent of its own.
++		 */
++		if (prev != NULL) {
++			prev_end = prev->fe_logical + prev->fe_length;
++			if (prev_end != ext->fe_logical) {
++				dprintk("    Got a hole at %Ld:%Ld \n",
++				    _2SECTS(prev->fe_logical),
++				    _2SECTS(prev->fe_length));
++				hole_len = bll_alloc_holey(bl_candidates,
++				    prev_end, ext->fe_logical - prev_end, dev);
++				if (hole_len == 0)
++					return False;
++				seg->length += hole_len;
++			}
++		}
++
++		/* Now the mapped extent itself. */
++		n = bll_alloc(ext->fe_logical, ext->fe_length,
++		    BLOCK_LAYOUT_NEW, bl_candidates);
++		if (unlikely(n == NULL)) {
++			dprintk("%s: bll_alloc failed\n", __func__);
++			return False;
++		}
++
++		n->bll_vol_id.devid = dev;
++		n->bll_vol_id.sbid = 0;
++		if (seg->iomode == IOMODE_READ)
++			n->bll_es = PNFS_BLOCK_READ_DATA;
++		else
++			n->bll_es = PNFS_BLOCK_READWRITE_DATA;
++		n->bll_soff = ext->fe_physical;
++		seg->length += ext->fe_length;
++
++		print_bll(n, "New extent");
++		prev = ext;
++	}
++	dprintk("<-- %s (i=%d)\n", __func__, i);
++
++	return True;
++}
++
++static void
++extents_cleanup(struct fiemap_extent_info *fei)
++{
++	/* Release the extent buffer, if any. kfree() ignores NULL, and
++	 * clearing the pointer keeps a repeated call harmless. */
++	kfree(fei->fi_extents_start);
++	fei->fi_extents_start = NULL;
++}
++
++/*
++ * device_slice -- check to see if device is a slice or DM (minors > 1)
++ */
++static boolean_t
++device_slice(dev_t devid)
++{
++	struct block_device	*bd	= open_by_devnum(devid, FMODE_READ);
++	boolean_t		rval	= False;
++	/* open_by_devnum() reports failure with ERR_PTR(), not NULL. */
++	if (bd && !IS_ERR(bd)) {
++		if (bd->bd_disk->minors > 1)
++			rval = True;
++		blkdev_put(bd, FMODE_READ);
++	}
++	return rval;
++}
++
++/*
++ * device_dm -- check to see if device is a Device Mapper volume.
++ *
++ * Returns True for DM; False if not, or if the upcall fails.
++ */
++static boolean_t
++device_dm(dev_t devid)
++{
++	boolean_t		rval = False;
++	bl_comm_msg_t		msg;
++	bl_comm_res_t		*res = NULL;	/* kfree()d on every path */
++
++	msg.msg_type	= PNFS_UPCALL_MSG_DMCHK;
++	msg.u.msg_dev	= devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("Failed upcall to check on DM status\n");
++	} else if (res->u.dm_vol) {
++		rval = True;
++		dprintk("Device is DM volume\n");
++	} else
++		dprintk("Device is not DM volume\n");
++	kfree(res);
++
++	return rval;
++}
++
++/*
++ * layout_inode_add -- allocate the layout record for 'i' and hash it.
++ * Returns True with the record in '*p'; False if the file system lacks
++ * fiemap/fallocate or the allocation fails.
++ */
++static boolean_t
++layout_inode_add(struct inode *i, bl_layout_rec_t **p)
++{
++	bl_layout_rec_t		*r;
++
++	if (!i->i_op->fiemap || !i->i_op->fallocate) {
++		printk("pNFS: file system doesn't support required fiemap or "
++		    "fallocate methods\n");
++		return False;
++	}
++
++	r = kmalloc(sizeof (*r), GFP_KERNEL);
++	if (!r)
++		return False;
++
++	r->blr_rdev	= i->i_sb->s_dev;
++	r->blr_inode	= i;
++	r->blr_orig_size = i->i_size;
++	r->blr_ext_size	= 0;
++	r->blr_recalled	= 0;
++	INIT_LIST_HEAD(&r->blr_layouts);
++	spin_lock_init(&r->blr_lock);
++	spin_lock(&layout_hashtbl_lock);
++	list_add_tail(&r->blr_hash, &layout_hash);
++	spin_unlock(&layout_hashtbl_lock);
++	*p = r;
++	return True;
++}
++
++static bl_layout_rec_t *
++__layout_inode_find(struct inode *i)
++{
++	bl_layout_rec_t	*rec;
++
++	/* Match on inode number and device. The callers visible in this
++	 * file take layout_hashtbl_lock before calling. */
++	list_for_each_entry(rec, &layout_hash, blr_hash) {
++		if ((rec->blr_inode->i_ino != i->i_ino) ||
++		    (rec->blr_rdev != i->i_sb->s_dev))
++			continue;
++		return rec;
++	}
++	return NULL;
++}
++
++static bl_layout_rec_t *
++layout_inode_find(struct inode *i)
++{
++	bl_layout_rec_t	*found;
++
++	/* Locked wrapper: the lookup runs under layout_hashtbl_lock. */
++	spin_lock(&layout_hashtbl_lock);
++	found = __layout_inode_find(i);
++	spin_unlock(&layout_hashtbl_lock);
++	return found;
++}
++
++static void
++layout_inode_del(struct inode *i)
++{
++	bl_layout_rec_t	*rec;
++	int		unused;
++
++	spin_lock(&layout_hashtbl_lock);
++	rec = __layout_inode_find(i);
++	if (rec == NULL) {
++		dprintk("%s: failed to find inode [0x%x:%lu] in table for delete\n",
++			__func__, i->i_sb->s_dev, i->i_ino);
++	} else {
++		spin_lock(&rec->blr_lock);
++		unused = list_empty(&rec->blr_layouts);	/* nothing cached? */
++		if (unused)
++			list_del(&rec->blr_hash);
++		spin_unlock(&rec->blr_lock);
++		if (unused)
++			kfree(rec);
++	}
++	spin_unlock(&layout_hashtbl_lock);
++}
++
++/*
++ * map_state2name -- converts state in ascii string.
++ * Debug messages only. bl_layoutcommit() passes a state decoded from the
++ * client's data, so an unknown value gets a placeholder, not a BUG().
++ */
++static char *
++map_state2name(enum pnfs_block_extent_state4 s)
++{
++	switch (s) {
++	case PNFS_BLOCK_READWRITE_DATA:	return "     RW";
++	case PNFS_BLOCK_READ_DATA:	return "     RO";
++	case PNFS_BLOCK_INVALID_DATA:	return "INVALID";
++	case PNFS_BLOCK_NONE_DATA:	return "   NONE";
++	default:
++		return "UNKNOWN";
++	}
++}
++
++static pnfs_blocklayout_devinfo_t *
++bld_alloc(struct list_head *volumes, int type)
++{
++	pnfs_blocklayout_devinfo_t *vol;
++
++	/*
++	 * Zero-filled record of 'type', queued at the tail of 'volumes'.
++	 */
++	vol = kzalloc(sizeof (*vol), GFP_KERNEL);
++	if (vol != NULL) {
++		vol->bld_type = type;
++		list_add_tail(&vol->bld_list, volumes);
++	}
++	return vol;
++}
++
++static void
++bld_free(pnfs_blocklayout_devinfo_t *bld)
++{
++	list_del(&bld->bld_list);	/* unlink from the list bld_alloc() queued it on */
++	kfree(bld);			/* frees the record only, nothing it points at */
++}
++
++static void
++print_bll(pnfs_blocklayout_layout_t *b, char *text)
++{
++	dprintk("    BLL: %s\n", text);	/* debug dump of one entry, labelled by 'text' */
++	dprintk("    foff %Lu, soff %Lu, len %Lu, state %s\n",
++	    _2SECTS(b->bll_foff), _2SECTS(b->bll_soff), _2SECTS(b->bll_len),
++	    map_state2name(b->bll_es));	/* state rendered by map_state2name() */
++}
++
++static inline void
++bll_collapse(bl_layout_rec_t *r, pnfs_blocklayout_layout_t *c)
++{
++	pnfs_blocklayout_layout_t	*next;
++	int				absorbed	= 0;
++	u64				c_end;
++
++	BUG_ON(c->bll_es == PNFS_BLOCK_NONE_DATA);
++	/*
++	 * Swallow each following cache entry whose storage offset falls
++	 * inside 'c', stopping at the first one that does not or at the
++	 * end of the list.
++	 */
++	for (;;) {
++		if (c->bll_list.next == &r->blr_layouts)
++			break;
++		next = list_entry(c->bll_list.next,
++				  struct pnfs_blocklayout_layout, bll_list);
++		c_end = BLL_S_END(c);
++		if ((next->bll_soff < c->bll_soff) || (next->bll_soff >= c_end))
++			break;
++
++		/*
++		 * Existing: +---+                 +---+
++		 *      New: +-----------------------+
++		 * The last entry swallowed may stick out past the end of
++		 * 'c'. It is deleted all the same, so 'c' is stretched to
++		 * take in the part that would otherwise be lost.
++		 */
++		if (c_end < BLL_S_END(next))
++			c->bll_len += BLL_S_END(next) - c_end;
++
++		list_del(&next->bll_list);
++		kfree(next);
++		absorbed++;
++	}
++
++	if (absorbed != 0)
++		dprintk("  Collapsed %d cache entries between %Lu(s) and %Lu(s)\n",
++			absorbed, _2SECTS(c->bll_soff), _2SECTS(BLL_S_END(c)));
++}
++
++/* Allocate a zeroed layout entry for @offset/@len in cache state @state. */
++static pnfs_blocklayout_layout_t *
++bll_alloc(u64 offset, u64 len, enum bl_cache_state state, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*n;
++
++	n = kzalloc(sizeof(*n), GFP_KERNEL);
++	if (n) {
++		n->bll_foff		= offset;
++		n->bll_len		= len;
++		n->bll_cache_state	= state;
++		if (h)	/* @h is optional; NULL skips the list insertion */
++			list_add_tail(&n->bll_list, h);
++	}
++	return n;	/* NULL when the allocation failed */
++}
++
++static pnfs_blocklayout_layout_t *
++bll_alloc_dup(pnfs_blocklayout_layout_t *b, enum bl_cache_state c,
++	      struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*n	= NULL;
++	/* foff/len go through bll_alloc(); es, soff and devid are copied here. */
++	n = bll_alloc(b->bll_foff, b->bll_len, c, h);
++	if (n) {
++		n->bll_es			= b->bll_es;
++		n->bll_soff			= b->bll_soff;
++		n->bll_vol_id.devid		= b->bll_vol_id.devid;
++	}
++	return n;
++}
++
++static inline boolean_t
++layout_conflict(pnfs_blocklayout_layout_t *b, u32 iomode,
++		enum pnfs_block_extent_state4 *s)
++{
++	/* ---- Normal case: report the extent's own state through *s ---- */
++	*s = b->bll_es;
++	/* NOTE(review): False marks an incompatible state/iomode pair, True is OK. */
++	switch (b->bll_es) {
++	case PNFS_BLOCK_READWRITE_DATA:
++		if (iomode == IOMODE_READ)
++			*s = PNFS_BLOCK_READ_DATA;
++		/* ---- Any use is permitted. ---- */
++		break;
++	case PNFS_BLOCK_READ_DATA:
++		/* ---- Committed as read only data. ---- */
++		if (iomode == IOMODE_RW)
++			return False;
++		break;
++	case PNFS_BLOCK_INVALID_DATA:
++		/* ---- Blocks have been allocated, but not initialized ---- */
++		if (iomode == IOMODE_READ)
++			*s = PNFS_BLOCK_NONE_DATA;
++		break;
++	case PNFS_BLOCK_NONE_DATA:
++		/* ---- Hole-y file. No backing store avail. ---- */
++		if (iomode != IOMODE_READ)
++			return False;
++		break;
++	default:
++		BUG();
++	}
++	return True;
++}
++
++#endif /* CONFIG_SPNFS_BLOCK */
+diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs/delegation.c
+--- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-08-23 12:08:29.037481540 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-23 12:09:03.300491952 -0400
+@@ -104,7 +104,8 @@ again:
+ 			continue;
+ 		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+ 			continue;
+-		if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
++		if (memcmp(state->stateid.u.data, stateid->u.data,
++			   sizeof(state->stateid.u.data)) != 0)
+ 			continue;
+ 		get_nfs_open_context(ctx);
+ 		spin_unlock(&inode->i_lock);
+@@ -133,8 +134,8 @@ void nfs_inode_reclaim_delegation(struct
+ 	if (delegation != NULL) {
+ 		spin_lock(&delegation->lock);
+ 		if (delegation->inode != NULL) {
+-			memcpy(delegation->stateid.data, res->delegation.data,
+-			       sizeof(delegation->stateid.data));
++			memcpy(delegation->stateid.u.data, res->delegation.u.data,
++			       sizeof(delegation->stateid.u.data));
+ 			delegation->type = res->delegation_type;
+ 			delegation->maxsize = res->maxsize;
+ 			oldcred = delegation->cred;
+@@ -187,8 +188,9 @@ static struct nfs_delegation *nfs_detach
+ 	if (delegation == NULL)
+ 		goto nomatch;
+ 	spin_lock(&delegation->lock);
+-	if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
+-				sizeof(delegation->stateid.data)) != 0)
++	if (stateid != NULL && memcmp(delegation->stateid.u.data,
++				      stateid->u.data,
++				      sizeof(delegation->stateid.u.data)) != 0)
+ 		goto nomatch_unlock;
+ 	list_del_rcu(&delegation->super_list);
+ 	delegation->inode = NULL;
+@@ -216,8 +218,8 @@ int nfs_inode_set_delegation(struct inod
+ 	delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
+ 	if (delegation == NULL)
+ 		return -ENOMEM;
+-	memcpy(delegation->stateid.data, res->delegation.data,
+-			sizeof(delegation->stateid.data));
++	memcpy(delegation->stateid.u.data, res->delegation.u.data,
++			sizeof(delegation->stateid.u.data));
+ 	delegation->type = res->delegation_type;
+ 	delegation->maxsize = res->maxsize;
+ 	delegation->change_attr = nfsi->change_attr;
+@@ -471,9 +473,7 @@ void nfs_expire_unreferenced_delegations
+ /*
+  * Asynchronous delegation recall!
+  */
+-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
+-				      int (*validate_stateid)(struct nfs_delegation *delegation,
+-							      const nfs4_stateid *stateid))
++int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
+ {
+ 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ 	struct nfs_delegation *delegation;
+@@ -481,7 +481,7 @@ int nfs_async_inode_return_delegation(st
+ 	rcu_read_lock();
+ 	delegation = rcu_dereference(NFS_I(inode)->delegation);
+ 
+-	if (!validate_stateid(delegation, stateid)) {
++	if (!clp->cl_mvops->validate_stateid(delegation, stateid)) {
+ 		rcu_read_unlock();
+ 		return -ENOENT;
+ 	}
+@@ -562,7 +562,8 @@ int nfs4_copy_delegation_stateid(nfs4_st
+ 	rcu_read_lock();
+ 	delegation = rcu_dereference(nfsi->delegation);
+ 	if (delegation != NULL) {
+-		memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
++		memcpy(dst->u.data, delegation->stateid.u.data,
++		       sizeof(dst->u.data));
+ 		ret = 1;
+ 	}
+ 	rcu_read_unlock();
+diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs/delegation.h
+--- linux-2.6.34.noarch/fs/nfs/delegation.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-08-23 12:09:03.301431797 -0400
+@@ -34,9 +34,7 @@ enum {
+ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+ int nfs_inode_return_delegation(struct inode *inode);
+-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
+-				      int (*validate_stateid)(struct nfs_delegation *delegation,
+-							      const nfs4_stateid *stateid));
++int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
+ void nfs_inode_return_delegation_noreclaim(struct inode *inode);
+ 
+ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
+diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
+--- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-08-23 12:08:29.089481525 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 12:09:03.302511603 -0400
+@@ -17,11 +17,19 @@
+ #include <linux/module.h>
+ #include <linux/exportfs.h>
+ 
++#include <linux/nfsd/nfsd4_pnfs.h>
++#if defined(CONFIG_SPNFS)
++#include <linux/nfsd4_spnfs.h>
++#if defined(CONFIG_SPNFS_BLOCK)
++#include <linux/nfsd4_block.h>
++#endif
++#endif
+ #include <linux/nfsd/syscall.h>
+ #include <net/ipv6.h>
+ 
+ #include "nfsd.h"
+ #include "nfsfh.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY	NFSDDBG_EXPORT
+ 
+@@ -352,6 +360,40 @@ static int svc_export_upcall(struct cach
+ 	return sunrpc_cache_pipe_upcall(cd, h, svc_export_request);
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static struct pnfsd_cb_operations pnfsd_cb_op = {
++	.cb_layout_recall = nfsd_layout_recall_cb,
++	.cb_device_notify = nfsd_device_notify_cb,
++
++	.cb_get_state = nfs4_pnfs_cb_get_state,
++	.cb_change_state = nfs4_pnfs_cb_change_state,
++};
++
++#if defined(CONFIG_SPNFS)
++static struct pnfs_export_operations spnfs_export_ops = {
++	.layout_type = spnfs_layout_type,
++	.get_device_info = spnfs_getdeviceinfo,
++	.get_device_iter = spnfs_getdeviceiter,
++	.layout_get = spnfs_layoutget,
++	.layout_return = spnfs_layoutreturn,
++};
++
++static struct pnfs_export_operations spnfs_ds_export_ops = {
++	.get_state = spnfs_get_state,
++};
++
++#if defined(CONFIG_SPNFS_BLOCK)
++static struct pnfs_export_operations bl_export_ops = {
++	.layout_type = bl_layout_type,
++	.get_device_info = bl_getdeviceinfo,
++	.get_device_iter = bl_getdeviceiter,
++	.layout_get = bl_layoutget,
++	.layout_return = bl_layoutreturn,
++};
++#endif /* CONFIG_SPNFS_BLOCK */
++#endif /* CONFIG_SPNFS */
++#endif /* CONFIG_PNFSD */
++
+ static struct svc_export *svc_export_update(struct svc_export *new,
+ 					    struct svc_export *old);
+ static struct svc_export *svc_export_lookup(struct svc_export *);
+@@ -395,6 +437,47 @@ static int check_export(struct inode *in
+ 		return -EINVAL;
+ 	}
+ 
++#if !defined(CONFIG_SPNFS)
++	if (inode->i_sb->s_pnfs_op &&
++	    (!inode->i_sb->s_pnfs_op->layout_type ||
++	     !inode->i_sb->s_pnfs_op->get_device_info ||
++	     !inode->i_sb->s_pnfs_op->layout_get)) {
++		dprintk("exp_export: export of invalid fs pnfs export ops.\n");
++		return -EINVAL;
++	}
++#endif /* CONFIG_SPNFS */
++
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++	if (!inode->i_sb->s_pnfs_op)
++		pnfsd_lexp_init(inode);
++	return 0;
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
++#if defined(CONFIG_SPNFS)
++#if defined(CONFIG_SPNFS_BLOCK)
++	if (pnfs_block_enabled(inode, *flags)) {
++		dprintk("set pnfs block export structure... \n");
++		inode->i_sb->s_pnfs_op = &bl_export_ops;
++	} else
++#endif /* CONFIG_SPNFS_BLOCK */
++	/*
++	 * spnfs_enabled() indicates we're an MDS.
++	 * XXX Better to check an export time option as well.
++	 */
++	if (spnfs_enabled()) {
++		dprintk("set spnfs export structure...\n");
++		inode->i_sb->s_pnfs_op = &spnfs_export_ops;
++	} else {
++		dprintk("%s spnfs not in use\n", __func__);
++
++		/*
++		 * get_state is needed if we're a DS using spnfs.
++		 * XXX Better to check an export time option instead.
++		 */
++		inode->i_sb->s_pnfs_op = &spnfs_ds_export_ops;
++	}
++#endif /* CONFIG_SPNFS */
++
+ 	return 0;
+ 
+ }
+@@ -586,6 +669,8 @@ static int svc_export_parse(struct cache
+ 					if (exp.ex_uuid == NULL)
+ 						err = -ENOMEM;
+ 				}
++			} else if (strcmp(buf, "pnfs") == 0) {
++				exp.ex_pnfs = 1;
+ 			} else if (strcmp(buf, "secinfo") == 0)
+ 				err = secinfo_parse(&mesg, buf, &exp);
+ 			else
+@@ -660,6 +745,8 @@ static int svc_export_show(struct seq_fi
+ 				seq_printf(m, "%02x", exp->ex_uuid[i]);
+ 			}
+ 		}
++		if (exp->ex_pnfs)
++			seq_puts(m, ",pnfs");
+ 		show_secinfo(m, exp);
+ 	}
+ 	seq_puts(m, ")\n");
+@@ -687,6 +774,7 @@ static void svc_export_init(struct cache
+ 	new->ex_fslocs.locations = NULL;
+ 	new->ex_fslocs.locations_count = 0;
+ 	new->ex_fslocs.migrated = 0;
++	new->ex_pnfs = 0;
+ }
+ 
+ static void export_update(struct cache_head *cnew, struct cache_head *citem)
+@@ -699,6 +787,7 @@ static void export_update(struct cache_h
+ 	new->ex_anon_uid = item->ex_anon_uid;
+ 	new->ex_anon_gid = item->ex_anon_gid;
+ 	new->ex_fsid = item->ex_fsid;
++	new->ex_pnfs = item->ex_pnfs;
+ 	new->ex_uuid = item->ex_uuid;
+ 	item->ex_uuid = NULL;
+ 	new->ex_pathname = item->ex_pathname;
+@@ -1635,8 +1724,17 @@ nfsd_export_init(void)
+ 	if (rv)
+ 		return rv;
+ 	rv = cache_register(&svc_expkey_cache);
+-	if (rv)
++	if (rv) {
+ 		cache_unregister(&svc_export_cache);
++		goto out;
++	}
++#if defined(CONFIG_PNFSD)
++	spin_lock(&pnfsd_cb_ctl.lock);
++	pnfsd_cb_ctl.module = THIS_MODULE;
++	pnfsd_cb_ctl.cb_op = &pnfsd_cb_op;
++	spin_unlock(&pnfsd_cb_ctl.lock);
++#endif /* CONFIG_PNFSD */
++out:
+ 	return rv;
+ 
+ }
+@@ -1664,6 +1762,12 @@ nfsd_export_shutdown(void)
+ 
+ 	exp_writelock();
+ 
++#if defined(CONFIG_PNFSD)
++	spin_lock(&pnfsd_cb_ctl.lock);
++	pnfsd_cb_ctl.module = NULL;
++	pnfsd_cb_ctl.cb_op = NULL;
++	spin_unlock(&pnfsd_cb_ctl.lock);
++#endif /* CONFIG_PNFSD */
+ 	cache_unregister(&svc_expkey_cache);
+ 	cache_unregister(&svc_export_cache);
+ 	svcauth_unix_purge();
+diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/direct.c
+--- linux-2.6.34.noarch/fs/nfs/direct.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-08-23 12:09:03.303491500 -0400
+@@ -267,6 +267,38 @@ static const struct rpc_call_ops nfs_rea
+ 	.rpc_release = nfs_direct_read_release,
+ };
+ 
++/* Start the READ RPC for @data; returns 0, or PTR_ERR() if rpc_run_task() fails. */
++static long nfs_direct_read_execute(struct nfs_read_data *data,
++				    struct rpc_task_setup *task_setup_data,
++				    struct rpc_message *msg)
++{
++	struct inode *inode = data->inode;
++	struct rpc_task *task;
++
++	nfs_fattr_init(&data->fattr);
++	msg->rpc_argp = &data->args;
++	msg->rpc_resp = &data->res;
++
++	task_setup_data->task = &data->task;
++	task_setup_data->callback_data = data;
++	NFS_PROTO(inode)->read_setup(data, msg);
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++
++	/* Log before the put: @data embeds the task and may go away with it. */
++	dprintk("NFS: %5u initiated direct read call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++	rpc_put_task(task);
++	return 0;
++}
++
+ /*
+  * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
+  * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
+@@ -283,7 +315,6 @@ static ssize_t nfs_direct_read_schedule_
+ 	unsigned long user_addr = (unsigned long)iov->iov_base;
+ 	size_t count = iov->iov_len;
+ 	size_t rsize = NFS_SERVER(inode)->rsize;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = ctx->cred,
+ 	};
+@@ -343,26 +374,9 @@ static ssize_t nfs_direct_read_schedule_
+ 		data->res.fattr = &data->fattr;
+ 		data->res.eof = 0;
+ 		data->res.count = bytes;
+-		nfs_fattr_init(&data->fattr);
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+ 
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		NFS_PROTO(inode)->read_setup(data, &msg);
+-
+-		task = rpc_run_task(&task_setup_data);
+-		if (IS_ERR(task))
+-			break;
+-		rpc_put_task(task);
+-
+-		dprintk("NFS: %5u initiated direct read call "
+-			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				bytes,
+-				(unsigned long long)data->args.offset);
++		if (nfs_direct_read_execute(data, &task_setup_data, &msg))
++			break;
+ 
+ 		started += bytes;
+ 		user_addr += bytes;
+@@ -448,12 +462,15 @@ static void nfs_direct_free_writedata(st
+ }
+ 
+ #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
++static long nfs_direct_write_execute(struct nfs_write_data *data,
++				     struct rpc_task_setup *task_setup_data,
++				     struct rpc_message *msg);
++
+ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+ {
+ 	struct inode *inode = dreq->inode;
+ 	struct list_head *p;
+ 	struct nfs_write_data *data;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = dreq->ctx->cred,
+ 	};
+@@ -487,25 +504,7 @@ static void nfs_direct_write_reschedule(
+ 		 * Reuse data->task; data->args should not have changed
+ 		 * since the original request was sent.
+ 		 */
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+-		NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-		/*
+-		 * We're called via an RPC callback, so BKL is already held.
+-		 */
+-		task = rpc_run_task(&task_setup_data);
+-		if (!IS_ERR(task))
+-			rpc_put_task(task);
+-
+-		dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				data->args.count,
+-				(unsigned long long)data->args.offset);
++		nfs_direct_write_execute(data, &task_setup_data, &msg);
+ 	}
+ 
+ 	if (put_dreq(dreq))
+@@ -548,10 +547,31 @@ static const struct rpc_call_ops nfs_com
+ 	.rpc_release = nfs_direct_commit_release,
+ };
+ 
++static long nfs_direct_commit_execute(struct nfs_direct_req *dreq,
++				      struct nfs_write_data *data,
++				      struct rpc_task_setup *task_setup_data,
++				      struct rpc_message *msg)
++{
++	struct rpc_task *task;
++
++	NFS_PROTO(data->inode)->commit_setup(data, msg);
++
++	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
++	dreq->commit_data = NULL;
++
++	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);	/* negative errno from the failed start */
++
++	rpc_put_task(task);	/* done with the task pointer we were handed */
++	return 0;
++}
++
+ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
+ {
+ 	struct nfs_write_data *data = dreq->commit_data;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+@@ -579,16 +599,7 @@ static void nfs_direct_commit_schedule(s
+ 	data->res.verf = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	NFS_PROTO(data->inode)->commit_setup(data, &msg);
+-
+-	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
+-	dreq->commit_data = NULL;
+-
+-	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (!IS_ERR(task))
+-		rpc_put_task(task);
++	nfs_direct_commit_execute(dreq, data, &task_setup_data, &msg);
+ }
+ 
+ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+@@ -690,6 +701,36 @@ static const struct rpc_call_ops nfs_wri
+ 	.rpc_release = nfs_direct_write_release,
+ };
+ 
++/* Start the WRITE RPC for @data; returns 0, or PTR_ERR() if rpc_run_task() fails. */
++static long nfs_direct_write_execute(struct nfs_write_data *data,
++				     struct rpc_task_setup *task_setup_data,
++				     struct rpc_message *msg)
++{
++	struct inode *inode = data->inode;
++	struct rpc_task *task;
++
++	task_setup_data->task = &data->task;
++	task_setup_data->callback_data = data;
++	msg->rpc_argp = &data->args;
++	msg->rpc_resp = &data->res;
++	NFS_PROTO(inode)->write_setup(data, msg);
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++
++	/* Log before the put: @data embeds the task and may go away with it. */
++	dprintk("NFS: %5u initiated direct write call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++	rpc_put_task(task);
++	return 0;
++}
++
+ /*
+  * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+  * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
+@@ -705,7 +746,6 @@ static ssize_t nfs_direct_write_schedule
+ 	struct inode *inode = ctx->path.dentry->d_inode;
+ 	unsigned long user_addr = (unsigned long)iov->iov_base;
+ 	size_t count = iov->iov_len;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = ctx->cred,
+ 	};
+@@ -771,24 +811,8 @@ static ssize_t nfs_direct_write_schedule
+ 		data->res.verf = &data->verf;
+ 		nfs_fattr_init(&data->fattr);
+ 
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+-		NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-		task = rpc_run_task(&task_setup_data);
+-		if (IS_ERR(task))
+-			break;
+-		rpc_put_task(task);
+-
+-		dprintk("NFS: %5u initiated direct write call "
+-			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				bytes,
+-				(unsigned long long)data->args.offset);
++		if (nfs_direct_write_execute(data, &task_setup_data, &msg))
++			break;
+ 
+ 		started += bytes;
+ 		user_addr += bytes;
+diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kconfig
+--- linux-2.6.34.noarch/fs/nfsd/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-08-23 12:09:03.304505472 -0400
+@@ -79,3 +79,52 @@ config NFSD_V4
+ 	  available from http://linux-nfs.org/.
+ 
+ 	  If unsure, say N.
++
++config PNFSD
++	bool "NFSv4.1 server support for Parallel NFS (pNFS) (DEVELOPER ONLY)"
++	depends on NFSD_V4 && EXPERIMENTAL
++	select EXPORTFS_FILE_LAYOUT
++	help
++	  This option enables support for the parallel NFS features of the
++	  minor version 1 of the NFSv4 protocol (draft-ietf-nfsv4-minorversion1)
++	  in the kernel's NFS server.
++
++	  Unless you're an NFS developer, say N.
++
++config PNFSD_LOCAL_EXPORT
++	bool "Enable pNFS support for exporting local filesystems for debugging purposes"
++	depends on PNFSD
++	help
++	  Say Y here if you want your pNFS server to export local file systems
++	  over the files layout type.  With this option the MDS (metadata
++	  server) functions also as a single DS (data server).  This is mostly
++	  useful for development and debugging purposes.
++
++	  If unsure, say N.
++
++config SPNFS
++	bool "Provide spNFS server support (EXPERIMENTAL)"
++	depends on PNFSD
++	select RPCSEC_GSS_KRB5
++	help
++	  Say Y here if you want spNFS server support.
++
++	  If unsure, say N.
++
++config SPNFS_LAYOUTSEGMENTS
++	bool "Allow spNFS to return partial file layouts (EXPERIMENTAL)"
++	depends on SPNFS
++	select RPCSEC_GSS_KRB5
++	help
++	  Say Y here if you want spNFS to be able to return layout segments.
++
++	  If unsure, say N.
++
++config SPNFS_BLOCK
++	bool "Provide Block Layout server support (EXPERIMENTAL)"
++	depends on SPNFS
++	select EXPORTFS_BLOCK_LAYOUT
++	help
++	  Say Y here if you want spNFS block layout support.
++
++	  If unsure, say N.
+diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/Makefile
+--- linux-2.6.34.noarch/fs/nfsd/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-08-23 12:09:03.304505472 -0400
+@@ -11,3 +11,7 @@ nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs
+ nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
+ nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
+ 			   nfs4acl.o nfs4callback.o nfs4recover.o
++nfsd-$(CONFIG_PNFSD)	+= nfs4pnfsd.o nfs4pnfsdlm.o nfs4pnfsds.o
++nfsd-$(CONFIG_PNFSD_LOCAL_EXPORT) += pnfsd_lexp.o
++nfsd-$(CONFIG_SPNFS)	+= spnfs_com.o spnfs_ops.o
++nfsd-$(CONFIG_SPNFS_BLOCK) += bl_com.o bl_ops.o
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-08-23 12:08:29.090501507 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 12:09:03.306491345 -0400
+@@ -40,7 +40,6 @@
+ 
+ #define NFSPROC4_CB_NULL 0
+ #define NFSPROC4_CB_COMPOUND 1
+-#define NFS4_STATEID_SIZE 16
+ 
+ /* Index of predefined Linux callback client operations */
+ 
+@@ -48,11 +47,17 @@ enum {
+ 	NFSPROC4_CLNT_CB_NULL = 0,
+ 	NFSPROC4_CLNT_CB_RECALL,
+ 	NFSPROC4_CLNT_CB_SEQUENCE,
++#if defined(CONFIG_PNFSD)
++	NFSPROC4_CLNT_CB_LAYOUT,
++	NFSPROC4_CLNT_CB_DEVICE,
++#endif
+ };
+ 
+ enum nfs_cb_opnum4 {
+ 	OP_CB_RECALL            = 4,
++	OP_CB_LAYOUT            = 5,
+ 	OP_CB_SEQUENCE          = 11,
++	OP_CB_DEVICE            = 14,
+ };
+ 
+ #define NFS4_MAXTAGLEN		20
+@@ -78,6 +83,19 @@ enum nfs_cb_opnum4 {
+ #define NFS4_dec_cb_recall_sz		(cb_compound_dec_hdr_sz  +      \
+ 					cb_sequence_dec_sz +            \
+ 					op_dec_sz)
++#define NFS4_enc_cb_layout_sz		(cb_compound_enc_hdr_sz +       \
++					cb_sequence_enc_sz +            \
++					1 + 3 +                         \
++					enc_nfs4_fh_sz + 4)
++#define NFS4_dec_cb_layout_sz		(cb_compound_dec_hdr_sz  +      \
++					cb_sequence_dec_sz +            \
++					op_dec_sz)
++#define NFS4_enc_cb_device_sz		(cb_compound_enc_hdr_sz +       \
++					cb_sequence_enc_sz +            \
++					1 + 6)
++#define NFS4_dec_cb_device_sz		(cb_compound_dec_hdr_sz  +      \
++					cb_sequence_dec_sz +            \
++					op_dec_sz)
+ 
+ /*
+ * Generic encode routines from fs/nfs/nfs4xdr.c
+@@ -94,6 +112,10 @@ xdr_writemem(__be32 *p, const void *ptr,
+ }
+ 
+ #define WRITE32(n)               *p++ = htonl(n)
++#define WRITE64(n)               do {				\
++	*p++ = htonl((u32)((n) >> 32));				\
++	*p++ = htonl((u32)(n));					\
++} while (0)
+ #define WRITEMEM(ptr,nbytes)     do {                           \
+ 	p = xdr_writemem(p, ptr, nbytes);                       \
+ } while (0)
+@@ -204,6 +226,16 @@ nfs_cb_stat_to_errno(int stat)
+  */
+ 
+ static void
++encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
++{
++	__be32 *p;
++
++	RESERVE_SPACE(sizeof(stateid_t));	/* room for both writes below */
++	WRITE32(sid->si_generation);
++	WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
++}
++
++static void
+ encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
+ {
+ 	__be32 * p;
+@@ -228,10 +260,10 @@ encode_cb_recall(struct xdr_stream *xdr,
+ 	__be32 *p;
+ 	int len = dp->dl_fh.fh_size;
+ 
+-	RESERVE_SPACE(12+sizeof(dp->dl_stateid) + len);
++	RESERVE_SPACE(4);
+ 	WRITE32(OP_CB_RECALL);
+-	WRITE32(dp->dl_stateid.si_generation);
+-	WRITEMEM(&dp->dl_stateid.si_opaque, sizeof(stateid_opaque_t));
++	encode_stateid(xdr, &dp->dl_stateid);
++	RESERVE_SPACE(8 + (XDR_QUADLEN(len) << 2));
+ 	WRITE32(0); /* truncate optimization not implemented */
+ 	WRITE32(len);
+ 	WRITEMEM(&dp->dl_fh.fh_base, len);
+@@ -259,6 +291,111 @@ encode_cb_sequence(struct xdr_stream *xd
+ 	hdr->nops++;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++#include "pnfsd.h"
++
++/* Encode the OP_CB_LAYOUT (layout recall) op for @clr and bump @hdr->nops. */
++static void
++encode_cb_layout(struct xdr_stream *xdr, struct nfs4_layoutrecall *clr,
++		 struct nfs4_cb_compound_hdr *hdr)
++{
++	__be32 *p;	/* big-endian write cursor, as in encode_stateid() */
++
++	BUG_ON(hdr->minorversion == 0);
++	RESERVE_SPACE(20);	/* the five 32-bit words written next */
++	WRITE32(OP_CB_LAYOUT);
++	WRITE32(clr->cb.cbl_seg.layout_type);
++	WRITE32(clr->cb.cbl_seg.iomode);
++	WRITE32(clr->cb.cbl_layoutchanged);
++	WRITE32(clr->cb.cbl_recall_type);
++	if (unlikely(clr->cb.cbl_recall_type == RETURN_FSID)) {
++		struct nfs4_fsid fsid = clr->cb.cbl_fsid;
++
++		RESERVE_SPACE(16);
++		WRITE64(fsid.major);
++		WRITE64(fsid.minor);
++		dprintk("%s: type %x iomode %d changed %d recall_type %d "
++			"fsid 0x%llx-0x%llx\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type, fsid.major, fsid.minor);
++	} else if (clr->cb.cbl_recall_type == RETURN_FILE) {
++		int len = clr->clr_file->fi_fhlen;
++		stateid_t *cbl_sid = (stateid_t *)&clr->cb.cbl_sid;
++		/* fh length + padded fh + offset + length; stateid reserves its own */
++		RESERVE_SPACE(20 + (XDR_QUADLEN(len) << 2));
++		WRITE32(len);
++		WRITEMEM(clr->clr_file->fi_fhval, len);
++		WRITE64(clr->cb.cbl_seg.offset);
++		WRITE64(clr->cb.cbl_seg.length);
++		encode_stateid(xdr, cbl_sid);
++		dprintk("%s: type %x iomode %d changed %d recall_type %d "
++			"offset %lld length %lld stateid " STATEID_FMT "\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type,
++			clr->cb.cbl_seg.offset, clr->cb.cbl_seg.length,
++			STATEID_VAL(cbl_sid));
++	} else {
++		dprintk("%s: type %x iomode %d changed %d recall_type %d\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type);
++	}
++	hdr->nops++;
++}
++
++static void
++encode_cb_device(struct xdr_stream *xdr, struct nfs4_notify_device *nd,
++		 struct nfs4_cb_compound_hdr *hdr)
++{
++	__be32 *p;	/* big-endian write cursor, as in encode_stateid() */
++	int i;
++	int len					= nd->nd_list->cbd_len;
++	struct nfsd4_pnfs_cb_dev_item *cbd	= nd->nd_list->cbd_list;
++
++	dprintk("NFSD %s: --> num %d\n", __func__, len);
++
++	BUG_ON(hdr->minorversion == 0);
++
++	RESERVE_SPACE(8);	/* opcode + change-array count */
++	WRITE32(OP_CB_DEVICE);
++
++	/* notify4 cnda_changes<>; */
++	WRITE32(len);
++	for (i = 0; i < len; i++) {
++		dprintk("%s: nt %d lt %d devid x%llx-x%llx im %d i %d\n",
++			__func__, cbd[i].cbd_notify_type,
++			cbd[i].cbd_layout_type,
++			cbd[i].cbd_devid.sbid,
++			cbd[i].cbd_devid.devid,
++			cbd[i].cbd_immediate, i);
++
++		BUG_ON(cbd[i].cbd_notify_type != NOTIFY_DEVICEID4_CHANGE &&
++		       cbd[i].cbd_notify_type != NOTIFY_DEVICEID4_DELETE);
++		RESERVE_SPACE(32);	/* four 32-bit words + 16-byte device id */
++		/* bitmap4         notify_mask; */
++		WRITE32(1);
++		WRITE32(cbd[i].cbd_notify_type);
++		/* opaque     notify_vals<>; */
++		if (cbd[i].cbd_notify_type == NOTIFY_DEVICEID4_CHANGE)
++			WRITE32(24);
++		else
++			WRITE32(20);
++		WRITE32(cbd[i].cbd_layout_type);
++		WRITE64(cbd[i].cbd_devid.sbid);
++		WRITE64(cbd[i].cbd_devid.devid);
++
++		if (cbd[i].cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) {
++			RESERVE_SPACE(4);	/* the extra word counted in the 24 above */
++			WRITE32(cbd[i].cbd_immediate);
++		}
++	}
++	hdr->nops++;
++}
++#endif /* CONFIG_PNFSD */
++
+ static int
+ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
+ {
+@@ -288,6 +425,45 @@ nfs4_xdr_enc_cb_recall(struct rpc_rqst *
+ 	return 0;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static int
++nfs4_xdr_enc_cb_layout(struct rpc_rqst *req, __be32 *p,
++		       struct nfs4_rpc_args *rpc_args)
++{
++	struct xdr_stream xdr;
++	struct nfs4_layoutrecall *args = rpc_args->args_op;
++	struct nfs4_cb_compound_hdr hdr = {
++		.ident = 0,
++		.minorversion = rpc_args->args_seq.cbs_minorversion,
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_cb_compound_hdr(&xdr, &hdr);	/* compound header first */
++	encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
++	encode_cb_layout(&xdr, args, &hdr);	/* then the layout recall op */
++	encode_cb_nops(&hdr);
++	return 0;	/* this encoder has no failure return */
++}
++
++static int
++nfs4_xdr_enc_cb_device(struct rpc_rqst *req, __be32 *p,
++		       struct nfs4_rpc_args *rpc_args)
++{
++	struct xdr_stream xdr;
++	struct nfs4_notify_device *args = rpc_args->args_op;
++	struct nfs4_cb_compound_hdr hdr = {
++		.ident = 0,
++		.minorversion = rpc_args->args_seq.cbs_minorversion,
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_cb_compound_hdr(&xdr, &hdr);	/* compound header first */
++	encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
++	encode_cb_device(&xdr, args, &hdr);	/* then the device notify op */
++	encode_cb_nops(&hdr);
++	return 0;	/* this encoder has no failure return */
++}
++#endif /* CONFIG_PNFSD */
+ 
+ static int
+ decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){
+@@ -403,6 +579,48 @@ out:
+ 	return status;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static int
++nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp, __be32 *p,
++		       struct nfsd4_cb_sequence *seq)
++{
++	struct xdr_stream xdr;
++	struct nfs4_cb_compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_cb_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_cb_sequence(&xdr, seq, rqstp);
++	if (status)
++		goto out;
++	status = decode_cb_op_hdr(&xdr, OP_CB_LAYOUT);	/* reply carries no body */
++out:
++	return status;	/* first non-zero decode status, else 0 */
++}
++
++static int
++nfs4_xdr_dec_cb_device(struct rpc_rqst *rqstp, __be32 *p,
++		       struct nfsd4_cb_sequence *seq)
++{
++	struct xdr_stream xdr;
++	struct nfs4_cb_compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_cb_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_cb_sequence(&xdr, seq, rqstp);
++	if (status)
++		goto out;
++	status = decode_cb_op_hdr(&xdr, OP_CB_DEVICE);	/* reply carries no body */
++out:
++	return status;	/* first non-zero decode status, else 0 */
++}
++#endif /* CONFIG_PNFSD */
++
+ /*
+  * RPC procedure tables
+  */
+@@ -420,6 +638,10 @@ out:
+ static struct rpc_procinfo     nfs4_cb_procedures[] = {
+     PROC(CB_NULL,      NULL,     enc_cb_null,     dec_cb_null),
+     PROC(CB_RECALL,    COMPOUND,   enc_cb_recall,      dec_cb_recall),
++#if defined(CONFIG_PNFSD)
++    PROC(CB_LAYOUT,    COMPOUND,   enc_cb_layout,      dec_cb_layout),
++    PROC(CB_DEVICE,    COMPOUND,   enc_cb_device,      dec_cb_device),
++#endif
+ };
+ 
+ static struct rpc_version       nfs_cb_version4 = {
+@@ -606,10 +828,9 @@ out:
+  * TODO: cb_sequence should support referring call lists, cachethis, multiple
+  * slots, and mark callback channel down on communication errors.
+  */
+-static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
++static void nfsd4_cb_prepare_sequence(struct rpc_task *task,
++				      struct nfs4_client *clp)
+ {
+-	struct nfs4_delegation *dp = calldata;
+-	struct nfs4_client *clp = dp->dl_client;
+ 	struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
+ 	u32 minorversion = clp->cl_cb_conn.cb_minorversion;
+ 	int status = 0;
+@@ -629,11 +850,15 @@ static void nfsd4_cb_prepare(struct rpc_
+ 	rpc_call_start(task);
+ }
+ 
+-static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
++static void nfsd4_cb_recall_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs4_delegation *dp = calldata;
+-	struct nfs4_client *clp = dp->dl_client;
++	nfsd4_cb_prepare_sequence(task, dp->dl_client);
++}
+ 
++static void nfsd4_cb_done_sequence(struct rpc_task *task,
++				   struct nfs4_client *clp)
++{
+ 	dprintk("%s: minorversion=%d\n", __func__,
+ 		clp->cl_cb_conn.cb_minorversion);
+ 
+@@ -657,7 +882,7 @@ static void nfsd4_cb_recall_done(struct 
+ 	struct nfs4_client *clp = dp->dl_client;
+ 	struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
+ 
+-	nfsd4_cb_done(task, calldata);
++	nfsd4_cb_done_sequence(task, clp);
+ 
+ 	if (current_rpc_client == NULL) {
+ 		/* We're shutting down; give up. */
+@@ -688,7 +913,7 @@ static void nfsd4_cb_recall_done(struct 
+ 	if (dp->dl_retries--) {
+ 		rpc_delay(task, 2*HZ);
+ 		task->tk_status = 0;
+-		rpc_restart_call(task);
++		rpc_restart_call_prepare(task);
+ 		return;
+ 	} else {
+ 		atomic_set(&clp->cl_cb_set, 0);
+@@ -704,7 +929,7 @@ static void nfsd4_cb_recall_release(void
+ }
+ 
+ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
+-	.rpc_call_prepare = nfsd4_cb_prepare,
++	.rpc_call_prepare = nfsd4_cb_recall_prepare,
+ 	.rpc_call_done = nfsd4_cb_recall_done,
+ 	.rpc_release = nfsd4_cb_recall_release,
+ };
+@@ -781,3 +1006,173 @@ void nfsd4_cb_recall(struct nfs4_delegat
+ {
+ 	queue_work(callback_wq, &dp->dl_recall.cb_work);
+ }
++
++#if defined(CONFIG_PNFSD)
++static void nfsd4_cb_layout_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	nfsd4_cb_prepare_sequence(task, clr->clr_client);
++}
++
++static void nfsd4_cb_layout_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	struct nfs4_client *clp = clr->clr_client;
++
++	nfsd4_cb_done_sequence(task, clp);
++	/* rpc_call_done for CB_LAYOUT: only a failed recall needs handling. */
++	if (!task->tk_status)
++		return;
++
++	printk("%s: clp %p cb_client %p fp %p failed with status %d\n",
++	       __func__,
++	       clp,
++	       clp->cl_cb_client,
++	       clr->clr_file,
++	       task->tk_status);
++
++	switch (task->tk_status) {
++	case -EIO:
++		/* Network partition? */
++		atomic_set(&clp->cl_cb_set, 0);
++		warn_no_callback_path(clp, task->tk_status);
++		/* FIXME:
++		 * The pNFS standard states that the client may only be
++		 * expired after at least one lease time (e.g. lease-time * 2)
++		 * when we fail to communicate a recall.
++		 */
++		break;
++	case -NFS4ERR_DELAY:
++		/* Poll the client until it's done with the layout */
++		rpc_delay(task, HZ/100); /* 10 milliseconds */
++		task->tk_status = 0;
++		rpc_restart_call_prepare(task);
++		break;
++	case -NFS4ERR_NOMATCHING_LAYOUT: /* last case; other statuses are left as is */
++		task->tk_status = 0;
++		nomatching_layout(clr);
++	}
++}
++
++static void nfsd4_cb_layout_release(void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	kfree(clr->clr_args);
++	clr->clr_args = NULL;
++	put_layoutrecall(clr);
++}
++
++static const struct rpc_call_ops nfsd4_cb_layout_ops = {
++	.rpc_call_prepare = nfsd4_cb_layout_prepare,
++	.rpc_call_done = nfsd4_cb_layout_done,
++	.rpc_release = nfsd4_cb_layout_release,
++};
++
++/*
++ * Called with state lock.
++ */
++int
++nfsd4_cb_layout(struct nfs4_layoutrecall *clr)
++{
++	struct nfs4_client *clp = clr->clr_client;
++	struct rpc_clnt *clnt = clp->cl_cb_client;
++	struct nfs4_rpc_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_LAYOUT],
++		.rpc_cred = callback_cred
++	};
++	int status;
++
++	args = kzalloc(sizeof(*args), GFP_KERNEL);
++	if (!args) {
++		status = -ENOMEM;
++		goto out;
++	}
++	clr->clr_args = args;
++	args->args_op = clr;
++	msg.rpc_argp = args;
++	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
++				&nfsd4_cb_layout_ops, clr);
++out:
++	if (status) {
++		kfree(args);
++		put_layoutrecall(clr);
++	}
++	dprintk("NFSD: nfsd4_cb_layout: status %d\n", status);
++	return status;
++}
++
++static void nfsd4_cb_device_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	nfsd4_cb_prepare_sequence(task, cbnd->nd_client);
++}
++
++static void nfsd4_cb_device_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	struct nfs4_client *clp = cbnd->nd_client;
++
++	nfsd4_cb_done_sequence(task, clp);
++
++	dprintk("%s: clp %p cb_client %p: status %d\n",
++	       __func__,
++	       clp,
++	       clp->cl_cb_client,
++	       task->tk_status);
++
++	if (task->tk_status == -EIO) {
++		/* Network partition? */
++		atomic_set(&clp->cl_cb_set, 0);
++		warn_no_callback_path(clp, task->tk_status);
++	}
++}
++
++static void nfsd4_cb_device_release(void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	kfree(cbnd->nd_args);
++	cbnd->nd_args = NULL;
++	kfree(cbnd);
++}
++
++static const struct rpc_call_ops nfsd4_cb_device_ops = {
++	.rpc_call_prepare = nfsd4_cb_device_prepare,
++	.rpc_call_done = nfsd4_cb_device_done,
++	.rpc_release = nfsd4_cb_device_release,
++};
++
++/*
++ * Queue an async CB_DEVICE call to cbnd->nd_client. Called with state lock.
++ */
++int
++nfsd4_cb_notify_device(struct nfs4_notify_device *cbnd)
++{
++	struct nfs4_client *clp = cbnd->nd_client;
++	struct rpc_clnt *clnt = clp->cl_cb_client;
++	struct nfs4_rpc_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_DEVICE],
++		.rpc_cred = callback_cred
++	};
++	int status = -EIO;
++
++	dprintk("%s: clp %p\n", __func__, clp);
++
++	args = kzalloc(sizeof(*args), GFP_KERNEL);
++	if (!args) {
++		status = -ENOMEM;
++		goto out;
++	}
++	args->args_op = cbnd;
++	msg.rpc_argp = args;
++	/* NOTE(review): args is not saved in cbnd->nd_args, which release frees */
++	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
++				&nfsd4_cb_device_ops, cbnd);
++out:
++	if (status)
++		kfree(args);
++	dprintk("%s: status %d\n", __func__, status);
++	return status;
++}
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-08-23 12:09:03.307491492 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-08-23 12:09:03.308491262 -0400
+@@ -0,0 +1,1679 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *****************************************************************************/
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY                NFSDDBG_PROC
++
++/* Globals */
++static u32 current_layoutid = 1;
++
++/*
++ * Currently used for manipulating the layout state.
++ */
++static DEFINE_SPINLOCK(layout_lock);
++
++#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_SMP)
++#  define BUG_ON_UNLOCKED_LAYOUT() BUG_ON(!spin_is_locked(&layout_lock))
++#else
++#  define BUG_ON_UNLOCKED_LAYOUT()
++#endif
++
++/*
++ * Layout state - NFSv4.1 pNFS
++ */
++static struct kmem_cache *pnfs_layout_slab;
++static struct kmem_cache *pnfs_layoutrecall_slab;
++
++/* hash table for nfsd4_pnfs_deviceid.sbid */
++#define SBID_HASH_BITS	8
++#define SBID_HASH_SIZE	(1 << SBID_HASH_BITS)
++#define SBID_HASH_MASK	(SBID_HASH_SIZE - 1)
++
++struct sbid_tracker {
++	u64 id;
++	struct super_block *sb;
++	struct list_head hash;
++};
++
++static u64 current_sbid;
++static struct list_head sbid_hashtbl[SBID_HASH_SIZE];
++
++static inline unsigned long
++sbid_hashval(struct super_block *sb)
++{
++	return hash_ptr(sb, SBID_HASH_BITS);
++}
++
++static inline struct sbid_tracker *
++alloc_sbid(void)
++{
++	return kmalloc(sizeof(struct sbid_tracker), GFP_KERNEL);
++}
++
++static void
++destroy_sbid(struct sbid_tracker *sbid)
++{
++	spin_lock(&layout_lock);
++	list_del(&sbid->hash);
++	spin_unlock(&layout_lock);
++	kfree(sbid);
++}
++
++void
++nfsd4_free_pnfs_slabs(void)
++{
++	int i;
++	struct sbid_tracker *sbid;
++
++	nfsd4_free_slab(&pnfs_layout_slab);
++	nfsd4_free_slab(&pnfs_layoutrecall_slab);
++
++	for (i = 0; i < SBID_HASH_SIZE; i++) {
++		while (!list_empty(&sbid_hashtbl[i])) {
++			sbid = list_first_entry(&sbid_hashtbl[i],
++						struct sbid_tracker,
++						hash);
++			destroy_sbid(sbid);
++		}
++	}
++}
++
++/* Init the sbid hash and create the pNFS slabs; returns 0 or -ENOMEM. */
++int
++nfsd4_init_pnfs_slabs(void)
++{
++	int i;
++
++	/* Buckets first: nfsd4_free_pnfs_slabs() walks them unconditionally. */
++	for (i = 0; i < SBID_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&sbid_hashtbl[i]);
++
++	pnfs_layout_slab = kmem_cache_create("pnfs_layouts",
++			sizeof(struct nfs4_layout), 0, 0, NULL);
++	if (pnfs_layout_slab == NULL)
++		return -ENOMEM;
++	pnfs_layoutrecall_slab = kmem_cache_create("pnfs_layoutrecalls",
++			sizeof(struct nfs4_layoutrecall), 0, 0, NULL);
++	if (pnfs_layoutrecall_slab == NULL)
++		return -ENOMEM;
++	return 0;
++}
++
++/* XXX: Need to implement the notify types and track which
++ * clients have which devices. */
++void pnfs_set_device_notify(clientid_t *clid, unsigned int types)
++{
++	struct nfs4_client *clp;
++	dprintk("%s: -->\n", __func__);
++
++	nfs4_lock_state();
++	/* Indicate that client has a device so we can only notify
++	 * the correct clients */
++	clp = find_confirmed_client(clid);
++	if (clp) {
++		atomic_inc(&clp->cl_deviceref);
++		dprintk("%s: Incr device count (clnt %p) to %d\n",
++			__func__, clp, atomic_read(&clp->cl_deviceref));
++	}
++	nfs4_unlock_state();
++}
++
++/* Clear notifications for this client
++ * XXX: Do we need to loop through a clean up all
++ *      krefs when nfsd cleans up the client? */
++void pnfs_clear_device_notify(struct nfs4_client *clp)
++{
++	atomic_dec(&clp->cl_deviceref);
++	dprintk("%s: Decr device count (clnt %p) to %d\n",
++		__func__, clp, atomic_read(&clp->cl_deviceref));
++}
++
++static struct nfs4_layout_state *
++alloc_init_layout_state(struct nfs4_client *clp, struct nfs4_file *fp,
++			stateid_t *stateid)
++{
++	struct nfs4_layout_state *new;
++
++	/* FIXME: use a kmem_cache */
++	new = kzalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return new;
++	get_nfs4_file(fp);
++	INIT_LIST_HEAD(&new->ls_perfile);
++	INIT_LIST_HEAD(&new->ls_layouts);
++	kref_init(&new->ls_ref);
++	new->ls_client = clp;
++	new->ls_file = fp;
++	new->ls_stateid.si_boot = stateid->si_boot;
++	new->ls_stateid.si_stateownerid = 0; /* identifies layout stateid */
++	new->ls_stateid.si_generation = 1;
++	spin_lock(&layout_lock);
++	new->ls_stateid.si_fileid = current_layoutid++;
++	list_add(&new->ls_perfile, &fp->fi_layout_states);
++	spin_unlock(&layout_lock);
++	return new;
++}
++
++static inline void
++get_layout_state(struct nfs4_layout_state *ls)
++{
++	kref_get(&ls->ls_ref);
++}
++
++static void
++destroy_layout_state_common(struct nfs4_layout_state *ls)
++{
++	struct nfs4_file *fp = ls->ls_file;
++
++	dprintk("pNFS %s: ls %p fp %p clp %p\n", __func__, ls, fp,
++		ls->ls_client);
++	BUG_ON(!list_empty(&ls->ls_layouts));
++	kfree(ls);
++	put_nfs4_file(fp);
++}
++
++static void
++destroy_layout_state(struct kref *kref)
++{
++	struct nfs4_layout_state *ls =
++			container_of(kref, struct nfs4_layout_state, ls_ref);
++
++	spin_lock(&layout_lock);
++	list_del(&ls->ls_perfile);
++	spin_unlock(&layout_lock);
++	destroy_layout_state_common(ls);
++}
++
++static void
++destroy_layout_state_locked(struct kref *kref)
++{
++	struct nfs4_layout_state *ls =
++			container_of(kref, struct nfs4_layout_state, ls_ref);
++
++	list_del(&ls->ls_perfile);
++	destroy_layout_state_common(ls);
++}
++
++static inline void
++put_layout_state(struct nfs4_layout_state *ls)
++{
++	dprintk("pNFS %s: ls %p ls_ref %d\n", __func__, ls,
++		atomic_read(&ls->ls_ref.refcount));
++	kref_put(&ls->ls_ref, destroy_layout_state);
++}
++
++static inline void
++put_layout_state_locked(struct nfs4_layout_state *ls)
++{
++	dprintk("pNFS %s: ls %p ls_ref %d\n", __func__, ls,
++		atomic_read(&ls->ls_ref.refcount));
++	kref_put(&ls->ls_ref, destroy_layout_state_locked);
++}
++
++/*
++ * Search the fp->fi_layout_state list for a layout state with the clientid.
++ * If not found, then this is a 'first open/delegation/lock stateid' from
++ * the client for this file.
++ * Called under the layout_lock.
++ */
++static struct nfs4_layout_state *
++find_get_layout_state(struct nfs4_client *clp, struct nfs4_file *fp)
++{
++	struct nfs4_layout_state *ls;
++
++	BUG_ON_UNLOCKED_LAYOUT();
++	list_for_each_entry(ls, &fp->fi_layout_states, ls_perfile) {
++		if (ls->ls_client == clp) {
++			dprintk("pNFS %s: before GET ls %p ls_ref %d\n",
++				__func__, ls,
++				atomic_read(&ls->ls_ref.refcount));
++			get_layout_state(ls);
++			return ls;
++		}
++	}
++	return NULL;
++}
++
++static __be32
++verify_stateid(struct nfs4_file *fp, stateid_t *stateid)
++{
++	struct nfs4_stateid *local = NULL;
++	struct nfs4_delegation *temp = NULL;
++
++	/* check if open or lock stateid */
++	local = find_stateid(stateid, RD_STATE);
++	if (local)
++		return 0;
++	temp = find_delegation_stateid(fp->fi_inode, stateid);
++	if (temp)
++		return 0;
++	return nfserr_bad_stateid;
++}
++
++/*
++ * nfs4_process_layout_stateid()
++ *
++ * We have looked up the nfs4_file corresponding to the current_fh, and
++ * confirmed the clientid. Pull the few tests from nfs4_preprocess_stateid_op()
++ * that make sense with a layout stateid.
++ *
++ * Called with the state_lock held
++ * Returns zero and stateid is updated, or error.
++ *
++ * Note: the struct nfs4_layout_state pointer is only set by layoutget.
++ */
++static __be32
++nfs4_process_layout_stateid(struct nfs4_client *clp, struct nfs4_file *fp,
++			    stateid_t *stateid, struct nfs4_layout_state **lsp)
++{
++	struct nfs4_layout_state *ls = NULL;
++	__be32 status = 0;
++
++	dprintk("--> %s clp %p fp %p \n", __func__, clp, fp);
++
++	dprintk("%s: operation stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(stateid));
++
++	status = nfs4_check_stateid(stateid);
++	if (status)
++		goto out;
++
++	/* Is this the first use of this layout ? */
++	spin_lock(&layout_lock);
++	ls = find_get_layout_state(clp, fp);
++	spin_unlock(&layout_lock);
++	if (!ls) {
++		/* Only alloc layout state on layoutget (which sets lsp). */
++		if (!lsp) {
++			dprintk("%s ERROR: Not layoutget & no layout stateid\n",
++				__func__);
++			status = nfserr_bad_stateid;
++			goto out;
++		}
++		dprintk("%s Initial stateid for layout: file %p client %p\n",
++			__func__, fp, clp);
++
++		/* verify input stateid */
++		status = verify_stateid(fp, stateid);
++		if (status) {
++			dprintk("%s ERROR: invalid open/deleg/lock stateid\n",
++				__func__);
++			goto out;
++		}
++		ls = alloc_init_layout_state(clp, fp, stateid);
++		if (!ls) {
++			dprintk("%s pNFS ERROR: no memory for layout state\n",
++				__func__);
++			status = nfserr_resource;
++			goto out;
++		}
++	} else {
++		dprintk("%s Not initial stateid. Layout state %p file %p\n",
++			__func__, ls, fp);
++
++		/* BAD STATEID */
++		status = nfserr_bad_stateid;
++		if (memcmp(&ls->ls_stateid.si_opaque, &stateid->si_opaque,
++			sizeof(stateid_opaque_t)) != 0) {
++
++			/* if a LAYOUTGET operation and stateid is a valid
++			 * open/deleg/lock stateid, accept it as a parallel
++			 * initial layout stateid
++			 */
++			if (lsp && ((verify_stateid(fp, stateid)) == 0)) {
++				dprintk("%s parallel initial layout state\n",
++					__func__);
++				goto update;
++			}
++
++			dprintk("%s ERROR bad opaque in stateid 1\n", __func__);
++			goto out_put;
++		}
++
++		/* stateid is a valid layout stateid for this file. */
++		if (stateid->si_generation > ls->ls_stateid.si_generation) {
++			dprintk("%s bad stateid 1\n", __func__);
++			goto out_put;
++		}
++update:
++		update_stateid(&ls->ls_stateid);
++		dprintk("%s Updated ls_stateid to %d on layoutstate %p\n",
++			__func__, ls->ls_stateid.si_generation, ls);
++	}
++	status = 0;
++	/* Set the stateid to be encoded */
++	memcpy(stateid, &ls->ls_stateid, sizeof(stateid_t));
++
++	/* Return the layout state if requested */
++	if (lsp) {
++		get_layout_state(ls);
++		*lsp = ls;
++	}
++	dprintk("%s: layout stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(&ls->ls_stateid));
++out_put:
++	dprintk("%s PUT LO STATE:\n", __func__);
++	put_layout_state(ls);
++out:
++	dprintk("<-- %s status %d\n", __func__, htonl(status));
++
++	return status;
++}
++
++static inline struct nfs4_layout *
++alloc_layout(void)
++{
++	return kmem_cache_alloc(pnfs_layout_slab, GFP_KERNEL);
++}
++
++static inline void
++free_layout(struct nfs4_layout *lp)
++{
++	kmem_cache_free(pnfs_layout_slab, lp);
++}
++
++static void
++init_layout(struct nfs4_layout_state *ls,
++	    struct nfs4_layout *lp,
++	    struct nfs4_file *fp,
++	    struct nfs4_client *clp,
++	    struct svc_fh *current_fh,
++	    struct nfsd4_layout_seg *seg)
++{
++	dprintk("pNFS %s: ls %p lp %p clp %p fp %p ino %p\n", __func__,
++		ls, lp, clp, fp, fp->fi_inode);
++
++	get_nfs4_file(fp);
++	lp->lo_client = clp;
++	lp->lo_file = fp;
++	get_layout_state(ls);
++	lp->lo_state = ls;
++	memcpy(&lp->lo_seg, seg, sizeof(lp->lo_seg));
++	spin_lock(&layout_lock);
++	list_add_tail(&lp->lo_perstate, &ls->ls_layouts);
++	list_add_tail(&lp->lo_perclnt, &clp->cl_layouts);
++	list_add_tail(&lp->lo_perfile, &fp->fi_layouts);
++	spin_unlock(&layout_lock);
++	dprintk("pNFS %s end\n", __func__);
++}
++
++static void
++dequeue_layout(struct nfs4_layout *lp)
++{
++	BUG_ON_UNLOCKED_LAYOUT();
++	list_del(&lp->lo_perclnt);
++	list_del(&lp->lo_perfile);
++	list_del(&lp->lo_perstate);
++}
++
++static void
++destroy_layout(struct nfs4_layout *lp)
++{
++	struct nfs4_client *clp;
++	struct nfs4_file *fp;
++	struct nfs4_layout_state *ls;
++
++	BUG_ON_UNLOCKED_LAYOUT();
++	clp = lp->lo_client;
++	fp = lp->lo_file;
++	ls = lp->lo_state;
++	dprintk("pNFS %s: lp %p clp %p fp %p ino %p ls_layouts empty %d\n",
++		__func__, lp, clp, fp, fp->fi_inode,
++		list_empty(&ls->ls_layouts));
++
++	kmem_cache_free(pnfs_layout_slab, lp);
++	/* release references taken by init_layout */
++	put_layout_state_locked(ls);
++	put_nfs4_file(fp);
++}
++
++void fs_layout_return(struct super_block *sb, struct inode *ino,
++		      struct nfsd4_pnfs_layoutreturn *lrp, int flags,
++		      void *recall_cookie)
++{
++	int ret;
++
++	if (unlikely(!sb->s_pnfs_op->layout_return))
++		return;
++
++	lrp->lr_flags = flags;
++	lrp->args.lr_cookie = recall_cookie;
++
++	if (!ino) /* FSID or ALL */
++		ino = sb->s_root->d_inode;
++
++	ret = sb->s_pnfs_op->layout_return(ino, &lrp->args);
++	dprintk("%s: inode %lu iomode=%d offset=0x%llx length=0x%llx "
++		"cookie = %p flags 0x%x status=%d\n",
++		__func__, ino->i_ino, lrp->args.lr_seg.iomode,
++		lrp->args.lr_seg.offset, lrp->args.lr_seg.length,
++		recall_cookie, flags, ret);
++}
++
++static u64
++alloc_init_sbid(struct super_block *sb)
++{
++	struct sbid_tracker *sbid;
++	struct sbid_tracker *new = alloc_sbid();
++	unsigned long hash_idx = sbid_hashval(sb);
++	u64 id = 0;
++
++	if (likely(new)) {
++		spin_lock(&layout_lock);
++		id = ++current_sbid;
++		new->id = (id << SBID_HASH_BITS) | (hash_idx & SBID_HASH_MASK);
++		id = new->id;
++		BUG_ON(id == 0);
++		new->sb = sb;
++
++		list_for_each_entry (sbid, &sbid_hashtbl[hash_idx], hash)
++			if (sbid->sb == sb) {
++				kfree(new);
++				id = sbid->id;
++				spin_unlock(&layout_lock);
++				return id;
++			}
++		list_add(&new->hash, &sbid_hashtbl[hash_idx]);
++		spin_unlock(&layout_lock);
++	}
++	return id;
++}
++
++struct super_block *
++find_sbid_id(u64 id)
++{
++	struct sbid_tracker *sbid;
++	struct super_block *sb = NULL;
++	unsigned long hash_idx = id & SBID_HASH_MASK;
++	int pos = 0;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (sbid, &sbid_hashtbl[hash_idx], hash) {
++		pos++;
++		if (sbid->id != id)
++			continue;
++		if (pos > 1)
++			list_move(&sbid->hash, &sbid_hashtbl[hash_idx]);
++		sb = sbid->sb;
++		break;
++	}
++	spin_unlock(&layout_lock);
++	return sb;
++}
++
++u64
++find_create_sbid(struct super_block *sb)
++{
++	struct sbid_tracker *sbid;
++	unsigned long hash_idx = sbid_hashval(sb);
++	int pos = 0;
++	u64 id = 0;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (sbid, &sbid_hashtbl[hash_idx], hash) {
++		pos++;
++		if (sbid->sb != sb)
++			continue;
++		if (pos > 1)
++			list_move(&sbid->hash, &sbid_hashtbl[hash_idx]);
++		id = sbid->id;
++		break;
++	}
++	spin_unlock(&layout_lock);
++
++	if (!id)
++		id = alloc_init_sbid(sb);
++
++	return id;
++}
++
++/*
++ * Create a layoutrecall structure
++ * An optional layoutrecall can be cloned (except for the layoutrecall lists)
++ */
++static struct nfs4_layoutrecall *
++alloc_init_layoutrecall(struct nfsd4_pnfs_cb_layout *cbl,
++			struct nfs4_client *clp,
++			struct nfs4_file *lrfile)
++{
++	struct nfs4_layoutrecall *clr;
++
++	dprintk("NFSD %s\n", __func__);
++	clr = kmem_cache_alloc(pnfs_layoutrecall_slab, GFP_KERNEL);
++	if (clr == NULL)
++		return clr;
++
++	dprintk("NFSD %s -->\n", __func__);
++
++	memset(clr, 0, sizeof(*clr));
++	if (lrfile)
++		get_nfs4_file(lrfile);
++	clr->clr_client = clp;
++	clr->clr_file = lrfile;
++	clr->cb = *cbl;
++
++	kref_init(&clr->clr_ref);
++	INIT_LIST_HEAD(&clr->clr_perclnt);
++
++	dprintk("NFSD %s return %p\n", __func__, clr);
++	return clr;
++}
++
++static void
++get_layoutrecall(struct nfs4_layoutrecall *clr)
++{
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));
++	kref_get(&clr->clr_ref);
++}
++
++static void
++destroy_layoutrecall(struct kref *kref)
++{
++	struct nfs4_layoutrecall *clr =
++			container_of(kref, struct nfs4_layoutrecall, clr_ref);
++	dprintk("pNFS %s: clr %p fp %p clp %p\n", __func__, clr,
++		clr->clr_file, clr->clr_client);
++	BUG_ON(!list_empty(&clr->clr_perclnt));
++	if (clr->clr_file)
++		put_nfs4_file(clr->clr_file);
++	kmem_cache_free(pnfs_layoutrecall_slab, clr);
++}
++
++int
++put_layoutrecall(struct nfs4_layoutrecall *clr)
++{
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));
++	return kref_put(&clr->clr_ref, destroy_layoutrecall);
++}
++
++void *
++layoutrecall_done(struct nfs4_layoutrecall *clr)
++{
++	void *recall_cookie = clr->cb.cbl_cookie;
++	struct nfs4_layoutrecall *parent = clr->parent;
++
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));
++	BUG_ON_UNLOCKED_LAYOUT();
++	list_del_init(&clr->clr_perclnt);
++	put_layoutrecall(clr);
++
++	if (parent && !put_layoutrecall(parent))
++		recall_cookie = NULL;
++
++	return recall_cookie;
++}
++
++/*
++ * get_state() and cb_get_state() are
++ */
++void
++release_pnfs_ds_dev_list(struct nfs4_stateid *stp)
++{
++	struct pnfs_ds_dev_entry *ddp;
++
++	while (!list_empty(&stp->st_pnfs_ds_id)) {
++		ddp = list_entry(stp->st_pnfs_ds_id.next,
++				 struct pnfs_ds_dev_entry, dd_dev_entry);
++		list_del(&ddp->dd_dev_entry);
++		kfree(ddp);
++	}
++}
++
++static int
++nfs4_add_pnfs_ds_dev(struct nfs4_stateid *stp, u32 dsid)
++{
++	struct pnfs_ds_dev_entry *ddp;
++
++	ddp = kmalloc(sizeof(*ddp), GFP_KERNEL);
++	if (!ddp)
++		return -ENOMEM;
++
++	INIT_LIST_HEAD(&ddp->dd_dev_entry);
++	list_add(&ddp->dd_dev_entry, &stp->st_pnfs_ds_id);
++	ddp->dd_dsid = dsid;
++	return 0;
++}
++
++/*
++ * are two octet ranges overlapping?
++ * start1            last1
++ *   |-----------------|
++ *                start2            last2
++ *                  |----------------|
++ */
++static inline int
++lo_seg_overlapping(struct nfsd4_layout_seg *l1, struct nfsd4_layout_seg *l2)
++{
++	u64 start1 = l1->offset;
++	u64 last1 = last_byte_offset(start1, l1->length);
++	u64 start2 = l2->offset;
++	u64 last2 = last_byte_offset(start2, l2->length);
++	int ret;
++
++	/* if last1 == start2 there's a single byte overlap */
++	ret = (last2 >= start1) && (last1 >= start2);
++	dprintk("%s: l1 %llu:%lld l2 %llu:%lld ret=%d\n", __func__,
++		l1->offset, l1->length, l2->offset, l2->length, ret);
++	return ret;
++}
++
++static inline int
++same_fsid_major(struct nfs4_fsid *fsid, u64 major)
++{
++	return fsid->major == major;
++}
++
++static inline int
++same_fsid(struct nfs4_fsid *fsid, struct svc_fh *current_fh)
++{
++	return same_fsid_major(fsid, current_fh->fh_export->ex_fsid);
++}
++
++/*
++ * find a layout recall conflicting with the specified layoutget
++ */
++static int
++is_layout_recalled(struct nfs4_client *clp,
++		   struct svc_fh *current_fh,
++		   struct nfsd4_layout_seg *seg)
++{
++	struct nfs4_layoutrecall *clr;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (clr, &clp->cl_layoutrecalls, clr_perclnt) {
++		if (clr->cb.cbl_seg.layout_type != seg->layout_type)
++			continue;
++		if (clr->cb.cbl_recall_type == RETURN_ALL)
++			goto found;
++		if (clr->cb.cbl_recall_type == RETURN_FSID) {
++			if (same_fsid(&clr->cb.cbl_fsid, current_fh))
++				goto found;
++			else
++				continue;
++		}
++		BUG_ON(clr->cb.cbl_recall_type != RETURN_FILE);
++		if (clr->cb.cbl_seg.clientid == seg->clientid &&
++		    lo_seg_overlapping(&clr->cb.cbl_seg, seg))
++			goto found;
++	}
++	spin_unlock(&layout_lock);
++	return 0;
++found:
++	spin_unlock(&layout_lock);
++	return 1;
++}
++
++/*
++ * Are two octet ranges overlapping or adjacent? Returns non-zero if so.
++ */
++static inline int
++lo_seg_mergeable(struct nfsd4_layout_seg *l1, struct nfsd4_layout_seg *l2)
++{
++	u64 start1 = l1->offset;
++	u64 end1 = end_offset(start1, l1->length);
++	u64 start2 = l2->offset;
++	u64 end2 = end_offset(start2, l2->length);
++
++	/* if end1 == start2 the ranges are adjacent, which also qualifies */
++	return (end2 >= start1) && (end1 >= start2);
++}
++
++static void
++extend_layout(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *lg)
++{
++	u64 lo_start = lo->offset;
++	u64 lo_end = end_offset(lo_start, lo->length);
++	u64 lg_start = lg->offset;
++	u64 lg_end = end_offset(lg_start, lg->length);
++
++	/* lo already covers lg? */
++	if (lo_start <= lg_start && lg_end <= lo_end)
++		return;
++
++	/* extend start offset */
++	if (lo_start > lg_start)
++		lo_start = lg_start;
++
++	/* extend end offset */
++	if (lo_end < lg_end)
++		lo_end = lg_end;
++
++	lo->offset = lo_start;
++	lo->length = (lo_end == NFS4_MAX_UINT64) ?
++		      lo_end : lo_end - lo_start;
++}
++
++/* Grow an existing mergeable layout of fp to cover seg; NULL if none. */
++static struct nfs4_layout *
++merge_layout(struct nfs4_file *fp, struct nfs4_client *clp,
++	     struct nfsd4_layout_seg *seg)
++{
++	struct nfs4_layout *lp, *found = NULL;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (lp, &fp->fi_layouts, lo_perfile)
++		if (lp->lo_seg.layout_type == seg->layout_type &&
++		    lp->lo_seg.clientid == seg->clientid &&
++		    lp->lo_seg.iomode == seg->iomode &&
++		    lo_seg_mergeable(&lp->lo_seg, seg)) {
++			extend_layout(&lp->lo_seg, seg);
++			found = lp; /* the cursor is never NULL once the loop ends */
++			break;
++		}
++	spin_unlock(&layout_lock);
++	return found;
++}
++
++/* LAYOUTGET: check the stateid, get a layout from the fs and track it. */
++__be32
++nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *lgp,
++		     struct exp_xdr_stream *xdr)
++{
++	u32 status;
++	__be32 nfserr;
++	struct inode *ino = lgp->lg_fhp->fh_dentry->d_inode;
++	struct super_block *sb = ino->i_sb;
++	int can_merge;
++	struct nfs4_file *fp;
++	struct nfs4_client *clp;
++	struct nfs4_layout *lp = NULL;
++	struct nfs4_layout_state *ls = NULL;
++	struct nfsd4_pnfs_layoutget_arg args = {
++		.lg_minlength = lgp->lg_minlength,
++		.lg_fh = &lgp->lg_fhp->fh_handle,
++	};
++	struct nfsd4_pnfs_layoutget_res res = {
++		.lg_seg = lgp->lg_seg,
++	};
++
++	dprintk("NFSD: %s Begin\n", __func__);
++	args.lg_sbid = find_create_sbid(sb);
++	if (!args.lg_sbid) {
++		nfserr = nfserr_layouttrylater;
++		goto out;
++	}
++
++	can_merge = sb->s_pnfs_op->can_merge_layouts != NULL &&
++		    sb->s_pnfs_op->can_merge_layouts(lgp->lg_seg.layout_type);
++
++	nfs4_lock_state();
++	fp = find_alloc_file(ino, lgp->lg_fhp);
++	clp = find_confirmed_client((clientid_t *)&lgp->lg_seg.clientid);
++	dprintk("pNFS %s: fp %p clp %p \n", __func__, fp, clp);
++	if (!fp || !clp) {
++		nfserr = nfserr_inval;
++		goto out_unlock;
++	}
++
++	/* Check decoded layout stateid */
++	nfserr = nfs4_process_layout_stateid(clp, fp, &lgp->lg_sid, &ls);
++	if (nfserr)
++		goto out_unlock;
++
++	if (is_layout_recalled(clp, lgp->lg_fhp, &lgp->lg_seg)) {
++		nfserr = nfserr_recallconflict;
++		goto out_unlock; /* state lock and the ls/fp refs are held */
++	}
++
++	/* pre-alloc layout in case we can't merge after we call
++	 * the file system
++	 */
++	lp = alloc_layout();
++	if (!lp) {
++		nfserr = nfserr_layouttrylater;
++		goto out_unlock;
++	}
++
++	dprintk("pNFS %s: pre-export type 0x%x maxcount %Zd "
++		"iomode %u offset %llu length %llu\n",
++		__func__, lgp->lg_seg.layout_type,
++		exp_xdr_qbytes(xdr->end - xdr->p),
++		lgp->lg_seg.iomode, lgp->lg_seg.offset, lgp->lg_seg.length);
++
++	/* FIXME: need to eliminate the use of the state lock */
++	nfs4_unlock_state();
++	status = sb->s_pnfs_op->layout_get(ino, xdr, &args, &res);
++	nfs4_lock_state();
++
++	dprintk("pNFS %s: post-export status %u "
++		"iomode %u offset %llu length %llu\n",
++		__func__, status, res.lg_seg.iomode,
++		res.lg_seg.offset, res.lg_seg.length);
++
++	/*
++	 * The allowable error codes for the layout_get pNFS export
++	 * operations vector function (from the file system) can be
++	 * expanded as needed to include other errors defined for
++	 * the RFC 5661 LAYOUTGET operation.
++	 */
++	switch (status) {
++	case 0:
++		nfserr = NFS4_OK;
++		break;
++	case NFS4ERR_ACCESS:
++	case NFS4ERR_BADIOMODE:
++		/* No support for LAYOUTIOMODE4_RW layouts */
++	case NFS4ERR_BADLAYOUT:
++		/* No layout matching loga_minlength rules */
++	case NFS4ERR_INVAL:
++	case NFS4ERR_IO:
++	case NFS4ERR_LAYOUTTRYLATER:
++	case NFS4ERR_LAYOUTUNAVAILABLE:
++	case NFS4ERR_LOCKED:
++	case NFS4ERR_NOSPC:
++	case NFS4ERR_RECALLCONFLICT:
++	case NFS4ERR_SERVERFAULT:
++	case NFS4ERR_TOOSMALL:
++		/* Requested layout too big for loga_maxcount */
++	case NFS4ERR_WRONG_TYPE:
++		/* Not a regular file */
++		nfserr = cpu_to_be32(status);
++		goto out_freelayout;
++	default:
++		BUG();
++		nfserr = nfserr_serverfault;
++	}
++
++	lgp->lg_seg = res.lg_seg;
++	lgp->lg_roc = res.lg_return_on_close;
++
++	/* SUCCESS!
++	 * Can the new layout be merged into an existing one?
++	 * If so, free unused layout struct
++	 */
++	if (can_merge && merge_layout(fp, clp, &res.lg_seg))
++		goto out_freelayout;
++
++	/* Can't merge, so let's initialize this new layout */
++	init_layout(ls, lp, fp, clp, lgp->lg_fhp, &res.lg_seg);
++out_unlock:
++	if (ls)
++		put_layout_state(ls);
++	if (fp)
++		put_nfs4_file(fp);
++	nfs4_unlock_state();
++out:
++	dprintk("pNFS %s: lp %p exit nfserr %u\n", __func__, lp,
++		be32_to_cpu(nfserr));
++	return nfserr;
++out_freelayout:
++	free_layout(lp);
++	goto out_unlock;
++}
++
++/* Shrink held segment lo by returned range lr; length 0 if fully covered. */
++static void
++trim_layout(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *lr)
++{
++	u64 lo_start = lo->offset;
++	u64 lo_end = end_offset(lo_start, lo->length);
++	u64 lr_start = lr->offset;
++	u64 lr_end = end_offset(lr_start, lr->length);
++
++	dprintk("%s:Begin lo %llu:%lld lr %llu:%lld\n", __func__,
++		lo->offset, lo->length, lr->offset, lr->length);
++	/* lr fully covers lo? */
++	if (lr_start <= lo_start && lo_end <= lr_end) {
++		lo->length = 0;
++		goto out;
++	}
++
++	/*
++	 * split not supported yet. retain layout segment.
++	 * remains must be returned by the client
++	 * on the final layout return.
++	 */
++	if (lo_start < lr_start && lr_end < lo_end) {
++		dprintk("%s: split not supported\n", __func__);
++		goto out;
++	}
++
++	if (lo_start < lr_start)
++		lo_end = lr_start; /* exclusive end: length is lo_end - lo_start */
++	else /* lr_end < lo_end */
++		lo_start = lr_end; /* first byte past the returned range */
++
++	lo->offset = lo_start;
++	lo->length = (lo_end == NFS4_MAX_UINT64) ? lo_end : lo_end - lo_start;
++out:
++	dprintk("%s:End lo %llu:%lld\n", __func__, lo->offset, lo->length);
++}
++
++static int
++pnfs_return_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp,
++			 struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	int layouts_found = 0;
++	struct nfs4_layout *lp, *nextlp;
++
++	dprintk("%s: clp %p fp %p\n", __func__, clp, fp);
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (lp, nextlp, &fp->fi_layouts, lo_perfile) {
++		dprintk("%s: lp %p client %p,%p lo_type %x,%x iomode %d,%d\n",
++			__func__, lp,
++			lp->lo_client, clp,
++			lp->lo_seg.layout_type, lrp->args.lr_seg.layout_type,
++			lp->lo_seg.iomode, lrp->args.lr_seg.iomode);
++		if (lp->lo_client != clp ||
++		    lp->lo_seg.layout_type != lrp->args.lr_seg.layout_type ||
++		    (lp->lo_seg.iomode != lrp->args.lr_seg.iomode &&
++		     lrp->args.lr_seg.iomode != IOMODE_ANY) ||
++		     !lo_seg_overlapping(&lp->lo_seg, &lrp->args.lr_seg))
++			continue;
++		layouts_found++;
++		trim_layout(&lp->lo_seg, &lrp->args.lr_seg);
++		if (!lp->lo_seg.length) {
++			lrp->lrs_present = 0;
++			dequeue_layout(lp);
++			destroy_layout(lp);
++		}
++	}
++	spin_unlock(&layout_lock);
++
++	return layouts_found;
++}
++
++static int
++pnfs_return_client_layouts(struct nfs4_client *clp,
++			   struct nfsd4_pnfs_layoutreturn *lrp, u64 ex_fsid)
++{
++	int layouts_found = 0;
++	struct nfs4_layout *lp, *nextlp;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (lp, nextlp, &clp->cl_layouts, lo_perclnt) {
++		if (lrp->args.lr_seg.layout_type != lp->lo_seg.layout_type ||
++		   (lrp->args.lr_seg.iomode != lp->lo_seg.iomode &&
++		    lrp->args.lr_seg.iomode != IOMODE_ANY))
++			continue;
++
++		if (lrp->args.lr_return_type == RETURN_FSID &&
++		    !same_fsid_major(&lp->lo_file->fi_fsid, ex_fsid))
++			continue;
++
++		layouts_found++;
++		dequeue_layout(lp);
++		destroy_layout(lp);
++	}
++	spin_unlock(&layout_lock);
++
++	return layouts_found;
++}
++
++static int
++recall_return_perfect_match(struct nfs4_layoutrecall *clr,
++			    struct nfsd4_pnfs_layoutreturn *lrp,
++			    struct nfs4_file *fp,
++			    struct svc_fh *current_fh)
++{
++	if (clr->cb.cbl_seg.iomode != lrp->args.lr_seg.iomode ||
++	    clr->cb.cbl_recall_type != lrp->args.lr_return_type)
++		return 0;
++
++	return (clr->cb.cbl_recall_type == RETURN_FILE &&
++		clr->clr_file == fp &&
++		clr->cb.cbl_seg.offset == lrp->args.lr_seg.offset &&
++		clr->cb.cbl_seg.length == lrp->args.lr_seg.length) ||
++
++		(clr->cb.cbl_recall_type == RETURN_FSID &&
++		 same_fsid(&clr->cb.cbl_fsid, current_fh)) ||
++
++		clr->cb.cbl_recall_type == RETURN_ALL;
++}
++
++static int
++recall_return_partial_match(struct nfs4_layoutrecall *clr,
++			    struct nfsd4_pnfs_layoutreturn *lrp,
++			    struct nfs4_file *fp,
++			    struct svc_fh *current_fh)
++{
++	/* iomode matching? */
++	if (clr->cb.cbl_seg.iomode != lrp->args.lr_seg.iomode &&
++	    clr->cb.cbl_seg.iomode != IOMODE_ANY &&
++	    lrp->args.lr_seg.iomode != IOMODE_ANY)
++		return 0;
++
++	if (clr->cb.cbl_recall_type == RETURN_ALL ||
++	    lrp->args.lr_return_type == RETURN_ALL)
++		return 1;
++
++	/* fsid matches? */
++	if (clr->cb.cbl_recall_type == RETURN_FSID ||
++	    lrp->args.lr_return_type == RETURN_FSID)
++		return same_fsid(&clr->cb.cbl_fsid, current_fh);
++
++	/* file matches, range overlapping? */
++	return clr->clr_file == fp &&
++	       lo_seg_overlapping(&clr->cb.cbl_seg, &lrp->args.lr_seg);
++}
++
++int nfs4_pnfs_return_layout(struct super_block *sb, struct svc_fh *current_fh,
++			    struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	int status = 0;
++	int layouts_found = 0;
++	struct inode *ino = current_fh->fh_dentry->d_inode;
++	struct nfs4_file *fp = NULL;
++	struct nfs4_client *clp;
++	struct nfs4_layoutrecall *clr, *nextclr;
++	u64 ex_fsid = current_fh->fh_export->ex_fsid;
++	void *recall_cookie = NULL;
++
++	dprintk("NFSD: %s\n", __func__);
++
++	nfs4_lock_state();
++	clp = find_confirmed_client((clientid_t *)&lrp->args.lr_seg.clientid);
++	if (!clp)
++		goto out;
++
++	if (lrp->args.lr_return_type == RETURN_FILE) {
++		fp = find_file(ino);
++		if (!fp) {
++			printk(KERN_ERR "%s: RETURN_FILE: no nfs4_file for "
++				"ino %p:%lu\n",
++				__func__, ino, ino ? ino->i_ino : 0L);
++			goto out;
++		}
++
++		/* Check the stateid */
++		dprintk("%s PROCESS LO_STATEID inode %p\n", __func__, ino);
++		status = nfs4_process_layout_stateid(clp, fp, &lrp->lr_sid,
++						     NULL);
++		if (status)
++			goto out_put_file;
++
++		/* update layouts */
++		layouts_found = pnfs_return_file_layouts(clp, fp, lrp);
++		/* optimize for the all-empty case */
++		if (list_empty(&fp->fi_layouts))
++			recall_cookie = PNFS_LAST_LAYOUT_NO_RECALLS;
++	} else {
++		layouts_found = pnfs_return_client_layouts(clp, lrp, ex_fsid);
++	}
++
++	dprintk("pNFS %s: clp %p fp %p layout_type 0x%x iomode %d "
++		"return_type %d fsid 0x%llx offset %llu length %llu: "
++		"layouts_found %d\n",
++		__func__, clp, fp, lrp->args.lr_seg.layout_type,
++		lrp->args.lr_seg.iomode, lrp->args.lr_return_type,
++		ex_fsid,
++		lrp->args.lr_seg.offset, lrp->args.lr_seg.length, layouts_found);
++
++	/* update layoutrecalls
++	 * note: for RETURN_{FSID,ALL}, fp may be NULL
++	 */
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (clr, nextclr, &clp->cl_layoutrecalls,
++				  clr_perclnt) {
++		if (clr->cb.cbl_seg.layout_type != lrp->args.lr_seg.layout_type)
++			continue;
++
++		if (recall_return_perfect_match(clr, lrp, fp, current_fh))
++			recall_cookie = layoutrecall_done(clr);
++		else if (layouts_found &&
++			 recall_return_partial_match(clr, lrp, fp, current_fh))
++			clr->clr_time = CURRENT_TIME;
++	}
++	spin_unlock(&layout_lock);
++
++out_put_file:
++	if (fp)
++		put_nfs4_file(fp);
++out:
++	nfs4_unlock_state();
++
++	/* call exported filesystem layout_return (ignore return-code) */
++	fs_layout_return(sb, ino, lrp, 0, recall_cookie);
++
++	dprintk("pNFS %s: exit status %d \n", __func__, status);
++	return status;
++}
++
++/*
++ * PNFS Metadata server export operations callback for get_state
++ *
++ * called by the cluster fs when it receives a get_state() from a data
++ * server.
++ * returns status, or pnfs_get_state* with pnfs_get_state->status set.
++ *
++ */
++int
++nfs4_pnfs_cb_get_state(struct super_block *sb, struct pnfs_get_state *arg)
++{
++	struct nfs4_stateid *stp;
++	int flags = LOCK_STATE | OPEN_STATE; /* search both hash tables */
++	int status = -EINVAL;
++	struct inode *ino;
++	struct nfs4_delegation *dl;
++	stateid_t *stid = (stateid_t *)&arg->stid;
++
++	dprintk("NFSD: %s sid=" STATEID_FMT " ino %llu\n", __func__,
++		STATEID_VAL(stid), arg->ino);
++
++	nfs4_lock_state();
++	stp = find_stateid(stid, flags);
++	if (!stp) {
++		ino = iget_locked(sb, arg->ino);
++		if (!ino)
++			goto out;
++
++		if (ino->i_state & I_NEW) {
++			iget_failed(ino);
++			goto out;
++		}
++
++		dl = find_delegation_stateid(ino, stid);
++		if (dl)
++			status = 0;
++
++		iput(ino);
++	} else {
++		/* XXX ANDROS: marc removed nfs4_check_fh - how come? */
++
++		/* arg->dsid is the Data server id, set by the cluster fs */
++		status = nfs4_add_pnfs_ds_dev(stp, arg->dsid);
++		if (status)
++			goto out;
++
++		arg->access = stp->st_access_bmap;
++		*(clientid_t *)&arg->clid =
++			stp->st_stateowner->so_client->cl_clientid;
++	}
++out:
++	nfs4_unlock_state();
++	return status;
++}
++
++static int
++cl_has_file_layout(struct nfs4_client *clp, struct nfs4_file *lrfile,
++		   stateid_t *lsid)
++{
++	int found = 0;
++	struct nfs4_layout *lp;
++	struct nfs4_layout_state *ls;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry(lp, &clp->cl_layouts, lo_perclnt) {
++		if (lp->lo_file != lrfile)
++			continue;
++
++		ls = find_get_layout_state(clp, lrfile);
++		if (!ls) {
++			/* This shouldn't happen as the file should have a
++			 * layout stateid if it has a layout.
++			 */
++			printk(KERN_ERR "%s: file %p has no layout stateid\n",
++				__func__, lrfile);
++			WARN_ON(1);
++			break;
++		}
++		update_stateid(&ls->ls_stateid);
++		memcpy(lsid, &ls->ls_stateid, sizeof(stateid_t));
++		put_layout_state_locked(ls);
++		found = 1;
++		break;
++	}
++	spin_unlock(&layout_lock);
++
++	return found;
++}
++
++static int
++cl_has_fsid_layout(struct nfs4_client *clp, struct nfs4_fsid *fsid)
++{
++	int found = 0;
++	struct nfs4_layout *lp;
++
++	/* note: minor version unused */
++	spin_lock(&layout_lock);
++	list_for_each_entry(lp, &clp->cl_layouts, lo_perclnt)
++		if (lp->lo_file->fi_fsid.major == fsid->major) {
++			found = 1;
++			break;
++		}
++	spin_unlock(&layout_lock);
++	return found;
++}
++
++static int
++cl_has_any_layout(struct nfs4_client *clp)
++{
++	return !list_empty(&clp->cl_layouts);
++}
++
++static int
++cl_has_layout(struct nfs4_client *clp, struct nfsd4_pnfs_cb_layout *cbl,
++	      struct nfs4_file *lrfile, stateid_t *lsid)
++{
++	switch (cbl->cbl_recall_type) {
++	case RETURN_FILE:	/* the only case that uses lrfile and lsid */
++		return cl_has_file_layout(clp, lrfile, lsid);
++	case RETURN_FSID:	/* compares against cbl->cbl_fsid */
++		return cl_has_fsid_layout(clp, &cbl->cbl_fsid);
++	default:		/* any layout held by the client counts */
++		return cl_has_any_layout(clp);
++	}
++}
++
++/*
++ * Called without the layout_lock.
++ */
++void
++nomatching_layout(struct nfs4_layoutrecall *clr)
++{
++	struct nfsd4_pnfs_layoutreturn lr = {
++		.args.lr_return_type = clr->cb.cbl_recall_type,
++		.args.lr_seg = clr->cb.cbl_seg,
++	};
++	struct inode *inode;
++	void *recall_cookie;
++
++	if (clr->clr_file) {
++		inode = igrab(clr->clr_file->fi_inode);
++		if (WARN_ON(!inode))
++			return;
++	} else {
++		inode = NULL;
++	}
++
++	dprintk("%s: clp %p fp %p: simulating layout_return\n", __func__,
++		clr->clr_client, clr->clr_file);
++
++	if (clr->cb.cbl_recall_type == RETURN_FILE)
++		pnfs_return_file_layouts(clr->clr_client, clr->clr_file, &lr);
++	else
++		pnfs_return_client_layouts(clr->clr_client, &lr,
++					   clr->cb.cbl_fsid.major);
++
++	spin_lock(&layout_lock);
++	recall_cookie = layoutrecall_done(clr);
++	spin_unlock(&layout_lock);
++
++	fs_layout_return(clr->clr_sb, inode, &lr, LR_FLAG_INTERN,
++			 recall_cookie);
++	iput(inode);
++}
++
++void pnfs_expire_client(struct nfs4_client *clp)
++{
++	for (;;) {	/* first: finish every recall still pending for clp */
++		struct nfs4_layoutrecall *lrp = NULL;
++
++		spin_lock(&layout_lock);
++		if (!list_empty(&clp->cl_layoutrecalls)) {
++			lrp = list_entry(clp->cl_layoutrecalls.next,
++					 struct nfs4_layoutrecall, clr_perclnt);
++			get_layoutrecall(lrp);
++		}
++		spin_unlock(&layout_lock);
++		if (!lrp)
++			break;
++
++		dprintk("%s: lrp %p, fp %p\n", __func__, lrp, lrp->clr_file);
++		BUG_ON(lrp->clr_client != clp);
++		nomatching_layout(lrp);
++		put_layoutrecall(lrp);
++	}
++
++	for (;;) {	/* then: return each layout clp still holds */
++		struct nfs4_layout *lp = NULL;
++		struct inode *inode = NULL;
++		struct nfsd4_pnfs_layoutreturn lr;
++		bool empty = false;
++
++		spin_lock(&layout_lock);
++		if (!list_empty(&clp->cl_layouts)) {
++			lp = list_entry(clp->cl_layouts.next,
++					struct nfs4_layout, lo_perclnt);
++			inode = igrab(lp->lo_file->fi_inode);
++			memset(&lr, 0, sizeof(lr));
++			lr.args.lr_return_type = RETURN_FILE;
++			lr.args.lr_seg = lp->lo_seg;
++			BUG_ON(lp->lo_client != clp);
++			dequeue_layout(lp); /* NOTE: assumed to unlink lo_perfile */
++			empty = list_empty(&lp->lo_file->fi_layouts);
++			destroy_layout(lp); /* do not access lp after this */
++		}
++		spin_unlock(&layout_lock);
++		if (!lp)
++			break;
++
++		if (WARN_ON(!inode))
++			break;
++
++		dprintk("%s: inode %lu lp %p clp %p\n", __func__, inode->i_ino,
++			lp, clp);
++
++		fs_layout_return(inode->i_sb, inode, &lr, LR_FLAG_EXPIRE,
++				 empty ? PNFS_LAST_LAYOUT_NO_RECALLS : NULL);
++		iput(inode);
++	}
++}
++
++struct create_recall_list_arg {
++	struct nfsd4_pnfs_cb_layout *cbl;	/* recall being requested */
++	struct nfs4_file *lrfile;		/* file to recall; may be NULL */
++	struct list_head *todolist;		/* pending recalls are added here */
++	unsigned todo_count;			/* entries added to todolist */
++};
++
++/*
++ * look for matching layout for the given client
++ * and add a pending layout recall to the todo list
++ * if found any.
++ * returns:
++ *   0 on success (also when no layout matched), -ENOMEM otherwise.
++ */
++static int
++lo_recall_per_client(struct nfs4_client *clp, void *p)
++{
++	stateid_t lsid;
++	struct nfs4_layoutrecall *pending;
++	struct create_recall_list_arg *arg = p;
++
++	memset(&lsid, 0, sizeof(lsid));
++	if (!cl_has_layout(clp, arg->cbl, arg->lrfile, &lsid))
++		return 0;
++
++	/* Matching put done by layoutreturn */
++	pending = alloc_init_layoutrecall(arg->cbl, clp, arg->lrfile);
++	/* out of memory, drain todo queue */
++	if (!pending)
++		return -ENOMEM;
++
++	*(stateid_t *)&pending->cb.cbl_sid = lsid;
++	list_add(&pending->clr_perclnt, arg->todolist);
++	arg->todo_count++;
++	return 0;
++}
++
++/* Create a layoutrecall structure for each client based on the
++ * original structure. */
++int
++create_layout_recall_list(struct list_head *todolist, unsigned *todo_len,
++			  struct nfsd4_pnfs_cb_layout *cbl,
++			  struct nfs4_file *lrfile)
++{
++	struct nfs4_client *clp;
++	struct create_recall_list_arg arg = {
++		.cbl = cbl,
++		.lrfile = lrfile,
++		.todolist = todolist,
++	};
++	int status = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	/* If client given by fs, just do single client */
++	if (cbl->cbl_seg.clientid) {
++		clp = find_confirmed_client(
++				(clientid_t *)&cbl->cbl_seg.clientid);
++		if (!clp) {
++			status = -ENOENT;
++			dprintk("%s: clientid %llx not found\n", __func__,
++				(unsigned long long)cbl->cbl_seg.clientid);
++			goto out;
++		}
++
++		status = lo_recall_per_client(clp, &arg);
++	} else {
++		/* Check all clients for layout matches */
++		status = filter_confirmed_clients(lo_recall_per_client, &arg);
++	}
++
++out:
++	*todo_len = arg.todo_count;
++	dprintk("%s: <-- list len %u status %d\n", __func__, *todo_len, status);
++	return status;
++}
++
++/*
++ * Recall layouts asynchronously
++ * Called with state lock.
++ */
++static int
++spawn_layout_recall(struct super_block *sb, struct list_head *todolist,
++		    unsigned todo_len)
++{
++	struct nfs4_layoutrecall *pending;
++	struct nfs4_layoutrecall *parent = NULL;
++	int status = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	if (todo_len > 1) {
++		pending = list_entry(todolist->next, struct nfs4_layoutrecall,
++				     clr_perclnt);
++
++		parent = alloc_init_layoutrecall(&pending->cb, NULL,
++						 pending->clr_file);
++		if (unlikely(!parent)) {
++			/* We want forward progress. If parent cannot be
++			 * allocated take the first one as parent but don't
++			 * execute it.  Caller must check for -EAGAIN; if so,
++			 * when the partial recalls return,
++			 * nfsd_layout_recall_cb should be called again.
++			 */
++			list_del_init(&pending->clr_perclnt);
++			if (todo_len > 2) {
++				parent = pending;
++			} else {
++				parent = NULL;
++				put_layoutrecall(pending);
++			}
++			--todo_len;
++				status = -ENOMEM;
++		}
++	}
++
++	while (!list_empty(todolist)) {
++		pending = list_entry(todolist->next, struct nfs4_layoutrecall,
++				     clr_perclnt);
++		list_del_init(&pending->clr_perclnt);
++		dprintk("%s: clp %p cb_client %p fp %p\n", __func__,
++			pending->clr_client,
++			pending->clr_client->cl_cb_client,
++			pending->clr_file);
++		if (unlikely(!pending->clr_client->cl_cb_client)) {
++			printk(KERN_INFO
++				"%s: clientid %08x/%08x has no callback path\n",
++				__func__,
++				pending->clr_client->cl_clientid.cl_boot,
++				pending->clr_client->cl_clientid.cl_id);
++			put_layoutrecall(pending);
++			continue;
++		}
++
++		pending->clr_time = CURRENT_TIME;
++		pending->clr_sb = sb;
++		if (parent) {
++			/* If we created a parent its initial ref count is 1.
++			 * We will need to de-ref it eventually. So we just
++			 * don't increment on behalf of the last one.
++			 */
++			if (todo_len != 1)
++				get_layoutrecall(parent);
++		}
++		pending->parent = parent;
++		get_layoutrecall(pending);
++		/* Add to list so corresponding layoutreturn can find req */
++		list_add(&pending->clr_perclnt,
++			 &pending->clr_client->cl_layoutrecalls);
++
++		nfsd4_cb_layout(pending);
++		--todo_len;
++	}
++
++	return status;
++}
++
++/*
++ * Spawn a thread to perform a recall layout
++ *
++ */
++int nfsd_layout_recall_cb(struct super_block *sb, struct inode *inode,
++			  struct nfsd4_pnfs_cb_layout *cbl)
++{
++	int status;
++	struct nfs4_file *lrfile = NULL;
++	struct list_head todolist;
++	unsigned todo_len = 0;
++
++	dprintk("NFSD nfsd_layout_recall_cb: inode %p cbl %p\n", inode, cbl);
++	BUG_ON(!cbl);
++	BUG_ON(cbl->cbl_recall_type != RETURN_FILE &&
++	       cbl->cbl_recall_type != RETURN_FSID &&
++	       cbl->cbl_recall_type != RETURN_ALL);
++	BUG_ON(cbl->cbl_recall_type == RETURN_FILE && !inode);
++	BUG_ON(cbl->cbl_seg.iomode != IOMODE_READ &&
++	       cbl->cbl_seg.iomode != IOMODE_RW &&
++	       cbl->cbl_seg.iomode != IOMODE_ANY);
++
++	if (nfsd_serv == NULL) {
++		dprintk("NFSD nfsd_layout_recall_cb: nfsd_serv == NULL\n");
++		return -ENOENT;
++	}
++
++	nfs4_lock_state();
++	status = -ENOENT;
++	if (inode) {
++		lrfile = find_file(inode);
++		if (!lrfile) {
++			dprintk("NFSD nfsd_layout_recall_cb: "
++				"nfs4_file not found\n");
++			goto err;
++		}
++		if (cbl->cbl_recall_type == RETURN_FSID)
++			cbl->cbl_fsid = lrfile->fi_fsid;
++	}
++
++	INIT_LIST_HEAD(&todolist);
++
++	/* If no cookie provided by FS, return a default one */
++	if (!cbl->cbl_cookie)
++		cbl->cbl_cookie = PNFS_LAST_LAYOUT_NO_RECALLS;
++
++	status = create_layout_recall_list(&todolist, &todo_len, cbl, lrfile);
++	if (list_empty(&todolist)) {
++		status = -ENOENT;
++	} else {
++		/* process todolist even if create_layout_recall_list
++		 * returned an error */
++		int status2 = spawn_layout_recall(sb, &todolist, todo_len);
++		if (status2)
++			status = status2;
++	}
++
++err:
++	nfs4_unlock_state();
++	if (lrfile)
++		put_nfs4_file(lrfile);
++	return (todo_len && status) ? -EAGAIN : status;
++}
++
++struct create_device_notify_list_arg {
++	struct list_head *todolist;		/* notifications are queued here */
++	struct nfsd4_pnfs_cb_dev_list *ndl;	/* stored in each one's nd_list */
++};
++
++static int
++create_device_notify_per_cl(struct nfs4_client *clp, void *p)
++{
++	struct nfs4_notify_device *cbnd;
++	struct create_device_notify_list_arg *arg = p;
++
++	if (atomic_read(&clp->cl_deviceref) <= 0)
++		return 0;
++
++	cbnd = kmalloc(sizeof(*cbnd), GFP_KERNEL);
++	if (!cbnd)
++		return -ENOMEM;
++
++	cbnd->nd_list = arg->ndl;
++	cbnd->nd_client = clp;
++	list_add(&cbnd->nd_perclnt, arg->todolist);
++	return 0;
++}
++
++/* Create a list of clients to send device notifications. */
++int
++create_device_notify_list(struct list_head *todolist,
++			  struct nfsd4_pnfs_cb_dev_list *ndl)
++{
++	int status;
++	struct create_device_notify_list_arg arg = {
++		.todolist = todolist,
++		.ndl = ndl,
++	};
++
++	nfs4_lock_state();
++	status = filter_confirmed_clients(create_device_notify_per_cl, &arg);
++	nfs4_unlock_state();
++
++	return status;
++}
++
++/*
++ * For each client that has a device, send a device notification.
++ * XXX: Need to track which clients have which devices.
++ */
++int nfsd_device_notify_cb(struct super_block *sb,
++			  struct nfsd4_pnfs_cb_dev_list *ndl)
++{
++	struct nfs4_notify_device *cbnd;
++	unsigned int notify_num = 0;
++	int status2, status = 0;
++	struct list_head todolist;
++
++	BUG_ON(!ndl || ndl->cbd_len == 0 || !ndl->cbd_list);
++
++	dprintk("NFSD %s: cbl %p len %u\n", __func__, ndl, ndl->cbd_len);
++
++	if (nfsd_serv == NULL)
++		return -ENOENT;
++
++	INIT_LIST_HEAD(&todolist);
++
++	status = create_device_notify_list(&todolist, ndl);
++
++	while (!list_empty(&todolist)) {
++		cbnd = list_entry(todolist.next, struct nfs4_notify_device,
++				  nd_perclnt);
++		list_del_init(&cbnd->nd_perclnt);
++		status2 = nfsd4_cb_notify_device(cbnd);
++		pnfs_clear_device_notify(cbnd->nd_client);
++		if (status2) {
++			kfree(cbnd);
++			status = status2;
++		}
++		notify_num++;
++	}
++
++	dprintk("NFSD %s: status %d clients %u\n",
++		__func__, status, notify_num);
++	return status;
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-08-23 12:09:03.309501439 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-08-23 12:09:03.309501439 -0400
+@@ -0,0 +1,461 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ ******************************************************************************/
++
++#include <linux/nfs4.h>
++#include <linux/nfsd/const.h>
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/sunrpc/clnt.h>
++
++#include "nfsfh.h"
++#include "nfsd.h"
++
++#define NFSDDBG_FACILITY                NFSDDBG_PROC
++
++/* Just use a linked list. Do not expect more than 32 dlm_device_entries
++ * the first implementation will just use one device per cluster file system
++ */
++
++static LIST_HEAD(dlm_device_list);
++static DEFINE_SPINLOCK(dlm_device_list_lock);
++
++struct dlm_device_entry {
++	struct list_head	dlm_dev_list;	/* link in dlm_device_list */
++	char			disk_name[DISK_NAME_LEN];
++	int			num_ds;		/* addresses in ds_list */
++	char			ds_list[NFSD_DLM_DS_LIST_MAX]; /* "addr,addr" */
++};
++
++static struct dlm_device_entry *
++_nfsd4_find_pnfs_dlm_device(char *disk_name)
++{
++	struct dlm_device_entry *dlm_pdev;
++
++	dprintk("--> %s  disk name %s\n", __func__, disk_name);
++	spin_lock(&dlm_device_list_lock);
++	list_for_each_entry(dlm_pdev, &dlm_device_list, dlm_dev_list) {
++		dprintk("%s Look for dlm_pdev %s\n", __func__,
++			dlm_pdev->disk_name);
++		if (!strncmp(dlm_pdev->disk_name, disk_name, DISK_NAME_LEN)) {
++			spin_unlock(&dlm_device_list_lock);
++			return dlm_pdev;	/* whole name matched */
++		}
++	}
++	spin_unlock(&dlm_device_list_lock);
++	return NULL;	/* a mere prefix ("sda" vs "sda1") is not a match */
++}
++
++static struct dlm_device_entry *
++nfsd4_find_pnfs_dlm_device(struct super_block *sb) {
++	char dname[BDEVNAME_SIZE];
++
++	bdevname(sb->s_bdev, dname);
++	return _nfsd4_find_pnfs_dlm_device(dname);
++}
++
++ssize_t
++nfsd4_get_pnfs_dlm_device_list(char *buf, ssize_t buflen)
++{
++	char *pos = buf;
++	ssize_t size = 0;
++	struct dlm_device_entry *dlm_pdev;
++	int ret = -EINVAL;
++
++	spin_lock(&dlm_device_list_lock);
++	list_for_each_entry(dlm_pdev, &dlm_device_list, dlm_dev_list)
++	{
++		int advanced;
++		advanced = snprintf(pos, buflen - size, "%s:%s\n", dlm_pdev->disk_name, dlm_pdev->ds_list);
++		if (advanced >= buflen - size)
++			goto out;
++		size += advanced;
++		pos += advanced;
++	}
++	ret = size;
++
++out:
++	spin_unlock(&dlm_device_list_lock);
++	return ret;
++}
++
++bool nfsd4_validate_pnfs_dlm_device(char *ds_list, int *num_ds)
++{
++	char *start = ds_list;
++
++	*num_ds = 0;	/* counts the addresses accepted so far */
++
++	while (*start) {
++		struct sockaddr_storage tempAddr;
++		int ipLen = strcspn(start, ",");
++
++		if (!rpc_pton(start, ipLen, (struct sockaddr *)&tempAddr, sizeof(tempAddr)))
++			return false;
++		(*num_ds)++;
++		start += ipLen + (start[ipLen] == ',');	/* stop on the NUL */
++	}
++	return true;
++}
++
++/*
++ * pnfs_dlm_device string format:
++ *     block-device-path:<ds1 ipv4 address>,<ds2 ipv4 address>
++ *
++ * Examples
++ *     '/dev/sda:192.168.1.96,192.168.1.97' creates a data server list with
++ *     two data servers for the dlm cluster file system mounted on /dev/sda.
++ *
++ *     '/dev/sda:192.168.1.96,192.168.1.100'
++ *     replaces the data server list for /dev/sda
++ *
++ *     Only the deviceid == 1 is supported. Can add device id to
++ *     pnfs_dlm_device string when needed. Only the round robin each
++ *     data server once stripe index is supported.
++ * Returns 0 on success, -ENOMEM or -EINVAL if the string is rejected.
++ */
++int
++nfsd4_set_pnfs_dlm_device(char *pnfs_dlm_device, int len)
++
++{
++	struct dlm_device_entry *new, *found;
++	char *bufp = pnfs_dlm_device;
++	char *endp = bufp + strlen(bufp);
++	int err = -ENOMEM;
++
++	dprintk("--> %s len %d\n", __func__, len);
++
++	new = kzalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return err;
++
++	err = -EINVAL;
++	/* disk_name */
++	/* FIXME: need to check for valid disk_name. search superblocks?
++	 * check for slash dev slash ?
++	 */
++	len = strcspn(bufp, ":");
++	if (len >= DISK_NAME_LEN)	/* keep room for the terminating NUL */
++		goto out_free;
++	memcpy(new->disk_name, bufp, len);
++
++	err = -EINVAL;
++	bufp += len + 1;
++	if (bufp >= endp)
++		goto out_free;
++
++	/* data server list */
++	/* FIXME: need to check for comma separated valid ip format */
++	len = strcspn(bufp, ":");
++	if (len >= NFSD_DLM_DS_LIST_MAX)	/* NUL must fit as well */
++		goto out_free;
++	memcpy(new->ds_list, bufp, len);
++
++
++	/*  validate the ips */
++	if (!nfsd4_validate_pnfs_dlm_device(new->ds_list, &(new->num_ds)))
++		goto out_free;
++
++	dprintk("%s disk_name %s num_ds %d ds_list %s\n", __func__,
++		new->disk_name, new->num_ds, new->ds_list);
++
++	found = _nfsd4_find_pnfs_dlm_device(new->disk_name);
++	if (found) {
++		/* FIXME: should compare found->ds_list with new->ds_list
++		 * and if it is different, kick off a CB_NOTIFY change
++		 * deviceid.
++		 */
++		dprintk("%s pnfs_dlm_device %s:%s already in cache "
++			" replace ds_list with new ds_list %s\n", __func__,
++			found->disk_name, found->ds_list, new->ds_list);
++		memset(found->ds_list, 0, sizeof(found->ds_list));
++		memcpy(found->ds_list, new->ds_list, strlen(new->ds_list));
++		found->num_ds = new->num_ds;
++		kfree(new);
++	} else {
++		dprintk("%s Adding pnfs_dlm_device %s:%s\n", __func__,
++				new->disk_name, new->ds_list);
++		spin_lock(&dlm_device_list_lock);
++		list_add(&new->dlm_dev_list, &dlm_device_list);
++		spin_unlock(&dlm_device_list_lock);
++	}
++	dprintk("<-- %s Success\n", __func__);
++	return 0;
++
++out_free:
++	kfree(new);
++	dprintk("<-- %s returns %d\n", __func__, err);
++	return err;
++}
++
++void nfsd4_pnfs_dlm_shutdown(void)
++{
++	struct dlm_device_entry *dlm_pdev, *next;
++
++	dprintk("--> %s\n", __func__);
++
++	spin_lock(&dlm_device_list_lock);
++	list_for_each_entry_safe (dlm_pdev, next, &dlm_device_list,
++				  dlm_dev_list) {
++		list_del(&dlm_pdev->dlm_dev_list);
++		kfree(dlm_pdev);
++	}
++	spin_unlock(&dlm_device_list_lock);
++}
++
++static int nfsd4_pnfs_dlm_getdeviter(struct super_block *sb,
++				     u32 layout_type,
++				     struct nfsd4_pnfs_dev_iter_res *res)
++{
++	if (layout_type != LAYOUT_NFSV4_1_FILES) {
++		printk(KERN_ERR "%s: ERROR: layout type isn't 'file' "
++			"(type: %x)\n", __func__, layout_type);
++		return -ENOTSUPP;
++	}
++
++	res->gd_eof = 1;	/* set on every call, success or not */
++	if (res->gd_cookie)	/* nonzero cookie: nothing further to list */
++		return -ENOENT;
++
++	res->gd_cookie = 1;
++	res->gd_verf = 1;
++	res->gd_devid = 1;	/* the single device id reported here */
++	return 0;
++}
++
++static int nfsd4_pnfs_dlm_getdevinfo(struct super_block *sb,
++				     struct exp_xdr_stream *xdr,
++				     u32 layout_type,
++				     const struct nfsd4_pnfs_deviceid *devid)
++{
++	int err, len, i = 0;
++	struct pnfs_filelayout_device fdev;
++	struct pnfs_filelayout_devaddr *daddr;
++	struct dlm_device_entry *dlm_pdev;
++	char   *bufp;
++
++	err = -ENOTSUPP;
++	if (layout_type != LAYOUT_NFSV4_1_FILES) {
++		dprintk("%s: ERROR: layout type isn't 'file' "
++			"(type: %x)\n", __func__, layout_type);
++		return err;
++	}
++
++	/* We only hand out a deviceid of 1 in LAYOUTGET, so a GETDEVICEINFO
++	 * with a gdia_device_id != 1 is invalid.
++	 */
++	err = -EINVAL;
++	if (devid->devid != 1) {
++		dprintk("%s: WARNING: didn't receive a deviceid of "
++			"1 (got: 0x%llx)\n", __func__, devid->devid);
++		return err;
++	}
++
++	/*
++	 * If the DS list has not been established, return -EINVAL
++	 */
++	dlm_pdev = nfsd4_find_pnfs_dlm_device(sb);
++	if (!dlm_pdev) {
++		dprintk("%s: DEBUG: disk %s Not Found\n", __func__,
++			sb->s_bdev->bd_disk->disk_name);
++		return err;
++	}
++
++	dprintk("%s: Found disk %s with DS list |%s|\n",
++		__func__, dlm_pdev->disk_name, dlm_pdev->ds_list);
++
++	memset(&fdev, '\0', sizeof(fdev));
++	fdev.fl_device_length = dlm_pdev->num_ds;
++
++	err = -ENOMEM;
++	len = sizeof(*fdev.fl_device_list) * fdev.fl_device_length;
++	fdev.fl_device_list = kzalloc(len, GFP_KERNEL);
++	if (!fdev.fl_device_list) {
++		printk(KERN_ERR "%s: ERROR: unable to kmalloc a device list "
++			"buffer for %d DSes.\n", __func__, dlm_pdev->num_ds);
++		fdev.fl_device_length = 0;
++		goto out;
++	}
++
++	/* Set simple stripe indices: entry i selects device i */
++	fdev.fl_stripeindices_length = fdev.fl_device_length;
++	fdev.fl_stripeindices_list = kzalloc(sizeof(u32) *
++				     fdev.fl_stripeindices_length, GFP_KERNEL);
++
++	if (!fdev.fl_stripeindices_list) {
++		printk(KERN_ERR "%s: ERROR: unable to kmalloc a stripeindices "
++			"list buffer for %d DSes.\n", __func__, dlm_pdev->num_ds);
++		goto out;
++	}
++	for (i = 0; i < fdev.fl_stripeindices_length; i++)
++		fdev.fl_stripeindices_list[i] = i;
++
++	/* Transfer the data server list with a single multipath entry */
++	bufp = dlm_pdev->ds_list;
++	for (i = 0; i < fdev.fl_device_length; i++) {
++		len = strcspn(bufp, ",");
++		/* one allocation: the address bytes follow the struct */
++		daddr = kmalloc(sizeof(*daddr) + len + 4, GFP_KERNEL);
++		if (!daddr) {
++			printk(KERN_ERR "%s: ERROR: unable to kmalloc a device "
++				"addr buffer.\n", __func__);
++			goto out;
++		}
++
++		daddr->r_netid.data = "tcp";
++		daddr->r_netid.len = 3;
++		daddr->r_addr.data = (void *)(daddr + 1);
++		memcpy(daddr->r_addr.data, bufp, len);
++		/*
++		 * append the port number.  interpreted as two more bytes
++		 * beyond the quad: ".8.1" -> 0x08.0x01 -> 0x0801 = port 2049.
++		 */
++		memcpy(daddr->r_addr.data + len, ".8.1", 4);
++		daddr->r_addr.len = len + 4;
++
++		fdev.fl_device_list[i].fl_multipath_length = 1;
++		fdev.fl_device_list[i].fl_multipath_list = daddr;
++
++		dprintk("%s: encoding DS |%s|\n", __func__, bufp);
++
++		bufp += len + 1;
++	}
++
++	/* have nfsd encode the device info */
++	err = filelayout_encode_devinfo(xdr, &fdev);
++out:
++	for (i = 0; i < fdev.fl_device_length; i++)	/* frees address bytes too */
++		kfree(fdev.fl_device_list[i].fl_multipath_list);
++	kfree(fdev.fl_device_list);
++	kfree(fdev.fl_stripeindices_list);
++	dprintk("<-- %s returns %d\n", __func__, err);
++	return err;
++}
++
++static int get_stripe_unit(int blocksize)
++{
++	if (blocksize >= NFSSVC_MAXBLKSIZE)
++		return blocksize;
++	return NFSSVC_MAXBLKSIZE - (NFSSVC_MAXBLKSIZE % blocksize);
++}
++
++/*
++ * Map an inode to a first stripe index in [0, num_ds) by hashing i_ino
++ * over the data servers registered for its block device; -1 if none.
++ */
++static int dlm_ino_hash(struct inode *ino)
++{
++	struct dlm_device_entry *de;
++	u32 num_ds;
++
++	/* If can't find the inode block device in the pnfs_dlm_device list
++	 * (or it lists no data servers) then don't hand out a layout
++	 */
++	de = nfsd4_find_pnfs_dlm_device(ino->i_sb);
++	if (!de || de->num_ds <= 0)
++		return -1;
++	num_ds = de->num_ds;	/* modulo reaches every DS, unlike a 2^n mask */
++	return ino->i_ino % num_ds;
++}
++
++static enum nfsstat4 nfsd4_pnfs_dlm_layoutget(struct inode *inode,
++			   struct exp_xdr_stream *xdr,
++			   const struct nfsd4_pnfs_layoutget_arg *args,
++			   struct nfsd4_pnfs_layoutget_res *res)
++{
++	struct pnfs_filelayout_layout *layout = NULL;
++	struct knfsd_fh *fhp = NULL;
++	int index;
++	enum nfsstat4 rc = NFS4_OK;
++
++	dprintk("%s: LAYOUT_GET\n", __func__);
++
++	/* DLM exported file systems only support layouts for READ */
++	if (res->lg_seg.iomode == IOMODE_RW)
++		return NFS4ERR_BADIOMODE;
++
++	index = dlm_ino_hash(inode);
++	dprintk("%s first stripe index %d i_ino %lu\n", __func__, index,
++		inode->i_ino);
++	if (index < 0)
++		return NFS4ERR_LAYOUTUNAVAILABLE;
++
++	res->lg_seg.layout_type = LAYOUT_NFSV4_1_FILES;
++	/* Always give out whole file layouts */
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;
++	/* Always give out READ ONLY layouts */
++	res->lg_seg.iomode = IOMODE_READ;
++
++	layout = kzalloc(sizeof(*layout), GFP_KERNEL);
++	if (layout == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;
++		goto error;
++	}
++
++	/* Set file layout response args */
++	layout->lg_layout_type = LAYOUT_NFSV4_1_FILES;
++	layout->lg_stripe_type = STRIPE_SPARSE;
++	layout->lg_commit_through_mds = false;
++	layout->lg_stripe_unit = get_stripe_unit(inode->i_sb->s_blocksize);
++	layout->lg_fh_length = 1;
++	layout->device_id.sbid = args->lg_sbid;
++	layout->device_id.devid = 1;                                /*FSFTEMP*/
++	layout->lg_first_stripe_index = index;                      /*FSFTEMP*/
++	layout->lg_pattern_offset = 0;
++
++	fhp = kmalloc(sizeof(*fhp), GFP_KERNEL);
++	if (fhp == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;
++		goto error;
++	}
++
++	memcpy(fhp, args->lg_fh, sizeof(*fhp));
++	pnfs_fh_mark_ds(fhp);
++	layout->lg_fh_list = fhp;
++
++	/* Call nfsd to encode layout */
++	rc = filelayout_encode_layout(xdr, layout);
++exit:
++	kfree(layout);
++	kfree(fhp);
++	return rc;
++
++error:
++	res->lg_seg.length = 0;
++	goto exit;
++}
++
++static int
++nfsd4_pnfs_dlm_layouttype(struct super_block *sb)
++{
++	return LAYOUT_NFSV4_1_FILES;
++}
++
++/* For use by DLM cluster file systems exported by pNFSD */
++const struct pnfs_export_operations pnfs_dlm_export_ops = {
++	.layout_type = nfsd4_pnfs_dlm_layouttype,
++	.get_device_info = nfsd4_pnfs_dlm_getdevinfo,
++	.get_device_iter = nfsd4_pnfs_dlm_getdeviter,
++	.layout_get = nfsd4_pnfs_dlm_layoutget,
++};
++EXPORT_SYMBOL(pnfs_dlm_export_ops);
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-08-23 12:09:03.310501390 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-08-23 12:09:03.310501390 -0400
+@@ -0,0 +1,620 @@
++/*
++*  linux/fs/nfsd/nfs4pnfsds.c
++*
++*  Copyright (c) 2005 The Regents of the University of Michigan.
++*  All rights reserved.
++*
++*  Andy Adamson <andros@umich.edu>
++*
++*  Redistribution and use in source and binary forms, with or without
++*  modification, are permitted provided that the following conditions
++*  are met:
++*
++*  1. Redistributions of source code must retain the above copyright
++*     notice, this list of conditions and the following disclaimer.
++*  2. Redistributions in binary form must reproduce the above copyright
++*     notice, this list of conditions and the following disclaimer in the
++*     documentation and/or other materials provided with the distribution.
++*  3. Neither the name of the University nor the names of its
++*     contributors may be used to endorse or promote products derived
++*     from this software without specific prior written permission.
++*
++*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++*
++*/
++#if defined(CONFIG_PNFSD)
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++#include <linux/param.h>
++#include <linux/sunrpc/svc.h>
++#include <linux/sunrpc/debug.h>
++#include <linux/nfs4.h>
++#include <linux/exportfs.h>
++#include <linux/sched.h>
++
++#include "nfsd.h"
++#include "pnfsd.h"
++#include "state.h"
++
++/*
++ *******************
++ *   	 PNFS
++ *******************
++ */
++/*
++ * Hash tables for pNFS Data Server state
++ *
++ * mds_id_tbl:	list of struct pnfs_mds_id one per Metadata server (MDS) using
++ *		this data server (DS).
++ *
++ * mds_clid_hashtbl[]: uses clientid_hashval(), hash of all clientids obtained
++ *			from any MDS.
++ *
++ * ds_stid_hashtbl[]: uses stateid_hashval(), hash of all stateids obtained
++ *			from any MDS.
++ *
++ */
++/* Hash tables for clientid state */
++#define CLIENT_HASH_BITS                 4
++#define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
++#define CLIENT_HASH_MASK                (CLIENT_HASH_SIZE - 1)
++
++#define clientid_hashval(id) \
++	((id) & CLIENT_HASH_MASK)
++
++/* hash table for pnfs_ds_stateid */
++#define STATEID_HASH_BITS              10
++#define STATEID_HASH_SIZE              (1 << STATEID_HASH_BITS)
++#define STATEID_HASH_MASK              (STATEID_HASH_SIZE - 1)
++
++#define stateid_hashval(owner_id, file_id)  \
++	(((owner_id) + (file_id)) & STATEID_HASH_MASK)
++
++static struct list_head mds_id_tbl;
++static struct list_head mds_clid_hashtbl[CLIENT_HASH_SIZE];
++static struct list_head ds_stid_hashtbl[STATEID_HASH_SIZE];
++
++static inline void put_ds_clientid(struct pnfs_ds_clientid *dcp);
++static inline void put_ds_mdsid(struct pnfs_mds_id *mdp);
++
++/* Mutex for data server state.  Needs to be separate from
++ * mds state mutex since a node can be both mds and ds */
++static DEFINE_MUTEX(ds_mutex);
++static struct thread_info *ds_mutex_owner;
++
++static void
++ds_lock_state(void)
++{
++	mutex_lock(&ds_mutex);
++	ds_mutex_owner = current_thread_info();
++}
++
++static void
++ds_unlock_state(void)
++{
++	BUG_ON(ds_mutex_owner != current_thread_info());
++	ds_mutex_owner = NULL;
++	mutex_unlock(&ds_mutex);
++}
++
++static int
++cmp_clid(const clientid_t *cl1, const clientid_t *cl2)
++{
++	return (cl1->cl_boot == cl2->cl_boot) &&
++	       (cl1->cl_id == cl2->cl_id);
++}
++
++void
++nfs4_pnfs_state_init(void)
++{
++	int i;
++
++	for (i = 0; i < CLIENT_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&mds_clid_hashtbl[i]);
++
++	for (i = 0; i < STATEID_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&ds_stid_hashtbl[i]);
++
++	INIT_LIST_HEAD(&mds_id_tbl);
++}
++
++static struct pnfs_mds_id *
++find_pnfs_mds_id(u32 mdsid)
++{
++	struct pnfs_mds_id *local = NULL;
++
++	dprintk("pNFSD: %s\n", __func__);
++	list_for_each_entry(local, &mds_id_tbl, di_hash) {
++		if (local->di_mdsid == mdsid)
++			return local;
++	}
++	return NULL;
++}
++
++static struct pnfs_ds_clientid *
++find_pnfs_ds_clientid(const clientid_t *clid)
++{
++	struct pnfs_ds_clientid *local = NULL;
++	unsigned int hashval;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	hashval = clientid_hashval(clid->cl_id);
++	list_for_each_entry(local, &mds_clid_hashtbl[hashval], dc_hash) {
++		if (cmp_clid(&local->dc_mdsclid, clid))
++			return local;
++	}
++	return NULL;
++}
++
++static struct pnfs_ds_stateid *
++find_pnfs_ds_stateid(stateid_t *stid)
++{
++	struct pnfs_ds_stateid *local = NULL;
++	u32 st_id = stid->si_stateownerid;
++	u32 f_id = stid->si_fileid;
++	unsigned int hashval;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	hashval = stateid_hashval(st_id, f_id);
++	list_for_each_entry(local, &ds_stid_hashtbl[hashval], ds_hash)
++		if ((local->ds_stid.si_stateownerid == st_id) &&
++				(local->ds_stid.si_fileid == f_id) &&
++				(local->ds_stid.si_boot == stid->si_boot)) {
++			stateid_t *sid = &local->ds_stid;
++			dprintk("NFSD: %s <-- %p ds_flags %lx " STATEID_FMT "\n",
++				__func__, local, local->ds_flags,
++				STATEID_VAL(sid));
++			return local;
++		}
++	return NULL;
++}
++
++static void
++release_ds_mdsid(struct kref *kref)
++{
++	struct pnfs_mds_id *mdp =
++		container_of(kref, struct pnfs_mds_id, di_ref);
++	dprintk("pNFSD: %s\n", __func__);
++
++	list_del(&mdp->di_hash);
++	list_del(&mdp->di_mdsclid);
++	kfree(mdp);
++}
++
++static void
++release_ds_clientid(struct kref *kref)
++{
++	struct pnfs_ds_clientid *dcp =
++		container_of(kref, struct pnfs_ds_clientid, dc_ref);
++	struct pnfs_mds_id *mdp;
++	dprintk("pNFSD: %s\n", __func__);
++
++	mdp = find_pnfs_mds_id(dcp->dc_mdsid);
++	if (mdp)
++		put_ds_mdsid(mdp);
++
++	list_del(&dcp->dc_hash);
++	list_del(&dcp->dc_stateid);
++	list_del(&dcp->dc_permdsid);
++	kfree(dcp);
++}
++
++static void
++release_ds_stateid(struct kref *kref)
++{
++	struct pnfs_ds_stateid *dsp =
++		container_of(kref, struct pnfs_ds_stateid, ds_ref);
++	struct pnfs_ds_clientid *dcp;
++	dprintk("pNFS %s: dsp %p\n", __func__, dsp);
++
++	dcp = find_pnfs_ds_clientid(&dsp->ds_mdsclid);
++	if (dcp)
++		put_ds_clientid(dcp);
++
++	list_del(&dsp->ds_hash);
++	list_del(&dsp->ds_perclid);
++	kfree(dsp);
++}
++
++static inline void
++put_ds_clientid(struct pnfs_ds_clientid *dcp)
++{
++	dprintk("pNFS %s: dcp %p ref %d\n", __func__, dcp,
++		atomic_read(&dcp->dc_ref.refcount));
++	kref_put(&dcp->dc_ref, release_ds_clientid);
++}
++
++static inline void
++get_ds_clientid(struct pnfs_ds_clientid *dcp)
++{
++	dprintk("pNFS %s: dcp %p ref %d\n", __func__, dcp,
++		atomic_read(&dcp->dc_ref.refcount));
++	kref_get(&dcp->dc_ref);
++}
++
++static inline void
++put_ds_mdsid(struct pnfs_mds_id *mdp)
++{
++	dprintk("pNFS %s: mdp %p ref %d\n", __func__, mdp,
++		atomic_read(&mdp->di_ref.refcount));
++	kref_put(&mdp->di_ref, release_ds_mdsid);
++}
++
++static inline void
++get_ds_mdsid(struct pnfs_mds_id *mdp)
++{
++	dprintk("pNFS %s: mdp %p ref %d\n", __func__, mdp,
++		atomic_read(&mdp->di_ref.refcount));
++	kref_get(&mdp->di_ref);
++}
++
++static inline void
++put_ds_stateid(struct pnfs_ds_stateid *dsp)
++{
++	dprintk("pNFS %s: dsp %p ref %d\n", __func__, dsp,
++		atomic_read(&dsp->ds_ref.refcount));
++	kref_put(&dsp->ds_ref, release_ds_stateid);
++}
++
++static inline void
++get_ds_stateid(struct pnfs_ds_stateid *dsp)
++{
++	dprintk("pNFS %s: dsp %p ref %d\n", __func__, dsp,
++		atomic_read(&dsp->ds_ref.refcount));
++	kref_get(&dsp->ds_ref);
++}
++
++void
++nfs4_pnfs_state_shutdown(void)
++{
++	struct pnfs_ds_stateid *dsp;
++	int i;
++
++	dprintk("pNFSD %s: -->\n", __func__);
++
++	ds_lock_state();
++	for (i = 0; i < STATEID_HASH_SIZE; i++) {
++		while (!list_empty(&ds_stid_hashtbl[i])) {
++			dsp = list_entry(ds_stid_hashtbl[i].next,
++					 struct pnfs_ds_stateid, ds_hash);
++			put_ds_stateid(dsp);
++		}
++	}
++	ds_unlock_state();
++}
++
++static struct pnfs_mds_id *
++alloc_init_mds_id(struct pnfs_get_state *gsp)
++{
++	struct pnfs_mds_id *mdp;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	mdp = kmalloc(sizeof(*mdp), GFP_KERNEL);
++	if (!mdp)
++		return NULL;
++	INIT_LIST_HEAD(&mdp->di_hash);
++	INIT_LIST_HEAD(&mdp->di_mdsclid);
++	list_add(&mdp->di_hash, &mds_id_tbl);
++	mdp->di_mdsid = gsp->dsid;
++	mdp->di_mdsboot = 0;
++	kref_init(&mdp->di_ref);
++	return mdp;
++}
++
++/* Allocate and hash a DS clientid, pinning (or creating) its MDS id. */
++static struct pnfs_ds_clientid *
++alloc_init_ds_clientid(struct pnfs_get_state *gsp)
++{
++	struct pnfs_mds_id *mdp;
++	struct pnfs_ds_clientid *dcp;
++	clientid_t *clid = (clientid_t *)&gsp->clid;
++	unsigned int hashval = clientid_hashval(clid->cl_id);
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	mdp = find_pnfs_mds_id(gsp->dsid);
++	if (mdp)
++		get_ds_mdsid(mdp);
++	else
++		mdp = alloc_init_mds_id(gsp);
++	if (!mdp)
++		return NULL;
++	dcp = kmalloc(sizeof(*dcp), GFP_KERNEL);
++	if (!dcp) {
++		put_ds_mdsid(mdp);	/* don't leak the MDS id reference */
++		return NULL;
++	}
++	INIT_LIST_HEAD(&dcp->dc_hash);
++	INIT_LIST_HEAD(&dcp->dc_stateid);
++	INIT_LIST_HEAD(&dcp->dc_permdsid);
++	list_add(&dcp->dc_hash, &mds_clid_hashtbl[hashval]);
++	list_add(&dcp->dc_permdsid, &mdp->di_mdsclid);
++	dcp->dc_mdsclid = *clid;
++	kref_init(&dcp->dc_ref);
++	dcp->dc_mdsid = gsp->dsid;
++	return dcp;
++}
++
++static struct pnfs_ds_stateid *
++alloc_init_ds_stateid(struct svc_fh *cfh, stateid_t *stidp)
++{
++	struct pnfs_ds_stateid *dsp;
++	u32 st_id = stidp->si_stateownerid;
++	u32 f_id  = stidp->si_fileid;
++	unsigned int hashval;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	dsp = kmalloc(sizeof(*dsp), GFP_KERNEL);
++	if (!dsp)
++		return dsp;
++
++	INIT_LIST_HEAD(&dsp->ds_hash);
++	INIT_LIST_HEAD(&dsp->ds_perclid);
++	memcpy(&dsp->ds_stid, stidp, sizeof(stateid_t));
++	fh_copy_shallow(&dsp->ds_fh, &cfh->fh_handle);
++	dsp->ds_access = 0;
++	dsp->ds_status = 0;
++	dsp->ds_flags = 0L;
++	kref_init(&dsp->ds_ref);
++	set_bit(DS_STATEID_NEW, &dsp->ds_flags);
++	clear_bit(DS_STATEID_VALID, &dsp->ds_flags);
++	clear_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++	init_waitqueue_head(&dsp->ds_waitq);
++
++	hashval = stateid_hashval(st_id, f_id);
++	list_add(&dsp->ds_hash, &ds_stid_hashtbl[hashval]);
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, dsp);
++	return dsp;
++}
++
++static int
++update_ds_stateid(struct pnfs_ds_stateid *dsp, struct svc_fh *cfh,
++		  struct pnfs_get_state *gsp)
++{
++	struct pnfs_ds_clientid *dcp;
++	int new = 0;
++
++	dprintk("pNFSD: %s dsp %p\n", __func__, dsp);
++
++	dcp = find_pnfs_ds_clientid((clientid_t *)&gsp->clid);
++	if (!dcp) {
++		dcp = alloc_init_ds_clientid(gsp);
++		if (!dcp)
++			return 1;
++		new = 1;
++	}
++	if (test_bit(DS_STATEID_NEW, &dsp->ds_flags)) {
++		list_add(&dsp->ds_perclid, &dcp->dc_stateid);
++		if (!new)
++			get_ds_clientid(dcp);
++	}
++
++	memcpy(&dsp->ds_stid, &gsp->stid, sizeof(stateid_t));
++	dsp->ds_access = gsp->access;
++	dsp->ds_status = 0;
++	dsp->ds_verifier[0] = gsp->verifier[0];
++	dsp->ds_verifier[1] = gsp->verifier[1];
++	memcpy(&dsp->ds_mdsclid, &gsp->clid, sizeof(clientid_t));
++	set_bit(DS_STATEID_VALID, &dsp->ds_flags);
++	clear_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++	clear_bit(DS_STATEID_NEW, &dsp->ds_flags);
++	return 0;
++}
++
++int
++nfs4_pnfs_cb_change_state(struct pnfs_get_state *gs)
++{
++	stateid_t *stid = (stateid_t *)&gs->stid;
++	struct pnfs_ds_stateid *dsp;
++
++	dprintk("pNFSD: %s stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(stid));
++
++	ds_lock_state();
++	dsp = find_pnfs_ds_stateid(stid);
++	if (dsp)
++		put_ds_stateid(dsp);
++	ds_unlock_state();
++
++	dprintk("pNFSD: %s dsp %p\n", __func__, dsp);
++
++	if (dsp)
++		return 0;
++	return -ENOENT;
++}
++
++/* Retrieves and validates stateid.
++ * If stateid exists and its fields match, return it.
++ * If stateid exists but either the generation or
++ * ownerids don't match, check with mds to see if it is valid.
++ * If the stateid doesn't exist, the first thread creates an
++ * invalid *marker* stateid, then checks to see if the
++ * stateid exists on the mds.  If so, it validates the *marker*
++ * stateid and updates its fields.  Subsequent threads that
++ * find the *marker* stateid wait until it is valid or an error
++ * occurs; NULL is returned on error or allocation failure.
++ * Called with ds_mutex held; it is dropped to wait or to ask the mds.
++ */
++static struct pnfs_ds_stateid *
++nfsv4_ds_get_state(struct svc_fh *cfh, stateid_t *stidp)
++{
++	struct inode *ino = cfh->fh_dentry->d_inode;
++	struct super_block *sb;
++	struct pnfs_ds_stateid *dsp = NULL;
++	struct pnfs_get_state gs = { .access = 0 };
++	int status = 0, waiter = 0;
++
++	dprintk("pNFSD: %s -->\n", __func__);
++
++	dsp = find_pnfs_ds_stateid(stidp);
++	if (dsp && test_bit(DS_STATEID_VALID, &dsp->ds_flags) &&
++	    (stidp->si_generation == dsp->ds_stid.si_generation))
++		goto out_noput;
++
++	sb = ino->i_sb;
++	if (!sb || !sb->s_pnfs_op || !sb->s_pnfs_op->get_state)
++		goto out_noput;
++
++	/* Uninitialize current state if it exists yet it doesn't match.
++	 * If it is already invalid, another thread is checking state */
++	if (dsp) {
++		if (!test_and_clear_bit(DS_STATEID_VALID, &dsp->ds_flags))
++			waiter = 1;
++	} else {
++		dsp = alloc_init_ds_stateid(cfh, stidp);
++		if (!dsp)
++			goto out_noput;
++	}
++
++	dprintk("pNFSD: %s Starting loop\n", __func__);
++	get_ds_stateid(dsp);
++	while (!test_bit(DS_STATEID_VALID, &dsp->ds_flags)) {
++		ds_unlock_state();
++		/* Another thread is checking the state */
++		if (waiter) {
++			dprintk("pNFSD: %s waiting\n", __func__);
++			wait_event_interruptible_timeout(dsp->ds_waitq,
++				(test_bit(DS_STATEID_VALID, &dsp->ds_flags) ||
++				 test_bit(DS_STATEID_ERROR, &dsp->ds_flags)),
++				 msecs_to_jiffies(1024));
++			dprintk("pNFSD: %s awake\n", __func__);
++			ds_lock_state();
++			if (test_bit(DS_STATEID_ERROR, &dsp->ds_flags))
++				goto out;
++
++			continue;
++		}
++
++		/* Validate stateid on mds */
++		dprintk("pNFSD: %s Checking state on MDS\n", __func__);
++		memcpy(&gs.stid, stidp, sizeof(stateid_t));
++		status = sb->s_pnfs_op->get_state(ino, &cfh->fh_handle, &gs);
++		dprintk("pNFSD: %s from MDS status %d\n", __func__, status);
++		ds_lock_state();
++		/* if !status and stateid is valid, update id and mark valid */
++		if (status || update_ds_stateid(dsp, cfh, &gs)) {
++			set_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++			/* remove invalid stateid from list */
++			put_ds_stateid(dsp);
++			wake_up(&dsp->ds_waitq);
++			goto out;
++		}
++
++		wake_up(&dsp->ds_waitq);
++	}
++out:
++	/* Sample the error flag first: dropping our ref may free dsp */
++	status = test_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++	put_ds_stateid(dsp);
++	if (status)
++		dsp = NULL;
++out_noput:
++	if (dsp)
++		dprintk("pNFSD: %s <-- dsp %p ds_flags %lx " STATEID_FMT "\n",
++			__func__, dsp, dsp->ds_flags, STATEID_VAL(&dsp->ds_stid));
++	/* If error, return null */
++	if (dsp && test_bit(DS_STATEID_ERROR, &dsp->ds_flags))
++		dsp = NULL;
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, dsp);
++	return dsp;
++}
++
++int
++nfs4_preprocess_pnfs_ds_stateid(struct svc_fh *cfh, stateid_t *stateid)
++{
++	struct pnfs_ds_stateid *dsp;
++	int status = 0;
++
++	dprintk("pNFSD: %s --> " STATEID_FMT "\n", __func__,
++		STATEID_VAL(stateid));
++
++	/* Must release state lock while verifying stateid on mds */
++	nfs4_unlock_state();
++	ds_lock_state();
++	dsp = nfsv4_ds_get_state(cfh, stateid);
++	if (dsp) {
++		get_ds_stateid(dsp);
++		dprintk("pNFSD: %s Found " STATEID_FMT "\n", __func__,
++			STATEID_VAL(&dsp->ds_stid));
++
++		dprintk("NFSD: %s: dsp %p fh_size %u:%u "
++			"fh [%08x:%08x:%08x:%08x]:[%08x:%08x:%08x:%08x] "
++			"gen %x:%x\n",
++			__func__, dsp,
++			cfh->fh_handle.fh_size, dsp->ds_fh.fh_size,
++			((unsigned *)&cfh->fh_handle.fh_base)[0],
++			((unsigned *)&cfh->fh_handle.fh_base)[1],
++			((unsigned *)&cfh->fh_handle.fh_base)[2],
++			((unsigned *)&cfh->fh_handle.fh_base)[3],
++			((unsigned *)&dsp->ds_fh.fh_base)[0],
++			((unsigned *)&dsp->ds_fh.fh_base)[1],
++			((unsigned *)&dsp->ds_fh.fh_base)[2],
++			((unsigned *)&dsp->ds_fh.fh_base)[3],
++			stateid->si_generation, dsp->ds_stid.si_generation);
++	}
++
++	if (!dsp ||
++	    (cfh->fh_handle.fh_size != dsp->ds_fh.fh_size) ||
++	    (memcmp(&cfh->fh_handle.fh_base, &dsp->ds_fh.fh_base,
++		    dsp->ds_fh.fh_size) != 0) ||
++	    (stateid->si_generation > dsp->ds_stid.si_generation))
++		status = nfserr_bad_stateid;
++	else if (stateid->si_generation < dsp->ds_stid.si_generation)
++		status = nfserr_old_stateid;
++
++	if (dsp)
++		put_ds_stateid(dsp);
++	ds_unlock_state();
++	nfs4_lock_state();
++	dprintk("pNFSD: %s <-- status %d\n", __func__, be32_to_cpu(status));
++	return status;
++}
++
++/* Store the 2-word write verifier at p: DS stateid if valid, else the fs. */
++void
++nfs4_ds_get_verifier(stateid_t *stateid, struct super_block *sb, u32 *p)
++{
++	struct pnfs_ds_stateid *dsp = NULL;
++
++	dprintk("pNFSD: %s --> stid %p\n", __func__, stateid);
++
++	ds_lock_state();
++	if (stateid != NULL) {
++		dsp = find_pnfs_ds_stateid(stateid);
++		if (dsp)
++			get_ds_stateid(dsp);
++	}
++
++	/* XXX: Should we fetch the stateid or wait if some other
++	 * thread is currently retrieving the stateid ? */
++	if (dsp && test_bit(DS_STATEID_VALID, &dsp->ds_flags)) {
++		p[0] = dsp->ds_verifier[0];
++		p[1] = dsp->ds_verifier[1];
++	} else {
++		/* must be on MDS */
++		ds_unlock_state();
++		sb->s_pnfs_op->get_verifier(sb, p);
++		ds_lock_state();
++	}
++	if (dsp)	/* drop the lookup reference on both paths */
++		put_ds_stateid(dsp);
++	ds_unlock_state();
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, dsp);
++}
++
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-08-23 12:08:29.091491685 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 12:09:03.311501496 -0400
+@@ -34,10 +34,14 @@
+  */
+ #include <linux/file.h>
+ #include <linux/slab.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd4_block.h>
+ 
+ #include "cache.h"
+ #include "xdr4.h"
+ #include "vfs.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY		NFSDDBG_PROC
+ 
+@@ -372,6 +376,24 @@ nfsd4_open(struct svc_rqst *rqstp, struc
+ 	 * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
+ 	 */
+ 	status = nfsd4_process_open2(rqstp, &cstate->current_fh, open);
++#if defined(CONFIG_SPNFS)
++	if (!status && spnfs_enabled()) {
++		struct inode *inode = cstate->current_fh.fh_dentry->d_inode;
++
++		status = spnfs_open(inode, open);
++		if (status) {
++			dprintk(
++			     "nfsd: pNFS could not be enabled for inode: %lu\n",
++			     inode->i_ino);
++			/*
++			 * XXX When there's a failure then need to indicate to
++			 * future ops that no pNFS is available.  Should I save
++			 * the status in the inode?  It's kind of a big hammer.
++			 * But there may be no stripes available?
++			 */
++		}
++	}
++#endif /* CONFIG_SPNFS */
+ out:
+ 	if (open->op_stateowner) {
+ 		nfs4_get_stateowner(open->op_stateowner);
+@@ -454,16 +476,30 @@ nfsd4_access(struct svc_rqst *rqstp, str
+ 			   &access->ac_supported);
+ }
+ 
++static void
++nfsd4_get_verifier(struct super_block *sb, nfs4_verifier *verf)
++{
++	u32 *p = (u32 *)verf->data;
++
++#if defined(CONFIG_PNFSD)
++	if (sb->s_pnfs_op && sb->s_pnfs_op->get_verifier) {
++		nfs4_ds_get_verifier(NULL, sb, p);
++		return;
++	}
++#endif /* CONFIG_PNFSD */
++
++	*p++ = nfssvc_boot.tv_sec;
++	*p++ = nfssvc_boot.tv_usec;
++}
++
+ static __be32
+ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ 	     struct nfsd4_commit *commit)
+ {
+ 	__be32 status;
+ 
+-	u32 *p = (u32 *)commit->co_verf.data;
+-	*p++ = nfssvc_boot.tv_sec;
+-	*p++ = nfssvc_boot.tv_usec;
+-
++	nfsd4_get_verifier(cstate->current_fh.fh_dentry->d_inode->i_sb,
++			   &commit->co_verf);
+ 	status = nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
+ 			     commit->co_count);
+ 	if (status == nfserr_symlink)
+@@ -816,7 +852,6 @@ nfsd4_write(struct svc_rqst *rqstp, stru
+ {
+ 	stateid_t *stateid = &write->wr_stateid;
+ 	struct file *filp = NULL;
+-	u32 *p;
+ 	__be32 status = nfs_ok;
+ 	unsigned long cnt;
+ 
+@@ -838,13 +873,49 @@ nfsd4_write(struct svc_rqst *rqstp, stru
+ 
+ 	cnt = write->wr_buflen;
+ 	write->wr_how_written = write->wr_stable_how;
+-	p = (u32 *)write->wr_verifier.data;
+-	*p++ = nfssvc_boot.tv_sec;
+-	*p++ = nfssvc_boot.tv_usec;
+ 
++	nfsd4_get_verifier(cstate->current_fh.fh_dentry->d_inode->i_sb,
++			   &write->wr_verifier);
++#if defined(CONFIG_SPNFS)
++#if defined(CONFIG_SPNFS_BLOCK)
++	if (pnfs_block_enabled(cstate->current_fh.fh_dentry->d_inode, 0)) {
++                status = bl_layoutrecall(cstate->current_fh.fh_dentry->d_inode,
++		    RETURN_FILE, write->wr_offset, write->wr_buflen);
++                if (!status) {
++                        status =  nfsd_write(rqstp, &cstate->current_fh, filp,
++			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
++			     &cnt, &write->wr_how_written);
++                }
++        } else
++#endif
++		
++	if (spnfs_enabled()) {
++		status = spnfs_write(cstate->current_fh.fh_dentry->d_inode,
++			write->wr_offset, write->wr_buflen, write->wr_vlen,
++			rqstp);
++		if (status == nfs_ok) {
++			/* DMXXX: HACK to get filesize set */
++			/* write one byte at offset+length-1 */
++			struct kvec k[1];
++			char zero = 0;
++			unsigned long cnt = 1;
++
++			k[0].iov_base = (void *)&zero;
++			k[0].iov_len = 1;
++			nfsd_write(rqstp, &cstate->current_fh, filp,
++				   write->wr_offset+write->wr_buflen-1, k, 1,
++				   &cnt, &write->wr_how_written);
++		}
++	} else /* we're not an MDS */
++		status =  nfsd_write(rqstp, &cstate->current_fh, filp,
++			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
++			     &cnt, &write->wr_how_written);
++#else
+ 	status =  nfsd_write(rqstp, &cstate->current_fh, filp,
+ 			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
+ 			     &cnt, &write->wr_how_written);
++#endif /* CONFIG_SPNFS */
++
+ 	if (filp)
+ 		fput(filp);
+ 
+@@ -935,6 +1006,306 @@ nfsd4_verify(struct svc_rqst *rqstp, str
+ 	return status == nfserr_same ? nfs_ok : status;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++static __be32
++nfsd4_layout_verify(struct super_block *sb, struct svc_export *exp,
++		    unsigned int layout_type)
++{
++	int status, type;
++
++	/* check to see if pNFS  is supported. */
++	status = nfserr_layoutunavailable;
++	if (exp && exp->ex_pnfs == 0) {
++		dprintk("%s: Underlying file system "
++			"is not exported over pNFS\n", __func__);
++		goto out;
++	}
++	if (!sb->s_pnfs_op || !sb->s_pnfs_op->layout_type) {
++		dprintk("%s: Underlying file system "
++			"does not support pNFS\n", __func__);
++		goto out;
++	}
++
++	type = sb->s_pnfs_op->layout_type(sb);
++
++	/* check to see if requested layout type is supported. */
++	status = nfserr_unknown_layouttype;
++	if (!type)
++		dprintk("BUG: %s: layout_type 0 is reserved and must not be "
++			"used by filesystem\n", __func__);
++	else if (type != layout_type)
++		dprintk("%s: requested layout type %d "
++		       "does not match supported type %d\n",
++			__func__, layout_type, type);
++	else
++		status = nfs_ok;
++out:
++	return status;
++}
++
++static __be32
++nfsd4_getdevlist(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_getdevlist *gdlp)
++{
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++	int status;
++
++	dprintk("%s: type %u maxdevices %u cookie %llu verf %llu\n",
++		__func__, gdlp->gd_layout_type, gdlp->gd_maxdevices,
++		gdlp->gd_cookie, gdlp->gd_verf);
++
++
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* We must be able to encode at least one device */
++	if (!gdlp->gd_maxdevices)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     gdlp->gd_layout_type);
++	if (status)
++		goto out;
++
++	/* Do nothing if underlying file system does not support
++	 * getdevicelist */
++	if (!sb->s_pnfs_op->get_device_iter) {
++		status = nfserr_notsupp;
++		goto out;
++	}
++
++	/* Set up arguments so device can be retrieved at encode time */
++	gdlp->gd_fhp = &cstate->current_fh;
++out:
++	return status;
++}
++
++static __be32
++nfsd4_getdevinfo(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_getdevinfo *gdp)
++{
++	struct super_block *sb;
++	int status;
++	clientid_t clid;
++
++	dprintk("%s: layout_type %u dev_id %llx:%llx maxcnt %u\n",
++	       __func__, gdp->gd_layout_type, gdp->gd_devid.sbid,
++	       gdp->gd_devid.devid, gdp->gd_maxcount);
++
++	status = nfserr_inval;
++	sb = find_sbid_id(gdp->gd_devid.sbid);
++	dprintk("%s: sb %p\n", __func__, sb);
++	if (!sb) {
++		status = nfserr_noent;
++		goto out;
++	}
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, NULL, gdp->gd_layout_type);
++	if (status)
++		goto out;
++
++	/* Set up arguments so device can be retrieved at encode time */
++	gdp->gd_sb = sb;
++
++	/* Update notifications */
++	copy_clientid(&clid, cstate->session);
++	pnfs_set_device_notify(&clid, gdp->gd_notify_types);
++out:
++	return status;
++}
++
++static __be32
++nfsd4_layoutget(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutget *lgp)
++{
++	int status;
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lgp->lg_seg.layout_type);
++	if (status)
++		goto out;
++
++	status = nfserr_badiomode;
++	if (lgp->lg_seg.iomode != IOMODE_READ &&
++	    lgp->lg_seg.iomode != IOMODE_RW) {
++		dprintk("pNFS %s: invalid iomode %d\n", __func__,
++			lgp->lg_seg.iomode);
++		goto out;
++	}
++
++	/* Set up arguments so layout can be retrieved at encode time */
++	lgp->lg_fhp = current_fh;
++	copy_clientid((clientid_t *)&lgp->lg_seg.clientid, cstate->session);
++	status = nfs_ok;
++out:
++	return status;
++}
++
++/* LAYOUTCOMMIT: have the filesystem commit, else extend the file size. */
++static __be32
++nfsd4_layoutcommit(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	int status;
++	struct inode *ino = NULL;
++	struct iattr ia;
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	dprintk("NFSD: nfsd4_layoutcommit \n");
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	ino = current_fh->fh_dentry->d_inode;
++	if (!ino)
++		goto out;
++	sb = ino->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lcp->args.lc_seg.layout_type);
++	if (status)
++		goto out;
++
++	/* This will only extend the file length.  Do a quick
++	 * check to see if there is any point in waiting for the update
++	 * locks.
++	 * TODO: Is this correct for all back ends?
++	 */
++	dprintk("%s:new offset: %d new size: %llu old size: %lld\n",
++		__func__, lcp->args.lc_newoffset, lcp->args.lc_last_wr + 1,
++		ino->i_size);
++
++	/* Set clientid from sessionid */
++	copy_clientid((clientid_t *)&lcp->args.lc_seg.clientid, cstate->session);
++	lcp->res.lc_size_chg = 0;
++	if (sb->s_pnfs_op->layout_commit) {
++		status = sb->s_pnfs_op->layout_commit(ino, &lcp->args, &lcp->res);
++		dprintk("%s:layout_commit result %d\n", __func__, status);
++	} else {
++		fh_lock(current_fh);
++		if ((lcp->args.lc_newoffset == 0) ||
++		    ((lcp->args.lc_last_wr + 1) <= ino->i_size)) {
++			status = 0;
++			lcp->res.lc_size_chg = 0;
++			fh_unlock(current_fh);
++			goto out;
++		}
++
++		/* Try our best to update the file size */
++		dprintk("%s: Modifying file size\n", __func__);
++		ia.ia_valid = ATTR_SIZE;
++		ia.ia_size = lcp->args.lc_last_wr + 1;
++		status = notify_change(current_fh->fh_dentry, &ia);
++		fh_unlock(current_fh);
++		dprintk("%s:notify_change result %d\n", __func__, status);
++		status = nfserrno(status);	/* errno -> nfs status */
++	}
++
++	if (!status && lcp->res.lc_size_chg &&
++	    EX_ISSYNC(current_fh->fh_export)) {
++		dprintk("%s: Synchronously writing inode size %llu\n",
++			__func__, ino->i_size);
++		write_inode_now(ino, 1);
++		lcp->res.lc_newsize = i_size_read(ino);
++	}
++out:
++	return status;
++}
++
++/* LAYOUTRETURN: check the arguments, then call nfs4_pnfs_return_layout() */
++static __be32
++nfsd4_layoutreturn(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	__be32 status;
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	/* The exit dprintk reads lrs_present on every path, so set it first */
++	lrp->lrs_present = 0;
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure the file system supports pNFS and the requested layout type */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lrp->args.lr_seg.layout_type);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;	/* result of either argument check below */
++	if (lrp->args.lr_return_type != RETURN_FILE &&
++	    lrp->args.lr_return_type != RETURN_FSID &&
++	    lrp->args.lr_return_type != RETURN_ALL) {
++		dprintk("pNFS %s: invalid return_type %d\n", __func__,
++			lrp->args.lr_return_type);
++		goto out;
++	}
++
++	if (lrp->args.lr_seg.iomode != IOMODE_READ &&
++	    lrp->args.lr_seg.iomode != IOMODE_RW &&
++	    lrp->args.lr_seg.iomode != IOMODE_ANY) {
++		dprintk("pNFS %s: invalid iomode %d\n", __func__,
++			lrp->args.lr_seg.iomode);
++		goto out;
++	}
++
++	/* Set clientid from sessionid */
++	copy_clientid((clientid_t *)&lrp->args.lr_seg.clientid, cstate->session);
++	lrp->lrs_present = (lrp->args.lr_return_type == RETURN_FILE);
++	status = nfs4_pnfs_return_layout(sb, current_fh, lrp);
++out:
++	dprintk("pNFS %s: status %d return_type 0x%x lrs_present %d\n",
++		__func__, status, lrp->args.lr_return_type, lrp->lrs_present);
++	return status;
++}
++#endif /* CONFIG_PNFSD */
++
+ /*
+  * NULL call.
+  */
+@@ -1317,6 +1688,29 @@ static struct nfsd4_operation nfsd4_ops[
+ 		.op_flags = ALLOWED_WITHOUT_FH,
+ 		.op_name = "OP_RECLAIM_COMPLETE",
+ 	},
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICELIST] = {
++		.op_func = (nfsd4op_func)nfsd4_getdevlist,
++		.op_name = "OP_GETDEVICELIST",
++	},
++	[OP_GETDEVICEINFO] = {
++		.op_func = (nfsd4op_func)nfsd4_getdevinfo,
++		.op_flags = ALLOWED_WITHOUT_FH,
++		.op_name = "OP_GETDEVICEINFO",
++	},
++	[OP_LAYOUTGET] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutget,
++		.op_name = "OP_LAYOUTGET",
++	},
++	[OP_LAYOUTCOMMIT] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutcommit,
++		.op_name = "OP_LAYOUTCOMMIT",
++	},
++	[OP_LAYOUTRETURN] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutreturn,
++		.op_name = "OP_LAYOUTRETURN",
++	},
++#endif /* CONFIG_PNFSD */
+ };
+ 
+ static const char *nfsd4_op_name(unsigned opnum)
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-08-23 12:08:29.093491375 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 12:09:03.313491310 -0400
+@@ -42,6 +42,8 @@
+ #include "xdr4.h"
+ #include "vfs.h"
+ 
++#include "pnfsd.h"
++
+ #define NFSDDBG_FACILITY                NFSDDBG_PROC
+ 
+ /* Globals */
+@@ -60,8 +62,6 @@ static u64 current_sessionid = 1;
+ #define ONE_STATEID(stateid)  (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
+ 
+ /* forward declarations */
+-static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
+-static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
+ static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
+ static void nfs4_set_recdir(char *recdir);
+ 
+@@ -69,6 +69,7 @@ static void nfs4_set_recdir(char *recdir
+ 
+ /* Currently used for almost all code touching nfsv4 state: */
+ static DEFINE_MUTEX(client_mutex);
++struct task_struct *client_mutex_owner;
+ 
+ /*
+  * Currently used for the del_recall_lru and file hash table.  In an
+@@ -86,11 +87,21 @@ void
+ nfs4_lock_state(void)
+ {
+ 	mutex_lock(&client_mutex);
++	client_mutex_owner = current;
++}
++
++#define BUG_ON_UNLOCKED_STATE() BUG_ON(client_mutex_owner != current)
++
++void
++nfs4_bug_on_unlocked_state(void)
++{
++	BUG_ON_UNLOCKED_STATE();	/* BUG unless client_mutex_owner is current */
++}
+ 
+ void
+ nfs4_unlock_state(void)
+ {
++	client_mutex_owner = NULL;
+ 	mutex_unlock(&client_mutex);
+ }
+ 
+@@ -109,7 +120,7 @@ opaque_hashval(const void *ptr, int nbyt
+ 
+ static struct list_head del_recall_lru;
+ 
+-static inline void
++inline void
+ put_nfs4_file(struct nfs4_file *fi)
+ {
+ 	if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) {
+@@ -120,7 +131,7 @@ put_nfs4_file(struct nfs4_file *fi)
+ 	}
+ }
+ 
+-static inline void
++inline void
+ get_nfs4_file(struct nfs4_file *fi)
+ {
+ 	atomic_inc(&fi->fi_ref);
+@@ -230,7 +241,10 @@ nfs4_close_delegation(struct nfs4_delega
+ 	 * but we want to remove the lease in any case. */
+ 	if (dp->dl_flock)
+ 		vfs_setlease(filp, F_UNLCK, &dp->dl_flock);
++	BUG_ON_UNLOCKED_STATE();
++	nfs4_unlock_state();	/* allow nested layout recall/return */
+ 	nfsd_close(filp);
++	nfs4_lock_state();
+ }
+ 
+ /* Called under the state lock. */
+@@ -266,8 +280,8 @@ static DEFINE_SPINLOCK(client_lock);
+  * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
+  * used in reboot/reset lease grace period processing
+  *
+- * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed
+- * setclientid_confirmed info. 
++ * conf_id_hashtbl[], and conf_str_hashtbl[] hold
++ * confirmed setclientid_confirmed info.
+  *
+  * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed 
+  * setclientid info.
+@@ -292,6 +306,7 @@ static void unhash_generic_stateid(struc
+ 	list_del(&stp->st_hash);
+ 	list_del(&stp->st_perfile);
+ 	list_del(&stp->st_perstateowner);
++	release_pnfs_ds_dev_list(stp);
+ }
+ 
+ static void free_generic_stateid(struct nfs4_stateid *stp)
+@@ -345,7 +360,10 @@ static void release_open_stateid(struct 
+ {
+ 	unhash_generic_stateid(stp);
+ 	release_stateid_lockowners(stp);
++	BUG_ON_UNLOCKED_STATE();
++	nfs4_unlock_state();	/* allow nested layout recall/return */
+ 	nfsd_close(stp->st_vfs_file);
++	nfs4_lock_state();
+ 	free_generic_stateid(stp);
+ }
+ 
+@@ -739,6 +757,8 @@ expire_client(struct nfs4_client *clp)
+ 	struct nfs4_delegation *dp;
+ 	struct list_head reaplist;
+ 
++	BUG_ON_UNLOCKED_STATE();
++
+ 	INIT_LIST_HEAD(&reaplist);
+ 	spin_lock(&recall_lock);
+ 	while (!list_empty(&clp->cl_delegations)) {
+@@ -758,6 +778,7 @@ expire_client(struct nfs4_client *clp)
+ 		sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
+ 		release_openowner(sop);
+ 	}
++	pnfs_expire_client(clp);
+ 	nfsd4_set_callback_client(clp, NULL);
+ 	if (clp->cl_cb_conn.cb_xprt)
+ 		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
+@@ -770,6 +791,13 @@ expire_client(struct nfs4_client *clp)
+ 	spin_unlock(&client_lock);
+ }
+ 
++void expire_client_lock(struct nfs4_client *clp)
++{
++	nfs4_lock_state();
++	expire_client(clp);	/* expire_client() BUGs if the state lock is not held */
++	nfs4_unlock_state();
++}
++
+ static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
+ {
+ 	memcpy(target->cl_verifier.data, source->data,
+@@ -859,6 +887,11 @@ static struct nfs4_client *create_client
+ 	INIT_LIST_HEAD(&clp->cl_strhash);
+ 	INIT_LIST_HEAD(&clp->cl_openowners);
+ 	INIT_LIST_HEAD(&clp->cl_delegations);
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&clp->cl_layouts);
++	INIT_LIST_HEAD(&clp->cl_layoutrecalls);
++	atomic_set(&clp->cl_deviceref, 0);
++#endif /* CONFIG_PNFSD */
+ 	INIT_LIST_HEAD(&clp->cl_sessions);
+ 	INIT_LIST_HEAD(&clp->cl_lru);
+ 	clp->cl_time = get_seconds();
+@@ -908,7 +941,7 @@ move_to_confirmed(struct nfs4_client *cl
+ 	renew_client(clp);
+ }
+ 
+-static struct nfs4_client *
++struct nfs4_client *
+ find_confirmed_client(clientid_t *clid)
+ {
+ 	struct nfs4_client *clp;
+@@ -978,6 +1011,24 @@ find_unconfirmed_client_by_str(const cha
+ 	return NULL;
+ }
+ 
++/* Call func on each confirmed client; stop at the first nonzero status and return it */
++int
++filter_confirmed_clients(int (* func)(struct nfs4_client *, void *),
++			 void *arg)
++{
++	struct nfs4_client *clp, *next;
++	int i, status;
++
++	for (i = 0; i < CLIENT_HASH_SIZE; i++)
++		list_for_each_entry_safe (clp, next, &conf_str_hashtbl[i],
++					  cl_strhash) {
++			status = func(clp, arg);
++			if (status)	/* a break would only leave this bucket */
++				return status;
++		}
++	return 0;
++}
++
+ static void
+ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
+ {
+@@ -1110,8 +1161,12 @@ nfsd4_replay_cache_entry(struct nfsd4_co
+ static void
+ nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
+ {
+-	/* pNFS is not supported */
++#if defined(CONFIG_PNFSD)
++	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS |
++				  EXCHGID4_FLAG_USE_PNFS_DS;
++#else  /* CONFIG_PNFSD */
+ 	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
++#endif /* CONFIG_PNFSD */
+ 
+ 	/* Referrals are supported, Migration is not. */
+ 	new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
+@@ -1301,6 +1356,13 @@ nfsd4_create_session(struct svc_rqst *rq
+ 	struct nfsd4_clid_slot *cs_slot = NULL;
+ 	int status = 0;
+ 
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++	/* XXX hack to get local ip address */
++	memcpy(&pnfsd_lexp_addr, &rqstp->rq_xprt->xpt_local,
++		sizeof(pnfsd_lexp_addr));
++	pnfs_lexp_addr_len = rqstp->rq_xprt->xpt_locallen;
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
+ 	nfs4_lock_state();
+ 	unconf = find_unconfirmed_client(&cr_ses->clientid);
+ 	conf = find_confirmed_client(&cr_ses->clientid);
+@@ -1340,25 +1402,26 @@ nfsd4_create_session(struct svc_rqst *rq
+ 		cs_slot->sl_seqid++; /* from 0 to 1 */
+ 		move_to_confirmed(unconf);
+ 
+-		if (cr_ses->flags & SESSION4_BACK_CHAN) {
+-			unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
+-			svc_xprt_get(rqstp->rq_xprt);
+-			rpc_copy_addr(
+-				(struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
+-				sa);
+-			unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
+-			unconf->cl_cb_conn.cb_minorversion =
+-				cstate->minorversion;
+-			unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
+-			unconf->cl_cb_seq_nr = 1;
+-			nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
+-		}
++		if (is_ds_only_session(unconf->cl_exchange_flags))
++			cr_ses->flags &= ~SESSION4_BACK_CHAN;
++
+ 		conf = unconf;
+ 	} else {
+ 		status = nfserr_stale_clientid;
+ 		goto out;
+ 	}
+ 
++	if (cr_ses->flags & SESSION4_BACK_CHAN) {
++		conf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
++		svc_xprt_get(rqstp->rq_xprt);
++		rpc_copy_addr((struct sockaddr *)&conf->cl_cb_conn.cb_addr, sa);
++		conf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
++		conf->cl_cb_conn.cb_minorversion = cstate->minorversion;
++		conf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
++		conf->cl_cb_seq_nr = 1;
++		nfsd4_probe_callback(conf, &conf->cl_cb_conn);
++	}
++
+ 	/*
+ 	 * We do not support RDMA or persistent sessions
+ 	 */
+@@ -1746,7 +1809,7 @@ out:
+ 
+ /* OPEN Share state helper functions */
+ static inline struct nfs4_file *
+-alloc_init_file(struct inode *ino)
++alloc_init_file(struct inode *ino, struct svc_fh *current_fh)
+ {
+ 	struct nfs4_file *fp;
+ 	unsigned int hashval = file_hashval(ino);
+@@ -1760,6 +1823,16 @@ alloc_init_file(struct inode *ino)
+ 		fp->fi_inode = igrab(ino);
+ 		fp->fi_id = current_fileid++;
+ 		fp->fi_had_conflict = false;
++#if defined(CONFIG_PNFSD)
++		INIT_LIST_HEAD(&fp->fi_layouts);
++		INIT_LIST_HEAD(&fp->fi_layout_states);
++		fp->fi_fsid.major = current_fh->fh_export->ex_fsid;
++		fp->fi_fsid.minor = 0;
++		fp->fi_fhlen = current_fh->fh_handle.fh_size;
++		BUG_ON(fp->fi_fhlen > sizeof(fp->fi_fhval));
++		memcpy(fp->fi_fhval, &current_fh->fh_handle.fh_base,
++		       fp->fi_fhlen);
++#endif /* CONFIG_PNFSD */
+ 		spin_lock(&recall_lock);
+ 		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
+ 		spin_unlock(&recall_lock);
+@@ -1768,7 +1841,7 @@ alloc_init_file(struct inode *ino)
+ 	return NULL;
+ }
+ 
+-static void
++void
+ nfsd4_free_slab(struct kmem_cache **slab)
+ {
+ 	if (*slab == NULL)
+@@ -1784,6 +1857,7 @@ nfsd4_free_slabs(void)
+ 	nfsd4_free_slab(&file_slab);
+ 	nfsd4_free_slab(&stateid_slab);
+ 	nfsd4_free_slab(&deleg_slab);
++	nfsd4_free_pnfs_slabs();
+ }
+ 
+ static int
+@@ -1805,6 +1879,8 @@ nfsd4_init_slabs(void)
+ 			sizeof(struct nfs4_delegation), 0, 0, NULL);
+ 	if (deleg_slab == NULL)
+ 		goto out_nomem;
++	if (nfsd4_init_pnfs_slabs())
++		goto out_nomem;
+ 	return 0;
+ out_nomem:
+ 	nfsd4_free_slabs();
+@@ -1878,6 +1954,9 @@ init_stateid(struct nfs4_stateid *stp, s
+ 	INIT_LIST_HEAD(&stp->st_perstateowner);
+ 	INIT_LIST_HEAD(&stp->st_lockowners);
+ 	INIT_LIST_HEAD(&stp->st_perfile);
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&stp->st_pnfs_ds_id);
++#endif /* CONFIG_PNFSD */
+ 	list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
+ 	list_add(&stp->st_perstateowner, &sop->so_stateids);
+ 	list_add(&stp->st_perfile, &fp->fi_stateids);
+@@ -1919,6 +1998,7 @@ find_openstateowner_str(unsigned int has
+ {
+ 	struct nfs4_stateowner *so = NULL;
+ 
++	BUG_ON_UNLOCKED_STATE();
+ 	list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
+ 		if (same_owner_str(so, &open->op_owner, &open->op_clientid))
+ 			return so;
+@@ -1927,7 +2007,7 @@ find_openstateowner_str(unsigned int has
+ }
+ 
+ /* search file_hashtbl[] for file */
+-static struct nfs4_file *
++struct nfs4_file *
+ find_file(struct inode *ino)
+ {
+ 	unsigned int hashval = file_hashval(ino);
+@@ -1945,6 +2025,18 @@ find_file(struct inode *ino)
+ 	return NULL;
+ }
+ 
++/* Return the nfs4_file find_file() has for ino, else a newly allocated one */
++struct nfs4_file *
++find_alloc_file(struct inode *ino, struct svc_fh *current_fh)
++{
++	struct nfs4_file *found = find_file(ino);
++
++	if (!found)
++		found = alloc_init_file(ino, current_fh);
++
++	return found;
++}
++
+ static inline int access_valid(u32 x, u32 minorversion)
+ {
+ 	if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ)
+@@ -2503,7 +2595,7 @@ nfsd4_process_open2(struct svc_rqst *rqs
+ 		if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
+ 			goto out;
+ 		status = nfserr_resource;
+-		fp = alloc_init_file(ino);
++		fp = alloc_init_file(ino, current_fh);
+ 		if (fp == NULL)
+ 			goto out;
+ 	}
+@@ -2730,7 +2822,7 @@ nfs4_check_fh(struct svc_fh *fhp, struct
+ 	return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_path.dentry->d_inode;
+ }
+ 
+-static int
++int
+ STALE_STATEID(stateid_t *stateid)
+ {
+ 	if (stateid->si_boot == boot_time)
+@@ -2740,6 +2832,16 @@ STALE_STATEID(stateid_t *stateid)
+ 	return 1;
+ }
+ 
++/* Reject the magic (zero/one) stateids as bad, then stale ones as stale */
++__be32
++nfs4_check_stateid(stateid_t *stateid)
++{
++	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
++		return nfserr_bad_stateid;
++
++	return STALE_STATEID(stateid) ? nfserr_stale_stateid : 0;
++}
++
+ static inline int
+ access_permit_read(unsigned long access_bmap)
+ {
+@@ -2848,6 +2950,24 @@ nfs4_preprocess_stateid_op(struct nfsd4_
+ 	if (grace_disallows_io(ino))
+ 		return nfserr_grace;
+ 
++#if defined(CONFIG_PNFSD)
++	if (pnfs_fh_is_ds(&current_fh->fh_handle)) {
++		if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
++			status = nfserr_bad_stateid;
++		else
++#ifdef CONFIG_GFS2_FS_LOCKING_DLM
++		{
++			dprintk("%s Don't check DS stateid\n", __func__);
++			return 0;
++		}
++#else /* CONFIG_GFS2_FS_LOCKING_DLM */
++			status = nfs4_preprocess_pnfs_ds_stateid(current_fh,
++								 stateid);
++#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
++		goto out;
++	}
++#endif /* CONFIG_PNFSD */
++
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+ 
+@@ -2924,13 +3044,9 @@ nfs4_preprocess_seqid_op(struct nfsd4_co
+ 	*stpp = NULL;
+ 	*sopp = NULL;
+ 
+-	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
+-		dprintk("NFSD: preprocess_seqid_op: magic stateid!\n");
+-		return nfserr_bad_stateid;
+-	}
+-
+-	if (STALE_STATEID(stateid))
+-		return nfserr_stale_stateid;
++	status = nfs4_check_stateid(stateid);
++	if (status)
++		return status;
+ 
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+@@ -3205,11 +3321,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+ 	nfs4_lock_state();
+-	status = nfserr_bad_stateid;
+-	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
+-		goto out;
+-	status = nfserr_stale_stateid;
+-	if (STALE_STATEID(stateid))
++	status = nfs4_check_stateid(stateid);
++	if (status)
+ 		goto out;
+ 	status = nfserr_bad_stateid;
+ 	if (!is_delegation_stateid(stateid))
+@@ -3238,26 +3351,6 @@ out:
+ #define LOCK_HASH_SIZE             (1 << LOCK_HASH_BITS)
+ #define LOCK_HASH_MASK             (LOCK_HASH_SIZE - 1)
+ 
+-static inline u64
+-end_offset(u64 start, u64 len)
+-{
+-	u64 end;
+-
+-	end = start + len;
+-	return end >= start ? end: NFS4_MAX_UINT64;
+-}
+-
+-/* last octet in a range */
+-static inline u64
+-last_byte_offset(u64 start, u64 len)
+-{
+-	u64 end;
+-
+-	BUG_ON(!len);
+-	end = start + len;
+-	return end > start ? end - 1: NFS4_MAX_UINT64;
+-}
+-
+ #define lockownerid_hashval(id) \
+         ((id) & LOCK_HASH_MASK)
+ 
+@@ -3274,7 +3367,7 @@ static struct list_head lock_ownerid_has
+ static struct list_head	lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
+ static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
+ 
+-static struct nfs4_stateid *
++struct nfs4_stateid *
+ find_stateid(stateid_t *stid, int flags)
+ {
+ 	struct nfs4_stateid *local;
+@@ -3303,7 +3396,7 @@ find_stateid(stateid_t *stid, int flags)
+ 	return NULL;
+ }
+ 
+-static struct nfs4_delegation *
++struct nfs4_delegation *
+ find_delegation_stateid(struct inode *ino, stateid_t *stid)
+ {
+ 	struct nfs4_file *fp;
+@@ -3436,6 +3529,9 @@ alloc_init_lock_stateid(struct nfs4_stat
+ 	INIT_LIST_HEAD(&stp->st_perfile);
+ 	INIT_LIST_HEAD(&stp->st_perstateowner);
+ 	INIT_LIST_HEAD(&stp->st_lockowners); /* not used */
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&stp->st_pnfs_ds_id);
++#endif /* CONFIG_PNFSD */
+ 	list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
+ 	list_add(&stp->st_perfile, &fp->fi_stateids);
+ 	list_add(&stp->st_perstateowner, &sop->so_stateids);
+@@ -3998,6 +4094,9 @@ nfs4_state_init(void)
+ 	INIT_LIST_HEAD(&client_lru);
+ 	INIT_LIST_HEAD(&del_recall_lru);
+ 	reclaim_str_hashtbl_size = 0;
++#if defined(CONFIG_PNFSD)
++	nfs4_pnfs_state_init();
++#endif /* CONFIG_PNFSD */
+ 	return 0;
+ }
+ 
+@@ -4110,6 +4209,7 @@ __nfs4_state_shutdown(void)
+ 	}
+ 
+ 	nfsd4_shutdown_recdir();
++	nfs4_pnfs_state_shutdown();
+ 	nfs4_init = 0;
+ }
+ 
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-23 12:09:03.315491356 -0400
+@@ -47,9 +47,14 @@
+ #include <linux/nfsd_idmap.h>
+ #include <linux/nfs4_acl.h>
+ #include <linux/sunrpc/svcauth_gss.h>
++#include <linux/exportfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd4_block.h>
+ 
+ #include "xdr4.h"
+ #include "vfs.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY		NFSDDBG_XDR
+ 
+@@ -1234,6 +1239,138 @@ nfsd4_decode_sequence(struct nfsd4_compo
+ 	DECODE_TAIL;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static __be32
++nfsd4_decode_getdevlist(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_getdevlist *gdevl)
++{
++	DECODE_HEAD;
++
++	READ_BUF(16 + sizeof(nfs4_verifier));	/* two u32s + one u64, then the verifier */
++	READ32(gdevl->gd_layout_type);
++	READ32(gdevl->gd_maxdevices);
++	READ64(gdevl->gd_cookie);
++	COPYMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
++
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_getdevinfo(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_getdevinfo *gdev)
++{
++	u32 num;	/* count word that precedes gd_notify_types */
++	DECODE_HEAD;
++
++	READ_BUF(12 + sizeof(struct nfsd4_pnfs_deviceid));	/* device id + three u32s */
++	READ64(gdev->gd_devid.sbid);
++	READ64(gdev->gd_devid.devid);
++	READ32(gdev->gd_layout_type);
++	READ32(gdev->gd_maxcount);
++	READ32(num);
++	if (num) {
++		READ_BUF(4);	/* NOTE(review): reads one word even when num > 1 - confirm */
++		READ32(gdev->gd_notify_types);
++	} else {
++		gdev->gd_notify_types = 0;
++	}
++
++	DECODE_TAIL;
++}
++
++/* Decode LAYOUTGET arguments; a stateid that fails to decode fails the op */
++static __be32
++nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_layoutget *lgp)
++{
++	DECODE_HEAD;
++	READ_BUF(36);	/* three u32s and three u64s */
++	READ32(lgp->lg_signal);
++	READ32(lgp->lg_seg.layout_type);
++	READ32(lgp->lg_seg.iomode);
++	READ64(lgp->lg_seg.offset);
++	READ64(lgp->lg_seg.length);
++	READ64(lgp->lg_minlength);
++	if ((status = nfsd4_decode_stateid(argp, &lgp->lg_sid)))
++		return status;
++	READ_BUF(4);
++	READ32(lgp->lg_maxcount);
++	DECODE_TAIL;
++}
++
++/* Decode LAYOUTCOMMIT arguments; a stateid that fails to decode fails the op */
++static __be32
++nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
++			  struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	DECODE_HEAD;
++	u32 timechange;
++
++	READ_BUF(20);
++	READ64(lcp->args.lc_seg.offset);
++	READ64(lcp->args.lc_seg.length);
++	READ32(lcp->args.lc_reclaim);
++	if ((status = nfsd4_decode_stateid(argp, &lcp->lc_sid)))
++		return status;
++	READ_BUF(4);
++	READ32(lcp->args.lc_newoffset);
++	if (lcp->args.lc_newoffset) {
++		READ_BUF(8);
++		READ64(lcp->args.lc_last_wr);
++	} else
++		lcp->args.lc_last_wr = 0;
++	READ_BUF(4);
++	READ32(timechange);
++	if (timechange) {
++		READ_BUF(12);
++		READ64(lcp->args.lc_mtime.seconds);
++		READ32(lcp->args.lc_mtime.nseconds);
++	} else {
++		lcp->args.lc_mtime.seconds = 0;
++		lcp->args.lc_mtime.nseconds = 0;
++	}
++	READ_BUF(8);
++	READ32(lcp->args.lc_seg.layout_type);
++	/* XXX: saving XDR'ed layout update.  With no current_fh yet, and so no
++	 * export_ops, the layout specific decode routines can't be called.
++	 * File and pVFS2 do not use the layout update. */
++	READ32(lcp->args.lc_up_len);
++	if (lcp->args.lc_up_len > 0) {
++		READ_BUF(lcp->args.lc_up_len);
++		READMEM(lcp->args.lc_up_layout, lcp->args.lc_up_len);
++	}
++
++	DECODE_TAIL;
++}
++
++/* Decode LAYOUTRETURN arguments; a stateid that fails to decode fails the op */
++static __be32
++nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
++			  struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	DECODE_HEAD;
++	READ_BUF(16);
++	READ32(lrp->args.lr_reclaim);
++	READ32(lrp->args.lr_seg.layout_type);
++	READ32(lrp->args.lr_seg.iomode);
++	READ32(lrp->args.lr_return_type);
++	if (lrp->args.lr_return_type == RETURN_FILE) {	/* only this type has a body */
++		READ_BUF(16);
++		READ64(lrp->args.lr_seg.offset);
++		READ64(lrp->args.lr_seg.length);
++		if ((status = nfsd4_decode_stateid(argp, &lrp->lr_sid)))
++			return status;
++		READ_BUF(4);
++		READ32(lrp->args.lrf_body_len);
++		if (lrp->args.lrf_body_len > 0) {
++			READ_BUF(lrp->args.lrf_body_len);
++			READMEM(lrp->args.lrf_body, lrp->args.lrf_body_len);
++		}
++	}
++	DECODE_TAIL;
++}
++#endif /* CONFIG_PNFSD */
++
+ static __be32
+ nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
+ {
+@@ -1335,11 +1472,19 @@ static nfsd4_dec nfsd41_dec_ops[] = {
+ 	[OP_DESTROY_SESSION]	= (nfsd4_dec)nfsd4_decode_destroy_session,
+ 	[OP_FREE_STATEID]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_GET_DIR_DELEGATION]	= (nfsd4_dec)nfsd4_decode_notsupp,
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICEINFO]	= (nfsd4_dec)nfsd4_decode_getdevinfo,
++	[OP_GETDEVICELIST]	= (nfsd4_dec)nfsd4_decode_getdevlist,
++	[OP_LAYOUTCOMMIT]	= (nfsd4_dec)nfsd4_decode_layoutcommit,
++	[OP_LAYOUTGET]		= (nfsd4_dec)nfsd4_decode_layoutget,
++	[OP_LAYOUTRETURN]	= (nfsd4_dec)nfsd4_decode_layoutreturn,
++#else  /* CONFIG_PNFSD */
+ 	[OP_GETDEVICEINFO]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_GETDEVICELIST]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTCOMMIT]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTGET]		= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTRETURN]	= (nfsd4_dec)nfsd4_decode_notsupp,
++#endif /* CONFIG_PNFSD */
+ 	[OP_SECINFO_NO_NAME]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_SEQUENCE]		= (nfsd4_dec)nfsd4_decode_sequence,
+ 	[OP_SET_SSV]		= (nfsd4_dec)nfsd4_decode_notsupp,
+@@ -2136,6 +2281,36 @@ out_acl:
+ 		}
+ 		WRITE64(stat.ino);
+ 	}
++#if defined(CONFIG_PNFSD)
++	if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) {
++		struct super_block *sb = dentry->d_inode->i_sb;
++		int type = 0;
++
++		/* Query the filesystem for supported pNFS layout types.
++		 * Currently, we only support one layout type per file system.
++		 * The export_ops->layout_type() returns the pnfs_layouttype4.
++		 */
++		buflen -= 4;
++		if (buflen < 0)		/* length */
++			goto out_resource;
++
++		if (sb && sb->s_pnfs_op && sb->s_pnfs_op->layout_type)
++			type = sb->s_pnfs_op->layout_type(sb);
++		if (type) {
++			if ((buflen -= 4) < 0)	/* type */
++				goto out_resource;
++			WRITE32(1); 	/* length */
++			WRITE32(type);  /* type */
++		} else
++			WRITE32(0);  /* length */
++	}
++
++	if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) {
++		if ((buflen -= 4) < 0)
++			goto out_resource;
++		WRITE32(stat.blksize);
++	}
++#endif /* CONFIG_PNFSD */
+ 	if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
+ 		WRITE32(3);
+ 		WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
+@@ -2366,6 +2541,10 @@ nfsd4_encode_commit(struct nfsd4_compoun
+ 	if (!nfserr) {
+ 		RESERVE_SPACE(8);
+ 		WRITEMEM(commit->co_verf.data, 8);
++		dprintk("NFSD: nfsd4_encode_commit: verifier %x:%x\n",
++			((u32 *)(&commit->co_verf.data))[0],
++			((u32 *)(&commit->co_verf.data))[1]);
++
+ 		ADJUST_ARGS();
+ 	}
+ 	return nfserr;
+@@ -2620,9 +2799,20 @@ nfsd4_encode_read(struct nfsd4_compoundr
+ 	}
+ 	read->rd_vlen = v;
+ 
++#if defined(CONFIG_SPNFS)
++	if (spnfs_enabled())
++		nfserr = spnfs_read(read->rd_fhp->fh_dentry->d_inode,
++				    read->rd_offset, &maxcount, read->rd_vlen,
++				    resp->rqstp);
++	else /* we're not an MDS */
++		nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
++			read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
++			&maxcount);
++#else
+ 	nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
+ 			read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
+ 			&maxcount);
++#endif /* CONFIG_SPNFS */
+ 
+ 	if (nfserr == nfserr_symlink)
+ 		nfserr = nfserr_inval;
+@@ -2926,6 +3116,9 @@ nfsd4_encode_write(struct nfsd4_compound
+ 		WRITE32(write->wr_bytes_written);
+ 		WRITE32(write->wr_how_written);
+ 		WRITEMEM(write->wr_verifier.data, 8);
++		dprintk("NFSD: nfsd4_encode_write: verifier %x:%x\n",
++			((u32 *)(&write->wr_verifier.data))[0],
++			((u32 *)(&write->wr_verifier.data))[1]);
+ 		ADJUST_ARGS();
+ 	}
+ 	return nfserr;
+@@ -3069,6 +3262,343 @@ nfsd4_encode_sequence(struct nfsd4_compo
+ 	return 0;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++/* Calls the file system's get_device_iter() until gd_maxdevices ids are
++ * encoded or it reports the end; *dev_count returns how many were encoded.
++ */
++static  __be32
++nfsd4_encode_devlist_iterator(struct nfsd4_compoundres *resp,
++			      struct nfsd4_pnfs_getdevlist *gdevl,
++			      unsigned int *dev_count)
++{
++	struct super_block *sb = gdevl->gd_fhp->fh_dentry->d_inode->i_sb;
++	__be32 nfserr;
++	int status;
++	__be32 *p;
++	struct nfsd4_pnfs_dev_iter_res res = {
++		.gd_cookie = gdevl->gd_cookie,
++		.gd_verf = gdevl->gd_verf,
++		.gd_eof = 0
++	};
++	u64 sbid;
++
++	dprintk("%s: Begin\n", __func__);
++
++	sbid = find_create_sbid(sb);
++	*dev_count = 0;
++	do {
++		status = sb->s_pnfs_op->get_device_iter(sb,
++							gdevl->gd_layout_type,
++							&res);
++		if (status) {
++			if (status == -ENOENT) {
++				res.gd_eof = 1;
++				/* -ENOENT is treated as end of list, not an error */
++				break;
++			}
++			nfserr = nfserrno(status);
++			goto out_err;
++		}
++
++		/* Encode the device id: sbid first, then the devid */
++		RESERVE_SPACE(sizeof(struct nfsd4_pnfs_deviceid));
++		WRITE64((__be64)sbid);
++		WRITE64(res.gd_devid);	/* devid minor */
++		ADJUST_ARGS();
++		(*dev_count)++;
++	} while (*dev_count < gdevl->gd_maxdevices && !res.gd_eof);
++	gdevl->gd_cookie = res.gd_cookie;	/* copied back only on success */
++	gdevl->gd_verf = res.gd_verf;
++	gdevl->gd_eof = res.gd_eof;
++	nfserr = nfs_ok;
++out_err:
++	dprintk("%s: Encoded %u devices\n", __func__, *dev_count);
++	return nfserr;
++}
++
++/* Encodes the GETDEVICELIST reply: cookie, verifier and device count are
++ * backfilled once the ids are encoded; resp->p is rewound on error. */
++static __be32
++nfsd4_encode_getdevlist(struct nfsd4_compoundres *resp, __be32 nfserr,
++			struct nfsd4_pnfs_getdevlist *gdevl)
++{
++	unsigned int dev_count = 0, lead_count;
++	__be32 *p_in = resp->p;	/* start of this reply, for backfill/rewind */
++	__be32 *p;
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	/* Ensure we have room for cookie, verifier, and devlist len,
++	 * which we will backfill in after we encode as many devices as possible
++	 */
++	lead_count = 8 + sizeof(nfs4_verifier) + 4;
++	RESERVE_SPACE(lead_count);
++	/* skip past these values */
++	p += XDR_QUADLEN(lead_count);
++	ADJUST_ARGS();
++
++	/* Iterate over as many device ids as possible on the xdr stream */
++	nfserr = nfsd4_encode_devlist_iterator(resp, gdevl, &dev_count);
++	if (nfserr)
++		goto out_err;
++
++	/* Backfill in cookie, verf and number of devices encoded */
++	p = p_in;
++	WRITE64(gdevl->gd_cookie);
++	WRITEMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
++	WRITE32(dev_count);
++
++	/* Skip over devices */
++	p += XDR_QUADLEN(dev_count * sizeof(struct nfsd4_pnfs_deviceid));
++	ADJUST_ARGS();
++
++	/* are we at the end of devices? */
++	RESERVE_SPACE(4);
++	WRITE32(gdevl->gd_eof);
++	ADJUST_ARGS();
++
++	dprintk("%s: done.\n", __func__);
++
++	nfserr = nfs_ok;
++out:
++	return nfserr;
++out_err:
++	p = p_in;
++	ADJUST_ARGS();
++	goto out;
++}
++
++/* For a given device id, have the file system retrieve and encode the
++ * associated device.  For file layout, the encoding function is
++ * passed down to the file system.  The file system then has the option
++ * of using this encoding function or one of its own.
++ *
++ * Note: on -ETOOSMALL the count written back to the client is taken from
++ * how far the file system advanced xdr.p (see toosmall: below), so it
++ * should advance it by the XDR size it needs - TODO confirm with callee.
++ */
++static __be32
++nfsd4_encode_getdevinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
++			struct nfsd4_pnfs_getdevinfo *gdev)
++{
++	struct super_block *sb;
++	int maxcount = 0, type_notify_len = 12;
++	__be32 *p, *p_save = NULL, *p_in = resp->p;
++	struct exp_xdr_stream xdr;
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	sb = gdev->gd_sb;
++
++	if (gdev->gd_maxcount != 0) {
++		/* FIXME: this will be bound by the session max response */
++		maxcount = svc_max_payload(resp->rqstp);
++		if (maxcount > gdev->gd_maxcount)
++			maxcount = gdev->gd_maxcount;
++
++		/* Ensure there is room for the type and notify fields */
++		maxcount -= type_notify_len;
++		if (maxcount < 0) {
++			nfserr = -ETOOSMALL;
++			goto toosmall;
++		}
++	}
++
++	RESERVE_SPACE(4);
++	WRITE32(gdev->gd_layout_type);
++	ADJUST_ARGS();
++
++	/* If maxcount is 0 then just update notifications */
++	if (gdev->gd_maxcount == 0)
++		goto handle_notifications;
++
++	xdr.p = p_save = resp->p;
++	xdr.end = resp->end;
++	if (xdr.end - xdr.p > exp_xdr_qwords(maxcount & ~3))
++		xdr.end = xdr.p + exp_xdr_qwords(maxcount & ~3);
++
++	nfserr = sb->s_pnfs_op->get_device_info(sb, &xdr, gdev->gd_layout_type,
++						&gdev->gd_devid);
++	if (nfserr)
++		goto err;
++
++	/* The file system should never write 0 bytes without
++	 * returning an error
++	 */
++	BUG_ON(xdr.p == p_save);
++	BUG_ON(xdr.p > xdr.end);
++
++	/* Update the xdr stream with the number of bytes encoded
++	 * by the file system.
++	 */
++	p = xdr.p;
++	ADJUST_ARGS();
++
++handle_notifications:
++	/* Encode supported device notifications */
++	RESERVE_SPACE(4);
++	if (sb->s_pnfs_op->set_device_notify) {
++		struct pnfs_devnotify_arg dn_args;
++
++		dn_args.dn_layout_type = gdev->gd_layout_type;
++		dn_args.dn_devid = gdev->gd_devid;
++		dn_args.dn_notify_types = gdev->gd_notify_types;
++		nfserr = sb->s_pnfs_op->set_device_notify(sb, &dn_args);
++		if (nfserr)
++			goto err;
++		WRITE32(dn_args.dn_notify_types);
++	} else {
++		WRITE32(0);
++	}
++	ADJUST_ARGS();
++
++out:
++	return nfserrno(nfserr);	/* NOTE(review): nfserr holds errno-style values here despite being __be32 */
++toosmall:
++	dprintk("%s: maxcount too small\n", __func__);
++	RESERVE_SPACE(4);
++	WRITE32((p_save ? (xdr.p - p_save) * 4 : 0) + type_notify_len);
++	ADJUST_ARGS();
++	goto out;
++err:
++	/* Rewind to the beginning */
++	p = p_in;
++	ADJUST_ARGS();
++	if (nfserr == -ETOOSMALL)
++		goto toosmall;
++	printk(KERN_ERR "%s: export ERROR %d\n", __func__, nfserr);
++	goto out;
++}
++
++static __be32
++nfsd4_encode_layoutget(struct nfsd4_compoundres *resp,
++		       __be32 nfserr,
++		       struct nfsd4_pnfs_layoutget *lgp)
++{
++	int maxcount, leadcount;
++	struct super_block *sb;
++	struct exp_xdr_stream xdr;
++	__be32 *p, *p_save, *p_start = resp->p;
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	sb = lgp->lg_fhp->fh_dentry->d_inode->i_sb;	/* NOTE(review): sb is not read again in this function */
++	maxcount = PAGE_SIZE;	/* start from the smaller of PAGE_SIZE and lg_maxcount */
++	if (maxcount > lgp->lg_maxcount)
++		maxcount = lgp->lg_maxcount;
++
++	/* Check for space on xdr stream */
++	leadcount = 36 + sizeof(stateid_opaque_t);
++	RESERVE_SPACE(leadcount);
++	/* Skip the metadata for now; it is encoded after the file system's layout */
++	p += XDR_QUADLEN(leadcount);
++	ADJUST_ARGS();
++
++	/* Ensure there is room for ret_on_close, off, len, iomode, type */
++	maxcount -= leadcount;
++	if (maxcount < 0) {
++		printk(KERN_ERR "%s: buffer too small\n", __func__);
++		nfserr = nfserr_toosmall;
++		goto err;
++	}
++
++	/* Set xdr info so file system can encode layout */
++	xdr.p = p_save = resp->p;
++	xdr.end = resp->end;
++	if (xdr.end - xdr.p > exp_xdr_qwords(maxcount & ~3))
++		xdr.end = xdr.p + exp_xdr_qwords(maxcount & ~3);
++
++	/* Retrieve, encode, and merge layout; process stateid */
++	nfserr = nfs4_pnfs_get_layout(lgp, &xdr);
++	if (nfserr)
++		goto err;
++
++	/* Ensure file system returned enough bytes for the client
++	 * to access.
++	 */
++	if (lgp->lg_seg.length < lgp->lg_minlength) {
++		nfserr = nfserr_badlayout;
++		goto err;
++	}
++
++	/* The file system should never write 0 bytes without
++	 * returning an error
++	 */
++	BUG_ON(xdr.p == p_save);
++
++	/* Rewind to beginning and encode attrs */
++	resp->p = p_start;
++	RESERVE_SPACE(4);
++	WRITE32(lgp->lg_roc);	/* return on close */
++	ADJUST_ARGS();
++	nfsd4_encode_stateid(resp, &lgp->lg_sid);
++	RESERVE_SPACE(28);
++	/* Note: response logr_layout array count, always one for now */
++	WRITE32(1);
++	WRITE64(lgp->lg_seg.offset);
++	WRITE64(lgp->lg_seg.length);
++	WRITE32(lgp->lg_seg.iomode);
++	WRITE32(lgp->lg_seg.layout_type);
++
++	/* Update the xdr stream with the number of bytes written
++	 * by the file system
++	 */
++	p = xdr.p;
++	ADJUST_ARGS();
++
++	return nfs_ok;
++err:
++	resp->p = p_start;	/* drop anything encoded so far */
++	return nfserr;
++}
++
++static __be32
++nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
++			  struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	__be32 *p;
++
++	if (nfserr)
++		goto out;
++
++	RESERVE_SPACE(4);
++	WRITE32(lcp->res.lc_size_chg);
++	ADJUST_ARGS();
++	if (lcp->res.lc_size_chg) {
++		RESERVE_SPACE(8);
++		WRITE64(lcp->res.lc_newsize);
++		ADJUST_ARGS();
++	}
++out:
++	return nfserr;
++}
++
++static __be32
++nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
++			  struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	__be32 *p;
++
++	if (nfserr)
++		goto out;
++
++	RESERVE_SPACE(4);
++	WRITE32(lrp->lrs_present != 0);    /* got stateid? */
++	ADJUST_ARGS();
++	if (lrp->lrs_present)
++		nfsd4_encode_stateid(resp, &lrp->lr_sid);
++out:
++	return nfserr;
++}
++#endif /* CONFIG_PNFSD */
++
+ static __be32
+ nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
+ {
+@@ -3129,11 +3659,19 @@ static nfsd4_enc nfsd4_enc_ops[] = {
+ 	[OP_DESTROY_SESSION]	= (nfsd4_enc)nfsd4_encode_destroy_session,
+ 	[OP_FREE_STATEID]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_GET_DIR_DELEGATION]	= (nfsd4_enc)nfsd4_encode_noop,
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_getdevinfo,
++	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_getdevlist,
++	[OP_LAYOUTCOMMIT]	= (nfsd4_enc)nfsd4_encode_layoutcommit,
++	[OP_LAYOUTGET]		= (nfsd4_enc)nfsd4_encode_layoutget,
++	[OP_LAYOUTRETURN]	= (nfsd4_enc)nfsd4_encode_layoutreturn,
++#else  /* CONFIG_PNFSD */
+ 	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTCOMMIT]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTGET]		= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTRETURN]	= (nfsd4_enc)nfsd4_encode_noop,
++#endif /* CONFIG_PNFSD */
+ 	[OP_SECINFO_NO_NAME]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_SEQUENCE]		= (nfsd4_enc)nfsd4_encode_sequence,
+ 	[OP_SET_SSV]		= (nfsd4_enc)nfsd4_encode_noop,
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
+--- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-08-23 12:08:29.094491943 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 12:09:03.317501495 -0400
+@@ -13,10 +13,15 @@
+ #include <linux/nfsd/syscall.h>
+ #include <linux/lockd/lockd.h>
+ #include <linux/sunrpc/clnt.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
+ 
+ #include "nfsd.h"
+ #include "cache.h"
+ 
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++#include <linux/nfsd4_spnfs.h>
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
+ /*
+  *	We have a single directory with 9 nodes in it.
+  */
+@@ -49,6 +54,9 @@ enum {
+ 	NFSD_Gracetime,
+ 	NFSD_RecoveryDir,
+ #endif
++#ifdef CONFIG_PNFSD
++	NFSD_pnfs_dlm_device,
++#endif
+ };
+ 
+ /*
+@@ -74,6 +82,9 @@ static ssize_t write_leasetime(struct fi
+ static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
+ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
+ #endif
++#ifdef CONFIG_PNFSD
++static ssize_t write_pnfs_dlm_device(struct file *file, char *buf, size_t size);
++#endif
+ 
+ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
+ 	[NFSD_Svc] = write_svc,
+@@ -96,6 +107,9 @@ static ssize_t (*write_op[])(struct file
+ 	[NFSD_Gracetime] = write_gracetime,
+ 	[NFSD_RecoveryDir] = write_recoverydir,
+ #endif
++#ifdef CONFIG_PNFSD
++	[NFSD_pnfs_dlm_device] = write_pnfs_dlm_device,
++#endif
+ };
+ 
+ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
+@@ -1349,6 +1363,68 @@ static ssize_t write_recoverydir(struct 
+ 
+ #endif
+ 
++#ifdef CONFIG_PNFSD
++
++static ssize_t __write_pnfs_dlm_device(struct file *file, char *buf,
++				       size_t size)
++{
++	char *mesg = buf;
++	char *pnfs_dlm_device;
++	int max_size = NFSD_PNFS_DLM_DEVICE_MAX;
++	int len, ret = 0;
++
++	if (size > 0) {
++		ret = -EINVAL;
++		if (size > max_size || buf[size-1] != '\n')
++			return ret;
++		buf[size-1] = 0;
++
++		pnfs_dlm_device = mesg;
++		len = qword_get(&mesg, pnfs_dlm_device, size);
++		if (len <= 0)
++			return ret;
++
++		ret = nfsd4_set_pnfs_dlm_device(pnfs_dlm_device, len);
++	} else
++		return nfsd4_get_pnfs_dlm_device_list(buf, SIMPLE_TRANSACTION_LIMIT);
++
++	return ret <= 0 ? ret : strlen(buf);
++}
++
++/**
++ * write_pnfs_dlm_device - Set or report the current pNFS data server list
++ *
++ * Input:
++ *			buf:		ignored
++ *			size:		zero
++ *
++ * OR
++ *
++ * Input:
++ *			buf:		C string containing a block device name,
++ *					a colon, and then a comma separated
++ *					list of pNFS data server IPv4 addresses
++ *			size:		non-zero length of C string in @buf
++ * Output:
++ *	On success:	passed-in buffer filled with '\n'-terminated C
++ *			string containing a block device name, a colon, and
++ *			then a comma separated list of pNFS
++ *			data server IPv4 addresses.
++ *			return code is the size in bytes of the string
++ *	On error:	return code is a negative errno value
++ */
++static ssize_t write_pnfs_dlm_device(struct file *file, char *buf, size_t size)
++{
++	ssize_t rv;
++
++	mutex_lock(&nfsd_mutex);
++	rv = __write_pnfs_dlm_device(file, buf, size);
++	mutex_unlock(&nfsd_mutex);
++	return rv;
++}
++
++#endif /* CONFIG_PNFSD */
++
+ /*----------------------------------------------------------------------------*/
+ /*
+  *	populating the filesystem.
+@@ -1383,6 +1459,10 @@ static int nfsd_fill_super(struct super_
+ 		[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
+ 		[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
+ #endif
++#ifdef CONFIG_PNFSD
++		[NFSD_pnfs_dlm_device] = {"pnfs_dlm_device", &transaction_ops,
++					   S_IWUSR|S_IRUSR},
++#endif
+ 		/* last one */ {""}
+ 	};
+ 	return simple_fill_super(sb, 0x6e667364, nfsd_files);
+@@ -1421,6 +1501,9 @@ static int create_proc_exports_entry(voi
+ }
+ #endif
+ 
++#if defined(CONFIG_SPNFS_BLOCK)
++int nfsd_bl_init(void);
++#endif
+ static int __init init_nfsd(void)
+ {
+ 	int retval;
+@@ -1443,6 +1526,15 @@ static int __init init_nfsd(void)
+ 	retval = create_proc_exports_entry();
+ 	if (retval)
+ 		goto out_free_idmap;
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++	retval = spnfs_init_proc();
++	if (retval != 0)
++		goto out_free_all;	/* exports proc entry exists by now */
++#if defined(CONFIG_SPNFS_BLOCK)
++	nfsd_bl_init();
++#endif /* CONFIG_SPNFS_BLOCK */
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
+ 	retval = register_filesystem(&nfsd_fs_type);
+ 	if (retval)
+ 		goto out_free_all;
+@@ -1465,7 +1557,22 @@ out_free_stat:
+ 
+ static void __exit exit_nfsd(void)
+ {
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++	remove_proc_entry("fs/nfs/spnfs/recall", NULL);
++	remove_proc_entry("fs/nfs/spnfs/layoutseg", NULL);
++	remove_proc_entry("fs/nfs/spnfs/getfh", NULL);
++	remove_proc_entry("fs/nfs/spnfs/config", NULL);
++	remove_proc_entry("fs/nfs/spnfs/ctl", NULL);
++	remove_proc_entry("fs/nfs/spnfs", NULL);
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS_LAYOUTSEGMENTS)
++	remove_proc_entry("fs/nfs/spnfs/layoutseg", NULL);
++	remove_proc_entry("fs/nfs/spnfs/layoutsegsize", NULL);
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS_LAYOUTSEGMENTS */
++
+ 	nfsd_export_shutdown();
++	nfsd4_pnfs_dlm_shutdown();
+ 	nfsd_reply_cache_shutdown();
+ 	remove_proc_entry("fs/nfs/exports", NULL);
+ 	remove_proc_entry("fs/nfs", NULL);
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
+--- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-08-23 12:08:29.095491390 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 12:09:03.318355741 -0400
+@@ -285,11 +285,17 @@ extern time_t nfsd4_grace;
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
+ 	NFSD4_SUPPORTED_ATTRS_WORD0
+ 
++#if defined(CONFIG_PNFSD)
++#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
++	(NFSD4_SUPPORTED_ATTRS_WORD1 | FATTR4_WORD1_FS_LAYOUT_TYPES)
++#else /* CONFIG_PNFSD */
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
+ 	NFSD4_SUPPORTED_ATTRS_WORD1
++#endif /* CONFIG_PNFSD */
+ 
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
+-	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
++	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT | \
++	 FATTR4_WORD2_LAYOUT_BLKSIZE)
+ 
+ static inline u32 nfsd_suppattrs0(u32 minorversion)
+ {
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.c
+--- linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-08-23 12:09:03.319511586 -0400
+@@ -10,6 +10,7 @@
+ #include <linux/exportfs.h>
+ 
+ #include <linux/sunrpc/svcauth_gss.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
+ #include "nfsd.h"
+ #include "vfs.h"
+ #include "auth.h"
+@@ -139,6 +140,7 @@ static inline __be32 check_pseudo_root(s
+ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
+ {
+ 	struct knfsd_fh	*fh = &fhp->fh_handle;
++	int fsid_type;
+ 	struct fid *fid = NULL, sfid;
+ 	struct svc_export *exp;
+ 	struct dentry *dentry;
+@@ -159,7 +161,8 @@ static __be32 nfsd_set_fh_dentry(struct 
+ 			return error;
+ 		if (fh->fh_auth_type != 0)
+ 			return error;
+-		len = key_len(fh->fh_fsid_type) / 4;
++		fsid_type = pnfs_fh_fsid_type(fh);
++		len = key_len(fsid_type) / 4;
+ 		if (len == 0)
+ 			return error;
+ 		if  (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
+@@ -172,7 +175,7 @@ static __be32 nfsd_set_fh_dentry(struct 
+ 		data_left -= len;
+ 		if (data_left < 0)
+ 			return error;
+-		exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth);
++		exp = rqst_exp_find(rqstp, fsid_type, fh->fh_auth);
+ 		fid = (struct fid *)(fh->fh_auth + len);
+ 	} else {
+ 		__u32 tfh[2];
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.h
+--- linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-08-23 12:09:03.319511586 -0400
+@@ -14,6 +14,7 @@ enum nfsd_fsid {
+ 	FSID_UUID8,
+ 	FSID_UUID16,
+ 	FSID_UUID16_INUM,
++	FSID_MAX
+ };
+ 
+ enum fsid_source {
+@@ -205,4 +206,42 @@ fh_unlock(struct svc_fh *fhp)
+ 	}
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++/*
++ * fh_fsid_type is overloaded to indicate whether a filehandle was one supplied
++ * to a DS by LAYOUTGET.  nfs4_preprocess_stateid_op() uses this to decide how
++ * to handle a given stateid.
++ */
++static inline int pnfs_fh_is_ds(struct knfsd_fh *fh)
++{
++	return fh->fh_fsid_type >= FSID_MAX;
++}
++
++static inline void pnfs_fh_mark_ds(struct knfsd_fh *fh)
++{
++	BUG_ON(fh->fh_version != 1);
++	BUG_ON(pnfs_fh_is_ds(fh));
++	fh->fh_fsid_type += FSID_MAX;
++}
++
++#else  /* CONFIG_PNFSD */
++
++static inline int pnfs_fh_is_ds(struct knfsd_fh *fh)
++{
++	return 0;
++}
++
++#endif /* CONFIG_PNFSD */
++
++/* allows fh_verify() to check the real fsid_type (i.e., not overloaded). */
++static inline int pnfs_fh_fsid_type(struct knfsd_fh *fh)
++{
++	int fsid_type = fh->fh_fsid_type;
++
++	if (pnfs_fh_is_ds(fh))
++		return fsid_type - FSID_MAX;
++	return fsid_type;
++}
++
+ #endif /* _LINUX_NFSD_FH_INT_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/nfssvc.c
+--- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-08-23 12:08:27.631563969 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-08-23 12:09:03.320416974 -0400
+@@ -115,7 +115,7 @@ struct svc_program		nfsd_program = {
+ 
+ };
+ 
+-u32 nfsd_supported_minorversion;
++u32 nfsd_supported_minorversion = NFSD_SUPPORTED_MINOR_VERSION;
+ 
+ int nfsd_vers(int vers, enum vers_op change)
+ {
+diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pnfsd.h
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-08-23 12:09:03.321376171 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-08-23 12:09:03.321376171 -0400
+@@ -0,0 +1,143 @@
++/*
++ *  Copyright (c) 2005 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef LINUX_NFSD_PNFSD_H
++#define LINUX_NFSD_PNFSD_H
++
++#include <linux/list.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++#include "state.h"
++#include "xdr4.h"
++
++/* outstanding layout stateid */
++struct nfs4_layout_state {
++	struct list_head	ls_perfile;
++	struct list_head	ls_layouts; /* list of nfs4_layouts */
++	struct kref		ls_ref;
++	struct nfs4_client	*ls_client;
++	struct nfs4_file	*ls_file;
++	stateid_t		ls_stateid;
++};
++
++/* outstanding layout */
++struct nfs4_layout {
++	struct list_head		lo_perfile;	/* hash by f_id */
++	struct list_head		lo_perclnt;	/* hash by clientid */
++	struct list_head		lo_perstate;
++	struct nfs4_file		*lo_file;	/* backpointer */
++	struct nfs4_client		*lo_client;
++	struct nfs4_layout_state	*lo_state;
++	struct nfsd4_layout_seg 	lo_seg;
++};
++
++struct pnfs_inval_state {
++	struct knfsd_fh		mdsfh; /* needed only by invalidate all */
++	stateid_t		stid;
++	clientid_t		clid;
++	u32			status;
++};
++
++/* pNFS Data Server state */
++#define DS_STATEID_VALID   0
++#define DS_STATEID_ERROR   1
++#define DS_STATEID_NEW     2
++
++struct pnfs_ds_stateid {
++	struct list_head	ds_hash;        /* ds_stateid hash entry */
++	struct list_head	ds_perclid;     /* per client hash entry */
++	stateid_t		ds_stid;
++	struct knfsd_fh		ds_fh;
++	unsigned long		ds_access;
++	u32			ds_status;      /* from MDS */
++	u32			ds_verifier[2]; /* from MDS */
++	wait_queue_head_t	ds_waitq;
++	unsigned long		ds_flags;
++	struct kref		ds_ref;
++	clientid_t		ds_mdsclid;
++};
++
++struct pnfs_ds_clientid {
++	struct list_head	dc_hash;        /* mds_clid_hashtbl entry */
++	struct list_head	dc_stateid;     /* ds_stateid head */
++	struct list_head	dc_permdsid;    /* per mdsid hash entry */
++	clientid_t		dc_mdsclid;
++	struct kref		dc_ref;
++	uint32_t		dc_mdsid;
++};
++
++struct pnfs_mds_id {
++	struct list_head	di_hash;        /* mds_nodeid list entry */
++	struct list_head	di_mdsclid;     /* mds_clientid head */
++	uint32_t		di_mdsid;
++	time_t			di_mdsboot;	/* mds boot time */
++	struct kref		di_ref;
++};
++
++/* notify device request (from exported filesystem) */
++struct nfs4_notify_device {
++	struct nfsd4_pnfs_cb_dev_list  *nd_list;
++	struct nfs4_client	       *nd_client;
++	struct list_head	        nd_perclnt;
++
++	void				*nd_args;	/* nfsd internal */
++};
++
++u64 find_create_sbid(struct super_block *);
++struct super_block *find_sbid_id(u64);
++__be32 nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *, struct exp_xdr_stream *);
++int nfs4_pnfs_return_layout(struct super_block *, struct svc_fh *,
++					struct nfsd4_pnfs_layoutreturn *);
++int nfs4_pnfs_cb_get_state(struct super_block *, struct pnfs_get_state *);
++int nfs4_pnfs_cb_change_state(struct pnfs_get_state *);
++void nfs4_ds_get_verifier(stateid_t *, struct super_block *, u32 *);
++int put_layoutrecall(struct nfs4_layoutrecall *);
++void nomatching_layout(struct nfs4_layoutrecall *);
++void *layoutrecall_done(struct nfs4_layoutrecall *);
++int nfsd4_cb_layout(struct nfs4_layoutrecall *);
++int nfsd_layout_recall_cb(struct super_block *, struct inode *,
++			  struct nfsd4_pnfs_cb_layout *);
++int nfsd_device_notify_cb(struct super_block *,
++			  struct nfsd4_pnfs_cb_dev_list *);
++int nfsd4_cb_notify_device(struct nfs4_notify_device *);
++void pnfs_set_device_notify(clientid_t *, unsigned int types);
++void pnfs_clear_device_notify(struct nfs4_client *);
++
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++extern struct sockaddr pnfsd_lexp_addr;
++extern size_t pnfs_lexp_addr_len;
++
++extern void pnfsd_lexp_init(struct inode *);
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
++#endif /* LINUX_NFSD_PNFSD_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-08-23 12:09:03.321376171 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-08-23 12:09:03.322501672 -0400
+@@ -0,0 +1,225 @@
++/*
++ * linux/fs/nfsd/pnfsd_lexp.c
++ *
++ * pNFS export of local filesystems.
++ *
++ * Export local file systems over the files layout type.
++ * The MDS (metadata server) functions also as a single DS (data server).
++ * This is mostly useful for development and debugging purposes.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * Copyright (C) 2008 Benny Halevy, <bhalevy@panasas.com>
++ *
++ * Initial implementation was based on the pnfs-gfs2 patches done
++ * by David M. Richter <richterd@citi.umich.edu>
++ */
++
++#include <linux/sunrpc/svc_xprt.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++struct sockaddr pnfsd_lexp_addr;
++size_t pnfs_lexp_addr_len;
++
++static int
++pnfsd_lexp_layout_type(struct super_block *sb)
++{
++	int ret = LAYOUT_NFSV4_1_FILES;
++	dprintk("<-- %s: return %d\n", __func__, ret);
++	return ret;
++}
++
++static int
++pnfsd_lexp_get_device_iter(struct super_block *sb,
++			   u32 layout_type,
++			   struct nfsd4_pnfs_dev_iter_res *res)
++{
++	dprintk("--> %s: sb=%p\n", __func__, sb);
++
++	BUG_ON(layout_type != LAYOUT_NFSV4_1_FILES);
++
++	res->gd_eof = 1;
++	if (res->gd_cookie)
++		return -ENOENT;
++	res->gd_cookie = 1;
++	res->gd_verf = 1;
++	res->gd_devid = 1;
++
++	dprintk("<-- %s: return 0\n", __func__);
++	return 0;
++}
++
++static int
++pnfsd_lexp_get_device_info(struct super_block *sb,
++			   struct exp_xdr_stream *xdr,
++			   u32 layout_type,
++			   const struct nfsd4_pnfs_deviceid *devid)
++{
++	int err;
++	struct pnfs_filelayout_device fdev;
++	struct pnfs_filelayout_multipath fl_devices[1];
++	u32 fl_stripe_indices[1] = { 0 };
++	struct pnfs_filelayout_devaddr daddr;
++	/* %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x.%03u.%03u */
++	char daddr_buf[8*4 + 2*3 + 10];
++
++	dprintk("--> %s: sb=%p\n", __func__, sb);
++
++	BUG_ON(layout_type != LAYOUT_NFSV4_1_FILES);
++
++	memset(&fdev, '\0', sizeof(fdev));
++
++	if (devid->devid != 1) {
++		printk(KERN_ERR "%s: WARNING: didn't receive a deviceid of 1 "
++			"(got: 0x%llx)\n", __func__, devid->devid);
++		err = -EINVAL;
++		goto out;
++	}
++
++	/* the local export always has exactly one device: this server */
++	fdev.fl_device_length = 1;
++	fdev.fl_device_list = fl_devices;
++
++	fdev.fl_stripeindices_length = fdev.fl_device_length;
++	fdev.fl_stripeindices_list = fl_stripe_indices;
++
++	daddr.r_addr.data = daddr_buf;
++	daddr.r_addr.len = sizeof(daddr_buf);
++	err = __svc_print_netaddr(&pnfsd_lexp_addr, &daddr.r_addr);
++	if (err < 0)
++		goto out;
++	daddr.r_addr.len = err;
++	switch (pnfsd_lexp_addr.sa_family) {
++	case AF_INET:
++		daddr.r_netid.data = "tcp";
++		daddr.r_netid.len = 3;
++		break;
++	case AF_INET6:
++		daddr.r_netid.data = "tcp6";
++		daddr.r_netid.len = 4;
++		break;
++	default:
++		BUG();
++	}
++	fdev.fl_device_list[0].fl_multipath_length = 1;
++	fdev.fl_device_list[0].fl_multipath_list = &daddr;
++
++	/* have nfsd encode the device info */
++	err = filelayout_encode_devinfo(xdr, &fdev);
++out:
++	dprintk("<-- %s: return %d\n", __func__, err);
++	return err;
++}
++
++static int get_stripe_unit(int blocksize)
++{
++	if (blocksize < NFSSVC_MAXBLKSIZE)
++		blocksize = NFSSVC_MAXBLKSIZE - (NFSSVC_MAXBLKSIZE % blocksize);
++	dprintk("%s: return %d\n", __func__, blocksize);
++	return blocksize;
++}
++
++static enum nfsstat4
++pnfsd_lexp_layout_get(struct inode *inode,
++		      struct exp_xdr_stream *xdr,
++		      const struct nfsd4_pnfs_layoutget_arg *arg,
++		      struct nfsd4_pnfs_layoutget_res *res)
++{
++	enum nfsstat4 rc = NFS4_OK;
++	struct pnfs_filelayout_layout *layout = NULL;
++	struct knfsd_fh *fhp = NULL;
++
++	dprintk("--> %s: inode=%p\n", __func__, inode);
++
++	res->lg_seg.layout_type = LAYOUT_NFSV4_1_FILES;
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;
++
++	layout = kzalloc(sizeof(*layout), GFP_KERNEL);
++	if (layout == NULL) {
++		rc = -ENOMEM;
++		goto error;
++	}
++
++	/* Set file layout response args */
++	layout->lg_layout_type = LAYOUT_NFSV4_1_FILES;
++	layout->lg_stripe_type = STRIPE_SPARSE;
++	layout->lg_commit_through_mds = true;
++	layout->lg_stripe_unit = get_stripe_unit(inode->i_sb->s_blocksize);
++	layout->lg_fh_length = 1;
++	layout->device_id.sbid = arg->lg_sbid;
++	layout->device_id.devid = 1;				/*FSFTEMP*/
++	layout->lg_first_stripe_index = 0;			/*FSFTEMP*/
++	layout->lg_pattern_offset = 0;
++
++	fhp = kmalloc(sizeof(*fhp), GFP_KERNEL);
++	if (fhp == NULL) {
++		rc = -ENOMEM;
++		goto error;
++	}
++
++	memcpy(fhp, arg->lg_fh, sizeof(*fhp));
++	pnfs_fh_mark_ds(fhp);
++	layout->lg_fh_list = fhp;
++
++	/* Call nfsd to encode layout */
++	rc = filelayout_encode_layout(xdr, layout);
++exit:
++	kfree(layout);
++	kfree(fhp);
++	dprintk("<-- %s: return %d\n", __func__, rc);
++	return rc;
++
++error:
++	res->lg_seg.length = 0;
++	goto exit;
++}
++
++static int
++pnfsd_lexp_layout_commit(struct inode *inode,
++			 const struct nfsd4_pnfs_layoutcommit_arg *args,
++			 struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	dprintk("%s: (unimplemented)\n", __func__);
++
++	return 0;
++}
++
++static int
++pnfsd_lexp_layout_return(struct inode *inode,
++			 const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	dprintk("%s: (unimplemented)\n", __func__);
++
++	return 0;
++}
++
++static int pnfsd_lexp_get_state(struct inode *inode, struct knfsd_fh *fh,
++				struct pnfs_get_state *p)
++{
++	return 0;	/* just use the current stateid */
++}
++
++static struct pnfs_export_operations pnfsd_lexp_ops = {
++	.layout_type = pnfsd_lexp_layout_type,
++	.get_device_info = pnfsd_lexp_get_device_info,
++	.get_device_iter = pnfsd_lexp_get_device_iter,
++	.layout_get = pnfsd_lexp_layout_get,
++	.layout_commit = pnfsd_lexp_layout_commit,
++	.layout_return = pnfsd_lexp_layout_return,
++	.get_state = pnfsd_lexp_get_state,
++};
++
++void
++pnfsd_lexp_init(struct inode *inode)
++{
++	dprintk("%s: &pnfsd_lexp_ops=%p\n", __func__, &pnfsd_lexp_ops);
++	inode->i_sb->s_pnfs_op = &pnfsd_lexp_ops;
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_com.c
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-08-23 12:09:03.322501672 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-08-23 12:09:03.323511608 -0400
+@@ -0,0 +1,535 @@
++/*
++ * fs/nfsd/spnfs_com.c
++ *
++ * Communication layer between spNFS kernel and userspace
++ * Based heavily on idmap.c
++ *
++ */
++
++/*
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Marius Aamodt Eriksen <marius@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#include <linux/namei.h>
++#include <linux/mount.h>
++#include <linux/path.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++#include <linux/nfsd/debug.h>
++
++#include <linux/nfsd4_spnfs.h>
++
++#define	NFSDDBG_FACILITY		NFSDDBG_PROC
++
++static ssize_t   spnfs_pipe_upcall(struct file *, struct rpc_pipe_msg *,
++		     char __user *, size_t);
++static ssize_t   spnfs_pipe_downcall(struct file *, const char __user *,
++		     size_t);
++static void      spnfs_pipe_destroy_msg(struct rpc_pipe_msg *);
++
++static struct rpc_pipe_ops spnfs_upcall_ops = {
++	.upcall		= spnfs_pipe_upcall,
++	.downcall	= spnfs_pipe_downcall,
++	.destroy_msg	= spnfs_pipe_destroy_msg,
++};
++
++/* evil global variable */
++struct spnfs *global_spnfs;
++struct spnfs_config *spnfs_config;
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++int spnfs_use_layoutsegments;
++uint64_t layoutsegment_size;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++/*
++ * Used by spnfs_enabled()
++ * Tracks if the subsystem has been initialized at some point.  It doesn't
++ * matter if it's not currently initialized.
++ */
++static int spnfs_enabled_at_some_point;
++
++/* call this to start the ball rolling */
++/* code it like we're going to avoid the global variable in the future */
++int
++nfsd_spnfs_new(void)
++{
++	struct spnfs *spnfs = NULL;
++	struct path path;
++	struct nameidata nd;
++	int rc;
++
++	if (global_spnfs != NULL)
++		return -EEXIST;
++
++	path.mnt = rpc_get_mount();
++	if (IS_ERR(path.mnt))
++		return PTR_ERR(path.mnt);
++
++	spnfs = kzalloc(sizeof(*spnfs), GFP_KERNEL);
++	if (spnfs == NULL) {
++		rc = -ENOMEM;
++		goto err;
++	}
++	/* Must be usable before the pipe becomes visible to userspace */
++	mutex_init(&spnfs->spnfs_lock);
++	mutex_init(&spnfs->spnfs_plock);
++	init_waitqueue_head(&spnfs->spnfs_wq);
++
++	/* FIXME: do not abuse rpc_pipefs/nfs */
++	rc = vfs_path_lookup(path.mnt->mnt_root, path.mnt, "/nfs", 0, &nd);
++	if (rc)
++		goto err;
++
++	spnfs->spnfs_dentry = rpc_mkpipe(nd.path.dentry, "spnfs", spnfs,
++					 &spnfs_upcall_ops, 0);
++	path_put(&nd.path);	/* drop the reference the lookup took */
++	if (IS_ERR(spnfs->spnfs_dentry)) {
++		rc = -EPIPE;
++		goto err;
++	}
++
++	global_spnfs = spnfs;
++	spnfs_enabled_at_some_point = 1;
++	return 0;
++err:
++	rpc_put_mount();
++	kfree(spnfs);
++	return rc;
++}
++
++/* again, code it like we're going to remove the global variable */
++void
++nfsd_spnfs_delete(void)
++{
++	struct spnfs *spnfs = global_spnfs;
++
++	if (!spnfs)
++		return;
++	rpc_unlink(spnfs->spnfs_dentry);
++	rpc_put_mount();
++	global_spnfs = NULL;
++	kfree(spnfs);
++}
++
++/* RPC pipefs upcall/downcall routines */
++/* looks like this code is invoked by the rpc_pipe code */
++/* to handle upcalls on things we've queued elsewhere */
++/* See nfs_idmap_id for an example of enqueueing */
++static ssize_t
++spnfs_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
++    char __user *dst, size_t buflen)
++{
++	char *data = (char *)msg->data + msg->copied;
++	ssize_t mlen = msg->len - msg->copied;
++	ssize_t left;
++
++	if (mlen > buflen)
++		mlen = buflen;
++	/* copy_to_user() returns the bytes NOT copied; it is never negative */
++	left = copy_to_user(dst, data, mlen);
++	if (mlen > 0 && left == mlen) {	/* nothing reached userspace */
++		msg->errno = -EFAULT;
++		return -EFAULT;
++	}
++	mlen -= left;
++	msg->copied += mlen;
++	msg->errno = 0;
++	return mlen;
++}
++
++static ssize_t
++spnfs_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
++{
++	struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
++	struct spnfs *spnfs = (struct spnfs *)rpci->private;
++	struct spnfs_msg *im_in = NULL, *im = &spnfs->spnfs_im;
++	int ret = -EFAULT;
++
++	if (mlen != sizeof(struct spnfs_msg))
++		return -ENOSPC;
++
++	im_in = kmalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im_in == NULL)
++		return -ENOMEM;
++
++	if (copy_from_user(im_in, src, mlen) != 0)
++		goto out_free;	/* ret is -EFAULT; im_in must still be freed */
++
++	mutex_lock(&spnfs->spnfs_plock);
++
++	ret = mlen;
++	im->im_status = im_in->im_status;
++	/* If we got an error, terminate now, and wake up pending upcalls */
++	if (!(im_in->im_status & SPNFS_STATUS_SUCCESS)) {
++		wake_up(&spnfs->spnfs_wq);
++		goto out;
++	}
++
++	ret = -EINVAL;
++	/* Did we match the current upcall? */
++	/* DMXXX: do not understand the comment above, from original code */
++	/* DMXXX: when do we _not_ match the current upcall? */
++	/* DMXXX: anyway, let's do a simplistic check */
++	if (im_in->im_type == im->im_type) {
++		/* copy the response into the spnfs struct */
++		memcpy(&im->im_res, &im_in->im_res, sizeof(im->im_res));
++		ret = mlen;
++	} else
++		dprintk("spnfs: downcall type != upcall type\n");
++
++	wake_up(&spnfs->spnfs_wq);
++/* DMXXX handle rval processing */
++out:
++	mutex_unlock(&spnfs->spnfs_plock);
++out_free:
++	kfree(im_in);
++	return ret;
++}
++
++static void
++spnfs_pipe_destroy_msg(struct rpc_pipe_msg *msg)
++{
++	struct spnfs_msg *im = msg->data;
++	struct spnfs *spnfs = container_of(im, struct spnfs, spnfs_im);
++
++	if (msg->errno >= 0)	/* only a negative errno is handled here */
++		return;
++	mutex_lock(&spnfs->spnfs_plock);
++	im->im_status = SPNFS_STATUS_FAIL;  /* DMXXX */
++	wake_up(&spnfs->spnfs_wq);	/* wake whoever sleeps on spnfs_wq */
++	mutex_unlock(&spnfs->spnfs_plock);
++}
++
++/* generic upcall.  called by functions in spnfs_ops.c  */
++int
++spnfs_upcall(struct spnfs *spnfs, struct spnfs_msg *upmsg,
++		union spnfs_msg_res *res)
++{
++	struct rpc_pipe_msg msg;
++	struct spnfs_msg *im;
++	DECLARE_WAITQUEUE(wq, current);
++	int ret = -EIO;		/* returned unless the reply reports success */
++	int rval;
++
++	im = &spnfs->spnfs_im;	/* the one spnfs_im slot inside *spnfs */
++
++	mutex_lock(&spnfs->spnfs_lock);
++	mutex_lock(&spnfs->spnfs_plock);
++	/* stage the request in that slot and describe it in the pipe message */
++	memset(im, 0, sizeof(*im));
++	memcpy(im, upmsg, sizeof(*upmsg));
++
++	memset(&msg, 0, sizeof(msg));
++	msg.data = im;
++	msg.len = sizeof(*im);
++
++	add_wait_queue(&spnfs->spnfs_wq, &wq);
++	rval = rpc_queue_upcall(spnfs->spnfs_dentry->d_inode, &msg);
++	if (rval < 0) {
++		remove_wait_queue(&spnfs->spnfs_wq, &wq);
++		goto out;
++	}
++	/* sleep until a pipe callback calls wake_up() on spnfs_wq */
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	mutex_unlock(&spnfs->spnfs_plock);	/* callbacks take plock first */
++	schedule();
++	current->state = TASK_RUNNING;
++	remove_wait_queue(&spnfs->spnfs_wq, &wq);
++	mutex_lock(&spnfs->spnfs_plock);
++
++	if (im->im_status & SPNFS_STATUS_SUCCESS) {
++		/* copy our result from the upcall */
++		memcpy(res, &im->im_res, sizeof(*res));
++		ret = 0;
++	}
++
++out:
++	memset(im, 0, sizeof(*im));	/* clear the slot on every exit path */
++	mutex_unlock(&spnfs->spnfs_plock);
++	mutex_unlock(&spnfs->spnfs_lock);
++	return(ret);
++}
++
++/*
++ * This is used to determine if the spnfsd daemon has been started at
++ * least once since the system came up.  This is used by the export
++ * mechanism to decide if spnfs is in use.
++ *
++ * Returns non-zero if the spnfsd has initialized the communication pipe
++ * at least once.
++ */
++int spnfs_enabled(void)
++{
++	return spnfs_enabled_at_some_point;
++}
++
++#ifdef CONFIG_PROC_FS
++
++/*
++ * procfs virtual files for user/kernel space communication:
++ *
++ * ctl - currently just an on/off switch...can be expanded
++ * getfh - fd to fh conversion
++ * recall - recall a layout from the command line, for example:
++ *		echo <path> > /proc/fs/spnfs/recall
++ * config - configuration info, e.g., stripe size, num ds, etc.
++ */
++
++/*-------------- start ctl -------------------------*/
++static ssize_t ctl_write(struct file *file, const char __user *buf,
++			 size_t count, loff_t *offset)
++{
++	int cmd, rc = 0;
++
++	if (count < sizeof(cmd))	/* too short to hold the switch */
++		return -EINVAL;
++	if (copy_from_user(&cmd, buf, sizeof(cmd)))
++		return -EFAULT;
++	if (cmd)
++		rc = nfsd_spnfs_new();
++	else
++		nfsd_spnfs_delete();
++	/* report the whole buffer as consumed unless enabling failed */
++	return rc ? rc : (ssize_t)count;
++}
++
++static const struct file_operations ctl_ops = {
++	.write		= ctl_write,	/* the only handler this file provides */
++};
++/*-------------- end ctl ---------------------------*/
++
++/*-------------- start config -------------------------*/
++static ssize_t config_write(struct file *file, const char __user *buf,
++			    size_t count, loff_t *offset)
++{
++	static struct spnfs_config cfg;
++	if (count > sizeof(cfg))
++		return -EINVAL;	/* copying more would overrun the static cfg */
++	if (copy_from_user(&cfg, buf, count))
++		return -EFAULT;
++	spnfs_config = &cfg;	/* publish the configuration to the I/O paths */
++	return 0;
++}
++
++static const struct file_operations config_ops = {
++	.write		= config_write,	/* the only handler this file provides */
++};
++/*-------------- end config ---------------------------*/
++
++/*-------------- start getfh -----------------------*/
++static int getfh_open(struct inode *inode, struct file *file)
++{
++	/* zeroed: getfh_read() copies this out even if nothing was written */
++	file->private_data = kzalloc(sizeof(struct nfs_fh), GFP_KERNEL);
++	if (file->private_data == NULL)
++		return -ENOMEM;
++	return 0;
++}
++
++static ssize_t getfh_read(struct file *file, char __user *buf, size_t count,
++			  loff_t *offset)
++{
++	if (copy_to_user(buf, file->private_data,
++			 min(count, sizeof(struct nfs_fh))))	/* stay in buf */
++		return -EFAULT;
++	return count;
++}
++
++static ssize_t getfh_write(struct file *file, const char __user *buf,
++			   size_t count, loff_t *offset)
++{
++	int fd;
++
++	if (count < sizeof(fd))		/* too short to hold a descriptor */
++		return -EINVAL;
++	if (copy_from_user(&fd, buf, sizeof(fd)))
++		return -EFAULT;
++	/* resolve fd and store its handle in private_data for getfh_read() */
++	return spnfs_getfh(fd, file->private_data) ? -EIO : (ssize_t)count;
++}
++
++static int getfh_release(struct inode *inode, struct file *file)
++{
++	kfree(file->private_data);	/* buffer allocated in getfh_open() */
++	return 0;
++}
++
++static const struct file_operations getfh_ops = {
++	.open		= getfh_open,	/* allocates the per-open nfs_fh */
++	.read		= getfh_read,	/* copies that nfs_fh to the caller */
++	.write		= getfh_write,	/* takes an fd, fills the nfs_fh */
++	.release	= getfh_release,	/* frees the nfs_fh */
++};
++/*-------------- end getfh ------------------------*/
++
++
++/*-------------- start recall layout --------------*/
++static ssize_t recall_write(struct file *file, const char __user *buf,
++			    size_t count, loff_t *offset)
++{
++	char input[128];
++	char *path, *str, *p;
++	int rc;
++	u64 off = 0, len = 0;	/* values used when the fields are omitted */
++
++	if (count > 128)	/* must fit in input[] */
++		return -EINVAL;
++
++	if (copy_from_user(input, buf, count))
++		return -EFAULT;
++
++	/* assumes newline-terminated path */
++	p = memchr(input, '\n', count);
++	if (p == NULL)
++		return -EINVAL;
++	*p = '\0';		/* the newline becomes the string terminator */
++
++	/*
++	 * Scan for path and, optionally, an offset and length
++	 * of a layout segment to be recalled; if there are two
++	 * fields, they're assumed to be path and offset.
++	 */
++	p = input;
++	path = strsep(&p, " ");
++	if (path == NULL)
++		return -EINVAL;
++
++	str = strsep(&p, " ");	/* optional second field: offset */
++	if (str != NULL) {
++		rc = strict_strtoull(str, 10, &off);
++		if (rc != 0)
++			return -EINVAL;
++
++		str = strsep(&p, " ");	/* optional third field: length */
++		if (str != NULL) {
++			rc = strict_strtoull(str, 10, &len);
++			if (rc != 0)
++				return -EINVAL;
++		}
++	}
++
++	rc = spnfs_test_layoutrecall(path, off, len);
++	if (rc != 0)
++		return rc;
++
++	return count;
++}
++
++static const struct file_operations recall_ops = {
++	.write		= recall_write,	/* the only handler this file provides */
++};
++/*-------------- end recall layout --------------*/
++
++
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++/*-------------- start layoutseg -------------------------*/
++static ssize_t layoutseg_write(struct file *file, const char __user *buf,
++			       size_t count, loff_t *offset)
++{
++	char cmd[3];
++
++	if (copy_from_user(cmd, buf, 1))	/* only the first byte is read */
++		return -EFAULT;
++	if (cmd[0] == '0')
++		spnfs_use_layoutsegments = 0;
++	else			/* any byte other than '0' enables the flag */
++		spnfs_use_layoutsegments = 1;
++
++	return count;
++}
++
++static const struct file_operations layoutseg_ops = {
++	.write		= layoutseg_write,	/* the only handler provided */
++};
++/*-------------- end layoutseg ---------------------------*/
++
++/*-------------- start layoutsegsize -------------------------*/
++static ssize_t layoutsegsize_write(struct file *file, const char __user *buf,
++				   size_t count, loff_t *offset)
++{
++	char cmd[50] = "";	/* zero-filled, so the copy stays terminated */
++
++	/* copy only what was written, leaving room for the terminator */
++	if (copy_from_user(cmd, buf, min(count, sizeof(cmd) - 1)))
++		return -EFAULT;
++	layoutsegment_size = simple_strtoull(cmd, NULL, 10);
++	return count;
++}
++
++static const struct file_operations layoutsegsize_ops = {
++	.write		= layoutsegsize_write,	/* the only handler provided */
++};
++/*-------------- end layoutsegsize ---------------------------*/
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++int
++spnfs_init_proc(void)
++{
++	/* every file created under /proc/fs/spnfs, in creation order */
++	static const struct {
++		const char *name;
++		const struct file_operations *fops;
++	} files[] = {
++		{ "fs/spnfs/ctl", &ctl_ops },
++		{ "fs/spnfs/config", &config_ops },
++		{ "fs/spnfs/getfh", &getfh_ops },
++		{ "fs/spnfs/recall", &recall_ops },
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++		{ "fs/spnfs/layoutseg", &layoutseg_ops },
++		{ "fs/spnfs/layoutsegsize", &layoutsegsize_ops },
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++	};
++	struct proc_dir_entry *entry;
++	int i;
++
++	entry = proc_mkdir("fs/spnfs", NULL);
++	if (!entry)
++		return -ENOMEM;
++
++	for (i = 0; i < ARRAY_SIZE(files); i++) {
++		entry = create_proc_entry(files[i].name, 0, NULL);
++		if (!entry)
++			goto out_unwind;
++		entry->proc_fops = files[i].fops;
++	}
++
++	return 0;
++
++out_unwind:
++	/*
++	 * Undo the partial setup in reverse order so that a failed init
++	 * leaves no stale entries pointing at these handlers.
++	 */
++	while (--i >= 0)
++		remove_proc_entry(files[i].name, NULL);
++	remove_proc_entry("fs/spnfs", NULL);
++	return -ENOMEM;
++}
++#endif /* CONFIG_PROC_FS */
+diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-08-23 12:09:03.324501390 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-08-23 12:09:03.324501390 -0400
+@@ -0,0 +1,878 @@
++/*
++ * fs/nfsd/spnfs_ops.c
++ *
++ * Communication layer between spNFS kernel and userspace
++ *
++ */
++/******************************************************************************
++
++(c) 2007 Network Appliance, Inc.  All Rights Reserved.
++
++Network Appliance provides this source code under the GPL v2 License.
++The GPL v2 license is available at
++http://opensource.org/licenses/gpl-license.php.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++******************************************************************************/
++
++#include <linux/sched.h>
++#include <linux/file.h>
++#include <linux/namei.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++#include "pnfsd.h"
++
++/* comment out CONFIG_SPNFS_TEST for non-test behaviour */
++/* #define CONFIG_SPNFS_TEST 1 */
++
++#define	NFSDDBG_FACILITY		NFSDDBG_PNFS
++
++/*
++ * The functions that are called from elsewhere in the kernel
++ * to perform tasks in userspace
++ *
++ */
++
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++extern int spnfs_use_layoutsegments;
++extern uint64_t layoutsegment_size;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++extern struct spnfs *global_spnfs;
++
++int
++spnfs_layout_type(struct super_block *sb)
++{
++	return LAYOUT_NFSV4_1_FILES;	/* same answer for every sb; sb unused */
++}
++
++enum nfsstat4
++spnfs_layoutget(struct inode *inode, struct exp_xdr_stream *xdr,
++		const struct nfsd4_pnfs_layoutget_arg *lg_arg,
++		struct nfsd4_pnfs_layoutget_res *lg_res)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	struct pnfs_filelayout_layout *flp = NULL;
++	int status, i;
++	enum nfsstat4 nfserr;
++
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	/* im is zeroed: the whole message is copied out to the daemon */
++	im->im_type = SPNFS_TYPE_LAYOUTGET;
++	im->im_args.layoutget_args.inode = inode->i_ino;
++	im->im_args.layoutget_args.generation = inode->i_generation;
++
++	/* call function to queue the msg for upcall */
++	if (spnfs_upcall(spnfs, im, res) != 0) {
++		dprintk("failed spnfs upcall: layoutget\n");
++		nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		goto layoutget_cleanup;
++	}
++	status = res->layoutget_res.status;
++	if (status != 0) {
++		/* FIXME? until user mode is fixed, translate system error */
++		switch (status) {
++		case -E2BIG:
++		case -ETOOSMALL:
++			nfserr = NFS4ERR_TOOSMALL;
++			break;
++		case -ENOMEM:
++		case -EAGAIN:
++		case -EINTR:
++			nfserr = NFS4ERR_LAYOUTTRYLATER;
++			break;
++		case -ENOENT:
++			nfserr = NFS4ERR_BADLAYOUT;
++			break;
++		default:
++			nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		}
++		dprintk("spnfs layout_get upcall: status=%d nfserr=%u\n",
++			status, nfserr);
++		goto layoutget_cleanup;
++	}
++
++	lg_res->lg_return_on_close = 0;
++#if defined(CONFIG_SPNFS_LAYOUTSEGMENTS)
++	/* if spnfs_use_layoutsegments & layoutsegment_size == 0, use */
++	/* the amount requested by the client.			      */
++	if (spnfs_use_layoutsegments) {
++		if (layoutsegment_size != 0)
++			lg_res->lg_seg.length = layoutsegment_size;
++	} else
++		lg_res->lg_seg.length = NFS4_MAX_UINT64;
++#else
++	lg_res->lg_seg.length = NFS4_MAX_UINT64;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++	flp = kmalloc(sizeof(struct pnfs_filelayout_layout), GFP_KERNEL);
++	if (flp == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	flp->device_id.sbid = lg_arg->lg_sbid;
++	flp->device_id.devid = res->layoutget_res.devid;
++	flp->lg_layout_type = 1; /* XXX */
++	flp->lg_stripe_type = res->layoutget_res.stripe_type;
++	flp->lg_commit_through_mds = 0;
++	flp->lg_stripe_unit =  res->layoutget_res.stripe_size;
++	flp->lg_first_stripe_index = 0;
++	flp->lg_pattern_offset = 0;
++	flp->lg_fh_length = res->layoutget_res.stripe_count;
++
++	/* kcalloc checks the count * size multiplication for overflow */
++	flp->lg_fh_list = kcalloc(flp->lg_fh_length, sizeof(struct knfsd_fh),
++				  GFP_KERNEL);
++	if (flp->lg_fh_list == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	/*
++	 * FIX: Doing an extra copy here.  Should group res.flist's fh_len
++	 * and fh_val into a knfsd_fh structure.
++	 */
++	for (i = 0; i < flp->lg_fh_length; i++) {
++		flp->lg_fh_list[i].fh_size = res->layoutget_res.flist[i].fh_len;
++		memcpy(&flp->lg_fh_list[i].fh_base,
++		       res->layoutget_res.flist[i].fh_val,
++		       res->layoutget_res.flist[i].fh_len);
++	}
++
++	/* encode the layoutget body */
++	nfserr = filelayout_encode_layout(xdr, flp);
++
++layoutget_cleanup:
++	if (flp) {
++		kfree(flp->lg_fh_list);
++		kfree(flp);
++	}
++	kfree(im);
++	kfree(res);
++
++	return nfserr;
++}
++
++int
++spnfs_layoutcommit(void)
++{
++	return 0;	/* stub: performs no work and always returns 0 */
++}
++
++int
++spnfs_layoutreturn(struct inode *inode,
++		   const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	return 0;	/* stub: both arguments are ignored; always returns 0 */
++}
++
++int
++spnfs_layoutrecall(struct inode *inode, int type, u64 offset, u64 len)
++{
++	struct super_block *sb;
++	struct nfsd4_pnfs_cb_layout lr;
++
++	switch (type) {
++	case RETURN_FILE:	/* the only type that reaches the callback */
++		sb = inode->i_sb;
++		dprintk("%s: recalling layout for ino = %lu\n",
++			__func__, inode->i_ino);
++		break;
++	case RETURN_FSID:
++		sb = inode->i_sb;
++		dprintk("%s: recalling layout for fsid x (unimplemented)\n",
++			__func__);
++		return 0;
++	case RETURN_ALL:
++		/* XXX figure out how to get a sb since there's no inode ptr */
++		dprintk("%s: recalling all layouts (unimplemented)\n",
++			__func__);
++		return 0;
++	default:
++		return -EINVAL;
++	}
++
++	lr.cbl_recall_type = type;
++	lr.cbl_seg.layout_type = LAYOUT_NFSV4_1_FILES;
++	lr.cbl_seg.clientid = 0;
++	lr.cbl_seg.offset = offset;
++	lr.cbl_seg.length = len;
++	lr.cbl_seg.iomode = IOMODE_ANY;
++	lr.cbl_layoutchanged = 0;
++
++	nfsd_layout_recall_cb(sb, inode, &lr);	/* its result is not checked */
++
++	return 0;
++}
++
++
++int
++spnfs_test_layoutrecall(char *path, u64 offset, u64 len)
++{
++	struct nameidata nd;
++	struct inode *inode;
++	int type, rc;
++
++	dprintk("%s: path=%s, offset=%llu, len=%llu\n",
++		__func__, path, offset, len);
++
++	if (strcmp(path, "all") == 0) {	/* literal "all" selects RETURN_ALL */
++		inode = NULL;
++		type = RETURN_ALL;
++	} else {
++		rc = path_lookup(path, 0, &nd);
++		if (rc != 0)
++			return -ENOENT;
++
++		/*
++		 * XXX todo: add a RETURN_FSID scenario here...maybe if
++		 * inode is a dir...
++		 */
++
++		inode = nd.path.dentry->d_inode;
++		type = RETURN_FILE;
++	}
++
++	if (len == 0)		/* a zero length is widened to the maximum */
++		len = NFS4_MAX_UINT64;
++
++	rc = spnfs_layoutrecall(inode, type, offset, len);
++
++	if (type != RETURN_ALL)	/* nd was only looked up on the path branch */
++		path_put(&nd.path);
++	return rc;
++}
++
++int
++spnfs_getdeviceiter(struct super_block *sb,
++		    u32 layout_type,
++		    struct nfsd4_pnfs_dev_iter_res *gd_res)
++{
++	struct spnfs *spnfs = global_spnfs;   /* XXX keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto getdeviceiter_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto getdeviceiter_out;
++	}
++	/* im is zeroed: the whole message is copied out to the daemon */
++	im->im_type = SPNFS_TYPE_GETDEVICEITER;
++	im->im_args.getdeviceiter_args.cookie = gd_res->gd_cookie;
++	im->im_args.getdeviceiter_args.verf = gd_res->gd_verf;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto getdeviceiter_out;
++	}
++	status = res->getdeviceiter_res.status;
++
++	if (res->getdeviceiter_res.eof)
++		gd_res->gd_eof = 1;
++	else {
++		gd_res->gd_devid = res->getdeviceiter_res.devid;
++		gd_res->gd_cookie = res->getdeviceiter_res.cookie;
++		gd_res->gd_verf = res->getdeviceiter_res.verf;
++		gd_res->gd_eof = 0;
++	}
++
++getdeviceiter_out:
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++#ifdef CONFIG_SPNFS_TEST
++/*
++ * Setup the rq_res xdr_buf.  The svc_rqst rq_respages[1] page contains the
++ * 1024 encoded stripe indices.
++ *
++ * Skip the devaddr4 length and encode the indices count (1024) in the
++ * rq_res.head and set the rq_res.head length.
++ *
++ * Set the rq_res page_len to 4096 (for the 1024 stripe indices).
++ * Set the rq_res xdr_buf tail base to rq_respages[0] just after the
++ * rq_res head to hold the rest of the getdeviceinfo return.
++ *
++ * So rq_respages[rq_resused - 1] contains the rq_res.head and rq_res.tail and
++ * rq_respages[rq_resused] contains the rq_res.pages.
++ */
++static int spnfs_test_indices_xdr(struct pnfs_xdr_info *info,
++				  const struct pnfs_filelayout_device *fdev)
++{
++	struct nfsd4_compoundres *resp = info->resp;
++	struct svc_rqst *rqstp = resp->rqstp;
++	struct xdr_buf *xb = &resp->rqstp->rq_res;
++	__be32 *p;
++
++	p = nfsd4_xdr_reserve_space(resp, 8);	/* two 32-bit words */
++	p++; /* Fill in length later */
++	*p++ = cpu_to_be32(fdev->fl_stripeindices_length); /* 1024 */
++	resp->p = p;
++
++	xb->head[0].iov_len = (char *)resp->p - (char *)xb->head[0].iov_base;
++	xb->pages = &rqstp->rq_respages[rqstp->rq_resused];
++	xb->page_base = 0;
++	xb->page_len = PAGE_SIZE; /* page of 1024 encoded indices */
++	xb->tail[0].iov_base = resp->p;	/* tail starts where head stops */
++	resp->end = xb->head[0].iov_base + PAGE_SIZE;
++	xb->tail[0].iov_len = (char *)resp->end - (char *)resp->p;
++	return 0;
++}
++/*
++ * Return a stripeindices of length 1024 to test
++ * the pNFS client multipage getdeviceinfo implementation.
++ *
++ * Encode a page of stripe indices.
++ */
++static void spnfs_set_test_indices(struct pnfs_filelayout_device *fldev,
++				  struct spnfs_device *dev,
++				  struct pnfs_devinfo_arg *info)
++{
++	struct svc_rqst *rqstp = info->xdr.resp->rqstp;
++	__be32 *p;
++	int i, j = 0;
++
++	p = (__be32 *)page_address(rqstp->rq_respages[rqstp->rq_resused]);
++	fldev->fl_stripeindices_length = 1024;
++	/* round-robin the data servers device index into the stripe index */
++	for (i = 0; i < 1024; i++) {
++		*p++ = cpu_to_be32(j);
++		if (j < dev->dscount - 1)
++			j++;
++		else		/* wrap back to the first data server */
++			j = 0;
++	}
++	fldev->fl_stripeindices_list = NULL;	/* indices live in the page */
++}
++#endif /* CONFIG_SPNFS_TEST */
++
++int
++spnfs_getdeviceinfo(struct super_block *sb, struct exp_xdr_stream *xdr,
++		    u32 layout_type,
++		    const struct nfsd4_pnfs_deviceid *devid)
++{
++	struct spnfs *spnfs = global_spnfs;
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	struct spnfs_device *dev;
++	struct pnfs_filelayout_device *fldev = NULL;
++	struct pnfs_filelayout_multipath *mp = NULL;
++	struct pnfs_filelayout_devaddr *fldap = NULL;
++	int status = 0, i, len;
++
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++	/* im is zeroed: the whole message is copied out to the daemon */
++	im->im_type = SPNFS_TYPE_GETDEVICEINFO;
++	/* XXX FIX: figure out what to do about fsid */
++	im->im_args.getdeviceinfo_args.devid = devid->devid;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto getdeviceinfo_out;
++	}
++	status = res->getdeviceinfo_res.status;
++	if (status != 0)
++		goto getdeviceinfo_out;
++
++	dev = &res->getdeviceinfo_res.devinfo;
++
++	/* Fill in the device data, i.e., nfs4_1_file_layout_ds_addr4 */
++	fldev = kzalloc(sizeof(struct pnfs_filelayout_device), GFP_KERNEL);
++	if (fldev == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++
++	/*
++	 * Stripe count is the same as data server count for our purposes
++	 */
++	fldev->fl_stripeindices_length = dev->dscount;
++	fldev->fl_device_length = dev->dscount;
++
++	/* Set stripe indices */
++#ifdef CONFIG_SPNFS_TEST
++	spnfs_set_test_indices(fldev, dev, info);
++	fldev->fl_enc_stripe_indices = spnfs_test_indices_xdr;
++#else /* CONFIG_SPNFS_TEST */
++	fldev->fl_stripeindices_list =
++		kmalloc(fldev->fl_stripeindices_length * sizeof(u32),
++			GFP_KERNEL);
++	if (fldev->fl_stripeindices_list == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++	for (i = 0; i < fldev->fl_stripeindices_length; i++)
++		fldev->fl_stripeindices_list[i] = i;
++#endif /* CONFIG_SPNFS_TEST */
++
++	/*
++	 * Set the device's data server addresses  No multipath for spnfs,
++	 * so mp length is always 1.
++	 *
++	 */
++	fldev->fl_device_list = kcalloc(fldev->fl_device_length, sizeof(*mp),
++					GFP_KERNEL);	/* zeroed for cleanup */
++	if (fldev->fl_device_list == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++	for (i = 0; i < fldev->fl_device_length; i++) {
++		mp = &fldev->fl_device_list[i];
++		mp->fl_multipath_length = 1;
++		mp->fl_multipath_list =
++			kzalloc(sizeof(struct pnfs_filelayout_devaddr),
++				GFP_KERNEL);	/* data pointers start NULL */
++		if (mp->fl_multipath_list == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		fldap = mp->fl_multipath_list;
++
++		/*
++		 * Copy the netid into the device address, for example: "tcp"
++		 */
++		len = strlen(dev->dslist[i].netid);
++		fldap->r_netid.data = kmalloc(len, GFP_KERNEL);
++		if (fldap->r_netid.data == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		memcpy(fldap->r_netid.data, dev->dslist[i].netid, len);
++		fldap->r_netid.len = len;
++
++		/*
++		 * Copy the network address into the device address,
++		 * for example: "10.35.9.16.08.01"
++		 */
++		len = strlen(dev->dslist[i].addr);
++		fldap->r_addr.data = kmalloc(len, GFP_KERNEL);
++		if (fldap->r_addr.data == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		memcpy(fldap->r_addr.data, dev->dslist[i].addr, len);
++		fldap->r_addr.len = len;
++	}
++
++	/* encode the device data */
++	status = filelayout_encode_devinfo(xdr, fldev);
++
++getdeviceinfo_out:
++	if (fldev) {
++		kfree(fldev->fl_stripeindices_list);
++		if (fldev->fl_device_list) {
++			for (i = 0; i < fldev->fl_device_length; i++) {
++				fldap =
++				    fldev->fl_device_list[i].fl_multipath_list;
++				if (fldap == NULL)	/* never allocated */
++					continue;
++				kfree(fldap->r_netid.data);
++				kfree(fldap->r_addr.data);
++				kfree(fldap);
++			}
++			kfree(fldev->fl_device_list);
++		}
++		kfree(fldev);
++	}
++
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++int
++spnfs_setattr(void)
++{
++	return 0;	/* stub: performs no work and always returns 0 */
++}
++
++int
++spnfs_open(struct inode *inode, struct nfsd4_open *open)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto open_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto open_out;
++	}
++	/* im is zeroed: the whole message is copied out to the daemon */
++	im->im_type = SPNFS_TYPE_OPEN;
++	im->im_args.open_args.inode = inode->i_ino;
++	im->im_args.open_args.generation = inode->i_generation;
++	im->im_args.open_args.create = open->op_create;
++	im->im_args.open_args.createmode = open->op_createmode;
++	im->im_args.open_args.truncate = open->op_truncate;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto open_out;
++	}
++	status = res->open_res.status;
++
++open_out:
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++int
++spnfs_create(void)
++{
++	return 0;	/* stub: performs no work and always returns 0 */
++}
++
++/*
++ * Invokes the spnfsd with the inode number of the object to remove.
++ * The file has already been removed on the MDS, so all the spnfsd
++ * daemon does is remove the stripes.
++ * Returns 0 on success otherwise error code
++ */
++int
++spnfs_remove(unsigned long ino, unsigned long generation)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto remove_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto remove_out;
++	}
++	/* im is zeroed: the whole message is copied out to the daemon */
++	im->im_type = SPNFS_TYPE_REMOVE;
++	im->im_args.remove_args.inode = ino;
++	im->im_args.remove_args.generation = generation;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto remove_out;
++	}
++	status = res->remove_res.status;
++
++remove_out:
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++static int
++read_one(struct inode *inode, loff_t offset, size_t len, char *buf,
++	 struct file **filp)
++{
++	loff_t bufoffset = 0, soffset, pos, snum, soff, tmp;
++	size_t iolen;
++	int completed = 0, ds, err;
++
++	while (len > 0) {
++		tmp = offset;
++		soff = do_div(tmp, spnfs_config->stripe_size); /* in-stripe off */
++		snum = tmp;			/* stripe number */
++		ds = do_div(tmp, spnfs_config->num_ds);	/* index into filp[] */
++		if (spnfs_config->dense_striping == 0)
++			soffset = offset;	/* same offset as the request */
++		else {
++			tmp = snum;
++			do_div(tmp, spnfs_config->num_ds);
++			soffset = tmp * spnfs_config->stripe_size + soff;
++		}
++		if (len < spnfs_config->stripe_size - soff)
++			iolen = len;
++		else		/* stop at the end of the current stripe */
++			iolen = spnfs_config->stripe_size - soff;
++
++		pos = soffset;
++		err = vfs_read(filp[ds], buf + bufoffset, iolen, &pos);
++		if (err < 0)
++			return -EIO;
++		if (err == 0)	/* nothing more to read: return what we have */
++			break;
++		filp[ds]->f_pos = pos;
++		iolen = err;	/* account only for the bytes actually read */
++		completed += iolen;
++		len -= iolen;
++		offset += iolen;
++		bufoffset += iolen;
++	}
++
++	return completed;
++}
++
++static __be32
++read(struct inode *inode, loff_t offset, unsigned long *lenp, int vlen,
++     struct svc_rqst *rqstp)
++{
++	int i, vnum, err, bytecount = 0;
++	char path[128];
++	struct file *filp[SPNFS_MAX_DATA_SERVERS];
++	size_t iolen;
++	__be32 status = nfs_ok;
++
++	/*
++	 * XXX We should just be doing this at open time, but it gets
++	 * kind of messy storing this info in nfsd's state structures
++	 * and piggybacking its path through the various state handling
++	 * functions.  Revisit this.
++	 */
++	memset(filp, 0, SPNFS_MAX_DATA_SERVERS * sizeof(struct file *));
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		snprintf(path, sizeof(path), "%s/%ld.%u",
++			 spnfs_config->ds_dir[i], inode->i_ino,
++			 inode->i_generation);	/* bounded by path[] */
++		filp[i] = filp_open(path, O_RDONLY | O_LARGEFILE, 0);
++		if (IS_ERR(filp[i])) {	/* filp_open() never returns NULL */
++			status = nfserr_io;
++			goto read_out;
++		}
++		get_file(filp[i]);
++	}
++	for (vnum = 0 ; vnum < vlen ; vnum++) {
++		iolen = rqstp->rq_vec[vnum].iov_len;
++		err = read_one(inode, offset + bytecount, iolen,
++			       (char *)rqstp->rq_vec[vnum].iov_base, filp);
++		if (err < 0) {
++			status = nfserr_io;
++			goto read_out;
++		}
++		if (err < iolen) {
++			bytecount += err;
++			goto read_out;
++		}
++		bytecount += rqstp->rq_vec[vnum].iov_len;
++	}
++
++read_out:
++	*lenp = bytecount;
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		if (!IS_ERR_OR_NULL(filp[i])) {	/* skip unopened/failed slots */
++			filp_close(filp[i], current->files);
++			fput(filp[i]);
++		}
++	}
++	return status;
++}
++
++__be32
++spnfs_read(struct inode *inode, loff_t offset, unsigned long *lenp, int vlen,
++	   struct svc_rqst *rqstp)
++{
++	if (spnfs_config)	/* set once config_write() has been called */
++		return read(inode, offset, lenp, vlen, rqstp);
++	else {
++		printk(KERN_ERR "Please upgrade to latest spnfsd\n");
++		return nfserr_notsupp;
++	}
++}
++
++static int
++write_one(struct inode *inode, loff_t offset, size_t len, char *buf,
++	  struct file **filp)
++{
++	loff_t bufoffset = 0, soffset, pos, snum, soff, tmp;
++	size_t iolen;
++	int completed = 0, ds, err;
++
++	while (len > 0) {
++		tmp = offset;
++		soff = do_div(tmp, spnfs_config->stripe_size); /* in-stripe off */
++		snum = tmp;			/* stripe number */
++		ds = do_div(tmp, spnfs_config->num_ds);	/* index into filp[] */
++		if (spnfs_config->dense_striping == 0)
++			soffset = offset;
++		else {
++			tmp = snum;
++			do_div(tmp, spnfs_config->num_ds);
++			soffset = tmp * spnfs_config->stripe_size + soff;
++		}
++		if (len < spnfs_config->stripe_size - soff)
++			iolen = len;
++		else		/* stop at the end of the current stripe */
++			iolen = spnfs_config->stripe_size - soff;
++
++		pos = soffset;
++		err = vfs_write(filp[ds], buf + bufoffset, iolen, &pos);
++		if (err <= 0)	/* 0 bytes written would loop here forever */
++			return -EIO;
++		filp[ds]->f_pos = pos;
++		iolen = err;
++		completed += iolen;
++		len -= iolen;
++		offset += iolen;
++		bufoffset += iolen;
++	}
++
++	return completed;
++}
++
++static __be32
++write(struct inode *inode, loff_t offset, size_t len, int vlen,
++      struct svc_rqst *rqstp)
++{
++	int i, vnum, err, bytecount = 0;
++	char path[128];
++	struct file *filp[SPNFS_MAX_DATA_SERVERS];
++	size_t iolen;
++	__be32 status = nfs_ok;
++
++	/*
++	 * XXX We should just be doing this at open time, but it gets
++	 * kind of messy storing this info in nfsd's state structures
++	 * and piggybacking its path through the various state handling
++	 * functions.  Revisit this.
++	 */
++	memset(filp, 0, SPNFS_MAX_DATA_SERVERS * sizeof(struct file *));
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		snprintf(path, sizeof(path), "%s/%ld.%u",
++			 spnfs_config->ds_dir[i], inode->i_ino,
++			 inode->i_generation);	/* bounded by path[] */
++		filp[i] = filp_open(path, O_RDWR | O_LARGEFILE, 0);
++		if (IS_ERR(filp[i])) {	/* filp_open() never returns NULL */
++			status = nfserr_io;
++			goto write_out;
++		}
++		get_file(filp[i]);
++	}
++
++	for (vnum = 0; vnum < vlen; vnum++) {
++		iolen = rqstp->rq_vec[vnum].iov_len;
++		err = write_one(inode, offset + bytecount, iolen,
++				(char *)rqstp->rq_vec[vnum].iov_base, filp);
++		if (err != iolen) {
++			dprintk("spnfs_write: err=%d expected %Zd\n", err, iolen);
++			status = nfserr_io;
++			goto write_out;
++		}
++		bytecount += rqstp->rq_vec[vnum].iov_len;
++	}
++
++write_out:
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		if (!IS_ERR_OR_NULL(filp[i])) {	/* skip unopened/failed slots */
++			filp_close(filp[i], current->files);
++			fput(filp[i]);
++		}
++	}
++	return status;
++}
++
++__be32
++spnfs_write(struct inode *inode, loff_t offset, size_t len, int vlen,
++	    struct svc_rqst *rqstp)
++{
++	if (spnfs_config)	/* set once config_write() has been called */
++		return write(inode, offset, len, vlen, rqstp);
++	else {
++		printk(KERN_ERR "Please upgrade to latest spnfsd\n");
++		return nfserr_notsupp;
++	}
++}
++
++int
++spnfs_commit(void)
++{
++	return 0;	/* stub: performs no work and always returns 0 */
++}
++
++/*
++ * Return the state for this object.
++ * At this time simply return 0 to indicate success and use the existing state
++ */
++int
++spnfs_get_state(struct inode *inode, struct knfsd_fh *fh, struct pnfs_get_state *arg)
++{
++	return 0;	/* none of the three arguments is read or written */
++}
++
++/*
++ * Return the filehandle for the specified file descriptor
++ */
++int
++spnfs_getfh(int fd, struct nfs_fh *fh)
++{
++	struct file *file;
++
++	file = fget(fd);	/* reference is dropped by fput() below */
++	if (file == NULL)
++		return -EIO;
++	/* NOTE(review): NFS_FH() presumably needs an NFS inode -- confirm */
++	memcpy(fh, NFS_FH(file->f_dentry->d_inode), sizeof(struct nfs_fh));
++	fput(file);
++	return 0;
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
+--- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-08-23 12:08:29.096512142 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 12:09:03.325501424 -0400
+@@ -242,6 +242,12 @@ struct nfs4_client {
+ 	u32			cl_cb_seq_nr;
+ 	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
+ 						/* wait here for slots */
++#if defined(CONFIG_PNFSD)
++	struct list_head	cl_layouts;	/* outstanding layouts */
++	struct list_head	cl_layoutrecalls; /* outstanding layoutrecall
++						     callbacks */
++	atomic_t		cl_deviceref;	/* Num outstanding devs */
++#endif /* CONFIG_PNFSD */
+ };
+ 
+ static inline void
+@@ -342,12 +348,31 @@ struct nfs4_file {
+ 	struct list_head        fi_hash;    /* hash by "struct inode *" */
+ 	struct list_head        fi_stateids;
+ 	struct list_head	fi_delegations;
++#if defined(CONFIG_PNFSD)
++	struct list_head	fi_layouts;
++	struct list_head	fi_layout_states;
++#endif /* CONFIG_PNFSD */
+ 	struct inode		*fi_inode;
+ 	u32                     fi_id;      /* used with stateowner->so_id 
+ 					     * for stateid_hashtbl hash */
+ 	bool			fi_had_conflict;
++#if defined(CONFIG_PNFSD)
++	/* used by layoutget / layoutrecall */
++	struct nfs4_fsid	fi_fsid;
++	u32			fi_fhlen;
++	u8			fi_fhval[NFS4_FHSIZE];
++#endif /* CONFIG_PNFSD */
+ };
+ 
++#if defined(CONFIG_PNFSD)
++/* pNFS Metadata server state */
++
++struct pnfs_ds_dev_entry {
++	struct list_head	dd_dev_entry; /* st_pnfs_ds_id entry */
++	u32			dd_dsid;
++};
++#endif /* CONFIG_PNFSD */
++
+ /*
+ * nfs4_stateid can either be an open stateid or (eventually) a lock stateid
+ *
+@@ -370,6 +395,9 @@ struct nfs4_stateid {
+ 	struct list_head              st_perfile;
+ 	struct list_head              st_perstateowner;
+ 	struct list_head              st_lockowners;
++#if defined(CONFIG_PNFSD)
++	struct list_head              st_pnfs_ds_id;
++#endif /* CONFIG_PNFSD */
+ 	struct nfs4_stateowner      * st_stateowner;
+ 	struct nfs4_file            * st_file;
+ 	stateid_t                     st_stateid;
+@@ -421,6 +449,34 @@ extern void nfsd4_recdir_purge_old(void)
+ extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
+ extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
+ extern void release_session_client(struct nfsd4_session *);
++extern void nfsd4_free_slab(struct kmem_cache **);
++extern struct nfs4_file *find_file(struct inode *);
++extern struct nfs4_file *find_alloc_file(struct inode *, struct svc_fh *);
++extern void put_nfs4_file(struct nfs4_file *);
++extern void get_nfs4_file(struct nfs4_file *);
++extern struct nfs4_client *find_confirmed_client(clientid_t *);
++extern struct nfs4_stateid *find_stateid(stateid_t *, int flags);
++extern struct nfs4_delegation *find_delegation_stateid(struct inode *, stateid_t *);
++extern __be32 nfs4_check_stateid(stateid_t *);
++extern void expire_client_lock(struct nfs4_client *);
++extern int filter_confirmed_clients(int (* func)(struct nfs4_client *, void *), void *);
++
++#if defined(CONFIG_PNFSD)
++extern int nfsd4_init_pnfs_slabs(void);
++extern void nfsd4_free_pnfs_slabs(void);
++extern void pnfs_expire_client(struct nfs4_client *);
++extern void release_pnfs_ds_dev_list(struct nfs4_stateid *);
++extern void nfs4_pnfs_state_init(void);
++extern void nfs4_pnfs_state_shutdown(void);
++extern void nfs4_ds_get_verifier(stateid_t *, struct super_block *, u32 *);
++extern int nfs4_preprocess_pnfs_ds_stateid(struct svc_fh *, stateid_t *);
++#else /* CONFIG_PNFSD */
++static inline void nfsd4_free_pnfs_slabs(void) {}
++static inline int nfsd4_init_pnfs_slabs(void) { return 0; }
++static inline void pnfs_expire_client(struct nfs4_client *clp) {}
++static inline void release_pnfs_ds_dev_list(struct nfs4_stateid *stp) {}
++static inline void nfs4_pnfs_state_shutdown(void) {}
++#endif /* CONFIG_PNFSD */
+ 
+ static inline void
+ nfs4_put_stateowner(struct nfs4_stateowner *so)
+@@ -434,4 +490,24 @@ nfs4_get_stateowner(struct nfs4_stateown
+ 	kref_get(&so->so_ref);
+ }
+ 
++static inline u64
++end_offset(u64 start, u64 len)
++{
++	/* start + len, clamped to NFS4_MAX_UINT64 if the sum wraps */
++	if (start + len < start)
++		return NFS4_MAX_UINT64;
++	return start + len;
++}
++
++/* last octet in [start, start + len), clamped on wrap; len must be non-zero */
++static inline u64
++last_byte_offset(u64 start, u64 len)
++{
++	u64 past_end = start + len;
++
++	BUG_ON(len == 0);
++	/* past_end <= start means the sum wrapped */
++	return past_end <= start ? NFS4_MAX_UINT64 : past_end - 1;
++}
++
+ #endif   /* NFSD4_STATE_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.c
+--- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-08-23 12:08:27.632564132 -0400
++++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-08-23 12:09:03.326501490 -0400
+@@ -37,7 +37,12 @@
+ #ifdef CONFIG_NFSD_V4
+ #include <linux/nfs4_acl.h>
+ #include <linux/nfsd_idmap.h>
++#include <linux/security.h>
++#include <linux/nfsd4_spnfs.h>
+ #endif /* CONFIG_NFSD_V4 */
++#if defined(CONFIG_SPNFS_BLOCK)
++#include <linux/nfsd4_block.h>
++#endif
+ 
+ #include "nfsd.h"
+ #include "vfs.h"
+@@ -383,6 +388,12 @@ nfsd_setattr(struct svc_rqst *rqstp, str
+ 					NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
+ 			if (err)
+ 				goto out;
++#if defined(CONFIG_SPNFS_BLOCK)
++			if (pnfs_block_enabled(inode, 0)) {
++				err = bl_layoutrecall(inode, RETURN_FILE,
++				    iap->ia_size, inode->i_size - iap->ia_size);
++			}
++#endif /* CONFIG_SPNFS_BLOCK */
+ 		}
+ 
+ 		/*
+@@ -1703,6 +1714,11 @@ nfsd_rename(struct svc_rqst *rqstp, stru
+ 	struct inode	*fdir, *tdir;
+ 	__be32		err;
+ 	int		host_err;
++#ifdef CONFIG_SPNFS
++	unsigned long ino = 0;
++	unsigned long generation = 0;
++	unsigned int nlink = 0;
++#endif /* CONFIG_SPNFS */
+ 
+ 	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
+ 	if (err)
+@@ -1766,7 +1782,26 @@ nfsd_rename(struct svc_rqst *rqstp, stru
+ 	if (host_err)
+ 		goto out_dput_new;
+ 
++#ifdef CONFIG_SPNFS
++	/*
++	 * if the target is a preexisting regular file, remember the
++	 * inode number and generation so we can delete the stripes;
++	 * save the link count as well so that the stripes only get
++	 * deleted when the last link is deleted
++	 */
++	if (ndentry && ndentry->d_inode && S_ISREG(ndentry->d_inode->i_mode)) {
++		ino = ndentry->d_inode->i_ino;
++		generation = ndentry->d_inode->i_generation;
++		nlink = ndentry->d_inode->i_nlink;
++	}
++#endif /* CONFIG_SPNFS */
++
+ 	host_err = vfs_rename(fdir, odentry, tdir, ndentry);
++#ifdef CONFIG_SPNFS
++	if (spnfs_enabled() && (!host_err && ino && nlink == 1))
++		spnfs_remove(ino, generation);
++#endif /* CONFIG_SPNFS */
++
+ 	if (!host_err) {
+ 		host_err = commit_metadata(tfhp);
+ 		if (!host_err)
+@@ -1807,6 +1842,11 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 	struct inode	*dirp;
+ 	__be32		err;
+ 	int		host_err;
++#if defined(CONFIG_SPNFS)
++	unsigned long	ino;
++	unsigned long	generation;
++	unsigned int	nlink;
++#endif /* defined(CONFIG_SPNFS) */
+ 
+ 	err = nfserr_acces;
+ 	if (!flen || isdotent(fname, flen))
+@@ -1830,6 +1870,17 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 		goto out;
+ 	}
+ 
++#if defined(CONFIG_SPNFS)
++	/*
++	 * Remember the inode number to communicate to the spnfsd
++	 * for removal of stripes; save the link count as well so that
++	 * the stripes only get deleted when the last link is deleted
++	 */
++	ino = rdentry->d_inode->i_ino;
++	generation = rdentry->d_inode->i_generation;
++	nlink = rdentry->d_inode->i_nlink;
++#endif /* defined(CONFIG_SPNFS) */
++
+ 	if (!type)
+ 		type = rdentry->d_inode->i_mode & S_IFMT;
+ 
+@@ -1854,6 +1905,29 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 	if (!host_err)
+ 		host_err = commit_metadata(fhp);
+ 
++#if defined(CONFIG_SPNFS)
++	/*
++	 * spnfs: notify spnfsd of removal to destroy stripes.
++	 *
++	 * Skip this when host_err is set (it also carries the result of
++	 * commit_metadata() just above): if the unlink did not complete,
++	 * the file may still exist and must keep its stripes.
++	 */
++	dprintk("%s check if spnfs_enabled\n", __FUNCTION__);
++	if (!host_err && spnfs_enabled() && nlink == 1) {
++		BUG_ON(ino == 0);
++		dprintk("%s calling spnfs_remove inumber=%ld\n",
++			__FUNCTION__, ino);
++		if (spnfs_remove(ino, generation) == 0) {
++			dprintk("%s spnfs_remove success\n", __FUNCTION__);
++		} else {
++			/* XXX How do we make this atomic? */
++			printk(KERN_WARNING "nfsd: pNFS could not "
++				"remove stripes for inode: %ld\n", ino);
++		}
++	}
++#endif /* defined(CONFIG_SPNFS) */
++
+ 	mnt_drop_write(fhp->fh_export->ex_path.mnt);
+ out_nfserr:
+ 	err = nfserrno(host_err);
+diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
+--- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-08-23 12:08:29.097425997 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 12:09:03.327451643 -0400
+@@ -37,6 +37,8 @@
+ #ifndef _LINUX_NFSD_XDR4_H
+ #define _LINUX_NFSD_XDR4_H
+ 
++#include <linux/nfsd/nfsd4_pnfs.h>
++
+ #include "state.h"
+ #include "nfsd.h"
+ 
+@@ -385,6 +387,51 @@ struct nfsd4_reclaim_complete {
+ 	u32 rca_one_fs;
+ };
+ 
++struct nfsd4_pnfs_getdevinfo {
++	struct nfsd4_pnfs_deviceid gd_devid;	/* request */
++	u32			gd_layout_type;	/* request */
++	u32			gd_maxcount;	/* request */
++	u32			gd_notify_types;/* request */
++	struct super_block	*gd_sb;
++};
++
++struct nfsd4_pnfs_getdevlist {
++	u32             gd_layout_type;	/* request */
++	u32		gd_maxdevices;	/* request */
++	u64		gd_cookie;	/* request - response */
++	u64		gd_verf;	/* request - response */
++	struct svc_fh 	*gd_fhp;	/* response */
++	u32		gd_eof;		/* response */
++};
++
++struct nfsd4_pnfs_layoutget {
++	u64			lg_minlength;	/* request */
++	u32			lg_signal;	/* request */
++	u32			lg_maxcount;	/* request */
++	struct svc_fh		*lg_fhp;	/* request */
++	stateid_t		lg_sid;		/* request/response */
++	struct nfsd4_layout_seg	lg_seg;		/* request/response */
++	u32			lg_roc;		/* response */
++};
++
++struct nfsd4_pnfs_layoutcommit {
++	struct nfsd4_pnfs_layoutcommit_arg args;
++	stateid_t		lc_sid;		/* request */
++	struct nfsd4_pnfs_layoutcommit_res res;
++};
++
++enum layoutreturn_flags {
++	LR_FLAG_INTERN = 1 << 0,	/* internal return */
++	LR_FLAG_EXPIRE = 1 << 1,	/* return on client expiration */
++};
++
++struct nfsd4_pnfs_layoutreturn {
++	struct nfsd4_pnfs_layoutreturn_arg args;
++	u32			lr_flags;	/* presumably LR_FLAG_* bits */
++	stateid_t		lr_sid;		/* request/response */
++	u32			lrs_present;	/* response */
++};
++
+ struct nfsd4_op {
+ 	int					opnum;
+ 	__be32					status;
+@@ -426,6 +473,13 @@ struct nfsd4_op {
+ 		struct nfsd4_destroy_session	destroy_session;
+ 		struct nfsd4_sequence		sequence;
+ 		struct nfsd4_reclaim_complete	reclaim_complete;
++#if defined(CONFIG_PNFSD)
++		struct nfsd4_pnfs_getdevlist	pnfs_getdevlist;
++		struct nfsd4_pnfs_getdevinfo	pnfs_getdevinfo;
++		struct nfsd4_pnfs_layoutget	pnfs_layoutget;
++		struct nfsd4_pnfs_layoutcommit	pnfs_layoutcommit;
++		struct nfsd4_pnfs_layoutreturn	pnfs_layoutreturn;
++#endif /* CONFIG_PNFSD */
+ 	} u;
+ 	struct nfs4_replay *			replay;
+ };
+diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.c
+--- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-23 12:08:29.039491912 -0400
++++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-23 12:09:03.328501680 -0400
+@@ -28,6 +28,7 @@
+ #include <linux/aio.h>
+ #include <linux/gfp.h>
+ #include <linux/swap.h>
++#include <linux/pnfs_xdr.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/system.h>
+@@ -36,6 +37,7 @@
+ #include "internal.h"
+ #include "iostat.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_FILE
+ 
+@@ -388,12 +390,17 @@ static int nfs_write_begin(struct file *
+ 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ 	struct page *page;
+ 	int once_thru = 0;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
+ 		file->f_path.dentry->d_parent->d_name.name,
+ 		file->f_path.dentry->d_name.name,
+ 		mapping->host->i_ino, len, (long long) pos);
+ 
++	pnfs_update_layout(mapping->host,
++			   nfs_file_open_context(file),
++			   0, NFS4_MAX_UINT64, IOMODE_RW,
++			   &lseg);
+ start:
+ 	/*
+ 	 * Prevent starvation issues if someone is doing a consistency
+@@ -402,17 +409,22 @@ start:
+ 	ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
+ 			nfs_wait_bit_killable, TASK_KILLABLE);
+ 	if (ret)
+-		return ret;
++		goto out;
+ 
+ 	page = grab_cache_page_write_begin(mapping, index, flags);
+-	if (!page)
+-		return -ENOMEM;
++	if (!page) {
++		ret = -ENOMEM;
++		goto out;
++	}
+ 	*pagep = page;
+ 
+-	ret = nfs_flush_incompatible(file, page);
++	ret = nfs_flush_incompatible(file, page, lseg);
+ 	if (ret) {
+ 		unlock_page(page);
+ 		page_cache_release(page);
++		*pagep = NULL;
++		*fsdata = NULL;
++		goto out;
+ 	} else if (!once_thru &&
+ 		   nfs_want_read_modify_write(file, page, pos, len)) {
+ 		once_thru = 1;
+@@ -421,6 +433,12 @@ start:
+ 		if (!ret)
+ 			goto start;
+ 	}
++	ret = pnfs_write_begin(file, page, pos, len, lseg, fsdata);
++ out:
++	if (ret) {
++		put_lseg(lseg);
++		*fsdata = NULL;
++	}
+ 	return ret;
+ }
+ 
+@@ -430,6 +448,7 @@ static int nfs_write_end(struct file *fi
+ {
+ 	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+ 	int status;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n",
+ 		file->f_path.dentry->d_parent->d_name.name,
+@@ -456,10 +475,17 @@ static int nfs_write_end(struct file *fi
+ 			zero_user_segment(page, pglen, PAGE_CACHE_SIZE);
+ 	}
+ 
+-	status = nfs_updatepage(file, page, offset, copied);
++	lseg = nfs4_pull_lseg_from_fsdata(file, fsdata);
++	status = pnfs_write_end(file, page, pos, len, copied, lseg);
++	if (status)
++		goto out;
++	status = nfs_updatepage(file, page, offset, copied, lseg, fsdata);
+ 
++ out:
+ 	unlock_page(page);
+ 	page_cache_release(page);
++	pnfs_write_end_cleanup(file, fsdata);
++	put_lseg(lseg);
+ 
+ 	if (status < 0)
+ 		return status;
+@@ -570,6 +596,8 @@ static int nfs_vm_page_mkwrite(struct vm
+ 	/* make sure the cache has finished storing the page */
+ 	nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page);
+ 
++	/* XXX Do we want to call pnfs_update_layout here? */
++
+ 	lock_page(page);
+ 	mapping = page->mapping;
+ 	if (mapping != dentry->d_inode->i_mapping)
+@@ -580,11 +608,11 @@ static int nfs_vm_page_mkwrite(struct vm
+ 	if (pagelen == 0)
+ 		goto out_unlock;
+ 
+-	ret = nfs_flush_incompatible(filp, page);
++	ret = nfs_flush_incompatible(filp, page, NULL);
+ 	if (ret != 0)
+ 		goto out_unlock;
+ 
+-	ret = nfs_updatepage(filp, page, 0, pagelen);
++	ret = nfs_updatepage(filp, page, 0, pagelen, NULL, NULL);
+ out_unlock:
+ 	if (!ret)
+ 		return VM_FAULT_LOCKED;
+diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inode.c
+--- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-08-23 12:08:29.042511552 -0400
++++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-23 12:09:03.329501644 -0400
+@@ -48,6 +48,7 @@
+ #include "internal.h"
+ #include "fscache.h"
+ #include "dns_resolve.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_VFS
+ 
+@@ -278,7 +279,7 @@ nfs_fhget(struct super_block *sb, struct
+ 		 */
+ 		inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
+ 		if (S_ISREG(inode->i_mode)) {
+-			inode->i_fop = &nfs_file_operations;
++			inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
+ 			inode->i_data.a_ops = &nfs_file_aops;
+ 			inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
+ 		} else if (S_ISDIR(inode->i_mode)) {
+@@ -530,6 +531,68 @@ out:
+ 	return err;
+ }
+ 
++static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
++{
++	INIT_LIST_HEAD(&l_ctx->list);
++	l_ctx->pid = current->tgid;		/* caller's thread group id */
++	l_ctx->lockowner = current->files;	/* caller's files pointer */
++	atomic_set(&l_ctx->count, 1);		/* starts with one reference */
++}
++
++static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
++{
++	struct nfs_lock_context *l_ctx;
++
++	/* A match needs both the caller's lockowner and its tgid */
++	list_for_each_entry(l_ctx, &ctx->lock_context.list, list) {
++		if (l_ctx->lockowner == current->files &&
++		    l_ctx->pid == current->tgid) {
++			atomic_inc(&l_ctx->count);	/* reference for caller */
++			return l_ctx;
++		}
++	}
++	return NULL;
++}
++
++struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
++{
++	struct nfs_lock_context *res, *new = NULL;
++	struct inode *inode = ctx->path.dentry->d_inode;
++
++	spin_lock(&inode->i_lock);
++	res = __nfs_find_lock_context(ctx);	/* takes a reference on a match */
++	if (res == NULL) {
++		spin_unlock(&inode->i_lock);	/* allocate with i_lock dropped */
++		new = kmalloc(sizeof(*new), GFP_KERNEL);
++		if (new == NULL)
++			return NULL;
++		nfs_init_lock_context(new);
++		spin_lock(&inode->i_lock);
++		res = __nfs_find_lock_context(ctx);	/* search again under the lock */
++		if (res == NULL) {
++			list_add_tail(&new->list, &ctx->lock_context.list);
++			new->open_context = ctx;
++			res = new;
++			new = NULL;	/* now linked on ctx's list; must not be freed below */
++		}
++	}
++	spin_unlock(&inode->i_lock);
++	kfree(new);	/* non-NULL only when the new context went unused */
++	return res;
++}
++
++void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
++{
++	struct inode *inode = l_ctx->open_context->path.dentry->d_inode;
++
++	/* Only the final put unlinks and frees; i_lock is released here */
++	if (atomic_dec_and_lock(&l_ctx->count, &inode->i_lock)) {
++		list_del(&l_ctx->list);
++		spin_unlock(&inode->i_lock);
++		kfree(l_ctx);
++	}
++}
++
+ /**
+  * nfs_close_context - Common close_context() routine NFSv2/v3
+  * @ctx: pointer to context
+@@ -566,11 +629,11 @@ static struct nfs_open_context *alloc_nf
+ 		path_get(&ctx->path);
+ 		ctx->cred = get_rpccred(cred);
+ 		ctx->state = NULL;
+-		ctx->lockowner = current->files;
+ 		ctx->flags = 0;
+ 		ctx->error = 0;
+ 		ctx->dir_cookie = 0;
+-		atomic_set(&ctx->count, 1);
++		nfs_init_lock_context(&ctx->lock_context);
++		ctx->lock_context.open_context = ctx;
+ 	}
+ 	return ctx;
+ }
+@@ -578,15 +641,16 @@ static struct nfs_open_context *alloc_nf
+ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
+ {
+ 	if (ctx != NULL)
+-		atomic_inc(&ctx->count);
++		atomic_inc(&ctx->lock_context.count);
+ 	return ctx;
+ }
++EXPORT_SYMBOL(get_nfs_open_context);
+ 
+ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
+ {
+ 	struct inode *inode = ctx->path.dentry->d_inode;
+ 
+-	if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
++	if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
+ 		return;
+ 	list_del(&ctx->list);
+ 	spin_unlock(&inode->i_lock);
+@@ -933,6 +997,7 @@ void nfs_fattr_init(struct nfs_fattr *fa
+ 	fattr->time_start = jiffies;
+ 	fattr->gencount = nfs_inc_attr_generation_counter();
+ }
++EXPORT_SYMBOL(nfs_fattr_init);
+ 
+ struct nfs_fattr *nfs_alloc_fattr(void)
+ {
+@@ -1142,6 +1207,14 @@ static int nfs_update_inode(struct inode
+ 		server->fsid = fattr->fsid;
+ 
+ 	/*
++	 * file needs layout commit, server attributes may be stale
++	 */
++	if (layoutcommit_needed(nfsi) && nfsi->change_attr >= fattr->change_attr) {
++		dprintk("NFS: %s: layoutcommit is needed for file %s/%ld\n",
++			__func__, inode->i_sb->s_id, inode->i_ino);
++		return 0;
++	}
++	/*
+ 	 * Update the read time so we don't revalidate too often.
+ 	 */
+ 	nfsi->read_cache_jiffies = fattr->time_start;
+@@ -1340,9 +1413,10 @@ static int nfs_update_inode(struct inode
+  */
+ void nfs4_clear_inode(struct inode *inode)
+ {
++	pnfs_return_layout(inode, NULL, NULL, RETURN_FILE, true);
++
+ 	/* If we are holding a delegation, return it! */
+ 	nfs_inode_return_delegation_noreclaim(inode);
+-	/* First call standard NFS clear_inode() code */
+ 	nfs_clear_inode(inode);
+ }
+ #endif
+@@ -1367,7 +1441,10 @@ struct inode *nfs_alloc_inode(struct sup
+ 
+ void nfs_destroy_inode(struct inode *inode)
+ {
+-	kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
++	struct nfs_inode *nfsi = NFS_I(inode);
++
++	pnfs_destroy_layout(nfsi);
++	kmem_cache_free(nfs_inode_cachep, nfsi);
+ }
+ 
+ static inline void nfs4_init_once(struct nfs_inode *nfsi)
+@@ -1377,6 +1454,11 @@ static inline void nfs4_init_once(struct
+ 	nfsi->delegation = NULL;
+ 	nfsi->delegation_state = 0;
+ 	init_rwsem(&nfsi->rwsem);
++#ifdef CONFIG_NFS_V4_1
++	init_waitqueue_head(&nfsi->lo_waitq);
++	nfsi->pnfs_layout_suspend = 0;
++	nfsi->layout = NULL;
++#endif /* CONFIG_NFS_V4_1 */
+ #endif
+ }
+ 
+@@ -1488,6 +1570,12 @@ static int __init init_nfs_fs(void)
+ 	if (err)
+ 		goto out0;
+ 
++#ifdef CONFIG_NFS_V4_1
++	err = pnfs_initialize();
++	if (err)
++		goto out00;
++#endif /* CONFIG_NFS_V4_1 */
++
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_register(&nfs_rpcstat);
+ #endif
+@@ -1498,6 +1586,10 @@ out:
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_unregister("nfs");
+ #endif
++#ifdef CONFIG_NFS_V4_1
++out00:
++	pnfs_uninitialize();
++#endif /* CONFIG_NFS_V4_1 */
+ 	nfs_destroy_directcache();
+ out0:
+ 	nfs_destroy_writepagecache();
+@@ -1531,6 +1623,9 @@ static void __exit exit_nfs_fs(void)
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_unregister("nfs");
+ #endif
++#ifdef CONFIG_NFS_V4_1
++	pnfs_uninitialize();
++#endif
+ 	unregister_nfs_fs();
+ 	nfs_fs_proc_exit();
+ 	nfsiod_stop();
+diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/internal.h
+--- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-08-23 12:08:29.042511552 -0400
++++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-23 12:09:03.330502148 -0400
+@@ -139,6 +139,16 @@ extern struct nfs_server *nfs_clone_serv
+ 					   struct nfs_fattr *);
+ extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
+ extern int nfs4_check_client_ready(struct nfs_client *clp);
++extern int nfs_sockaddr_cmp(const struct sockaddr *sa1,
++		const struct sockaddr *sa2);
++extern int nfs4_set_client(struct nfs_server *server,
++		const char *hostname,
++		const struct sockaddr *addr,
++		const size_t addrlen,
++		const char *ip_addr,
++		rpc_authflavor_t authflavour,
++		int proto, const struct rpc_timeout *timeparms,
++		u32 minorversion);
+ #ifdef CONFIG_PROC_FS
+ extern int __init nfs_fs_proc_init(void);
+ extern void nfs_fs_proc_exit(void);
+@@ -201,6 +211,8 @@ extern const u32 nfs41_maxwrite_overhead
+ extern struct rpc_procinfo nfs4_procedures[];
+ #endif
+ 
++extern int nfs4_recover_expired_lease(struct nfs_client *clp);
++
+ /* proc.c */
+ void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
+ 
+@@ -248,10 +260,31 @@ extern int nfs4_get_rootfh(struct nfs_se
+ #endif
+ 
+ /* read.c */
++extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++			     const struct rpc_call_ops *call_ops);
++extern int pnfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++			     const struct rpc_call_ops *call_ops);
+ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
+ 
+ /* write.c */
++extern int nfs_initiate_write(struct nfs_write_data *data,
++			      struct rpc_clnt *clnt,
++			      const struct rpc_call_ops *call_ops,
++			      int how);
++extern int pnfs_initiate_write(struct nfs_write_data *data,
++			      struct rpc_clnt *clnt,
++			      const struct rpc_call_ops *call_ops,
++			      int how);
++extern int nfs_initiate_commit(struct nfs_write_data *data,
++			       struct rpc_clnt *clnt,
++			       const struct rpc_call_ops *call_ops,
++			       int how);
++extern int pnfs_initiate_commit(struct nfs_write_data *data,
++			       struct rpc_clnt *clnt,
++			       const struct rpc_call_ops *call_ops,
++				int how, int pnfs);
+ extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
++extern void nfs_mark_list_commit(struct list_head *head);
+ #ifdef CONFIG_MIGRATION
+ extern int nfs_migrate_page(struct address_space *,
+ 		struct page *, struct page *);
+diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kconfig
+--- linux-2.6.34.noarch/fs/nfs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-08-23 12:09:03.331395814 -0400
+@@ -79,10 +79,48 @@ config NFS_V4_1
+ 	depends on NFS_V4 && EXPERIMENTAL
+ 	help
+ 	  This option enables support for minor version 1 of the NFSv4 protocol
+-	  (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client.
++	  (RFC5661) including support for the parallel NFS (pNFS) features
++	  in the kernel's NFS client.
+ 
+ 	  Unless you're an NFS developer, say N.
+ 
++config PNFS_FILE_LAYOUT
++	tristate "NFS client support for the pNFS nfs-files layout (DEVELOPER ONLY)"
++	depends on NFS_FS && NFS_V4_1
++	default y
++	help
++	  This option enables support for the pNFS nfs-files layout.
++
++	  Unless you're an NFS developer, say N.
++
++config PNFS_OBJLAYOUT
++	tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
++	depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD
++	help
++	  Say M here if you want your pNFS client to support the Objects Layout Driver.
++	  Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and
++	  upper level driver (SCSI_OSD_ULD).
++
++	  If unsure, say N.
++
++config PNFS_PANLAYOUT
++	tristate "Provide support for the Panasas OSD Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
++	depends on PNFS_OBJLAYOUT
++	help
++	  Say M or y here if you want your pNFS client to support the Panasas OSD Layout Driver.
++
++	  If unsure, say N.
++
++config PNFS_BLOCK
++	tristate "Provide a pNFS block client (EXPERIMENTAL)"
++	depends on NFS_FS && NFS_V4_1
++	select MD
++	select BLK_DEV_DM
++	help
++	  Say M or y here if you want your pNFS client to support the block protocol.
++
++	  If unsure, say N.
++
+ config ROOT_NFS
+ 	bool "Root file system on NFS"
+ 	depends on NFS_FS=y && IP_PNP
+diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Makefile
+--- linux-2.6.34.noarch/fs/nfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-08-23 12:09:03.331395814 -0400
+@@ -15,5 +15,12 @@ nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4x
+ 			   delegation.o idmap.o \
+ 			   callback.o callback_xdr.o callback_proc.o \
+ 			   nfs4namespace.o
++nfs-$(CONFIG_NFS_V4_1)	+= pnfs.o
+ nfs-$(CONFIG_SYSCTL) += sysctl.o
+ nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
++
++obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
++nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
++
++obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
++obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
+diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs3proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-08-23 12:08:29.045525837 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-23 12:09:03.332511640 -0400
+@@ -833,6 +833,7 @@ const struct nfs_rpc_ops nfs_v3_clientop
+ 	.dentry_ops	= &nfs_dentry_operations,
+ 	.dir_inode_ops	= &nfs3_dir_inode_operations,
+ 	.file_inode_ops	= &nfs3_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs3_proc_get_root,
+ 	.getattr	= nfs3_proc_getattr,
+ 	.setattr	= nfs3_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-08-23 12:09:03.333512111 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-08-23 12:09:03.334491472 -0400
+@@ -0,0 +1,765 @@
++/*
++ *  linux/fs/nfs/nfs4filelayout.c
++ *
++ *  Module for the pnfs nfs4 file layout driver.
++ *  Defines all I/O and Policy interface operations, plus code
++ *  to register itself with the pNFS client.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/time.h>
++#include <linux/kernel.h>
++#include <linux/mm.h>
++#include <linux/string.h>
++#include <linux/vmalloc.h>
++#include <linux/stat.h>
++#include <linux/errno.h>
++#include <linux/unistd.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_page.h>
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs4_pnfs.h>
++
++#include "nfs4filelayout.h"
++#include "nfs4_fs.h"
++#include "internal.h"
++#include "pnfs.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Dean Hildebrand <dhildebz@eecs.umich.edu>");
++MODULE_DESCRIPTION("The NFSv4 file layout driver");
++
++/* Callback operations to the pNFS client */
++struct pnfs_client_operations *pnfs_callback_ops;
++
++/* Forward declaration */
++struct layoutdriver_io_operations filelayout_io_operations;
++
++int
++filelayout_initialize_mountpoint(struct nfs_server *nfss,
++				 const struct nfs_fh *mntfh)
++{
++	int status = nfs4_alloc_init_deviceid_cache(nfss->nfs_client,
++						nfs4_fl_free_deviceid_callback);
++	if (status == 0) {
++		dprintk("%s: deviceid cache has been initialized successfully\n",
++			__func__);
++	} else {
++		printk(KERN_WARNING "%s: deviceid cache could not be "
++			"initialized\n", __func__);
++	}
++	return status;	/* 0 on success, else the cache setup error */
++}
++
++/* Put the client's deviceid cache for this mount, if any; always returns 0 */
++int
++filelayout_uninitialize_mountpoint(struct nfs_server *nfss)
++{
++	dprintk("--> %s\n", __func__);
++	if (!nfss->pnfs_curr_ld || !nfss->nfs_client->cl_devid_cache)
++		return 0;	/* no layout driver or no cache: nothing to put */
++	nfs4_put_deviceid_cache(nfss->nfs_client);
++	return 0;
++}
++
++/* Translate a file offset into the offset to use on the data server:
++ * unchanged for STRIPE_SPARSE; for STRIPE_DENSE, each whole stripe before
++ * the offset contributes one stripe unit, plus the offset within the unit.
++ */
++static loff_t
++filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++
++	switch (flseg->stripe_type) {
++	case STRIPE_SPARSE:
++		return offset;
++
++	case STRIPE_DENSE:
++	{
++		u32 stripe_width;
++		u64 tmp, off;
++		u32 unit = flseg->stripe_unit;
++
++		stripe_width = unit * FILE_DSADDR(lseg)->stripe_count;
++		tmp = off = offset - flseg->pattern_offset;
++		do_div(tmp, stripe_width);	/* tmp becomes the whole-stripe count */
++		return tmp * unit + do_div(off, unit);	/* do_div() yields the remainder */
++	}
++	default:
++		BUG();
++	}
++
++	/* We should never get here... just to stop the gcc warning */
++	return 0;
++}
++
++/*
++ * Call ops for the async read/write cases
++ * In the case of dense layouts, the offset needs to be reset to its
++ * original value.
++ */
++static void filelayout_read_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_read_data *rd = data;
++
++	if (rd->fldata.orig_offset != 0) {	/* restore the saved offset */
++		dprintk("%s new off %llu orig offset %llu\n", __func__,
++			rd->args.offset, rd->fldata.orig_offset);
++		rd->args.offset = rd->fldata.orig_offset;
++	}
++
++	/* Hand over to the wrapped call_ops; this may cause a resend */
++	rd->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static void filelayout_read_release(void *data)
++{
++	struct nfs_read_data *rd = data;
++
++	put_lseg(rd->pdata.lseg);	/* put the layout segment first ... */
++	rd->pdata.lseg = NULL;
++	rd->pdata.call_ops->rpc_release(data);	/* ... then chain to wrapped ops */
++}
++
++static void filelayout_write_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
++
++	if (wdata->fldata.orig_offset) {
++		dprintk("%s new off %llu orig offset %llu\n", __func__,
++			wdata->args.offset, wdata->fldata.orig_offset);
++		wdata->args.offset = wdata->fldata.orig_offset;
++	}
++
++	/* Note this may cause RPC to be resent */
++	wdata->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static void filelayout_write_release(void *data)
++{
++	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
++
++	put_lseg(wdata->pdata.lseg);
++	wdata->pdata.lseg = NULL;
++	wdata->pdata.call_ops->rpc_release(data);
++}
++
++struct rpc_call_ops filelayout_read_call_ops = {
++	.rpc_call_prepare = nfs_read_prepare,
++	.rpc_call_done = filelayout_read_call_done,
++	.rpc_release = filelayout_read_release,
++};
++
++struct rpc_call_ops filelayout_write_call_ops = {
++	.rpc_call_prepare = nfs_write_prepare,
++	.rpc_call_done = filelayout_write_call_done,
++	.rpc_release = filelayout_write_release,
++};
++
++/* Perform sync or async reads.
++ *
++ * An optimization for the NFS file layout driver
++ * allows the original read/write data structs to be passed in the
++ * last argument.
++ *
++ * TODO: join with write_pagelist?
++ */
++static enum pnfs_try_status
++filelayout_read_pagelist(struct nfs_read_data *data, unsigned nr_pages)
++{
++	struct pnfs_layout_segment *lseg = data->pdata.lseg;
++	struct nfs4_pnfs_ds *ds;
++	loff_t offset = data->args.offset;
++	u32 idx;
++	struct nfs_fh *fh;
++
++	dprintk("--> %s ino %lu nr_pages %d pgbase %u req %Zu@%llu\n",
++		__func__, data->inode->i_ino, nr_pages,
++		data->args.pgbase, (size_t)data->args.count, offset);
++
++	/* Retrieve the correct rpc_client for the byte range */
++	idx = nfs4_fl_calc_ds_index(lseg, offset);
++	ds = nfs4_fl_prepare_ds(lseg, idx);
++	if (!ds) {
++		printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	dprintk("%s USE DS:ip %x %s\n", __func__,
++		htonl(ds->ds_ip_addr), ds->r_addr);
++
++	/* just try the first data server for the index..*/
++	data->fldata.ds_nfs_client = ds->ds_clp;
++	fh = nfs4_fl_select_ds_fh(lseg, offset);
++	if (fh)
++		data->args.fh = fh;
++
++	/*
++	 * Now get the file offset on the dserver
++	 * Set the read offset to this offset, and
++	 * save the original offset in orig_offset
++	 * In the case of async reads, the offset will be reset in the
++	 * call_ops->rpc_call_done() routine.
++	 */
++	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
++	data->fldata.orig_offset = offset;
++
++	/* Perform an asynchronous read */
++	nfs_initiate_read(data, ds->ds_clp->cl_rpcclient,
++			  &filelayout_read_call_ops);
++
++	data->pdata.pnfs_error = 0;
++
++	return PNFS_ATTEMPTED;
++}
++
++/* Perform async writes. */
++static enum pnfs_try_status
++filelayout_write_pagelist(struct nfs_write_data *data, unsigned nr_pages, int sync)
++{
++	struct pnfs_layout_segment *lseg = data->pdata.lseg;
++	struct nfs4_pnfs_ds *ds;
++	loff_t offset = data->args.offset;
++	u32 idx;
++	struct nfs_fh *fh;
++
++	/* Retrieve the correct rpc_client for the byte range */
++	idx = nfs4_fl_calc_ds_index(lseg, offset);
++	ds = nfs4_fl_prepare_ds(lseg, idx);
++	if (!ds) {
++		printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu %s\n", __func__,
++		data->inode->i_ino, sync, (size_t) data->args.count, offset,
++		htonl(ds->ds_ip_addr), ntohs(ds->ds_port), ds->r_addr);
++
++	data->fldata.ds_nfs_client = ds->ds_clp;
++	fh = nfs4_fl_select_ds_fh(lseg, offset);
++	if (fh)
++		data->args.fh = fh;
++	/*
++	 * Get the file offset on the dserver. Set the write offset to
++	 * this offset and save the original offset.
++	 */
++	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
++	data->fldata.orig_offset = offset;
++
++	/*
++	 * Perform an asynchronous write. The offset will be reset in the
++	 * call_ops->rpc_call_done() routine
++	 */
++	nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
++			   &filelayout_write_call_ops, sync);
++
++	data->pdata.pnfs_error = 0;
++	return PNFS_ATTEMPTED;
++}
++
++/*
++ * Create a filelayout layout structure and return it.  The pNFS client
++ * will use the pnfs_layout_type type to refer to the layout for this
++ * inode from now on.
++ */
++static struct pnfs_layout_type *
++filelayout_alloc_layout(struct inode *inode)
++{
++	struct nfs4_filelayout *flp;
++
++	dprintk("NFS_FILELAYOUT: allocating layout\n");
++	flp =  kzalloc(sizeof(struct nfs4_filelayout), GFP_KERNEL);
++	return flp ? &flp->fl_layout : NULL;
++}
++
++/* Free a filelayout layout structure */
++static void
++filelayout_free_layout(struct pnfs_layout_type *lo)
++{
++	dprintk("NFS_FILELAYOUT: freeing layout\n");
++	kfree(FILE_LO(lo));
++}
++
++/*
++ * filelayout_check_layout()
++ *
++ * Make sure layout segment parameters are sane WRT the device.
++ * Returns 0 (device referenced by the lseg) or -EINVAL.
++ * Notes:
++ * 1) current code insists that # stripe index = # data servers in ds_list
++ *    which is wrong; first_stripe_index must be < the stripe count.
++ * 2) pattern_offset is ignored and must == 0 which is wrong;
++ * 3) the pattern_offset needs to be a multiple of the stripe unit.
++ * 4) stripe unit is multiple of page size
++ */
++
++static int
++filelayout_check_layout(struct pnfs_layout_type *lo,
++			struct pnfs_layout_segment *lseg)
++{
++	struct nfs4_filelayout_segment *fl = LSEG_LD_DATA(lseg);
++	struct nfs4_file_layout_dsaddr *dsaddr;
++	int status = -EINVAL;
++	struct nfs_server *nfss = NFS_SERVER(PNFS_INODE(lo));
++
++	dprintk("--> %s\n", __func__);
++	dsaddr = nfs4_pnfs_device_item_find(nfss->nfs_client, &fl->dev_id);
++	if (dsaddr == NULL) {
++		dsaddr = get_device_info(PNFS_INODE(lo), &fl->dev_id);
++		if (dsaddr == NULL) {
++			dprintk("%s NO device for dev_id %s\n",
++				__func__, deviceid_fmt(&fl->dev_id));
++			goto out;
++		}
++	}
++	if (fl->first_stripe_index < 0 ||
++	    fl->first_stripe_index >= dsaddr->stripe_count) {
++		dprintk("%s Bad first_stripe_index %d\n",
++				__func__, fl->first_stripe_index);
++		goto out;
++	}
++
++	if (fl->pattern_offset != 0) {
++		dprintk("%s Unsupported no-zero pattern_offset %Ld\n",
++				__func__, fl->pattern_offset);
++		goto out;
++	}
++
++	if (fl->stripe_unit % PAGE_SIZE) {
++		dprintk("%s Stripe unit (%u) not page aligned\n",
++			__func__, fl->stripe_unit);
++		goto out;
++	}
++
++	/* XXX only support SPARSE packing. Don't support use MDS open fh */
++	if (!(fl->num_fh == 1 || fl->num_fh == dsaddr->ds_num)) {
++		dprintk("%s num_fh %u not equal to 1 or ds_num %u\n",
++			__func__, fl->num_fh, dsaddr->ds_num);
++		goto out;
++	}
++
++	if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
++		dprintk("%s Stripe unit (%u) not aligned with rsize %u "
++			"wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
++			nfss->wsize);	/* reported only, not rejected */
++	}
++
++	/* reference the device */
++	nfs4_set_layout_deviceid(lseg, &dsaddr->deviceid);
++
++	status = 0;
++out:
++	dprintk("--> %s returns %d\n", __func__, status);
++	return status;
++}
++
++static void _filelayout_free_lseg(struct pnfs_layout_segment *lseg);
++static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl);
++
++/* Decode the XDR layout body in lgr->layout.buf into @fl and record the
++ * stripe unit in @flo.  Returns 0 or -ENOMEM; a bad fh leaves num_fh == 0.
++ */
++static int
++filelayout_set_layout(struct nfs4_filelayout *flo,
++		      struct nfs4_filelayout_segment *fl,
++		      struct nfs4_pnfs_layoutget_res *lgr)
++{
++	uint32_t *p = (uint32_t *)lgr->layout.buf;
++	uint32_t nfl_util;
++	int i;
++
++	dprintk("%s: set_layout_map Begin\n", __func__);
++
++	memcpy(&fl->dev_id, p, NFS4_PNFS_DEVICEID4_SIZE);
++	p += XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE);
++	nfl_util = be32_to_cpup(p++);	/* flag bits + stripe unit */
++	if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
++		fl->commit_through_mds = 1;
++	if (nfl_util & NFL4_UFLG_DENSE)
++		fl->stripe_type = STRIPE_DENSE;
++	else
++		fl->stripe_type = STRIPE_SPARSE;
++	fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;
++
++	if (!flo->stripe_unit)
++		flo->stripe_unit = fl->stripe_unit;
++	else if (flo->stripe_unit != fl->stripe_unit) {
++		printk(KERN_NOTICE "%s: updating strip_unit from %u to %u\n",
++			__func__, flo->stripe_unit, fl->stripe_unit);
++		flo->stripe_unit = fl->stripe_unit;
++	}
++
++	fl->first_stripe_index = be32_to_cpup(p++);
++	p = xdr_decode_hyper(p, &fl->pattern_offset);
++	fl->num_fh = be32_to_cpup(p++);
++
++	dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu dev_id %s\n",
++		__func__, nfl_util, fl->num_fh, fl->first_stripe_index,
++		fl->pattern_offset, deviceid_fmt(&fl->dev_id));
++
++	/* Same size test as filelayout_free_fh_array(): vmalloc when large */
++	if (fl->num_fh * sizeof(struct nfs_fh) > 2*PAGE_SIZE) {
++		fl->fh_array = vmalloc(fl->num_fh * sizeof(struct nfs_fh));
++		if (fl->fh_array)
++			memset(fl->fh_array, 0,
++				fl->num_fh * sizeof(struct nfs_fh));
++	} else {
++		fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh),
++					GFP_KERNEL);
++	}
++	if (!fl->fh_array)
++		return -ENOMEM;
++
++	for (i = 0; i < fl->num_fh; i++) {
++		/* fh: must fit in ->data, which is smaller than the struct */
++		fl->fh_array[i].size = be32_to_cpup(p++);
++		if (fl->fh_array[i].size > sizeof(fl->fh_array[i].data)) {
++			printk(KERN_ERR "Too big fh %d received %d\n",
++				i, fl->fh_array[i].size);
++			/* Layout is now invalid, pretend it doesn't exist */
++			filelayout_free_fh_array(fl); /* needs old num_fh */
++			fl->num_fh = 0;
++			break;
++		}
++		memcpy(fl->fh_array[i].data, p, fl->fh_array[i].size);
++		p += XDR_QUADLEN(fl->fh_array[i].size);
++		dprintk("DEBUG: %s: fh len %d\n", __func__,
++					fl->fh_array[i].size);
++	}
++
++	return 0;
++}
++
++static struct pnfs_layout_segment *
++filelayout_alloc_lseg(struct pnfs_layout_type *layoutid,
++		      struct nfs4_pnfs_layoutget_res *lgr)
++{
++	struct nfs4_filelayout *flo = FILE_LO(layoutid);
++	struct pnfs_layout_segment *lseg;
++	int rc;
++
++	dprintk("--> %s\n", __func__);
++	lseg = kzalloc(sizeof(struct pnfs_layout_segment) +
++		       sizeof(struct nfs4_filelayout_segment), GFP_KERNEL);
++	if (!lseg)
++		return NULL;
++
++	rc = filelayout_set_layout(flo, LSEG_LD_DATA(lseg), lgr);
++
++	if (rc != 0 || filelayout_check_layout(layoutid, lseg)) {
++		_filelayout_free_lseg(lseg);
++		lseg = NULL;
++	}
++	return lseg;
++}
++
++static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl)
++{
++	if (fl->num_fh * sizeof(struct nfs_fh) > 2*PAGE_SIZE)
++		vfree(fl->fh_array);
++	else
++		kfree(fl->fh_array);
++
++	fl->fh_array = NULL;
++}
++
++static void
++_filelayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	filelayout_free_fh_array(LSEG_LD_DATA(lseg));
++	kfree(lseg);
++}
++
++static void
++filelayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	dprintk("--> %s\n", __func__);
++	nfs4_unset_layout_deviceid(lseg, lseg->deviceid,
++				   nfs4_fl_free_deviceid_callback);
++	_filelayout_free_lseg(lseg);
++}
++
++/* Allocate a commit clone of @old that pins @old; NULL if out of memory */
++static struct nfs_write_data *
++filelayout_clone_write_data(struct nfs_write_data *old)
++{
++	struct nfs_write_data *new;	/* per call: must not be static */
++
++	new = nfs_commitdata_alloc();
++	if (!new)
++		goto out;
++	kref_init(&new->refcount);
++	new->parent      = old;
++	kref_get(&old->refcount);	/* the clone holds a ref on its parent */
++	new->inode       = old->inode;
++	new->cred        = old->cred;
++	new->args.offset = 0;
++	new->args.count  = 0;
++	new->res.count   = 0;
++	new->res.fattr   = &new->fattr;
++	nfs_fattr_init(&new->fattr);
++	new->res.verf    = &new->verf;
++	new->args.context = get_nfs_open_context(old->args.context);
++	new->pdata.lseg = NULL;
++	new->pdata.call_ops = old->pdata.call_ops;
++	new->pdata.how = old->pdata.how;
++out:
++	return new;
++}
++
++static void filelayout_commit_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
++
++	wdata->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static struct rpc_call_ops filelayout_commit_call_ops = {
++	.rpc_call_prepare = nfs_write_prepare,
++	.rpc_call_done = filelayout_commit_call_done,
++	.rpc_release = filelayout_write_release,
++};
++
++/*
++ * Execute a COMMIT op to the MDS or to each data server on which a page
++ * in 'pages' exists.
++ * Invoke the pnfs_commit_complete callback.
++ */
++enum pnfs_try_status
++filelayout_commit(struct nfs_write_data *data, int sync)
++{
++	LIST_HEAD(head);
++	struct nfs_page *req;
++	loff_t file_offset = 0;
++	u16 idx, i;
++	struct list_head **ds_page_list = NULL;
++	u16 *indices_used;
++	int num_indices_seen = 0;
++	const struct rpc_call_ops *call_ops;
++	struct rpc_clnt *clnt;
++	struct nfs_write_data **clone_list = NULL;
++	struct nfs_write_data *dsdata;
++	struct nfs4_pnfs_ds *ds;
++
++	dprintk("%s data %p sync %d\n", __func__, data, sync);
++
++	/* Alloc room for both in one go */
++	ds_page_list = kzalloc((NFS4_PNFS_MAX_MULTI_CNT + 1) *
++			       (sizeof(u16) + sizeof(struct list_head *)),
++			       GFP_KERNEL);
++	if (!ds_page_list)
++		goto mem_error;
++	indices_used = (u16 *) (ds_page_list + NFS4_PNFS_MAX_MULTI_CNT + 1);
++	/*
++	 * Sort pages based on which ds to send to.
++	 * MDS is given index equal to NFS4_PNFS_MAX_MULTI_CNT.
++	 * Note we are assuming there is only a single lseg in play.
++	 * When that is not true, we could first sort on lseg, then
++	 * sort within each as we do here.
++	 */
++	while (!list_empty(&data->pages)) {
++		req = nfs_list_entry(data->pages.next);
++		nfs_list_remove_request(req);
++		if (!req->wb_lseg ||
++		    ((struct nfs4_filelayout_segment *)
++		     LSEG_LD_DATA(req->wb_lseg))->commit_through_mds)
++			idx = NFS4_PNFS_MAX_MULTI_CNT;
++		else {
++			file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
++			idx = nfs4_fl_calc_ds_index(req->wb_lseg, file_offset);
++		}
++		if (ds_page_list[idx]) {
++			/* Already seen this idx */
++			list_add(&req->wb_list, ds_page_list[idx]);
++		} else {
++			/* New idx not seen so far */
++			list_add_tail(&req->wb_list, &head);
++			indices_used[num_indices_seen++] = idx;
++		}
++		ds_page_list[idx] = &req->wb_list;
++	}
++	/* Once created, clone must be released via call_op */
++	clone_list = kzalloc(num_indices_seen *
++			     sizeof(struct nfs_write_data *), GFP_KERNEL);
++	if (!clone_list)
++		goto mem_error;
++	for (i = 0; i < num_indices_seen - 1; i++) {
++		clone_list[i] = filelayout_clone_write_data(data);
++		if (!clone_list[i])
++			goto mem_error;
++	}
++	clone_list[i] = data;
++	/*
++	 * Now send off the RPCs to each ds.  Note that it is important
++	 * that any RPC to the MDS be sent last (or at least after all
++	 * clones have been made.)
++	 */
++	for (i = 0; i < num_indices_seen; i++) {
++		dsdata = clone_list[i];
++		idx = indices_used[i];
++		list_cut_position(&dsdata->pages, &head, ds_page_list[idx]);
++		if (idx == NFS4_PNFS_MAX_MULTI_CNT) {
++			call_ops = data->pdata.call_ops;;
++			clnt = NFS_CLIENT(dsdata->inode);
++			ds = NULL;
++		} else {
++			struct nfs_fh *fh;
++
++			call_ops = &filelayout_commit_call_ops;
++			req = nfs_list_entry(dsdata->pages.next);
++			ds = nfs4_fl_prepare_ds(req->wb_lseg, idx);
++			if (!ds) {
++				/* Trigger retry of this chunk through MDS */
++				dsdata->task.tk_status = -EIO;
++				data->pdata.call_ops->rpc_release(dsdata);
++				continue;
++			}
++			clnt = ds->ds_clp->cl_rpcclient;
++			dsdata->fldata.ds_nfs_client = ds->ds_clp;
++			file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
++			fh = nfs4_fl_select_ds_fh(req->wb_lseg, file_offset);
++			if (fh)
++				dsdata->args.fh = fh;
++		}
++		dprintk("%s: Initiating commit: %llu USE DS:\n",
++			__func__, file_offset);
++		print_ds(ds);
++
++		/* Send COMMIT to data server */
++		nfs_initiate_commit(dsdata, clnt, call_ops, sync);
++	}
++	kfree(clone_list);
++	kfree(ds_page_list);
++	data->pdata.pnfs_error = 0;
++	return PNFS_ATTEMPTED;
++
++ mem_error:
++	if (clone_list) {
++		for (i = 0; i < num_indices_seen - 1; i++) {
++			if (!clone_list[i])
++				break;
++			data->pdata.call_ops->rpc_release(clone_list[i]);
++		}
++		kfree(clone_list);
++	}
++	kfree(ds_page_list);
++	/* One of these will be empty, but doesn't hurt to do both */
++	nfs_mark_list_commit(&head);
++	nfs_mark_list_commit(&data->pages);
++	data->pdata.call_ops->rpc_release(data);
++	return PNFS_ATTEMPTED;
++}
++
++/* Return the stripesize for the specified file */
++ssize_t
++filelayout_get_stripesize(struct pnfs_layout_type *layoutid)
++{
++	struct nfs4_filelayout *flo = FILE_LO(layoutid);
++
++	return flo->stripe_unit;
++}
++
++/*
++ * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
++ *
++ * return 1 :  coalesce page
++ * return 0 :  don't coalesce page
++ */
++int
++filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
++		   struct nfs_page *req)
++{
++	u64 p_stripe, r_stripe;
++
++	if (pgio->pg_boundary == 0)
++		return 1;
++	p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
++	r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
++
++	do_div(p_stripe, pgio->pg_boundary);
++	do_div(r_stripe, pgio->pg_boundary);
++
++	return (p_stripe == r_stripe);
++}
++
++struct layoutdriver_io_operations filelayout_io_operations = {
++	.commit                  = filelayout_commit,
++	.read_pagelist           = filelayout_read_pagelist,
++	.write_pagelist          = filelayout_write_pagelist,
++	.alloc_layout            = filelayout_alloc_layout,
++	.free_layout             = filelayout_free_layout,
++	.alloc_lseg              = filelayout_alloc_lseg,
++	.free_lseg               = filelayout_free_lseg,
++	.initialize_mountpoint   = filelayout_initialize_mountpoint,
++	.uninitialize_mountpoint = filelayout_uninitialize_mountpoint,
++};
++
++struct layoutdriver_policy_operations filelayout_policy_operations = {
++	.flags                 = PNFS_USE_RPC_CODE,
++	.get_stripesize        = filelayout_get_stripesize,
++	.pg_test               = filelayout_pg_test,
++};
++
++struct pnfs_layoutdriver_type filelayout_type = {
++	.id = LAYOUT_NFSV4_1_FILES,
++	.name = "LAYOUT_NFSV4_1_FILES",
++	.ld_io_ops = &filelayout_io_operations,
++	.ld_policy_ops = &filelayout_policy_operations,
++};
++
++static int __init nfs4filelayout_init(void)
++{
++	printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n",
++	       __func__);
++
++	/*
++	 * Need to register file_operations struct with global list to indicate
++	 * that NFS4 file layout is a possible pNFS I/O module
++	 */
++	pnfs_callback_ops = pnfs_register_layoutdriver(&filelayout_type);
++
++	return 0;
++}
++
++static void __exit nfs4filelayout_exit(void)
++{
++	printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n",
++	       __func__);
++
++	/* Unregister NFS4 file layout driver with pNFS client*/
++	pnfs_unregister_layoutdriver(&filelayout_type);
++}
++
++module_init(nfs4filelayout_init);
++module_exit(nfs4filelayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-08-23 12:09:03.334491472 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-08-23 12:09:03.335501543 -0400
+@@ -0,0 +1,636 @@
++/*
++ *  linux/fs/nfs/nfs4filelayoutdev.c
++ *
++ *  Device operations for the pnfs nfs4 file layout driver.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *  Garth Goodson   <Garth.Goodson@netapp.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/hash.h>
++
++#include <linux/nfs4.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_xdr.h>
++
++#include <asm/div64.h>
++
++#include <linux/utsname.h>
++#include <linux/vmalloc.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/pnfs_xdr.h>
++#include "nfs4filelayout.h"
++#include "internal.h"
++#include "nfs4_fs.h"
++
++#define NFSDBG_FACILITY		NFSDBG_PNFS_LD
++
++DEFINE_SPINLOCK(nfs4_ds_cache_lock);
++static LIST_HEAD(nfs4_data_server_cache);
++
++void
++print_ds(struct nfs4_pnfs_ds *ds)
++{
++	if (ds == NULL) {
++		dprintk("%s NULL device \n", __func__);
++		return;
++	}
++	dprintk("        ip_addr %x\n", ntohl(ds->ds_ip_addr));
++	dprintk("        port %hu\n", ntohs(ds->ds_port));
++	dprintk("        client %p\n", ds->ds_clp);
++	dprintk("        ref count %d\n", atomic_read(&ds->ds_count));
++	if (ds->ds_clp)
++		dprintk("        cl_exchange_flags %x\n",
++					    ds->ds_clp->cl_exchange_flags);
++	dprintk("        ip:port %s\n", ds->r_addr);
++}
++
++void
++print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
++{
++	int i;
++
++	dprintk("%s dsaddr->ds_num %d\n", __func__,
++		dsaddr->ds_num);
++	for (i = 0; i < dsaddr->ds_num; i++)
++		print_ds(dsaddr->ds_list[i]);
++}
++
++/* Debug helper: 64bit major/minor view of the deviceid, in a static buffer */
++char *
++deviceid_fmt(const struct pnfs_deviceid *dev_id)
++{
++	static char buf[2 * 20 + 2]; /* two u64 (<= 20 digits), ' ', NUL */
++	uint32_t *p = (uint32_t *)dev_id->data;
++	uint64_t major, minor;
++
++	p = xdr_decode_hyper(p, &major);
++	p = xdr_decode_hyper(p, &minor);
++
++	snprintf(buf, sizeof(buf), "%08llu %08llu", major, minor);
++	return buf;
++}
++
++/* nfs4_ds_cache_lock is held */
++static inline struct nfs4_pnfs_ds *
++_data_server_lookup(u32 ip_addr, u32 port)
++{
++	struct nfs4_pnfs_ds *ds;
++
++	dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
++			ntohl(ip_addr), ntohs(port));
++
++	list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
++		if (ds->ds_ip_addr == ip_addr &&
++		    ds->ds_port == port) {
++			return ds;
++		}
++	}
++	return NULL;
++}
++
++/* Set ds->ds_clp: share the MDS nfs_client or set up one for the DS */
++static int
++nfs4_pnfs_ds_create(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
++{
++	struct nfs_server	*tmp;
++	struct sockaddr_in	sin;
++	struct rpc_clnt 	*mds_clnt = mds_srv->client;
++	struct nfs_client	*clp = mds_srv->nfs_client;
++	struct sockaddr		*mds_addr;
++	int err = 0;
++
++	dprintk("--> %s ip:port %s au_flavor %d\n", __func__,
++		ds->r_addr, mds_clnt->cl_auth->au_flavor);
++
++	sin.sin_family = AF_INET;
++	sin.sin_addr.s_addr = ds->ds_ip_addr;
++	sin.sin_port = ds->ds_port;
++
++	/*
++	 * If this DS is also the MDS, use the MDS session only if the
++	 * MDS exchangeid flags show the EXCHGID4_FLAG_USE_PNFS_DS pNFS role.
++	 */
++	mds_addr = (struct sockaddr *)&clp->cl_addr;
++	if (nfs_sockaddr_cmp((struct sockaddr *)&sin, mds_addr)) {
++		if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS)) {
++			printk(KERN_INFO "ip:port %s is not a pNFS Data "
++				"Server\n", ds->r_addr);
++			err = -ENODEV;
++		} else {
++			atomic_inc(&clp->cl_count);
++			ds->ds_clp = clp;
++			dprintk("%s Using MDS Session for DS\n", __func__);
++		}
++		goto out;
++	}
++
++	/* Temporary server for nfs4_set_client */
++	tmp = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
++	if (!tmp) {
++		err = -ENOMEM;	/* do not report success with no ds_clp */
++		goto out;
++	}
++
++	/*
++	 * Use the MDS retrans, timeout interval and authflavor values, and
++	 * the MDS cl_ipaddr so as to use the same co_ownerid as the MDS.
++	 */
++	err = nfs4_set_client(tmp,
++			      mds_srv->nfs_client->cl_hostname,
++			      (struct sockaddr *)&sin,
++			      sizeof(struct sockaddr),
++			      mds_srv->nfs_client->cl_ipaddr,
++			      mds_clnt->cl_auth->au_flavor,
++			      IPPROTO_TCP,
++			      mds_clnt->cl_xprt->timeout,
++			      1 /* minorversion */);
++	if (err < 0)
++		goto out_free;
++
++	clp = tmp->nfs_client;
++
++	/* Ask for only the EXCHGID4_FLAG_USE_PNFS_DS pNFS role */
++	dprintk("%s EXCHANGE_ID for clp %p\n", __func__, clp);
++	clp->cl_exchange_flags = EXCHGID4_FLAG_USE_PNFS_DS;
++
++	err = nfs4_recover_expired_lease(clp);
++	if (!err)
++		err = nfs4_check_client_ready(clp);
++	if (err)
++		goto out_put;
++
++	if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS)) {
++		printk(KERN_INFO "ip:port %s is not a pNFS Data Server\n",
++			ds->r_addr);
++		err = -ENODEV;
++		goto out_put;
++	}
++	/*
++	 * Mask the (possibly) returned EXCHGID4_FLAG_USE_PNFS_MDS pNFS role
++	 * The is_ds_only_session depends on this.
++	 */
++	clp->cl_exchange_flags &= ~EXCHGID4_FLAG_USE_PNFS_MDS;
++	/*
++	 * DS lease = MDS lease, renewal is scheduled in create_session
++	 */
++	spin_lock(&mds_srv->nfs_client->cl_lock);
++	clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
++	spin_unlock(&mds_srv->nfs_client->cl_lock);
++	clp->cl_last_renewal = jiffies;
++
++	clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
++	ds->ds_clp = clp;
++
++	dprintk("%s: ip=%x, port=%hu, rpcclient %p\n", __func__,
++				ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
++				clp->cl_rpcclient);
++out_free:
++	kfree(tmp);
++out:
++	dprintk("%s Returns %d\n", __func__, err);
++	return err;
++out_put:
++	nfs_put_client(clp);
++	goto out_free;
++}
++
++static void
++destroy_ds(struct nfs4_pnfs_ds *ds)
++{
++	dprintk("--> %s\n", __func__);
++	print_ds(ds);
++
++	if (ds->ds_clp)
++		nfs_put_client(ds->ds_clp);
++	kfree(ds);
++}
++
++static void
++nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
++{
++	struct nfs4_pnfs_ds *ds;
++	int i;
++
++	dprintk("%s: device id=%s\n", __func__,
++		deviceid_fmt(&dsaddr->deviceid.de_id));
++
++	for (i = 0; i < dsaddr->ds_num; i++) {
++		ds = dsaddr->ds_list[i];
++		if (ds != NULL) {
++			if (atomic_dec_and_lock(&ds->ds_count,
++						&nfs4_ds_cache_lock)) {
++				list_del_init(&ds->ds_node);
++				spin_unlock(&nfs4_ds_cache_lock);
++				destroy_ds(ds);
++			}
++		}
++	}
++	kfree(dsaddr->stripe_indices);
++	kfree(dsaddr);
++}
++
++void
++nfs4_fl_free_deviceid_callback(struct kref *kref)
++{
++	struct nfs4_deviceid *device =
++		container_of(kref, struct nfs4_deviceid, de_kref);
++	struct nfs4_file_layout_dsaddr *dsaddr =
++		container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
++
++	nfs4_fl_free_deviceid(dsaddr);
++}
++
++static void
++nfs4_pnfs_ds_add(struct inode *inode, struct nfs4_pnfs_ds **dsp,
++		 u32 ip_addr, u32 port, char *r_addr, int len)
++{
++	struct nfs4_pnfs_ds *tmp_ds, *ds;
++
++	*dsp = NULL;
++
++	ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL);
++	if (!ds)
++		return;
++
++	spin_lock(&nfs4_ds_cache_lock);
++	tmp_ds = _data_server_lookup(ip_addr, port);
++	if (tmp_ds == NULL) {
++		ds->ds_ip_addr = ip_addr;
++		ds->ds_port = port;
++		strncpy(ds->r_addr, r_addr, len);
++		atomic_set(&ds->ds_count, 1);
++		INIT_LIST_HEAD(&ds->ds_node);
++		ds->ds_clp = NULL;
++		list_add(&ds->ds_node, &nfs4_data_server_cache);
++		*dsp = ds;
++		dprintk("%s add new data server ip 0x%x\n", __func__,
++				ds->ds_ip_addr);
++		spin_unlock(&nfs4_ds_cache_lock);
++	} else {
++		atomic_inc(&tmp_ds->ds_count);
++		*dsp = tmp_ds;
++		dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
++				__func__, tmp_ds->ds_ip_addr,
++				atomic_read(&tmp_ds->ds_count));
++		spin_unlock(&nfs4_ds_cache_lock);
++		kfree(ds);
++	}
++}
++
++static struct nfs4_pnfs_ds *
++decode_and_add_ds(uint32_t **pp, struct inode *inode)
++{
++	struct nfs4_pnfs_ds *ds = NULL;
++	char r_addr[29]; /* max size of ip/port string */
++	int len;
++	u32 ip_addr, port;
++	int tmp[6];
++	uint32_t *p = *pp;
++
++	dprintk("%s enter\n", __func__);
++	/* check and skip r_netid */
++	len = be32_to_cpup(p++);
++	/* "tcp" */
++	if (len != 3) {
++		printk("%s: ERROR: non TCP r_netid len %d\n",
++			__func__, len);
++		goto out_err;
++	}
++	/*
++	 * Read the bytes into a temporary buffer
++	 * XXX: should probably sanity check them
++	 */
++	tmp[0] = be32_to_cpup(p++);
++
++	len = be32_to_cpup(p++);
++	if (len >= sizeof(r_addr)) {
++		printk("%s: ERROR: Device ip/port too long (%d)\n",
++			__func__, len);
++		goto out_err;
++	}
++	memcpy(r_addr, p, len);
++	p += XDR_QUADLEN(len);
++	*pp = p;
++	r_addr[len] = '\0';
++	sscanf(r_addr, "%d.%d.%d.%d.%d.%d", &tmp[0], &tmp[1],
++	       &tmp[2], &tmp[3], &tmp[4], &tmp[5]);
++	ip_addr = htonl((tmp[0]<<24) | (tmp[1]<<16) | (tmp[2]<<8) | (tmp[3]));
++	port = htons((tmp[4] << 8) | (tmp[5]));
++
++	nfs4_pnfs_ds_add(inode, &ds, ip_addr, port, r_addr, len);
++
++	dprintk("%s: addr:port string = %s\n", __func__, r_addr);
++	return ds;
++out_err:
++	dprintk("%s returned NULL\n", __func__);
++	return NULL;
++}
++
++/* Decode opaque device data and return the result */
++static struct nfs4_file_layout_dsaddr*
++decode_device(struct inode *ino, struct pnfs_device *pdev)
++{
++	int i, dummy;
++	u32 cnt, num;
++	u8 *indexp;
++	uint32_t *p = (u32 *)pdev->area, *indicesp;
++	struct nfs4_file_layout_dsaddr *dsaddr;
++
++	/* Get the stripe count (number of stripe index) */
++	cnt = be32_to_cpup(p++);
++	dprintk("%s stripe count  %d\n", __func__, cnt);
++	if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
++		printk(KERN_WARNING "%s: stripe count %d greater than "
++		       "supported maximum %d\n", __func__,
++			cnt, NFS4_PNFS_MAX_STRIPE_CNT);
++		goto out_err;
++	}
++
++	/* Check the multipath list count */
++	indicesp = p;
++	p += XDR_QUADLEN(cnt << 2);
++	num = be32_to_cpup(p++);
++	dprintk("%s ds_num %u\n", __func__, num);
++	if (num > NFS4_PNFS_MAX_MULTI_CNT) {
++		printk(KERN_WARNING "%s: multipath count %d greater than "
++			"supported maximum %d\n", __func__,
++			num, NFS4_PNFS_MAX_MULTI_CNT);
++		goto out_err;
++	}
++	dsaddr = kzalloc(sizeof(*dsaddr) +
++			(sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
++			GFP_KERNEL);
++	if (!dsaddr)
++		goto out_err;
++
++	dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
++	if (!dsaddr->stripe_indices)
++		goto out_err_free;
++
++	dsaddr->stripe_count = cnt;
++	dsaddr->ds_num = num;
++
++	memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id,
++	       NFS4_PNFS_DEVICEID4_SIZE);
++
++	/* Go back and read stripe indices */
++	p = indicesp;
++	indexp = &dsaddr->stripe_indices[0];
++	for (i = 0; i < dsaddr->stripe_count; i++) {
++		dummy = be32_to_cpup(p++);
++		*indexp = dummy; /* bound by NFS4_PNFS_MAX_MULTI_CNT */
++		indexp++;
++	}
++	/* Skip already read multipath list count */
++	p++;
++
++	for (i = 0; i < dsaddr->ds_num; i++) {
++		int j;
++
++		dummy = be32_to_cpup(p++); /* multipath count */
++		if (dummy > 1) {
++			printk(KERN_WARNING
++			       "%s: Multipath count %d not supported, "
++			       "skipping all greater than 1\n", __func__,
++				dummy);
++		}
++		for (j = 0; j < dummy; j++) {
++			if (j == 0) {
++				dsaddr->ds_list[i] = decode_and_add_ds(&p, ino);
++				if (dsaddr->ds_list[i] == NULL)
++					goto out_err_free;
++			} else {
++				u32 len;
++				/* skip extra multipath */
++				len = be32_to_cpup(p++);
++				p += XDR_QUADLEN(len);
++				len = be32_to_cpup(p++);
++				p += XDR_QUADLEN(len);
++				continue;
++			}
++		}
++	}
++	nfs4_init_deviceid_node(&dsaddr->deviceid);
++
++	return dsaddr;
++
++out_err_free:
++	nfs4_fl_free_deviceid(dsaddr);
++out_err:
++	dprintk("%s ERROR: returning NULL\n", __func__);
++	return NULL;
++}
++
++/*
++ * Decode the opaque device in 'dev' with decode_device() and add it to the
++ * nfs_client's cl_devid_cache.  Returns NULL if the decode fails.
++ * If the deviceid is already cached, nfs4_add_deviceid will return
++ * a pointer to the cached struct and throw away the new.
++ */
++static struct nfs4_file_layout_dsaddr*
++decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
++{
++	struct nfs4_file_layout_dsaddr *dsaddr;
++	struct nfs4_deviceid *d; /* node handed back by nfs4_add_deviceid() */
++
++	dsaddr = decode_device(inode, dev);
++	if (!dsaddr) {
++		printk(KERN_WARNING "%s: Could not decode or add device\n",
++			__func__);
++		return NULL;
++	}
++
++	d = nfs4_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
++			      &dsaddr->deviceid);
++
++	return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
++}
++
++/*
++ * Issue GETDEVICEINFO for dev_id, decode the reply and add the device to
++ * the client's device-id cache.
++ * Returns the dsaddr handed back by decode_and_add_device(), or NULL if an
++ * allocation, the vmap, the GETDEVICEINFO call or the decode fails.
++ */
++struct nfs4_file_layout_dsaddr *
++get_device_info(struct inode *inode, struct pnfs_deviceid *dev_id)
++{
++	struct pnfs_device *pdev = NULL;
++	u32 max_resp_sz;
++	int max_pages;
++	struct page **pages = NULL;
++	struct nfs4_file_layout_dsaddr *dsaddr = NULL;
++	int rc, i;
++	struct nfs_server *server = NFS_SERVER(inode);
++
++	/* Base GETDEVICEINFO's maxcount on the session max response size */
++	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
++	max_pages = max_resp_sz >> PAGE_SHIFT;
++	dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
++		__func__, inode, max_resp_sz, max_pages);
++
++	pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
++	if (pdev == NULL)
++		return NULL;
++
++	pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
++	if (pages == NULL) {
++		kfree(pdev);
++		return NULL;
++	}
++	for (i = 0; i < max_pages; i++) {
++		pages[i] = alloc_page(GFP_KERNEL);
++		if (!pages[i])
++			goto out_free;
++	}
++
++	/* set pdev->area */
++	pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
++	if (!pdev->area)
++		goto out_free;
++
++	memcpy(&pdev->dev_id, dev_id, NFS4_PNFS_DEVICEID4_SIZE);
++	pdev->layout_type = LAYOUT_NFSV4_1_FILES;
++	pdev->pages = pages;
++	pdev->pgbase = 0;
++	pdev->pglen = PAGE_SIZE * max_pages;
++	pdev->mincount = 0;
++	/* TODO: Update types when CB_NOTIFY_DEVICEID is available */
++	pdev->dev_notify_types = 0;
++
++	rc = pnfs_callback_ops->nfs_getdeviceinfo(server, pdev);
++	dprintk("%s getdevice info returns %d\n", __func__, rc);
++	if (rc)
++		goto out_free;
++
++	/* Decode the new device and add it to the cache of known devices */
++	dsaddr = decode_and_add_device(inode, pdev);
++out_free:
++	if (pdev->area != NULL)
++		vunmap(pdev->area);
++	/*
++	 * pages[] was zeroed and is filled in order, so stop at the first
++	 * NULL slot: alloc_page() may have failed part-way through.
++	 */
++	for (i = 0; i < max_pages && pages[i] != NULL; i++)
++		__free_page(pages[i]);
++	kfree(pages);
++	kfree(pdev);
++	dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
++	return dsaddr;
++}
++
++struct nfs4_file_layout_dsaddr * /* dsaddr cached for id, or NULL */
++nfs4_pnfs_device_item_find(struct nfs_client *clp, struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *d; /* result of the cache lookup; may be NULL */
++
++	d = nfs4_find_deviceid(clp->cl_devid_cache, id);
++	dprintk("%s device id (%s) nfs4_deviceid %p\n", __func__,
++		deviceid_fmt(id), d);
++	return (d == NULL) ? NULL : /* else map the node back to its dsaddr */
++		container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
++}
++
++/*
++ * Stripe slot for offset: res = (offset - pattern_offset) / stripe_unit,
++ * then (res + first_stripe_index) % dsaddr->stripe_count.
++ */
++static inline u32
++_nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	u64 tmp;
++
++	tmp = offset - flseg->pattern_offset;
++	do_div(tmp, flseg->stripe_unit); /* tmp now holds the quotient */
++	tmp += flseg->first_stripe_index;
++	return do_div(tmp, FILE_DSADDR(lseg)->stripe_count); /* remainder */
++}
++
++u32 /* stripe_indices[] entry for the stripe slot that offset falls in */
++nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	u32 j; /* slot in stripe_indices[] selected by offset */
++
++	j = _nfs4_fl_calc_j_index(lseg, offset);
++	return FILE_DSADDR(lseg)->stripe_indices[j];
++}
++
++struct nfs_fh * /* entry of flseg->fh_array for offset, or NULL */
++nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	u32 i; /* index into flseg->fh_array */
++
++	if (flseg->stripe_type == STRIPE_SPARSE) {
++		if (flseg->num_fh == 1)
++			i = 0; /* single handle: used for every offset */
++		else if (flseg->num_fh == 0)
++			return NULL; /* no handle in the segment to select */
++		else
++			i = nfs4_fl_calc_ds_index(lseg, offset);
++	} else /* not sparse: index by stripe slot */
++		i = _nfs4_fl_calc_j_index(lseg, offset);
++	return &flseg->fh_array[i];
++}
++
++struct nfs4_pnfs_ds * /* ds_list[ds_idx], or NULL if absent or setup fails */
++nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	struct nfs4_file_layout_dsaddr *dsaddr;
++
++	dsaddr = FILE_DSADDR(lseg); /* NOTE(review): ds_idx is not range-checked */
++	if (dsaddr->ds_list[ds_idx] == NULL) {
++		printk(KERN_ERR "%s: No data server for device id (%s)!!\n",
++			__func__, deviceid_fmt(&flseg->dev_id));
++		return NULL;
++	}
++
++	if (!dsaddr->ds_list[ds_idx]->ds_clp) {
++		int err; /* ds_clp unset: call nfs4_pnfs_ds_create() first */
++
++		err = nfs4_pnfs_ds_create(PNFS_NFS_SERVER(lseg->layout),
++					  dsaddr->ds_list[ds_idx]);
++		if (err) {
++			printk(KERN_ERR "%s nfs4_pnfs_ds_create error %d\n",
++			       __func__, err);
++			return NULL;
++		}
++	}
++	dprintk("%s: dev_id=%s, ds_idx=%u\n",
++		__func__, deviceid_fmt(&flseg->dev_id), ds_idx);
++
++	return dsaddr->ds_list[ds_idx];
++}
++
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-08-23 12:09:03.335501543 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-08-23 12:09:03.335501543 -0400
+@@ -0,0 +1,97 @@
++/*
++ *  pnfs_nfs4filelayout.h
++ *
++ *  NFSv4 file layout driver data structures.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef FS_NFS_NFS4FILELAYOUT_H
++#define FS_NFS_NFS4FILELAYOUT_H
++
++#include <linux/kref.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/pnfs_xdr.h>
++
++#define NFS4_PNFS_DEV_HASH_BITS 5
++#define NFS4_PNFS_DEV_HASH_SIZE (1 << NFS4_PNFS_DEV_HASH_BITS)
++#define NFS4_PNFS_DEV_HASH_MASK (NFS4_PNFS_DEV_HASH_SIZE - 1)
++
++#define NFS4_PNFS_MAX_STRIPE_CNT 4096
++#define NFS4_PNFS_MAX_MULTI_CNT  64 /* 256 fit into a u8 stripe_index */
++#define NFS4_PNFS_MAX_MULTI_DS   2
++
++#define FILE_DSADDR(lseg) (container_of((lseg)->deviceid, \
++					struct nfs4_file_layout_dsaddr, \
++					deviceid)) /* dsaddr embedding lseg's deviceid */
++
++enum stripetype4 {
++	STRIPE_SPARSE = 1,
++	STRIPE_DENSE = 2
++};
++
++/* Individual ip address */
++struct nfs4_pnfs_ds {
++	struct list_head	ds_node;  /* nfs4_pnfs_dev_hlist dev_dslist */
++	u32 			ds_ip_addr;
++	u32 			ds_port;
++	struct nfs_client	*ds_clp;
++	atomic_t		ds_count;
++	char r_addr[29];
++};
++
++struct nfs4_file_layout_dsaddr {
++	struct nfs4_deviceid	deviceid; /* node FILE_DSADDR() maps back from */
++	u32 			stripe_count; /* entries in stripe_indices[] */
++	u8			*stripe_indices; /* kzalloc'ed by decode_device() */
++	u32			ds_num; /* multipath list count from the reply */
++	struct nfs4_pnfs_ds	*ds_list[1]; /* kzalloc sized for ds_num slots */
++};
++
++struct nfs4_pnfs_dev_hlist {
++	rwlock_t		dev_lock;
++	struct hlist_head	dev_list[NFS4_PNFS_DEV_HASH_SIZE];
++};
++
++struct nfs4_filelayout_segment {
++	u32 stripe_type; /* tested against STRIPE_SPARSE when picking a fh */
++	u32 commit_through_mds;
++	u32 stripe_unit; /* divisor applied to (offset - pattern_offset) */
++	u32 first_stripe_index; /* added to the stripe number before the modulo */
++	u64 pattern_offset; /* subtracted from the file offset */
++	struct pnfs_deviceid dev_id;
++	unsigned int num_fh; /* tested for 0 and 1 by nfs4_fl_select_ds_fh() */
++	struct nfs_fh *fh_array; /* indexed by nfs4_fl_select_ds_fh() */
++};
++
++struct nfs4_filelayout {
++	struct pnfs_layout_type fl_layout;
++	u32 stripe_unit;
++};
++
++extern struct nfs_fh *
++nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, loff_t offset);
++
++static inline struct nfs4_filelayout *
++FILE_LO(struct pnfs_layout_type *lo)
++{
++	return container_of(lo, struct nfs4_filelayout, fl_layout);
++}
++
++extern struct pnfs_client_operations *pnfs_callback_ops;
++
++extern void nfs4_fl_free_deviceid_callback(struct kref *);
++extern void print_ds(struct nfs4_pnfs_ds *ds);
++char *deviceid_fmt(const struct pnfs_deviceid *dev_id);
++u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, loff_t offset);
++struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
++					u32 ds_idx);
++extern struct nfs4_file_layout_dsaddr *
++nfs4_pnfs_device_item_find(struct nfs_client *, struct pnfs_deviceid *dev_id);
++struct nfs4_file_layout_dsaddr *
++get_device_info(struct inode *inode, struct pnfs_deviceid *dev_id);
++
++#endif /* FS_NFS_NFS4FILELAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nfs4_fs.h
+--- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-08-23 12:08:29.047512264 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-23 12:09:03.336490079 -0400
+@@ -45,8 +45,28 @@ enum nfs4_client_state {
+ 	NFS4CLNT_RECLAIM_NOGRACE,
+ 	NFS4CLNT_DELEGRETURN,
+ 	NFS4CLNT_SESSION_RESET,
+-	NFS4CLNT_SESSION_DRAINING,
+ 	NFS4CLNT_RECALL_SLOT,
++	NFS4CLNT_LAYOUT_RECALL,
++};
++
++enum nfs4_session_state {
++	NFS4_SESSION_INITING,
++	NFS4_SESSION_DRAINING,
++};
++
++struct nfs4_minor_version_ops {
++	u32	minor_version;
++
++	int	(*call_sync)(struct nfs_server *server,
++			struct rpc_message *msg,
++			struct nfs4_sequence_args *args,
++			struct nfs4_sequence_res *res,
++			int cache_reply);
++	int	(*validate_stateid)(struct nfs_delegation *,
++			const nfs4_stateid *);
++	const struct nfs4_state_recovery_ops *reboot_recovery_ops;
++	const struct nfs4_state_recovery_ops *nograce_recovery_ops;
++	const struct nfs4_state_maintenance_ops *state_renewal_ops;
+ };
+ 
+ /*
+@@ -89,7 +109,6 @@ struct nfs_unique_id {
+  */
+ struct nfs4_state_owner {
+ 	struct nfs_unique_id so_owner_id;
+-	struct nfs_client    *so_client;
+ 	struct nfs_server    *so_server;
+ 	struct rb_node	     so_client_node;
+ 
+@@ -99,7 +118,6 @@ struct nfs4_state_owner {
+ 	atomic_t	     so_count;
+ 	unsigned long	     so_flags;
+ 	struct list_head     so_states;
+-	struct list_head     so_delegations;
+ 	struct nfs_seqid_counter so_seqid;
+ 	struct rpc_sequence  so_sequence;
+ };
+@@ -125,10 +143,20 @@ enum {
+  * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
+  */
+ 
++struct nfs4_lock_owner {
++	unsigned int lo_type;
++#define NFS4_ANY_LOCK_TYPE	(0U)
++#define NFS4_FLOCK_LOCK_TYPE	(1U << 0)
++#define NFS4_POSIX_LOCK_TYPE	(1U << 1)
++	union {
++		fl_owner_t posix_owner;
++		pid_t flock_owner;
++	} lo_u;
++};
++
+ struct nfs4_lock_state {
+ 	struct list_head	ls_locks;	/* Other lock stateids */
+ 	struct nfs4_state *	ls_state;	/* Pointer to open state */
+-	fl_owner_t		ls_owner;	/* POSIX lock owner */
+ #define NFS_LOCK_INITIALIZED 1
+ 	int			ls_flags;
+ 	struct nfs_seqid_counter	ls_seqid;
+@@ -136,6 +164,7 @@ struct nfs4_lock_state {
+ 	struct nfs_unique_id	ls_id;
+ 	nfs4_stateid		ls_stateid;
+ 	atomic_t		ls_count;
++	struct nfs4_lock_owner	ls_owner;
+ };
+ 
+ /* bits for nfs4_state->flags */
+@@ -219,22 +248,34 @@ extern int nfs4_open_revalidate(struct i
+ extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
+ extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
+ 		struct nfs4_fs_locations *fs_locations, struct page *page);
++extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
+ 
+-extern struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[];
+-extern struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[];
+ #if defined(CONFIG_NFS_V4_1)
+-extern int nfs4_setup_sequence(struct nfs_client *clp,
++static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
++{
++	return server->nfs_client->cl_session;
++}
++
++extern int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
+ 		int cache_reply, struct rpc_task *task);
+ extern void nfs4_destroy_session(struct nfs4_session *session);
+ extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
++extern int nfs4_proc_exchange_id(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_proc_create_session(struct nfs_client *);
+ extern int nfs4_proc_destroy_session(struct nfs4_session *);
+ extern int nfs4_init_session(struct nfs_server *server);
+ extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
+ 		struct nfs_fsinfo *fsinfo);
+ #else /* CONFIG_NFS_v4_1 */
+-static inline int nfs4_setup_sequence(struct nfs_client *clp,
++static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
++{
++	return NULL;
++}
++
++static inline int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
+ 		int cache_reply, struct rpc_task *task)
+ {
+@@ -247,12 +288,12 @@ static inline int nfs4_init_session(stru
+ }
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+-extern struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[];
++extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
+ 
+ extern const u32 nfs4_fattr_bitmap[2];
+ extern const u32 nfs4_statfs_bitmap[2];
+ extern const u32 nfs4_pathconf_bitmap[2];
+-extern const u32 nfs4_fsinfo_bitmap[2];
++extern const u32 nfs4_fsinfo_bitmap[3];
+ extern const u32 nfs4_fs_locations_bitmap[2];
+ 
+ /* nfs4renewd.c */
+@@ -284,7 +325,7 @@ extern void nfs41_handle_sequence_flag_e
+ extern void nfs41_handle_recall_slot(struct nfs_client *clp);
+ extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
+ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
+-extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
++extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
+ 
+ extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
+ extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
+@@ -293,6 +334,7 @@ extern void nfs_increment_lock_seqid(int
+ extern void nfs_release_seqid(struct nfs_seqid *seqid);
+ extern void nfs_free_seqid(struct nfs_seqid *seqid);
+ 
++/* write.c */
+ extern const nfs4_stateid zero_stateid;
+ 
+ /* nfs4xdr.c */
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-08-23 12:08:29.050481368 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-23 12:09:03.339481253 -0400
+@@ -49,12 +49,15 @@
+ #include <linux/mount.h>
+ #include <linux/module.h>
+ #include <linux/sunrpc/bc_xprt.h>
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs4_pnfs.h>
+ 
+ #include "nfs4_fs.h"
+ #include "delegation.h"
+ #include "internal.h"
+ #include "iostat.h"
+ #include "callback.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_PROC
+ 
+@@ -67,7 +70,7 @@ struct nfs4_opendata;
+ static int _nfs4_proc_open(struct nfs4_opendata *data);
+ static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
+ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
+-static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
++static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, struct nfs_client *);
+ static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
+ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
+ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
+@@ -125,11 +128,16 @@ const u32 nfs4_pathconf_bitmap[2] = {
+ 	0
+ };
+ 
+-const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
++const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE
+ 			| FATTR4_WORD0_MAXREAD
+ 			| FATTR4_WORD0_MAXWRITE
+ 			| FATTR4_WORD0_LEASE_TIME,
++#ifdef CONFIG_NFS_V4_1
++			FATTR4_WORD1_FS_LAYOUT_TYPES,
++			FATTR4_WORD2_LAYOUT_BLKSIZE
++#else /* CONFIG_NFS_V4_1 */
+ 			0
++#endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+ const u32 nfs4_fs_locations_bitmap[2] = {
+@@ -356,7 +364,7 @@ static void nfs41_check_drain_session_co
+ {
+ 	struct rpc_task *task;
+ 
+-	if (!test_bit(NFS4CLNT_SESSION_DRAINING, &ses->clp->cl_state)) {
++	if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
+ 		task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq);
+ 		if (task)
+ 			rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+@@ -370,12 +378,11 @@ static void nfs41_check_drain_session_co
+ 	complete(&ses->complete);
+ }
+ 
+-static void nfs41_sequence_free_slot(const struct nfs_client *clp,
+-			      struct nfs4_sequence_res *res)
++static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
+ {
+ 	struct nfs4_slot_table *tbl;
+ 
+-	tbl = &clp->cl_session->fc_slot_table;
++	tbl = &res->sr_session->fc_slot_table;
+ 	if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) {
+ 		/* just wake up the next guy waiting since
+ 		 * we may have not consumed a slot after all */
+@@ -385,18 +392,17 @@ static void nfs41_sequence_free_slot(con
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+ 	nfs4_free_slot(tbl, res->sr_slotid);
+-	nfs41_check_drain_session_complete(clp->cl_session);
++	nfs41_check_drain_session_complete(res->sr_session);
+ 	spin_unlock(&tbl->slot_tbl_lock);
+ 	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+ }
+ 
+-static void nfs41_sequence_done(struct nfs_client *clp,
+-				struct nfs4_sequence_res *res,
+-				int rpc_status)
++static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
+ {
+ 	unsigned long timestamp;
+ 	struct nfs4_slot_table *tbl;
+ 	struct nfs4_slot *slot;
++	struct nfs_client *clp;
+ 
+ 	/*
+ 	 * sr_status remains 1 if an RPC level error occurred. The server
+@@ -411,13 +417,16 @@ static void nfs41_sequence_done(struct n
+ 	if (res->sr_slotid == NFS4_MAX_SLOT_TABLE)
+ 		goto out;
+ 
++	tbl = &res->sr_session->fc_slot_table;
++	slot = tbl->slots + res->sr_slotid;
++
+ 	/* Check the SEQUENCE operation status */
+-	if (res->sr_status == 0) {
+-		tbl = &clp->cl_session->fc_slot_table;
+-		slot = tbl->slots + res->sr_slotid;
++	switch (res->sr_status) {
++	case 0:
+ 		/* Update the slot's sequence and clientid lease timer */
+ 		++slot->seq_nr;
+ 		timestamp = res->sr_renewal_time;
++		clp = res->sr_session->clp;
+ 		spin_lock(&clp->cl_lock);
+ 		if (time_before(clp->cl_last_renewal, timestamp))
+ 			clp->cl_last_renewal = timestamp;
+@@ -425,11 +434,39 @@ static void nfs41_sequence_done(struct n
+ 		/* Check sequence flags */
+ 		if (atomic_read(&clp->cl_count) > 1)
+ 			nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
++		break;
++	case -NFS4ERR_DELAY:
++		/* The server detected a resend of the RPC call and
++		 * returned NFS4ERR_DELAY as per Section 2.10.6.2
++		 * of RFC5661.
++		 */
++		dprintk("%s: slot=%d seq=%d: Operation in progress\n",
++				__func__, res->sr_slotid, slot->seq_nr);
++		goto out_retry;
++	default:
++		/* Just update the slot sequence no. */
++		++slot->seq_nr;
+ 	}
+ out:
+ 	/* The session may be reset by one of the error handlers. */
+ 	dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
+-	nfs41_sequence_free_slot(clp, res);
++	nfs41_sequence_free_slot(res);
++	return 1;
++out_retry:
++	rpc_delay(task, NFS4_POLL_RETRY_MAX);
++	rpc_restart_call(task);
++	/* FIXME: rpc_restart_call() should be made to return success/fail */
++	if (RPC_ASSASSINATED(task))
++		goto out;
++	return 0;
++}
++
++static int nfs4_sequence_done(struct rpc_task *task,
++			       struct nfs4_sequence_res *res)
++{
++	if (res->sr_session == NULL)
++		return 1;
++	return nfs41_sequence_done(task, res);
+ }
+ 
+ /*
+@@ -480,12 +517,11 @@ static int nfs41_setup_sequence(struct n
+ 	if (res->sr_slotid != NFS4_MAX_SLOT_TABLE)
+ 		return 0;
+ 
+-	memset(res, 0, sizeof(*res));
+ 	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+ 	tbl = &session->fc_slot_table;
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+-	if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state) &&
++	if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
+ 	    !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
+ 		/*
+ 		 * The state manager will wait until the slot table is empty.
+@@ -525,6 +561,7 @@ static int nfs41_setup_sequence(struct n
+ 	res->sr_session = session;
+ 	res->sr_slotid = slotid;
+ 	res->sr_renewal_time = jiffies;
++	res->sr_status_flags = 0;
+ 	/*
+ 	 * sr_status is only set in decode_sequence, and so will remain
+ 	 * set to 1 if an rpc level failure occurs.
+@@ -533,33 +570,36 @@ static int nfs41_setup_sequence(struct n
+ 	return 0;
+ }
+ 
+-int nfs4_setup_sequence(struct nfs_client *clp,
++int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 			struct nfs4_sequence_args *args,
+ 			struct nfs4_sequence_res *res,
+ 			int cache_reply,
+ 			struct rpc_task *task)
+ {
++	struct nfs4_session *session = nfs4_get_session(server);
+ 	int ret = 0;
+ 
++	if (ds_session)
++		session = ds_session;
++	if (session == NULL) {
++		args->sa_session = NULL;
++		res->sr_session = NULL;
++		goto out;
++	}
++
+ 	dprintk("--> %s clp %p session %p sr_slotid %d\n",
+-		__func__, clp, clp->cl_session, res->sr_slotid);
++		__func__, session->clp, session, res->sr_slotid);
+ 
+-	if (!nfs4_has_session(clp))
+-		goto out;
+-	ret = nfs41_setup_sequence(clp->cl_session, args, res, cache_reply,
++	ret = nfs41_setup_sequence(session, args, res, cache_reply,
+ 				   task);
+-	if (ret && ret != -EAGAIN) {
+-		/* terminate rpc task */
+-		task->tk_status = ret;
+-		task->tk_action = NULL;
+-	}
+ out:
+ 	dprintk("<-- %s status=%d\n", __func__, ret);
+ 	return ret;
+ }
+ 
+ struct nfs41_call_sync_data {
+-	struct nfs_client *clp;
++	const struct nfs_server *seq_server;
+ 	struct nfs4_sequence_args *seq_args;
+ 	struct nfs4_sequence_res *seq_res;
+ 	int cache_reply;
+@@ -569,9 +609,9 @@ static void nfs41_call_sync_prepare(stru
+ {
+ 	struct nfs41_call_sync_data *data = calldata;
+ 
+-	dprintk("--> %s data->clp->cl_session %p\n", __func__,
+-		data->clp->cl_session);
+-	if (nfs4_setup_sequence(data->clp, data->seq_args,
++	dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server);
++
++	if (nfs4_setup_sequence(data->seq_server, NULL, data->seq_args,
+ 				data->seq_res, data->cache_reply, task))
+ 		return;
+ 	rpc_call_start(task);
+@@ -587,7 +627,7 @@ static void nfs41_call_sync_done(struct 
+ {
+ 	struct nfs41_call_sync_data *data = calldata;
+ 
+-	nfs41_sequence_done(data->clp, data->seq_res, task->tk_status);
++	nfs41_sequence_done(task, data->seq_res);
+ }
+ 
+ struct rpc_call_ops nfs41_call_sync_ops = {
+@@ -600,8 +640,7 @@ struct rpc_call_ops nfs41_call_priv_sync
+ 	.rpc_call_done = nfs41_call_sync_done,
+ };
+ 
+-static int nfs4_call_sync_sequence(struct nfs_client *clp,
+-				   struct rpc_clnt *clnt,
++static int nfs4_call_sync_sequence(struct nfs_server *server,
+ 				   struct rpc_message *msg,
+ 				   struct nfs4_sequence_args *args,
+ 				   struct nfs4_sequence_res *res,
+@@ -611,13 +650,13 @@ static int nfs4_call_sync_sequence(struc
+ 	int ret;
+ 	struct rpc_task *task;
+ 	struct nfs41_call_sync_data data = {
+-		.clp = clp,
++		.seq_server = server,
+ 		.seq_args = args,
+ 		.seq_res = res,
+ 		.cache_reply = cache_reply,
+ 	};
+ 	struct rpc_task_setup task_setup = {
+-		.rpc_client = clnt,
++		.rpc_client = server->client,
+ 		.rpc_message = msg,
+ 		.callback_ops = &nfs41_call_sync_ops,
+ 		.callback_data = &data
+@@ -642,10 +681,15 @@ int _nfs4_call_sync_session(struct nfs_s
+ 			    struct nfs4_sequence_res *res,
+ 			    int cache_reply)
+ {
+-	return nfs4_call_sync_sequence(server->nfs_client, server->client,
+-				       msg, args, res, cache_reply, 0);
++	return nfs4_call_sync_sequence(server, msg, args, res, cache_reply, 0);
+ }
+ 
++#else
++static int nfs4_sequence_done(struct rpc_task *task,
++			       struct nfs4_sequence_res *res)
++{
++	return 1;
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ int _nfs4_call_sync(struct nfs_server *server,
+@@ -659,18 +703,9 @@ int _nfs4_call_sync(struct nfs_server *s
+ }
+ 
+ #define nfs4_call_sync(server, msg, args, res, cache_reply) \
+-	(server)->nfs_client->cl_call_sync((server), (msg), &(args)->seq_args, \
++	(server)->nfs_client->cl_mvops->call_sync((server), (msg), &(args)->seq_args, \
+ 			&(res)->seq_res, (cache_reply))
+ 
+-static void nfs4_sequence_done(const struct nfs_server *server,
+-			       struct nfs4_sequence_res *res, int rpc_status)
+-{
+-#ifdef CONFIG_NFS_V4_1
+-	if (nfs4_has_session(server->nfs_client))
+-		nfs41_sequence_done(server->nfs_client, res, rpc_status);
+-#endif /* CONFIG_NFS_V4_1 */
+-}
+-
+ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
+ {
+ 	struct nfs_inode *nfsi = NFS_I(dir);
+@@ -745,19 +780,14 @@ static struct nfs4_opendata *nfs4_openda
+ 	p->o_arg.server = server;
+ 	p->o_arg.bitmask = server->attr_bitmask;
+ 	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
+-	if (flags & O_EXCL) {
+-		if (nfs4_has_persistent_session(server->nfs_client)) {
+-			/* GUARDED */
+-			p->o_arg.u.attrs = &p->attrs;
+-			memcpy(&p->attrs, attrs, sizeof(p->attrs));
+-		} else { /* EXCLUSIVE4_1 */
+-			u32 *s = (u32 *) p->o_arg.u.verifier.data;
+-			s[0] = jiffies;
+-			s[1] = current->pid;
+-		}
+-	} else if (flags & O_CREAT) {
++	if (flags & O_CREAT) {
++		u32 *s;
++
+ 		p->o_arg.u.attrs = &p->attrs;
+ 		memcpy(&p->attrs, attrs, sizeof(p->attrs));
++		s = (u32 *) p->o_arg.u.verifier.data;
++		s[0] = jiffies;
++		s[1] = current->pid;
+ 	}
+ 	p->c_arg.fh = &p->o_res.fh;
+ 	p->c_arg.stateid = &p->o_res.stateid;
+@@ -851,8 +881,10 @@ static void update_open_stateflags(struc
+ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
+ {
+ 	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+-		memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
+-	memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
++		memcpy(state->stateid.u.data, stateid->u.data,
++		       sizeof(state->stateid.u.data));
++	memcpy(state->open_stateid.u.data, stateid->u.data,
++	       sizeof(state->open_stateid.u.data));
+ 	switch (fmode) {
+ 		case FMODE_READ:
+ 			set_bit(NFS_O_RDONLY_STATE, &state->flags);
+@@ -880,7 +912,8 @@ static void __update_open_stateid(struct
+ 	 */
+ 	write_seqlock(&state->seqlock);
+ 	if (deleg_stateid != NULL) {
+-		memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
++		memcpy(state->stateid.u.data, deleg_stateid->u.data,
++		       sizeof(state->stateid.u.data));
+ 		set_bit(NFS_DELEGATED_STATE, &state->flags);
+ 	}
+ 	if (open_stateid != NULL)
+@@ -911,7 +944,8 @@ static int update_open_stateid(struct nf
+ 
+ 	if (delegation == NULL)
+ 		delegation = &deleg_cur->stateid;
+-	else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0)
++	else if (memcmp(deleg_cur->stateid.u.data, delegation->u.data,
++			NFS4_STATEID_SIZE) != 0)
+ 		goto no_delegation_unlock;
+ 
+ 	nfs_mark_delegation_referenced(deleg_cur);
+@@ -973,7 +1007,8 @@ static struct nfs4_state *nfs4_try_open_
+ 			break;
+ 		}
+ 		/* Save the delegation */
+-		memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
++		memcpy(stateid.u.data, delegation->stateid.u.data,
++		       sizeof(stateid.u.data));
+ 		rcu_read_unlock();
+ 		ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
+ 		if (ret != 0)
+@@ -1127,10 +1162,13 @@ static int nfs4_open_recover(struct nfs4
+ 	 * Check if we need to update the current stateid.
+ 	 */
+ 	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
+-	    memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
++	    memcmp(state->stateid.u.data, state->open_stateid.u.data,
++		   sizeof(state->stateid.u.data)) != 0) {
+ 		write_seqlock(&state->seqlock);
+ 		if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+-			memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
++			memcpy(state->stateid.u.data,
++			       state->open_stateid.u.data,
++			       sizeof(state->stateid.u.data));
+ 		write_sequnlock(&state->seqlock);
+ 	}
+ 	return 0;
+@@ -1199,8 +1237,8 @@ static int _nfs4_open_delegation_recall(
+ 	if (IS_ERR(opendata))
+ 		return PTR_ERR(opendata);
+ 	opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
+-	memcpy(opendata->o_arg.u.delegation.data, stateid->data,
+-			sizeof(opendata->o_arg.u.delegation.data));
++	memcpy(opendata->o_arg.u.delegation.u.data, stateid->u.data,
++			sizeof(opendata->o_arg.u.delegation.u.data));
+ 	ret = nfs4_open_recover(opendata, state);
+ 	nfs4_opendata_put(opendata);
+ 	return ret;
+@@ -1258,8 +1296,8 @@ static void nfs4_open_confirm_done(struc
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+ 	if (data->rpc_status == 0) {
+-		memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
+-				sizeof(data->o_res.stateid.data));
++		memcpy(data->o_res.stateid.u.data, data->c_res.stateid.u.data,
++				sizeof(data->o_res.stateid.u.data));
+ 		nfs_confirm_seqid(&data->owner->so_seqid, 0);
+ 		renew_lease(data->o_res.server, data->timestamp);
+ 		data->rpc_done = 1;
+@@ -1356,13 +1394,13 @@ static void nfs4_open_prepare(struct rpc
+ 	}
+ 	/* Update sequence id. */
+ 	data->o_arg.id = sp->so_owner_id.id;
+-	data->o_arg.clientid = sp->so_client->cl_clientid;
++	data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid;
+ 	if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
+ 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
+ 		nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
+ 	}
+ 	data->timestamp = jiffies;
+-	if (nfs4_setup_sequence(data->o_arg.server->nfs_client,
++	if (nfs4_setup_sequence(data->o_arg.server, NULL,
+ 				&data->o_arg.seq_args,
+ 				&data->o_res.seq_res, 1, task))
+ 		return;
+@@ -1385,8 +1423,8 @@ static void nfs4_open_done(struct rpc_ta
+ 
+ 	data->rpc_status = task->tk_status;
+ 
+-	nfs4_sequence_done(data->o_arg.server, &data->o_res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->o_res.seq_res))
++		return;
+ 
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+@@ -1539,9 +1577,8 @@ static int _nfs4_proc_open(struct nfs4_o
+ 	return 0;
+ }
+ 
+-static int nfs4_recover_expired_lease(struct nfs_server *server)
++int nfs4_recover_expired_lease(struct nfs_client *clp)
+ {
+-	struct nfs_client *clp = server->nfs_client;
+ 	unsigned int loop;
+ 	int ret;
+ 
+@@ -1557,6 +1594,7 @@ static int nfs4_recover_expired_lease(st
+ 	}
+ 	return ret;
+ }
++EXPORT_SYMBOL(nfs4_recover_expired_lease);
+ 
+ /*
+  * OPEN_EXPIRED:
+@@ -1646,7 +1684,7 @@ static int _nfs4_do_open(struct inode *d
+ 		dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
+ 		goto out_err;
+ 	}
+-	status = nfs4_recover_expired_lease(server);
++	status = nfs4_recover_expired_lease(server->nfs_client);
+ 	if (status != 0)
+ 		goto err_put_state_owner;
+ 	if (path->dentry->d_inode != NULL)
+@@ -1773,7 +1811,7 @@ static int _nfs4_do_setattr(struct inode
+ 	if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
+ 		/* Use that stateid */
+ 	} else if (state != NULL) {
+-		nfs4_copy_stateid(&arg.stateid, state, current->files);
++		nfs4_copy_stateid(&arg.stateid, state, current->files, current->tgid);
+ 	} else
+ 		memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
+ 
+@@ -1838,7 +1876,8 @@ static void nfs4_close_done(struct rpc_t
+ 	struct nfs4_state *state = calldata->state;
+ 	struct nfs_server *server = NFS_SERVER(calldata->inode);
+ 
+-	nfs4_sequence_done(server, &calldata->res.seq_res, task->tk_status);
++	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
++		return;
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+         /* hmm. we are done with the inode, and in the process of freeing
+@@ -1858,7 +1897,7 @@ static void nfs4_close_done(struct rpc_t
+ 			if (calldata->arg.fmode == 0)
+ 				break;
+ 		default:
+-			if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
++			if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
+ 				rpc_restart_call_prepare(task);
+ 	}
+ 	nfs_release_seqid(calldata->arg.seqid);
+@@ -1903,7 +1942,7 @@ static void nfs4_close_prepare(struct rp
+ 
+ 	nfs_fattr_init(calldata->res.fattr);
+ 	calldata->timestamp = jiffies;
+-	if (nfs4_setup_sequence((NFS_SERVER(calldata->inode))->nfs_client,
++	if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), NULL,
+ 				&calldata->arg.seq_args, &calldata->res.seq_res,
+ 				1, task))
+ 		return;
+@@ -2323,6 +2362,9 @@ nfs4_proc_setattr(struct dentry *dentry,
+ 	struct nfs4_state *state = NULL;
+ 	int status;
+ 
++	if (pnfs_ld_layoutret_on_setattr(inode))
++		pnfs_return_layout(inode, NULL, NULL, RETURN_FILE, true);
++
+ 	nfs_fattr_init(fattr);
+ 	
+ 	/* Search for an existing open(O_WRITE) file */
+@@ -2648,8 +2690,9 @@ static int nfs4_proc_unlink_done(struct 
+ {
+ 	struct nfs_removeres *res = task->tk_msg.rpc_resp;
+ 
+-	nfs4_sequence_done(res->server, &res->seq_res, task->tk_status);
+-	if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
++	if (!nfs4_sequence_done(task, &res->seq_res))
++		return 0;
++	if (nfs4_async_handle_error(task, res->server, NULL, NULL) == -EAGAIN)
+ 		return 0;
+ 	update_changeattr(dir, &res->cinfo);
+ 	nfs_post_op_update_inode(dir, res->dir_attr);
+@@ -3090,18 +3133,31 @@ static int nfs4_proc_pathconf(struct nfs
+ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
+ {
+ 	struct nfs_server *server = NFS_SERVER(data->inode);
++	struct nfs_client *client = server->nfs_client;
+ 
+ 	dprintk("--> %s\n", __func__);
+ 
+-	nfs4_sequence_done(server, &data->res.seq_res, task->tk_status);
++#ifdef CONFIG_NFS_V4_1
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
+ 
+-	if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
+-		nfs_restart_rpc(task, server->nfs_client);
++	if (nfs4_async_handle_error(task, server, data->args.context->state, client) == -EAGAIN) {
++		nfs_restart_rpc(task, client);
+ 		return -EAGAIN;
+ 	}
+ 
+ 	nfs_invalidate_atime(data->inode);
+-	if (task->tk_status > 0)
++	if (task->tk_status > 0 && client == server->nfs_client)
+ 		renew_lease(server, data->timestamp);
+ 	return 0;
+ }
+@@ -3112,20 +3168,56 @@ static void nfs4_proc_read_setup(struct 
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+ }
+ 
++static void pnfs4_update_write_done(struct nfs_inode *nfsi, struct nfs_write_data *data)
++{
++#ifdef CONFIG_NFS_V4_1
++	pnfs_update_last_write(nfsi, data->args.offset, data->res.count);
++	pnfs_need_layoutcommit(nfsi, data->args.context);
++#endif /* CONFIG_NFS_V4_1 */
++}
++
+ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+ {
+ 	struct inode *inode = data->inode;
+-	
+-	nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
+-			   task->tk_status);
++	struct nfs_server *server = NFS_SERVER(inode);
++	struct nfs_client *client = server->nfs_client;
+ 
+-	if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
+-		nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
++
++#ifdef CONFIG_NFS_V4_1
++	/* restore original count after retry? */
++	if (data->pdata.orig_count) {
++		dprintk("%s: restoring original count %u\n", __func__,
++			data->pdata.orig_count);
++		data->args.count = data->pdata.orig_count;
++	}
++
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (nfs4_async_handle_error(task, server, data->args.context->state, client) == -EAGAIN) {
++		nfs_restart_rpc(task, client);
+ 		return -EAGAIN;
+ 	}
++
++	/*
++	 * MDS write: renew lease
++	 * DS write: update lastbyte written, mark for layout commit
++	 */
+ 	if (task->tk_status >= 0) {
+-		renew_lease(NFS_SERVER(inode), data->timestamp);
+-		nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
++		if (client == server->nfs_client) {
++			renew_lease(server, data->timestamp);
++			nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
++		} else
++			pnfs4_update_write_done(NFS_I(inode), data);
+ 	}
+ 	return 0;
+ }
+@@ -3138,20 +3230,42 @@ static void nfs4_proc_write_setup(struct
+ 	data->res.server = server;
+ 	data->timestamp   = jiffies;
+ 
++#ifdef CONFIG_NFS_V4_1
++	/* writes to DS use pnfs vector */
++	if (data->fldata.ds_nfs_client) {
++		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_WRITE];
++		return;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
+ }
+ 
+ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+ {
+ 	struct inode *inode = data->inode;
+-	
+-	nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
+-			   task->tk_status);
+-	if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
++	struct nfs_server *server = NFS_SERVER(data->inode);
++	struct nfs_client *client = server->nfs_client;
++
++#ifdef CONFIG_NFS_V4_1
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS commit\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
++
++	if (nfs4_async_handle_error(task, server, NULL, client) == -EAGAIN) {
+ 		nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
+ 		return -EAGAIN;
+ 	}
+-	nfs_refresh_inode(inode, data->res.fattr);
++	if (client == server->nfs_client)
++		nfs_refresh_inode(inode, data->res.fattr);
+ 	return 0;
+ }
+ 
+@@ -3161,6 +3275,12 @@ static void nfs4_proc_commit_setup(struc
+ 	
+ 	data->args.bitmask = server->cache_consistency_bitmask;
+ 	data->res.server = server;
++#if defined(CONFIG_NFS_V4_1)
++	if (data->fldata.ds_nfs_client) {
++		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_COMMIT];
++		return;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
+ }
+ 
+@@ -3464,9 +3584,12 @@ static int nfs4_proc_set_acl(struct inod
+ }
+ 
+ static int
+-_nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs_client *clp, struct nfs4_state *state)
++nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state, struct nfs_client *clp)
+ {
+-	if (!clp || task->tk_status >= 0)
++	if (!clp)
++		clp = server->nfs_client;
++
++	if (task->tk_status >= 0)
+ 		return 0;
+ 	switch(task->tk_status) {
+ 		case -NFS4ERR_ADMIN_REVOKED:
+@@ -3491,8 +3614,9 @@ _nfs4_async_handle_error(struct rpc_task
+ 		case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ 		case -NFS4ERR_SEQ_FALSE_RETRY:
+ 		case -NFS4ERR_SEQ_MISORDERED:
+-			dprintk("%s ERROR %d, Reset session\n", __func__,
+-				task->tk_status);
++			dprintk("%s ERROR %d, Reset session. Exchangeid "
++				"flags 0x%x\n", __func__, task->tk_status,
++				clp->cl_exchange_flags);
+ 			nfs4_schedule_state_recovery(clp);
+ 			task->tk_status = 0;
+ 			return -EAGAIN;
+@@ -3512,6 +3636,8 @@ _nfs4_async_handle_error(struct rpc_task
+ 	task->tk_status = nfs4_map_errors(task->tk_status);
+ 	return 0;
+ do_state_recovery:
++	if (is_ds_only_client(clp))
++		return 0;
+ 	rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
+ 	nfs4_schedule_state_recovery(clp);
+ 	if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
+@@ -3520,12 +3646,6 @@ do_state_recovery:
+ 	return -EAGAIN;
+ }
+ 
+-static int
+-nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
+-{
+-	return _nfs4_async_handle_error(task, server, server->nfs_client, state);
+-}
+-
+ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
+ 		unsigned short port, struct rpc_cred *cred,
+ 		struct nfs4_setclientid_res *res)
+@@ -3641,8 +3761,8 @@ static void nfs4_delegreturn_done(struct
+ {
+ 	struct nfs4_delegreturndata *data = calldata;
+ 
+-	nfs4_sequence_done(data->res.server, &data->res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return;
+ 
+ 	switch (task->tk_status) {
+ 	case -NFS4ERR_STALE_STATEID:
+@@ -3651,8 +3771,8 @@ static void nfs4_delegreturn_done(struct
+ 		renew_lease(data->res.server, data->timestamp);
+ 		break;
+ 	default:
+-		if (nfs4_async_handle_error(task, data->res.server, NULL) ==
+-				-EAGAIN) {
++		if (nfs4_async_handle_error(task, data->res.server, NULL, NULL)
++				== -EAGAIN) {
+ 			nfs_restart_rpc(task, data->res.server->nfs_client);
+ 			return;
+ 		}
+@@ -3672,7 +3792,7 @@ static void nfs4_delegreturn_prepare(str
+ 
+ 	d_data = (struct nfs4_delegreturndata *)data;
+ 
+-	if (nfs4_setup_sequence(d_data->res.server->nfs_client,
++	if (nfs4_setup_sequence(d_data->res.server, NULL,
+ 				&d_data->args.seq_args,
+ 				&d_data->res.seq_res, 1, task))
+ 		return;
+@@ -3892,15 +4012,16 @@ static void nfs4_locku_done(struct rpc_t
+ {
+ 	struct nfs4_unlockdata *calldata = data;
+ 
+-	nfs4_sequence_done(calldata->server, &calldata->res.seq_res,
+-			   task->tk_status);
++	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
++		return;
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+ 	switch (task->tk_status) {
+ 		case 0:
+-			memcpy(calldata->lsp->ls_stateid.data,
+-					calldata->res.stateid.data,
+-					sizeof(calldata->lsp->ls_stateid.data));
++			memcpy(calldata->lsp->ls_stateid.u.data,
++					calldata->res.stateid.u.data,
++					sizeof(calldata->lsp->ls_stateid.u.
++					       data));
+ 			renew_lease(calldata->server, calldata->timestamp);
+ 			break;
+ 		case -NFS4ERR_BAD_STATEID:
+@@ -3909,7 +4030,7 @@ static void nfs4_locku_done(struct rpc_t
+ 		case -NFS4ERR_EXPIRED:
+ 			break;
+ 		default:
+-			if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
++			if (nfs4_async_handle_error(task, calldata->server, NULL, NULL) == -EAGAIN)
+ 				nfs_restart_rpc(task,
+ 						 calldata->server->nfs_client);
+ 	}
+@@ -3927,7 +4048,7 @@ static void nfs4_locku_prepare(struct rp
+ 		return;
+ 	}
+ 	calldata->timestamp = jiffies;
+-	if (nfs4_setup_sequence(calldata->server->nfs_client,
++	if (nfs4_setup_sequence(calldata->server, NULL,
+ 				&calldata->arg.seq_args,
+ 				&calldata->res.seq_res, 1, task))
+ 		return;
+@@ -4082,7 +4203,8 @@ static void nfs4_lock_prepare(struct rpc
+ 	} else
+ 		data->arg.new_lock_owner = 0;
+ 	data->timestamp = jiffies;
+-	if (nfs4_setup_sequence(data->server->nfs_client, &data->arg.seq_args,
++	if (nfs4_setup_sequence(data->server, NULL,
++				&data->arg.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+@@ -4101,8 +4223,8 @@ static void nfs4_lock_done(struct rpc_ta
+ 
+ 	dprintk("%s: begin!\n", __func__);
+ 
+-	nfs4_sequence_done(data->server, &data->res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return;
+ 
+ 	data->rpc_status = task->tk_status;
+ 	if (RPC_ASSASSINATED(task))
+@@ -4114,8 +4236,8 @@ static void nfs4_lock_done(struct rpc_ta
+ 			goto out;
+ 	}
+ 	if (data->rpc_status == 0) {
+-		memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
+-					sizeof(data->lsp->ls_stateid.data));
++		memcpy(data->lsp->ls_stateid.u.data, data->res.stateid.u.data,
++					sizeof(data->lsp->ls_stateid.u.data));
+ 		data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+ 		renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
+ 	}
+@@ -4424,6 +4546,34 @@ out:
+ 	return err;
+ }
+ 
++static void nfs4_release_lockowner_release(void *calldata)
++{
++	kfree(calldata);
++}
++
++const struct rpc_call_ops nfs4_release_lockowner_ops = {
++	.rpc_release = nfs4_release_lockowner_release,
++};
++
++void nfs4_release_lockowner(const struct nfs4_lock_state *lsp)
++{
++	struct nfs_server *server = lsp->ls_state->owner->so_server;
++	struct nfs_release_lockowner_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER],
++	};
++
++	if (server->nfs_client->cl_mvops->minor_version != 0)
++		return;
++	args = kmalloc(sizeof(*args), GFP_NOFS);
++	if (!args)
++		return;
++	args->lock_owner.clientid = server->nfs_client->cl_clientid;
++	args->lock_owner.id = lsp->ls_id.id;
++	msg.rpc_argp = args;
++	rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
++}
++
+ #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
+ 
+ int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
+@@ -4526,7 +4676,7 @@ int nfs4_proc_exchange_id(struct nfs_cli
+ 	nfs4_verifier verifier;
+ 	struct nfs41_exchange_id_args args = {
+ 		.client = clp,
+-		.flags = clp->cl_exchange_flags,
++		.flags = clp->cl_exchange_flags & ~EXCHGID4_FLAG_CONFIRMED_R,
+ 	};
+ 	struct nfs41_exchange_id_res res = {
+ 		.client = clp,
+@@ -4574,6 +4724,7 @@ int nfs4_proc_exchange_id(struct nfs_cli
+ 	dprintk("<-- %s status= %d\n", __func__, status);
+ 	return status;
+ }
++EXPORT_SYMBOL(nfs4_proc_exchange_id);
+ 
+ struct nfs4_get_lease_time_data {
+ 	struct nfs4_get_lease_time_args *args;
+@@ -4611,7 +4762,8 @@ static void nfs4_get_lease_time_done(str
+ 			(struct nfs4_get_lease_time_data *)calldata;
+ 
+ 	dprintk("--> %s\n", __func__);
+-	nfs41_sequence_done(data->clp, &data->res->lr_seq_res, task->tk_status);
++	if (!nfs41_sequence_done(task, &data->res->lr_seq_res))
++		return;
+ 	switch (task->tk_status) {
+ 	case -NFS4ERR_DELAY:
+ 	case -NFS4ERR_GRACE:
+@@ -4805,13 +4957,6 @@ struct nfs4_session *nfs4_alloc_session(
+ 	if (!session)
+ 		return NULL;
+ 
+-	/*
+-	 * The create session reply races with the server back
+-	 * channel probe. Mark the client NFS_CS_SESSION_INITING
+-	 * so that the client back channel can find the
+-	 * nfs_client struct
+-	 */
+-	clp->cl_cons_state = NFS_CS_SESSION_INITING;
+ 	init_completion(&session->complete);
+ 
+ 	tbl = &session->fc_slot_table;
+@@ -4824,6 +4969,8 @@ struct nfs4_session *nfs4_alloc_session(
+ 	spin_lock_init(&tbl->slot_tbl_lock);
+ 	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
+ 
++	session->session_state = 1<<NFS4_SESSION_INITING;
++
+ 	session->clp = clp;
+ 	return session;
+ }
+@@ -5040,6 +5187,10 @@ int nfs4_init_session(struct nfs_server 
+ 	if (!nfs4_has_session(clp))
+ 		return 0;
+ 
++	session = clp->cl_session;
++	if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
++		return 0;
++
+ 	rsize = server->rsize;
+ 	if (rsize == 0)
+ 		rsize = NFS_MAX_FILE_IO_SIZE;
+@@ -5047,11 +5198,10 @@ int nfs4_init_session(struct nfs_server 
+ 	if (wsize == 0)
+ 		wsize = NFS_MAX_FILE_IO_SIZE;
+ 
+-	session = clp->cl_session;
+ 	session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
+ 	session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
+ 
+-	ret = nfs4_recover_expired_lease(server);
++	ret = nfs4_recover_expired_lease(server->nfs_client);
+ 	if (!ret)
+ 		ret = nfs4_check_client_ready(clp);
+ 	return ret;
+@@ -5060,69 +5210,70 @@ int nfs4_init_session(struct nfs_server 
+ /*
+  * Renew the cl_session lease.
+  */
+-static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+-{
++struct nfs4_sequence_data {
++	struct nfs_client *clp;
+ 	struct nfs4_sequence_args args;
+ 	struct nfs4_sequence_res res;
+-
+-	struct rpc_message msg = {
+-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
+-		.rpc_argp = &args,
+-		.rpc_resp = &res,
+-		.rpc_cred = cred,
+-	};
+-
+-	args.sa_cache_this = 0;
+-
+-	return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args,
+-				       &res, args.sa_cache_this, 1);
+-}
++};
+ 
+ static void nfs41_sequence_release(void *data)
+ {
+-	struct nfs_client *clp = (struct nfs_client *)data;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 
+ 	if (atomic_read(&clp->cl_count) > 1)
+ 		nfs4_schedule_state_renewal(clp);
+ 	nfs_put_client(clp);
++	kfree(calldata);
++}
++
++static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client *clp)
++{
++	switch(task->tk_status) {
++	case -NFS4ERR_DELAY:
++	case -EKEYEXPIRED:
++		rpc_delay(task, NFS4_POLL_RETRY_MAX);
++		return -EAGAIN;
++	default:
++		nfs4_schedule_state_recovery(clp);
++	}
++	return 0;
+ }
+ 
+ static void nfs41_sequence_call_done(struct rpc_task *task, void *data)
+ {
+-	struct nfs_client *clp = (struct nfs_client *)data;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 
+-	nfs41_sequence_done(clp, task->tk_msg.rpc_resp, task->tk_status);
++	if (!nfs41_sequence_done(task, task->tk_msg.rpc_resp))
++		return;
+ 
+ 	if (task->tk_status < 0) {
+ 		dprintk("%s ERROR %d\n", __func__, task->tk_status);
+ 		if (atomic_read(&clp->cl_count) == 1)
+ 			goto out;
+ 
+-		if (_nfs4_async_handle_error(task, NULL, clp, NULL)
+-								== -EAGAIN) {
+-			nfs_restart_rpc(task, clp);
++		if (nfs41_sequence_handle_errors(task, clp) == -EAGAIN) {
++			rpc_restart_call_prepare(task);
+ 			return;
+ 		}
+ 	}
+ 	dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred);
+ out:
+-	kfree(task->tk_msg.rpc_argp);
+-	kfree(task->tk_msg.rpc_resp);
+-
+ 	dprintk("<-- %s\n", __func__);
+ }
+ 
+ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
+ {
+-	struct nfs_client *clp;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 	struct nfs4_sequence_args *args;
+ 	struct nfs4_sequence_res *res;
+ 
+-	clp = (struct nfs_client *)data;
+ 	args = task->tk_msg.rpc_argp;
+ 	res = task->tk_msg.rpc_resp;
+ 
+-	if (nfs4_setup_sequence(clp, args, res, 0, task))
++	if (nfs41_setup_sequence(clp->cl_session, args, res, 0, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
+@@ -5133,32 +5284,67 @@ static const struct rpc_call_ops nfs41_s
+ 	.rpc_release = nfs41_sequence_release,
+ };
+ 
+-static int nfs41_proc_async_sequence(struct nfs_client *clp,
+-				     struct rpc_cred *cred)
++static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+ {
+-	struct nfs4_sequence_args *args;
+-	struct nfs4_sequence_res *res;
++	struct nfs4_sequence_data *calldata;
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
+ 		.rpc_cred = cred,
+ 	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = clp->cl_rpcclient,
++		.rpc_message = &msg,
++		.callback_ops = &nfs41_sequence_ops,
++		.flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
++	};
+ 
+ 	if (!atomic_inc_not_zero(&clp->cl_count))
+-		return -EIO;
+-	args = kzalloc(sizeof(*args), GFP_NOFS);
+-	res = kzalloc(sizeof(*res), GFP_NOFS);
+-	if (!args || !res) {
+-		kfree(args);
+-		kfree(res);
++		return ERR_PTR(-EIO);
++	calldata = kmalloc(sizeof(*calldata), GFP_NOFS);
++	if (calldata == NULL) {
+ 		nfs_put_client(clp);
+-		return -ENOMEM;
++		return ERR_PTR(-ENOMEM);
+ 	}
+-	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+-	msg.rpc_argp = args;
+-	msg.rpc_resp = res;
++	calldata->res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	msg.rpc_argp = &calldata->args;
++	msg.rpc_resp = &calldata->res;
++	calldata->clp = clp;
++	task_setup_data.callback_data = calldata;
+ 
+-	return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
+-			      &nfs41_sequence_ops, (void *)clp);
++	return rpc_run_task(&task_setup_data);
++}
++
++static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred)
++{
++	struct rpc_task *task;
++	int ret = 0;
++
++	task = _nfs41_proc_sequence(clp, cred);
++	if (IS_ERR(task))
++		ret = PTR_ERR(task);
++	else
++		rpc_put_task(task);
++	dprintk("<-- %s status=%d\n", __func__, ret);
++	return ret;
++}
++
++static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
++{
++	struct rpc_task *task;
++	int ret;
++
++	task = _nfs41_proc_sequence(clp, cred);
++	if (IS_ERR(task)) {
++		ret = PTR_ERR(task);
++		goto out;
++	}
++	ret = rpc_wait_for_completion_task(task);
++	if (!ret)
++		ret = task->tk_status;
++	rpc_put_task(task);
++out:
++	dprintk("<-- %s status=%d\n", __func__, ret);
++	return ret;
+ }
+ 
+ struct nfs4_reclaim_complete_data {
+@@ -5172,13 +5358,31 @@ static void nfs4_reclaim_complete_prepar
+ 	struct nfs4_reclaim_complete_data *calldata = data;
+ 
+ 	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+-	if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args,
++	if (nfs41_setup_sequence(calldata->clp->cl_session,
++				&calldata->arg.seq_args,
+ 				&calldata->res.seq_res, 0, task))
+ 		return;
+ 
+ 	rpc_call_start(task);
+ }
+ 
++static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nfs_client *clp)
++{
++	switch(task->tk_status) {
++	case 0:
++	case -NFS4ERR_COMPLETE_ALREADY:
++	case -NFS4ERR_WRONG_CRED: /* What to do here? */
++		break;
++	case -NFS4ERR_DELAY:
++	case -EKEYEXPIRED:
++		rpc_delay(task, NFS4_POLL_RETRY_MAX);
++		return -EAGAIN;
++	default:
++		nfs4_schedule_state_recovery(clp);
++	}
++	return 0;
++}
++
+ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data)
+ {
+ 	struct nfs4_reclaim_complete_data *calldata = data;
+@@ -5186,32 +5390,13 @@ static void nfs4_reclaim_complete_done(s
+ 	struct nfs4_sequence_res *res = &calldata->res.seq_res;
+ 
+ 	dprintk("--> %s\n", __func__);
+-	nfs41_sequence_done(clp, res, task->tk_status);
+-	switch (task->tk_status) {
+-	case 0:
+-	case -NFS4ERR_COMPLETE_ALREADY:
+-		break;
+-	case -NFS4ERR_BADSESSION:
+-	case -NFS4ERR_DEADSESSION:
+-		/*
+-		 * Handle the session error, but do not retry the operation, as
+-		 * we have no way of telling whether the clientid had to be
+-		 * reset before we got our reply.  If reset, a new wave of
+-		 * reclaim operations will follow, containing their own reclaim
+-		 * complete.  We don't want our retry to get on the way of
+-		 * recovery by incorrectly indicating to the server that we're
+-		 * done reclaiming state since the process had to be restarted.
+-		 */
+-		_nfs4_async_handle_error(task, NULL, clp, NULL);
+-		break;
+-	default:
+-		if (_nfs4_async_handle_error(
+-				task, NULL, clp, NULL) == -EAGAIN) {
+-			rpc_restart_call_prepare(task);
+-			return;
+-		}
+-	}
++	if (!nfs41_sequence_done(task, res))
++		return;
+ 
++	if (nfs41_reclaim_complete_handle_errors(task, clp) == -EAGAIN) {
++		rpc_restart_call_prepare(task);
++		return;
++	}
+ 	dprintk("<-- %s\n", __func__);
+ }
+ 
+@@ -5268,6 +5453,404 @@ out:
+ 	dprintk("<-- %s status=%d\n", __func__, status);
+ 	return status;
+ }
++
++static void
++nfs4_pnfs_layoutget_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_pnfs_layoutget *lgp = calldata;
++	struct inode *ino = lgp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++	if (nfs4_setup_sequence(server, NULL, &lgp->args.seq_args,
++				&lgp->res.seq_res, 0, task))
++		return;
++	rpc_call_start(task);
++}
++
++static void nfs4_pnfs_layoutget_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_pnfs_layoutget *lgp = calldata;
++	struct inode *ino = lgp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++
++	if (!nfs4_sequence_done(task, &lgp->res.seq_res))
++		return;
++
++	if (RPC_ASSASSINATED(task))
++		return;
++
++	pnfs_get_layout_done(lgp, task->tk_status);
++
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client);
++
++	lgp->status = task->tk_status;
++	dprintk("<-- %s\n", __func__);
++}
++
++static void nfs4_pnfs_layoutget_release(void *calldata)
++{
++	struct nfs4_pnfs_layoutget *lgp = calldata;
++
++	dprintk("--> %s\n", __func__);
++	pnfs_layout_release(NFS_I(lgp->args.inode)->layout, NULL);
++	if (lgp->res.layout.buf != NULL)
++		free_page((unsigned long) lgp->res.layout.buf);
++	kfree(calldata);
++	dprintk("<-- %s\n", __func__);
++}
++
++static const struct rpc_call_ops nfs4_pnfs_layoutget_call_ops = {
++	.rpc_call_prepare = nfs4_pnfs_layoutget_prepare,
++	.rpc_call_done = nfs4_pnfs_layoutget_done,
++	.rpc_release = nfs4_pnfs_layoutget_release,
++};
++
++/* FIXME: We need to call nfs4_handle_exception
++ * and deal with retries.
++ * Currently we can't since we release lgp and its contents.
++ */
++static int _pnfs4_proc_layoutget(struct nfs4_pnfs_layoutget *lgp)
++{
++	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
++	struct rpc_task *task;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_LAYOUTGET],
++		.rpc_argp = &lgp->args,
++		.rpc_resp = &lgp->res,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = server->client,
++		.rpc_message = &msg,
++		.callback_ops = &nfs4_pnfs_layoutget_call_ops,
++		.callback_data = lgp,
++		.flags = RPC_TASK_ASYNC,
++	};
++	int status = 0;
++
++	dprintk("--> %s\n", __func__);
++
++	lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS);
++	if (lgp->res.layout.buf == NULL) {
++		nfs4_pnfs_layoutget_release(lgp);
++		return -ENOMEM;
++	}
++
++	lgp->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	status = nfs4_wait_for_completion_rpc_task(task);
++	if (status != 0)
++		goto out;
++	status = lgp->status;
++	if (status != 0)
++		goto out;
++	status = pnfs_layout_process(lgp);
++out:
++	rpc_put_task(task);
++	dprintk("<-- %s status=%d\n", __func__, status);
++	return status;
++}
++
++int pnfs4_proc_layoutget(struct nfs4_pnfs_layoutget *lgp)
++{
++	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
++	struct nfs4_exception exception = { };
++	int err;
++	do {
++		err = nfs4_handle_exception(server, _pnfs4_proc_layoutget(lgp),
++					    &exception);
++	} while (exception.retry);
++	return err;
++}
++
++static void pnfs_layoutcommit_prepare(struct rpc_task *task, void *data)
++{
++	struct pnfs_layoutcommit_data *ldata =
++		(struct pnfs_layoutcommit_data *)data;
++	struct nfs_server *server = NFS_SERVER(ldata->args.inode);
++
++	if (nfs4_setup_sequence(server, NULL, &ldata->args.seq_args,
++				&ldata->res.seq_res, 1, task))
++		return;
++	rpc_call_start(task);
++}
++
++static void
++pnfs_layoutcommit_done(struct rpc_task *task, void *calldata)
++{
++	struct pnfs_layoutcommit_data *data =
++		(struct pnfs_layoutcommit_data *)calldata;
++	struct nfs_server *server = NFS_SERVER(data->args.inode);
++
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return;
++
++	if (RPC_ASSASSINATED(task))
++		return;
++
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client);
++
++	data->status = task->tk_status;
++}
++
++static void pnfs_layoutcommit_release(void *lcdata)
++{
++	struct pnfs_layoutcommit_data *data =
++		(struct pnfs_layoutcommit_data *)lcdata;
++
++	put_rpccred(data->cred);
++	pnfs_cleanup_layoutcommit(lcdata);
++	/* Matched by get_layout in pnfs_layoutcommit_inode; do before free */
++	put_layout(data->args.inode);
++	pnfs_layoutcommit_free(lcdata);
++}
++
++static const struct rpc_call_ops pnfs_layoutcommit_ops = {
++	.rpc_call_prepare = pnfs_layoutcommit_prepare,
++	.rpc_call_done = pnfs_layoutcommit_done,
++	.rpc_release = pnfs_layoutcommit_release,
++};
++
++/* Send LAYOUTCOMMIT; if issync, wait and return its status (else 0) */
++static int
++_pnfs4_proc_layoutcommit(struct pnfs_layoutcommit_data *data, int issync)
++{
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_LAYOUTCOMMIT],
++		.rpc_argp = &data->args,
++		.rpc_resp = &data->res,
++		.rpc_cred = data->cred,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.task = &data->task,
++		.rpc_client = NFS_CLIENT(data->args.inode),
++		.rpc_message = &msg,
++		.callback_ops = &pnfs_layoutcommit_ops,
++		.callback_data = data,
++		.flags = RPC_TASK_ASYNC,
++	};
++	struct rpc_task *task;
++	int status = 0;
++
++	dprintk("NFS: %4d initiating layoutcommit call. %llu@%llu lbw: %llu "
++		"type: %d issync %d\n",
++		data->task.tk_pid,
++		data->args.lseg.length,
++		data->args.lseg.offset,
++		data->args.lastbytewritten,
++		data->args.layout_type, issync);
++
++	data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	if (!issync)
++		goto out;
++	status = nfs4_wait_for_completion_rpc_task(task);
++	if (status != 0)
++		goto out;
++	status = data->status;
++out:
++	dprintk("%s: status %d\n", __func__, status);
++	rpc_put_task(task);
++	return status;
++}
++
++int pnfs4_proc_layoutcommit(struct pnfs_layoutcommit_data *data, int issync)
++{
++	struct nfs4_exception exception = { };
++	struct nfs_server *server = NFS_SERVER(data->args.inode);
++	int err;
++
++	do {
++		err = nfs4_handle_exception(server,
++					_pnfs4_proc_layoutcommit(data, issync),
++					&exception);
++	} while (exception.retry);
++	return err;
++}
++
++static void
++nfs4_pnfs_layoutreturn_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_pnfs_layoutreturn *lrp = calldata;
++	struct inode *ino = lrp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++	if (nfs4_setup_sequence(server, NULL, &lrp->args.seq_args,
++				&lrp->res.seq_res, 0, task))
++		return;
++	rpc_call_start(task);
++}
++
++static void nfs4_pnfs_layoutreturn_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_pnfs_layoutreturn *lrp = calldata;
++	struct inode *ino = lrp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++
++	if (!nfs4_sequence_done(task, &lrp->res.seq_res))
++		return;
++
++	if (RPC_ASSASSINATED(task))
++		return;
++
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client);
++
++	dprintk("<-- %s\n", __func__);
++}
++
++static void nfs4_pnfs_layoutreturn_release(void *calldata)
++{
++	struct nfs4_pnfs_layoutreturn *lrp = calldata;
++	struct pnfs_layout_type *lo = NFS_I(lrp->args.inode)->layout;
++
++	dprintk("--> %s return_type %d lo %p\n", __func__,
++		lrp->args.return_type, lo);
++
++	if (lrp->args.return_type == RETURN_FILE) {
++		if (!lrp->res.lrs_present)
++			pnfs_set_layout_stateid(lo, &zero_stateid);
++		pnfs_layout_release(lo, &lrp->args.lseg);
++	}
++	kfree(calldata);
++	dprintk("<-- %s\n", __func__);
++}
++
++static const struct rpc_call_ops nfs4_pnfs_layoutreturn_call_ops = {
++	.rpc_call_prepare = nfs4_pnfs_layoutreturn_prepare,
++	.rpc_call_done = nfs4_pnfs_layoutreturn_done,
++	.rpc_release = nfs4_pnfs_layoutreturn_release,
++};
++
++int _pnfs4_proc_layoutreturn(struct nfs4_pnfs_layoutreturn *lrp, bool issync)
++{
++	struct inode *ino = lrp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++	struct rpc_task *task;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_LAYOUTRETURN],
++		.rpc_argp = &lrp->args,
++		.rpc_resp = &lrp->res,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = server->client,
++		.rpc_message = &msg,
++		.callback_ops = &nfs4_pnfs_layoutreturn_call_ops,
++		.callback_data = lrp,
++		.flags = RPC_TASK_ASYNC,
++	};
++	int status = 0;
++
++	dprintk("--> %s\n", __func__);
++	lrp->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	if (!issync)
++		goto out;
++	status = nfs4_wait_for_completion_rpc_task(task);
++	if (status != 0)
++		goto out;
++	status = task->tk_status;
++out:
++	dprintk("<-- %s\n", __func__);
++	rpc_put_task(task);
++	return status;
++}
++
++int pnfs4_proc_layoutreturn(struct nfs4_pnfs_layoutreturn *lrp, bool issync)
++{
++	struct nfs_server *server = NFS_SERVER(lrp->args.inode);
++	struct nfs4_exception exception = { };
++	int err;
++
++	/* Single shot: the rpc_release callback kfree()s lrp, so no resend. */
++	err = nfs4_handle_exception(server,
++			_pnfs4_proc_layoutreturn(lrp, issync), &exception);
++	if (exception.retry)
++		err = -EAGAIN;	/* lrp is gone; caller must build a new one */
++	return err;
++}
++
++/*
++ * Retrieve the list of Data Server devices from the MDS.
++ */
++static int _nfs4_pnfs_getdevicelist(struct nfs_server *server,
++				    const struct nfs_fh *fh,
++				    struct pnfs_devicelist *devlist)
++{
++	struct nfs4_pnfs_getdevicelist_arg arg = {
++		.fh = fh,
++		.layoutclass = server->pnfs_curr_ld->id,
++	};
++	struct nfs4_pnfs_getdevicelist_res res = {
++		.devlist = devlist,
++	};
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_GETDEVICELIST],
++		.rpc_argp = &arg,
++		.rpc_resp = &res,
++	};
++	int status;
++
++	dprintk("--> %s\n", __func__);
++	status = nfs4_call_sync(server, &msg, &arg, &res, 0);
++	dprintk("<-- %s status=%d\n", __func__, status);
++	return status;
++}
++
++int nfs4_pnfs_getdevicelist(struct nfs_server *server,
++			    const struct nfs_fh *fh,
++			    struct pnfs_devicelist *devlist)
++{
++	struct nfs4_exception exception = { };
++	int err;
++
++	do {
++		err = nfs4_handle_exception(server,
++				_nfs4_pnfs_getdevicelist(server, fh, devlist),
++				&exception);
++	} while (exception.retry);
++
++	dprintk("nfs4_pnfs_getdevlist: err=%d, num_devs=%u\n",
++		err, devlist->num_devs);
++
++	return err;
++}
++
++int nfs4_pnfs_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
++{
++	struct nfs4_pnfs_getdeviceinfo_arg args = {
++		.pdev = pdev,
++	};
++	struct nfs4_pnfs_getdeviceinfo_res res = {
++		.pdev = pdev,
++	};
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_GETDEVICEINFO],
++		.rpc_argp = &args,
++		.rpc_resp = &res,
++	};
++	int status;
++
++	dprintk("--> %s\n", __func__);
++	status = nfs4_call_sync(server, &msg, &args, &res, 0);
++	dprintk("<-- %s status=%d\n", __func__, status);
++
++	return status;
++}
++
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
+@@ -5325,28 +5908,30 @@ struct nfs4_state_maintenance_ops nfs41_
+ };
+ #endif
+ 
+-/*
+- * Per minor version reboot and network partition recovery ops
+- */
+-
+-struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[] = {
+-	&nfs40_reboot_recovery_ops,
+-#if defined(CONFIG_NFS_V4_1)
+-	&nfs41_reboot_recovery_ops,
+-#endif
++static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
++	.minor_version = 0,
++	.call_sync = _nfs4_call_sync,
++	.validate_stateid = nfs4_validate_delegation_stateid,
++	.reboot_recovery_ops = &nfs40_reboot_recovery_ops,
++	.nograce_recovery_ops = &nfs40_nograce_recovery_ops,
++	.state_renewal_ops = &nfs40_state_renewal_ops,
+ };
+ 
+-struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[] = {
+-	&nfs40_nograce_recovery_ops,
+ #if defined(CONFIG_NFS_V4_1)
+-	&nfs41_nograce_recovery_ops,
+-#endif
++static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
++	.minor_version = 1,
++	.call_sync = _nfs4_call_sync_session,
++	.validate_stateid = nfs41_validate_delegation_stateid,
++	.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
++	.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
++	.state_renewal_ops = &nfs41_state_renewal_ops,
+ };
++#endif
+ 
+-struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[] = {
+-	&nfs40_state_renewal_ops,
++const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
++	[0] = &nfs_v4_0_minor_ops,
+ #if defined(CONFIG_NFS_V4_1)
+-	&nfs41_state_renewal_ops,
++	[1] = &nfs_v4_1_minor_ops,
+ #endif
+ };
+ 
+@@ -5364,6 +5949,7 @@ const struct nfs_rpc_ops nfs_v4_clientop
+ 	.dentry_ops	= &nfs4_dentry_operations,
+ 	.dir_inode_ops	= &nfs4_dir_inode_operations,
+ 	.file_inode_ops	= &nfs4_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs4_proc_get_root,
+ 	.getattr	= nfs4_proc_getattr,
+ 	.setattr	= nfs4_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs/nfs4renewd.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-08-23 12:09:03.341491726 -0400
+@@ -54,17 +54,17 @@
+ void
+ nfs4_renew_state(struct work_struct *work)
+ {
+-	struct nfs4_state_maintenance_ops *ops;
++	const struct nfs4_state_maintenance_ops *ops;
+ 	struct nfs_client *clp =
+ 		container_of(work, struct nfs_client, cl_renewd.work);
+ 	struct rpc_cred *cred;
+ 	long lease;
+ 	unsigned long last, now;
+ 
+-	ops = nfs4_state_renewal_ops[clp->cl_minorversion];
++	ops = clp->cl_mvops->state_renewal_ops;
+ 	dprintk("%s: start\n", __func__);
+ 	/* Are there any active superblocks? */
+-	if (list_empty(&clp->cl_superblocks))
++	if (list_empty(&clp->cl_superblocks) && !is_ds_only_client(clp))
+ 		goto out;
+ 	spin_lock(&clp->cl_lock);
+ 	lease = clp->cl_lease_time;
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-08-23 12:08:29.052491341 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-23 12:09:03.342373443 -0400
+@@ -53,6 +53,9 @@
+ #include "callback.h"
+ #include "delegation.h"
+ #include "internal.h"
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs4_pnfs.h>
++#include "pnfs.h"
+ 
+ #define OPENOWNER_POOL_SIZE	8
+ 
+@@ -126,6 +129,11 @@ static int nfs41_setup_state_renewal(str
+ 	int status;
+ 	struct nfs_fsinfo fsinfo;
+ 
++	if (is_ds_only_client(clp)) {
++		nfs4_schedule_state_renewal(clp);
++		return 0;
++	}
++
+ 	status = nfs4_proc_get_lease_time(clp, &fsinfo);
+ 	if (status == 0) {
+ 		/* Update lease time and schedule renewal */
+@@ -145,7 +153,9 @@ static void nfs4_end_drain_session(struc
+ 	struct nfs4_session *ses = clp->cl_session;
+ 	int max_slots;
+ 
+-	if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) {
++	if (ses == NULL)
++		return;
++	if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
+ 		spin_lock(&ses->fc_slot_table.slot_tbl_lock);
+ 		max_slots = ses->fc_slot_table.max_slots;
+ 		while (max_slots--) {
+@@ -167,7 +177,7 @@ static int nfs4_begin_drain_session(stru
+ 	struct nfs4_slot_table *tbl = &ses->fc_slot_table;
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+-	set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state);
++	set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
+ 	if (tbl->highest_used_slotid != -1) {
+ 		INIT_COMPLETION(ses->complete);
+ 		spin_unlock(&tbl->slot_tbl_lock);
+@@ -371,7 +381,6 @@ nfs4_alloc_state_owner(void)
+ 		return NULL;
+ 	spin_lock_init(&sp->so_lock);
+ 	INIT_LIST_HEAD(&sp->so_states);
+-	INIT_LIST_HEAD(&sp->so_delegations);
+ 	rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue");
+ 	sp->so_seqid.sequence = &sp->so_sequence;
+ 	spin_lock_init(&sp->so_sequence.lock);
+@@ -384,7 +393,7 @@ static void
+ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
+ {
+ 	if (!RB_EMPTY_NODE(&sp->so_client_node)) {
+-		struct nfs_client *clp = sp->so_client;
++		struct nfs_client *clp = sp->so_server->nfs_client;
+ 
+ 		spin_lock(&clp->cl_lock);
+ 		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
+@@ -406,7 +415,6 @@ struct nfs4_state_owner *nfs4_get_state_
+ 	new = nfs4_alloc_state_owner();
+ 	if (new == NULL)
+ 		return NULL;
+-	new->so_client = clp;
+ 	new->so_server = server;
+ 	new->so_cred = cred;
+ 	spin_lock(&clp->cl_lock);
+@@ -423,7 +431,7 @@ struct nfs4_state_owner *nfs4_get_state_
+ 
+ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
+ {
+-	struct nfs_client *clp = sp->so_client;
++	struct nfs_client *clp = sp->so_server->nfs_client;
+ 	struct rpc_cred *cred = sp->so_cred;
+ 
+ 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
+@@ -583,8 +591,24 @@ static void __nfs4_close(struct path *pa
+ 	if (!call_close) {
+ 		nfs4_put_open_state(state);
+ 		nfs4_put_state_owner(owner);
+-	} else
++	} else {
++		u32 roc_iomode;
++		struct nfs_inode *nfsi = NFS_I(state->inode);
++
++		if (has_layout(nfsi) &&
++		    (roc_iomode = pnfs_layout_roc_iomode(nfsi)) != 0) {
++			struct nfs4_pnfs_layout_segment range = {
++				.iomode = roc_iomode,
++				.offset = 0,
++				.length = NFS4_MAX_UINT64,
++			};
++
++			pnfs_return_layout(state->inode, &range, NULL,
++					   RETURN_FILE, wait);
++		}
++
+ 		nfs4_do_close(path, state, gfp_mask, wait);
++	}
+ }
+ 
+ void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
+@@ -602,12 +626,21 @@ void nfs4_close_sync(struct path *path, 
+  * that is compatible with current->files
+  */
+ static struct nfs4_lock_state *
+-__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
++__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *pos;
+ 	list_for_each_entry(pos, &state->lock_states, ls_locks) {
+-		if (pos->ls_owner != fl_owner)
++		if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type)
+ 			continue;
++		switch (pos->ls_owner.lo_type) {
++		case NFS4_POSIX_LOCK_TYPE:
++			if (pos->ls_owner.lo_u.posix_owner != fl_owner)
++				continue;
++			break;
++		case NFS4_FLOCK_LOCK_TYPE:
++			if (pos->ls_owner.lo_u.flock_owner != fl_pid)
++				continue;
++		}
+ 		atomic_inc(&pos->ls_count);
+ 		return pos;
+ 	}
+@@ -619,10 +652,10 @@ __nfs4_find_lock_state(struct nfs4_state
+  * exists, return an uninitialized one.
+  *
+  */
+-static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
++static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *lsp;
+-	struct nfs_client *clp = state->owner->so_client;
++	struct nfs_client *clp = state->owner->so_server->nfs_client;
+ 
+ 	lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
+ 	if (lsp == NULL)
+@@ -633,7 +666,18 @@ static struct nfs4_lock_state *nfs4_allo
+ 	lsp->ls_seqid.sequence = &lsp->ls_sequence;
+ 	atomic_set(&lsp->ls_count, 1);
+ 	lsp->ls_state = state;
+-	lsp->ls_owner = fl_owner;
++	lsp->ls_owner.lo_type = type;
++	switch (lsp->ls_owner.lo_type) {
++	case NFS4_FLOCK_LOCK_TYPE:
++		lsp->ls_owner.lo_u.flock_owner = fl_pid;
++		break;
++	case NFS4_POSIX_LOCK_TYPE:
++		lsp->ls_owner.lo_u.posix_owner = fl_owner;
++		break;
++	default:
++		kfree(lsp);
++		return NULL;
++	}
+ 	spin_lock(&clp->cl_lock);
+ 	nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
+ 	spin_unlock(&clp->cl_lock);
+@@ -643,7 +687,7 @@ static struct nfs4_lock_state *nfs4_allo
+ 
+ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
+ {
+-	struct nfs_client *clp = lsp->ls_state->owner->so_client;
++	struct nfs_client *clp = lsp->ls_state->owner->so_server->nfs_client;
+ 
+ 	spin_lock(&clp->cl_lock);
+ 	nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
+@@ -657,13 +701,13 @@ static void nfs4_free_lock_state(struct 
+  * exists, return an uninitialized one.
+  *
+  */
+-static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
++static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *lsp, *new = NULL;
+ 	
+ 	for(;;) {
+ 		spin_lock(&state->state_lock);
+-		lsp = __nfs4_find_lock_state(state, owner);
++		lsp = __nfs4_find_lock_state(state, owner, pid, type);
+ 		if (lsp != NULL)
+ 			break;
+ 		if (new != NULL) {
+@@ -674,7 +718,7 @@ static struct nfs4_lock_state *nfs4_get_
+ 			break;
+ 		}
+ 		spin_unlock(&state->state_lock);
+-		new = nfs4_alloc_lock_state(state, owner);
++		new = nfs4_alloc_lock_state(state, owner, pid, type);
+ 		if (new == NULL)
+ 			return NULL;
+ 	}
+@@ -701,6 +745,8 @@ void nfs4_put_lock_state(struct nfs4_loc
+ 	if (list_empty(&state->lock_states))
+ 		clear_bit(LK_STATE_IN_USE, &state->flags);
+ 	spin_unlock(&state->state_lock);
++	if (lsp->ls_flags & NFS_LOCK_INITIALIZED)
++		nfs4_release_lockowner(lsp);
+ 	nfs4_free_lock_state(lsp);
+ }
+ 
+@@ -728,7 +774,12 @@ int nfs4_set_lock_state(struct nfs4_stat
+ 
+ 	if (fl->fl_ops != NULL)
+ 		return 0;
+-	lsp = nfs4_get_lock_state(state, fl->fl_owner);
++	if (fl->fl_flags & FL_POSIX)
++		lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
++	else if (fl->fl_flags & FL_FLOCK)
++		lsp = nfs4_get_lock_state(state, 0, fl->fl_pid, NFS4_FLOCK_LOCK_TYPE);
++	else
++		return -EINVAL;
+ 	if (lsp == NULL)
+ 		return -ENOMEM;
+ 	fl->fl_u.nfs4_fl.owner = lsp;
+@@ -740,7 +791,7 @@ int nfs4_set_lock_state(struct nfs4_stat
+  * Byte-range lock aware utility to initialize the stateid of read/write
+  * requests.
+  */
+-void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
++void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid)
+ {
+ 	struct nfs4_lock_state *lsp;
+ 	int seq;
+@@ -753,7 +804,7 @@ void nfs4_copy_stateid(nfs4_stateid *dst
+ 		return;
+ 
+ 	spin_lock(&state->state_lock);
+-	lsp = __nfs4_find_lock_state(state, fl_owner);
++	lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
+ 	if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
+ 		memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
+ 	spin_unlock(&state->state_lock);
+@@ -1031,8 +1082,8 @@ restart:
+ 				 * Open state on this file cannot be recovered
+ 				 * All we can do is revert to using the zero stateid.
+ 				 */
+-				memset(state->stateid.data, 0,
+-					sizeof(state->stateid.data));
++				memset(state->stateid.u.data, 0,
++					sizeof(state->stateid.u.data));
+ 				/* Mark the file as being 'closed' */
+ 				state->state = 0;
+ 				break;
+@@ -1041,11 +1092,11 @@ restart:
+ 			case -NFS4ERR_BAD_STATEID:
+ 			case -NFS4ERR_RECLAIM_BAD:
+ 			case -NFS4ERR_RECLAIM_CONFLICT:
+-				nfs4_state_mark_reclaim_nograce(sp->so_client, state);
++				nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ 				break;
+ 			case -NFS4ERR_EXPIRED:
+ 			case -NFS4ERR_NO_GRACE:
+-				nfs4_state_mark_reclaim_nograce(sp->so_client, state);
++				nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ 			case -NFS4ERR_STALE_CLIENTID:
+ 			case -NFS4ERR_BADSESSION:
+ 			case -NFS4ERR_BADSLOT:
+@@ -1120,8 +1171,7 @@ static void nfs4_state_end_reclaim_reboo
+ 	if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+ 		return;
+ 
+-	nfs4_reclaim_complete(clp,
+-		nfs4_reboot_recovery_ops[clp->cl_minorversion]);
++	nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
+ 
+ 	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+ 		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+@@ -1211,8 +1261,8 @@ restart:
+ static int nfs4_check_lease(struct nfs_client *clp)
+ {
+ 	struct rpc_cred *cred;
+-	struct nfs4_state_maintenance_ops *ops =
+-		nfs4_state_renewal_ops[clp->cl_minorversion];
++	const struct nfs4_state_maintenance_ops *ops =
++		clp->cl_mvops->state_renewal_ops;
+ 	int status = -NFS4ERR_EXPIRED;
+ 
+ 	/* Is the client already known to have an expired lease? */
+@@ -1235,8 +1285,8 @@ out:
+ static int nfs4_reclaim_lease(struct nfs_client *clp)
+ {
+ 	struct rpc_cred *cred;
+-	struct nfs4_state_recovery_ops *ops =
+-		nfs4_reboot_recovery_ops[clp->cl_minorversion];
++	const struct nfs4_state_recovery_ops *ops =
++		clp->cl_mvops->reboot_recovery_ops;
+ 	int status = -ENOENT;
+ 
+ 	cred = ops->get_clid_cred(clp);
+@@ -1421,6 +1471,7 @@ static void nfs4_state_manager(struct nf
+ 			}
+ 			clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
+ 			set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
++			pnfs_destroy_all_layouts(clp);
+ 		}
+ 
+ 		if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
+@@ -1444,7 +1495,7 @@ static void nfs4_state_manager(struct nf
+ 		/* First recover reboot state... */
+ 		if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
+ 			status = nfs4_do_reclaim(clp,
+-				nfs4_reboot_recovery_ops[clp->cl_minorversion]);
++				clp->cl_mvops->reboot_recovery_ops);
+ 			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
+ 				continue;
+@@ -1458,7 +1509,7 @@ static void nfs4_state_manager(struct nf
+ 		/* Now recover expired state... */
+ 		if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
+ 			status = nfs4_do_reclaim(clp,
+-				nfs4_nograce_recovery_ops[clp->cl_minorversion]);
++				clp->cl_mvops->nograce_recovery_ops);
+ 			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-23 12:08:29.054481400 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-23 12:09:03.346481283 -0400
+@@ -50,8 +50,11 @@
+ #include <linux/nfs4.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/nfs_idmap.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/pnfs_xdr.h>
+ #include "nfs4_fs.h"
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_XDR
+ 
+@@ -89,7 +92,7 @@ static int nfs4_stat_to_errno(int);
+ #define encode_getfh_maxsz      (op_encode_hdr_maxsz)
+ #define decode_getfh_maxsz      (op_decode_hdr_maxsz + 1 + \
+ 				((3+NFS4_FHSIZE) >> 2))
+-#define nfs4_fattr_bitmap_maxsz 3
++#define nfs4_fattr_bitmap_maxsz 4
+ #define encode_getattr_maxsz    (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
+ #define nfs4_name_maxsz		(1 + ((3 + NFS4_MAXNAMLEN) >> 2))
+ #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
+@@ -111,7 +114,11 @@ static int nfs4_stat_to_errno(int);
+ #define encode_restorefh_maxsz  (op_encode_hdr_maxsz)
+ #define decode_restorefh_maxsz  (op_decode_hdr_maxsz)
+ #define encode_fsinfo_maxsz	(encode_getattr_maxsz)
+-#define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + 11)
++/* The 5 accounts for the PNFS attributes, and assumes that at most three
++ * layout types will be returned.
++ */
++#define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + \
++				 nfs4_fattr_bitmap_maxsz + 8 + 5)
+ #define encode_renew_maxsz	(op_encode_hdr_maxsz + 3)
+ #define decode_renew_maxsz	(op_decode_hdr_maxsz)
+ #define encode_setclientid_maxsz \
+@@ -202,14 +209,17 @@ static int nfs4_stat_to_errno(int);
+ #define encode_link_maxsz	(op_encode_hdr_maxsz + \
+ 				nfs4_name_maxsz)
+ #define decode_link_maxsz	(op_decode_hdr_maxsz + decode_change_info_maxsz)
++#define encode_lockowner_maxsz	(7)
+ #define encode_lock_maxsz	(op_encode_hdr_maxsz + \
+ 				 7 + \
+-				 1 + encode_stateid_maxsz + 8)
++				 1 + encode_stateid_maxsz + 1 + \
++				 encode_lockowner_maxsz)
+ #define decode_lock_denied_maxsz \
+ 				(8 + decode_lockowner_maxsz)
+ #define decode_lock_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_lock_denied_maxsz)
+-#define encode_lockt_maxsz	(op_encode_hdr_maxsz + 12)
++#define encode_lockt_maxsz	(op_encode_hdr_maxsz + 5 + \
++				encode_lockowner_maxsz)
+ #define decode_lockt_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_lock_denied_maxsz)
+ #define encode_locku_maxsz	(op_encode_hdr_maxsz + 3 + \
+@@ -217,6 +227,11 @@ static int nfs4_stat_to_errno(int);
+ 				 4)
+ #define decode_locku_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_stateid_maxsz)
++#define encode_release_lockowner_maxsz \
++				(op_encode_hdr_maxsz + \
++				 encode_lockowner_maxsz)
++#define decode_release_lockowner_maxsz \
++				(op_decode_hdr_maxsz)
+ #define encode_access_maxsz	(op_encode_hdr_maxsz + 1)
+ #define decode_access_maxsz	(op_decode_hdr_maxsz + 2)
+ #define encode_symlink_maxsz	(op_encode_hdr_maxsz + \
+@@ -302,6 +317,35 @@ static int nfs4_stat_to_errno(int);
+ 				XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
+ #define encode_reclaim_complete_maxsz	(op_encode_hdr_maxsz + 4)
+ #define decode_reclaim_complete_maxsz	(op_decode_hdr_maxsz + 4)
++#define encode_getdevicelist_maxsz (op_encode_hdr_maxsz + 4 + \
++				encode_verifier_maxsz)
++#define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 +  \
++				decode_verifier_maxsz +             \
++				XDR_QUADLEN(NFS4_PNFS_GETDEVLIST_MAXNUM *  \
++				NFS4_PNFS_DEVICEID4_SIZE))
++#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \
++				XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE))
++#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \
++				4 /*layout type */ + \
++				4 /* opaque devaddr4 length */ +\
++				4 /* notification bitmap length */ + \
++				4 /* notification bitmap */)
++#define encode_layoutget_sz	(op_encode_hdr_maxsz + 10 + \
++				encode_stateid_maxsz)
++#define decode_layoutget_maxsz	(op_decode_hdr_maxsz + 8 + \
++				decode_stateid_maxsz + \
++				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
++#define encode_layoutcommit_sz	(18 +                           \
++				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + \
++				op_encode_hdr_maxsz +          \
++				encode_stateid_maxsz)
++#define decode_layoutcommit_maxsz (3 + op_decode_hdr_maxsz)
++#define encode_layoutreturn_sz	(8 + op_encode_hdr_maxsz + \
++				encode_stateid_maxsz + \
++				1 /* FIXME: opaque lrf_body always empty at
++				   * the moment */)
++#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
++				1 + decode_stateid_maxsz)
+ #else /* CONFIG_NFS_V4_1 */
+ #define encode_sequence_maxsz	0
+ #define decode_sequence_maxsz	0
+@@ -471,6 +515,12 @@ static int nfs4_stat_to_errno(int);
+ 				decode_sequence_maxsz + \
+ 				decode_putfh_maxsz + \
+ 				decode_locku_maxsz)
++#define NFS4_enc_release_lockowner_sz \
++				(compound_encode_hdr_maxsz + \
++				 encode_release_lockowner_maxsz)
++#define NFS4_dec_release_lockowner_sz \
++				(compound_decode_hdr_maxsz + \
++				 decode_release_lockowner_maxsz)
+ #define NFS4_enc_access_sz	(compound_encode_hdr_maxsz + \
+ 				encode_sequence_maxsz + \
+ 				encode_putfh_maxsz + \
+@@ -685,6 +735,60 @@ static int nfs4_stat_to_errno(int);
+ #define NFS4_dec_reclaim_complete_sz	(compound_decode_hdr_maxsz + \
+ 					 decode_sequence_maxsz + \
+ 					 decode_reclaim_complete_maxsz)
++#define NFS4_enc_getdevicelist_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz + \
++				encode_getdevicelist_maxsz)
++#define NFS4_dec_getdevicelist_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_getdevicelist_maxsz)
++#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz +    \
++				encode_sequence_maxsz +\
++				encode_getdeviceinfo_maxsz)
++#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz +    \
++				decode_sequence_maxsz + \
++				decode_getdeviceinfo_maxsz)
++#define NFS4_enc_layoutget_sz	(compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz +        \
++				encode_layoutget_sz)
++#define NFS4_dec_layoutget_sz	(compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz +        \
++				decode_layoutget_maxsz)
++#define NFS4_enc_layoutcommit_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz +\
++				encode_putfh_maxsz + \
++				encode_layoutcommit_sz + \
++				encode_getattr_maxsz)
++#define NFS4_dec_layoutcommit_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_layoutcommit_maxsz + \
++				decode_getattr_maxsz)
++#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz + \
++				encode_layoutreturn_sz)
++#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_layoutreturn_maxsz)
++#define NFS4_enc_dswrite_sz	(compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz +\
++				encode_putfh_maxsz + \
++				encode_write_maxsz)
++#define NFS4_dec_dswrite_sz	(compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_write_maxsz)
++#define NFS4_enc_dscommit_sz	(compound_encode_hdr_maxsz + \
++				encode_putfh_maxsz + \
++				encode_commit_maxsz)
++#define NFS4_dec_dscommit_sz	(compound_decode_hdr_maxsz + \
++				decode_putfh_maxsz + \
++				decode_commit_maxsz)
+ 
+ const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
+ 				      compound_encode_hdr_maxsz +
+@@ -915,7 +1019,7 @@ static void encode_close(struct xdr_stre
+ 	p = reserve_space(xdr, 8+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_CLOSE);
+ 	*p++ = cpu_to_be32(arg->seqid->sequence->counter);
+-	xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_close_maxsz;
+ }
+@@ -989,6 +1093,35 @@ static void encode_getattr_two(struct xd
+ 	hdr->replen += decode_getattr_maxsz;
+ }
+ 
++static void
++encode_getattr_three(struct xdr_stream *xdr,
++		     uint32_t bm0, uint32_t bm1, uint32_t bm2,
++		     struct compound_hdr *hdr)
++{
++	__be32 *p;
++
++	p = reserve_space(xdr, 4);
++	*p = cpu_to_be32(OP_GETATTR);
++	if (bm2) {
++		p = reserve_space(xdr, 16);
++		*p++ = cpu_to_be32(3);
++		*p++ = cpu_to_be32(bm0);
++		*p++ = cpu_to_be32(bm1);
++		*p = cpu_to_be32(bm2);
++	} else if (bm1) {
++		p = reserve_space(xdr, 12);
++		*p++ = cpu_to_be32(2);
++		*p++ = cpu_to_be32(bm0);
++		*p = cpu_to_be32(bm1);
++	} else {
++		p = reserve_space(xdr, 8);
++		*p++ = cpu_to_be32(1);
++		*p = cpu_to_be32(bm0);
++	}
++	hdr->nops++;
++	hdr->replen += decode_getattr_maxsz;
++}
++
+ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+ {
+ 	encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0],
+@@ -997,8 +1130,11 @@ static void encode_getfattr(struct xdr_s
+ 
+ static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+ {
+-	encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0],
+-			   bitmask[1] & nfs4_fsinfo_bitmap[1], hdr);
++	encode_getattr_three(xdr,
++			     bitmask[0] & nfs4_fsinfo_bitmap[0],
++			     bitmask[1] & nfs4_fsinfo_bitmap[1],
++			     bitmask[2] & nfs4_fsinfo_bitmap[2],
++			     hdr);
+ }
+ 
+ static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+@@ -1042,6 +1178,17 @@ static inline uint64_t nfs4_lock_length(
+ 	return fl->fl_end - fl->fl_start + 1;
+ }
+ 
++static void encode_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner)
++{
++	__be32 *p;
++
++	p = reserve_space(xdr, 28);
++	p = xdr_encode_hyper(p, lowner->clientid);
++	*p++ = cpu_to_be32(16);
++	p = xdr_encode_opaque_fixed(p, "lock id:", 8);
++	xdr_encode_hyper(p, lowner->id);
++}
++
+ /*
+  * opcode,type,reclaim,offset,length,new_lock_owner = 32
+  * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40
+@@ -1058,18 +1205,16 @@ static void encode_lock(struct xdr_strea
+ 	p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+ 	*p = cpu_to_be32(args->new_lock_owner);
+ 	if (args->new_lock_owner){
+-		p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32);
++		p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 		*p++ = cpu_to_be32(args->open_seqid->sequence->counter);
+-		p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
++		p = xdr_encode_opaque_fixed(p, args->open_stateid->u.data,
++					    NFS4_STATEID_SIZE);
+ 		*p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
+-		p = xdr_encode_hyper(p, args->lock_owner.clientid);
+-		*p++ = cpu_to_be32(16);
+-		p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+-		xdr_encode_hyper(p, args->lock_owner.id);
++		encode_lockowner(xdr, &args->lock_owner);
+ 	}
+ 	else {
+ 		p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
+-		p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE);
++		p = xdr_encode_opaque_fixed(p, args->lock_stateid->u.data, NFS4_STATEID_SIZE);
+ 		*p = cpu_to_be32(args->lock_seqid->sequence->counter);
+ 	}
+ 	hdr->nops++;
+@@ -1080,15 +1225,12 @@ static void encode_lockt(struct xdr_stre
+ {
+ 	__be32 *p;
+ 
+-	p = reserve_space(xdr, 52);
++	p = reserve_space(xdr, 24);
+ 	*p++ = cpu_to_be32(OP_LOCKT);
+ 	*p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
+ 	p = xdr_encode_hyper(p, args->fl->fl_start);
+ 	p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+-	p = xdr_encode_hyper(p, args->lock_owner.clientid);
+-	*p++ = cpu_to_be32(16);
+-	p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+-	xdr_encode_hyper(p, args->lock_owner.id);
++	encode_lockowner(xdr, &args->lock_owner);
+ 	hdr->nops++;
+ 	hdr->replen += decode_lockt_maxsz;
+ }
+@@ -1101,13 +1243,25 @@ static void encode_locku(struct xdr_stre
+ 	*p++ = cpu_to_be32(OP_LOCKU);
+ 	*p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
+ 	*p++ = cpu_to_be32(args->seqid->sequence->counter);
+-	p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, args->stateid->u.data,
++				    NFS4_STATEID_SIZE);
+ 	p = xdr_encode_hyper(p, args->fl->fl_start);
+ 	xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+ 	hdr->nops++;
+ 	hdr->replen += decode_locku_maxsz;
+ }
+ 
++static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr)
++{
++	__be32 *p;
++
++	p = reserve_space(xdr, 4);
++	*p = cpu_to_be32(OP_RELEASE_LOCKOWNER);
++	encode_lockowner(xdr, lowner);
++	hdr->nops++;
++	hdr->replen += decode_release_lockowner_maxsz;
++}
++
+ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
+ {
+ 	int len = name->len;
+@@ -1172,7 +1326,7 @@ static inline void encode_createmode(str
+ 		break;
+ 	default:
+ 		clp = arg->server->nfs_client;
+-		if (clp->cl_minorversion > 0) {
++		if (clp->cl_mvops->minor_version > 0) {
+ 			if (nfs4_has_persistent_session(clp)) {
+ 				*p = cpu_to_be32(NFS4_CREATE_GUARDED);
+ 				encode_attrs(xdr, arg->u.attrs, arg->server);
+@@ -1251,7 +1405,7 @@ static inline void encode_claim_delegate
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
+-	xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, stateid->u.data, NFS4_STATEID_SIZE);
+ 	encode_string(xdr, name->len, name->name);
+ }
+ 
+@@ -1282,7 +1436,7 @@ static void encode_open_confirm(struct x
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 	*p++ = cpu_to_be32(OP_OPEN_CONFIRM);
+-	p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	*p = cpu_to_be32(arg->seqid->sequence->counter);
+ 	hdr->nops++;
+ 	hdr->replen += decode_open_confirm_maxsz;
+@@ -1294,7 +1448,7 @@ static void encode_open_downgrade(struct
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 	*p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
+-	p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	*p = cpu_to_be32(arg->seqid->sequence->counter);
+ 	encode_share_access(xdr, arg->fmode);
+ 	hdr->nops++;
+@@ -1324,17 +1478,17 @@ static void encode_putrootfh(struct xdr_
+ 	hdr->replen += decode_putrootfh_maxsz;
+ }
+ 
+-static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
++static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx)
+ {
+ 	nfs4_stateid stateid;
+ 	__be32 *p;
+ 
+ 	p = reserve_space(xdr, NFS4_STATEID_SIZE);
+ 	if (ctx->state != NULL) {
+-		nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
+-		xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
++		nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid);
++		xdr_encode_opaque_fixed(p, stateid.u.data, NFS4_STATEID_SIZE);
+ 	} else
+-		xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
++		xdr_encode_opaque_fixed(p, zero_stateid.u.data, NFS4_STATEID_SIZE);
+ }
+ 
+ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
+@@ -1344,7 +1498,7 @@ static void encode_read(struct xdr_strea
+ 	p = reserve_space(xdr, 4);
+ 	*p = cpu_to_be32(OP_READ);
+ 
+-	encode_stateid(xdr, args->context);
++	encode_stateid(xdr, args->context, args->lock_context);
+ 
+ 	p = reserve_space(xdr, 12);
+ 	p = xdr_encode_hyper(p, args->offset);
+@@ -1448,7 +1602,7 @@ encode_setacl(struct xdr_stream *xdr, st
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_SETATTR);
+-	xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, zero_stateid.u.data, NFS4_STATEID_SIZE);
+ 	p = reserve_space(xdr, 2*4);
+ 	*p++ = cpu_to_be32(1);
+ 	*p = cpu_to_be32(FATTR4_WORD0_ACL);
+@@ -1479,7 +1633,7 @@ static void encode_setattr(struct xdr_st
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_SETATTR);
+-	xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, arg->stateid.u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_setattr_maxsz;
+ 	encode_attrs(xdr, arg->iap, server);
+@@ -1523,7 +1677,7 @@ static void encode_write(struct xdr_stre
+ 	p = reserve_space(xdr, 4);
+ 	*p = cpu_to_be32(OP_WRITE);
+ 
+-	encode_stateid(xdr, args->context);
++	encode_stateid(xdr, args->context, args->lock_context);
+ 
+ 	p = reserve_space(xdr, 16);
+ 	p = xdr_encode_hyper(p, args->offset);
+@@ -1542,7 +1696,7 @@ static void encode_delegreturn(struct xd
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 
+ 	*p++ = cpu_to_be32(OP_DELEGRETURN);
+-	xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, stateid->u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_delegreturn_maxsz;
+ }
+@@ -1696,6 +1850,162 @@ static void encode_sequence(struct xdr_s
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
++#ifdef CONFIG_NFS_V4_1
++static void
++encode_getdevicelist(struct xdr_stream *xdr,
++		     const struct nfs4_pnfs_getdevicelist_arg *args,
++		     struct compound_hdr *hdr)
++{
++	__be32 *p;
++	nfs4_verifier dummy = {
++		.data = "dummmmmy",
++	};
++
++	p = reserve_space(xdr, 20);
++	*p++ = cpu_to_be32(OP_GETDEVICELIST);
++	*p++ = cpu_to_be32(args->layoutclass);
++	*p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM);
++	xdr_encode_hyper(p, 0ULL);                          /* cookie */
++	encode_nfs4_verifier(xdr, &dummy);
++	hdr->nops++;
++}
++
++static void
++encode_getdeviceinfo(struct xdr_stream *xdr,
++		     const struct nfs4_pnfs_getdeviceinfo_arg *args,
++		     struct compound_hdr *hdr)
++{
++	int has_bitmap = (args->pdev->dev_notify_types != 0);
++	int len = 16 + NFS4_PNFS_DEVICEID4_SIZE + (has_bitmap * 4);
++	__be32 *p;
++
++	p = reserve_space(xdr, len);
++	*p++ = cpu_to_be32(OP_GETDEVICEINFO);
++	p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
++				    NFS4_PNFS_DEVICEID4_SIZE);
++	*p++ = cpu_to_be32(args->pdev->layout_type);
++	*p++ = cpu_to_be32(args->pdev->pglen + len);	/* gdia_maxcount */
++	*p++ = cpu_to_be32(has_bitmap);			/* bitmap length [01] */
++	if (has_bitmap)
++		*p = cpu_to_be32(args->pdev->dev_notify_types);
++	hdr->nops++;
++}
++
++static void
++encode_layoutget(struct xdr_stream *xdr,
++		      const struct nfs4_pnfs_layoutget_arg *args,
++		      struct compound_hdr *hdr)
++{
++	nfs4_stateid stateid;
++	__be32 *p;
++
++	p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
++	*p++ = cpu_to_be32(OP_LAYOUTGET);
++	*p++ = cpu_to_be32(0);     /* Signal layout available */
++	*p++ = cpu_to_be32(args->type);
++	*p++ = cpu_to_be32(args->lseg.iomode);
++	p = xdr_encode_hyper(p, args->lseg.offset);
++	p = xdr_encode_hyper(p, args->lseg.length);
++	p = xdr_encode_hyper(p, args->minlength);
++	pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout);
++	p = xdr_encode_opaque_fixed(p, &stateid.u.data, NFS4_STATEID_SIZE);
++	*p = cpu_to_be32(args->maxcount);
++
++	dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
++		__func__,
++		args->type,
++		args->lseg.iomode,
++		(unsigned long)args->lseg.offset,
++		(unsigned long)args->lseg.length,
++		args->maxcount);
++	hdr->nops++;
++	hdr->replen += decode_layoutget_maxsz;
++}
++
++static int
++encode_layoutcommit(struct xdr_stream *xdr,
++		    const struct pnfs_layoutcommit_arg *args,
++		    struct compound_hdr *hdr)
++{
++	struct layoutdriver_io_operations *ld_io_ops =
++			NFS_SERVER(args->inode)->pnfs_curr_ld->ld_io_ops;
++	__be32 *p;
++
++	dprintk("%s: %llu@%llu lbw: %llu type: %d\n", __func__,
++		args->lseg.length, args->lseg.offset, args->lastbytewritten,
++		args->layout_type);
++
++	p = reserve_space(xdr, 40 + NFS4_STATEID_SIZE);
++	*p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
++	p = xdr_encode_hyper(p, args->lseg.offset);
++	p = xdr_encode_hyper(p, args->lseg.length);
++	*p++ = cpu_to_be32(0);     /* reclaim */
++	p = xdr_encode_opaque_fixed(p, args->stateid.u.data, NFS4_STATEID_SIZE);
++	*p++ = cpu_to_be32(1);     /* newoffset = TRUE */
++	p = xdr_encode_hyper(p, args->lastbytewritten);
++	*p = cpu_to_be32(args->time_modify_changed != 0);
++	if (args->time_modify_changed) {
++		p = reserve_space(xdr, 12);
++		*p++ = cpu_to_be32(0);
++		*p++ = cpu_to_be32(args->time_modify.tv_sec);
++		*p = cpu_to_be32(args->time_modify.tv_nsec);
++	}
++
++	p = reserve_space(xdr, 4);
++	*p = cpu_to_be32(args->layout_type);
++
++	if (ld_io_ops->encode_layoutcommit) {
++		ld_io_ops->encode_layoutcommit(NFS_I(args->inode)->layout,
++					       xdr, args);
++	} else {
++		p = reserve_space(xdr, 4);
++		xdr_encode_opaque(p, NULL, 0);
++	}
++
++	hdr->nops++;
++	hdr->replen += decode_layoutcommit_maxsz;
++	return 0;
++}
++
++static void
++encode_layoutreturn(struct xdr_stream *xdr,
++		    const struct nfs4_pnfs_layoutreturn_arg *args,
++		    struct compound_hdr *hdr)
++{
++	nfs4_stateid stateid;
++	__be32 *p;
++
++	p = reserve_space(xdr, 20);
++	*p++ = cpu_to_be32(OP_LAYOUTRETURN);
++	*p++ = cpu_to_be32(args->reclaim);
++	*p++ = cpu_to_be32(args->layout_type);
++	*p++ = cpu_to_be32(args->lseg.iomode);
++	*p = cpu_to_be32(args->return_type);
++	if (args->return_type == RETURN_FILE) {
++		struct layoutdriver_io_operations *ld_io_ops =
++			NFS_SERVER(args->inode)->pnfs_curr_ld->ld_io_ops;
++
++		p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
++		p = xdr_encode_hyper(p, args->lseg.offset);
++		p = xdr_encode_hyper(p, args->lseg.length);
++		pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout);
++		p = xdr_encode_opaque_fixed(p, &stateid.u.data,
++					    NFS4_STATEID_SIZE);
++		dprintk("%s: call %pF\n", __func__,
++		ld_io_ops->encode_layoutreturn);
++		if (ld_io_ops->encode_layoutreturn) {
++			ld_io_ops->encode_layoutreturn(
++				NFS_I(args->inode)->layout, xdr, args);
++		} else {
++			p = reserve_space(xdr, 4);
++			*p = cpu_to_be32(0);
++		}
++	}
++	hdr->nops++;
++	hdr->replen += decode_layoutreturn_maxsz;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
+ /*
+  * END OF "GENERIC" ENCODE ROUTINES.
+  */
+@@ -1704,7 +2014,7 @@ static u32 nfs4_xdr_minorversion(const s
+ {
+ #if defined(CONFIG_NFS_V4_1)
+ 	if (args->sa_session)
+-		return args->sa_session->clp->cl_minorversion;
++		return args->sa_session->clp->cl_mvops->minor_version;
+ #endif /* CONFIG_NFS_V4_1 */
+ 	return 0;
+ }
+@@ -2048,6 +2358,20 @@ static int nfs4_xdr_enc_locku(struct rpc
+ 	return 0;
+ }
+ 
++static int nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, __be32 *p, struct nfs_release_lockowner_args *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = 0,
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_release_lockowner(&xdr, &args->lock_owner, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
+ /*
+  * Encode a READLINK request
+  */
+@@ -2330,7 +2654,7 @@ static int nfs4_xdr_enc_setclientid_conf
+ 	struct compound_hdr hdr = {
+ 		.nops	= 0,
+ 	};
+-	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
++	const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME, 0, 0 };
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ 	encode_compound_hdr(&xdr, req, &hdr);
+@@ -2395,7 +2719,7 @@ static int nfs4_xdr_enc_exchange_id(stru
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = args->client->cl_minorversion,
++		.minorversion = args->client->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2413,7 +2737,7 @@ static int nfs4_xdr_enc_create_session(s
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = args->client->cl_minorversion,
++		.minorversion = args->client->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2431,7 +2755,7 @@ static int nfs4_xdr_enc_destroy_session(
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = session->clp->cl_minorversion,
++		.minorversion = session->clp->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2469,7 +2793,7 @@ static int nfs4_xdr_enc_get_lease_time(s
+ 	struct compound_hdr hdr = {
+ 		.minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
+ 	};
+-	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
++	const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME, 0, 0 };
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ 	encode_compound_hdr(&xdr, req, &hdr);
+@@ -2499,6 +2823,159 @@ static int nfs4_xdr_enc_reclaim_complete
+ 	return 0;
+ }
+ 
++/*
++ * Encode GETDEVICELIST request
++ */
++static int
++nfs4_xdr_enc_getdevicelist(struct rpc_rqst *req, uint32_t *p,
++			   struct nfs4_pnfs_getdevicelist_arg *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_getdevicelist(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode GETDEVICEINFO request
++ */
++static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
++				      struct nfs4_pnfs_getdeviceinfo_arg *args)
++{
++	struct xdr_stream xdr;
++	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++	int replen;
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_getdeviceinfo(&xdr, args, &hdr);
++
++	/* set up reply kvec. Subtract notification bitmap max size (8)
++	 * so that notification bitmap is put in xdr_buf tail */
++	replen = (RPC_REPHDRSIZE + auth->au_rslack +
++		  NFS4_dec_getdeviceinfo_sz - 8) << 2;
++	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pdev->pages,
++			 args->pdev->pgbase, args->pdev->pglen);
++	dprintk("%s: inlined page args = (%u, %p, %u, %u)\n",
++		__func__, replen, args->pdev->pages,
++		args->pdev->pgbase, args->pdev->pglen);
++
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ *  Encode LAYOUTGET request
++ */
++static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
++				  struct nfs4_pnfs_layoutget_arg *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
++	encode_layoutget(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ *  Encode LAYOUTCOMMIT request
++ */
++static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, uint32_t *p,
++				     struct pnfs_layoutcommit_arg *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_layoutcommit(&xdr, args, &hdr);
++	encode_getfattr(&xdr, args->bitmask, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode LAYOUTRETURN request
++ */
++static int nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, uint32_t *p,
++				     struct nfs4_pnfs_layoutreturn_arg *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
++	encode_layoutreturn(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a pNFS File Layout Data Server WRITE request
++ */
++static int nfs4_xdr_enc_dswrite(struct rpc_rqst *req, uint32_t *p,
++				struct nfs_writeargs *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_write(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a pNFS File Layout Data Server COMMIT request
++ */
++static int nfs4_xdr_enc_dscommit(struct rpc_rqst *req, uint32_t *p,
++				 struct nfs_writeargs *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_commit(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+@@ -2599,14 +3076,17 @@ static int decode_attr_bitmap(struct xdr
+ 		goto out_overflow;
+ 	bmlen = be32_to_cpup(p);
+ 
+-	bitmap[0] = bitmap[1] = 0;
++	bitmap[0] = bitmap[1] = bitmap[2] = 0;
+ 	p = xdr_inline_decode(xdr, (bmlen << 2));
+ 	if (unlikely(!p))
+ 		goto out_overflow;
+ 	if (bmlen > 0) {
+ 		bitmap[0] = be32_to_cpup(p++);
+-		if (bmlen > 1)
+-			bitmap[1] = be32_to_cpup(p);
++		if (bmlen > 1) {
++			bitmap[1] = be32_to_cpup(p++);
++			if (bmlen > 2)
++				bitmap[2] = be32_to_cpup(p);
++		}
+ 	}
+ 	return 0;
+ out_overflow:
+@@ -2635,8 +3115,9 @@ static int decode_attr_supported(struct 
+ 		decode_attr_bitmap(xdr, bitmask);
+ 		bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS;
+ 	} else
+-		bitmask[0] = bitmask[1] = 0;
+-	dprintk("%s: bitmask=%08x:%08x\n", __func__, bitmask[0], bitmask[1]);
++		bitmask[0] = bitmask[1] = bitmask[2] = 0;
++	dprintk("%s: bitmask=%08x:%08x:%08x\n", __func__,
++		bitmask[0], bitmask[1], bitmask[2]);
+ 	return 0;
+ }
+ 
+@@ -3565,7 +4046,7 @@ static int decode_opaque_fixed(struct xd
+ 
+ static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+ {
+-	return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
++	return decode_opaque_fixed(xdr, stateid->u.data, NFS4_STATEID_SIZE);
+ }
+ 
+ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
+@@ -3621,7 +4102,7 @@ out_overflow:
+ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3647,7 +4128,7 @@ xdr_error:
+ static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3679,7 +4160,7 @@ xdr_error:
+ static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3705,7 +4186,7 @@ static int decode_getfattr(struct xdr_st
+ {
+ 	__be32 *savep;
+ 	uint32_t attrlen,
+-		 bitmap[2] = {0},
++		 bitmap[3] = {0},
+ 		 type;
+ 	int status;
+ 	umode_t fmode = 0;
+@@ -3824,24 +4305,101 @@ xdr_error:
+ 	return status;
+ }
+ 
+-
+-static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
++#if defined(CONFIG_NFS_V4_1)
++/*
++ * Decode potentially multiple layout types. Currently we only support
++ * one layout driver per file system.
++ */
++static int decode_pnfs_list(struct xdr_stream *xdr, uint32_t *layoutclass)
+ {
+-	__be32 *savep;
+-	uint32_t attrlen, bitmap[2];
+-	int status;
++	uint32_t *p;
++	int num;
+ 
+-	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+-		goto xdr_error;
+-	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+-		goto xdr_error;
+-	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+-		goto xdr_error;
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	num = be32_to_cpup(p);
+ 
+-	fsinfo->rtmult = fsinfo->wtmult = 512;	/* ??? */
++	/* pNFS is not supported by the underlying file system */
++	if (num == 0) {
++		*layoutclass = 0;
++		return 0;
++	}
+ 
+-	if ((status = decode_attr_lease_time(xdr, bitmap, &fsinfo->lease_time)) != 0)
+-		goto xdr_error;
++	/* TODO: We will eventually support multiple layout drivers ? */
++	if (num > 1)
++		printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers "
++			"per filesystem not supported\n", __func__);
++
++	/* Decode and set first layout type */
++	p = xdr_inline_decode(xdr, num * 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	*layoutclass = be32_to_cpup(p);
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++/*
++ * The type of file system exported
++ */
++static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
++				uint32_t *layoutclass)
++{
++	int status = 0;
++
++	dprintk("%s: bitmap is %x\n", __func__, bitmap[1]);
++	if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
++		return -EIO;
++	if (likely(bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES)) {
++		status = decode_pnfs_list(xdr, layoutclass);
++		bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
++	}
++	return status;
++}
++
++/*
++ * The preferred block size for layout directed io
++ */
++static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
++				      uint32_t *res)
++{
++	__be32 *p;
++
++	dprintk("%s: bitmap is %x\n", __func__, bitmap[2]);
++	*res = 0;
++	if (bitmap[2] & FATTR4_WORD2_LAYOUT_BLKSIZE) {
++		p = xdr_inline_decode(xdr, 4);
++		if (unlikely(!p)) {
++			print_overflow_msg(__func__, xdr);
++			return -EIO;
++		}
++		*res = be32_to_cpup(p);
++		bitmap[2] &= ~FATTR4_WORD2_LAYOUT_BLKSIZE;
++	}
++	return 0;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
++static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
++{
++	__be32 *savep;
++	uint32_t attrlen, bitmap[3];
++	int status;
++
++	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
++		goto xdr_error;
++	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
++		goto xdr_error;
++	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
++		goto xdr_error;
++
++	fsinfo->rtmult = fsinfo->wtmult = 512;	/* ??? */
++
++	if ((status = decode_attr_lease_time(xdr, bitmap, &fsinfo->lease_time)) != 0)
++		goto xdr_error;
+ 	if ((status = decode_attr_maxfilesize(xdr, bitmap, &fsinfo->maxfilesize)) != 0)
+ 		goto xdr_error;
+ 	if ((status = decode_attr_maxread(xdr, bitmap, &fsinfo->rtmax)) != 0)
+@@ -3850,6 +4408,14 @@ static int decode_fsinfo(struct xdr_stre
+ 	if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0)
+ 		goto xdr_error;
+ 	fsinfo->wtpref = fsinfo->wtmax;
++#if defined(CONFIG_NFS_V4_1)
++	status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
++	if (status)
++		goto xdr_error;
++	status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize);
++	if (status)
++		goto xdr_error;
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ 	status = verify_attr_len(xdr, savep, attrlen);
+ xdr_error:
+@@ -3973,6 +4539,11 @@ static int decode_locku(struct xdr_strea
+ 	return status;
+ }
+ 
++static int decode_release_lockowner(struct xdr_stream *xdr)
++{
++	return decode_op_hdr(xdr, OP_RELEASE_LOCKOWNER);
++}
++
+ static int decode_lookup(struct xdr_stream *xdr)
+ {
+ 	return decode_op_hdr(xdr, OP_LOOKUP);
+@@ -4333,7 +4904,7 @@ static int decode_getacl(struct xdr_stre
+ {
+ 	__be32 *savep;
+ 	uint32_t attrlen,
+-		 bitmap[2] = {0};
++		 bitmap[3] = {0};
+ 	struct kvec *iov = req->rq_rcv_buf.head;
+ 	int status;
+ 
+@@ -4682,6 +5253,226 @@ out_overflow:
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
++#if defined(CONFIG_NFS_V4_1)
++/*
++ * TODO: Need to handle case when EOF != true;
++ */
++static int decode_getdevicelist(struct xdr_stream *xdr,
++				struct pnfs_devicelist *res)
++{
++	__be32 *p;
++	int status, i;
++	struct nfs_writeverf verftemp;
++
++	status = decode_op_hdr(xdr, OP_GETDEVICELIST);
++	if (status)
++		return status;
++
++	p = xdr_inline_decode(xdr, 8 + 8 + 4);
++	if (unlikely(!p))
++		goto out_overflow;
++
++	/* TODO: Skip cookie for now */
++	p += 2;
++
++	/* Read verifier */
++	p = xdr_decode_opaque_fixed(p, verftemp.verifier, 8);
++
++	res->num_devs = be32_to_cpup(p);
++
++	dprintk("%s: num_dev %d\n", __func__, res->num_devs);
++
++	if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM)
++		return -NFS4ERR_REP_TOO_BIG;
++
++	p = xdr_inline_decode(xdr,
++			      res->num_devs * NFS4_PNFS_DEVICEID4_SIZE + 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	for (i = 0; i < res->num_devs; i++)
++		p = xdr_decode_opaque_fixed(p, res->dev_id[i].data,
++					    NFS4_PNFS_DEVICEID4_SIZE);
++	res->eof = be32_to_cpup(p);
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_getdeviceinfo(struct xdr_stream *xdr,
++				struct pnfs_device *pdev)
++{
++	__be32 *p;
++	uint32_t len, type;
++	int status;
++
++	status = decode_op_hdr(xdr, OP_GETDEVICEINFO);
++	if (status) {
++		if (status == -ETOOSMALL) {
++			p = xdr_inline_decode(xdr, 4);
++			if (unlikely(!p))
++				goto out_overflow;
++			pdev->mincount = be32_to_cpup(p);
++			dprintk("%s: Min count too small. mincnt = %u\n",
++				__func__, pdev->mincount);
++		}
++		return status;
++	}
++
++	p = xdr_inline_decode(xdr, 8);
++	if (unlikely(!p))
++		goto out_overflow;
++	type = be32_to_cpup(p++);
++	if (type != pdev->layout_type) {
++		dprintk("%s: layout mismatch req: %u pdev: %u\n",
++			__func__, pdev->layout_type, type);
++		return -EINVAL;
++	}
++	/*
++	 * Get the length of the opaque device_addr4. xdr_read_pages places
++	 * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages)
++	 * and places the remaining xdr data in xdr_buf->tail
++	 */
++	pdev->mincount = be32_to_cpup(p);
++	xdr_read_pages(xdr, pdev->mincount); /* include space for the length */
++
++	/* At most one bitmap word */
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	len = be32_to_cpup(p);
++	if (len) {
++		p = xdr_inline_decode(xdr, 4);
++		if (unlikely(!p))
++			goto out_overflow;
++		pdev->dev_notify_types = be32_to_cpup(p);
++	} else
++		pdev->dev_notify_types = 0;
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
++			    struct nfs4_pnfs_layoutget_res *res)
++{
++	__be32 *p;
++	int status;
++	u32 layout_count, dummy;
++
++	status = decode_op_hdr(xdr, OP_LAYOUTGET);
++	if (status)
++		return status;
++	p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE);
++	if (unlikely(!p))
++		goto out_overflow;
++	res->return_on_close = be32_to_cpup(p++);
++	p = xdr_decode_opaque_fixed(p, res->stateid.u.data, NFS4_STATEID_SIZE);
++	layout_count = be32_to_cpup(p);
++	if (!layout_count) {
++		dprintk("%s: server responded with empty layout array\n",
++			__func__);
++		return -EINVAL;
++	}
++
++	p = xdr_inline_decode(xdr, 24);
++	if (unlikely(!p))
++		goto out_overflow;
++	p = xdr_decode_hyper(p, &res->lseg.offset);
++	p = xdr_decode_hyper(p, &res->lseg.length);
++	res->lseg.iomode = be32_to_cpup(p++);
++	res->type = be32_to_cpup(p++);
++
++	status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
++	if (unlikely(status))
++		return status;
++
++	dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
++		__func__,
++		(unsigned long)res->lseg.offset,
++		(unsigned long)res->lseg.length,
++		res->lseg.iomode,
++		res->type,
++		res->layout.len);
++
++	/* presumably, pnfs4_proc_layoutget allocated a single page */
++	if (res->layout.len > PAGE_SIZE)
++		return -ENOMEM;
++	memcpy(res->layout.buf, p, res->layout.len);
++
++	/* FIXME: the whole layout array should be passed up to the pnfs
++	 * client */
++	if (layout_count > 1) {
++		dprintk("%s: server responded with %d layouts, dropping tail\n",
++			__func__, layout_count);
++
++		while (--layout_count) {
++			p = xdr_inline_decode(xdr, 24);
++			if (unlikely(!p))
++				goto out_overflow;
++			status = decode_opaque_inline(xdr, &dummy, (char **)&p);
++			if (unlikely(status))
++				return status;
++		}
++	}
++
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_layoutreturn(struct xdr_stream *xdr,
++			       struct nfs4_pnfs_layoutreturn_res *res)
++{
++	__be32 *p;
++	int status;
++
++	status = decode_op_hdr(xdr, OP_LAYOUTRETURN);
++	if (status)
++		return status;
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	res->lrs_present = be32_to_cpup(p);
++	if (res->lrs_present)
++		status = decode_stateid(xdr, &res->stateid);
++	return status;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_layoutcommit(struct xdr_stream *xdr,
++				    struct rpc_rqst *req,
++				    struct pnfs_layoutcommit_res *res)
++{
++	__be32 *p;
++	int status;
++
++	status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT);
++	if (status)
++		return status;
++
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	res->sizechanged = be32_to_cpup(p);
++
++	if (res->sizechanged) {
++		p = xdr_inline_decode(xdr, 8);
++		if (unlikely(!p))
++			goto out_overflow;
++		xdr_decode_hyper(p, &res->newsize);
++	}
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
+ /*
+  * END OF "GENERIC" DECODE ROUTINES.
+  */
+@@ -5259,6 +6050,19 @@ out:
+ 	return status;
+ }
+ 
++static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (!status)
++		status = decode_release_lockowner(&xdr);
++	return status;
++}
++
+ /*
+  * Decode READLINK response
+  */
+@@ -5696,6 +6500,186 @@ static int nfs4_xdr_dec_reclaim_complete
+ 		status = decode_reclaim_complete(&xdr, (void *)NULL);
+ 	return status;
+ }
++
++/*
++ * Decode GETDEVICELIST response: SEQUENCE, PUTFH, then GETDEVICELIST.
++ */
++static int nfs4_xdr_dec_getdevicelist(struct rpc_rqst *rqstp, uint32_t *p,
++				      struct nfs4_pnfs_getdevicelist_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	dprintk("decoding getdevicelist!\n");
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status != 0)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status != 0)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status != 0)
++		goto out;
++	status = decode_getdevicelist(&xdr, res->devlist);
++out:
++	return status;
++}
++
++/*
++ * Decode GETDEVICEINFO response
++ */
++static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
++				      struct nfs4_pnfs_getdeviceinfo_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status != 0)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status != 0)
++		goto out;
++	status = decode_getdeviceinfo(&xdr, res->pdev);
++out:
++	return status;
++}
++
++/*
++ * Decode LAYOUTGET response
++ */
++static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
++				  struct nfs4_pnfs_layoutget_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutget(&xdr, rqstp, res);
++out:
++	return status;
++}
++
++/*
++ * Decode LAYOUTRETURN response
++ */
++static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp, uint32_t *p,
++				     struct nfs4_pnfs_layoutreturn_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutreturn(&xdr, res);
++out:
++	return status;
++}
++
++/*
++ * Decode LAYOUTCOMMIT response
++ */
++static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, uint32_t *p,
++				     struct pnfs_layoutcommit_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutcommit(&xdr, rqstp, res);
++	if (status)
++		goto out;
++	decode_getfattr(&xdr, res->fattr, res->server,
++			!RPC_IS_ASYNC(rqstp->rq_task));
++out:
++	return status;
++}
++
++/*
++ * Decode pNFS File Layout Data Server WRITE response
++ */
++static int nfs4_xdr_dec_dswrite(struct rpc_rqst *rqstp, uint32_t *p,
++				struct nfs_writeres *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_write(&xdr, res);
++	if (!status)
++		return res->count;
++out:
++	return status;
++}
++
++/*
++ * Decode pNFS File Layout Data Server COMMIT response
++ */
++static int nfs4_xdr_dec_dscommit(struct rpc_rqst *rqstp, uint32_t *p,
++				 struct nfs_writeres *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_commit(&xdr, res);
++out:
++	return status;
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
+@@ -5866,6 +6850,7 @@ struct rpc_procinfo	nfs4_procedures[] = 
+   PROC(GETACL,		enc_getacl,	dec_getacl),
+   PROC(SETACL,		enc_setacl,	dec_setacl),
+   PROC(FS_LOCATIONS,	enc_fs_locations, dec_fs_locations),
++  PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
+ #if defined(CONFIG_NFS_V4_1)
+   PROC(EXCHANGE_ID,	enc_exchange_id,	dec_exchange_id),
+   PROC(CREATE_SESSION,	enc_create_session,	dec_create_session),
+@@ -5873,6 +6858,13 @@ struct rpc_procinfo	nfs4_procedures[] = 
+   PROC(SEQUENCE,	enc_sequence,	dec_sequence),
+   PROC(GET_LEASE_TIME,	enc_get_lease_time,	dec_get_lease_time),
+   PROC(RECLAIM_COMPLETE, enc_reclaim_complete,  dec_reclaim_complete),
++  PROC(PNFS_GETDEVICELIST, enc_getdevicelist, dec_getdevicelist),
++  PROC(PNFS_GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
++  PROC(PNFS_LAYOUTGET,  enc_layoutget,     dec_layoutget),
++  PROC(PNFS_LAYOUTCOMMIT, enc_layoutcommit,  dec_layoutcommit),
++  PROC(PNFS_LAYOUTRETURN, enc_layoutreturn,  dec_layoutreturn),
++  PROC(PNFS_WRITE, enc_dswrite,  dec_dswrite),
++  PROC(PNFS_COMMIT, enc_dscommit,  dec_dscommit),
+ #endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild
+--- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-08-23 12:09:03.348511665 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-08-23 12:09:03.348511665 -0400
+@@ -0,0 +1,11 @@
++#
++# Makefile for the pNFS Objects Layout Driver kernel module
++#
++objlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o objio_osd.o
++obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o
++
++#
++# Panasas pNFS Layout Driver kernel module
++#
++panlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o panfs_shim.o
++obj-$(CONFIG_PNFS_PANLAYOUT) += panlayoutdriver.o
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-08-23 12:09:03.349501459 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-08-23 12:09:03.349501459 -0400
+@@ -0,0 +1,1087 @@
++/*
++ *  objio_osd.c
++ *
++ *  pNFS Objects layout implementation over open-osd initiator library
++ *
++ *  Copyright (C) 2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/module.h>
++#include <scsi/scsi_device.h>
++#include <scsi/osd_attributes.h>
++#include <scsi/osd_initiator.h>
++#include <scsi/osd_sec.h>
++#include <scsi/osd_sense.h>
++
++#include "objlayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++#define _LLU(x) ((unsigned long long)x)
++
++enum { BIO_MAX_PAGES_KMALLOC =
++		(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
++};
++
++/* A per mountpoint struct currently for device cache */
++struct objio_mount_type {
++	struct list_head dev_list;	/* of struct _dev_ent, via ->list */
++	spinlock_t dev_list_lock;	/* taken around dev_list walks/updates */
++};
++
++struct _dev_ent {
++	struct list_head list;		/* link in objio_mount_type.dev_list */
++	struct pnfs_deviceid d_id;	/* lookup key, compared with memcmp() */
++	struct osd_dev *od;		/* put by _dev_list_remove_all() */
++};
++
++static void _dev_list_remove_all(struct objio_mount_type *omt)
++{
++	struct _dev_ent *de, *next;
++	LIST_HEAD(doomed);
++
++	/* Only unhook the entries under the spinlock: osduld_put_device()
++	 * releases the device's file reference and may sleep doing so. */
++	spin_lock(&omt->dev_list_lock);
++	list_splice_init(&omt->dev_list, &doomed);
++	spin_unlock(&omt->dev_list_lock);
++	list_for_each_entry_safe(de, next, &doomed, list) {
++		osduld_put_device(de->od);
++		kfree(de);
++	}
++}
++
++static struct osd_dev *___dev_list_find(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id)
++{
++	struct _dev_ent *entry;
++
++	/* Linear scan; both callers here take omt->dev_list_lock first */
++	list_for_each_entry(entry, &omt->dev_list, list) {
++		if (memcmp(&entry->d_id, d_id, sizeof(*d_id)) == 0)
++			return entry->od;
++	}
++	return NULL;	/* nothing cached for this device id */
++}
++
++/* Locked flavour of ___dev_list_find() for callers not holding the lock */
++static struct osd_dev *_dev_list_find(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id)
++{
++	struct osd_dev *found;
++
++	spin_lock(&omt->dev_list_lock);
++	found = ___dev_list_find(omt, d_id);
++	spin_unlock(&omt->dev_list_lock);
++
++	return found;
++}
++
++static int _dev_list_add(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id, struct osd_dev *od)
++{
++	struct _dev_ent *fresh;
++
++	/* Allocate up front: GFP_KERNEL must not be used under the lock */
++	fresh = kzalloc(sizeof(*fresh), GFP_KERNEL);
++	if (!fresh)
++		return -ENOMEM;
++
++	spin_lock(&omt->dev_list_lock);
++	if (!___dev_list_find(omt, d_id)) {
++		fresh->d_id = *d_id;
++		fresh->od = od;
++		list_add(&fresh->list, &omt->dev_list);
++		fresh = NULL;	/* now owned by the list */
++	}
++	spin_unlock(&omt->dev_list_lock);
++
++	/* Still set only if the id was cached already; that also returns 0 */
++	kfree(fresh);
++	return 0;
++}
++
++struct objio_segment {
++	struct pnfs_osd_layout *layout;
++
++	unsigned mirrors_p1;	/* odm_mirror_cnt + 1 */
++	unsigned stripe_unit;
++	unsigned group_width;	/* Data stripe_units without integrity comps */
++	u64 group_depth;	/* -1 when the data map has no group_width */
++	unsigned group_count;	/* 1 when the data map has no group_width */
++
++	unsigned num_comps;	/* set once all ods[] were looked up */
++	/* variable length */
++	struct osd_dev	*ods[1];
++};
++
++struct objio_state;
++typedef ssize_t (*objio_done_fn)(struct objio_state *ios);
++
++struct objio_state {
++	/* Generic layer */
++	struct objlayout_io_state ol_state;
++
++	struct objio_segment *objio_seg;
++
++	struct kref kref;	/* submitter + one per request, see _io_exec */
++	objio_done_fn done;	/* called from _last_io() on the final put */
++	void *private;		/* completion to signal in sync mode */
++
++	unsigned long length;	/* bytes mapped so far by _prepare_one_group */
++	unsigned numdevs; /* Actually used devs in this IO */
++	/* A per-device variable array of size numdevs */
++	struct _objio_per_comp {
++		struct bio *bio;
++		struct osd_request *or;
++		unsigned long length;
++		u64 offset;
++		unsigned dev;	/* component index as taken by _io_od() */
++	} per_dev[];
++};
++
++/* Map layout component @comp to its osd_dev: per-mount cache first, else
++   get_device_info + osd_initiator lookup. Returns ERR_PTR() on failure. */
++static struct osd_dev *_device_lookup(struct pnfs_layout_type *pnfslay,
++			       struct objio_segment *objio_seg, unsigned comp)
++{
++	struct pnfs_osd_layout *layout = objio_seg->layout;
++	struct pnfs_osd_deviceaddr *deviceaddr;
++	struct pnfs_deviceid *d_id;
++	struct osd_dev *od;
++	struct osd_dev_info odi;
++	struct objio_mount_type *omt = PNFS_NFS_SERVER(pnfslay)->pnfs_ld_data;
++	int err;
++
++	d_id = &layout->olo_comps[comp].oc_object_id.oid_device_id;
++	/* Fast path: this device id was already looked up on this mount */
++	od = _dev_list_find(omt, d_id);
++	if (od)
++		return od;
++
++	err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr);
++	if (unlikely(err)) {
++		dprintk("%s: objlayout_get_deviceinfo=>%d\n", __func__, err);
++		return ERR_PTR(err);
++	}
++
++	odi.systemid_len = deviceaddr->oda_systemid.len;
++	if (odi.systemid_len > sizeof(odi.systemid)) {
++		err = -EINVAL;
++		goto out;
++	} else if (odi.systemid_len)
++		memcpy(odi.systemid, deviceaddr->oda_systemid.data,
++		       odi.systemid_len);
++	odi.osdname_len	 = deviceaddr->oda_osdname.len;
++	odi.osdname	 = (u8 *)deviceaddr->oda_osdname.data;
++
++	if (!odi.osdname_len && !odi.systemid_len) {
++		dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
++			__func__);
++		err = -ENODEV;
++		goto out;
++	}
++
++	od = osduld_info_lookup(&odi);
++	if (unlikely(IS_ERR(od))) {
++		err = PTR_ERR(od);
++		dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
++		goto out;
++	}
++	err = _dev_list_add(omt, d_id, od);
++	if (err)	/* not cached, so nobody would put it at unmount */
++		osduld_put_device(od);
++out:
++	dprintk("%s: return=%d\n", __func__, err);
++	objlayout_put_deviceinfo(deviceaddr);
++	return err ? ERR_PTR(err) : od;
++}
++
++static int objio_devices_lookup(struct pnfs_layout_type *pnfslay,
++	struct objio_segment *objio_seg)
++{
++	const unsigned total = objio_seg->layout->olo_num_comps;
++	unsigned comp;
++	int err = 0;
++
++	/* Fill ods[] in component order; stop at the first failed lookup */
++	for (comp = 0; comp < total; comp++) {
++		struct osd_dev *od = _device_lookup(pnfslay, objio_seg, comp);
++
++		if (unlikely(IS_ERR(od))) {
++			err = PTR_ERR(od);
++			break;
++		}
++		objio_seg->ods[comp] = od;
++	}
++
++	/* num_comps is only set once every device was resolved */
++	if (!err)
++		objio_seg->num_comps = total;
++
++	dprintk("%s: return=%d\n", __func__, err);
++	return err;
++}
++
++static int _verify_data_map(struct pnfs_osd_layout *layout)
++{
++	struct pnfs_osd_data_map *data_map = &layout->olo_map;
++	u64 stripe_length;
++	u32 group_width;
++
++	/* FIXME: Only raid0 for now. Anything else should go through MDS */
++	if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) {
++		printk(KERN_ERR "Only RAID_0 for now\n");
++		return -ENOTSUPP;
++	}
++	if (0 != (data_map->odm_num_comps % (data_map->odm_mirror_cnt + 1))) {
++		printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n",
++			  data_map->odm_num_comps, data_map->odm_mirror_cnt);
++		return -EINVAL;
++	}
++
++	if (data_map->odm_group_width)
++		group_width = data_map->odm_group_width;
++	else
++		group_width = data_map->odm_num_comps /
++						(data_map->odm_mirror_cnt + 1);
++
++	stripe_length = (u64)data_map->odm_stripe_unit * group_width;
++	if (!stripe_length)	/* zero unit/width: would divide by 0 later */
++		return -EINVAL;
++	if (stripe_length >= (1ULL << 32)) {
++		printk(KERN_ERR "Total Stripe length(0x%llx)"
++			  " >= 32bit is not supported\n", _LLU(stripe_length));
++		return -ENOTSUPP;
++	}
++	if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) {
++		printk(KERN_ERR "Stripe Unit(0x%llx)"
++			  " must be Multples of PAGE_SIZE(0x%lx)\n",
++			  _LLU(data_map->odm_stripe_unit), PAGE_SIZE);
++		return -ENOTSUPP;
++	}
++	return 0;
++}
++
++int objio_alloc_lseg(void **outp,
++	struct pnfs_layout_type *pnfslay,
++	struct pnfs_layout_segment *lseg,
++	struct pnfs_osd_layout *layout)
++{
++	struct pnfs_osd_data_map *map = &layout->olo_map;
++	struct objio_segment *seg;
++	int err;
++
++	err = _verify_data_map(layout);
++	if (unlikely(err))
++		return err;
++
++	/* ods[] is declared with one slot; the rest is tacked on here */
++	seg = kzalloc(sizeof(*seg) +
++		      (layout->olo_num_comps - 1) * sizeof(seg->ods[0]),
++		      GFP_KERNEL);
++	if (!seg)
++		return -ENOMEM;
++
++	seg->layout = layout;
++	err = objio_devices_lookup(pnfslay, seg);
++	if (err) {
++		dprintk("%s: Error: return %d\n", __func__, err);
++		kfree(seg);
++		*outp = NULL;
++		return err;
++	}
++
++	/* Cache the striping geometry so the IO path need not re-derive it */
++	seg->mirrors_p1 = map->odm_mirror_cnt + 1;
++	seg->stripe_unit = map->odm_stripe_unit;
++	if (map->odm_group_width) {
++		seg->group_width = map->odm_group_width;
++		seg->group_depth = map->odm_group_depth;
++		seg->group_count = map->odm_num_comps / seg->mirrors_p1 /
++							seg->group_width;
++	} else {
++		/* No grouping: a single group of unbounded (-1) depth */
++		seg->group_width = map->odm_num_comps / seg->mirrors_p1;
++		seg->group_depth = -1;
++		seg->group_count = 1;
++	}
++
++	*outp = seg;
++	return 0;
++}
++
++void objio_free_lseg(void *p)
++{
++	/* @p is an objio_segment; the osd_dev pointers in its ods[] are not
++	 * put here, only the segment memory itself is released. */
++	kfree(p);
++}
++
++int objio_alloc_io_state(void *seg, struct objlayout_io_state **outp)
++{
++	struct objio_segment *objio_seg = seg;
++	const unsigned ncomps = objio_seg->num_comps;
++	struct objio_state *ios;
++	/* One allocation: the state with per_dev[], then the ioerrs[] array */
++	const unsigned hdr_sz = sizeof(*ios) + ncomps * sizeof(ios->per_dev[0]);
++	const unsigned errs_sz = ncomps * sizeof(ios->ol_state.ioerrs[0]);
++
++	dprintk("%s: num_comps=%d\n", __func__, ncomps);
++	ios = kzalloc(hdr_sz + errs_sz, GFP_KERNEL);
++	if (unlikely(!ios))
++		return -ENOMEM;
++
++	ios->objio_seg = objio_seg;
++	ios->ol_state.ioerrs = ((void *)ios) + hdr_sz;
++	ios->ol_state.num_comps = ncomps;
++
++	*outp = &ios->ol_state;
++	return 0;
++}
++
++void objio_free_io_state(struct objlayout_io_state *ol_state)
++{
++	/* ol_state is embedded in the objio_state kzalloc'ed by
++	 * objio_alloc_io_state(); its ioerrs[] lives in the same chunk. */
++
++	kfree(container_of(ol_state, struct objio_state, ol_state));
++}
++
++enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
++{
++	switch (oep) {
++	case OSD_ERR_PRI_NO_ERROR:
++		return (enum pnfs_osd_errno)0;
++	/* _io_check() handles CLEAR_PAGES itself and never passes it on */
++	case OSD_ERR_PRI_CLEAR_PAGES:
++		BUG_ON(1);
++		return 0;
++	/* Priorities that map one-to-one onto a pNFS-OSD error code */
++	case OSD_ERR_PRI_RESOURCE:
++		return PNFS_OSD_ERR_RESOURCE;
++	case OSD_ERR_PRI_BAD_CRED:
++		return PNFS_OSD_ERR_BAD_CRED;
++	case OSD_ERR_PRI_NO_ACCESS:
++		return PNFS_OSD_ERR_NO_ACCESS;
++	case OSD_ERR_PRI_UNREACHABLE:
++		return PNFS_OSD_ERR_UNREACHABLE;
++	case OSD_ERR_PRI_NOT_FOUND:
++		return PNFS_OSD_ERR_NOT_FOUND;
++	case OSD_ERR_PRI_NO_SPACE:
++		return PNFS_OSD_ERR_NO_SPACE;
++	default:
++		WARN_ON(1);	/* unknown priority: warn, then report EIO */
++		/* fallthrough */
++	case OSD_ERR_PRI_EIO:
++		return PNFS_OSD_ERR_EIO;
++	}
++}
++
++static void _clear_bio(struct bio *bio)
++{
++	struct bio_vec *vec;
++	unsigned idx;
++
++	/* Zero every segment of the bio, starting from vector 0; used by
++	 * _io_check() when a read started past the end of the object */
++	__bio_for_each_segment(vec, bio, idx, 0) {
++		if (unlikely(vec->bv_len != PAGE_SIZE))
++			zero_user(vec->bv_page, vec->bv_offset, vec->bv_len);
++		else	/* a full page worth: clear the page as a whole */
++			clear_highpage(vec->bv_page);
++	}
++}
++
++static int _io_check(struct objio_state *ios, bool is_write)
++{
++	enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR;
++	int lin_ret = 0;
++	int i;
++
++	for (i = 0; i <  ios->numdevs; i++) {
++		struct osd_sense_info osi;
++		struct osd_request *or = ios->per_dev[i].or;
++		int ret;
++
++		if (!or)	/* no request was built for this slot */
++			continue;
++
++		ret = osd_req_decode_sense(or, &osi);
++		if (likely(!ret))
++			continue;
++
++		if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
++			/* read started past the end of the file */
++			BUG_ON(is_write);
++			_clear_bio(ios->per_dev[i].bio);
++			dprintk("%s: start read offset passed end of file "
++				"offset=0x%llx, length=0x%lx\n", __func__,
++				_LLU(ios->per_dev[i].offset),
++				ios->per_dev[i].length);
++
++			continue; /* we recovered */
++		}
++		objlayout_io_set_result(&ios->ol_state, ios->per_dev[i].dev,
++					osd_pri_2_pnfs_err(osi.osd_err_pri),
++					ios->per_dev[i].offset,
++					ios->per_dev[i].length,
++					is_write);
++		/* Keep the return code of the highest priority error seen */
++		if (osi.osd_err_pri >= oep) {
++			oep = osi.osd_err_pri;
++			lin_ret = ret;
++		}
++	}
++
++	return lin_ret;
++}
++
++/*
++ * Common IO state helpers.
++ */
++static void _io_free(struct objio_state *ios)
++{
++	struct _objio_per_comp *slot = ios->per_dev;
++	struct _objio_per_comp *end = slot + ios->numdevs;
++
++	/* Pointers are NULLed after release so a second pass is harmless */
++	for (; slot < end; slot++) {
++		if (slot->or) {
++			osd_end_request(slot->or);
++			slot->or = NULL;
++		}
++
++		if (slot->bio) {
++			bio_put(slot->bio);
++			slot->bio = NULL;
++		}
++	}
++}
++
++struct osd_dev *_io_od(struct objio_state *ios, unsigned dev)
++{
++	unsigned first = ios->objio_seg->layout->olo_comps_index;
++
++	/* ods[] is indexed relative to the layout's olo_comps_index */
++	BUG_ON(dev < first || dev >= first + ios->ol_state.num_comps);
++	return ios->objio_seg->ods[dev - first];
++}
++
++struct _striping_info {
++	u64 obj_offset;		/* byte offset inside the component object */
++	u64 group_length;	/* bytes left in the current group (T - H) */
++	u64 total_group_length;	/* bytes in one whole group (T) */
++	u64 Major;		/* M of _calc_stripe_info() */
++	unsigned dev;		/* device index, already scaled by mirrors_p1 */
++	unsigned unit_off;	/* file_offset % stripe_unit */
++};
++
++static void _calc_stripe_info(struct objio_state *ios, u64 file_offset,
++			      struct _striping_info *si)
++{
++	u32	stripe_unit = ios->objio_seg->stripe_unit;
++	u32	group_width = ios->objio_seg->group_width;
++	u64	group_depth = ios->objio_seg->group_depth;
++	u32	U = stripe_unit * group_width;	/* bytes in one full stripe */
++	/* T: bytes per group, S: bytes per cycle over all groups, M: cycle */
++	u64	T = U * group_depth;
++	u64	S = T * ios->objio_seg->group_count;
++	u64	M = div64_u64(file_offset, S);
++
++	/*
++	G = (L - (M * S)) / T
++	H = (L - (M * S)) % T
++	*/
++	u64	LmodU = file_offset - M * S;
++	u32	G = div64_u64(LmodU, T);
++	u64	H = LmodU - G * T;
++
++	u32	N = div_u64(H, U);	/* full stripes below the offset */
++
++	div_u64_rem(file_offset, stripe_unit, &si->unit_off);
++	si->obj_offset = si->unit_off + ((u64)N * stripe_unit) +
++				  (M * group_depth * stripe_unit);
++	/* (u64)N above: a 32-bit N * stripe_unit wraps once it passes 4GiB */
++	/* "H - (N * U)" is just "H % U" so it's bound to u32 */
++	si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
++	si->dev *= ios->objio_seg->mirrors_p1;
++
++	si->group_length = T - H;
++	si->total_group_length = T;
++	si->Major = M;
++}
++
++static int _add_stripe_unit(struct objio_state *ios,  unsigned *cur_pg,
++		unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len)
++{
++	unsigned pg = *cur_pg;
++	struct request_queue *q =
++			osd_request_queue(_io_od(ios, per_dev->dev));
++
++	per_dev->length += cur_len;
++	/* First unit for this device: allocate a bio for its share of pages */
++	if (per_dev->bio == NULL) {
++		unsigned stripes = ios->ol_state.num_comps /
++						     ios->objio_seg->mirrors_p1;
++		unsigned pages_in_stripe = stripes *
++				      (ios->objio_seg->stripe_unit / PAGE_SIZE);
++		unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) /
++				    stripes;
++
++		per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
++		if (unlikely(!per_dev->bio)) {
++			dprintk("Faild to allocate BIO size=%u\n", bio_size);
++			return -ENOMEM;
++		}
++	}
++	/* Add the data page by page; only the first page starts at pgbase */
++	while (cur_len > 0) {
++		unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
++		unsigned added_len;
++
++		BUG_ON(ios->ol_state.nr_pages <= pg);
++		cur_len -= pglen;
++
++		added_len = bio_add_pc_page(q, per_dev->bio,
++					ios->ol_state.pages[pg], pglen, pgbase);
++		if (unlikely(pglen != added_len))
++			return -ENOMEM;
++		pgbase = 0;
++		++pg;
++	}
++	BUG_ON(cur_len);
++
++	*cur_pg = pg;	/* tell the caller which page comes next */
++	return 0;
++}
++
++static int _prepare_one_group(struct objio_state *ios, u64 length,
++			      struct _striping_info *si, unsigned first_comp,
++			      unsigned *last_pg)
++{
++	unsigned stripe_unit = ios->objio_seg->stripe_unit;
++	unsigned mirrors_p1 = ios->objio_seg->mirrors_p1;
++	unsigned devs_in_group = ios->objio_seg->group_width * mirrors_p1;
++	unsigned dev = si->dev;
++	unsigned first_dev = dev - (dev % devs_in_group);
++	unsigned comp = first_comp + (dev - first_dev);
++	unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
++	unsigned cur_pg = *last_pg;
++	int ret = 0;
++
++	while (length) {
++		struct _objio_per_comp *per_dev = &ios->per_dev[comp];
++		unsigned cur_len, page_off = 0;
++		/* Slot not used yet in this IO: fix its device and offset */
++		if (!per_dev->length) {
++			per_dev->dev = dev;
++			if (dev < si->dev) {
++				per_dev->offset = si->obj_offset + stripe_unit -
++								   si->unit_off;
++				cur_len = stripe_unit;
++			} else if (dev == si->dev) {
++				per_dev->offset = si->obj_offset;
++				cur_len = stripe_unit - si->unit_off;
++				page_off = si->unit_off & ~PAGE_MASK;
++				BUG_ON(page_off &&
++				      (page_off != ios->ol_state.pgbase));
++			} else { /* dev > si->dev */
++				per_dev->offset = si->obj_offset - si->unit_off;
++				cur_len = stripe_unit;
++			}
++
++			if (max_comp < comp)
++				max_comp = comp;
++
++			dev += mirrors_p1;
++			dev = (dev % devs_in_group) + first_dev;
++		} else {
++			cur_len = stripe_unit;
++		}
++		if (cur_len >= length)
++			cur_len = length;
++
++		ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
++				       cur_len);
++		if (unlikely(ret))
++			goto out;
++		/* Advance to the next mirror set, wrapping inside the group */
++		comp += mirrors_p1;
++		comp = (comp % devs_in_group) + first_comp;
++
++		length -= cur_len;
++		ios->length += cur_len;
++	}
++out:
++	ios->numdevs = max_comp + mirrors_p1;	/* updated on error as well */
++	*last_pg = cur_pg;
++	return ret;
++}
++
++static int _io_rw_pagelist(struct objio_state *ios)
++{
++	u64 length = ios->ol_state.count;
++	struct _striping_info si;
++	unsigned devs_in_group = ios->objio_seg->group_width *
++				 ios->objio_seg->mirrors_p1;
++	unsigned first_comp = 0;
++	unsigned num_comps = ios->objio_seg->layout->olo_map.odm_num_comps;
++	unsigned last_pg = 0;
++	int ret = 0;
++
++	_calc_stripe_info(ios, ios->ol_state.offset, &si);
++	while (length) {
++		if (length < si.group_length)
++			si.group_length = length;
++
++		ret = _prepare_one_group(ios, si.group_length, &si, first_comp,
++					 &last_pg);
++		if (unlikely(ret))
++			goto out;
++
++		length -= si.group_length;
++		/* What is left continues at the very start of the next group */
++		si.group_length = si.total_group_length;
++		si.unit_off = 0;
++		++si.Major;
++		si.obj_offset = si.Major * ios->objio_seg->stripe_unit *
++						ios->objio_seg->group_depth;
++		/* First device of the following group, wrapping at num_comps */
++		si.dev = (si.dev - (si.dev % devs_in_group)) + devs_in_group;
++		si.dev %= num_comps;
++
++		first_comp += devs_in_group;
++		first_comp %= num_comps;
++	}
++
++out:
++	if (!ios->length)
++		return ret;
++	/* Something was mapped: report success even if a later group failed */
++	return 0;
++}
++
++static ssize_t _sync_done(struct objio_state *ios)
++{
++	/* ios->private is the on-stack completion parked by _io_exec() */
++
++	complete(ios->private);
++	return 0;
++}
++
++static void _last_io(struct kref *kref)
++{
++	struct objio_state *ios = container_of(kref, struct objio_state, kref);
++	/* kref release callback: runs when the final reference is put */
++	ios->done(ios);
++}
++
++static void _done_io(struct osd_request *or, void *p)
++{
++	/* @p is the objio_state given to osd_execute_request_async() */
++	struct objio_state *owner = p;
++	kref_put(&owner->kref, _last_io);
++}
++
++static ssize_t _io_exec(struct objio_state *ios)
++{
++	DECLARE_COMPLETION_ONSTACK(wait);
++	ssize_t status = 0; /* sync status */
++	unsigned i;
++	objio_done_fn saved_done_fn = ios->done;
++	bool sync = ios->ol_state.sync;
++
++	if (sync) {
++		ios->done = _sync_done;	/* only wakes us; real handler below */
++		ios->private = &wait;
++	}
++	/* Initial reference: ours, dropped once everything is submitted */
++	kref_init(&ios->kref);
++
++	for (i = 0; i < ios->numdevs; i++) {
++		struct osd_request *or = ios->per_dev[i].or;
++
++		if (!or)
++			continue;
++
++		kref_get(&ios->kref);
++		osd_execute_request_async(or, _done_io, ios);
++	}
++	/* If every request already completed, this put is what runs ->done */
++	kref_put(&ios->kref, _last_io);
++
++	if (sync) {
++		wait_for_completion(&wait);
++		status = saved_done_fn(ios);
++	}
++
++	return status;
++}
++
++/*
++ * read
++ */
++static ssize_t _read_done(struct objio_state *ios)
++{
++	int err = _io_check(ios, false);
++	ssize_t status;
++
++	/* Requests and bios are released whether or not the read worked */
++	_io_free(ios);
++
++	status = err;
++	if (likely(!err))
++		status = ios->length;
++
++	objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync);
++	return status;
++}
++
++static int _read_mirrors(struct objio_state *ios, unsigned cur_comp)
++{
++	struct _objio_per_comp *slot = &ios->per_dev[cur_comp];
++	unsigned dev = slot->dev;
++	struct pnfs_osd_object_cred *cred =
++			&ios->objio_seg->layout->olo_comps[dev];
++	struct osd_obj_id obj = {
++		.partition = cred->oc_object_id.oid_partition_id,
++		.id = cred->oc_object_id.oid_object_id,
++	};
++	struct osd_request *req;
++	int ret;
++
++	/* Only the component at @cur_comp is read, not its mirror copies */
++	req = osd_start_request(_io_od(ios, dev), GFP_KERNEL);
++	if (unlikely(!req))
++		return -ENOMEM;
++
++	/* Recorded right away so _io_free() can end it on any later error */
++	slot->or = req;
++	osd_req_read(req, &obj, slot->offset, slot->bio, slot->length);
++
++	ret = osd_finalize_request(req, 0, cred->oc_cap.cred, NULL);
++	if (ret) {
++		dprintk("%s: Faild to osd_finalize_request() => %d\n",
++			__func__, ret);
++		return ret;
++	}
++
++	dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
++		__func__, cur_comp, dev, obj.id, _LLU(slot->offset),
++		slot->length);
++
++	/* ret is 0 here: the request is finalized, _io_exec() submits it */
++	return ret;
++}
++
++static ssize_t _read_exec(struct objio_state *ios)
++{
++	const unsigned step = ios->objio_seg->mirrors_p1;
++	unsigned comp;
++
++	/* Build one read request per mirror set that was handed any data */
++	for (comp = 0; comp < ios->numdevs; comp += step) {
++		int ret;
++
++		if (!ios->per_dev[comp].length)
++			continue;
++		ret = _read_mirrors(ios, comp);
++		if (unlikely(ret)) {
++			_io_free(ios);
++			return ret;
++		}
++	}
++	ios->done = _read_done;
++	return _io_exec(ios); /* In sync mode exec returns the io status */
++}
++
++ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state)
++{
++	struct objio_state *ios = container_of(ol_state, struct objio_state,
++					       ol_state);
++	/* Map the pages onto per-device bios first, then issue the reads */
++	int err = _io_rw_pagelist(ios);
++
++	if (unlikely(err))
++		return err;
++
++	return _read_exec(ios);
++}
++
++/*
++ * write
++ */
++static ssize_t _write_done(struct objio_state *ios)
++{
++	int err = _io_check(ios, true);
++	ssize_t status = err;
++
++	/* Requests and bios are released whether or not the write worked */
++	_io_free(ios);
++
++	if (likely(!err)) {
++		/* FIXME: should be based on the OSD's persistence model
++		 * See OSD2r05 Section 4.13 Data persistence model.
++		 * Until then report UNSTABLE rather than NFS_FILE_SYNC. */
++		ios->ol_state.committed = NFS_UNSTABLE;
++		status = ios->length;
++	}
++
++	objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync);
++	return status;
++}
++
++static int _write_mirrors(struct objio_state *ios, unsigned cur_comp)
++{
++	struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp];
++	unsigned dev = ios->per_dev[cur_comp].dev;
++	unsigned last_comp = cur_comp + ios->objio_seg->mirrors_p1;
++	int ret;
++
++	for (; cur_comp < last_comp; ++cur_comp, ++dev) {
++		struct osd_request *or = NULL;
++		struct pnfs_osd_object_cred *cred =
++					&ios->objio_seg->layout->olo_comps[dev];
++		struct osd_obj_id obj = {
++			.partition = cred->oc_object_id.oid_partition_id,
++			.id = cred->oc_object_id.oid_object_id,
++		};
++		struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
++		struct bio *bio;
++
++		or = osd_start_request(_io_od(ios, dev), GFP_KERNEL);
++		if (unlikely(!or)) {
++			ret = -ENOMEM;
++			goto err;
++		}
++		per_dev->or = or;	/* from here on _io_free() ends it */
++		/* Mirror copies write through a bio cloned from the master */
++		if (per_dev != master_dev) {
++			bio = bio_kmalloc(GFP_KERNEL,
++					  master_dev->bio->bi_max_vecs);
++			if (unlikely(!bio)) {
++				dprintk("Faild to allocate BIO size=%u\n",
++					master_dev->bio->bi_max_vecs);
++				ret = -ENOMEM;
++				goto err;
++			}
++
++			__bio_clone(bio, master_dev->bio);
++			bio->bi_bdev = NULL;
++			bio->bi_next = NULL;
++			per_dev->bio = bio;
++			per_dev->dev = dev;
++			per_dev->length = master_dev->length;
++			per_dev->offset =  master_dev->offset;
++		} else {
++			bio = master_dev->bio;
++			/* FIXME: bio_set_dir() */
++			bio->bi_rw |= (1 << BIO_RW);
++		}
++
++		osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length);
++
++		ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
++		if (ret) {
++			dprintk("%s: Faild to osd_finalize_request() => %d\n",
++				__func__, ret);
++			goto err;
++		}
++
++		dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
++			__func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
++			per_dev->length);
++	}
++
++err:
++	return ret;	/* no cleanup here: _write_exec() calls _io_free() */
++}
++
++static ssize_t _write_exec(struct objio_state *ios)
++{
++	const unsigned step = ios->objio_seg->mirrors_p1;
++	unsigned comp;
++
++	/* Build the write requests of each mirror set that holds any data */
++	for (comp = 0; comp < ios->numdevs; comp += step) {
++		int ret;
++
++		if (!ios->per_dev[comp].length)
++			continue;
++		ret = _write_mirrors(ios, comp);
++		if (unlikely(ret)) {
++			_io_free(ios);
++			return ret;
++		}
++	}
++	ios->done = _write_done;
++	return _io_exec(ios); /* In sync mode exec returns the io->status */
++}
++
++ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable)
++{
++	struct objio_state *ios = container_of(ol_state, struct objio_state,
++					       ol_state);
++	/* TODO: ios->stable = stable; (@stable is ignored for now) */
++	int err = _io_rw_pagelist(ios);
++
++	if (unlikely(err))
++		return err;
++
++	/* Pages are mapped onto per-device bios; issue the writes */
++	return _write_exec(ios);
++}
++
++/*
++ * Policy Operations
++ */
++
++/*
++ * Return the largest data stripe size of @pnfslay's segments, -1 if none
++ */
++ssize_t
++objlayout_get_stripesize(struct pnfs_layout_type *pnfslay)
++{
++	ssize_t sz, maxsz = -1;
++	struct pnfs_layout_segment *lseg;
++
++	list_for_each_entry(lseg, &pnfslay->segs, fi_list) {
++		int n;
++		struct objlayout_segment *objlseg = LSEG_LD_DATA(lseg);
++		struct pnfs_osd_layout *lo =
++			(struct pnfs_osd_layout *)objlseg->pnfs_osd_layout;
++		struct pnfs_osd_data_map *map = &lo->olo_map;
++
++		n = map->odm_group_width;
++		if (n == 0)
++			n = map->odm_num_comps / (map->odm_mirror_cnt + 1);
++		/* RAID 4/5 give up one unit of the width, RAID PQ two */
++		switch (map->odm_raid_algorithm) {
++		case PNFS_OSD_RAID_0:
++			break;
++
++		case PNFS_OSD_RAID_4:
++		case PNFS_OSD_RAID_5:
++			n -= 1;
++			break;
++
++		case PNFS_OSD_RAID_PQ:
++			n -= 2;
++			break;
++
++		default:
++			BUG_ON(1);
++		}
++		sz = map->odm_stripe_unit * n;
++		if (sz > maxsz)
++			maxsz = sz;
++	}
++	dprintk("%s: Return %Zx\n", __func__, maxsz);
++	return maxsz;
++}
++
++/*
++ * Get the max [rw]size
++ */
++static ssize_t
++objlayout_get_blocksize(void)
++{
++	/* BIO_MAX_PAGES_KMALLOC is the bio_vec count that fits in one page
++	 * next to a struct bio; the limit is that many pages of data. */
++	return BIO_MAX_PAGES_KMALLOC * PAGE_SIZE;
++}
++
++static struct layoutdriver_policy_operations objlayout_policy_operations = {
++/*
++ * Don't gather across stripes, but rather gather (coalesce) up to
++ * the stripe size.
++ *
++ * FIXME: change interface to use merge_align, merge_count
++ */
++	.flags                 = PNFS_LAYOUTRET_ON_SETATTR,
++	.get_stripesize        = objlayout_get_stripesize,
++	.get_blocksize         = objlayout_get_blocksize,
++};
++
++static struct pnfs_layoutdriver_type objlayout_type = {
++	.id = LAYOUT_OSD2_OBJECTS,
++	.name = "LAYOUT_OSD2_OBJECTS",
++	.ld_io_ops = &objlayout_io_operations,
++	.ld_policy_ops = &objlayout_policy_operations,
++};
++
++void *objio_init_mt(void)
++{
++	struct objio_mount_type *mt = kzalloc(sizeof(*mt), GFP_KERNEL);
++
++	if (!mt)
++		return ERR_PTR(-ENOMEM);
++	/* Start with an empty device cache; _device_lookup() fills it */
++	INIT_LIST_HEAD(&mt->dev_list);
++	spin_lock_init(&mt->dev_list_lock);
++	return mt;
++}
++
++void objio_fini_mt(void *mountid)
++{
++	_dev_list_remove_all(mountid);
++	kfree(mountid);
++}
++
++MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
++MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
++MODULE_LICENSE("GPL");
++
++static int __init
++objlayout_init(void)
++{
++	pnfs_client_ops = pnfs_register_layoutdriver(&objlayout_type);
++	printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n",
++	       __func__);
++	return 0;	/* NOTE(review): registration result is never checked */
++}
++
++static void __exit
++objlayout_exit(void)
++{
++	pnfs_unregister_layoutdriver(&objlayout_type);
++	printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n",
++	       __func__);
++}
++
++module_init(objlayout_init);
++module_exit(objlayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-08-23 12:09:03.350491564 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-08-23 12:09:03.350491564 -0400
+@@ -0,0 +1,790 @@
++/*
++ *  objlayout.c
++ *
++ *  pNFS layout driver for Panasas OSDs
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <scsi/osd_initiator.h>
++#include "objlayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++struct pnfs_client_operations *pnfs_client_ops;
++
++/*
++ * Allocate the per-inode objlayout. Returns NULL if the allocation fails.
++ */
++static struct pnfs_layout_type *
++objlayout_alloc_layout(struct inode *inode)
++{
++	struct objlayout *objlay;
++
++	objlay = kzalloc(sizeof(*objlay), GFP_KERNEL);
++	dprintk("%s: Return %p\n", __func__, objlay);
++	if (!objlay)
++		return NULL;	/* never form &NULL->pnfs_layout */
++	spin_lock_init(&objlay->lock);
++	INIT_LIST_HEAD(&objlay->err_list);
++	return &objlay->pnfs_layout;
++}
++
++/*
++ * Free an objlayout layout structure
++ */
++static void
++objlayout_free_layout(struct pnfs_layout_type *lo)
++{
++	struct objlayout *objlay = OBJLAYOUT(lo);
++
++	dprintk("%s: objlay %p\n", __func__, objlay);
++
++	WARN_ON(!list_empty(&objlay->err_list));
++	kfree(objlay);
++}
++
++/*
++ * Unmarshall layout and store it in pnfslay.
++ */
++static struct pnfs_layout_segment *
++objlayout_alloc_lseg(struct pnfs_layout_type *pnfslay,
++		     struct nfs4_pnfs_layoutget_res *lgr)
++{
++	int status;
++	void *layout = lgr->layout.buf;
++	struct pnfs_layout_segment *lseg;
++	struct objlayout_segment *objlseg;
++	struct pnfs_osd_layout *pnfs_osd_layout;
++
++	dprintk("%s: Begin pnfslay %p layout %p\n", __func__, pnfslay, layout);
++
++	BUG_ON(!layout);
++
++	status = -ENOMEM;
++	lseg = kzalloc(sizeof(*lseg) + sizeof(*objlseg) +
++		       pnfs_osd_layout_incore_sz(layout), GFP_KERNEL);
++	if (!lseg)
++		goto err;
++
++	objlseg = LSEG_LD_DATA(lseg);
++	pnfs_osd_layout = (struct pnfs_osd_layout *)objlseg->pnfs_osd_layout;
++	pnfs_osd_xdr_decode_layout(pnfs_osd_layout, layout);
++
++	status = objio_alloc_lseg(&objlseg->internal, pnfslay, lseg,
++				  pnfs_osd_layout);
++	if (status)
++		goto err;
++
++	dprintk("%s: Return %p\n", __func__, lseg);
++	return lseg;
++
++ err:
++	kfree(lseg);
++	return ERR_PTR(status);
++}
++
++/*
++ * Free a layout segment
++ */
++static void
++objlayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	struct objlayout_segment *objlseg;
++
++	dprintk("%s: freeing layout segment %p\n", __func__, lseg);
++
++	if (unlikely(!lseg))
++		return;
++
++	objlseg = LSEG_LD_DATA(lseg);
++	objio_free_lseg(objlseg->internal);
++	kfree(lseg);
++}
++
++/*
++ * I/O Operations
++ */
++static inline u64
++end_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	end = start + len;
++	return end >= start ? end : NFS4_MAX_UINT64;
++}
++
++/* last octet in a range */
++static inline u64
++last_byte_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	BUG_ON(!len);
++	end = start + len;
++	return end > start ? end - 1 : NFS4_MAX_UINT64;
++}
++
++static struct objlayout_io_state *
++objlayout_alloc_io_state(struct pnfs_layout_type *pnfs_layout_type,
++			struct page **pages,
++			unsigned pgbase,
++			unsigned nr_pages,
++			loff_t offset,
++			size_t count,
++			struct pnfs_layout_segment *lseg,
++			void *rpcdata)
++{
++	struct objlayout_segment *objlseg = LSEG_LD_DATA(lseg);
++	struct objlayout_io_state *state;
++	u64 lseg_end_offset;
++	size_t size_nr_pages;
++
++	dprintk("%s: allocating io_state\n", __func__);
++	if (objio_alloc_io_state(objlseg->internal, &state))
++		return NULL;
++
++	BUG_ON(offset < lseg->range.offset);
++	lseg_end_offset = end_offset(lseg->range.offset, lseg->range.length);
++	BUG_ON(offset >= lseg_end_offset);
++	if (offset + count > lseg_end_offset) {
++		count = lseg->range.length - (offset - lseg->range.offset);
++		dprintk("%s: truncated count %Zd\n", __func__, count);
++	}
++
++	if (pgbase > PAGE_SIZE) {
++		unsigned n = pgbase >> PAGE_SHIFT;
++
++		pgbase &= ~PAGE_MASK;
++		pages += n;
++		nr_pages -= n;
++	}
++
++	size_nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
++	BUG_ON(nr_pages < size_nr_pages);
++	if (nr_pages > size_nr_pages)
++		nr_pages = size_nr_pages;
++
++	INIT_LIST_HEAD(&state->err_list);
++	state->lseg = lseg;
++	state->rpcdata = rpcdata;
++	state->pages = pages;
++	state->pgbase = pgbase;
++	state->nr_pages = nr_pages;
++	state->offset = offset;
++	state->count = count;
++	state->sync = 0;
++
++	return state;
++}
++
++static void
++objlayout_free_io_state(struct objlayout_io_state *state)
++{
++	dprintk("%s: freeing io_state\n", __func__);
++	if (unlikely(!state))
++		return;
++
++	objio_free_io_state(state);
++}
++
++/*
++ * I/O done common code: free @state, or queue it on objlay->err_list on error
++ */
++static void
++objlayout_iodone(struct objlayout_io_state *state)
++{
++	dprintk("%s: state %p status\n", __func__, state);
++
++	if (likely(state->status >= 0)) {
++		objlayout_free_io_state(state);
++	} else {
++		struct objlayout *objlay = OBJLAYOUT(state->lseg->layout);
++
++		spin_lock(&objlay->lock);
++		objlay->delta_space_valid = OBJ_DSU_INVALID;
++		list_add(&state->err_list, &objlay->err_list);
++		spin_unlock(&objlay->lock);
++	}
++}
++
++/*
++ * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
++ *
++ * The @index component IO failed (error returned from target). Register
++ * the error for later reporting at layout-return.
++ */
++void
++objlayout_io_set_result(struct objlayout_io_state *state, unsigned index,
++			int osd_error, u64 offset, u64 length, bool is_write)
++{
++	struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index];
++
++	BUG_ON(index >= state->num_comps);
++	if (osd_error) {
++		struct objlayout_segment *objlseg = LSEG_LD_DATA(state->lseg);
++		struct pnfs_osd_layout *layout =
++				(typeof(layout))objlseg->pnfs_osd_layout;
++
++		ioerr->oer_component = layout->olo_comps[index].oc_object_id;
++		ioerr->oer_comp_offset = offset;
++		ioerr->oer_comp_length = length;
++		ioerr->oer_iswrite = is_write;
++		ioerr->oer_errno = osd_error;
++
++		dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
++			"par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
++			__func__, index, ioerr->oer_errno,
++			ioerr->oer_iswrite,
++			_DEVID_LO(&ioerr->oer_component.oid_device_id),
++			_DEVID_HI(&ioerr->oer_component.oid_device_id),
++			ioerr->oer_component.oid_partition_id,
++			ioerr->oer_component.oid_object_id,
++			ioerr->oer_comp_offset,
++			ioerr->oer_comp_length);
++	} else {
++		/* User need not call if no error is reported */
++		ioerr->oer_errno = 0;
++	}
++}
++
++static void _rpc_commit_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++
++	pnfs_client_ops->nfs_commit_complete(wdata);
++}
++
++/*
++ * Commit: no OSD I/O is issued here, only ->nfs_commit_complete() is deferred
++ */
++enum pnfs_try_status
++objlayout_commit(struct nfs_write_data *wdata, int how)
++{
++	int status = PNFS_ATTEMPTED;
++
++	INIT_WORK(&wdata->task.u.tk_work, _rpc_commit_complete);
++	schedule_work(&wdata->task.u.tk_work);
++	dprintk("%s: Return %d\n", __func__, status);
++	return status;
++}
++
++/* Work function queued with schedule_work() by objlayout_read_done() to call
++ * ->nfs_readlist_complete(). This is because the osd completion is called
++ * with ints-off from the block layer
++ */
++static void _rpc_read_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_read_data *rdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	rdata = container_of(task, struct nfs_read_data, task);
++
++	pnfs_client_ops->nfs_readlist_complete(rdata);
++}
++
++void
++objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
++{
++	int eof = state->eof;
++	struct nfs_read_data *rdata;
++
++	state->status = status;
++	dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof);
++	rdata = state->rpcdata;
++	rdata->task.tk_status = status;
++	if (status >= 0) {
++		rdata->res.count = status;
++		rdata->res.eof = eof;
++	}
++	objlayout_iodone(state);
++	/* must not use state after this point */
++
++	if (sync)
++		pnfs_client_ops->nfs_readlist_complete(rdata);
++	else {
++		INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
++		schedule_work(&rdata->task.u.tk_work);
++	}
++}
++
++/*
++ * Perform sync or async reads.
++ */
++enum pnfs_try_status
++objlayout_read_pagelist(struct nfs_read_data *rdata, unsigned nr_pages)
++{
++	loff_t offset = rdata->args.offset;
++	size_t count = rdata->args.count;
++	struct objlayout_io_state *state;
++	ssize_t status = 0;
++	loff_t eof;
++
++	dprintk("%s: Begin inode %p offset %llu count %d\n",
++		__func__, rdata->inode, offset, (int)count);
++
++	eof = i_size_read(rdata->inode);
++	if (unlikely(offset + count > eof)) {
++		if (offset >= eof) {
++			status = 0;
++			rdata->res.count = 0;
++			rdata->res.eof = 1;
++			goto out;
++		}
++		count = eof - offset;
++	}
++
++	state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout,
++					 rdata->args.pages, rdata->args.pgbase,
++					 nr_pages, offset, count,
++					 rdata->pdata.lseg, rdata);
++	if (unlikely(!state)) {
++		status = -ENOMEM;
++		goto out;
++	}
++
++	state->eof = state->offset + state->count >= eof;
++
++	status = objio_read_pagelist(state);
++ out:
++	dprintk("%s: Return status %Zd\n", __func__, status);
++	rdata->pdata.pnfs_error = status;
++	return PNFS_ATTEMPTED;
++}
++
++/* Work function queued with schedule_work() by objlayout_write_done() to
++ * call ->nfs_writelist_complete(). This is because the osd completion is
++ * called with ints-off from the block layer
++ */
++static void _rpc_write_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++
++	pnfs_client_ops->nfs_writelist_complete(wdata);
++}
++
++void
++objlayout_write_done(struct objlayout_io_state *state, ssize_t status,
++		     bool sync)
++{
++	struct nfs_write_data *wdata;
++
++	dprintk("%s: Begin\n", __func__);
++	wdata = state->rpcdata;
++	state->status = status;
++	wdata->task.tk_status = status;
++	if (status >= 0) {
++		wdata->res.count = status;
++		wdata->verf.committed = state->committed;
++		dprintk("%s: Return status %d committed %d\n",
++			__func__, wdata->task.tk_status,
++			wdata->verf.committed);
++	} else
++		dprintk("%s: Return status %d\n",
++			__func__, wdata->task.tk_status);
++	objlayout_iodone(state);
++	/* must not use state after this point */
++
++	if (sync)
++		pnfs_client_ops->nfs_writelist_complete(wdata);
++	else {
++		INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
++		schedule_work(&wdata->task.u.tk_work);
++	}
++}
++
++/*
++ * Perform sync or async writes.
++ */
++enum pnfs_try_status
++objlayout_write_pagelist(struct nfs_write_data *wdata,
++			 unsigned nr_pages,
++			 int how)
++{
++	struct objlayout_io_state *state;
++	ssize_t status;
++
++	dprintk("%s: Begin inode %p offset %llu count %u\n",
++		__func__, wdata->inode, wdata->args.offset, wdata->args.count);
++
++	state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout,
++					 wdata->args.pages,
++					 wdata->args.pgbase,
++					 nr_pages,
++					 wdata->args.offset,
++					 wdata->args.count,
++					 wdata->pdata.lseg, wdata);
++	if (unlikely(!state)) {
++		status = -ENOMEM;
++		goto out;
++	}
++
++	state->sync = how & FLUSH_SYNC;
++
++	status = objio_write_pagelist(state, how & FLUSH_STABLE);
++ out:
++	dprintk("%s: Return status %Zd\n", __func__, status);
++	wdata->pdata.pnfs_error = status;
++	return PNFS_ATTEMPTED;
++}
++
++void
++objlayout_encode_layoutcommit(struct pnfs_layout_type *pnfslay,
++			      struct xdr_stream *xdr,
++			      const struct pnfs_layoutcommit_arg *args)
++{
++	struct objlayout *objlay = OBJLAYOUT(pnfslay);
++	struct pnfs_osd_layoutupdate lou;
++	__be32 *start;
++
++	dprintk("%s: Begin\n", __func__);
++
++	spin_lock(&objlay->lock);
++	lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
++	lou.dsu_delta = objlay->delta_space_used;
++	objlay->delta_space_used = 0;
++	objlay->delta_space_valid = OBJ_DSU_INIT;
++	lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
++	spin_unlock(&objlay->lock);
++
++	start = xdr_reserve_space(xdr, 4);
++
++	BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));
++
++	*start = cpu_to_be32((xdr->p - start - 1) * 4);
++
++	dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
++		lou.dsu_delta, lou.olu_ioerr_flag);
++}
++
++static int
++err_prio(u32 oer_errno)
++{
++	switch (oer_errno) {
++	case 0:
++		return 0;
++
++	case PNFS_OSD_ERR_RESOURCE:
++		return OSD_ERR_PRI_RESOURCE;
++	case PNFS_OSD_ERR_BAD_CRED:
++		return OSD_ERR_PRI_BAD_CRED;
++	case PNFS_OSD_ERR_NO_ACCESS:
++		return OSD_ERR_PRI_NO_ACCESS;
++	case PNFS_OSD_ERR_UNREACHABLE:
++		return OSD_ERR_PRI_UNREACHABLE;
++	case PNFS_OSD_ERR_NOT_FOUND:
++		return OSD_ERR_PRI_NOT_FOUND;
++	case PNFS_OSD_ERR_NO_SPACE:
++		return OSD_ERR_PRI_NO_SPACE;
++	default:
++		WARN_ON(1);
++		/* fallthrough */
++	case PNFS_OSD_ERR_EIO:
++		return OSD_ERR_PRI_EIO;
++	}
++}
++
++/* Fold @src_err into the accumulated @dest_err: union range, dominant errno */
++static void
++merge_ioerr(struct pnfs_osd_ioerr *dest_err,
++	    const struct pnfs_osd_ioerr *src_err)
++{
++	u64 dest_end, src_end;
++
++	if (!dest_err->oer_errno) {
++		*dest_err = *src_err;
++		/* accumulated device must be blank */
++		memset(&dest_err->oer_component.oid_device_id, 0,
++			sizeof(dest_err->oer_component.oid_device_id));
++		return;
++	}
++
++	if (dest_err->oer_component.oid_partition_id !=
++				src_err->oer_component.oid_partition_id)
++		dest_err->oer_component.oid_partition_id = 0;
++
++	if (dest_err->oer_component.oid_object_id !=
++				src_err->oer_component.oid_object_id)
++		dest_err->oer_component.oid_object_id = 0;
++
++	/* take both range ends before the start offset is lowered below */
++	dest_end = end_offset(dest_err->oer_comp_offset,
++			      dest_err->oer_comp_length);
++	src_end =  end_offset(src_err->oer_comp_offset,
++			      src_err->oer_comp_length);
++	if (dest_end < src_end)
++		dest_end = src_end;
++	if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
++		dest_err->oer_comp_offset = src_err->oer_comp_offset;
++	dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;
++
++	if (src_err->oer_iswrite && !dest_err->oer_iswrite) {
++		/* a write error supersedes an accumulated read error */
++		dest_err->oer_iswrite = true;
++		dest_err->oer_errno = src_err->oer_errno;
++	} else if (src_err->oer_iswrite == dest_err->oer_iswrite &&
++		   err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno)) {
++		dest_err->oer_errno = src_err->oer_errno;
++	}
++}
++
++static void
++encode_accumulated_error(struct objlayout *objlay, struct xdr_stream *xdr)
++{
++	struct objlayout_io_state *state, *tmp;
++	struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
++
++	list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
++		unsigned i;
++
++		for (i = 0; i < state->num_comps; i++) {
++			struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
++
++			if (!ioerr->oer_errno)
++				continue;
++
++			printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d "
++				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
++				"offset=0x%llx length=0x%llx\n",
++				__func__, i, ioerr->oer_errno,
++				ioerr->oer_iswrite,
++				_DEVID_LO(&ioerr->oer_component.oid_device_id),
++				_DEVID_HI(&ioerr->oer_component.oid_device_id),
++				ioerr->oer_component.oid_partition_id,
++				ioerr->oer_component.oid_object_id,
++				ioerr->oer_comp_offset,
++				ioerr->oer_comp_length);
++
++			merge_ioerr(&accumulated_err, ioerr);
++		}
++		list_del(&state->err_list);
++		objlayout_free_io_state(state);
++	}
++
++	BUG_ON(pnfs_osd_xdr_encode_ioerr(xdr, &accumulated_err));
++}
++
++void
++objlayout_encode_layoutreturn(struct pnfs_layout_type *pnfslay,
++			      struct xdr_stream *xdr,
++			      const struct nfs4_pnfs_layoutreturn_arg *args)
++{
++	struct objlayout *objlay = OBJLAYOUT(pnfslay);
++	struct objlayout_io_state *state, *tmp;
++	__be32 *start, *uninitialized_var(last_xdr);
++
++	dprintk("%s: Begin\n", __func__);
++	start = xdr_reserve_space(xdr, 4);
++	BUG_ON(!start);
++
++	spin_lock(&objlay->lock);
++
++	list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
++		unsigned i;
++		int res = 0;
++
++		for (i = 0; i < state->num_comps && !res; i++) {
++			struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
++
++			if (!ioerr->oer_errno)
++				continue;
++
++			dprintk("%s: err[%d]: errno=%d is_write=%d "
++				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
++				"offset=0x%llx length=0x%llx\n",
++				__func__, i, ioerr->oer_errno,
++				ioerr->oer_iswrite,
++				_DEVID_LO(&ioerr->oer_component.oid_device_id),
++				_DEVID_HI(&ioerr->oer_component.oid_device_id),
++				ioerr->oer_component.oid_partition_id,
++				ioerr->oer_component.oid_object_id,
++				ioerr->oer_comp_offset,
++				ioerr->oer_comp_length);
++
++			last_xdr = xdr->p;
++			res = pnfs_osd_xdr_encode_ioerr(xdr, &state->ioerrs[i]);
++		}
++		if (unlikely(res)) {
++			/* no space for even one error descriptor */
++			BUG_ON(last_xdr == start + 1);
++
++			/* we've encountered a situation with lots and lots of
++			 * errors and no space to encode them all. Use the last
++			 * available slot to report the union of all the
++			 * remaining errors.
++			 */
++			xdr_rewind_stream(xdr, last_xdr -
++					       pnfs_osd_ioerr_xdr_sz() / 4);
++			encode_accumulated_error(objlay, xdr);
++			goto loop_done;
++		}
++		list_del(&state->err_list);
++		objlayout_free_io_state(state);
++	}
++loop_done:
++	spin_unlock(&objlay->lock);
++
++	*start = cpu_to_be32((xdr->p - start - 1) * 4);
++	dprintk("%s: Return\n", __func__);
++}
++
++struct objlayout_deviceinfo {
++	struct page *page;
++	struct pnfs_osd_deviceaddr da; /* This must be last */
++};
++
++/* Initialize and call nfs_getdeviceinfo, then decode and return a
++ * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo()
++ * should be called.
++ */
++int objlayout_get_deviceinfo(struct pnfs_layout_type *pnfslay,
++	struct pnfs_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr)
++{
++	struct objlayout_deviceinfo *odi;
++	struct pnfs_device pd;
++	struct super_block *sb;
++	struct page *page;
++	size_t sz;
++	u32 *p;
++	int err;
++
++	page = alloc_page(GFP_KERNEL);
++	if (!page)
++		return -ENOMEM;
++
++	pd.area = page_address(page);
++
++	memcpy(&pd.dev_id, d_id, sizeof(*d_id));
++	pd.layout_type = LAYOUT_OSD2_OBJECTS;
++	pd.dev_notify_types = 0;
++	pd.pages = &page;
++	pd.pgbase = 0;
++	pd.pglen = PAGE_SIZE;
++	pd.mincount = 0;
++
++	sb = PNFS_INODE(pnfslay)->i_sb;
++	err = pnfs_client_ops->nfs_getdeviceinfo(PNFS_NFS_SERVER(pnfslay), &pd);
++	dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
++	if (err)
++		goto err_out;
++
++	p = pd.area;
++	sz = pnfs_osd_xdr_deviceaddr_incore_sz(p);
++	odi = kzalloc(sz + (sizeof(*odi) - sizeof(odi->da)), GFP_KERNEL);
++	if (!odi) {
++		err = -ENOMEM;
++		goto err_out;
++	}
++	pnfs_osd_xdr_decode_deviceaddr(&odi->da, p);
++	odi->page = page;
++	*deviceaddr = &odi->da;
++	return 0;
++
++err_out:
++	__free_page(page);
++	return err;
++}
++
++void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
++{
++	struct objlayout_deviceinfo *odi = container_of(deviceaddr,
++						struct objlayout_deviceinfo,
++						da);
++
++	__free_page(odi->page);
++	kfree(odi);
++}
++
++/*
++ * Initialize a mountpoint: obtain the I/O engine's per-mount data from
++ * objio_init_mt() and store it in server->pnfs_ld_data.
++ * Returns 0 on success, or the error encoded in the pointer that
++ * objio_init_mt() returned.
++ */
++static int
++objlayout_initialize_mountpoint(struct nfs_server *server,
++				const struct nfs_fh *mntfh)
++{
++	void *data;
++
++	data = objio_init_mt();
++	if (IS_ERR(data)) {
++		printk(KERN_INFO "%s: objlayout lib not ready err=%ld\n",
++		       __func__, PTR_ERR(data));
++		return PTR_ERR(data);
++	}
++	server->pnfs_ld_data = data;
++
++	dprintk("%s: Return data=%p\n", __func__, data);
++	return 0;
++}
++
++/*
++ * Uninitialize a mountpoint
++ */
++static int
++objlayout_uninitialize_mountpoint(struct nfs_server *server)
++{
++	dprintk("%s: Begin %p\n", __func__, server->pnfs_ld_data);
++	objio_fini_mt(server->pnfs_ld_data);
++	return 0;
++}
++
++struct layoutdriver_io_operations objlayout_io_operations = {
++	.commit                  = objlayout_commit,
++	.read_pagelist           = objlayout_read_pagelist,
++	.write_pagelist          = objlayout_write_pagelist,
++	.alloc_layout            = objlayout_alloc_layout,
++	.free_layout             = objlayout_free_layout,
++	.alloc_lseg              = objlayout_alloc_lseg,
++	.free_lseg               = objlayout_free_lseg,
++	.encode_layoutcommit	 = objlayout_encode_layoutcommit,
++	.encode_layoutreturn     = objlayout_encode_layoutreturn,
++	.initialize_mountpoint   = objlayout_initialize_mountpoint,
++	.uninitialize_mountpoint = objlayout_uninitialize_mountpoint,
++};
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-08-23 12:09:03.351434439 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-08-23 12:09:03.351434439 -0400
+@@ -0,0 +1,171 @@
++/*
++ *  objlayout.h
++ *
++ *  Data types and function declarations for interfacing with the
++ *  pNFS standard object layout driver.
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#ifndef _OBJLAYOUT_H
++#define _OBJLAYOUT_H
++
++#include <linux/nfs_fs.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/pnfs_osd_xdr.h>
++
++/*
++ * in-core layout segment
++ */
++struct objlayout_segment {
++	void *internal;    /* for provider internal use */
++	u8 pnfs_osd_layout[];
++};
++
++/*
++ * per-inode layout
++ */
++struct objlayout {
++	struct pnfs_layout_type pnfs_layout;
++
++	 /* for layout_commit */
++	enum osd_delta_space_valid_enum {
++		OBJ_DSU_INIT = 0,
++		OBJ_DSU_VALID,
++		OBJ_DSU_INVALID,
++	} delta_space_valid;
++	s64 delta_space_used;  /* consumed by write ops */
++
++	 /* for layout_return */
++	spinlock_t lock;
++	struct list_head err_list;
++};
++
++static inline struct objlayout *
++OBJLAYOUT(struct pnfs_layout_type *lo)
++{
++	return container_of(lo, struct objlayout, pnfs_layout);
++}
++
++/*
++ * per-I/O operation state
++ * embedded in objects provider io_state data structure
++ */
++struct objlayout_io_state {
++	struct pnfs_layout_segment *lseg;
++
++	struct page **pages;
++	unsigned pgbase;
++	unsigned nr_pages;
++	unsigned long count;
++	loff_t offset;
++	bool sync;
++
++	void *rpcdata;
++	int status;             /* res */
++	int eof;                /* res */
++	int committed;          /* res */
++
++	/* Error reporting (layout_return) */
++	struct list_head err_list;
++	unsigned num_comps;
++	/* Pointer to array of error descriptors of size num_comps.
++	 * It should contain as many entries as devices in the osd_layout
++	 * that participate in the I/O. It is up to the io_engine to allocate
++	 * needed space and set num_comps.
++	 */
++	struct pnfs_osd_ioerr *ioerrs;
++};
++
++/*
++ * Raid engine I/O API
++ */
++extern void *objio_init_mt(void);
++extern void objio_fini_mt(void *mt);
++
++extern int objio_alloc_lseg(void **outp,
++	struct pnfs_layout_type *pnfslay,
++	struct pnfs_layout_segment *lseg,
++	struct pnfs_osd_layout *layout);
++extern void objio_free_lseg(void *p);
++
++extern int objio_alloc_io_state(void *seg, struct objlayout_io_state **outp);
++extern void objio_free_io_state(struct objlayout_io_state *state);
++
++extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state);
++extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state,
++				    bool stable);
++
++/*
++ * callback API
++ */
++extern void objlayout_io_set_result(struct objlayout_io_state *state,
++				    unsigned index, int osd_error,
++				    u64 offset, u64 length, bool is_write);
++
++static inline void
++objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used)
++{
++	struct objlayout *objlay = OBJLAYOUT(state->lseg->layout);
++
++	/* If one of the I/Os errored out and the delta_space_used was
++	 * invalid we render the complete report as invalid. Protocol mandate
++	 * the DSU be accurate or not reported.
++	 */
++	spin_lock(&objlay->lock);
++	if (objlay->delta_space_valid != OBJ_DSU_INVALID) {
++		objlay->delta_space_valid = OBJ_DSU_VALID;
++		objlay->delta_space_used += space_used;
++	}
++	spin_unlock(&objlay->lock);
++}
++
++extern void objlayout_read_done(struct objlayout_io_state *state,
++				ssize_t status, bool sync);
++extern void objlayout_write_done(struct objlayout_io_state *state,
++				 ssize_t status, bool sync);
++
++extern int objlayout_get_deviceinfo(struct pnfs_layout_type *pnfslay,
++	struct pnfs_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr);
++extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr);
++
++/*
++ * exported generic objects function vectors
++ */
++extern struct layoutdriver_io_operations objlayout_io_operations;
++extern struct pnfs_client_operations *pnfs_client_ops;
++
++#endif /* _OBJLAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-08-23 12:09:03.352501716 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-08-23 12:09:03.352501716 -0400
+@@ -0,0 +1,734 @@
++/*
++ *  panfs_shim.c
++ *
++ *  Shim layer for interfacing with the Panasas DirectFlow module I/O stack
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ */
++
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <asm/byteorder.h>
++
++#include "objlayout.h"
++#include "panfs_shim.h"
++
++#include <linux/panfs_shim_api.h>
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++struct panfs_export_operations *panfs_export_ops;
++
++void *
++objio_init_mt(void)
++{
++	return panfs_export_ops == NULL ? ERR_PTR(-EAGAIN) : NULL;
++}
++
++void objio_fini_mt(void *mountid)
++{
++}
++
++static int
++panfs_shim_conv_raid01(struct pnfs_osd_layout *layout,
++		       struct pnfs_osd_data_map *lo_map,
++		       pan_agg_layout_hdr_t *hdr)
++{
++	if (lo_map->odm_mirror_cnt) {
++		hdr->type = PAN_AGG_RAID1;
++		hdr->hdr.raid1.num_comps = lo_map->odm_mirror_cnt + 1;
++	} else if (layout->olo_num_comps > 1) {
++		hdr->type = PAN_AGG_RAID0;
++		hdr->hdr.raid0.num_comps = layout->olo_num_comps;
++		hdr->hdr.raid0.stripe_unit = lo_map->odm_stripe_unit;
++	} else
++		hdr->type = PAN_AGG_SIMPLE;
++	return 0;
++}
++
++static int
++panfs_shim_conv_raid5(struct pnfs_osd_layout *layout,
++		      struct pnfs_osd_data_map *lo_map,
++		      pan_agg_layout_hdr_t *hdr)
++{
++	if (lo_map->odm_mirror_cnt)
++		goto err;
++
++	if (lo_map->odm_group_width || lo_map->odm_group_depth) {
++		if (!lo_map->odm_group_width || !lo_map->odm_group_depth)
++			goto err;
++
++		hdr->type = PAN_AGG_GRP_RAID5_LEFT;
++		hdr->hdr.grp_raid5_left.num_comps = lo_map->odm_num_comps;
++		if (hdr->hdr.grp_raid5_left.num_comps != lo_map->odm_num_comps)
++			goto err;
++		hdr->hdr.grp_raid5_left.stripe_unit = lo_map->odm_stripe_unit;
++		hdr->hdr.grp_raid5_left.rg_width = lo_map->odm_group_width;
++		hdr->hdr.grp_raid5_left.rg_depth = lo_map->odm_group_depth;
++		/* this is a guess, panasas server is not supposed to
++		   hand out layout otherwise */
++		hdr->hdr.grp_raid5_left.group_layout_policy =
++			PAN_AGG_GRP_RAID5_LEFT_POLICY_ROUND_ROBIN;
++	} else {
++		hdr->type = PAN_AGG_RAID5_LEFT;
++		hdr->hdr.raid5_left.num_comps = lo_map->odm_num_comps;
++		if (hdr->hdr.raid5_left.num_comps != lo_map->odm_num_comps)
++			goto err;
++		hdr->hdr.raid5_left.stripe_unit2 =
++		hdr->hdr.raid5_left.stripe_unit1 =
++		hdr->hdr.raid5_left.stripe_unit0 = lo_map->odm_stripe_unit;
++	}
++
++	return 0;
++err:
++	return -EINVAL;
++}
++
++/*
++ * Convert a pnfs_osd data map into a Panasas aggregation layout header.
++ */
++static int
++panfs_shim_conv_pnfs_osd_data_map(
++	struct pnfs_osd_layout *layout,
++	pan_agg_layout_hdr_t *hdr)
++{
++	int status = -EINVAL;
++	struct pnfs_osd_data_map *lo_map = &layout->olo_map;
++
++	if (!layout->olo_num_comps) {
++		dprintk("%s: !!layout.n_comps(%u)\n", __func__,
++			layout->olo_num_comps);
++		goto err;
++	}
++
++	switch (lo_map->odm_raid_algorithm) {
++	case PNFS_OSD_RAID_0:
++		if (layout->olo_num_comps != lo_map->odm_num_comps ||
++		    layout->olo_comps_index) {
++			dprintk("%s: !!PNFS_OSD_RAID_0 "
++				"layout.n_comps(%u) map.n_comps(%u) "
++				"comps_index(%u)\n", __func__,
++				layout->olo_num_comps,
++				lo_map->odm_num_comps,
++				layout->olo_comps_index);
++			goto err;
++		}
++		status = panfs_shim_conv_raid01(layout, lo_map, hdr);
++		break;
++
++	case PNFS_OSD_RAID_5:
++		if (!lo_map->odm_group_width) {
++			if (layout->olo_num_comps != lo_map->odm_num_comps ||
++			    layout->olo_comps_index) {
++				dprintk("%s: !!PNFS_OSD_RAID_5 !group_width "
++					"layout.n_comps(%u)!=map.n_comps(%u) "
++					"|| comps_index(%u)\n", __func__,
++					layout->olo_num_comps,
++					lo_map->odm_num_comps,
++					layout->olo_comps_index);
++				goto err;
++			}
++		} else if ((layout->olo_num_comps != lo_map->odm_num_comps &&
++			    layout->olo_num_comps > lo_map->odm_group_width) ||
++			   (layout->olo_comps_index % lo_map->odm_group_width)){
++				dprintk("%s: !!PNFS_OSD_RAID_5 group_width(%u) "
++					"layout.n_comps(%u) map.n_comps(%u) "
++					"comps_index(%u)\n", __func__,
++					lo_map->odm_group_width,
++					layout->olo_num_comps,
++					lo_map->odm_num_comps,
++					layout->olo_comps_index);
++				goto err;
++			}
++		status = panfs_shim_conv_raid5(layout, lo_map, hdr);
++		break;
++
++	case PNFS_OSD_RAID_4:
++	case PNFS_OSD_RAID_PQ:
++	default:
++		dprintk("%s: !!PNFS_OSD_RAID_(%d)\n", __func__,
++			lo_map->odm_raid_algorithm);
++		goto err;
++	}
++
++	/* hand back the converter's result; its -EINVAL must not be masked */
++	return status;
++err:
++	return status;
++}
++
++/* Convert a pnfs_osd layout into a Panasas map-and-caps object (one kzalloc,
++ * returned via *outp; release with objio_free_lseg). Returns 0 or a negative
++ * errno; on error the allocation is freed and *outp is left untouched. */
++int
++objio_alloc_lseg(void **outp,
++	struct pnfs_layout_type *pnfslay,
++	struct pnfs_layout_segment *lseg,
++	struct pnfs_osd_layout *layout)
++{
++	int i, total_comps;
++	int status;
++	struct pnfs_osd_object_cred *lo_comp;
++	pan_size_t alloc_sz, local_sz;
++	pan_sm_map_cap_t *mcs = NULL;
++	u8 *buf;
++	pan_agg_comp_obj_t *pan_comp;
++	pan_sm_sec_t *pan_sec;
++
++	status = -EINVAL;
++	if (layout->olo_num_comps < layout->olo_map.odm_group_width) {
++		total_comps = layout->olo_comps_index + layout->olo_num_comps;
++	} else {
++		/* allocate full map, otherwise SAM gets confused */
++		total_comps = layout->olo_map.odm_num_comps;
++	}
++	alloc_sz = total_comps *
++		   (sizeof(pan_agg_comp_obj_t) + sizeof(pan_sm_sec_t));
++	for (i = 0; i < layout->olo_num_comps; i++) {
++		void *p = layout->olo_comps[i].oc_cap.cred;
++		if (panfs_export_ops->sm_sec_t_get_size_otw(
++			(pan_sm_sec_otw_t *)&p, &local_sz, NULL, NULL))
++			goto err;
++		alloc_sz += local_sz;
++	}
++
++	status = -ENOMEM;
++	mcs = kzalloc(sizeof(*mcs) + alloc_sz, GFP_KERNEL);
++	if (!mcs)
++		goto err;
++	buf = (u8 *)&mcs[1];
++
++	mcs->offset = lseg->range.offset;
++	mcs->length = lseg->range.length;
++#if 0
++	/* FIXME: for now */
++	mcs->expiration_time.ts_sec  = 0;
++	mcs->expiration_time.ts_nsec = 0;
++#endif
++	mcs->full_map.map_hdr.avail_state = PAN_AGG_OBJ_STATE_NORMAL;
++	status = panfs_shim_conv_pnfs_osd_data_map(layout,
++						   &mcs->full_map.layout_hdr);
++	if (status)
++		goto err;
++
++	mcs->full_map.components.size = total_comps;
++	mcs->full_map.components.data = (pan_agg_comp_obj_t *)buf;
++	buf += total_comps * sizeof(pan_agg_comp_obj_t);
++
++	mcs->secs.size = total_comps;
++	mcs->secs.data = (pan_sm_sec_t *)buf;
++	buf += total_comps * sizeof(pan_sm_sec_t);
++
++	lo_comp = layout->olo_comps;
++	pan_comp = mcs->full_map.components.data + layout->olo_comps_index;
++	pan_sec = mcs->secs.data + layout->olo_comps_index;
++	for (i = 0; i < layout->olo_num_comps; i++) {
++		void *p;
++		pan_stor_obj_id_t *obj_id = &mcs->full_map.map_hdr.obj_id;
++		struct pnfs_osd_objid *oc_obj_id = &lo_comp->oc_object_id;
++		u64 dev_id = __be64_to_cpup(
++			(__be64 *)oc_obj_id->oid_device_id.data + 1);
++
++		dprintk("%s: i=%d deviceid=%Lx:%Lx partition=%Lx object=%Lx\n",
++			__func__, i,
++			__be64_to_cpup((__be64 *)oc_obj_id->oid_device_id.data),
++			__be64_to_cpup((__be64 *)oc_obj_id->oid_device_id.data + 1),
++			oc_obj_id->oid_partition_id, oc_obj_id->oid_object_id);
++
++		if (i == 0) {
++			/* make up mgr_id to calm sam down */
++			pan_mgr_id_construct_artificial(PAN_MGR_SM, 0,
++							&obj_id->dev_id);
++			obj_id->grp_id = oc_obj_id->oid_partition_id;
++			obj_id->obj_id = oc_obj_id->oid_object_id;
++		}
++
++		if (obj_id->grp_id != lo_comp->oc_object_id.oid_partition_id) {
++			dprintk("%s: i=%d grp_id=0x%Lx oid_partition_id=0x%Lx\n",
++				__func__, i, (u64)obj_id->grp_id,
++				lo_comp->oc_object_id.oid_partition_id);
++			status = -EINVAL;
++			goto err;
++		}
++
++		if (obj_id->obj_id != lo_comp->oc_object_id.oid_object_id) {
++			dprintk("%s: i=%d obj_id=0x%Lx oid_object_id=0x%Lx\n",
++				__func__, i, obj_id->obj_id,
++				lo_comp->oc_object_id.oid_object_id);
++			status = -EINVAL;
++			goto err;
++		}
++
++		pan_comp->dev_id = dev_id;
++		if (!pan_stor_is_device_id_an_obsd_id(pan_comp->dev_id)) {
++			dprintk("%s: i=%d dev_id=0x%Lx not an obsd_id\n",
++				__func__, i, pan_comp->dev_id);
++			status = -EINVAL;
++			goto err;
++		}
++		if (lo_comp->oc_osd_version == PNFS_OSD_MISSING) {
++			dprintk("%s: degraded maps not supported yet\n",
++				__func__);
++			status = -ENOTSUPP;
++			goto err;
++		}
++		pan_comp->avail_state = PAN_AGG_COMP_STATE_NORMAL;
++		if (lo_comp->oc_cap_key_sec != PNFS_OSD_CAP_KEY_SEC_NONE) {
++			dprintk("%s: cap key security not supported yet\n",
++				__func__);
++			status = -ENOTSUPP;
++			goto err;
++		}
++
++		p = lo_comp->oc_cap.cred;
++		panfs_export_ops->sm_sec_t_unmarshall(
++			(pan_sm_sec_otw_t *)&p,
++			pan_sec,
++			buf,
++			alloc_sz,
++			NULL,
++			&local_sz);
++		buf += local_sz;
++		alloc_sz -= local_sz;
++
++		lo_comp++;
++		pan_comp++;
++		pan_sec++;
++	}
++
++	*outp = mcs;
++	dprintk("%s:Return mcs=%p\n", __func__, mcs);
++	return 0;
++
++err:
++	objio_free_lseg(mcs);
++	dprintk("%s:Error %d\n", __func__, status);
++	return status;
++}
++
++/*
++ * Free a Panasas map and caps type
++ */
++void
++objio_free_lseg(void *p)
++{
++	kfree(p);
++}
++
++/*
++ * I/O routines
++ */
++int
++objio_alloc_io_state(void *seg, struct objlayout_io_state **outp)
++{
++	struct panfs_shim_io_state *p;
++
++	dprintk("%s: allocating io_state\n", __func__);
++	p = kzalloc(sizeof(*p), GFP_KERNEL);
++	if (!p)
++		return -ENOMEM;
++
++	*outp = &p->ol_state;
++	return 0;
++}
++
++/*
++ * Free an I/O state
++ */
++void
++objio_free_io_state(struct objlayout_io_state *ol_state)
++{
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	int i;
++
++	dprintk("%s: freeing io_state\n", __func__);
++	for (i = 0; i < state->ol_state.nr_pages; i++)
++		kunmap(state->ol_state.pages[i]);
++
++	if (state->ucreds)
++		panfs_export_ops->ucreds_put(state->ucreds);
++	kfree(state->sg_list);
++	kfree(state);
++}
++
++static int
++panfs_shim_pages_to_sg(
++	struct panfs_shim_io_state *state,
++	struct page **pages,
++	unsigned int pgbase,
++	unsigned nr_pages,
++	size_t count)
++{
++	unsigned i, n;
++	pan_sg_entry_t *sg;
++
++	dprintk("%s pgbase %u nr_pages %u count %d "
++		"pg0 %p flags 0x%x index %llu\n",
++		__func__, pgbase, nr_pages, (int)count, pages[0],
++		(unsigned)pages[0]->flags, (unsigned long long)pages[0]->index);
++
++	sg = kmalloc(nr_pages * sizeof(*sg), GFP_KERNEL);
++	if (sg == NULL)
++		return -ENOMEM;
++
++	dprintk("%s sg_list %p pages %p pgbase %u nr_pages %u\n",
++		__func__, sg, pages, pgbase, nr_pages);
++
++	for (i = 0; i < nr_pages; i++) {
++		sg[i].buffer = (char *)kmap(pages[i]) + pgbase;
++		n = PAGE_SIZE - pgbase;
++		pgbase = 0;
++		if (n > count)
++			n = count;
++		sg[i].chunk_size = n;
++		count -= n;
++		if (likely(count)) {
++			sg[i].next = &sg[i+1];
++		} else {
++			/* we're done */
++			sg[i].next = NULL;
++			break;
++		}
++	}
++	BUG_ON(count);
++
++	state->sg_list = sg;
++	return 0;
++}
++
++/* Read completion handler: passed to sam_read() as the callback for async
++ * I/O, and also invoked directly by objio_read_pagelist() when sam_read()
++ * returns anything other than PAN_ERR_IN_PROGRESS. */
++static void
++panfs_shim_read_done(
++	void *arg1,
++	void *arg2,
++	pan_sam_read_res_t *res_p,
++	pan_status_t rc)
++{
++	struct panfs_shim_io_state *state = arg1;
++	ssize_t status;
++
++	dprintk("%s: Begin\n", __func__);
++	if (!res_p)
++		res_p = &state->u.read.res;
++	if (rc == PAN_SUCCESS)
++		rc = res_p->result;
++	if (rc == PAN_SUCCESS) {
++		status = res_p->length;
++		WARN_ON(status < 0);
++	} else {
++		status = -panfs_export_ops->convert_rc(rc);
++		dprintk("%s: pan_sam_read rc %d: status %Zd\n",
++			__func__, rc, status);
++	}
++	dprintk("%s: Return status %Zd rc %d\n", __func__, status, rc);
++	objlayout_read_done(&state->ol_state, status, true);
++}
++
++ssize_t
++objio_read_pagelist(struct objlayout_io_state *ol_state)
++{
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	struct objlayout_segment *lseg = LSEG_LD_DATA(ol_state->lseg);
++	pan_sm_map_cap_t *mcs = (pan_sm_map_cap_t *)lseg->internal;
++	ssize_t status = 0;
++	pan_status_t rc = PAN_SUCCESS;
++
++	dprintk("%s: Begin\n", __func__);
++
++	status = panfs_shim_pages_to_sg(state, ol_state->pages,
++					ol_state->pgbase, ol_state->nr_pages,
++					ol_state->count);
++	if (unlikely(status))
++		goto err;
++
++	state->obj_sec.min_security = 0;
++	state->obj_sec.map_ccaps = mcs;
++
++	rc = panfs_export_ops->ucreds_get(&state->ucreds);
++	if (unlikely(rc)) {
++		status = -EACCES;
++		goto err;
++	}
++
++	state->u.read.args.obj_id = mcs->full_map.map_hdr.obj_id;
++	state->u.read.args.offset = ol_state->offset;
++	rc = panfs_export_ops->sam_read(PAN_SAM_ACCESS_BYPASS_TIMESTAMP,
++					&state->u.read.args,
++					&state->obj_sec,
++					state->sg_list,
++					state->ucreds,
++					ol_state->sync ?
++						NULL : panfs_shim_read_done,
++					state, NULL,
++					&state->u.read.res);
++	if (rc != PAN_ERR_IN_PROGRESS)
++		panfs_shim_read_done(state, NULL, &state->u.read.res, rc);
++ err:
++	dprintk("%s: Return %Zd\n", __func__, status);
++	return status;
++}
++
++/* Write completion handler: passed to sam_write() as the callback for async
++ * I/O, and also invoked directly by objio_write_pagelist() when sam_write()
++ * returns anything other than PAN_ERR_IN_PROGRESS. */
++static void
++panfs_shim_write_done(
++	void *arg1,
++	void *arg2,
++	pan_sam_write_res_t *res_p,
++	pan_status_t rc)
++{
++	struct panfs_shim_io_state *state = arg1;
++	ssize_t status;
++
++	dprintk("%s: Begin\n", __func__);
++	if (!res_p)
++		res_p = &state->u.write.res;
++	if (rc == PAN_SUCCESS)
++		rc = res_p->result;
++	if (rc == PAN_SUCCESS) {
++/*		state->ol_state.committed = NFS_FILE_SYNC;*/
++		state->ol_state.committed = NFS_UNSTABLE;
++		status = res_p->length;
++		WARN_ON(status < 0);
++
++		objlayout_add_delta_space_used(&state->ol_state,
++					       res_p->delta_capacity_used);
++	} else {
++		status = -panfs_export_ops->convert_rc(rc);
++		dprintk("%s: pan_sam_write rc %u: status %Zd\n",
++			__func__, rc, status);
++	}
++	dprintk("%s: Return status %Zd rc %d\n", __func__, status, rc);
++	objlayout_write_done(&state->ol_state, status, true);
++}
++
++ssize_t
++objio_write_pagelist(struct objlayout_io_state *ol_state,
++		     bool stable /* unused, PanOSD writes are stable */)
++{
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	struct objlayout_segment *lseg = LSEG_LD_DATA(ol_state->lseg);
++	pan_sm_map_cap_t *mcs = (pan_sm_map_cap_t *)lseg->internal;
++	ssize_t status = 0;
++	pan_status_t rc = PAN_SUCCESS;
++
++	dprintk("%s: Begin\n", __func__);
++
++	status = panfs_shim_pages_to_sg(state, ol_state->pages,
++					ol_state->pgbase, ol_state->nr_pages,
++					ol_state->count);
++	if (unlikely(status))
++		goto err;
++
++	state->obj_sec.min_security = 0;
++	state->obj_sec.map_ccaps = mcs;
++
++	rc = panfs_export_ops->ucreds_get(&state->ucreds);
++	if (unlikely(rc)) {
++		status = -EACCES;
++		goto err;
++	}
++
++	state->u.write.args.obj_id = mcs->full_map.map_hdr.obj_id;
++	state->u.write.args.offset = ol_state->offset;
++	rc = panfs_export_ops->sam_write(PAN_SAM_ACCESS_NONE,
++					 &state->u.write.args,
++					 &state->obj_sec,
++					 state->sg_list,
++					 state->ucreds,
++					 ol_state->sync ?
++						NULL : panfs_shim_write_done,
++					 state,
++					 NULL,
++					 &state->u.write.res);
++	if (rc != PAN_ERR_IN_PROGRESS)
++		panfs_shim_write_done(state, NULL, &state->u.write.res, rc);
++ err:
++	dprintk("%s: Return %Zd\n", __func__, status);
++	return status;
++}
++
++int
++panfs_shim_register(struct panfs_export_operations *ops)
++{
++	if (panfs_export_ops) {
++		printk(KERN_INFO
++		       "%s: panfs already registered (panfs ops %p)\n",
++		       __func__, panfs_export_ops);
++		return -EINVAL;
++	}
++
++	printk(KERN_INFO "%s: registering panfs ops %p\n",
++	       __func__, ops);
++
++	panfs_export_ops = ops;
++	return 0;
++}
++EXPORT_SYMBOL(panfs_shim_register);
++
++int
++panfs_shim_unregister(void)
++{
++	if (!panfs_export_ops) {
++		printk(KERN_INFO "%s: panfs is not registered\n", __func__);
++		return -EINVAL;
++	}
++
++	printk(KERN_INFO "%s: unregistering panfs ops %p\n",
++	       __func__, panfs_export_ops);
++
++	panfs_export_ops = NULL;
++	return 0;
++}
++EXPORT_SYMBOL(panfs_shim_unregister);
++
++/*
++ * Policy Operations
++ */
++
++/*
++ * Return the stripe size for the specified file
++ */
++ssize_t
++panlayout_get_stripesize(struct pnfs_layout_type *pnfslay)
++{
++	ssize_t sz, maxsz = -1;
++	struct pnfs_layout_segment *lseg;
++
++	dprintk("%s: Begin\n", __func__);
++
++	list_for_each_entry(lseg, &pnfslay->segs, fi_list) {
++		int n;
++		struct objlayout_segment *panlseg = LSEG_LD_DATA(lseg);
++		struct pnfs_osd_layout *lo =
++			(struct pnfs_osd_layout *)panlseg->pnfs_osd_layout;
++		struct pnfs_osd_data_map *map = &lo->olo_map;
++
++		n = map->odm_group_width;
++		if (n == 0)
++			n = map->odm_num_comps / (map->odm_mirror_cnt + 1);
++
++		switch (map->odm_raid_algorithm) {
++		case PNFS_OSD_RAID_0:
++			break;
++
++		case PNFS_OSD_RAID_4:
++		case PNFS_OSD_RAID_5:
++			n -= 1;
++			n *= 8;	/* FIXME: until we have 2-D coalescing */
++			break;
++
++		case PNFS_OSD_RAID_PQ:
++			n -= 2;
++			break;
++
++		default:
++			BUG_ON(1);
++		}
++		sz = map->odm_stripe_unit * n;
++		if (sz > maxsz)
++			maxsz = sz;
++	}
++	dprintk("%s: Return %Zd\n", __func__, maxsz);
++	return maxsz;
++}
++
++#define PANLAYOUT_DEF_STRIPE_UNIT    (64*1024)
++#define PANLAYOUT_DEF_STRIPE_WIDTH   9
++#define PANLAYOUT_MAX_STRIPE_WIDTH   11
++#define PANLAYOUT_MAX_GATHER_STRIPES 8
++
++/*
++ * Get the max [rw]size
++ */
++static ssize_t
++panlayout_get_blocksize(void)
++{
++	ssize_t sz = (PANLAYOUT_MAX_STRIPE_WIDTH-1) *
++		      PANLAYOUT_DEF_STRIPE_UNIT *
++		      PANLAYOUT_MAX_GATHER_STRIPES;
++	dprintk("%s: Return %Zd\n", __func__, sz);
++	return sz;
++}
++
++static struct layoutdriver_policy_operations panlayout_policy_operations = {
++/*
++ * Don't gather across stripes, but rather gather (coalesce) up to
++ * the stripe size.
++ *
++ * FIXME: change interface to use merge_align, merge_count
++ */
++	.flags                 = PNFS_LAYOUTRET_ON_SETATTR,
++	.get_stripesize        = panlayout_get_stripesize,
++	.get_blocksize         = panlayout_get_blocksize,
++};
++
++#define PNFS_LAYOUT_PANOSD (NFS4_PNFS_PRIVATE_LAYOUT | LAYOUT_OSD2_OBJECTS)
++
++static struct pnfs_layoutdriver_type panlayout_type = {
++	.id = PNFS_LAYOUT_PANOSD,
++	.name = "PNFS_LAYOUT_PANOSD",
++	.ld_io_ops = &objlayout_io_operations,
++	.ld_policy_ops = &panlayout_policy_operations,
++};
++
++MODULE_DESCRIPTION("pNFS Layout Driver for Panasas OSDs");
++MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
++MODULE_LICENSE("GPL");
++
++static int __init
++panlayout_init(void)
++{
++	pnfs_client_ops = pnfs_register_layoutdriver(&panlayout_type);
++	printk(KERN_INFO "%s: Registered Panasas OSD pNFS Layout Driver\n",
++	       __func__);
++	return 0;
++}
++
++static void __exit
++panlayout_exit(void)
++{
++	pnfs_unregister_layoutdriver(&panlayout_type);
++	printk(KERN_INFO "%s: Unregistered Panasas OSD pNFS Layout Driver\n",
++	       __func__);
++}
++
++module_init(panlayout_init);
++module_exit(panlayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-08-23 12:09:03.353501685 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-08-23 12:09:03.353501685 -0400
+@@ -0,0 +1,482 @@
++/*
++ *  panfs_shim.h
++ *
++ *  Data types and external function declarations for interfacing with
++ *  panfs (Panasas DirectFlow) I/O stack
++ *
++ *  Copyright (C) 2007 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ */
++
++#ifndef _PANLAYOUT_PANFS_SHIM_H
++#define _PANLAYOUT_PANFS_SHIM_H
++
++typedef s8 pan_int8_t;
++typedef u8 pan_uint8_t;
++typedef s16 pan_int16_t;
++typedef u16 pan_uint16_t;
++typedef s32 pan_int32_t;
++typedef u32 pan_uint32_t;
++typedef s64 pan_int64_t;
++typedef u64 pan_uint64_t;
++
++/*
++ * from pan_base_types.h
++ */
++typedef  pan_uint64_t pan_rpc_none_t;
++typedef pan_uint32_t  pan_rpc_arrdim_t;
++typedef pan_uint32_t  pan_status_t;
++typedef pan_uint8_t   pan_otw_t;
++typedef pan_uint8_t   pan_pad_t;
++
++typedef pan_uint32_t  pan_timespec_sec_t;
++typedef pan_uint32_t  pan_timespec_nsec_t;
++
++typedef  struct pan_timespec_s  pan_timespec_t;
++struct pan_timespec_s {
++  pan_timespec_sec_t   ts_sec;
++  pan_timespec_nsec_t  ts_nsec;
++};
++
++/*
++ * from pan_std_types.h
++ */
++typedef pan_uint32_t pan_size_t;
++typedef  int  pan_bool_t;
++
++/*
++ * from pan_common_error.h
++ */
++#define PAN_SUCCESS                                         ((pan_status_t)0)
++#define PAN_ERR_IN_PROGRESS                                 ((pan_status_t)55)
++
++/*
++ * from pan_sg.h
++ */
++typedef struct pan_sg_entry_s pan_sg_entry_t;
++struct pan_sg_entry_s {
++  void                  *buffer;       /* pointer to memory */
++  pan_uint32_t           chunk_size;   /* size of each chunk (bytes) */
++  pan_sg_entry_t        *next;
++};
++
++/*
++ * from pan_storage.h
++ */
++typedef pan_uint64_t pan_stor_dev_id_t;
++typedef pan_uint32_t pan_stor_obj_grp_id_t;
++typedef pan_uint64_t pan_stor_obj_uniq_t;
++typedef pan_uint32_t pan_stor_action_t;
++typedef pan_uint8_t pan_stor_cap_key_t[20];
++
++typedef pan_uint8_t pan_stor_key_type_t;
++typedef pan_uint64_t pan_stor_len_t;
++typedef pan_int64_t pan_stor_delta_len_t;
++typedef pan_uint64_t pan_stor_offset_t;
++typedef pan_uint16_t pan_stor_op_t;
++
++typedef pan_uint16_t pan_stor_sec_level_t;
++
++struct pan_stor_obj_id_s {
++  pan_stor_dev_id_t      dev_id;
++  pan_stor_obj_uniq_t    obj_id;
++  pan_stor_obj_grp_id_t  grp_id;
++};
++
++typedef struct pan_stor_obj_id_s pan_stor_obj_id_t;
++
++#define PAN_STOR_OP_NONE ((pan_stor_op_t) 0U)
++#define PAN_STOR_OP_READ ((pan_stor_op_t) 8U)
++#define PAN_STOR_OP_WRITE ((pan_stor_op_t) 9U)
++#define PAN_STOR_OP_APPEND ((pan_stor_op_t) 10U)
++#define PAN_STOR_OP_GETATTR ((pan_stor_op_t) 11U)
++#define PAN_STOR_OP_SETATTR ((pan_stor_op_t) 12U)
++#define PAN_STOR_OP_FLUSH ((pan_stor_op_t) 13U)
++#define PAN_STOR_OP_CLEAR ((pan_stor_op_t) 14U)
++
++/*
++ * from pan_aggregation_map.h
++ */
++typedef pan_uint8_t pan_agg_type_t;
++typedef pan_uint64_t pan_agg_map_version_t;
++typedef pan_uint8_t pan_agg_obj_state_t;
++typedef pan_uint8_t pan_agg_comp_state_t;
++typedef pan_uint8_t pan_agg_comp_flag_t;
++
++#define PAN_AGG_OBJ_STATE_INVALID ((pan_agg_obj_state_t) 0x00)
++#define PAN_AGG_OBJ_STATE_NORMAL ((pan_agg_obj_state_t) 0x01)
++#define PAN_AGG_OBJ_STATE_DEGRADED ((pan_agg_obj_state_t) 0x02)
++#define PAN_AGG_OBJ_STATE_RECONSTRUCT ((pan_agg_obj_state_t) 0x03)
++#define PAN_AGG_OBJ_STATE_COPYBACK ((pan_agg_obj_state_t) 0x04)
++#define PAN_AGG_OBJ_STATE_UNAVAILABLE ((pan_agg_obj_state_t) 0x05)
++#define PAN_AGG_OBJ_STATE_CREATING ((pan_agg_obj_state_t) 0x06)
++#define PAN_AGG_OBJ_STATE_DELETED ((pan_agg_obj_state_t) 0x07)
++#define PAN_AGG_COMP_STATE_INVALID ((pan_agg_comp_state_t) 0x00)
++#define PAN_AGG_COMP_STATE_NORMAL ((pan_agg_comp_state_t) 0x01)
++#define PAN_AGG_COMP_STATE_UNAVAILABLE ((pan_agg_comp_state_t) 0x02)
++#define PAN_AGG_COMP_STATE_COPYBACK ((pan_agg_comp_state_t) 0x03)
++#define PAN_AGG_COMP_F_NONE ((pan_agg_comp_flag_t) 0x00)
++#define PAN_AGG_COMP_F_ATTR_STORING ((pan_agg_comp_flag_t) 0x01)
++#define PAN_AGG_COMP_F_OBJ_CORRUPT_OBS ((pan_agg_comp_flag_t) 0x02)
++#define PAN_AGG_COMP_F_TEMP ((pan_agg_comp_flag_t) 0x04)
++
++struct pan_aggregation_map_s {
++  pan_agg_map_version_t  version;
++  pan_agg_obj_state_t    avail_state;
++  pan_stor_obj_id_t      obj_id;
++};
++
++typedef struct pan_aggregation_map_s pan_aggregation_map_t;
++
++struct pan_agg_comp_obj_s {
++  pan_stor_dev_id_t     dev_id;
++  pan_agg_comp_state_t  avail_state;
++  pan_agg_comp_flag_t   comp_flags;
++};
++
++typedef struct pan_agg_comp_obj_s pan_agg_comp_obj_t;
++
++struct pan_agg_simple_header_s {
++  pan_uint8_t  unused;
++};
++
++typedef struct pan_agg_simple_header_s pan_agg_simple_header_t;
++
++struct pan_agg_raid1_header_s {
++  pan_uint16_t  num_comps;
++};
++
++typedef struct pan_agg_raid1_header_s pan_agg_raid1_header_t;
++
++struct pan_agg_raid0_header_s {
++  pan_uint16_t  num_comps;
++  pan_uint32_t  stripe_unit;
++};
++
++typedef struct pan_agg_raid0_header_s pan_agg_raid0_header_t;
++
++struct pan_agg_raid5_left_header_s {
++  pan_uint16_t  num_comps;
++  pan_uint32_t  stripe_unit0;
++  pan_uint32_t  stripe_unit1;
++  pan_uint32_t  stripe_unit2;
++};
++
++typedef struct pan_agg_raid5_left_header_s pan_agg_raid5_left_header_t;
++
++typedef struct pan_agg_grp_raid5_left_header_s pan_agg_grp_raid5_left_header_t;
++
++struct pan_agg_grp_raid5_left_header_s {
++  pan_uint16_t  num_comps;
++  pan_uint32_t  stripe_unit;
++  pan_uint16_t  rg_width;
++  pan_uint16_t  rg_depth;
++  pan_uint8_t   group_layout_policy;
++};
++
++#define PAN_AGG_GRP_RAID5_LEFT_POLICY_INVALID ((pan_uint8_t) 0x00)
++#define PAN_AGG_GRP_RAID5_LEFT_POLICY_ROUND_ROBIN ((pan_uint8_t) 0x01)
++
++#define PAN_AGG_NULL_MAP ((pan_agg_type_t) 0x00)
++#define PAN_AGG_SIMPLE ((pan_agg_type_t) 0x01)
++#define PAN_AGG_RAID1 ((pan_agg_type_t) 0x02)
++#define PAN_AGG_RAID0 ((pan_agg_type_t) 0x03)
++#define PAN_AGG_RAID5_LEFT ((pan_agg_type_t) 0x04)
++#define PAN_AGG_GRP_RAID5_LEFT ((pan_agg_type_t) 0x06)
++#define PAN_AGG_MINTYPE ((pan_agg_type_t) 0x01)
++#define PAN_AGG_MAXTYPE ((pan_agg_type_t) 0x06)
++
++struct pan_agg_layout_hdr_s {
++  pan_agg_type_t type;
++  pan_pad_t pad[3];
++  union {
++    pan_uint64_t                        null;
++    pan_agg_simple_header_t             simple;
++    pan_agg_raid1_header_t              raid1;
++    pan_agg_raid0_header_t              raid0;
++    pan_agg_raid5_left_header_t         raid5_left;
++    pan_agg_grp_raid5_left_header_t     grp_raid5_left;
++  } hdr;
++};
++
++typedef struct pan_agg_layout_hdr_s pan_agg_layout_hdr_t;
++
++struct pan_agg_comp_obj_a_s {
++  pan_rpc_arrdim_t size;
++  pan_agg_comp_obj_t *data;
++};
++typedef struct pan_agg_comp_obj_a_s pan_agg_comp_obj_a;
++
++struct pan_agg_full_map_s {
++  pan_aggregation_map_t  map_hdr;
++  pan_agg_layout_hdr_t   layout_hdr;
++  pan_agg_comp_obj_a     components;
++};
++
++typedef struct pan_agg_full_map_s pan_agg_full_map_t;
++
++/*
++ * from pan_obsd_rpc_types.h
++ */
++typedef pan_uint8_t pan_obsd_security_key_a[16];
++
++typedef pan_uint8_t pan_obsd_capability_key_a[20];
++
++typedef pan_uint8_t pan_obsd_key_holder_id_t;
++
++#define PAN_OBSD_KEY_HOLDER_BASIS_KEY ((pan_obsd_key_holder_id_t) 0x01)
++#define PAN_OBSD_KEY_HOLDER_CAP_KEY ((pan_obsd_key_holder_id_t) 0x02)
++
++struct pan_obsd_key_holder_s {
++  pan_obsd_key_holder_id_t select;
++  pan_pad_t pad[3];
++  union {
++    pan_obsd_security_key_a    basis_key;
++    pan_obsd_capability_key_a  cap_key;
++  } key;
++};
++
++typedef struct pan_obsd_key_holder_s pan_obsd_key_holder_t;
++
++/*
++ * from pan_sm_sec.h
++ */
++typedef pan_uint8_t pan_sm_sec_type_t;
++typedef pan_uint8_t pan_sm_sec_otw_allo_mode_t;
++
++struct pan_obsd_capability_generic_otw_t_s {
++  pan_rpc_arrdim_t size;
++  pan_uint8_t *data;
++};
++typedef struct pan_obsd_capability_generic_otw_t_s
++				pan_obsd_capability_generic_otw_t;
++
++struct pan_sm_sec_obsd_s {
++  pan_obsd_key_holder_t              key;
++  pan_obsd_capability_generic_otw_t  cap_otw;
++  pan_sm_sec_otw_allo_mode_t         allo_mode;
++};
++
++typedef struct pan_sm_sec_obsd_s pan_sm_sec_obsd_t;
++
++struct pan_sm_sec_s {
++  pan_sm_sec_type_t type;
++  pan_pad_t pad[3];
++  union {
++    pan_rpc_none_t     none;
++    pan_sm_sec_obsd_t  obsd;
++  } variant;
++};
++
++typedef struct pan_sm_sec_s pan_sm_sec_t;
++
++struct pan_sm_sec_a_s {
++  pan_rpc_arrdim_t size;
++  pan_sm_sec_t *data;
++};
++typedef struct pan_sm_sec_a_s pan_sm_sec_a;
++typedef pan_otw_t *pan_sm_sec_otw_t;
++
++/*
++ * from pan_sm_types.h
++ */
++typedef pan_uint64_t pan_sm_cap_handle_t;
++
++struct pan_sm_map_cap_s {
++  pan_agg_full_map_t   full_map;
++  pan_stor_offset_t    offset;
++  pan_stor_len_t       length;
++  pan_sm_sec_a         secs;
++  pan_sm_cap_handle_t  handle;
++  pan_timespec_t       expiration_time;
++  pan_stor_action_t    action_mask;
++  pan_uint32_t         flags;
++};
++
++typedef struct pan_sm_map_cap_s pan_sm_map_cap_t;
++
++/*
++ * from pan_sm_ops.h
++ */
++typedef pan_rpc_none_t pan_sm_cache_ptr_t;
++
++/*
++ * from pan_sam_api.h
++ */
++typedef pan_uint32_t    pan_sam_access_flags_t;
++
++typedef struct pan_sam_dev_error_s  pan_sam_dev_error_t;
++struct pan_sam_dev_error_s {
++    pan_stor_dev_id_t       dev_id;
++    pan_stor_op_t           stor_op;
++    pan_status_t            error;
++};
++
++typedef struct pan_sam_ext_status_s pan_sam_ext_status_t;
++struct pan_sam_ext_status_s {
++    pan_uint32_t        available;
++    pan_uint32_t        size;
++    pan_sam_dev_error_t *errors;
++};
++
++enum pan_sam_rpc_sec_sel_e {
++    PAN_SAM_RPC_SEC_DEFAULT,
++    PAN_SAM_RPC_SEC_ATLEAST,
++    PAN_SAM_RPC_SEC_EXACTLY
++};
++typedef enum pan_sam_rpc_sec_sel_e pan_sam_rpc_sec_sel_t;
++
++typedef struct pan_sam_obj_sec_s pan_sam_obj_sec_t;
++struct pan_sam_obj_sec_s {
++    pan_stor_sec_level_t    min_security;
++    pan_sm_map_cap_t        *map_ccaps;
++};
++
++typedef struct  pan_sam_rpc_sec_s   pan_sam_rpc_sec_t;
++struct pan_sam_rpc_sec_s {
++    pan_sam_rpc_sec_sel_t   selector;
++};
++
++typedef struct pan_sam_read_args_s pan_sam_read_args_t;
++struct pan_sam_read_args_s {
++    pan_stor_obj_id_t                obj_id;
++    pan_sm_cache_ptr_t               obj_ent;
++    void                            *return_attr;
++    void                            *checksum;
++    pan_stor_offset_t                offset;
++    pan_uint16_t                     sm_options;
++    void                            *callout;
++    void                            *callout_arg;
++};
++
++typedef struct pan_sam_read_res_s pan_sam_read_res_t;
++struct pan_sam_read_res_s {
++    pan_status_t             result;
++    pan_sam_ext_status_t     ext_status;
++    pan_stor_len_t           length;
++    void                    *attr;
++    void                    *checksum;
++};
++
++typedef void (*pan_sam_read_cb_t)(
++    void                *user_arg1,
++    void                *user_arg2,
++    pan_sam_read_res_t  *res_p,
++    pan_status_t        status);
++
++#define PAN_SAM_ACCESS_NONE                             0x0000
++#define PAN_SAM_ACCESS_BYPASS_TIMESTAMP                 0x0020
++
++typedef struct pan_sam_write_args_s pan_sam_write_args_t;
++struct pan_sam_write_args_s {
++    pan_stor_obj_id_t   obj_id;
++    pan_sm_cache_ptr_t  obj_ent;
++    pan_stor_offset_t   offset;
++    void                *attr;
++    void                *return_attr;
++};
++
++typedef struct pan_sam_write_res_s pan_sam_write_res_t;
++struct pan_sam_write_res_s {
++    pan_status_t            result;
++    pan_sam_ext_status_t    ext_status;
++    pan_stor_len_t          length;
++    pan_stor_delta_len_t    delta_capacity_used;
++    pan_bool_t              parity_dirty;
++    void                   *attr;
++};
++
++typedef void (*pan_sam_write_cb_t)(
++    void                *user_arg1,
++    void                *user_arg2,
++    pan_sam_write_res_t *res_p,
++    pan_status_t        status);
++
++/*
++ * from pan_mgr_types.h
++ */
++#define PAN_MGR_ID_TYPE_SHIFT 56
++#define PAN_MGR_ID_TYPE_MASK ((pan_mgr_id_t)18374686479671623680ULL)
++#define PAN_MGR_ID_UNIQ_MASK ((pan_mgr_id_t)72057594037927935ULL)
++
++typedef pan_uint16_t pan_mgr_type_t;
++typedef pan_uint64_t pan_mgr_id_t;
++
++#define PAN_MGR_SM ((pan_mgr_type_t) 2U)
++#define PAN_MGR_OBSD ((pan_mgr_type_t) 6U)
++
++/*
++ * from pan_mgr_types_c.h; wrapped in do { } while (0) to act as one statement
++ */
++#define pan_mgr_id_construct_artificial(_mgr_type_, _mgr_uniq_, _mgr_id_p_) \
++do { \
++  pan_mgr_id_t  _id1, _id2; \
++  _id1 = (_mgr_type_); \
++  _id1 <<= PAN_MGR_ID_TYPE_SHIFT; \
++  _id1 &= PAN_MGR_ID_TYPE_MASK; \
++  _id2 = (_mgr_uniq_); \
++  _id2 &= PAN_MGR_ID_UNIQ_MASK; \
++  _id1 |= _id2; \
++  *(_mgr_id_p_) = _id1; \
++} while (0)
++
++/*
++ * from pan_storage_c.h
++ */
++#define pan_stor_is_device_id_an_obsd_id(_device_id_) \
++    ((((_device_id_) & PAN_MGR_ID_TYPE_MASK) >> PAN_MGR_ID_TYPE_SHIFT) \
++	== PAN_MGR_OBSD)
++
++/*
++ * pnfs_shim internal definitions
++ */
++
++struct panfs_shim_io_state {
++	struct objlayout_io_state ol_state;
++
++	pan_sg_entry_t *sg_list;
++	pan_sam_obj_sec_t obj_sec;
++	void *ucreds;
++	union {
++		struct {
++			pan_sam_read_args_t args;
++			pan_sam_read_res_t res;
++		} read;
++		struct {
++			pan_sam_write_args_t args;
++			pan_sam_write_res_t res;
++		} write;
++	} u;
++};
++
++#endif /* _PANLAYOUT_PANFS_SHIM_H */
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-08-23 12:09:03.354501721 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-08-23 12:09:03.354501721 -0400
+@@ -0,0 +1,435 @@
++/*
++ *  pnfs_osd_xdr.c
++ *
++ *  Object-Based pNFS Layout XDR layer
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/pnfs_osd_xdr.h>
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/*
++ * The following implementation is based on these Internet Drafts:
++ *
++ * draft-ietf-nfsv4-minorversion-21
++ * draft-ietf-nfsv4-pnfs-obj-12
++ */
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_objid(u32 *p, struct pnfs_osd_objid *objid)
++{
++	COPYMEM(objid->oid_device_id.data, sizeof(objid->oid_device_id.data));
++	READ64(objid->oid_partition_id);
++	READ64(objid->oid_object_id);
++	return p;
++}
++
++static inline u32 *
++pnfs_osd_xdr_decode_opaque_cred(u32 *p,
++				struct pnfs_osd_opaque_cred *opaque_cred)
++{
++	READ32(opaque_cred->cred_len);
++	COPYMEM(opaque_cred->cred, opaque_cred->cred_len);
++	return p;
++}
++
++/*
++ * struct pnfs_osd_object_cred {
++ * 	struct pnfs_osd_objid		oc_object_id;
++ * 	u32				oc_osd_version;
++ * 	u32				oc_cap_key_sec;
++ * 	struct pnfs_osd_opaque_cred	oc_cap_key
++ * 	struct pnfs_osd_opaque_cred	oc_cap;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_object_cred(u32 *p, struct pnfs_osd_object_cred *comp,
++				u8 **credp)
++{
++	u8 *cred;
++
++	p = pnfs_osd_xdr_decode_objid(p, &comp->oc_object_id);
++	READ32(comp->oc_osd_version);
++	READ32(comp->oc_cap_key_sec);
++
++	cred = *credp;
++	comp->oc_cap_key.cred = cred;
++	p = pnfs_osd_xdr_decode_opaque_cred(p, &comp->oc_cap_key);
++	cred = (u8 *)((u32 *)cred + XDR_QUADLEN(comp->oc_cap_key.cred_len));
++	comp->oc_cap.cred = cred;
++	p = pnfs_osd_xdr_decode_opaque_cred(p, &comp->oc_cap);
++	cred = (u8 *)((u32 *)cred + XDR_QUADLEN(comp->oc_cap.cred_len));
++	*credp = cred;
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_data_map {
++ * 	u32	odm_num_comps;
++ * 	u64	odm_stripe_unit;
++ * 	u32	odm_group_width;
++ * 	u32	odm_group_depth;
++ * 	u32	odm_mirror_cnt;
++ * 	u32	odm_raid_algorithm;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_data_map(u32 *p, struct pnfs_osd_data_map *data_map)
++{
++	READ32(data_map->odm_num_comps);
++	READ64(data_map->odm_stripe_unit);
++	READ32(data_map->odm_group_width);
++	READ32(data_map->odm_group_depth);
++	READ32(data_map->odm_mirror_cnt);
++	READ32(data_map->odm_raid_algorithm);
++	dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u "
++		"odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n",
++		__func__,
++		data_map->odm_num_comps,
++		(unsigned long long)data_map->odm_stripe_unit,
++		data_map->odm_group_width,
++		data_map->odm_group_depth,
++		data_map->odm_mirror_cnt,
++		data_map->odm_raid_algorithm);
++	return p;
++}
++
++struct pnfs_osd_layout *
++pnfs_osd_xdr_decode_layout(struct pnfs_osd_layout *layout, u32 *p)
++{
++	int i;
++	u32 *start = p;
++	struct pnfs_osd_object_cred *comp;
++	u8 *cred;
++
++	p = pnfs_osd_xdr_decode_data_map(p, &layout->olo_map);
++	READ32(layout->olo_comps_index);
++	READ32(layout->olo_num_comps);
++	layout->olo_comps = (struct pnfs_osd_object_cred *)(layout + 1);
++	comp = layout->olo_comps;
++	cred = (u8 *)(comp + layout->olo_num_comps);
++	dprintk("%s: comps_index=%u num_comps=%u\n",
++		__func__, layout->olo_comps_index, layout->olo_num_comps);
++	for (i = 0; i < layout->olo_num_comps; i++) {
++		p = pnfs_osd_xdr_decode_object_cred(p, comp, &cred);
++		dprintk("%s: comp[%d]=dev(%llx:%llx) par=0x%llx obj=0x%llx "
++			"key_len=%u cap_len=%u\n",
++			__func__, i,
++			_DEVID_LO(&comp->oc_object_id.oid_device_id),
++			_DEVID_HI(&comp->oc_object_id.oid_device_id),
++			comp->oc_object_id.oid_partition_id,
++			comp->oc_object_id.oid_object_id,
++			comp->oc_cap_key.cred_len, comp->oc_cap.cred_len);
++		comp++;
++	}
++	dprintk("%s: xdr_size=%Zd end=%p in_core_size=%Zd\n", __func__,
++	       (char *)p - (char *)start, cred, (char *)cred - (char *)layout);
++	return layout;
++}
++
++/*
++ * Get Device Information Decoding
++ *
++ * Note: since Device Information is currently done synchronously, most
++ *       of the actual fields are left inside the rpc buffer and are only
++ *       pointed to by the pnfs_osd_deviceaddr members. So the read buffer
++ *       should not be freed while the returned information is in use.
++ */
++
++u32 *__xdr_read_calc_nfs4_string(
++	u32 *p, struct nfs4_string *str, u8 **freespace)
++{
++	u32 len;
++	char *data;
++	bool need_copy;
++
++	READ32(len);
++	data = (char *)p;
++
++	if (data[len]) { /* Not null terminated we'll need extra space */
++		data = *freespace;
++		*freespace += len + 1;
++		need_copy = true;
++	} else {
++		need_copy = false;
++	}
++
++	if (str) {
++		str->len = len;
++		str->data = data;
++		if (need_copy) {
++			memcpy(data, p, len);
++			data[len] = 0;
++		}
++	}
++
++	p += XDR_QUADLEN(len);
++	return p;
++}
++
++u32 *__xdr_read_calc_u8_opaque(
++	u32 *p, struct nfs4_string *str)
++{
++	u32 len;
++
++	READ32(len);
++
++	if (str) {
++		str->len = len;
++		str->data = (char *)p;
++	}
++
++	p += XDR_QUADLEN(len);
++	return p;
++}
++
++/*
++ * struct pnfs_osd_targetid {
++ * 	u32			oti_type;
++ * 	struct nfs4_string	oti_scsi_device_id;
++ * };
++ */
++u32 *__xdr_read_calc_targetid(
++	u32 *p, struct pnfs_osd_targetid* targetid, u8 **freespace)
++{
++	u32 oti_type;
++
++	READ32(oti_type);
++	if (targetid)
++		targetid->oti_type = oti_type;
++
++	switch (oti_type) {
++	case OBJ_TARGET_SCSI_NAME:
++	case OBJ_TARGET_SCSI_DEVICE_ID:
++		p = __xdr_read_calc_u8_opaque(p,
++			targetid ? &targetid->oti_scsi_device_id : NULL);
++	}
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_net_addr {
++ * 	struct nfs4_string	r_netid;
++ * 	struct nfs4_string	r_addr;
++ * };
++ */
++u32 *__xdr_read_calc_net_addr(
++	u32 *p, struct pnfs_osd_net_addr* netaddr, u8 **freespace)
++{
++
++	p = __xdr_read_calc_nfs4_string(p,
++			netaddr ? &netaddr->r_netid : NULL,
++			freespace);
++
++	p = __xdr_read_calc_nfs4_string(p,
++			netaddr ? &netaddr->r_addr : NULL,
++			freespace);
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_targetaddr {
++ * 	u32				ota_available;
++ * 	struct pnfs_osd_net_addr	ota_netaddr;
++ * };
++ */
++u32 *__xdr_read_calc_targetaddr(
++	u32 *p, struct pnfs_osd_targetaddr *targetaddr, u8 **freespace)
++{
++	u32 ota_available;
++
++	READ32(ota_available);
++	if (targetaddr)
++		targetaddr->ota_available = ota_available;
++
++	if (ota_available) {
++		p = __xdr_read_calc_net_addr(p,
++				targetaddr ? &targetaddr->ota_netaddr : NULL,
++				freespace);
++	}
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_deviceaddr {
++ * 	struct pnfs_osd_targetid	oda_targetid;
++ * 	struct pnfs_osd_targetaddr	oda_targetaddr;
++ * 	u8				oda_lun[8];
++ * 	struct nfs4_string		oda_systemid;
++ * 	struct pnfs_osd_object_cred	oda_root_obj_cred;
++ * 	struct nfs4_string		oda_osdname;
++ * };
++ */
++u32 *__xdr_read_calc_deviceaddr(
++	u32 *p, struct pnfs_osd_deviceaddr *deviceaddr, u8 **freespace)
++{
++	p = __xdr_read_calc_targetid(p,
++			deviceaddr ? &deviceaddr->oda_targetid : NULL,
++			freespace);
++
++	p = __xdr_read_calc_targetaddr(p,
++			deviceaddr ? &deviceaddr->oda_targetaddr : NULL,
++			freespace);
++
++	if (deviceaddr)
++		COPYMEM(deviceaddr->oda_lun, sizeof(deviceaddr->oda_lun));
++	else
++		p += XDR_QUADLEN(sizeof(deviceaddr->oda_lun));
++
++	p = __xdr_read_calc_u8_opaque(p,
++			deviceaddr ? &deviceaddr->oda_systemid : NULL);
++
++	if (deviceaddr) {
++		p = pnfs_osd_xdr_decode_object_cred(p,
++				&deviceaddr->oda_root_obj_cred, freespace);
++	} else {
++		*freespace += pnfs_osd_object_cred_incore_sz(p);
++		p += pnfs_osd_object_cred_xdr_sz(p);
++	}
++
++	p = __xdr_read_calc_u8_opaque(p,
++			deviceaddr ? &deviceaddr->oda_osdname : NULL);
++
++	return p;
++}
++
++size_t pnfs_osd_xdr_deviceaddr_incore_sz(u32 *p)
++{
++	u8 *null_freespace = NULL;
++	size_t sz;
++
++	__xdr_read_calc_deviceaddr(p, NULL, &null_freespace);
++	sz = sizeof(struct pnfs_osd_deviceaddr) + (size_t)null_freespace;
++
++	return sz;
++}
++
++void pnfs_osd_xdr_decode_deviceaddr(
++	struct pnfs_osd_deviceaddr *deviceaddr, u32 *p)
++{
++	u8 *freespace = (u8 *)(deviceaddr + 1);
++
++	__xdr_read_calc_deviceaddr(p, deviceaddr, &freespace);
++}
++
++/*
++ * struct pnfs_osd_layoutupdate {
++ * 	u32	dsu_valid;
++ * 	s64	dsu_delta;
++ * 	u32	olu_ioerr_flag;
++ * };
++ */
++/* Encode @lou into @xdr; returns 0, or -E2BIG if space cannot be reserved */
++int pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
++				     struct pnfs_osd_layoutupdate *lou)
++{
++	/* Reserve only what is written: dsu_delta is skipped if !dsu_valid */
++	__be32 *p = xdr_reserve_space(xdr, lou->dsu_valid ? 16 : 8);
++
++	if (!p)
++		return -E2BIG;
++	*p++ = cpu_to_be32(lou->dsu_valid);
++	if (lou->dsu_valid)
++		p = xdr_encode_hyper(p, lou->dsu_delta);
++	*p++ = cpu_to_be32(lou->olu_ioerr_flag);
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ */
++static inline int pnfs_osd_xdr_encode_objid(struct xdr_stream *xdr,
++					    struct pnfs_osd_objid *object_id)
++{
++	__be32 *p;
++
++	p = xdr_reserve_space(xdr, 32);
++	if (!p)
++		return -E2BIG;
++
++	p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data,
++				    sizeof(object_id->oid_device_id.data));
++	p = xdr_encode_hyper(p, object_id->oid_partition_id);
++	p = xdr_encode_hyper(p, object_id->oid_object_id);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_ioerr {
++ * 	struct pnfs_osd_objid	oer_component;
++ * 	u64			oer_comp_offset;
++ * 	u64			oer_comp_length;
++ * 	u32			oer_iswrite;
++ * 	u32			oer_errno;
++ * };
++ */
++int pnfs_osd_xdr_encode_ioerr(struct xdr_stream *xdr,
++			      struct pnfs_osd_ioerr *ioerr)
++{
++	__be32 *p;
++	int ret;
++
++	ret = pnfs_osd_xdr_encode_objid(xdr, &ioerr->oer_component);
++	if (ret)
++		return ret;
++
++	p = xdr_reserve_space(xdr, 24);
++	if (!p)
++		return -E2BIG;
++
++	p = xdr_encode_hyper(p, ioerr->oer_comp_offset);
++	p = xdr_encode_hyper(p, ioerr->oer_comp_length);
++	*p++ = cpu_to_be32(ioerr->oer_iswrite);
++	*p   = cpu_to_be32(ioerr->oer_errno);
++
++	return 0;
++}
+diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/pagelist.c
+--- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-08-23 12:08:29.056411363 -0400
++++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-23 12:09:03.355511659 -0400
+@@ -20,6 +20,7 @@
+ #include <linux/nfs_mount.h>
+ 
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ static struct kmem_cache *nfs_page_cachep;
+ 
+@@ -56,7 +57,8 @@ nfs_page_free(struct nfs_page *p)
+ struct nfs_page *
+ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
+ 		   struct page *page,
+-		   unsigned int offset, unsigned int count)
++		   unsigned int offset, unsigned int count,
++		   struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_page		*req;
+ 
+@@ -79,7 +81,11 @@ nfs_create_request(struct nfs_open_conte
+ 	req->wb_pgbase	= offset;
+ 	req->wb_bytes   = count;
+ 	req->wb_context = get_nfs_open_context(ctx);
++	req->wb_lock_context = nfs_get_lock_context(ctx);
+ 	kref_init(&req->wb_kref);
++	req->wb_lseg    = lseg;
++	if (lseg)
++		get_lseg(lseg);
+ 	return req;
+ }
+ 
+@@ -141,18 +147,26 @@ void nfs_clear_request(struct nfs_page *
+ {
+ 	struct page *page = req->wb_page;
+ 	struct nfs_open_context *ctx = req->wb_context;
++	struct nfs_lock_context *l_ctx = req->wb_lock_context;
+ 
+ 	if (page != NULL) {
+ 		page_cache_release(page);
+ 		req->wb_page = NULL;
+ 	}
++	if (l_ctx != NULL) {
++		nfs_put_lock_context(l_ctx);
++		req->wb_lock_context = NULL;
++	}
+ 	if (ctx != NULL) {
+ 		put_nfs_open_context(ctx);
+ 		req->wb_context = NULL;
+ 	}
++	if (req->wb_lseg != NULL) {
++		put_lseg(req->wb_lseg);
++		req->wb_lseg = NULL;
++	}
+ }
+ 
+-
+ /**
+  * nfs_release_request - Release the count on an NFS read/write request
+  * @req: request to release
+@@ -231,11 +245,12 @@ void nfs_pageio_init(struct nfs_pageio_d
+  * Return 'true' if this is the case, else return 'false'.
+  */
+ static int nfs_can_coalesce_requests(struct nfs_page *prev,
+-				     struct nfs_page *req)
++				     struct nfs_page *req,
++				     struct nfs_pageio_descriptor *pgio)
+ {
+ 	if (req->wb_context->cred != prev->wb_context->cred)
+ 		return 0;
+-	if (req->wb_context->lockowner != prev->wb_context->lockowner)
++	if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
+ 		return 0;
+ 	if (req->wb_context->state != prev->wb_context->state)
+ 		return 0;
+@@ -245,6 +260,12 @@ static int nfs_can_coalesce_requests(str
+ 		return 0;
+ 	if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
+ 		return 0;
++	if (req->wb_lseg != prev->wb_lseg)
++		return 0;
++#ifdef CONFIG_NFS_V4_1
++	if (pgio->pg_test && !pgio->pg_test(pgio, prev, req))
++		return 0;
++#endif /* CONFIG_NFS_V4_1 */
+ 	return 1;
+ }
+ 
+@@ -277,7 +298,7 @@ static int nfs_pageio_do_add_request(str
+ 		if (newlen > desc->pg_bsize)
+ 			return 0;
+ 		prev = nfs_list_entry(desc->pg_list.prev);
+-		if (!nfs_can_coalesce_requests(prev, req))
++		if (!nfs_can_coalesce_requests(prev, req, desc))
+ 			return 0;
+ 	} else
+ 		desc->pg_base = req->wb_pgbase;
+@@ -366,6 +387,7 @@ void nfs_pageio_cond_complete(struct nfs
+  * @idx_start: lower bound of page->index to scan
+  * @npages: idx_start + npages sets the upper bound to scan.
+  * @tag: tag to scan for
++ * @use_pnfs: will be set TRUE if commit needs to be handled by layout driver
+  *
+  * Moves elements from one of the inode request lists.
+  * If the number of requests is set to 0, the entire address_space
+@@ -375,7 +397,7 @@ void nfs_pageio_cond_complete(struct nfs
+  */
+ int nfs_scan_list(struct nfs_inode *nfsi,
+ 		struct list_head *dst, pgoff_t idx_start,
+-		unsigned int npages, int tag)
++		  unsigned int npages, int tag, int *use_pnfs)
+ {
+ 	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
+ 	struct nfs_page *req;
+@@ -406,6 +428,8 @@ int nfs_scan_list(struct nfs_inode *nfsi
+ 				radix_tree_tag_clear(&nfsi->nfs_page_tree,
+ 						req->wb_index, tag);
+ 				nfs_list_add_request(req, dst);
++				if (req->wb_lseg)
++					*use_pnfs = 1;
+ 				res++;
+ 				if (res == INT_MAX)
+ 					goto out;
+diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.c
+--- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-08-23 12:09:03.356501413 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-08-23 12:09:03.357481204 -0400
+@@ -0,0 +1,2027 @@
++/*
++ *  linux/fs/nfs/pnfs.c
++ *
++ *  pNFS functions to call and manage layout drivers.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/kernel.h>
++#include <linux/errno.h>
++#include <linux/fs.h>
++#include <linux/module.h>
++#include <linux/smp_lock.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_mount.h>
++#include <linux/nfs_page.h>
++#include <linux/nfs4.h>
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/rculist.h>
++
++#include "internal.h"
++#include "nfs4_fs.h"
++#include "pnfs.h"
++
++#define NFSDBG_FACILITY		NFSDBG_PNFS
++
++#define MIN_POOL_LC		(4)
++
++static int pnfs_initialized;
++
++static void pnfs_free_layout(struct pnfs_layout_type *lo,
++			     struct nfs4_pnfs_layout_segment *range);
++static inline void get_layout(struct pnfs_layout_type *lo);
++
++/* Locking:
++ *
++ * pnfs_spinlock:
++ * 	protects pnfs_modules_tbl.
++ */
++static spinlock_t pnfs_spinlock = __SPIN_LOCK_UNLOCKED(pnfs_spinlock);
++
++/*
++ * pnfs_modules_tbl holds all pnfs modules
++ */
++static struct list_head	pnfs_modules_tbl;
++static struct kmem_cache *pnfs_cachep;
++static mempool_t *pnfs_layoutcommit_mempool;
++
++/* Take a layoutcommit_data from the mempool and hand it back zero-filled */
++static inline struct pnfs_layoutcommit_data *pnfs_layoutcommit_alloc(void)
++{
++	struct pnfs_layoutcommit_data *data;
++
++	data = mempool_alloc(pnfs_layoutcommit_mempool, GFP_NOFS);
++	if (data != NULL)
++		memset(data, 0, sizeof(*data));
++	return data;
++}
++
++void pnfs_layoutcommit_free(struct pnfs_layoutcommit_data *p)
++{
++	mempool_free(p, pnfs_layoutcommit_mempool);
++}
++
++/*
++ * struct pnfs_module - One per pNFS device module.
++ */
++struct pnfs_module {
++	struct pnfs_layoutdriver_type *pnfs_ld_type;
++	struct list_head        pnfs_tblid;
++};
++
++int
++pnfs_initialize(void)
++{
++	INIT_LIST_HEAD(&pnfs_modules_tbl);
++
++	pnfs_cachep = kmem_cache_create("pnfs_layoutcommit_data",
++					sizeof(struct pnfs_layoutcommit_data),
++					0, SLAB_HWCACHE_ALIGN, NULL);
++	if (pnfs_cachep == NULL)
++		return -ENOMEM;
++
++	pnfs_layoutcommit_mempool = mempool_create(MIN_POOL_LC,
++						   mempool_alloc_slab,
++						   mempool_free_slab,
++						   pnfs_cachep);
++	if (pnfs_layoutcommit_mempool == NULL) {
++		kmem_cache_destroy(pnfs_cachep);
++		return -ENOMEM;
++	}
++
++	pnfs_initialized = 1;
++	return 0;
++}
++
++void pnfs_uninitialize(void)
++{
++	mempool_destroy(pnfs_layoutcommit_mempool);
++	kmem_cache_destroy(pnfs_cachep);
++}
++
++/* Find the pnfs module registered for @id: 1 with *module set, else 0 */
++static int find_pnfs(u32 id, struct pnfs_module **module)
++{
++	struct pnfs_module *mod;
++
++	dprintk("PNFS: %s: Searching for %u\n", __func__, id);
++	list_for_each_entry(mod, &pnfs_modules_tbl, pnfs_tblid) {
++		if (mod->pnfs_ld_type->id != id)
++			continue;
++		*module = mod;
++		return 1;
++	}
++	return 0;
++}
++
++/* Set lo_cred to indicate we require a layoutcommit
++ * If we don't even have a layout, we don't need to commit it.
++ */
++void
++pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx)
++{
++	dprintk("%s: has_layout=%d ctx=%p\n", __func__, has_layout(nfsi), ctx);
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	if (has_layout(nfsi) &&
++	    !test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->pnfs_layout_state)) {
++		nfsi->layout->lo_cred = get_rpccred(ctx->state->owner->so_cred);
++		__set_bit(NFS_INO_LAYOUTCOMMIT,
++			  &nfsi->layout->pnfs_layout_state);
++		nfsi->change_attr++;
++		spin_unlock(&nfsi->vfs_inode.i_lock);
++		dprintk("%s: Set layoutcommit\n", __func__);
++		return;
++	}
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++}
++
++/* Update last_write_offset for layoutcommit.
++ * TODO: We should only use committed extents, but the current nfs
++ * implementation does not calculate the written range in nfs_commit_done.
++ * We therefore update this field in writeback_done.
++ */
++void
++pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
++{
++	loff_t end_pos;
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	if (offset < nfsi->layout->pnfs_write_begin_pos)
++		nfsi->layout->pnfs_write_begin_pos = offset;
++	end_pos = offset + extent - 1; /* I'm being inclusive */
++	if (end_pos > nfsi->layout->pnfs_write_end_pos)
++		nfsi->layout->pnfs_write_end_pos = end_pos;
++	dprintk("%s: Wrote %lu@%lu bpos %lu, epos: %lu\n",
++		__func__,
++		(unsigned long) extent,
++		(unsigned long) offset ,
++		(unsigned long) nfsi->layout->pnfs_write_begin_pos,
++		(unsigned long) nfsi->layout->pnfs_write_end_pos);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++}
++
++/* Uninitialize a mountpoint in a layout driver */
++void
++unmount_pnfs_layoutdriver(struct nfs_server *nfss)
++{
++	if (PNFS_EXISTS_LDIO_OP(nfss, uninitialize_mountpoint))
++		nfss->pnfs_curr_ld->ld_io_ops->uninitialize_mountpoint(nfss);
++}
++
++/*
++ * Set the server pnfs module to the registered pnfs_type matching id.
++ * Only one pNFS layout driver is supported.
++ */
++void
++set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
++		      u32 id)
++{
++	struct pnfs_module *mod = NULL;
++
++	if (server->pnfs_curr_ld)
++		return;
++
++	if (!find_pnfs(id, &mod)) {
++		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
++		find_pnfs(id, &mod);
++	}
++
++	if (!mod) {
++		dprintk("%s: No pNFS module found for %u. ", __func__, id);
++		goto out_err;
++	}
++
++	server->pnfs_curr_ld = mod->pnfs_ld_type;
++	if (mod->pnfs_ld_type->ld_io_ops->initialize_mountpoint(
++							server, mntfh)) {
++		printk(KERN_ERR "%s: Error initializing mount point "
++		       "for layout driver %u. ", __func__, id);
++		goto out_err;
++	}
++
++	dprintk("%s: pNFS module for %u set\n", __func__, id);
++	return;
++
++out_err:
++	dprintk("Using NFSv4 I/O\n");
++	server->pnfs_curr_ld = NULL;
++}
++
++/* Register a layout driver; returns the pnfs client ops, NULL on failure */
++struct pnfs_client_operations*
++pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
++{
++	struct pnfs_module *pnfs_mod;
++	struct layoutdriver_io_operations *io_ops = ld_type->ld_io_ops;
++
++	if (!pnfs_initialized) {
++		printk(KERN_ERR "%s Registration failure. "
++		       "pNFS not initialized.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops || !io_ops->alloc_layout || !io_ops->free_layout) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "alloc_layout and free_layout.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops->alloc_lseg || !io_ops->free_lseg) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "alloc_lseg and free_lseg.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops->read_pagelist || !io_ops->write_pagelist ||
++	    !io_ops->commit) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "read_pagelist, write_pagelist, and commit.\n",
++		       __func__);
++		return NULL;
++	}
++
++	pnfs_mod = kmalloc(sizeof(*pnfs_mod), GFP_KERNEL);
++	if (pnfs_mod == NULL)
++		return NULL;	/* not on the list, so do not report success */
++
++	dprintk("%s Registering id:%u name:%s\n",
++		__func__, ld_type->id, ld_type->name);
++	pnfs_mod->pnfs_ld_type = ld_type;
++	INIT_LIST_HEAD(&pnfs_mod->pnfs_tblid);
++
++	/* pnfs_spinlock protects pnfs_modules_tbl */
++	spin_lock(&pnfs_spinlock);
++	list_add(&pnfs_mod->pnfs_tblid, &pnfs_modules_tbl);
++	spin_unlock(&pnfs_spinlock);
++
++	return &pnfs_ops;
++}
++
++/* Unregister a layout driver: unlink and free its pnfs_modules_tbl entry */
++void
++pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
++{
++	struct pnfs_module *pnfs_mod = NULL;
++
++	spin_lock(&pnfs_spinlock);	/* lookup and unlink must be atomic */
++	if (find_pnfs(ld_type->id, &pnfs_mod)) {
++		dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
++		list_del(&pnfs_mod->pnfs_tblid);
++	}
++	spin_unlock(&pnfs_spinlock);
++	kfree(pnfs_mod);	/* still NULL (a no-op) when nothing matched */
++}
++
++/*
++ * pNFS client layout cache
++ */
++#if defined(CONFIG_SMP)
++#define BUG_ON_UNLOCKED_INO(ino) \
++	BUG_ON(!spin_is_locked(&ino->i_lock))
++#define BUG_ON_UNLOCKED_LO(lo) \
++	BUG_ON_UNLOCKED_INO(PNFS_INODE(lo))
++#else /* CONFIG_SMP */
++#define BUG_ON_UNLOCKED_INO(lo) do {} while (0)
++#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
++#endif /* CONFIG_SMP */
++
++static inline void
++get_layout(struct pnfs_layout_type *lo)
++{
++	BUG_ON_UNLOCKED_LO(lo);
++	lo->refcount++;
++}
++
++static inline void
++put_layout_locked(struct pnfs_layout_type *lo)
++{
++	BUG_ON_UNLOCKED_LO(lo);
++	BUG_ON(lo->refcount <= 0);
++
++	lo->refcount--;
++	if (!lo->refcount) {
++		struct layoutdriver_io_operations *io_ops = PNFS_LD_IO_OPS(lo);
++		struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
++
++		dprintk("%s: freeing layout cache %p\n", __func__, lo);
++		WARN_ON(!list_empty(&lo->lo_layouts));
++		io_ops->free_layout(lo);
++		nfsi->layout = NULL;
++	}
++}
++
++void
++put_layout(struct inode *inode)
++{
++	spin_lock(&inode->i_lock);
++	put_layout_locked(NFS_I(inode)->layout);
++	spin_unlock(&inode->i_lock);
++
++}
++
++void
++pnfs_layout_release(struct pnfs_layout_type *lo,
++		    struct nfs4_pnfs_layout_segment *range)
++{
++	struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	if (range)
++		pnfs_free_layout(lo, range);
++	/*
++	 * Matched in _pnfs_update_layout for layoutget
++	 * and by get_layout in _pnfs_return_layout for layoutreturn
++	 */
++	put_layout_locked(lo);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	wake_up_all(&nfsi->lo_waitq);
++}
++
++void
++pnfs_destroy_layout(struct nfs_inode *nfsi)
++{
++	struct pnfs_layout_type *lo;
++	struct nfs4_pnfs_layout_segment range = {
++		.iomode = IOMODE_ANY,
++		.offset = 0,
++		.length = NFS4_MAX_UINT64,
++	};
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	lo = nfsi->layout;
++	if (lo) {
++		pnfs_free_layout(lo, &range);
++		WARN_ON(!list_empty(&nfsi->layout->segs));
++		WARN_ON(!list_empty(&nfsi->layout->lo_layouts));
++
++		if (nfsi->layout->refcount != 1)
++			printk(KERN_WARNING "%s: layout refcount not=1 %d\n",
++				__func__, nfsi->layout->refcount);
++		WARN_ON(nfsi->layout->refcount != 1);
++
++		/* Matched by refcount set to 1 in alloc_init_layout */
++		put_layout_locked(lo);
++	}
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++}
++
++/*
++ * Called by the state manager to remove all layouts established under an
++ * expired lease.
++ */
++void
++pnfs_destroy_all_layouts(struct nfs_client *clp)
++{
++	struct pnfs_layout_type *lo;
++
++	while (!list_empty(&clp->cl_layouts)) {
++		lo = list_entry(clp->cl_layouts.next, struct pnfs_layout_type,
++				lo_layouts);
++		dprintk("%s freeing layout for inode %lu\n", __func__,
++			lo->lo_inode->i_ino);
++		pnfs_destroy_layout(NFS_I(lo->lo_inode));
++	}
++}
++
++static inline void
++init_lseg(struct pnfs_layout_type *lo, struct pnfs_layout_segment *lseg)
++{
++	INIT_LIST_HEAD(&lseg->fi_list);
++	kref_init(&lseg->kref);
++	lseg->valid = true;
++	lseg->layout = lo;
++}
++
++/* kref release: free lseg before the put that may free its layout */
++static void destroy_lseg(struct kref *kref)
++{
++	struct pnfs_layout_segment *lseg =
++		container_of(kref, struct pnfs_layout_segment, kref);
++	struct pnfs_layout_type *lo = lseg->layout;
++
++	dprintk("--> %s\n", __func__);
++	PNFS_LD_IO_OPS(lo)->free_lseg(lseg);
++	put_layout_locked(lo);	/* matches get_layout in pnfs_insert_layout */
++}
++
++static void
++put_lseg_locked(struct pnfs_layout_segment *lseg)
++{
++	bool do_wake_up;
++	struct nfs_inode *nfsi;
++
++	if (!lseg)
++		return;
++
++	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
++		atomic_read(&lseg->kref.refcount), lseg->valid);
++	do_wake_up = !lseg->valid;
++	nfsi = PNFS_NFS_INODE(lseg->layout);
++	kref_put(&lseg->kref, destroy_lseg);
++	if (do_wake_up)
++		wake_up(&nfsi->lo_waitq);
++}
++
++void
++put_lseg(struct pnfs_layout_segment *lseg)
++{
++	bool do_wake_up;
++	struct nfs_inode *nfsi;
++
++	if (!lseg)
++		return;
++
++	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
++		atomic_read(&lseg->kref.refcount), lseg->valid);
++	do_wake_up = !lseg->valid;
++	nfsi = PNFS_NFS_INODE(lseg->layout);
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	kref_put(&lseg->kref, destroy_lseg);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	if (do_wake_up)
++		wake_up(&nfsi->lo_waitq);
++}
++EXPORT_SYMBOL(put_lseg);
++
++void get_lseg(struct pnfs_layout_segment *lseg)
++{
++	kref_get(&lseg->kref);
++}
++EXPORT_SYMBOL(get_lseg);
++
++/* Exclusive end of [start, start + len); NFS4_MAX_UINT64 if the sum wraps */
++static inline u64 end_offset(u64 start, u64 len)
++{
++	u64 sum = start + len;
++
++	if (sum < start)
++		return NFS4_MAX_UINT64;
++	return sum;
++}
++
++/* last octet in a non-empty range; NFS4_MAX_UINT64 if start + len wraps */
++static inline u64 last_byte_offset(u64 start, u64 len)
++{
++	u64 sum = start + len;
++
++	BUG_ON(!len);
++	if (sum <= start)
++		return NFS4_MAX_UINT64;
++	return sum - 1;
++}
++
++/*
++ * is l2 fully contained in l1?
++ *   start1                             end1
++ *   [----------------------------------)
++ *           start2           end2
++ *           [----------------)
++ */
++static inline int
++lo_seg_contained(struct nfs4_pnfs_layout_segment *l1,
++		 struct nfs4_pnfs_layout_segment *l2)
++{
++	u64 outer_end = end_offset(l1->offset, l1->length);
++	u64 inner_end = end_offset(l2->offset, l2->length);
++
++	if (l1->offset > l2->offset)
++		return 0;	/* l2 starts before l1 does */
++	return outer_end >= inner_end;
++}
++
++/*
++ * do l1 and l2 intersect?
++ *   start1                             end1
++ *   [----------------------------------)
++ *                              start2           end2
++ *                              [----------------)
++ */
++static inline int
++lo_seg_intersecting(struct nfs4_pnfs_layout_segment *l1,
++		    struct nfs4_pnfs_layout_segment *l2)
++{
++	u64 end1 = end_offset(l1->offset, l1->length);
++	u64 end2 = end_offset(l2->offset, l2->length);
++
++	/* an end of NFS4_MAX_UINT64 is treated as lying past any start */
++	if (end1 != NFS4_MAX_UINT64 && end1 <= l2->offset)
++		return 0;
++	return end2 == NFS4_MAX_UINT64 || end2 > l1->offset;
++}
++
++void
++pnfs_set_layout_stateid(struct pnfs_layout_type *lo,
++			const nfs4_stateid *stateid)
++{
++	write_seqlock(&lo->seqlock);
++	memcpy(lo->stateid.u.data, stateid->u.data, sizeof(lo->stateid.u.data));
++	write_sequnlock(&lo->seqlock);
++}
++
++void
++pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_type *lo)
++{
++	int seq;
++
++	dprintk("--> %s\n", __func__);
++
++	do {
++		seq = read_seqbegin(&lo->seqlock);
++		memcpy(dst->u.data, lo->stateid.u.data,
++		       sizeof(lo->stateid.u.data));
++	} while (read_seqretry(&lo->seqlock, seq));
++
++	dprintk("<-- %s\n", __func__);
++}
++
++static void
++pnfs_layout_from_open_stateid(struct pnfs_layout_type *lo,
++			      struct nfs4_state *state)
++{
++	int seq;
++
++	dprintk("--> %s\n", __func__);
++
++	write_seqlock(&lo->seqlock);
++	if (!memcmp(lo->stateid.u.data, &zero_stateid, NFS4_STATEID_SIZE))
++		do {
++			seq = read_seqbegin(&state->seqlock);
++			memcpy(lo->stateid.u.data, state->stateid.u.data,
++					sizeof(state->stateid.u.data));
++		} while (read_seqretry(&state->seqlock, seq));
++	write_sequnlock(&lo->seqlock);
++	dprintk("<-- %s\n", __func__);
++}
++
++/*
++* Get layout from server.
++*    for now, assume that whole file layouts are requested.
++*    arg->offset: 0
++*    arg->length: all ones
++*/
++static int
++send_layoutget(struct inode *ino,
++	   struct nfs_open_context *ctx,
++	   struct nfs4_pnfs_layout_segment *range,
++	   struct pnfs_layout_segment **lsegpp,
++	   struct pnfs_layout_type *lo)
++{
++	int status;
++	struct nfs_server *server = NFS_SERVER(ino);
++	struct nfs4_pnfs_layoutget *lgp;
++
++	dprintk("--> %s\n", __func__);
++
++	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
++	if (lgp == NULL) {
++		pnfs_layout_release(lo, NULL);
++		return -ENOMEM;
++	}
++	lgp->args.minlength = NFS4_MAX_UINT64;
++	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
++	lgp->args.lseg.iomode = range->iomode;
++	lgp->args.lseg.offset = 0;
++	lgp->args.lseg.length = NFS4_MAX_UINT64;
++	lgp->args.type = server->pnfs_curr_ld->id;
++	lgp->args.inode = ino;
++	lgp->lsegpp = lsegpp;
++
++	if (!memcmp(lo->stateid.u.data, &zero_stateid, NFS4_STATEID_SIZE)) {
++		struct nfs_open_context *oldctx = ctx;
++
++		if (!oldctx) {
++			ctx = nfs_find_open_context(ino, NULL,
++					(range->iomode == IOMODE_READ) ?
++					FMODE_READ: FMODE_WRITE);
++			BUG_ON(!ctx);
++		}
++		/* Set the layout stateid from the open stateid */
++		pnfs_layout_from_open_stateid(NFS_I(ino)->layout, ctx->state);
++		if (!oldctx)
++			put_nfs_open_context(ctx);
++	}
++
++	/* Retrieve layout information from server */
++	status = pnfs4_proc_layoutget(lgp);
++
++	dprintk("<-- %s status %d\n", __func__, status);
++	return status;
++}
++
++/*
++ * iomode matching rules:
++ * range	lseg	match
++ * -----	-----	-----
++ * ANY		READ	true
++ * ANY		RW	true
++ * RW		READ	false
++ * RW		RW	true
++ * READ		READ	true
++ * READ		RW	false
++ */
++static inline int
++should_free_lseg(struct pnfs_layout_segment *lseg,
++		   struct nfs4_pnfs_layout_segment *range)
++{
++	return (range->iomode == IOMODE_ANY ||
++		lseg->range.iomode == range->iomode) &&
++	       lo_seg_intersecting(&lseg->range, range);
++}
++
++static struct pnfs_layout_segment *
++has_layout_to_return(struct pnfs_layout_type *lo,
++		     struct nfs4_pnfs_layout_segment *range)
++{
++	struct pnfs_layout_segment *out = NULL, *lseg;
++	dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
++		__func__, lo, range->offset, range->length, range->iomode);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry (lseg, &lo->segs, fi_list)
++		if (should_free_lseg(lseg, range)) {
++			out = lseg;
++			break;
++		}
++
++	dprintk("%s:Return lseg=%p\n", __func__, out);
++	return out;
++}
++
++static inline bool
++_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
++{
++	return atomic_read(&lseg->kref.refcount) == 1;
++}
++
++
++static void
++pnfs_free_layout(struct pnfs_layout_type *lo,
++		 struct nfs4_pnfs_layout_segment *range)
++{
++	struct pnfs_layout_segment *lseg, *next;
++	dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
++		__func__, lo, range->offset, range->length, range->iomode);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry_safe (lseg, next, &lo->segs, fi_list) {
++		if (!should_free_lseg(lseg, range) ||
++		    !_pnfs_can_return_lseg(lseg))
++			continue;
++		dprintk("%s: freeing lseg %p iomode %d "
++			"offset %llu length %llu\n", __func__,
++			lseg, lseg->range.iomode, lseg->range.offset,
++			lseg->range.length);
++		list_del(&lseg->fi_list);
++		put_lseg_locked(lseg);
++	}
++	if (list_empty(&lo->segs)) {
++		struct nfs_client *clp;
++
++		clp = PNFS_NFS_SERVER(lo)->nfs_client;
++		spin_lock(&clp->cl_lock);
++		list_del_init(&lo->lo_layouts);
++		spin_unlock(&clp->cl_lock);
++		pnfs_set_layout_stateid(lo, &zero_stateid);
++	}
++
++	dprintk("%s:Return\n", __func__);
++}
++
++static bool
++pnfs_return_layout_barrier(struct nfs_inode *nfsi,
++			   struct nfs4_pnfs_layout_segment *range)
++{
++	struct pnfs_layout_segment *lseg;
++	bool ret = false;
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	list_for_each_entry(lseg, &nfsi->layout->segs, fi_list) {
++		if (!should_free_lseg(lseg, range))
++			continue;
++		lseg->valid = false;
++		if (!_pnfs_can_return_lseg(lseg)) {
++			dprintk("%s: wait on lseg %p refcount %d\n",
++				__func__, lseg,
++				atomic_read(&lseg->kref.refcount));
++			ret = true;
++		}
++	}
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	dprintk("%s:Return %d\n", __func__, ret);
++	return ret;
++}
++
++static int
++return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
++	      enum pnfs_layoutreturn_type type, struct pnfs_layout_type *lo,
++	      bool wait)
++{
++	struct nfs4_pnfs_layoutreturn *lrp;
++	struct nfs_server *server = NFS_SERVER(ino);
++	int status = -ENOMEM;
++
++	dprintk("--> %s\n", __func__);
++
++	BUG_ON(type != RETURN_FILE);
++
++	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
++	if (lrp == NULL) {
++		if (lo && (type == RETURN_FILE))
++			pnfs_layout_release(lo, NULL);
++		goto out;
++	}
++	lrp->args.reclaim = 0;
++	lrp->args.layout_type = server->pnfs_curr_ld->id;
++	lrp->args.return_type = type;
++	lrp->args.lseg = *range;
++	lrp->args.inode = ino;
++
++	status = pnfs4_proc_layoutreturn(lrp, wait);
++out:
++	dprintk("<-- %s status: %d\n", __func__, status);
++	return status;
++}
++
++int
++_pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
++		    const nfs4_stateid *stateid, /* optional */
++		    enum pnfs_layoutreturn_type type,
++		    bool wait)
++{
++	struct pnfs_layout_type *lo = NULL;
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct nfs4_pnfs_layout_segment arg;
++	int status = 0;
++
++	dprintk("--> %s type %d\n", __func__, type);
++
++
++	arg.iomode = range ? range->iomode : IOMODE_ANY;
++	arg.offset = 0;
++	arg.length = NFS4_MAX_UINT64;
++
++	if (type == RETURN_FILE) {
++		spin_lock(&ino->i_lock);
++		lo = nfsi->layout;
++		if (lo && !has_layout_to_return(lo, &arg)) {
++			lo = NULL;
++		}
++		if (!lo) {
++			spin_unlock(&ino->i_lock);
++			dprintk("%s: no layout segments to return\n", __func__);
++			goto out;
++		}
++
++		/* Reference for layoutreturn matched in pnfs_layout_release */
++		get_layout(lo);
++
++		spin_unlock(&ino->i_lock);
++
++		if (pnfs_return_layout_barrier(nfsi, &arg)) {
++			if (stateid) { /* callback */
++				status = -EAGAIN;
++				goto out_put;
++			}
++			dprintk("%s: waiting\n", __func__);
++			wait_event(nfsi->lo_waitq,
++				   !pnfs_return_layout_barrier(nfsi, &arg));
++		}
++
++		if (layoutcommit_needed(nfsi)) {
++			if (stateid && !wait) { /* callback */
++				dprintk("%s: layoutcommit pending\n", __func__);
++				status = -EAGAIN;
++				goto out_put;
++			}
++			status = pnfs_layoutcommit_inode(ino, wait);
++			if (status) {
++				/* Return layout even if layoutcommit fails */
++				dprintk("%s: layoutcommit failed, status=%d. "
++					"Returning layout anyway\n",
++					__func__, status);
++			}
++		}
++
++		if (!stateid)
++			status = return_layout(ino, &arg, type, lo, wait);
++		else
++			pnfs_layout_release(lo, &arg);
++	}
++out:
++	dprintk("<-- %s status: %d\n", __func__, status);
++	return status;
++out_put:
++	put_layout(ino);
++	goto out;
++}
++
++/*
++ * cmp two layout segments for sorting into layout cache
++ */
++static inline s64
++cmp_layout(struct nfs4_pnfs_layout_segment *l1,
++	   struct nfs4_pnfs_layout_segment *l2)
++{
++	s64 d;
++
++	/* higher offset > lower offset */
++	d = l1->offset - l2->offset;
++	if (d)
++		return d;
++
++	/* longer length > shorter length */
++	d = l1->length - l2->length;
++	if (d)
++		return d;
++
++	/* read > read/write */
++	return (int)(l1->iomode == IOMODE_READ) -
++	       (int)(l2->iomode == IOMODE_READ);
++}
++
++static void
++pnfs_insert_layout(struct pnfs_layout_type *lo,
++		   struct pnfs_layout_segment *lseg)
++{
++	struct pnfs_layout_segment *lp;
++	int found = 0;
++
++	dprintk("%s:Begin\n", __func__);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	if (list_empty(&lo->segs)) {
++		struct nfs_client *clp = PNFS_NFS_SERVER(lo)->nfs_client;
++
++		spin_lock(&clp->cl_lock);
++		BUG_ON(!list_empty(&lo->lo_layouts));
++		list_add_tail(&lo->lo_layouts, &clp->cl_layouts);
++		spin_unlock(&clp->cl_lock);
++	}
++	list_for_each_entry (lp, &lo->segs, fi_list) {
++		if (cmp_layout(&lp->range, &lseg->range) > 0)
++			continue;
++		list_add_tail(&lseg->fi_list, &lp->fi_list);
++		dprintk("%s: inserted lseg %p "
++			"iomode %d offset %llu length %llu before "
++			"lp %p iomode %d offset %llu length %llu\n",
++			__func__, lseg, lseg->range.iomode,
++			lseg->range.offset, lseg->range.length,
++			lp, lp->range.iomode, lp->range.offset,
++			lp->range.length);
++		found = 1;
++		break;
++	}
++	if (!found) {
++		list_add_tail(&lseg->fi_list, &lo->segs);
++		dprintk("%s: inserted lseg %p "
++			"iomode %d offset %llu length %llu at tail\n",
++			__func__, lseg, lseg->range.iomode,
++			lseg->range.offset, lseg->range.length);
++	}
++	get_layout(lo);
++
++	dprintk("%s:Return\n", __func__);
++}
++
++/*
++ * Each layoutdriver embeds pnfs_layout_type as the first field in its
++ * per-layout type layout cache structure and returns it ZEROed
++ * from layoutdriver_io_ops->alloc_layout
++ */
++static struct pnfs_layout_type *
++alloc_init_layout(struct inode *ino)
++{
++	struct pnfs_layout_type *lo;
++	struct layoutdriver_io_operations *io_ops;
++
++	io_ops = NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops;
++	lo = io_ops->alloc_layout(ino);
++	if (!lo) {
++		printk(KERN_ERR
++			"%s: out of memory: io_ops->alloc_layout failed\n",
++			__func__);
++		return NULL;
++	}
++	lo->refcount = 1;
++	INIT_LIST_HEAD(&lo->lo_layouts);
++	INIT_LIST_HEAD(&lo->segs);
++	seqlock_init(&lo->seqlock);
++	lo->lo_inode = ino;
++	return lo;
++}
++
++/*
++ * Retrieve and possibly allocate the inode layout
++ *
++ * ino->i_lock must be taken by the caller.
++ */
++static struct pnfs_layout_type *
++pnfs_alloc_layout(struct inode *ino)
++{
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct pnfs_layout_type *new = NULL;
++
++	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
++
++	BUG_ON_UNLOCKED_INO(ino);
++	if (likely(nfsi->layout))
++		return nfsi->layout;
++
++	spin_unlock(&ino->i_lock);
++	new = alloc_init_layout(ino);
++	spin_lock(&ino->i_lock);
++
++	if (likely(nfsi->layout == NULL)) {	/* Won the race? */
++		nfsi->layout = new;
++	} else if (new) {
++		/* Reference the layout across i_lock release and grab */
++		get_layout(nfsi->layout);
++		spin_unlock(&ino->i_lock);
++		NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops->free_layout(new);
++		spin_lock(&ino->i_lock);
++		put_layout_locked(nfsi->layout);
++	}
++	return nfsi->layout;
++}
++
++/*
++ * iomode matching rules:
++ * range	lseg	match
++ * -----	-----	-----
++ * ANY		READ	true
++ * ANY		RW	true
++ * RW		READ	false
++ * RW		RW	true
++ * READ		READ	true
++ * READ		RW	true
++ */
++static inline int
++has_matching_lseg(struct pnfs_layout_segment *lseg,
++		  struct nfs4_pnfs_layout_segment *range)
++{
++	struct nfs4_pnfs_layout_segment range1;
++
++	if ((range->iomode == IOMODE_RW && lseg->range.iomode != IOMODE_RW) ||
++	    !lo_seg_intersecting(&lseg->range, range))
++		return 0;
++
++	/* range1 covers only the first byte in the range */
++	range1 = *range;
++	range1.length = 1;
++	return lo_seg_contained(&lseg->range, &range1);
++}
++
++/*
++ * lookup range in layout
++ */
++static struct pnfs_layout_segment *
++pnfs_has_layout(struct pnfs_layout_type *lo,
++		struct nfs4_pnfs_layout_segment *range,
++		bool take_ref,
++		bool only_valid)
++{
++	struct pnfs_layout_segment *lseg, *ret = NULL;
++
++	dprintk("%s:Begin\n", __func__);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry (lseg, &lo->segs, fi_list) {
++		if (has_matching_lseg(lseg, range) &&
++		    (lseg->valid || !only_valid)) {
++			ret = lseg;
++			if (take_ref)
++				get_lseg(ret);
++			break;
++		}
++		if (cmp_layout(range, &lseg->range) > 0)
++			break;
++	}
++
++	dprintk("%s:Return lseg %p take_ref %d ref %d valid %d\n",
++		__func__, ret, take_ref,
++		ret ? atomic_read(&ret->kref.refcount) : 0,
++		ret ? ret->valid : 0);
++	return ret;
++}
++
++/* Update the file's layout for the given iomode (pos and count are unused:
++ * the whole file is always requested); retrieved from the server if needed.
++ * If lsegpp is given, the appropriate layout segment is referenced and
++ * returned to the caller.
++ */
++void
++_pnfs_update_layout(struct inode *ino,
++		   struct nfs_open_context *ctx,
++		   loff_t pos,
++		   u64 count,
++		   enum pnfs_iomode iomode,
++		   struct pnfs_layout_segment **lsegpp)
++{
++	struct nfs4_pnfs_layout_segment arg = {
++		.iomode = iomode,
++		.offset = 0,
++		.length = NFS4_MAX_UINT64,
++	};
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct pnfs_layout_type *lo;
++	struct pnfs_layout_segment *lseg = NULL;
++	bool take_ref = (lsegpp != NULL);
++
++	if (take_ref)
++		*lsegpp = NULL;
++	spin_lock(&ino->i_lock);
++	lo = pnfs_alloc_layout(ino);
++	if (lo == NULL) {
++		dprintk("%s ERROR: can't get pnfs_layout_type\n", __func__);
++		goto out_unlock;
++	}
++
++	/* Check to see if the layout for the given range already exists */
++	lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
++	if (lseg && !lseg->valid) {
++		if (take_ref)
++			put_lseg_locked(lseg);
++		/* someone is cleaning the layout */
++		lseg = NULL;
++		goto out_unlock;
++	}
++
++	if (lseg) {
++		dprintk("%s: Using cached lseg %p for %llu@%llu iomode %d)\n",
++			__func__,
++			lseg,
++			arg.length,
++			arg.offset,
++			arg.iomode);
++
++		goto out_unlock;
++	}
++
++	/* if get layout already failed once goto out */
++	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->pnfs_layout_state)) {
++		if (unlikely(nfsi->pnfs_layout_suspend &&
++		    get_seconds() >= nfsi->pnfs_layout_suspend)) {
++			dprintk("%s: layout_get resumed\n", __func__);
++			clear_bit(lo_fail_bit(iomode),
++				  &nfsi->layout->pnfs_layout_state);
++			nfsi->pnfs_layout_suspend = 0;
++		} else
++			goto out_unlock;
++	}
++
++	/* Reference the layout for layoutget matched in pnfs_layout_release */
++	get_layout(lo);
++	spin_unlock(&ino->i_lock);
++
++	send_layoutget(ino, ctx, &arg, lsegpp, lo);
++out:
++	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
++		nfsi->layout ? nfsi->layout->pnfs_layout_state : 0UL, lseg);
++	return;
++out_unlock:
++	if (lsegpp)
++		*lsegpp = lseg;
++	spin_unlock(&ino->i_lock);
++	goto out;
++}
++
++/* Set lgp->status from rpc_status; on hard errors set the layout fail bit */
++void
++pnfs_get_layout_done(struct nfs4_pnfs_layoutget *lgp, int rpc_status)
++{
++	struct pnfs_layout_segment *lseg = NULL;
++	struct nfs_inode *nfsi = NFS_I(lgp->args.inode);
++	time_t suspend = 0;
++
++	dprintk("-->%s\n", __func__);
++
++	lgp->status = rpc_status;
++	if (likely(!rpc_status)) {
++		if (unlikely(lgp->res.layout.len <= 0)) {
++			printk(KERN_ERR
++			       "%s: ERROR Returned layout size is ZERO\n", __func__);
++			lgp->status = -EIO;
++		}
++		goto out;
++	}
++
++	dprintk("%s: ERROR retrieving layout %d\n", __func__, rpc_status);
++	switch (rpc_status) {
++	case -NFS4ERR_BADLAYOUT:
++		lgp->status = -ENOENT;
++		/* FALLTHROUGH */
++	case -EACCES:	/* NFS4ERR_ACCESS */
++		/* transient error, don't mark with NFS_INO_LAYOUT_FAILED */
++		goto out;
++
++	case -NFS4ERR_LAYOUTTRYLATER:
++	case -NFS4ERR_RECALLCONFLICT:
++	case -NFS4ERR_OLD_STATEID:
++	case -EAGAIN:	/* NFS4ERR_LOCKED */
++		lgp->status = -NFS4ERR_DELAY;	/* for nfs4_handle_exception */
++		/* FALLTHROUGH */
++	case -NFS4ERR_GRACE:
++	case -NFS4ERR_DELAY:
++		goto out;
++
++	case -NFS4ERR_ADMIN_REVOKED:
++	case -NFS4ERR_DELEG_REVOKED:
++		/* The layout is expected to be returned at this point.
++		 * This should clear the layout stateid as well */
++		suspend = get_seconds() + 1;
++		break;
++
++	case -NFS4ERR_LAYOUTUNAVAILABLE:
++		lgp->status = -ENOTSUPP;
++		break;
++
++	case -NFS4ERR_REP_TOO_BIG:
++	case -NFS4ERR_REP_TOO_BIG_TO_CACHE:
++		lgp->status = -E2BIG;
++		break;
++
++	/* Leave the following errors untranslated */
++	case -NFS4ERR_DEADSESSION:
++	case -NFS4ERR_DQUOT:
++	case -EINVAL:		/* NFS4ERR_INVAL */
++	case -EIO:		/* NFS4ERR_IO */
++	case -NFS4ERR_FHEXPIRED:
++	case -NFS4ERR_MOVED:
++	case -NFS4ERR_NOSPC:
++	case -ESERVERFAULT:	/* NFS4ERR_SERVERFAULT */
++	case -ESTALE:		/* NFS4ERR_STALE */
++	case -ETOOSMALL:	/* NFS4ERR_TOOSMALL */
++		break;
++
++	/* The following errors are our fault and should never happen */
++	case -NFS4ERR_BADIOMODE:
++	case -NFS4ERR_BADXDR:
++	case -NFS4ERR_REQ_TOO_BIG:
++	case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
++	case -NFS4ERR_WRONG_TYPE:
++		lgp->status = -EINVAL;
++		/* FALLTHROUGH */
++	case -NFS4ERR_BAD_STATEID:
++	case -NFS4ERR_NOFILEHANDLE:
++	case -ENOTSUPP:	/* NFS4ERR_NOTSUPP */
++	case -NFS4ERR_OPENMODE:
++	case -NFS4ERR_OP_NOT_IN_SESSION:
++	case -NFS4ERR_TOO_MANY_OPS:
++		dprintk("%s: error %d: should never happen\n", __func__,
++			rpc_status);
++		break;
++
++	/* The following errors are the server's fault */
++	default:
++		dprintk("%s: illegal error %d\n", __func__, rpc_status);
++		lgp->status = -EIO;
++		break;
++	}
++
++	/* remember that get layout failed and suspend trying */
++	nfsi->pnfs_layout_suspend = suspend;
++	set_bit(lo_fail_bit(lgp->args.lseg.iomode),
++		&nfsi->layout->pnfs_layout_state);
++	dprintk("%s: layout_get suspended until %ld\n",
++		__func__, suspend);
++out:
++	dprintk("%s end (err:%d) state 0x%lx lseg %p\n",
++		__func__, lgp->status, nfsi->layout->pnfs_layout_state, lseg);
++}
++
++int
++pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp)
++{
++	struct pnfs_layout_type *lo = NFS_I(lgp->args.inode)->layout;
++	struct nfs4_pnfs_layoutget_res *res = &lgp->res;
++	struct pnfs_layout_segment *lseg;
++	struct inode *ino = PNFS_INODE(lo);
++	int status = 0;
++
++	/* Inject layout blob into I/O device driver */
++	lseg = PNFS_LD_IO_OPS(lo)->alloc_lseg(lo, res);
++	if (!lseg || IS_ERR(lseg)) {
++		if (!lseg)
++			status = -ENOMEM;
++		else
++			status = PTR_ERR(lseg);
++		dprintk("%s: Could not allocate layout: error %d\n",
++		       __func__, status);
++		goto out;
++	}
++
++	spin_lock(&ino->i_lock);
++	init_lseg(lo, lseg);
++	lseg->range = res->lseg;
++	if (lgp->lsegpp) {
++		get_lseg(lseg);
++		*lgp->lsegpp = lseg;
++	}
++	pnfs_insert_layout(lo, lseg);
++
++	if (res->return_on_close) {
++		lo->roc_iomode |= res->lseg.iomode;
++		if (!lo->roc_iomode)
++			lo->roc_iomode = IOMODE_ANY;
++	}
++
++	/* Done processing layoutget. Set the layout stateid */
++	pnfs_set_layout_stateid(lo, &res->stateid);
++	spin_unlock(&ino->i_lock);
++out:
++	return status;
++}
++
++void
++readahead_range(struct inode *inode, struct list_head *pages, loff_t *offset,
++		size_t *count)
++{
++	struct page *first, *last;
++	loff_t foff, i_size = i_size_read(inode);
++	pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
++	size_t range;
++
++
++	first = list_entry((pages)->prev, struct page, lru);
++	last = list_entry((pages)->next, struct page, lru);
++
++	foff = (loff_t)first->index << PAGE_CACHE_SHIFT;
++
++	range = (last->index - first->index) * PAGE_CACHE_SIZE;
++	if (last->index == end_index)
++		range += ((i_size - 1) & ~PAGE_CACHE_MASK) + 1;
++	else
++		range += PAGE_CACHE_SIZE;
++	dprintk("%s foff %lu, range %Zu\n", __func__, (unsigned long)foff,
++		range);
++	*offset = foff;
++	*count = range;
++}
++
++void
++pnfs_set_pg_test(struct inode *inode, struct nfs_pageio_descriptor *pgio)
++{
++	struct pnfs_layout_type *laytype;
++	struct pnfs_layoutdriver_type *ld;
++
++	pgio->pg_test = NULL;
++
++	laytype = NFS_I(inode)->layout;
++	ld = NFS_SERVER(inode)->pnfs_curr_ld;
++	if (!pnfs_enabled_sb(NFS_SERVER(inode)) || !laytype)
++		return;
++
++	if (ld->ld_policy_ops)
++		pgio->pg_test = ld->ld_policy_ops->pg_test;
++}
++
++static u32
++pnfs_getboundary(struct inode *inode)
++{
++	u32 stripe_size = 0;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct layoutdriver_policy_operations *policy_ops;
++
++	if (!nfss->pnfs_curr_ld)
++		goto out;
++
++	policy_ops = nfss->pnfs_curr_ld->ld_policy_ops;
++	if (!policy_ops || !policy_ops->get_stripesize)
++		goto out;
++
++	/* The default is to not gather across stripes */
++	if (pnfs_ld_gather_across_stripes(nfss->pnfs_curr_ld))
++		goto out;
++
++	spin_lock(&inode->i_lock);
++	if (NFS_I(inode)->layout)
++		stripe_size = policy_ops->get_stripesize(NFS_I(inode)->layout);
++	spin_unlock(&inode->i_lock);
++out:
++	return stripe_size;
++}
++
++/*
++ * rsize is already set by caller to MDS rsize.
++ */
++void
++pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
++		  struct inode *inode,
++		  struct nfs_open_context *ctx,
++		  struct list_head *pages,
++		  size_t *rsize)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	size_t count = 0;
++	loff_t loff;
++
++	pgio->pg_iswrite = 0;
++	pgio->pg_boundary = 0;
++	pgio->pg_test = NULL;
++	pgio->pg_lseg = NULL;
++
++	if (!pnfs_enabled_sb(nfss))
++		return;
++
++	/* Calculate the total read-ahead count */
++	readahead_range(inode, pages, &loff, &count);
++
++	if (count > 0) {
++		_pnfs_update_layout(inode, ctx, loff, count, IOMODE_READ,
++				    &pgio->pg_lseg);
++		if (!pgio->pg_lseg)
++			return;
++
++		*rsize = NFS_SERVER(inode)->ds_rsize;
++		pgio->pg_boundary = pnfs_getboundary(inode);
++		if (pgio->pg_boundary)
++			pnfs_set_pg_test(inode, pgio);
++	}
++}
++
++void
++pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
++		       size_t *wsize)
++{
++	struct nfs_server *server = NFS_SERVER(inode);
++
++	pgio->pg_iswrite = 1;
++	if (!pnfs_enabled_sb(server)) {
++		pgio->pg_boundary = 0;
++		pgio->pg_test = NULL;
++		return;
++	}
++	pgio->pg_boundary = pnfs_getboundary(inode);
++	pnfs_set_pg_test(inode, pgio);
++	*wsize = server->ds_wsize;
++}
++
++/* Return I/O buffer size for a layout driver
++ * This value will determine what size reads and writes
++ * will be gathered into and sent to the data servers.
++ * blocksize must be a multiple of the page cache size.
++ */
++unsigned int
++pnfs_getiosize(struct nfs_server *server)
++{
++	if (!PNFS_EXISTS_LDPOLICY_OP(server, get_blocksize))
++		return 0;
++	return server->pnfs_curr_ld->ld_policy_ops->get_blocksize();
++}
++
++void
++pnfs_set_ds_iosize(struct nfs_server *server)
++{
++	unsigned dssize = pnfs_getiosize(server);
++
++	/* Set buffer size for data servers */
++	if (dssize > 0) {
++		server->ds_rsize = server->ds_wsize =
++			nfs_block_size(dssize, NULL);
++	} else {
++		server->ds_wsize = server->wsize;
++		server->ds_rsize = server->rsize;
++	}
++}
++
++static int
++pnfs_call_done(struct pnfs_call_data *pdata, struct rpc_task *task, void *data)
++{
++	put_lseg(pdata->lseg);
++	pdata->lseg = NULL;
++	pdata->call_ops->rpc_call_done(task, data);
++	if (pdata->pnfs_error == -EAGAIN || task->tk_status == -EAGAIN)
++		return -EAGAIN;
++	if (pdata->pnfsflags & PNFS_NO_RPC) {
++		pdata->call_ops->rpc_release(data);
++	} else {
++		/*
++		 * just restore original rpc call ops
++		 * rpc_release will be called later by the rpc scheduling layer.
++		 */
++		task->tk_ops = pdata->call_ops;
++	}
++	return 0;
++}
++
++/* Post-write completion function
++ * Invoked by all layout drivers when write_pagelist is done.
++ *
++ * NOTE: callers set data->pnfsflags PNFS_NO_RPC
++ * so that the NFS cleanup routines perform only the page cache
++ * cleanup.
++ */
++static void
++pnfs_write_retry(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++	struct nfs4_pnfs_layout_segment range;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++	range.iomode = IOMODE_RW;
++	range.offset = wdata->args.offset;
++	range.length = wdata->args.count;
++	_pnfs_return_layout(wdata->inode, &range, NULL, RETURN_FILE, true);
++	pnfs_initiate_write(wdata, NFS_CLIENT(wdata->inode),
++			    wdata->pdata.call_ops, wdata->pdata.how);
++}
++
++static void
++pnfs_writeback_done(struct nfs_write_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	/* update last write offset and need layout commit
++	 * for non-files layout types (files layout calls
++	 * pnfs4_write_done for this)
++	 */
++	if ((pdata->pnfsflags & PNFS_NO_RPC) &&
++	    data->task.tk_status >= 0 && data->res.count > 0) {
++		struct nfs_inode *nfsi = NFS_I(data->inode);
++
++		pnfs_update_last_write(nfsi, data->args.offset, data->res.count);
++		pnfs_need_layoutcommit(nfsi, data->args.context);
++	}
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		INIT_WORK(&data->task.u.tk_work, pnfs_write_retry);
++		queue_work(nfsiod_workqueue, &data->task.u.tk_work);
++	}
++}
++
++static void _pnfs_clear_lseg_from_pages(struct list_head *head)
++{
++	struct nfs_page *req;
++
++	list_for_each_entry(req, head, wb_list) {
++		put_lseg(req->wb_lseg);
++		req->wb_lseg = NULL;
++	}
++}
++
++/*
++ * Call the appropriate parallel I/O subsystem write function.
++ * If no I/O device driver exists, or one does match the returned
++ * fstype, then return a positive status for regular NFS processing.
++ *
++ * TODO: Is wdata->how and wdata->args.stable always the same value?
++ * TODO: It seems in NFS, the server may not do a stable write even
++ * though it was requested (and vice-versa?).  To check, it looks
++ * in data->res.verf->committed.  Do we need this ability
++ * for non-file layout drivers?
++ */
++enum pnfs_try_status
++pnfs_try_to_write_data(struct nfs_write_data *wdata,
++			const struct rpc_call_ops *call_ops, int how)
++{
++	struct inode *inode = wdata->inode;
++	enum pnfs_try_status trypnfs;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct pnfs_layout_segment *lseg = wdata->req->wb_lseg;
++
++	wdata->pdata.call_ops = call_ops;
++	wdata->pdata.pnfs_error = 0;
++	wdata->pdata.how = how;
++
++	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
++		inode->i_ino, wdata->args.count, wdata->args.offset, how);
++
++	get_lseg(lseg);
++
++	if (!pnfs_use_rpc(nfss))
++		wdata->pdata.pnfsflags |= PNFS_NO_RPC;
++	wdata->pdata.lseg = lseg;
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->write_pagelist(wdata,
++		nfs_page_array_len(wdata->args.pgbase, wdata->args.count),
++								how);
++
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {
++		wdata->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		wdata->pdata.lseg = NULL;
++		put_lseg(lseg);
++		_pnfs_clear_lseg_from_pages(&wdata->pages);
++	} else {
++		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
++	}
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++
++/* Post-read completion function.  Invoked by all layout drivers when
++ * read_pagelist is done
++ */
++static void
++pnfs_read_retry(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_read_data *rdata;
++	struct nfs4_pnfs_layout_segment range;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	rdata = container_of(task, struct nfs_read_data, task);
++	range.iomode = IOMODE_RW;
++	range.offset = rdata->args.offset;
++	range.length = rdata->args.count;
++	_pnfs_return_layout(rdata->inode, &range, NULL, RETURN_FILE, true);
++	pnfs_initiate_read(rdata, NFS_CLIENT(rdata->inode),
++			   rdata->pdata.call_ops);
++}
++
++static void
++pnfs_read_done(struct nfs_read_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		INIT_WORK(&data->task.u.tk_work, pnfs_read_retry);
++		queue_work(nfsiod_workqueue, &data->task.u.tk_work);
++	}
++}
++
++/*
++ * Call the appropriate parallel I/O subsystem read function.
++ * If no I/O device driver exists, or one does match the returned
++ * fstype, then return a positive status for regular NFS processing.
++ */
++enum pnfs_try_status
++pnfs_try_to_read_data(struct nfs_read_data *rdata,
++		       const struct rpc_call_ops *call_ops)
++{
++	struct inode *inode = rdata->inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct pnfs_layout_segment *lseg = rdata->req->wb_lseg;
++	enum pnfs_try_status trypnfs;
++
++	rdata->pdata.call_ops = call_ops;
++	rdata->pdata.pnfs_error = 0;
++
++	dprintk("%s: Reading ino:%lu %u@%llu\n",
++		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
++
++	get_lseg(lseg);
++
++	if (!pnfs_use_rpc(nfss))
++		rdata->pdata.pnfsflags |= PNFS_NO_RPC;
++	rdata->pdata.lseg = lseg;
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->read_pagelist(rdata,
++		nfs_page_array_len(rdata->args.pgbase, rdata->args.count));
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {
++		rdata->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		rdata->pdata.lseg = NULL;
++		put_lseg(lseg);
++		_pnfs_clear_lseg_from_pages(&rdata->pages);
++	} else {
++		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
++	}
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++
++/*
++ * This gives the layout driver an opportunity to read in page "around"
++ * the data to be written.  It returns 0 on success, otherwise an error code
++ * which will either be passed up to user, or ignored if
++ * some previous part of write succeeded.
++ * Note the range [pos, pos+len-1] is entirely within the page.
++ */
++int _pnfs_write_begin(struct inode *inode, struct page *page,
++		      loff_t pos, unsigned len,
++		      struct pnfs_layout_segment *lseg,
++		      struct pnfs_fsdata **fsdata)
++{
++	struct pnfs_fsdata *data;
++	int status = 0;
++
++	dprintk("--> %s: pos=%llu len=%u\n",
++		__func__, (unsigned long long)pos, len);
++	data = kzalloc(sizeof(struct pnfs_fsdata), GFP_KERNEL);
++	if (!data) {
++		status = -ENOMEM;
++		goto out;
++	}
++	data->lseg = lseg; /* refcount passed into data to be managed there */
++	status = NFS_SERVER(inode)->pnfs_curr_ld->ld_io_ops->write_begin(
++						lseg, page, pos, len, data);
++	if (status) {
++		kfree(data);
++		data = NULL;
++	}
++out:
++	*fsdata = data;
++	dprintk("<-- %s: status=%d\n", __func__, status);
++	return status;
++}
++
++/* Return 0 on success, negative on failure */
++/* CAREFUL - what happens if copied < len??? */
++int _pnfs_write_end(struct inode *inode, struct page *page,
++		    loff_t pos, unsigned len, unsigned copied,
++		    struct pnfs_layout_segment *lseg)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int status;
++
++	status = nfss->pnfs_curr_ld->ld_io_ops->write_end(inode, page,
++						pos, len, copied, lseg);
++	return status;
++}
++
++/* pNFS Commit callback function for all layout drivers */
++static void
++pnfs_commit_done(struct nfs_write_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		struct nfs4_pnfs_layout_segment range = {
++			.iomode = IOMODE_RW,
++			.offset = data->args.offset,
++			.length = data->args.count,
++		};
++		dprintk("%s: retrying\n", __func__);
++		_pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE,
++				    true);
++		pnfs_initiate_commit(data, NFS_CLIENT(data->inode),
++				     pdata->call_ops, pdata->how, 1);
++	}
++}
++
++enum pnfs_try_status
++pnfs_try_to_commit(struct nfs_write_data *data,
++		    const struct rpc_call_ops *call_ops, int sync)
++{
++	struct inode *inode = data->inode;
++	struct nfs_server *nfss = NFS_SERVER(data->inode);
++	enum pnfs_try_status trypnfs;
++
++	dprintk("%s: Begin\n", __func__);
++
++	/* We need to account for possibility that
++	 * each nfs_page can point to a different lseg (or be NULL).
++	 * For the immediate case of whole-file-only layouts, we at
++	 * least know there can be only a single lseg.
++	 * We still have to account for the possibility of some being NULL.
++	 * This will be done by passing the buck to the layout driver.
++	 */
++	data->pdata.call_ops = call_ops;
++	data->pdata.pnfs_error = 0;
++	data->pdata.how = sync;
++	data->pdata.lseg = NULL;
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->commit(data, sync);
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {
++		data->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		_pnfs_clear_lseg_from_pages(&data->pages);
++	} else
++		nfs_inc_stats(inode, NFSIOS_PNFS_COMMIT);
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++
++void pnfs_cleanup_layoutcommit(struct pnfs_layoutcommit_data *data)
++{
++	struct nfs_server *nfss = NFS_SERVER(data->args.inode);
++
++	/* TODO: Maybe we should avoid this by allowing the layout driver
++	 * to XDR-encode its layout directly onto the wire.
++	 */
++	if (nfss->pnfs_curr_ld->ld_io_ops->cleanup_layoutcommit)
++		nfss->pnfs_curr_ld->ld_io_ops->cleanup_layoutcommit(
++					NFS_I(data->args.inode)->layout,
++					&data->args, data->status);
++}
++
++/*
++ * Set up the argument/result storage required for the RPC call.
++ */
++static int
++pnfs_layoutcommit_setup(struct inode *inode,
++			struct pnfs_layoutcommit_data *data,
++			loff_t write_begin_pos, loff_t write_end_pos)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int result = 0;
++
++	dprintk("--> %s\n", __func__);
++
++	data->args.inode = inode;
++	data->args.fh = NFS_FH(inode);
++	data->args.layout_type = nfss->pnfs_curr_ld->id;
++	data->res.fattr = &data->fattr;
++	nfs_fattr_init(&data->fattr);
++
++	/* TODO: Need to determine the correct values */
++	data->args.time_modify_changed = 0;
++
++	/* Describe the written range using the positions passed by the caller
++	 */
++	data->args.lseg.iomode = IOMODE_RW;
++	data->args.lseg.offset = write_begin_pos;
++	data->args.lseg.length = write_end_pos - write_begin_pos + 1;
++	data->args.lastbytewritten =  min(write_end_pos,
++					  i_size_read(inode) - 1);
++	data->args.bitmask = nfss->attr_bitmask;
++	data->res.server = nfss;
++
++	/* Let the layout driver, if it provides the hook, set the arguments */
++	if (nfss->pnfs_curr_ld->ld_io_ops->setup_layoutcommit)
++		result = nfss->pnfs_curr_ld->ld_io_ops->setup_layoutcommit(
++				NFS_I(inode)->layout, &data->args);
++
++	dprintk("<-- %s Status %d\n", __func__, result);
++	return result;
++}
++
++/* Issue a layoutcommit for an inode; @sync is forwarded as the issync flag.
++ */
++int
++pnfs_layoutcommit_inode(struct inode *inode, int sync)
++{
++	struct pnfs_layoutcommit_data *data;
++	struct nfs_inode *nfsi = NFS_I(inode);
++	loff_t write_begin_pos;
++	loff_t write_end_pos;
++
++	int status = 0;
++
++	dprintk("%s Begin (sync:%d)\n", __func__, sync);
++
++	BUG_ON(!has_layout(nfsi));
++
++	data = pnfs_layoutcommit_alloc();
++	if (!data)
++		return -ENOMEM;
++
++	spin_lock(&inode->i_lock);
++	if (!layoutcommit_needed(nfsi)) {
++		spin_unlock(&inode->i_lock);
++		goto out_free;	/* nothing to commit; status is still 0 */
++	}
++
++	/* Snapshot the layoutcommit state and clear it in the layout so
++	 * new lc info can be generated
++	 */
++	write_begin_pos = nfsi->layout->pnfs_write_begin_pos;
++	write_end_pos = nfsi->layout->pnfs_write_end_pos;
++	data->cred = nfsi->layout->lo_cred;
++	nfsi->layout->pnfs_write_begin_pos = 0;
++	nfsi->layout->pnfs_write_end_pos = 0;
++	nfsi->layout->lo_cred = NULL;
++	__clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->pnfs_layout_state);
++	pnfs_get_layout_stateid(&data->args.stateid, nfsi->layout);
++
++	/* Reference for layoutcommit matched in pnfs_layoutcommit_release */
++	get_layout(NFS_I(inode)->layout);
++
++	spin_unlock(&inode->i_lock);
++
++	/* Set up layout commit args */
++	status = pnfs_layoutcommit_setup(inode, data, write_begin_pos,
++					 write_end_pos);
++	if (status) {
++		/* Layout driver setup failed: drop the cred and layout refs */
++		put_rpccred(data->cred);
++		put_layout(inode);
++		goto out_free;
++	}
++	status = pnfs4_proc_layoutcommit(data, sync);
++out:
++	dprintk("%s end (err:%d)\n", __func__, status);
++	return status;
++out_free:
++	pnfs_layoutcommit_free(data);
++	goto out;
++}
++
++void pnfs_free_fsdata(struct pnfs_fsdata *fsdata)
++{
++	if (fsdata) {
++		/* lseg refcounting handled directly in nfs_write_end */
++		kfree(fsdata);
++	}
++}
++
++/* Callback operations for layout drivers.
++ */
++struct pnfs_client_operations pnfs_ops = {
++	.nfs_getdevicelist = nfs4_pnfs_getdevicelist,
++	.nfs_getdeviceinfo = nfs4_pnfs_getdeviceinfo,
++	.nfs_readlist_complete = pnfs_read_done,
++	.nfs_writelist_complete = pnfs_writeback_done,
++	.nfs_commit_complete = pnfs_commit_done,
++};
++
++EXPORT_SYMBOL(pnfs_unregister_layoutdriver);
++EXPORT_SYMBOL(pnfs_register_layoutdriver);
++
++
++/* Device ID cache. Supports one layout type per struct nfs_client */
++int
++nfs4_alloc_init_deviceid_cache(struct nfs_client *clp,
++			 void (*free_callback)(struct kref *))
++{
++	struct nfs4_deviceid_cache *c;
++
++	c = kzalloc(sizeof(struct nfs4_deviceid_cache), GFP_KERNEL);
++	if (!c)
++		return -ENOMEM;
++	spin_lock(&clp->cl_lock);
++	if (clp->cl_devid_cache != NULL) {
++		kref_get(&clp->cl_devid_cache->dc_kref);
++		spin_unlock(&clp->cl_lock);
++		dprintk("%s [kref [%d]]\n", __func__,
++			atomic_read(&clp->cl_devid_cache->dc_kref.refcount));
++		kfree(c);
++	} else {
++		int i;
++
++		spin_lock_init(&c->dc_lock);
++		for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE ; i++)
++			INIT_HLIST_HEAD(&c->dc_deviceids[i]);
++		kref_init(&c->dc_kref);
++		c->dc_free_callback = free_callback;
++		clp->cl_devid_cache = c;
++		spin_unlock(&clp->cl_lock);
++		dprintk("%s [new]\n", __func__);
++	}
++	return 0;
++}
++EXPORT_SYMBOL(nfs4_alloc_init_deviceid_cache);
++
++void
++nfs4_init_deviceid_node(struct nfs4_deviceid *d)
++{
++	INIT_HLIST_NODE(&d->de_node);
++	kref_init(&d->de_kref);
++}
++EXPORT_SYMBOL(nfs4_init_deviceid_node);
++
++/* Called from layoutdriver_io_operations->alloc_lseg */
++void
++nfs4_set_layout_deviceid(struct pnfs_layout_segment *l, struct nfs4_deviceid *d)
++{
++	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
++	l->deviceid = d;
++	kref_get(&d->de_kref);
++}
++EXPORT_SYMBOL(nfs4_set_layout_deviceid);
++
++/* Called from layoutdriver_io_operations->free_lseg */
++void
++nfs4_unset_layout_deviceid(struct pnfs_layout_segment *l,
++			   struct nfs4_deviceid *d,
++			   void (*free_callback)(struct kref *))
++{
++	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
++	l->deviceid = NULL;
++	kref_put(&d->de_kref, free_callback);
++}
++EXPORT_SYMBOL(nfs4_unset_layout_deviceid);
++
++struct nfs4_deviceid *
++nfs4_find_deviceid(struct nfs4_deviceid_cache *c, struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++	long hash = nfs4_deviceid_hash(id);
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	rcu_read_lock();
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (!memcmp(&d->de_id, id, NFS4_PNFS_DEVICEID4_SIZE)) {
++			rcu_read_unlock();
++			return d;	/* NOTE(review): no kref is taken here */
++		}
++	}
++	rcu_read_unlock();
++	return NULL;
++}
++EXPORT_SYMBOL(nfs4_find_deviceid);
++
++/*
++ * Add a deviceid to the cache, or return the entry already cached for that id.
++ * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
++ */
++struct nfs4_deviceid *
++nfs4_add_deviceid(struct nfs4_deviceid_cache *c, struct nfs4_deviceid *new)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++	long hash = nfs4_deviceid_hash(&new->de_id);
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	spin_lock(&c->dc_lock);
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (!memcmp(&d->de_id, &new->de_id, NFS4_PNFS_DEVICEID4_SIZE)) {
++			spin_unlock(&c->dc_lock);
++			dprintk("%s [discard]\n", __func__);
++			c->dc_free_callback(&new->de_kref);
++			return d;	/* existing entry; no kref_get is done here */
++		}
++	}
++	hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
++	spin_unlock(&c->dc_lock);
++	dprintk("%s [new]\n", __func__);
++	return new;
++}
++EXPORT_SYMBOL(nfs4_add_deviceid);
++
++static int
++nfs4_remove_deviceid(struct nfs4_deviceid_cache *c, long hash,
++		     struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	spin_lock(&c->dc_lock);
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (id && memcmp(id, &d->de_id, NFS4_PNFS_DEVICEID4_SIZE))
++			continue;
++		hlist_del_rcu(&d->de_node);
++		spin_unlock(&c->dc_lock);
++		synchronize_rcu();
++		dprintk("%s [%d]\n", __func__,
++			atomic_read(&d->de_kref.refcount));
++		kref_put(&d->de_kref, c->dc_free_callback);
++		return 1;
++	}
++	spin_unlock(&c->dc_lock);
++	return 0;
++}
++
++void
++nfs4_delete_device(struct nfs4_deviceid_cache *c, struct pnfs_deviceid *id)
++{
++	long hash = nfs4_deviceid_hash(id);
++
++	nfs4_remove_deviceid(c, hash, id);
++}
++EXPORT_SYMBOL(nfs4_delete_device);
++
++static void
++nfs4_free_deviceid_cache(struct kref *kref)
++{
++	struct nfs4_deviceid_cache *cache =
++		container_of(kref, struct nfs4_deviceid_cache, dc_kref);
++	long i;
++
++	for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
++		while (nfs4_remove_deviceid(cache, i, NULL))
++			;
++	kfree(cache);
++}
++
++void
++nfs4_put_deviceid_cache(struct nfs_client *clp)
++{
++	struct nfs4_deviceid_cache *tmp = clp->cl_devid_cache;
++	int refcount;
++
++	dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
++	spin_lock(&clp->cl_lock);
++	refcount = atomic_read(&clp->cl_devid_cache->dc_kref.refcount);
++	if (refcount == 1)	/* this put drops the last reference */
++		clp->cl_devid_cache = NULL;	/* unpublish before freeing */
++	spin_unlock(&clp->cl_lock);
++	dprintk("%s [%d]\n", __func__, refcount);
++	kref_put(&tmp->dc_kref, nfs4_free_deviceid_cache);
++}
++EXPORT_SYMBOL(nfs4_put_deviceid_cache);
+diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.h
+--- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-08-23 12:09:03.358501440 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-08-23 12:09:03.358501440 -0400
+@@ -0,0 +1,355 @@
++/*
++ *  fs/nfs/pnfs.h
++ *
++ *  pNFS client data structures.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef FS_NFS_PNFS_H
++#define FS_NFS_PNFS_H
++
++#include <linux/nfs4_pnfs.h>
++
++#ifdef CONFIG_NFS_V4_1
++
++#include <linux/nfs_page.h>
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs_iostat.h>
++#include "iostat.h"
++
++/* nfs4proc.c */
++extern int nfs4_pnfs_getdevicelist(struct nfs_server *server,
++				   const struct nfs_fh *fh,
++				   struct pnfs_devicelist *devlist);
++extern int nfs4_pnfs_getdeviceinfo(struct nfs_server *server,
++				   struct pnfs_device *dev);
++extern int pnfs4_proc_layoutget(struct nfs4_pnfs_layoutget *lgp);
++extern int pnfs4_proc_layoutcommit(struct pnfs_layoutcommit_data *data,
++				   int issync);
++extern int pnfs4_proc_layoutreturn(struct nfs4_pnfs_layoutreturn *lrp, bool wait);
++
++/* pnfs.c */
++extern const nfs4_stateid zero_stateid;
++
++void _pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp);
++
++int _pnfs_return_layout(struct inode *, struct nfs4_pnfs_layout_segment *,
++			const nfs4_stateid *stateid, /* optional */
++			enum pnfs_layoutreturn_type, bool wait);
++void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *mntfh, u32 id);
++void unmount_pnfs_layoutdriver(struct nfs_server *);
++enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
++					     const struct rpc_call_ops *, int);
++enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
++					    const struct rpc_call_ops *);
++int pnfs_initialize(void);
++void pnfs_uninitialize(void);
++void pnfs_layoutcommit_free(struct pnfs_layoutcommit_data *data);
++void pnfs_cleanup_layoutcommit(struct pnfs_layoutcommit_data *data);
++int pnfs_layoutcommit_inode(struct inode *inode, int sync);
++void pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent);
++void pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx);
++unsigned int pnfs_getiosize(struct nfs_server *server);
++void pnfs_set_ds_iosize(struct nfs_server *server);
++enum pnfs_try_status pnfs_try_to_commit(struct nfs_write_data *,
++					 const struct rpc_call_ops *, int);
++void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
++			   struct nfs_open_context *, struct list_head *,
++			   size_t *);
++void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
++			    size_t *);
++void pnfs_free_fsdata(struct pnfs_fsdata *fsdata);
++void pnfs_get_layout_done(struct nfs4_pnfs_layoutget *, int rpc_status);
++int pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp);
++void pnfs_layout_release(struct pnfs_layout_type *, struct nfs4_pnfs_layout_segment *range);
++void pnfs_set_layout_stateid(struct pnfs_layout_type *lo,
++			     const nfs4_stateid *stateid);
++void pnfs_destroy_layout(struct nfs_inode *);
++void pnfs_destroy_all_layouts(struct nfs_client *);
++void put_layout(struct inode *inode);
++void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_type *lo);
++int _pnfs_write_begin(struct inode *inode, struct page *page,
++		      loff_t pos, unsigned len,
++		      struct pnfs_layout_segment *lseg,
++		      struct pnfs_fsdata **fsdata);
++int _pnfs_write_end(struct inode *inode, struct page *page,
++		    loff_t pos, unsigned len, unsigned copied,
++		    struct pnfs_layout_segment *lseg);
++
++#define PNFS_EXISTS_LDIO_OP(srv, opname) ((srv)->pnfs_curr_ld &&	\
++				     (srv)->pnfs_curr_ld->ld_io_ops &&	\
++				     (srv)->pnfs_curr_ld->ld_io_ops->opname)
++#define PNFS_EXISTS_LDPOLICY_OP(srv, opname) ((srv)->pnfs_curr_ld &&	\
++				     (srv)->pnfs_curr_ld->ld_policy_ops && \
++				     (srv)->pnfs_curr_ld->ld_policy_ops->opname)
++
++#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
++
++static inline int lo_fail_bit(u32 iomode)
++{
++	return iomode == IOMODE_RW ?
++			 NFS_INO_RW_LAYOUT_FAILED : NFS_INO_RO_LAYOUT_FAILED;
++}
++
++/* Return true if a layout driver is being used for this mountpoint */
++static inline int pnfs_enabled_sb(struct nfs_server *nfss)
++{
++	return nfss->pnfs_curr_ld != NULL;
++}
++
++static inline int pnfs_grow_ok(struct pnfs_layout_segment *lseg,
++			       struct pnfs_fsdata *fsdata)
++{
++	return !fsdata  || ((struct pnfs_layout_segment *)fsdata == lseg) ||
++		!fsdata->bypass_eof;
++}
++
++/* Should the pNFS client commit and return the layout upon a setattr */
++static inline bool
++pnfs_ld_layoutret_on_setattr(struct inode *inode)
++{
++	if (!pnfs_enabled_sb(NFS_SERVER(inode)))
++		return false;
++	return NFS_SERVER(inode)->pnfs_curr_ld->ld_policy_ops->flags &
++		PNFS_LAYOUTRET_ON_SETATTR;
++}
++
++/* Should the pNFS client commit and return the layout on close
++ */
++static inline int
++pnfs_layout_roc_iomode(struct nfs_inode *nfsi)
++{
++	return nfsi->layout->roc_iomode;
++}
++
++static inline int pnfs_write_begin(struct file *filp, struct page *page,
++				   loff_t pos, unsigned len,
++				   struct pnfs_layout_segment *lseg,
++				   void **fsdata)
++{
++	struct inode *inode = filp->f_dentry->d_inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int status = 0;
++
++	*fsdata = lseg;
++	if (lseg && PNFS_EXISTS_LDIO_OP(nfss, write_begin))
++		status = _pnfs_write_begin(inode, page, pos, len, lseg,
++					   (struct pnfs_fsdata **) fsdata);
++	return status;
++}
++
++static inline int pnfs_write_end(struct file *filp, struct page *page,
++				 loff_t pos, unsigned len, unsigned copied,
++				 struct pnfs_layout_segment *lseg)
++{
++	struct inode *inode = filp->f_dentry->d_inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++
++	if (PNFS_EXISTS_LDIO_OP(nfss, write_end))
++		return _pnfs_write_end(inode, page, pos, len, copied, lseg);
++	else
++		return 0;
++}
++
++static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
++{
++	if (fsdata) {
++		struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode);
++
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_end_cleanup))
++			nfss->pnfs_curr_ld->ld_io_ops->write_end_cleanup(filp, fsdata);
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_begin))
++			pnfs_free_fsdata(fsdata);
++	}
++}
++
++static inline int pnfs_return_layout(struct inode *ino,
++				     struct nfs4_pnfs_layout_segment *lseg,
++				     const nfs4_stateid *stateid, /* optional */
++				     enum pnfs_layoutreturn_type type,
++				     bool wait)
++{
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct nfs_server *nfss = NFS_SERVER(ino);
++
++	if (pnfs_enabled_sb(nfss) &&
++	    (type != RETURN_FILE || has_layout(nfsi)))
++		return _pnfs_return_layout(ino, lseg, stateid, type, wait);
++
++	return 0;
++}
++
++static inline void pnfs_update_layout(struct inode *ino,
++	struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp)
++{
++	struct nfs_server *nfss = NFS_SERVER(ino);
++
++	if (pnfs_enabled_sb(nfss))
++		_pnfs_update_layout(ino, ctx, pos, count, access_type, lsegpp);
++	else {
++		if (lsegpp)
++			*lsegpp = NULL;
++	}
++}
++
++static inline int pnfs_get_write_status(struct nfs_write_data *data)
++{
++	return data->pdata.pnfs_error;
++}
++
++static inline int pnfs_get_read_status(struct nfs_read_data *data)
++{
++	return data->pdata.pnfs_error;
++}
++
++static inline int pnfs_use_rpc(struct nfs_server *nfss)
++{
++	if (pnfs_enabled_sb(nfss))
++		return pnfs_ld_use_rpc_code(nfss->pnfs_curr_ld);
++
++	return 1;
++}
++
++static inline struct pnfs_layout_segment *
++nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata)
++{
++	if (fsdata) {
++		struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode);
++
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_begin))
++			return ((struct pnfs_fsdata *) fsdata)->lseg;
++	}
++	return fsdata;
++}
++#else  /* CONFIG_NFS_V4_1 */
++
++static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
++{
++}
++
++static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
++{
++}
++
++static inline void get_lseg(struct pnfs_layout_segment *lseg)
++{
++}
++
++static inline void put_lseg(struct pnfs_layout_segment *lseg)
++{
++}
++
++static inline void
++pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp)
++{
++	if (lsegpp)
++		*lsegpp = NULL;
++}
++
++static inline int pnfs_grow_ok(struct pnfs_layout_segment *lseg,
++			       struct pnfs_fsdata *fsdata)
++{
++	return 1;
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_read_data(struct nfs_read_data *data,
++		      const struct rpc_call_ops *call_ops)
++{
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_write_data(struct nfs_write_data *data,
++		       const struct rpc_call_ops *call_ops, int how)
++{
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_commit(struct nfs_write_data *data,
++		   const struct rpc_call_ops *call_ops, int how)
++{
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static inline int pnfs_write_begin(struct file *filp, struct page *page,
++				   loff_t pos, unsigned len,
++				   struct pnfs_layout_segment *lseg,
++				   void **fsdata)
++{
++	*fsdata = NULL;
++	return 0;
++}
++
++static inline int pnfs_write_end(struct file *filp, struct page *page,
++				 loff_t pos, unsigned len, unsigned copied,
++				 struct pnfs_layout_segment *lseg)
++{
++	return 0;
++}
++
++static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
++{
++}
++
++static inline int pnfs_get_write_status(struct nfs_write_data *data)
++{
++	return 0;
++}
++
++static inline int pnfs_get_read_status(struct nfs_read_data *data)
++{
++	return 0;
++}
++
++static inline int pnfs_use_rpc(struct nfs_server *nfss)
++{
++	return 1;
++}
++
++static inline int pnfs_layoutcommit_inode(struct inode *inode, int sync)
++{
++	return 0;
++}
++
++static inline bool
++pnfs_ld_layoutret_on_setattr(struct inode *inode)
++{
++	return false;
++}
++
++static inline int
++pnfs_layout_roc_iomode(struct nfs_inode *nfsi)
++{
++	return 0;
++}
++
++static inline int pnfs_return_layout(struct inode *ino,
++				     struct nfs4_pnfs_layout_segment *lseg,
++				     const nfs4_stateid *stateid, /* optional */
++				     enum pnfs_layoutreturn_type type,
++				     bool wait)
++{
++	return 0;
++}
++
++static inline struct pnfs_layout_segment *
++nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata)
++{
++	return NULL;
++}
++
++#endif /* CONFIG_NFS_V4_1 */
++
++#endif /* FS_NFS_PNFS_H */
+diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.c
+--- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-08-23 12:08:29.057511533 -0400
++++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-23 12:09:03.359501471 -0400
+@@ -443,7 +443,7 @@ nfs_proc_symlink(struct inode *dir, stru
+ 	fattr = nfs_alloc_fattr();
+ 	status = -ENOMEM;
+ 	if (fh == NULL || fattr == NULL)
+-		goto out;
++		goto out_free;
+ 
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+@@ -455,7 +455,7 @@ nfs_proc_symlink(struct inode *dir, stru
+ 	 */
+ 	if (status == 0)
+ 		status = nfs_instantiate(dentry, fh, fattr);
+-
++out_free:
+ 	nfs_free_fattr(fattr);
+ 	nfs_free_fhandle(fh);
+ out:
+@@ -694,6 +694,7 @@ const struct nfs_rpc_ops nfs_v2_clientop
+ 	.dentry_ops	= &nfs_dentry_operations,
+ 	.dir_inode_ops	= &nfs_dir_inode_operations,
+ 	.file_inode_ops	= &nfs_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs_proc_get_root,
+ 	.getattr	= nfs_proc_getattr,
+ 	.setattr	= nfs_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.c
+--- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-08-23 12:08:29.057511533 -0400
++++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-23 12:09:03.359501471 -0400
+@@ -18,8 +18,12 @@
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/nfs_page.h>
++#include <linux/smp_lock.h>
++#include <linux/module.h>
+ 
+ #include <asm/system.h>
++#include <linux/module.h>
++#include "pnfs.h"
+ 
+ #include "nfs4_fs.h"
+ #include "internal.h"
+@@ -117,11 +121,14 @@ int nfs_readpage_async(struct nfs_open_c
+ 	LIST_HEAD(one_request);
+ 	struct nfs_page	*new;
+ 	unsigned int len;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	len = nfs_page_length(page);
+ 	if (len == 0)
+ 		return nfs_return_empty_page(page);
+-	new = nfs_create_request(ctx, inode, page, 0, len);
++	pnfs_update_layout(inode, ctx, 0, NFS4_MAX_UINT64, IOMODE_READ, &lseg);
++	new = nfs_create_request(ctx, inode, page, 0, len, lseg);
++	put_lseg(lseg);
+ 	if (IS_ERR(new)) {
+ 		unlock_page(page);
+ 		return PTR_ERR(new);
+@@ -155,24 +162,20 @@ static void nfs_readpage_release(struct 
+ 	nfs_release_request(req);
+ }
+ 
+-/*
+- * Set up the NFS read request struct
+- */
+-static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+-		const struct rpc_call_ops *call_ops,
+-		unsigned int count, unsigned int offset)
++int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++		      const struct rpc_call_ops *call_ops)
+ {
+-	struct inode *inode = req->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = req->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+ 		.task = &data->task,
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.rpc_message = &msg,
+ 		.callback_ops = call_ops,
+ 		.callback_data = data,
+@@ -180,9 +183,46 @@ static int nfs_read_rpcsetup(struct nfs_
+ 		.flags = RPC_TASK_ASYNC | swap_flags,
+ 	};
+ 
++	/* Set up the initial task struct. */
++	NFS_PROTO(inode)->read_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
++			data->task.tk_pid,
++			inode->i_sb->s_id,
++			(long long)NFS_FILEID(inode),
++			data->args.count,
++			(unsigned long long)data->args.offset);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	rpc_put_task(task);
++	return 0;
++}
++EXPORT_SYMBOL(nfs_initiate_read);
++
++int pnfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++		       const struct rpc_call_ops *call_ops)
++{
++	if (data->req->wb_lseg &&
++	    (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
++		return pnfs_get_read_status(data);
++
++	return nfs_initiate_read(data, clnt, call_ops);
++}
++
++/*
++ * Set up the NFS read request struct
++ */
++static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
++		const struct rpc_call_ops *call_ops,
++		unsigned int count, unsigned int offset)
++{
++	struct inode *inode = req->wb_context->path.dentry->d_inode;
++
+ 	data->req	  = req;
+ 	data->inode	  = inode;
+-	data->cred	  = msg.rpc_cred;
++	data->cred	  = req->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(inode);
+ 	data->args.offset = req_offset(req) + offset;
+@@ -190,27 +230,14 @@ static int nfs_read_rpcsetup(struct nfs_
+ 	data->args.pages  = data->pagevec;
+ 	data->args.count  = count;
+ 	data->args.context = get_nfs_open_context(req->wb_context);
++	data->args.lock_context = req->wb_lock_context;
+ 
+ 	data->res.fattr   = &data->fattr;
+ 	data->res.count   = count;
+ 	data->res.eof     = 0;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	/* Set up the initial task struct. */
+-	NFS_PROTO(inode)->read_setup(data, &msg);
+-
+-	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+-			data->task.tk_pid,
+-			inode->i_sb->s_id,
+-			(long long)NFS_FILEID(inode),
+-			count,
+-			(unsigned long long)data->args.offset);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task))
+-		return PTR_ERR(task);
+-	rpc_put_task(task);
+-	return 0;
++	return pnfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
+ }
+ 
+ static void
+@@ -354,7 +381,14 @@ static void nfs_readpage_retry(struct rp
+ {
+ 	struct nfs_readargs *argp = &data->args;
+ 	struct nfs_readres *resp = &data->res;
++	struct nfs_client *clp = NFS_SERVER(data->inode)->nfs_client;
+ 
++#ifdef CONFIG_NFS_V4_1
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		clp = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	if (resp->eof || resp->count == argp->count)
+ 		return;
+ 
+@@ -368,7 +402,10 @@ static void nfs_readpage_retry(struct rp
+ 	argp->offset += resp->count;
+ 	argp->pgbase += resp->count;
+ 	argp->count -= resp->count;
+-	nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client);
++#ifdef CONFIG_NFS_V4_1
++	data->pdata.pnfs_error = -EAGAIN;
++#endif /* CONFIG_NFS_V4_1 */
++	nfs_restart_rpc(task, clp);
+ }
+ 
+ /*
+@@ -409,13 +446,19 @@ static void nfs_readpage_release_partial
+ void nfs_read_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs_read_data *data = calldata;
++	struct nfs4_session *ds_session = NULL;
+ 
+-	if (nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client,
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		ds_session = data->fldata.ds_nfs_client->cl_session;
++	}
++	if (nfs4_setup_sequence(NFS_SERVER(data->inode), ds_session,
+ 				&data->args.seq_args, &data->res.seq_res,
+ 				0, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
++EXPORT_SYMBOL(nfs_read_prepare);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static const struct rpc_call_ops nfs_read_partial_ops = {
+@@ -568,7 +611,8 @@ readpage_async_filler(void *data, struct
+ 	if (len == 0)
+ 		return nfs_return_empty_page(page);
+ 
+-	new = nfs_create_request(desc->ctx, inode, page, 0, len);
++	new = nfs_create_request(desc->ctx, inode, page, 0, len,
++				 desc->pgio->pg_lseg);
+ 	if (IS_ERR(new))
+ 		goto out_error;
+ 
+@@ -624,6 +668,9 @@ int nfs_readpages(struct file *filp, str
+ 	if (ret == 0)
+ 		goto read_complete; /* all pages were read */
+ 
++#ifdef CONFIG_NFS_V4_1
++	pnfs_pageio_init_read(&pgio, inode, desc.ctx, pages, &rsize);
++#endif /* CONFIG_NFS_V4_1 */
+ 	if (rsize < PAGE_CACHE_SIZE)
+ 		nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
+ 	else
+@@ -632,6 +679,7 @@ int nfs_readpages(struct file *filp, str
+ 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
+ 
+ 	nfs_pageio_complete(&pgio);
++	put_lseg(pgio.pg_lseg);
+ 	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ 	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
+ read_complete:
+diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/super.c
+--- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-23 12:08:29.059491391 -0400
++++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-23 12:09:03.361501458 -0400
+@@ -64,6 +64,7 @@
+ #include "iostat.h"
+ #include "internal.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_VFS
+ 
+@@ -669,6 +670,28 @@ static int nfs_show_options(struct seq_f
+ 
+ 	return 0;
+ }
++#ifdef CONFIG_NFS_V4_1
++void show_sessions(struct seq_file *m, struct nfs_server *server)
++{
++	if (nfs4_has_session(server->nfs_client))
++		seq_printf(m, ",sessions");
++}
++#else
++void show_sessions(struct seq_file *m, struct nfs_server *server) {}
++#endif
++
++#ifdef CONFIG_NFS_V4_1
++void show_pnfs(struct seq_file *m, struct nfs_server *server)
++{
++	seq_printf(m, ",pnfs=");
++	if (server->pnfs_curr_ld)
++		seq_printf(m, "%s", server->pnfs_curr_ld->name);
++	else
++		seq_printf(m, "not configured");
++}
++#else  /* CONFIG_NFS_V4_1 */
++void show_pnfs(struct seq_file *m, struct nfs_server *server) {}
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ /*
+  * Present statistical information for this VFS mountpoint
+@@ -707,6 +730,8 @@ static int nfs_show_stats(struct seq_fil
+ 		seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
+ 		seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
+ 		seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
++		show_sessions(m, nfss);
++		show_pnfs(m, nfss);
+ 	}
+ #endif
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unlink.c
+--- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-08-23 12:08:29.060501485 -0400
++++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-23 12:09:03.362419975 -0400
+@@ -110,7 +110,7 @@ void nfs_unlink_prepare(struct rpc_task 
+ 	struct nfs_unlinkdata *data = calldata;
+ 	struct nfs_server *server = NFS_SERVER(data->dir);
+ 
+-	if (nfs4_setup_sequence(server->nfs_client, &data->args.seq_args,
++	if (nfs4_setup_sequence(server, NULL, &data->args.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/write.c
+--- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-08-23 12:08:27.630563929 -0400
++++ linux-2.6.34.noarch/fs/nfs/write.c	2010-08-23 12:09:03.364491337 -0400
+@@ -20,6 +20,7 @@
+ #include <linux/nfs_mount.h>
+ #include <linux/nfs_page.h>
+ #include <linux/backing-dev.h>
++#include <linux/module.h>
+ 
+ #include <asm/uaccess.h>
+ 
+@@ -28,6 +29,7 @@
+ #include "iostat.h"
+ #include "nfs4_fs.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
+ 
+@@ -59,6 +61,7 @@ struct nfs_write_data *nfs_commitdata_al
+ 	}
+ 	return p;
+ }
++EXPORT_SYMBOL(nfs_commitdata_alloc);
+ 
+ void nfs_commit_free(struct nfs_write_data *p)
+ {
+@@ -66,6 +69,7 @@ void nfs_commit_free(struct nfs_write_da
+ 		kfree(p->pagevec);
+ 	mempool_free(p, nfs_commit_mempool);
+ }
++EXPORT_SYMBOL(nfs_commit_free);
+ 
+ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+ {
+@@ -418,6 +422,17 @@ static void nfs_inode_remove_request(str
+ 	nfs_clear_request(req);
+ 	nfs_release_request(req);
+ }
++static void
++nfs_mark_request_nopnfs(struct nfs_page *req)
++{
++	struct pnfs_layout_segment *lseg = req->wb_lseg;
++
++	if (req->wb_lseg == NULL)
++		return;
++	req->wb_lseg = NULL;
++	put_lseg(lseg);
++	dprintk(" retry through MDS\n");
++}
+ 
+ static void
+ nfs_mark_request_dirty(struct nfs_page *req)
+@@ -523,7 +538,7 @@ nfs_need_commit(struct nfs_inode *nfsi)
+  * The requests are *not* checked to ensure that they form a contiguous set.
+  */
+ static int
+-nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
++nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages, int *use_pnfs)
+ {
+ 	struct nfs_inode *nfsi = NFS_I(inode);
+ 	int ret;
+@@ -531,7 +546,8 @@ nfs_scan_commit(struct inode *inode, str
+ 	if (!nfs_need_commit(nfsi))
+ 		return 0;
+ 
+-	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
++	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT,
++			    use_pnfs);
+ 	if (ret > 0)
+ 		nfsi->ncommit -= ret;
+ 	if (nfs_need_commit(NFS_I(inode)))
+@@ -560,7 +576,8 @@ static inline int nfs_scan_commit(struct
+ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
+ 		struct page *page,
+ 		unsigned int offset,
+-		unsigned int bytes)
++		unsigned int bytes,
++		struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_page *req;
+ 	unsigned int rqend;
+@@ -585,8 +602,8 @@ static struct nfs_page *nfs_try_to_updat
+ 		 * Note: nfs_flush_incompatible() will already
+ 		 * have flushed out requests having wrong owners.
+ 		 */
+-		if (offset > rqend
+-		    || end < req->wb_offset)
++		if (offset > rqend || end < req->wb_offset ||
++		    req->wb_lseg != lseg)
+ 			goto out_flushme;
+ 
+ 		if (nfs_set_page_tag_locked(req))
+@@ -634,16 +651,17 @@ out_err:
+  * already called nfs_flush_incompatible() if necessary.
+  */
+ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
+-		struct page *page, unsigned int offset, unsigned int bytes)
++		struct page *page, unsigned int offset, unsigned int bytes,
++		struct pnfs_layout_segment *lseg)
+ {
+ 	struct inode *inode = page->mapping->host;
+ 	struct nfs_page	*req;
+ 	int error;
+ 
+-	req = nfs_try_to_update_request(inode, page, offset, bytes);
++	req = nfs_try_to_update_request(inode, page, offset, bytes, lseg);
+ 	if (req != NULL)
+ 		goto out;
+-	req = nfs_create_request(ctx, inode, page, offset, bytes);
++	req = nfs_create_request(ctx, inode, page, offset, bytes, lseg);
+ 	if (IS_ERR(req))
+ 		goto out;
+ 	error = nfs_inode_add_request(inode, req);
+@@ -656,23 +674,27 @@ out:
+ }
+ 
+ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
+-		unsigned int offset, unsigned int count)
++			       unsigned int offset, unsigned int count,
++			       struct pnfs_layout_segment *lseg,
++			       void *fsdata)
+ {
+ 	struct nfs_page	*req;
+ 
+-	req = nfs_setup_write_request(ctx, page, offset, count);
++	req = nfs_setup_write_request(ctx, page, offset, count, lseg);
+ 	if (IS_ERR(req))
+ 		return PTR_ERR(req);
+ 	nfs_mark_request_dirty(req);
+ 	/* Update file length */
+-	nfs_grow_file(page, offset, count);
++	if (pnfs_grow_ok(lseg, fsdata))
++		nfs_grow_file(page, offset, count);
+ 	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+ 	nfs_mark_request_dirty(req);
+ 	nfs_clear_page_tag_locked(req);
+ 	return 0;
+ }
+ 
+-int nfs_flush_incompatible(struct file *file, struct page *page)
++int nfs_flush_incompatible(struct file *file, struct page *page,
++			   struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_open_context *ctx = nfs_file_open_context(file);
+ 	struct nfs_page	*req;
+@@ -689,7 +711,10 @@ int nfs_flush_incompatible(struct file *
+ 		req = nfs_page_find_request(page);
+ 		if (req == NULL)
+ 			return 0;
+-		do_flush = req->wb_page != page || req->wb_context != ctx;
++		do_flush = req->wb_page != page || req->wb_context != ctx ||
++			req->wb_lock_context->lockowner != current->files ||
++			req->wb_lock_context->pid != current->tgid ||
++			req->wb_lseg != lseg;
+ 		nfs_release_request(req);
+ 		if (!do_flush)
+ 			return 0;
+@@ -716,7 +741,8 @@ static int nfs_write_pageuptodate(struct
+  * things with a page scheduled for an RPC call (e.g. invalidate it).
+  */
+ int nfs_updatepage(struct file *file, struct page *page,
+-		unsigned int offset, unsigned int count)
++		   unsigned int offset, unsigned int count,
++		   struct pnfs_layout_segment *lseg, void *fsdata)
+ {
+ 	struct nfs_open_context *ctx = nfs_file_open_context(file);
+ 	struct inode	*inode = page->mapping->host;
+@@ -741,7 +767,7 @@ int nfs_updatepage(struct file *file, st
+ 		offset = 0;
+ 	}
+ 
+-	status = nfs_writepage_setup(ctx, page, offset, count);
++	status = nfs_writepage_setup(ctx, page, offset, count, lseg, fsdata);
+ 	if (status < 0)
+ 		nfs_set_pageerror(page);
+ 
+@@ -771,25 +797,21 @@ static int flush_task_priority(int how)
+ 	return RPC_PRIORITY_NORMAL;
+ }
+ 
+-/*
+- * Set up the argument/result storage required for the RPC call.
+- */
+-static int nfs_write_rpcsetup(struct nfs_page *req,
+-		struct nfs_write_data *data,
+-		const struct rpc_call_ops *call_ops,
+-		unsigned int count, unsigned int offset,
+-		int how)
++int nfs_initiate_write(struct nfs_write_data *data,
++		       struct rpc_clnt *clnt,
++		       const struct rpc_call_ops *call_ops,
++		       int how)
+ {
+-	struct inode *inode = req->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int priority = flush_task_priority(how);
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = req->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.task = &data->task,
+ 		.rpc_message = &msg,
+ 		.callback_ops = call_ops,
+@@ -800,12 +822,62 @@ static int nfs_write_rpcsetup(struct nfs
+ 	};
+ 	int ret = 0;
+ 
++	/* Set up the initial task struct.  */
++	NFS_PROTO(inode)->write_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated write call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task)) {
++		ret = PTR_ERR(task);
++		goto out;
++	}
++	if (how & FLUSH_SYNC) {
++		ret = rpc_wait_for_completion_task(task);
++		if (ret == 0)
++			ret = task->tk_status;
++	}
++	rpc_put_task(task);
++out:
++	return ret;
++}
++EXPORT_SYMBOL(nfs_initiate_write);
++
++int pnfs_initiate_write(struct nfs_write_data *data,
++			struct rpc_clnt *clnt,
++			const struct rpc_call_ops *call_ops,
++			int how)
++{
++	if (data->req->wb_lseg &&
++	    (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
++		return pnfs_get_write_status(data);
++
++	return nfs_initiate_write(data, clnt, call_ops, how);
++}
++
++/*
++ * Set up the argument/result storage required for the RPC call.
++ */
++static int nfs_write_rpcsetup(struct nfs_page *req,
++		struct nfs_write_data *data,
++		const struct rpc_call_ops *call_ops,
++		unsigned int count, unsigned int offset,
++		int how)
++{
++	struct inode *inode = req->wb_context->path.dentry->d_inode;
++
+ 	/* Set up the RPC argument and reply structs
+ 	 * NB: take care not to mess about with data->commit et al. */
+ 
+ 	data->req = req;
+ 	data->inode = inode = req->wb_context->path.dentry->d_inode;
+-	data->cred = msg.rpc_cred;
++	data->cred = req->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(inode);
+ 	data->args.offset = req_offset(req) + offset;
+@@ -813,6 +885,7 @@ static int nfs_write_rpcsetup(struct nfs
+ 	data->args.pages  = data->pagevec;
+ 	data->args.count  = count;
+ 	data->args.context = get_nfs_open_context(req->wb_context);
++	data->args.lock_context = req->wb_lock_context;
+ 	data->args.stable  = NFS_UNSTABLE;
+ 	if (how & FLUSH_STABLE) {
+ 		data->args.stable = NFS_DATA_SYNC;
+@@ -825,30 +898,7 @@ static int nfs_write_rpcsetup(struct nfs
+ 	data->res.verf    = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	/* Set up the initial task struct.  */
+-	NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-	dprintk("NFS: %5u initiated write call "
+-		"(req %s/%lld, %u bytes @ offset %llu)\n",
+-		data->task.tk_pid,
+-		inode->i_sb->s_id,
+-		(long long)NFS_FILEID(inode),
+-		count,
+-		(unsigned long long)data->args.offset);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task)) {
+-		ret = PTR_ERR(task);
+-		goto out;
+-	}
+-	if (how & FLUSH_SYNC) {
+-		ret = rpc_wait_for_completion_task(task);
+-		if (ret == 0)
+-			ret = task->tk_status;
+-	}
+-	rpc_put_task(task);
+-out:
+-	return ret;
++	return pnfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
+ }
+ 
+ /* If a nfs_flush_* function fails, it should remove reqs from @head and
+@@ -859,6 +909,7 @@ static void nfs_redirty_request(struct n
+ {
+ 	struct page *page = req->wb_page;
+ 
++	nfs_mark_request_nopnfs(req);
+ 	nfs_mark_request_dirty(req);
+ 	nfs_clear_page_tag_locked(req);
+ 	nfs_end_page_writeback(page);
+@@ -971,6 +1022,10 @@ static void nfs_pageio_init_write(struct
+ {
+ 	size_t wsize = NFS_SERVER(inode)->wsize;
+ 
++#ifdef CONFIG_NFS_V4_1
++	pnfs_pageio_init_write(pgio, inode, &wsize);
++#endif /* CONFIG_NFS_V4_1 */
++
+ 	if (wsize < PAGE_CACHE_SIZE)
+ 		nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
+ 	else
+@@ -1036,13 +1091,27 @@ out:
+ void nfs_write_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs_write_data *data = calldata;
+-	struct nfs_client *clp = (NFS_SERVER(data->inode))->nfs_client;
++	struct nfs4_session *ds_session = NULL;
+ 
+-	if (nfs4_setup_sequence(clp, &data->args.seq_args,
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		ds_session = data->fldata.ds_nfs_client->cl_session;
++	} else if (data->args.count > NFS_SERVER(data->inode)->wsize) {
++		/* retrying via MDS? */
++		data->pdata.orig_count = data->args.count;
++		data->args.count = NFS_SERVER(data->inode)->wsize;
++		dprintk("%s: trimmed count %u to wsize %u\n", __func__,
++		data->pdata.orig_count, data->args.count);
++	} else
++		data->pdata.orig_count = 0;
++
++	if (nfs4_setup_sequence(NFS_SERVER(data->inode), ds_session,
++				&data->args.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
++EXPORT_SYMBOL(nfs_write_prepare);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static const struct rpc_call_ops nfs_write_partial_ops = {
+@@ -1126,10 +1195,11 @@ int nfs_writeback_done(struct rpc_task *
+ 	struct nfs_writeargs	*argp = &data->args;
+ 	struct nfs_writeres	*resp = &data->res;
+ 	struct nfs_server	*server = NFS_SERVER(data->inode);
++	struct nfs_client	*clp = server->nfs_client;
+ 	int status;
+ 
+-	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
+-		task->tk_pid, task->tk_status);
++	dprintk("NFS: %5u nfs_writeback_done (status %d count %u)\n",
++		task->tk_pid, task->tk_status, resp->count);
+ 
+ 	/*
+ 	 * ->write_done will attempt to use post-op attributes to detect
+@@ -1142,6 +1212,13 @@ int nfs_writeback_done(struct rpc_task *
+ 	if (status != 0)
+ 		return status;
+ 	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
++#ifdef CONFIG_NFS_V4_1
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		clp = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+ 	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
+@@ -1158,7 +1235,7 @@ int nfs_writeback_done(struct rpc_task *
+ 		if (time_before(complain, jiffies)) {
+ 			dprintk("NFS:       faulty NFS server %s:"
+ 				" (committed = %d) != (stable = %d)\n",
+-				server->nfs_client->cl_hostname,
++				clp->cl_hostname,
+ 				resp->verf->committed, argp->stable);
+ 			complain = jiffies + 300 * HZ;
+ 		}
+@@ -1168,6 +1245,9 @@ int nfs_writeback_done(struct rpc_task *
+ 	if (task->tk_status >= 0 && resp->count < argp->count) {
+ 		static unsigned long    complain;
+ 
++		dprintk("NFS:       short write:"
++			" (resp->count %u) < (argp->count = %u)\n",
++			resp->count, argp->count);
+ 		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+ 
+ 		/* Has the server at least made some progress? */
+@@ -1184,7 +1264,10 @@ int nfs_writeback_done(struct rpc_task *
+ 				 */
+ 				argp->stable = NFS_FILE_SYNC;
+ 			}
+-			nfs_restart_rpc(task, server->nfs_client);
++#ifdef CONFIG_NFS_V4_1
++			data->pdata.pnfs_error = -EAGAIN;
++#endif /* CONFIG_NFS_V4_1 */
++			nfs_restart_rpc(task, clp);
+ 			return -EAGAIN;
+ 		}
+ 		if (time_before(complain, jiffies)) {
+@@ -1228,40 +1311,73 @@ static void nfs_commitdata_release(void 
+ 	nfs_commit_free(wdata);
+ }
+ 
+-/*
+- * Set up the argument/result storage required for the RPC call.
+- */
+-static int nfs_commit_rpcsetup(struct list_head *head,
+-		struct nfs_write_data *data,
+-		int how)
++int nfs_initiate_commit(struct nfs_write_data *data,
++			struct rpc_clnt *clnt,
++			const struct rpc_call_ops *call_ops,
++			int how)
+ {
+-	struct nfs_page *first = nfs_list_entry(head->next);
+-	struct inode *inode = first->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int priority = flush_task_priority(how);
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = first->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+ 		.task = &data->task,
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.rpc_message = &msg,
+-		.callback_ops = &nfs_commit_ops,
++		.callback_ops = call_ops,
+ 		.callback_data = data,
+ 		.workqueue = nfsiod_workqueue,
+ 		.flags = RPC_TASK_ASYNC,
+ 		.priority = priority,
+ 	};
+ 
++	/* Set up the initial task struct.  */
++	NFS_PROTO(inode)->commit_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	rpc_put_task(task);
++	return 0;
++}
++EXPORT_SYMBOL(nfs_initiate_commit);
++
++
++int pnfs_initiate_commit(struct nfs_write_data *data,
++			 struct rpc_clnt *clnt,
++			 const struct rpc_call_ops *call_ops,
++			 int how, int pnfs)
++{
++	if (pnfs &&
++	    (pnfs_try_to_commit(data, call_ops, how) == PNFS_ATTEMPTED))
++		return pnfs_get_write_status(data);
++
++	return nfs_initiate_commit(data, clnt, call_ops, how);
++}
++
++/*
++ * Set up the argument/result storage required for the RPC call.
++ */
++static int nfs_commit_rpcsetup(struct list_head *head,
++		struct nfs_write_data *data,
++		int how, int pnfs)
++{
++	struct nfs_page *first = nfs_list_entry(head->next);
++	struct inode *inode = first->wb_context->path.dentry->d_inode;
++
+ 	/* Set up the RPC argument and reply structs
+ 	 * NB: take care not to mess about with data->commit et al. */
+ 
+ 	list_splice_init(head, &data->pages);
+ 
+ 	data->inode	  = inode;
+-	data->cred	  = msg.rpc_cred;
++	data->cred	  = first->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(data->inode);
+ 	/* Note: we always request a commit of the entire inode */
+@@ -1272,45 +1388,47 @@ static int nfs_commit_rpcsetup(struct li
+ 	data->res.fattr   = &data->fattr;
+ 	data->res.verf    = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
++	kref_init(&data->refcount);
++	data->parent      = NULL;
++	data->args.context = first->wb_context;  /* used by commit done */
+ 
+-	/* Set up the initial task struct.  */
+-	NFS_PROTO(inode)->commit_setup(data, &msg);
++	return pnfs_initiate_commit(data, NFS_CLIENT(inode), &nfs_commit_ops,
++				    how, pnfs);
++}
+ 
+-	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
++/* Handle memory error during commit */
++void nfs_mark_list_commit(struct list_head *head)
++{
++	struct nfs_page         *req;
+ 
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task))
+-		return PTR_ERR(task);
+-	rpc_put_task(task);
+-	return 0;
++	while (!list_empty(head)) {
++		req = nfs_list_entry(head->next);
++		nfs_list_remove_request(req);
++		nfs_mark_request_commit(req);
++		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
++		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
++				BDI_RECLAIMABLE);
++		nfs_clear_page_tag_locked(req);
++	}
+ }
++EXPORT_SYMBOL(nfs_mark_list_commit);
+ 
+ /*
+  * Commit dirty pages
+  */
+ static int
+-nfs_commit_list(struct inode *inode, struct list_head *head, int how)
++nfs_commit_list(struct inode *inode, struct list_head *head, int how, int pnfs)
+ {
+ 	struct nfs_write_data	*data;
+-	struct nfs_page         *req;
+ 
+ 	data = nfs_commitdata_alloc();
+-
+ 	if (!data)
+ 		goto out_bad;
+ 
+ 	/* Set up the argument struct */
+-	return nfs_commit_rpcsetup(head, data, how);
++	return nfs_commit_rpcsetup(head, data, how, pnfs);
+  out_bad:
+-	while (!list_empty(head)) {
+-		req = nfs_list_entry(head->next);
+-		nfs_list_remove_request(req);
+-		nfs_mark_request_commit(req);
+-		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+-		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
+-				BDI_RECLAIMABLE);
+-		nfs_clear_page_tag_locked(req);
+-	}
++	nfs_mark_list_commit(head);
+ 	nfs_commit_clear_lock(NFS_I(inode));
+ 	return -ENOMEM;
+ }
+@@ -1330,6 +1448,19 @@ static void nfs_commit_done(struct rpc_t
+ 		return;
+ }
+ 
++static inline void nfs_commit_cleanup(struct kref *kref)
++{
++	struct nfs_write_data *data;
++
++	data = container_of(kref, struct nfs_write_data, refcount);
++	/* Clear lock only when all cloned commits are finished */
++	if (data->parent)
++		kref_put(&data->parent->refcount, nfs_commit_cleanup);
++	else
++		nfs_commit_clear_lock(NFS_I(data->inode));
++	nfs_commitdata_release(data);
++}
++
+ static void nfs_commit_release(void *calldata)
+ {
+ 	struct nfs_write_data	*data = calldata;
+@@ -1347,6 +1478,11 @@ static void nfs_commit_release(void *cal
+ 			req->wb_bytes,
+ 			(long long)req_offset(req));
+ 		if (status < 0) {
++			if (req->wb_lseg) {
++				nfs_mark_request_nopnfs(req);
++				nfs_mark_request_dirty(req);
++				goto next;
++			}
+ 			nfs_context_set_write_error(req->wb_context, status);
+ 			nfs_inode_remove_request(req);
+ 			dprintk(", error = %d\n", status);
+@@ -1363,12 +1499,12 @@ static void nfs_commit_release(void *cal
+ 		}
+ 		/* We have a mismatch. Write the page again */
+ 		dprintk(" mismatch\n");
++		nfs_mark_request_nopnfs(req);
+ 		nfs_mark_request_dirty(req);
+ 	next:
+ 		nfs_clear_page_tag_locked(req);
+ 	}
+-	nfs_commit_clear_lock(NFS_I(data->inode));
+-	nfs_commitdata_release(calldata);
++	kref_put(&data->refcount, nfs_commit_cleanup);
+ }
+ 
+ static const struct rpc_call_ops nfs_commit_ops = {
+@@ -1384,21 +1520,22 @@ int nfs_commit_inode(struct inode *inode
+ 	LIST_HEAD(head);
+ 	int may_wait = how & FLUSH_SYNC;
+ 	int res = 0;
++	int use_pnfs = 0;
+ 
+ 	if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
+ 		goto out_mark_dirty;
+ 	spin_lock(&inode->i_lock);
+-	res = nfs_scan_commit(inode, &head, 0, 0);
++	res = nfs_scan_commit(inode, &head, 0, 0, &use_pnfs);
+ 	spin_unlock(&inode->i_lock);
+ 	if (res) {
+-		int error = nfs_commit_list(inode, &head, how);
++		int error = nfs_commit_list(inode, &head, how, use_pnfs);
+ 		if (error < 0)
+ 			return error;
+-		if (may_wait)
++		if (may_wait) {
+ 			wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
+ 					nfs_wait_bit_killable,
+ 					TASK_KILLABLE);
+-		else
++		} else
+ 			goto out_mark_dirty;
+ 	} else
+ 		nfs_commit_clear_lock(NFS_I(inode));
+@@ -1451,7 +1588,18 @@ static int nfs_commit_unstable_pages(str
+ 
+ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+ {
+-	return nfs_commit_unstable_pages(inode, wbc);
++	int ret;
++	ret = nfs_commit_unstable_pages(inode, wbc);
++	if (ret >= 0 && layoutcommit_needed(NFS_I(inode))) {
++		int err, sync = wbc->sync_mode;
++
++		if (wbc->nonblocking || wbc->for_background)
++			sync = 0;
++		err = pnfs_layoutcommit_inode(inode, sync);
++		if (err < 0)
++			ret = err;
++	}
++	return ret;
+ }
+ 
+ /*
+@@ -1459,6 +1607,7 @@ int nfs_write_inode(struct inode *inode,
+  */
+ int nfs_wb_all(struct inode *inode)
+ {
++	int ret;
+ 	struct writeback_control wbc = {
+ 		.sync_mode = WB_SYNC_ALL,
+ 		.nr_to_write = LONG_MAX,
+@@ -1466,7 +1615,8 @@ int nfs_wb_all(struct inode *inode)
+ 		.range_end = LLONG_MAX,
+ 	};
+ 
+-	return sync_inode(inode, &wbc);
++	ret = sync_inode(inode, &wbc);
++	return ret;
+ }
+ 
+ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
+diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/include/linux/exportfs.h
+--- linux-2.6.34.noarch/include/linux/exportfs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-08-23 12:09:03.365501459 -0400
+@@ -2,6 +2,7 @@
+ #define LINUX_EXPORTFS_H 1
+ 
+ #include <linux/types.h>
++#include <linux/exp_xdr.h>
+ 
+ struct dentry;
+ struct inode;
+@@ -175,4 +176,62 @@ extern struct dentry *generic_fh_to_pare
+ 	struct fid *fid, int fh_len, int fh_type,
+ 	struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen));
+ 
++#if defined(CONFIG_EXPORTFS_FILE_LAYOUT)
++struct pnfs_filelayout_device;
++struct pnfs_filelayout_layout;
++
++extern int filelayout_encode_devinfo(struct exp_xdr_stream *xdr,
++				     const struct pnfs_filelayout_device *fdev);
++extern enum nfsstat4 filelayout_encode_layout(struct exp_xdr_stream *xdr,
++				      const struct pnfs_filelayout_layout *flp);
++#endif /* defined(CONFIG_EXPORTFS_FILE_LAYOUT) */
++
++#if defined(CONFIG_EXPORTFS_FILE_LAYOUT)
++struct list_head;
++
++extern int blocklayout_encode_devinfo(struct exp_xdr_stream *xdr,
++				      const struct list_head *volumes);
++
++extern enum nfsstat4 blocklayout_encode_layout(struct exp_xdr_stream *xdr,
++					       const struct list_head *layouts);
++#endif /* defined(CONFIG_EXPORTFS_FILE_LAYOUT) */
++
++#if defined(CONFIG_PNFSD)
++#include <linux/module.h>
++
++struct pnfsd_cb_operations;
++
++struct pnfsd_cb_ctl {
++	spinlock_t lock;
++	struct module *module;
++	const struct pnfsd_cb_operations *cb_op;
++};
++
++/* in expfs.c so that file systems can depend on it */
++extern struct pnfsd_cb_ctl pnfsd_cb_ctl;
++
++static inline int
++pnfsd_get_cb_op(struct pnfsd_cb_ctl *ctl)
++{
++	int ret = -ENOENT;
++
++	spin_lock(&pnfsd_cb_ctl.lock);
++	if (!pnfsd_cb_ctl.cb_op)
++		goto out;
++	if (!try_module_get(pnfsd_cb_ctl.module))
++		goto out;
++	ctl->cb_op = pnfsd_cb_ctl.cb_op;
++	ctl->module = pnfsd_cb_ctl.module;
++	ret = 0;
++out:
++	spin_unlock(&pnfsd_cb_ctl.lock);
++	return ret;
++}
++
++static inline void
++pnfsd_put_cb_op(struct pnfsd_cb_ctl *ctl)
++{
++	module_put(ctl->module);
++}
++#endif /* CONFIG_PNFSD */
+ #endif /* LINUX_EXPORTFS_H */
+diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/include/linux/exp_xdr.h
+--- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-08-23 12:09:03.367491365 -0400
++++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-08-23 12:09:03.367491365 -0400
+@@ -0,0 +1,141 @@
++#ifndef _LINUX_EXP_XDR_H
++#define _LINUX_EXP_XDR_H
++
++#include <asm/byteorder.h>
++#include <asm/unaligned.h>
++#include <linux/string.h>
++
++struct exp_xdr_stream {
++	__be32 *p;
++	__be32 *end;
++};
++
++/**
++ * exp_xdr_qwords - Calculate the number of quad-words holding nbytes
++ * @nbytes: number of bytes to encode
++ */
++static inline size_t
++exp_xdr_qwords(__u32 nbytes)
++{
++	return DIV_ROUND_UP(nbytes, 4);
++}
++
++/**
++ * exp_xdr_qbytes - Calculate the number of bytes holding qwords
++ * @qwords: number of quad-words to encode
++ */
++static inline size_t
++exp_xdr_qbytes(size_t qwords)
++{
++	return qwords << 2;
++}
++
++/**
++ * exp_xdr_reserve_space - Reserve buffer space for sending
++ * @xdr: pointer to exp_xdr_stream
++ * @nbytes: number of bytes to reserve
++ *
++ * Checks that we have enough buffer space to encode 'nbytes' more
++ * bytes of data. If so, update the xdr stream.
++ */
++static inline __be32 *
++exp_xdr_reserve_space(struct exp_xdr_stream *xdr, size_t nbytes)
++{
++	__be32 *p = xdr->p;
++	__be32 *q;
++
++	/* align nbytes on the next 32-bit boundary */
++	q = p + exp_xdr_qwords(nbytes);
++	if (unlikely(q > xdr->end || q < p))
++		return NULL;
++	xdr->p = q;
++	return p;
++}
++
++/**
++ * exp_xdr_reserve_qwords - Reserve buffer space for sending
++ * @xdr: pointer to exp_xdr_stream
++ * @qwords: number of quad words (u32's) to reserve
++ */
++static inline __be32 *
++exp_xdr_reserve_qwords(struct exp_xdr_stream *xdr, size_t qwords)
++{
++	return exp_xdr_reserve_space(xdr, exp_xdr_qbytes(qwords));
++}
++
++/**
++ * exp_xdr_encode_u32 - Encode an unsigned 32-bit value onto a xdr stream
++ * @p: pointer to encoding destination
++ * @val: value to encode
++ */
++static inline __be32 *
++exp_xdr_encode_u32(__be32 *p, __u32 val)
++{
++	*p = cpu_to_be32(val);
++	return p + 1;
++}
++
++/**
++ * exp_xdr_encode_u64 - Encode an unsigned 64-bit value onto a xdr stream
++ * @p: pointer to encoding destination
++ * @val: value to encode
++ */
++static inline __be32 *
++exp_xdr_encode_u64(__be32 *p, __u64 val)
++{
++	put_unaligned_be64(val, p);
++	return p + 2;
++}
++
++/**
++ * exp_xdr_encode_bytes - Encode an array of bytes onto a xdr stream
++ * @p: pointer to encoding destination
++ * @ptr: pointer to the array of bytes
++ * @nbytes: number of bytes to encode
++ */
++static inline __be32 *
++exp_xdr_encode_bytes(__be32 *p, const void *ptr, __u32 nbytes)
++{
++	if (likely(nbytes != 0)) {
++		unsigned int qwords = exp_xdr_qwords(nbytes);
++		unsigned int padding = exp_xdr_qbytes(qwords) - nbytes;
++
++		memcpy(p, ptr, nbytes);
++		if (padding != 0)
++			memset((char *)p + nbytes, 0, padding);
++		p += qwords;
++	}
++	return p;
++}
++
++/**
++ * exp_xdr_encode_opaque - Encode an opaque type onto a xdr stream
++ * @p: pointer to encoding destination
++ * @ptr: pointer to the opaque array
++ * @nbytes: number of bytes to encode
++ *
++ * Encodes the 32-bit opaque size in bytes followed by the opaque value.
++ */
++static inline __be32 *
++exp_xdr_encode_opaque(__be32 *p, const void *ptr, __u32 nbytes)
++{
++	p = exp_xdr_encode_u32(p, nbytes);
++	return exp_xdr_encode_bytes(p, ptr, nbytes);
++}
++
++/**
++ * exp_xdr_encode_opaque_len - Encode the opaque length onto a xdr stream
++ * @lenp: pointer to the opaque length destination
++ * @endp: pointer to the end of the opaque array
++ *
++ * Encodes the 32-bit opaque size in bytes given the start and end pointers
++ */
++static inline __be32 *
++exp_xdr_encode_opaque_len(__be32 *lenp, const void *endp)
++{
++	size_t nbytes = (char *)endp - (char *)(lenp + 1);
++
++	exp_xdr_encode_u32(lenp, nbytes);
++	return lenp + 1 + exp_xdr_qwords(nbytes);
++}
++#endif /* _LINUX_EXP_XDR_H */
+diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include/linux/fs.h
+--- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-08-23 12:08:29.021511898 -0400
++++ linux-2.6.34.noarch/include/linux/fs.h	2010-08-23 12:09:03.369481147 -0400
+@@ -387,6 +387,7 @@ struct inodes_stat_t {
+ #include <asm/byteorder.h>
+ 
+ struct export_operations;
++struct pnfs_export_operations;
+ struct hd_geometry;
+ struct iovec;
+ struct nameidata;
+@@ -1329,6 +1330,7 @@ struct super_block {
+ 	const struct dquot_operations	*dq_op;
+ 	const struct quotactl_ops	*s_qcop;
+ 	const struct export_operations *s_export_op;
++	const struct pnfs_export_operations *s_pnfs_op;
+ 	unsigned long		s_flags;
+ 	unsigned long		s_magic;
+ 	struct dentry		*s_root;
+diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/include/linux/nfs4.h
+--- linux-2.6.34.noarch/include/linux/nfs4.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-08-23 12:09:03.371491472 -0400
+@@ -17,7 +17,10 @@
+ 
+ #define NFS4_BITMAP_SIZE	2
+ #define NFS4_VERIFIER_SIZE	8
+-#define NFS4_STATEID_SIZE	16
++#define NFS4_CLIENTID_SIZE	8
++#define NFS4_STATEID_SEQID_SIZE 4
++#define NFS4_STATEID_OTHER_SIZE 12
++#define NFS4_STATEID_SIZE	(NFS4_STATEID_SEQID_SIZE + NFS4_STATEID_OTHER_SIZE)
+ #define NFS4_FHSIZE		128
+ #define NFS4_MAXPATHLEN		PATH_MAX
+ #define NFS4_MAXNAMLEN		NAME_MAX
+@@ -119,6 +122,13 @@
+ #define EXCHGID4_FLAG_MASK_A			0x40070003
+ #define EXCHGID4_FLAG_MASK_R			0x80070003
+ 
++static inline bool
++is_ds_only_session(u32 exchange_flags)
++{
++	u32 mask = EXCHGID4_FLAG_USE_PNFS_DS | EXCHGID4_FLAG_USE_PNFS_MDS;
++	return (exchange_flags & mask) == EXCHGID4_FLAG_USE_PNFS_DS;
++}
++
+ #define SEQ4_STATUS_CB_PATH_DOWN		0x00000001
+ #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING	0x00000002
+ #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED	0x00000004
+@@ -166,8 +176,25 @@ struct nfs4_acl {
+ 	struct nfs4_ace	aces[0];
+ };
+ 
++struct nfs4_fsid {
++	u64	major;
++	u64	minor;
++};
++
+ typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier;
+-typedef struct { char data[NFS4_STATEID_SIZE]; } nfs4_stateid;
++typedef struct { char data[NFS4_CLIENTID_SIZE]; } nfs4_clientid;
++
++struct nfs41_stateid {
++	__be32 seqid;
++	char other[NFS4_STATEID_OTHER_SIZE];
++} __attribute__ ((packed));
++
++typedef struct {
++	union {
++		char data[NFS4_STATEID_SIZE];
++		struct nfs41_stateid stateid;
++	} u;
++} nfs4_stateid;
+ 
+ enum nfs_opnum4 {
+ 	OP_ACCESS = 3,
+@@ -471,6 +498,8 @@ enum lock_type4 {
+ #define FATTR4_WORD1_TIME_MODIFY        (1UL << 21)
+ #define FATTR4_WORD1_TIME_MODIFY_SET    (1UL << 22)
+ #define FATTR4_WORD1_MOUNTED_ON_FILEID  (1UL << 23)
++#define FATTR4_WORD1_FS_LAYOUT_TYPES    (1UL << 30)
++#define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
+ 
+ #define NFSPROC4_NULL 0
+ #define NFSPROC4_COMPOUND 1
+@@ -523,6 +552,7 @@ enum {
+ 	NFSPROC4_CLNT_GETACL,
+ 	NFSPROC4_CLNT_SETACL,
+ 	NFSPROC4_CLNT_FS_LOCATIONS,
++	NFSPROC4_CLNT_RELEASE_LOCKOWNER,
+ 
+ 	/* nfs41 */
+ 	NFSPROC4_CLNT_EXCHANGE_ID,
+@@ -531,6 +561,13 @@ enum {
+ 	NFSPROC4_CLNT_SEQUENCE,
+ 	NFSPROC4_CLNT_GET_LEASE_TIME,
+ 	NFSPROC4_CLNT_RECLAIM_COMPLETE,
++	NFSPROC4_CLNT_PNFS_LAYOUTGET,
++	NFSPROC4_CLNT_PNFS_LAYOUTCOMMIT,
++	NFSPROC4_CLNT_PNFS_LAYOUTRETURN,
++	NFSPROC4_CLNT_PNFS_GETDEVICELIST,
++	NFSPROC4_CLNT_PNFS_GETDEVICEINFO,
++	NFSPROC4_CLNT_PNFS_WRITE,
++	NFSPROC4_CLNT_PNFS_COMMIT,
+ };
+ 
+ /* nfs41 types */
+@@ -549,6 +586,43 @@ enum state_protect_how4 {
+ 	SP4_SSV		= 2
+ };
+ 
++enum pnfs_layouttype {
++	LAYOUT_NFSV4_1_FILES  = 1,
++	LAYOUT_OSD2_OBJECTS = 2,
++	LAYOUT_BLOCK_VOLUME = 3,
++};
++
++/* used for both layout return and recall */
++enum pnfs_layoutreturn_type {
++	RETURN_FILE = 1,
++	RETURN_FSID = 2,
++	RETURN_ALL  = 3
++};
++
++enum pnfs_iomode {
++	IOMODE_READ = 1,
++	IOMODE_RW = 2,
++	IOMODE_ANY = 3,
++};
++
++enum pnfs_notify_deviceid_type4 {
++	NOTIFY_DEVICEID4_CHANGE = 1 << 1,
++	NOTIFY_DEVICEID4_DELETE = 1 << 2,
++};
++
++#define NFL4_UFLG_MASK			0x0000003F
++#define NFL4_UFLG_DENSE			0x00000001
++#define NFL4_UFLG_COMMIT_THRU_MDS	0x00000002
++#define NFL4_UFLG_STRIPE_UNIT_SIZE_MASK	0xFFFFFFC0
++
++/* Encoded in the loh_body field of type layouthint4 */
++enum filelayout_hint_care4 {
++	NFLH4_CARE_DENSE		= NFL4_UFLG_DENSE,
++	NFLH4_CARE_COMMIT_THRU_MDS	= NFL4_UFLG_COMMIT_THRU_MDS,
++	NFLH4_CARE_STRIPE_UNIT_SIZE	= 0x00000040,
++	NFLH4_CARE_STRIPE_COUNT		= 0x00000080
++};
++
+ #endif
+ #endif
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfs4_pnfs.h
+--- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-08-23 12:09:03.372501550 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-08-23 12:09:03.372501550 -0400
+@@ -0,0 +1,330 @@
++/*
++ *  include/linux/nfs4_pnfs.h
++ *
++ *  Common data structures needed by the pnfs client and pnfs layout driver.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef LINUX_NFS4_PNFS_H
++#define LINUX_NFS4_PNFS_H
++
++#include <linux/pnfs_xdr.h>
++#include <linux/nfs_page.h>
++
++enum pnfs_try_status {
++	PNFS_ATTEMPTED     = 0,
++	PNFS_NOT_ATTEMPTED = 1,
++};
++
++#define NFS4_PNFS_GETDEVLIST_MAXNUM 16
++
++/* Per-layout driver specific registration structure */
++struct pnfs_layoutdriver_type {
++	const u32 id;
++	const char *name;
++	struct layoutdriver_io_operations *ld_io_ops;
++	struct layoutdriver_policy_operations *ld_policy_ops;
++};
++
++struct pnfs_fsdata {
++	int bypass_eof;
++	struct pnfs_layout_segment *lseg;
++	void *private;
++};
++
++#if defined(CONFIG_NFS_V4_1)
++
++static inline struct nfs_inode *
++PNFS_NFS_INODE(struct pnfs_layout_type *lo)
++{
++	return NFS_I(lo->lo_inode);
++}
++
++static inline struct inode *
++PNFS_INODE(struct pnfs_layout_type *lo)
++{
++	return lo->lo_inode;
++}
++
++static inline struct nfs_server *
++PNFS_NFS_SERVER(struct pnfs_layout_type *lo)
++{
++	return NFS_SERVER(PNFS_INODE(lo));
++}
++
++static inline struct pnfs_layoutdriver_type *
++PNFS_LD(struct pnfs_layout_type *lo)
++{
++	return NFS_SERVER(PNFS_INODE(lo))->pnfs_curr_ld;
++}
++
++static inline struct layoutdriver_io_operations *
++PNFS_LD_IO_OPS(struct pnfs_layout_type *lo)
++{
++	return PNFS_LD(lo)->ld_io_ops;
++}
++
++static inline struct layoutdriver_policy_operations *
++PNFS_LD_POLICY_OPS(struct pnfs_layout_type *lo)
++{
++	return PNFS_LD(lo)->ld_policy_ops;
++}
++
++static inline bool
++has_layout(struct nfs_inode *nfsi)
++{
++	return nfsi->layout != NULL;
++}
++
++static inline bool
++layoutcommit_needed(struct nfs_inode *nfsi)
++{
++	return has_layout(nfsi) &&
++	       test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->pnfs_layout_state);
++}
++
++extern void put_lseg(struct pnfs_layout_segment *lseg);
++extern void get_lseg(struct pnfs_layout_segment *lseg);
++
++#else /* CONFIG_NFS_V4_1 */
++
++static inline bool
++has_layout(struct nfs_inode *nfsi)
++{
++	return false;
++}
++
++static inline bool
++layoutcommit_needed(struct nfs_inode *nfsi)
++{
++	return false;	/* stub for !CONFIG_NFS_V4_1: has_layout() is always false here */
++}
++
++#endif /* CONFIG_NFS_V4_1 */
++
++struct pnfs_layout_segment {
++	struct list_head fi_list;
++	struct nfs4_pnfs_layout_segment range;
++	struct kref kref;
++	bool valid;
++	struct pnfs_layout_type *layout;
++	struct nfs4_deviceid *deviceid;
++	u8 ld_data[];			/* layout driver private data */
++};
++
++static inline void *
++LSEG_LD_DATA(struct pnfs_layout_segment *lseg)
++{
++	return lseg->ld_data;
++}
++
++/* Layout driver I/O operations.
++ * Either the pagecache or non-pagecache read/write operations must be implemented
++ */
++struct layoutdriver_io_operations {
++	/* Functions that use the pagecache.
++	 * If use_pagecache == 1, then these functions must be implemented.
++	 */
++	/* read and write pagelist should return just 0 (to indicate that
++	 * the layout code has taken control) or 1 (to indicate that the
++	 * layout code wishes to fall back to normal nfs.)  If 0 is returned,
++	 * information can be passed back through nfs_data->res and
++	 * nfs_data->task.tk_status, and the appropriate pnfs done function
++	 * MUST be called.
++	 */
++	enum pnfs_try_status
++	(*read_pagelist) (struct nfs_read_data *nfs_data, unsigned nr_pages);
++	enum pnfs_try_status
++	(*write_pagelist) (struct nfs_write_data *nfs_data, unsigned nr_pages, int how);
++	int (*write_begin) (struct pnfs_layout_segment *lseg, struct page *page,
++			    loff_t pos, unsigned count,
++			    struct pnfs_fsdata *fsdata);
++	int (*write_end)(struct inode *inode, struct page *page, loff_t pos,
++			 unsigned count, unsigned copied,
++			 struct pnfs_layout_segment *lseg);
++	void (*write_end_cleanup)(struct file *filp,
++				  struct pnfs_fsdata *fsdata);
++
++	/* Consistency ops */
++	/* 2 problems:
++	 * 1) the page list contains nfs_pages, NOT pages
++	 * 2) currently the NFS code doesn't create a page array (as it does with read/write)
++	 */
++	enum pnfs_try_status
++	(*commit) (struct nfs_write_data *nfs_data, int how);
++
++	/* Layout information. For each inode, alloc_layout is executed once to retrieve an
++	 * inode specific layout structure.  Each subsequent layoutget operation results in
++	 * a set_layout call to set the opaque layout in the layout driver.*/
++	struct pnfs_layout_type * (*alloc_layout) (struct inode *inode);
++	void (*free_layout) (struct pnfs_layout_type *);
++	struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_type *layoutid, struct nfs4_pnfs_layoutget_res *lgr);
++	void (*free_lseg) (struct pnfs_layout_segment *lseg);
++
++	int (*setup_layoutcommit) (struct pnfs_layout_type *layoutid,
++				struct pnfs_layoutcommit_arg *args);
++
++	void (*encode_layoutcommit) (struct pnfs_layout_type *layoutid,
++				struct xdr_stream *xdr,
++				const struct pnfs_layoutcommit_arg *args);
++	void (*cleanup_layoutcommit) (struct pnfs_layout_type *layoutid,
++				      struct pnfs_layoutcommit_arg *args,
++				      int status);
++	void (*encode_layoutreturn) (struct pnfs_layout_type *layoutid,
++				struct xdr_stream *xdr,
++				const struct nfs4_pnfs_layoutreturn_arg *args);
++
++	/* Registration information for a new mounted file system
++	 */
++	int (*initialize_mountpoint) (struct nfs_server *,
++				      const struct nfs_fh * mntfh);
++	int (*uninitialize_mountpoint) (struct nfs_server *server);
++};
++
++enum layoutdriver_policy_flags {
++	/* Should the full nfs rpc cleanup code be used after io */
++	PNFS_USE_RPC_CODE		= 1 << 0,
++
++	/* Should the NFS req. gather algorithm cross stripe boundaries? */
++	PNFS_GATHER_ACROSS_STRIPES	= 1 << 1,
++
++	/* Should the pNFS client commit and return the layout upon a setattr */
++	PNFS_LAYOUTRET_ON_SETATTR	= 1 << 3,	/* note: bit 2 is not assigned in this enum */
++};
++
++struct layoutdriver_policy_operations {
++	unsigned flags;	/* bitmask of enum layoutdriver_policy_flags */
++
++	/* The stripe size of the file system */
++	ssize_t (*get_stripesize) (struct pnfs_layout_type *layoutid);
++
++	/* test for nfs page cache coalescing */
++	int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
++
++	/* Retrieve the block size of the file system.
++	 * If gather_across_stripes == 1, then the file system will gather
++	 * requests into the block size.
++	 * TODO: Where will the layout driver get this info?  It is hard
++	 * coded in PVFS2.
++	 */
++	ssize_t (*get_blocksize) (void);
++};
++
++/* Should the full nfs rpc cleanup code be used after io */
++static inline int
++pnfs_ld_use_rpc_code(struct pnfs_layoutdriver_type *ld)
++{
++	return ld->ld_policy_ops->flags & PNFS_USE_RPC_CODE;
++}
++
++/* Should the NFS req. gather algorithm cross stripe boundaries? */
++static inline int
++pnfs_ld_gather_across_stripes(struct pnfs_layoutdriver_type *ld)
++{
++	return ld->ld_policy_ops->flags & PNFS_GATHER_ACROSS_STRIPES;
++}
++
++struct pnfs_device {
++	struct pnfs_deviceid dev_id;
++	unsigned int  layout_type;
++	unsigned int  mincount;
++	struct page **pages;
++	void          *area;
++	unsigned int  pgbase;
++	unsigned int  pglen;
++	unsigned int  dev_notify_types;
++};
++
++struct pnfs_devicelist {
++	unsigned int		eof;
++	unsigned int		num_devs;
++	struct pnfs_deviceid	dev_id[NFS4_PNFS_GETDEVLIST_MAXNUM];
++};
++
++/*
++ * Device ID RCU cache. A device ID is unique per client ID and layout type.
++ */
++#define NFS4_DEVICE_ID_HASH_BITS	5
++#define NFS4_DEVICE_ID_HASH_SIZE	(1 << NFS4_DEVICE_ID_HASH_BITS)
++#define NFS4_DEVICE_ID_HASH_MASK	(NFS4_DEVICE_ID_HASH_SIZE - 1)
++
++static inline u32
++nfs4_deviceid_hash(struct pnfs_deviceid *id)
++{
++	unsigned char *cptr = (unsigned char *)id->data;
++	unsigned int nbytes = NFS4_PNFS_DEVICEID4_SIZE;
++	u32 x = 0;	/* running hash value */
++
++	while (nbytes--) {	/* fold in NFS4_PNFS_DEVICEID4_SIZE bytes of id->data */
++		x *= 37;
++		x += *cptr++;
++	}
++	return x & NFS4_DEVICE_ID_HASH_MASK;	/* bucket index, always < NFS4_DEVICE_ID_HASH_SIZE */
++}
++
++struct nfs4_deviceid_cache {
++	spinlock_t		dc_lock;
++	struct kref		dc_kref;
++	void			(*dc_free_callback)(struct kref *);
++	struct hlist_head	dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
++};
++
++/* Device ID cache node */
++struct nfs4_deviceid {
++	struct hlist_node	de_node;
++	struct pnfs_deviceid	de_id;
++	struct kref		de_kref;
++};
++
++extern int nfs4_alloc_init_deviceid_cache(struct nfs_client *,
++				void (*free_callback)(struct kref *));
++extern void nfs4_put_deviceid_cache(struct nfs_client *);
++extern void nfs4_init_deviceid_node(struct nfs4_deviceid *);
++extern struct nfs4_deviceid *nfs4_find_deviceid(struct nfs4_deviceid_cache *,
++				struct pnfs_deviceid *);
++extern struct nfs4_deviceid *nfs4_add_deviceid(struct nfs4_deviceid_cache *,
++				struct nfs4_deviceid *);
++extern void nfs4_set_layout_deviceid(struct pnfs_layout_segment *,
++				struct nfs4_deviceid *);
++extern void nfs4_unset_layout_deviceid(struct pnfs_layout_segment *,
++				struct nfs4_deviceid *,
++				void (*free_callback)(struct kref *));
++extern void nfs4_delete_device(struct nfs4_deviceid_cache *,
++				struct pnfs_deviceid *);
++
++/* pNFS client callback functions.
++ * These operations allow the layout driver to access pNFS client
++ * specific information or call pNFS client->server operations.
++ * E.g., getdeviceinfo, I/O callbacks, etc
++ */
++struct pnfs_client_operations {
++	int (*nfs_getdevicelist) (struct nfs_server *,
++				  const struct nfs_fh *fh,
++				  struct pnfs_devicelist *devlist);
++	int (*nfs_getdeviceinfo) (struct nfs_server *,
++				  struct pnfs_device *dev);
++
++	/* Post read callback. */
++	void (*nfs_readlist_complete) (struct nfs_read_data *nfs_data);
++
++	/* Post write callback. */
++	void (*nfs_writelist_complete) (struct nfs_write_data *nfs_data);
++
++	/* Post commit callback. */
++	void (*nfs_commit_complete) (struct nfs_write_data *nfs_data);
++	void (*nfs_return_layout) (struct inode *);
++};
++
++extern struct pnfs_client_operations pnfs_ops;
++
++extern struct pnfs_client_operations *pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
++extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
++
++#define NFS4_PNFS_MAX_LAYOUTS 4
++#define NFS4_PNFS_PRIVATE_LAYOUT 0x80000000
++
++#endif /* LINUX_NFS4_PNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarch/include/linux/nfsd4_block.h
+--- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-08-23 12:09:03.373491892 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-08-23 12:09:03.374491393 -0400
+@@ -0,0 +1,101 @@
++#ifndef NFSD4_BLOCK
++#define NFSD4_BLOCK
++
++#include <linux/sunrpc/svc.h>
++#include <linux/sunrpc/svcauth.h>
++#include <linux/nfsd/nfsfh.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++#define PNFS_BLOCK_SUCCESS		1
++#define PNFS_BLOCK_FAILURE		0
++
++#define PNFS_BLOCK_CTL_START		1
++#define PNFS_BLOCK_CTL_STOP		2
++#define PNFS_BLOCK_CTL_VERS		3 /* Allows daemon to request current
++					   * version from kernel via an upcall.
++					   */
++
++#define PNFS_UPCALL_MSG_STOP	0
++#define PNFS_UPCALL_MSG_GETSIG	1
++#define PNFS_UPCALL_MSG_GETSLICE	2
++#define PNFS_UPCALL_MSG_DMCHK	3	// See if dev_t is a DM volume
++#define PNFS_UPCALL_MSG_DMGET	4
++#define PNFS_UPCALL_MSG_VERS	5
++
++#define PNFS_UPCALL_VERS		8
++
++typedef struct stripe_dev {
++	int	major,
++		minor,
++		offset;
++} stripe_dev_t;
++
++typedef struct bl_comm_res {
++	int				res_status;
++	union {
++		struct {
++			long long	start,
++					length;
++		} slice;
++		struct {
++			int		num_stripes,
++					stripe_size;
++			stripe_dev_t	devs[];
++		} stripe;
++		struct {
++			long long	sector;
++			int		offset,
++					len;
++			char		sig[];
++		} sig;
++		int			vers,
++					dm_vol;
++	} u;
++} bl_comm_res_t;
++
++typedef struct bl_comm_msg {
++	int		msg_type,
++			msg_status;
++	union {
++		dev_t	msg_dev;
++		int	msg_vers;
++	} u;
++	bl_comm_res_t	*msg_res;
++} bl_comm_msg_t;
++
++#ifdef __KERNEL__
++
++typedef struct bl_comm {
++	/* ---- protects access to this structure ---- */
++	struct mutex		lock;
++	/* ---- protects access to rpc pipe ---- */
++	struct mutex		pipe_lock;
++	struct dentry		*pipe_dentry;
++	wait_queue_head_t	pipe_wq;
++	bl_comm_msg_t		msg;
++} bl_comm_t;
++
++int pnfs_block_enabled(struct inode *, int);
++int bl_layout_type(struct super_block *sb);
++int bl_getdeviceiter(struct super_block *, u32 layout_type,
++		     struct nfsd4_pnfs_dev_iter_res *);
++int bl_getdeviceinfo(struct super_block *, struct exp_xdr_stream *,
++		     u32 layout_type,
++		     const struct nfsd4_pnfs_deviceid *);
++enum nfsstat4 bl_layoutget(struct inode *, struct exp_xdr_stream *,
++			   const struct nfsd4_pnfs_layoutget_arg *,
++			   struct nfsd4_pnfs_layoutget_res *);
++int bl_layoutcommit(struct inode *,
++		    const struct nfsd4_pnfs_layoutcommit_arg *,
++		    struct nfsd4_pnfs_layoutcommit_res *);
++int bl_layoutreturn(struct inode *,
++		    const struct nfsd4_pnfs_layoutreturn_arg *);
++int bl_layoutrecall(struct inode *inode, int type, u64 offset, u64 len);
++int bl_init_proc(void);
++int bl_upcall(bl_comm_t *, bl_comm_msg_t *, bl_comm_res_t **);
++
++extern bl_comm_t	*bl_comm_global;	// Ugly...
++#endif /* __KERNEL__ */
++
++#endif /* NFSD4_BLOCK */
++
+diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h
+--- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-08-23 12:09:03.375501481 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-08-23 12:09:03.375501481 -0400
+@@ -0,0 +1,345 @@
++/*
++ * include/linux/nfsd4_spnfs.h
++ *
++ * spNFS - simple pNFS implementation with userspace daemon
++ *
++ */
++
++/******************************************************************************
++
++(c) 2007 Network Appliance, Inc.  All Rights Reserved.
++
++Network Appliance provides this source code under the GPL v2 License.
++The GPL v2 license is available at
++http://opensource.org/licenses/gpl-license.php.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++******************************************************************************/
++
++#ifndef NFS_SPNFS_H
++#define NFS_SPNFS_H
++
++
++#ifdef __KERNEL__
++#include "exportfs.h"
++#include "sunrpc/svc.h"
++#include "nfsd/nfsfh.h"
++#else
++#include <sys/types.h>
++#endif /* __KERNEL__ */
++
++#define SPNFS_STATUS_INVALIDMSG		0x01
++#define SPNFS_STATUS_AGAIN		0x02
++#define SPNFS_STATUS_FAIL		0x04
++#define SPNFS_STATUS_SUCCESS		0x08
++
++#define SPNFS_TYPE_LAYOUTGET		0x01
++#define SPNFS_TYPE_LAYOUTCOMMIT		0x02
++#define SPNFS_TYPE_LAYOUTRETURN		0x03
++#define SPNFS_TYPE_GETDEVICEITER	0x04
++#define SPNFS_TYPE_GETDEVICEINFO	0x05
++#define SPNFS_TYPE_SETATTR		0x06
++#define SPNFS_TYPE_OPEN			0x07
++#define	SPNFS_TYPE_CLOSE		0x08
++#define SPNFS_TYPE_CREATE		0x09
++#define SPNFS_TYPE_REMOVE		0x0a
++#define SPNFS_TYPE_COMMIT		0x0b
++#define SPNFS_TYPE_READ			0x0c
++#define SPNFS_TYPE_WRITE		0x0d
++
++#define	SPNFS_MAX_DEVICES		1
++#define	SPNFS_MAX_DATA_SERVERS		16
++#define SPNFS_MAX_IO			512
++
++/* layout */
++struct spnfs_msg_layoutget_args {
++	unsigned long inode;
++	unsigned long generation;
++};
++
++struct spnfs_filelayout_list {
++	u_int32_t       fh_len;
++	unsigned char   fh_val[128]; /* DMXXX fix this const */
++};
++
++struct spnfs_msg_layoutget_res {
++	int status;
++	u_int64_t devid;
++	u_int64_t stripe_size;
++	u_int32_t stripe_type;
++	u_int32_t stripe_count;
++	struct spnfs_filelayout_list flist[SPNFS_MAX_DATA_SERVERS];
++};
++
++/* layoutcommit */
++struct spnfs_msg_layoutcommit_args {
++	unsigned long inode;
++	unsigned long generation;
++	u_int64_t file_size;
++};
++
++struct spnfs_msg_layoutcommit_res {
++	int status;
++};
++
++/* layoutreturn */
++/* No op for the daemon */
++/*
++struct spnfs_msg_layoutreturn_args {
++};
++
++struct spnfs_msg_layoutreturn_res {
++};
++*/
++
++/* getdeviceiter */
++struct spnfs_msg_getdeviceiter_args {
++	unsigned long inode;
++	u_int64_t cookie;
++	u_int64_t verf;
++};
++
++struct spnfs_msg_getdeviceiter_res {
++	int status;
++	u_int64_t devid;
++	u_int64_t cookie;
++	u_int64_t verf;
++	u_int32_t eof;
++};
++
++/* getdeviceinfo */
++struct spnfs_data_server {
++	u_int32_t dsid;
++	char netid[5];
++	char addr[29];
++};
++
++struct spnfs_device {
++	u_int64_t devid;
++	int dscount;
++	struct spnfs_data_server dslist[SPNFS_MAX_DATA_SERVERS];
++};
++
++struct spnfs_msg_getdeviceinfo_args {
++	u_int64_t devid;
++};
++
++struct spnfs_msg_getdeviceinfo_res {
++	int status;
++	struct spnfs_device devinfo;
++};
++
++/* setattr */
++struct spnfs_msg_setattr_args {
++	unsigned long inode;
++	unsigned long generation;
++	int file_size;	/* NOTE(review): plain int here, but u_int64_t in spnfs_msg_layoutcommit_args -- confirm large sizes are handled */
++};
++
++struct spnfs_msg_setattr_res {
++	int status;
++};
++
++/* open */
++struct spnfs_msg_open_args {
++	unsigned long inode;
++	unsigned long generation;
++	int create;
++	int createmode;
++	int truncate;
++};
++
++struct spnfs_msg_open_res {
++	int status;
++};
++
++/* close */
++/* No op for daemon */
++struct spnfs_msg_close_args {
++	int x;
++};
++
++struct spnfs_msg_close_res {
++	int y;
++};
++
++/* create */
++/*
++struct spnfs_msg_create_args {
++	int x;
++};
++
++struct spnfs_msg_create_res {
++	int y;
++};
++*/
++
++/* remove */
++struct spnfs_msg_remove_args {
++	unsigned long inode;
++	unsigned long generation;
++};
++
++struct spnfs_msg_remove_res {
++	int status;
++};
++
++/* commit */
++/*
++struct spnfs_msg_commit_args {
++	int x;
++};
++
++struct spnfs_msg_commit_res {
++	int y;
++};
++*/
++
++/* read */
++struct spnfs_msg_read_args {
++	unsigned long inode;
++	unsigned long generation;
++	loff_t offset;
++	unsigned long len;
++};
++
++struct spnfs_msg_read_res {
++	int status;
++	char data[SPNFS_MAX_IO];
++};
++
++/* write */
++struct spnfs_msg_write_args {
++	unsigned long inode;
++	unsigned long generation;
++	loff_t offset;
++	unsigned long len;
++	char data[SPNFS_MAX_IO];
++};
++
++struct spnfs_msg_write_res {
++	int status;
++};
++
++/* bundle args and responses */
++union spnfs_msg_args {
++	struct spnfs_msg_layoutget_args		layoutget_args;
++	struct spnfs_msg_layoutcommit_args	layoutcommit_args;
++/*
++	struct spnfs_msg_layoutreturn_args	layoutreturn_args;
++*/
++	struct spnfs_msg_getdeviceiter_args     getdeviceiter_args;
++	struct spnfs_msg_getdeviceinfo_args     getdeviceinfo_args;
++	struct spnfs_msg_setattr_args		setattr_args;
++	struct spnfs_msg_open_args		open_args;
++	struct spnfs_msg_close_args		close_args;
++/*
++	struct spnfs_msg_create_args		create_args;
++*/
++	struct spnfs_msg_remove_args		remove_args;
++/*
++	struct spnfs_msg_commit_args		commit_args;
++*/
++	struct spnfs_msg_read_args		read_args;
++	struct spnfs_msg_write_args		write_args;
++};
++
++union spnfs_msg_res {
++	struct spnfs_msg_layoutget_res		layoutget_res;
++	struct spnfs_msg_layoutcommit_res	layoutcommit_res;
++/*
++	struct spnfs_msg_layoutreturn_res	layoutreturn_res;
++*/
++	struct spnfs_msg_getdeviceiter_res      getdeviceiter_res;
++	struct spnfs_msg_getdeviceinfo_res      getdeviceinfo_res;
++	struct spnfs_msg_setattr_res		setattr_res;
++	struct spnfs_msg_open_res		open_res;
++	struct spnfs_msg_close_res		close_res;
++/*
++	struct spnfs_msg_create_res		create_res;
++*/
++	struct spnfs_msg_remove_res		remove_res;
++/*
++	struct spnfs_msg_commit_res		commit_res;
++*/
++	struct spnfs_msg_read_res		read_res;
++	struct spnfs_msg_write_res		write_res;
++};
++
++/* a spnfs message, args and response */
++struct spnfs_msg {
++	unsigned char		im_type;
++	unsigned char		im_status;
++	union spnfs_msg_args	im_args;
++	union spnfs_msg_res	im_res;
++};
++
++/* spnfs configuration info */
++struct spnfs_config {
++	unsigned char		dense_striping;
++	int			stripe_size;
++	int			num_ds;
++	char			ds_dir[SPNFS_MAX_DATA_SERVERS][80];  /* XXX */
++};
++
++#if defined(__KERNEL__) && defined(CONFIG_SPNFS)
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++/* pipe mgmt structure.  messages flow through here */
++struct spnfs {
++	struct dentry		*spnfs_dentry;    /* dentry for pipe */
++	wait_queue_head_t	spnfs_wq;
++	struct spnfs_msg	spnfs_im;         /* spnfs message */
++	struct mutex		spnfs_lock;       /* Serializes upcalls */
++	struct mutex		spnfs_plock;
++};
++
++struct nfsd4_open;
++
++int spnfs_layout_type(struct super_block *);
++enum nfsstat4 spnfs_layoutget(struct inode *, struct exp_xdr_stream *xdr,
++			      const struct nfsd4_pnfs_layoutget_arg *,
++			      struct nfsd4_pnfs_layoutget_res *);
++int spnfs_layoutcommit(void);
++int spnfs_layoutreturn(struct inode *,
++		       const struct nfsd4_pnfs_layoutreturn_arg *);
++int spnfs_getdeviceiter(struct super_block *,
++			u32 layout_type,
++			struct nfsd4_pnfs_dev_iter_res *);
++int spnfs_getdeviceinfo(struct super_block *, struct exp_xdr_stream *,
++			u32 layout_type,
++			const struct nfsd4_pnfs_deviceid *);
++int spnfs_setattr(void);
++int spnfs_open(struct inode *, struct nfsd4_open *);
++int spnfs_get_state(struct inode *, struct knfsd_fh *, struct pnfs_get_state *);
++int spnfs_remove(unsigned long, unsigned long);
++__be32 spnfs_read(struct inode *, loff_t, unsigned long *,
++		  int, struct svc_rqst *);
++__be32 spnfs_write(struct inode *, loff_t, size_t, int, struct svc_rqst *);
++int spnfs_getfh(int, struct nfs_fh *);
++int spnfs_test_layoutrecall(char *, u64, u64);
++int spnfs_layoutrecall(struct inode *, int, u64, u64);
++
++int nfsd_spnfs_new(void);
++void nfsd_spnfs_delete(void);
++int spnfs_upcall(struct spnfs *, struct spnfs_msg *, union spnfs_msg_res *);
++int spnfs_enabled(void);
++int spnfs_init_proc(void);
++
++extern struct spnfs_config *spnfs_config;
++
++#endif /* __KERNEL__ && CONFIG_SPNFS */
++
++#endif /* NFS_SPNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch/include/linux/nfsd/const.h
+--- linux-2.6.34.noarch/include/linux/nfsd/const.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-08-23 12:09:03.376401789 -0400
+@@ -29,6 +29,7 @@
+ #ifdef __KERNEL__
+ 
+ #include <linux/sunrpc/msg_prot.h>
++#include <linux/sunrpc/svc.h>
+ 
+ /*
+  * Largest number of bytes we need to allocate for an NFS
+diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch/include/linux/nfsd/debug.h
+--- linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-08-23 12:09:03.376401789 -0400
+@@ -32,6 +32,8 @@
+ #define NFSDDBG_REPCACHE	0x0080
+ #define NFSDDBG_XDR		0x0100
+ #define NFSDDBG_LOCKD		0x0200
++#define NFSDDBG_PNFS		0x0400
++#define NFSDDBG_FILELAYOUT	0x0800
+ #define NFSDDBG_ALL		0x7FFF
+ #define NFSDDBG_NOCHANGE	0xFFFF
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarch/include/linux/nfsd/export.h
+--- linux-2.6.34.noarch/include/linux/nfsd/export.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-08-23 12:09:03.377481954 -0400
+@@ -100,6 +100,7 @@ struct svc_export {
+ 	uid_t			ex_anon_uid;
+ 	gid_t			ex_anon_gid;
+ 	int			ex_fsid;
++	int			ex_pnfs;
+ 	unsigned char *		ex_uuid; /* 16 byte fsid */
+ 	struct nfsd4_fs_locations ex_fslocs;
+ 	int			ex_nflavors;
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-08-23 12:09:03.377481954 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-08-23 12:09:03.378501747 -0400
+@@ -0,0 +1,132 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef NFSD_NFS4LAYOUTXDR_H
++#define NFSD_NFS4LAYOUTXDR_H
++
++#include <linux/sunrpc/xdr.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++/* the nfsd4_pnfs_devlist dev_addr for the file layout type */
++struct pnfs_filelayout_devaddr {
++	struct xdr_netobj	r_netid;
++	struct xdr_netobj	r_addr;
++};
++
++/* list of multipath servers */
++struct pnfs_filelayout_multipath {
++	u32				fl_multipath_length;
++	struct pnfs_filelayout_devaddr 	*fl_multipath_list;
++};
++
++struct pnfs_filelayout_device {
++	u32					fl_stripeindices_length;
++	u32       		 		*fl_stripeindices_list;
++	u32					fl_device_length;
++	struct pnfs_filelayout_multipath 	*fl_device_list;
++};
++
++struct pnfs_filelayout_layout {
++	u32                             lg_layout_type; /* response */
++	u32                             lg_stripe_type; /* response */
++	u32                             lg_commit_through_mds; /* response */
++	u64                             lg_stripe_unit; /* response */
++	u64                             lg_pattern_offset; /* response */
++	u32                             lg_first_stripe_index;	/* response */
++	struct nfsd4_pnfs_deviceid	device_id;		/* response */
++	u32                             lg_fh_length;		/* response */
++	struct knfsd_fh                 *lg_fh_list;		/* response */
++};
++
++enum stripetype4 {
++	STRIPE_SPARSE = 1,
++	STRIPE_DENSE = 2
++};
++
++enum pnfs_block_extent_state4 {
++        PNFS_BLOCK_READWRITE_DATA       = 0,
++        PNFS_BLOCK_READ_DATA            = 1,
++        PNFS_BLOCK_INVALID_DATA         = 2,
++        PNFS_BLOCK_NONE_DATA            = 3
++};
++
++enum pnfs_block_volume_type4 {
++        PNFS_BLOCK_VOLUME_SIMPLE = 0,
++        PNFS_BLOCK_VOLUME_SLICE = 1,
++        PNFS_BLOCK_VOLUME_CONCAT = 2,
++        PNFS_BLOCK_VOLUME_STRIPE = 3,
++};
++typedef enum pnfs_block_volume_type4 pnfs_block_volume_type4;
++
++enum bl_cache_state {
++	BLOCK_LAYOUT_NEW	= 0,
++	BLOCK_LAYOUT_CACHE	= 1,
++	BLOCK_LAYOUT_UPDATE	= 2,
++};
++
++typedef struct pnfs_blocklayout_layout {
++        struct list_head                bll_list;
++        struct nfsd4_pnfs_deviceid      bll_vol_id;
++        u64                             bll_foff;	// file offset
++        u64                             bll_len;
++        u64                             bll_soff;	// storage offset
++	int				bll_recalled;
++        enum pnfs_block_extent_state4   bll_es;
++	enum bl_cache_state		bll_cache_state;
++} pnfs_blocklayout_layout_t;
++
++typedef struct pnfs_blocklayout_devinfo {
++        struct list_head                bld_list;
++        pnfs_block_volume_type4         bld_type;
++        struct nfsd4_pnfs_deviceid      bld_devid;
++        int                             bld_index_loc;
++        union {
++                struct {
++                        u64             bld_offset;
++                        u32             bld_sig_len,
++                                        *bld_sig;
++                } simple;
++                struct {
++                        u64             bld_start,
++                                        bld_len;
++                        u32             bld_index;      /* Index of Simple Volume */
++                } slice;
++                struct {
++                        u32             bld_stripes;
++                        u64             bld_chunk_size;
++                        u32             *bld_stripe_indexs;
++                } stripe;
++        } u;
++} pnfs_blocklayout_devinfo_t;
++
++#endif /* NFSD_NFS4LAYOUTXDR_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-08-23 12:09:03.378501747 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-08-23 12:09:03.378501747 -0400
+@@ -0,0 +1,54 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ ******************************************************************************/
++#include <linux/genhd.h>
++
++/*
++ * Length of comma separated pnfs data server IPv4 addresses. Enough room for
++ * 32 addresses.
++ */
++#define NFSD_DLM_DS_LIST_MAX   512
++/*
++ * Length of colon separated pnfs dlm device of the form
++ * disk_name:comma separated data server IPv4 address
++ */
++#define NFSD_PNFS_DLM_DEVICE_MAX (NFSD_DLM_DS_LIST_MAX + DISK_NAME_LEN + 1)
++
++#ifdef CONFIG_PNFSD
++
++/* For use by DLM cluster file systems exported by pNFSD */
++extern const struct pnfs_export_operations pnfs_dlm_export_ops;
++
++int nfsd4_set_pnfs_dlm_device(char *pnfs_dlm_device, int len);
++
++void nfsd4_pnfs_dlm_shutdown(void);
++
++ssize_t nfsd4_get_pnfs_dlm_device_list(char *buf, ssize_t buflen);
++
++#else /* CONFIG_PNFSD */
++
++static inline void nfsd4_pnfs_dlm_shutdown(void)
++{
++	return;	/* no-op stub for !CONFIG_PNFSD. NOTE(review): this header has no include guard, so including it twice would redefine this function -- confirm */
++}
++
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-08-23 12:09:03.379487099 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-08-23 12:09:03.379487099 -0400
+@@ -0,0 +1,271 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef _LINUX_NFSD_NFSD4_PNFS_H
++#define _LINUX_NFSD_NFSD4_PNFS_H
++
++#include <linux/exportfs.h>
++#include <linux/exp_xdr.h>
++#include <linux/nfs_xdr.h>
++
++struct nfsd4_pnfs_deviceid {
++	u64	sbid;			/* per-superblock unique ID */
++	u64	devid;			/* filesystem-wide unique device ID */
++};
++
++struct nfsd4_pnfs_dev_iter_res {
++	u64		gd_cookie;	/* request/response */
++	u64		gd_verf;	/* request/response */
++	u64		gd_devid;	/* response */
++	u32		gd_eof;		/* response */
++};
++
++/* Arguments for set_device_notify */
++struct pnfs_devnotify_arg {
++	struct nfsd4_pnfs_deviceid dn_devid;	/* request */
++	u32 dn_layout_type;			/* request */
++	u32 dn_notify_types;			/* request/response */
++};
++
++struct nfsd4_layout_seg {
++	u64	clientid;
++	u32	layout_type;
++	u32	iomode;
++	u64	offset;
++	u64	length;
++};
++
++/* Used by layout_get to encode layout (loc_body var in spec)
++ * Args:
++ * minlength - min number of accessible bytes given by layout
++ * fsid - Major part of struct pnfs_deviceid.  File system uses this
++ * to build the deviceid returned in the layout.
++ * fh - fs can modify the file handle for use on data servers
++ * seg - layout info requested and layout info returned
++ * xdr - xdr info
++ * return_on_close - true if layout to be returned on file close
++ */
++
++struct nfsd4_pnfs_layoutget_arg {
++	u64			lg_minlength;
++	u64			lg_sbid;
++	const struct knfsd_fh	*lg_fh;
++};
++
++struct nfsd4_pnfs_layoutget_res {
++	struct nfsd4_layout_seg	lg_seg;	/* request/response */
++	u32			lg_return_on_close;
++};
++
++struct nfsd4_pnfs_layoutcommit_arg {
++	struct nfsd4_layout_seg	lc_seg;		/* request */
++	u32			lc_reclaim;	/* request */
++	u32			lc_newoffset;	/* request */
++	u64			lc_last_wr;	/* request */
++	struct nfstime4		lc_mtime;	/* request */
++	u32			lc_up_len;	/* layout length */
++	void			*lc_up_layout;	/* decoded by callback */
++};
++
++struct nfsd4_pnfs_layoutcommit_res {
++	u32			lc_size_chg;	/* boolean for response */
++	u64			lc_newsize;	/* response */
++};
++
++#define PNFS_LAST_LAYOUT_NO_RECALLS ((void *)-1) /* used with lr_cookie below */
++
++struct nfsd4_pnfs_layoutreturn_arg {
++	u32			lr_return_type;	/* request */
++	struct nfsd4_layout_seg	lr_seg;		/* request */
++	u32			lr_reclaim;	/* request */
++	u32			lrf_body_len;	/* request */
++	void			*lrf_body;	/* request */
++	void			*lr_cookie;	/* fs private */
++};
++
++/* pNFS Metadata to Data server state communication */
++struct pnfs_get_state {
++	u32			dsid;    /* request */
++	u64			ino;      /* request */
++	nfs4_stateid		stid;     /* request;response */
++	nfs4_clientid		clid;     /* response */
++	u32			access;    /* response */
++	u32			stid_gen;    /* response */
++	u32			verifier[2]; /* response */
++};
++
++/*
++ * pNFS export operations vector.
++ *
++ * The filesystem must implement the following methods:
++ *   layout_type
++ *   get_device_info
++ *   layout_get
++ *
++ * All other methods are optional and can be set to NULL if not implemented.
++ */
++struct pnfs_export_operations {
++	/* Returns the supported pnfs_layouttype4. */
++	int (*layout_type) (struct super_block *);
++
++	/* Encode device info onto the xdr stream. */
++	int (*get_device_info) (struct super_block *,
++				struct exp_xdr_stream *,
++				u32 layout_type,
++				const struct nfsd4_pnfs_deviceid *);
++
++	/* Retrieve all available devices via an iterator.
++	 * arg->cookie == 0 indicates the beginning of the list,
++	 * otherwise arg->verf is used to verify that the list hasn't changed
++	 * while retrieved.
++	 *
++	 * On output, the filesystem sets the devid based on the current cookie
++	 * and sets res->cookie and res->verf corresponding to the next entry.
++	 * When the last entry in the list is retrieved, res->eof is set to 1.
++	 */
++	int (*get_device_iter) (struct super_block *,
++				u32 layout_type,
++				struct nfsd4_pnfs_dev_iter_res *);
++
++	int (*set_device_notify) (struct super_block *,
++				  struct pnfs_devnotify_arg *);
++
++	/* Retrieve and encode a layout for inode onto the xdr stream.
++	 * arg->minlength is the minimum number of accessible bytes required
++	 *   by the client.
++	 * The maximum number of bytes to encode the layout is given by
++	 *   the xdr stream end pointer.
++	 * arg->fsid contains the major part of struct pnfs_deviceid.
++	 *   The file system uses this to build the deviceid returned
++	 *   in the layout.
++	 * res->seg - layout segment requested and layout info returned.
++	 * res->fh can be modified the file handle for use on data servers
++	 * res->return_on_close - true if layout to be returned on file close
++	 *
++	 * return one of the following nfs errors:
++	 * NFS_OK			Success
++	 * NFS4ERR_ACCESS		Permission error
++	 * NFS4ERR_BADIOMODE		Server does not support requested iomode
++	 * NFS4ERR_BADLAYOUT		No layout matching loga_minlength rules
++	 * NFS4ERR_INVAL		Parameter other than layout is invalid
++	 * NFS4ERR_IO			I/O error
++	 * NFS4ERR_LAYOUTTRYLATER	Layout may be retrieved later
++	 * NFS4ERR_LAYOUTUNAVAILABLE	Layout unavailable for this file
++	 * NFS4ERR_LOCKED		Lock conflict
++	 * NFS4ERR_NOSPC		Out-of-space error occurred
++	 * NFS4ERR_RECALLCONFLICT	Layout currently unavailable due to
++	 *				a conflicting CB_LAYOUTRECALL
++	 * NFS4ERR_SERVERFAULT		Server went berserk
++	 * NFS4ERR_TOOSMALL		loga_maxcount too small to fit layout
++	 * NFS4ERR_WRONG_TYPE		Wrong file type (not a regular file)
++	 */
++	enum nfsstat4 (*layout_get) (struct inode *,
++				     struct exp_xdr_stream *xdr,
++				     const struct nfsd4_pnfs_layoutget_arg *,
++				     struct nfsd4_pnfs_layoutget_res *);
++
++	/* Commit changes to layout */
++	int (*layout_commit) (struct inode *,
++			      const struct nfsd4_pnfs_layoutcommit_arg *,
++			      struct nfsd4_pnfs_layoutcommit_res *);
++
++	/* Returns the layout */
++	int (*layout_return) (struct inode *,
++			      const struct nfsd4_pnfs_layoutreturn_arg *);
++
++	/* Can layout segments be merged for this layout type? */
++	int (*can_merge_layouts) (u32 layout_type);
++
++	/* pNFS Files layout specific operations */
++
++	/* Get the write verifier for DS (called on MDS only) */
++	void (*get_verifier) (struct super_block *, u32 *p);
++	/* Call fs on DS only */
++	int (*get_state) (struct inode *, struct knfsd_fh *,
++			  struct pnfs_get_state *);
++};
++
++struct nfsd4_pnfs_cb_layout {
++	u32			cbl_recall_type;	/* request */
++	struct nfsd4_layout_seg cbl_seg;		/* request */
++	u32			cbl_layoutchanged;	/* request */
++	nfs4_stateid		cbl_sid;		/* request */
++	struct nfs4_fsid	cbl_fsid;
++	void			*cbl_cookie;		/* fs private */
++};
++
++/* layoutrecall request (from exported filesystem) */
++struct nfs4_layoutrecall {
++	struct kref			clr_ref;
++	struct nfsd4_pnfs_cb_layout	cb;	/* request */
++	struct list_head		clr_perclnt; /* on cl_layoutrecalls */
++	struct nfs4_client	       *clr_client;
++	struct nfs4_file	       *clr_file;
++	struct timespec			clr_time;	/* last activity */
++	struct super_block 		*clr_sb; /* We might not have a file */
++	struct nfs4_layoutrecall	*parent; /* The initiating recall */
++
++	void				*clr_args;	/* nfsd internal */
++};
++
++struct nfsd4_pnfs_cb_dev_item {
++	u32			cbd_notify_type;	/* request */
++	u32			cbd_layout_type;	/* request */
++	struct nfsd4_pnfs_deviceid cbd_devid;		/* request */
++	u32			cbd_immediate;		/* request */
++};
++
++struct nfsd4_pnfs_cb_dev_list {
++	u32				cbd_len;  /* request */
++	struct nfsd4_pnfs_cb_dev_item  *cbd_list; /* request */
++};
++
++/*
++ * callbacks provided by the nfsd
++ */
++struct pnfsd_cb_operations {
++	/* Generic callbacks */
++	int (*cb_layout_recall) (struct super_block *, struct inode *,
++				 struct nfsd4_pnfs_cb_layout *);
++	int (*cb_device_notify) (struct super_block *,
++				 struct nfsd4_pnfs_cb_dev_list *);
++
++	/* pNFS Files layout specific callbacks */
++
++	/* Callback from fs on MDS only */
++	int (*cb_get_state) (struct super_block *, struct pnfs_get_state *);
++	/* Callback from fs on DS only */
++	int (*cb_change_state) (struct pnfs_get_state *);
++};
++
++#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noarch/include/linux/nfsd/syscall.h
+--- linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-08-23 12:09:03.380502500 -0400
+@@ -29,6 +29,7 @@
+ /*#define NFSCTL_GETFH		6	/ * get an fh by ino DISCARDED */
+ #define NFSCTL_GETFD		7	/* get an fh by path (used by mountd) */
+ #define	NFSCTL_GETFS		8	/* get an fh by path with max FH len */
++#define	NFSCTL_FD2FH		9	/* get a fh from a fd */
+ 
+ /* SVC */
+ struct nfsctl_svc {
+@@ -71,6 +72,11 @@ struct nfsctl_fsparm {
+ 	int			gd_maxlen;
+ };
+ 
++/* FD2FH */
++struct nfsctl_fd2fh {
++	int			fd;
++};
++
+ /*
+  * This is the argument union.
+  */
+@@ -82,6 +88,7 @@ struct nfsctl_arg {
+ 		struct nfsctl_export	u_export;
+ 		struct nfsctl_fdparm	u_getfd;
+ 		struct nfsctl_fsparm	u_getfs;
++		struct nfsctl_fd2fh	u_fd2fh;
+ 		/*
+ 		 * The following dummy member is needed to preserve binary compatibility
+ 		 * on platforms where alignof(void*)>alignof(int).  It's needed because
+@@ -95,6 +102,7 @@ struct nfsctl_arg {
+ #define ca_export	u.u_export
+ #define ca_getfd	u.u_getfd
+ #define	ca_getfs	u.u_getfs
++#define	ca_fd2fh	u.u_fd2fh
+ };
+ 
+ union nfsctl_res {
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/include/linux/nfs_fs.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-23 12:08:29.061494081 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-23 12:09:03.381511751 -0400
+@@ -72,13 +72,20 @@ struct nfs_access_entry {
+ 	int			mask;
+ };
+ 
++struct nfs_lock_context {
++	atomic_t count;
++	struct list_head list;
++	struct nfs_open_context *open_context;
++	fl_owner_t lockowner;
++	pid_t pid;
++};
++
+ struct nfs4_state;
+ struct nfs_open_context {
+-	atomic_t count;
++	struct nfs_lock_context lock_context;
+ 	struct path path;
+ 	struct rpc_cred *cred;
+ 	struct nfs4_state *state;
+-	fl_owner_t lockowner;
+ 	fmode_t mode;
+ 
+ 	unsigned long flags;
+@@ -97,6 +104,26 @@ struct nfs_delegation;
+ 
+ struct posix_acl;
+ 
++struct pnfs_layout_type {
++	int refcount;
++	struct list_head lo_layouts;	/* other client layouts */
++	struct list_head segs;		/* layout segments list */
++	int roc_iomode;			/* iomode to return on close, 0=none */
++	seqlock_t seqlock;		/* Protects the stateid */
++	nfs4_stateid stateid;
++	unsigned long pnfs_layout_state;
++	#define NFS_INO_RO_LAYOUT_FAILED 0      /* get ro layout failed stop trying */
++	#define NFS_INO_RW_LAYOUT_FAILED 1      /* get rw layout failed stop trying */
++	#define NFS_INO_LAYOUTCOMMIT     3      /* LAYOUTCOMMIT needed */
++	struct rpc_cred         *lo_cred; /* layoutcommit credential */
++	/* DH: These vars keep track of the maximum write range
++	 * so the values can be used for layoutcommit.
++	 */
++	loff_t                  pnfs_write_begin_pos;
++	loff_t                  pnfs_write_end_pos;
++	struct inode		*lo_inode;
++};
++
+ /*
+  * nfs fs inode data in memory
+  */
+@@ -181,6 +208,13 @@ struct nfs_inode {
+ 	struct nfs_delegation	*delegation;
+ 	fmode_t			 delegation_state;
+ 	struct rw_semaphore	rwsem;
++
++	/* pNFS layout information */
++#if defined(CONFIG_NFS_V4_1)
++	wait_queue_head_t lo_waitq;
++	struct pnfs_layout_type *layout;
++	time_t pnfs_layout_suspend;
++#endif /* CONFIG_NFS_V4_1 */
+ #endif /* CONFIG_NFS_V4*/
+ #ifdef CONFIG_NFS_FSCACHE
+ 	struct fscache_cookie	*fscache;
+@@ -353,6 +387,8 @@ extern void nfs_setattr_update_inode(str
+ extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
+ extern void put_nfs_open_context(struct nfs_open_context *ctx);
+ extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode);
++extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx);
++extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx);
+ extern u64 nfs_compat_user_ino64(u64 fileid);
+ extern void nfs_fattr_init(struct nfs_fattr *fattr);
+ 
+@@ -481,8 +517,12 @@ extern void nfs_unblock_sillyrename(stru
+ extern int  nfs_congestion_kb;
+ extern int  nfs_writepage(struct page *page, struct writeback_control *wbc);
+ extern int  nfs_writepages(struct address_space *, struct writeback_control *);
+-extern int  nfs_flush_incompatible(struct file *file, struct page *page);
+-extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
++struct pnfs_layout_segment;
++extern int  nfs_flush_incompatible(struct file *file, struct page *page,
++				   struct pnfs_layout_segment *lseg);
++extern int  nfs_updatepage(struct file *, struct page *,
++			   unsigned int offset, unsigned int count,
++			   struct pnfs_layout_segment *lseg, void *fsdata);
+ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
+ 
+ /*
+@@ -604,6 +644,8 @@ extern void * nfs_root_data(void);
+ #define NFSDBG_CLIENT		0x0200
+ #define NFSDBG_MOUNT		0x0400
+ #define NFSDBG_FSCACHE		0x0800
++#define NFSDBG_PNFS		0x1000
++#define NFSDBG_PNFS_LD		0x2000
+ #define NFSDBG_ALL		0xFFFF
+ 
+ #ifdef __KERNEL__
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/include/linux/nfs_fs_sb.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-08-23 12:08:29.062501618 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-23 12:09:03.383491395 -0400
+@@ -15,6 +15,7 @@ struct nlm_host;
+ struct nfs4_sequence_args;
+ struct nfs4_sequence_res;
+ struct nfs_server;
++struct nfs4_minor_version_ops;
+ 
+ /*
+  * The nfs_client identifies our client state to the server.
+@@ -70,11 +71,7 @@ struct nfs_client {
+ 	 */
+ 	char			cl_ipaddr[48];
+ 	unsigned char		cl_id_uniquifier;
+-	int		     (* cl_call_sync)(struct nfs_server *server,
+-					      struct rpc_message *msg,
+-					      struct nfs4_sequence_args *args,
+-					      struct nfs4_sequence_res *res,
+-					      int cache_reply);
++	const struct nfs4_minor_version_ops *cl_mvops;
+ #endif /* CONFIG_NFS_V4 */
+ 
+ #ifdef CONFIG_NFS_V4_1
+@@ -85,6 +82,8 @@ struct nfs_client {
+ 	/* The flags used for obtaining the clientid during EXCHANGE_ID */
+ 	u32			cl_exchange_flags;
+ 	struct nfs4_session	*cl_session; 	/* sharred session */
++	struct list_head	cl_layouts;
++	struct nfs4_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ #ifdef CONFIG_NFS_FSCACHE
+@@ -92,6 +91,16 @@ struct nfs_client {
+ #endif
+ };
+ 
++static inline bool
++is_ds_only_client(struct nfs_client *clp)
++{
++#ifdef CONFIG_NFS_V4_1
++	return is_ds_only_session(clp->cl_exchange_flags);
++#else
++	return false;
++#endif
++}
++
+ /*
+  * NFS client parameters stored in the superblock.
+  */
+@@ -136,7 +145,7 @@ struct nfs_server {
+ #endif
+ 
+ #ifdef CONFIG_NFS_V4
+-	u32			attr_bitmask[2];/* V4 bitmask representing the set
++	u32			attr_bitmask[3];/* V4 bitmask representing the set
+ 						   of attributes supported on this
+ 						   filesystem */
+ 	u32			cache_consistency_bitmask[2];
+@@ -148,6 +157,15 @@ struct nfs_server {
+ 						   that are supported on this
+ 						   filesystem */
+ #endif
++
++#ifdef CONFIG_NFS_V4_1
++	u32				pnfs_blksize; /* layout_blksize attr */
++	struct pnfs_layoutdriver_type  *pnfs_curr_ld; /* Active layout driver */
++	void			       *pnfs_ld_data; /* Per-mount data */
++	unsigned int			ds_rsize;  /* Data server read size */
++	unsigned int			ds_wsize;  /* Data server write size */
++#endif /* CONFIG_NFS_V4_1 */
++
+ 	void (*destroy)(struct nfs_server *);
+ 
+ 	atomic_t active; /* Keep trace of any activity to this server */
+diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch/include/linux/nfs_iostat.h
+--- linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-08-23 12:09:03.384501540 -0400
+@@ -113,6 +113,9 @@ enum nfs_stat_eventcounters {
+ 	NFSIOS_SHORTREAD,
+ 	NFSIOS_SHORTWRITE,
+ 	NFSIOS_DELAY,
++	NFSIOS_PNFS_READ,
++	NFSIOS_PNFS_WRITE,
++	NFSIOS_PNFS_COMMIT,
+ 	__NFSIOS_COUNTSMAX,
+ };
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/include/linux/nfs_page.h
+--- linux-2.6.34.noarch/include/linux/nfs_page.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-08-23 12:09:03.385491518 -0400
+@@ -39,6 +39,7 @@ struct nfs_page {
+ 	struct list_head	wb_list;	/* Defines state of page: */
+ 	struct page		*wb_page;	/* page to read in/write out */
+ 	struct nfs_open_context	*wb_context;	/* File state context info */
++	struct nfs_lock_context	*wb_lock_context;	/* lock context info */
+ 	atomic_t		wb_complete;	/* i/os we're waiting for */
+ 	pgoff_t			wb_index;	/* Offset >> PAGE_CACHE_SHIFT */
+ 	unsigned int		wb_offset,	/* Offset & ~PAGE_CACHE_MASK */
+@@ -47,6 +48,7 @@ struct nfs_page {
+ 	struct kref		wb_kref;	/* reference count */
+ 	unsigned long		wb_flags;
+ 	struct nfs_writeverf	wb_verf;	/* Commit cookie */
++	struct pnfs_layout_segment *wb_lseg;	/* Pnfs layout info */
+ };
+ 
+ struct nfs_pageio_descriptor {
+@@ -60,6 +62,12 @@ struct nfs_pageio_descriptor {
+ 	int			(*pg_doio)(struct inode *, struct list_head *, unsigned int, size_t, int);
+ 	int 			pg_ioflags;
+ 	int			pg_error;
++	struct pnfs_layout_segment *pg_lseg;
++#ifdef CONFIG_NFS_V4_1
++	int			pg_iswrite;
++	int			pg_boundary;
++	int			(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
++#endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+ #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
+@@ -68,13 +76,15 @@ extern	struct nfs_page *nfs_create_reque
+ 					    struct inode *inode,
+ 					    struct page *page,
+ 					    unsigned int offset,
+-					    unsigned int count);
++					    unsigned int count,
++					    struct pnfs_layout_segment *lseg);
+ extern	void nfs_clear_request(struct nfs_page *req);
+ extern	void nfs_release_request(struct nfs_page *req);
+ 
+ 
+ extern	int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
+-			  pgoff_t idx_start, unsigned int npages, int tag);
++			  pgoff_t idx_start, unsigned int npages, int tag,
++			  int *use_pnfs);
+ extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
+ 			     struct inode *inode,
+ 			     int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
+diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/include/linux/nfs_xdr.h
+--- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-08-23 12:08:29.062501618 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-23 12:09:03.387491422 -0400
+@@ -3,6 +3,8 @@
+ 
+ #include <linux/nfsacl.h>
+ #include <linux/nfs3.h>
++#include <linux/nfs4.h>
++#include <linux/sunrpc/sched.h>
+ 
+ /*
+  * To change the maximum rsize and wsize supported by the NFS client, adjust
+@@ -10,7 +12,7 @@
+  * support a megabyte or more.  The default is left at 4096 bytes, which is
+  * reasonable for NFS over UDP.
+  */
+-#define NFS_MAX_FILE_IO_SIZE	(1048576U)
++#define NFS_MAX_FILE_IO_SIZE	(4U * 1048576U)
+ #define NFS_DEF_FILE_IO_SIZE	(4096U)
+ #define NFS_MIN_FILE_IO_SIZE	(1024U)
+ 
+@@ -113,6 +115,10 @@ struct nfs_fsinfo {
+ 	__u32			dtpref;	/* pref. readdir transfer size */
+ 	__u64			maxfilesize;
+ 	__u32			lease_time; /* in seconds */
++#if defined(CONFIG_NFS_V4_1)
++	__u32			layouttype; /* supported pnfs layout driver */
++	__u32			blksize; /* preferred pnfs io block size */
++#endif
+ };
+ 
+ struct nfs_fsstat {
+@@ -196,8 +202,10 @@ struct nfs_openargs {
+ 	__u64                   clientid;
+ 	__u64                   id;
+ 	union {
+-		struct iattr *  attrs;    /* UNCHECKED, GUARDED */
+-		nfs4_verifier   verifier; /* EXCLUSIVE */
++		struct {
++			struct iattr *  attrs;    /* UNCHECKED, GUARDED */
++			nfs4_verifier   verifier; /* EXCLUSIVE */
++		};
+ 		nfs4_stateid	delegation;		/* CLAIM_DELEGATE_CUR */
+ 		fmode_t		delegation_type;	/* CLAIM_PREVIOUS */
+ 	} u;
+@@ -313,6 +321,10 @@ struct nfs_lockt_res {
+ 	struct nfs4_sequence_res	seq_res;
+ };
+ 
++struct nfs_release_lockowner_args {
++	struct nfs_lowner	lock_owner;
++};
++
+ struct nfs4_delegreturnargs {
+ 	const struct nfs_fh *fhandle;
+ 	const nfs4_stateid *stateid;
+@@ -332,6 +344,7 @@ struct nfs4_delegreturnres {
+ struct nfs_readargs {
+ 	struct nfs_fh *		fh;
+ 	struct nfs_open_context *context;
++	struct nfs_lock_context *lock_context;
+ 	__u64			offset;
+ 	__u32			count;
+ 	unsigned int		pgbase;
+@@ -352,6 +365,7 @@ struct nfs_readres {
+ struct nfs_writeargs {
+ 	struct nfs_fh *		fh;
+ 	struct nfs_open_context *context;
++	struct nfs_lock_context *lock_context;
+ 	__u64			offset;
+ 	__u32			count;
+ 	enum nfs3_stable_how	stable;
+@@ -846,7 +860,7 @@ struct nfs4_server_caps_arg {
+ };
+ 
+ struct nfs4_server_caps_res {
+-	u32				attr_bitmask[2];
++	u32				attr_bitmask[3];
+ 	u32				acl_bitmask;
+ 	u32				has_links;
+ 	u32				has_symlinks;
+@@ -961,6 +975,27 @@ struct nfs_page;
+ 
+ #define NFS_PAGEVEC_SIZE	(8U)
+ 
++#if defined(CONFIG_NFS_V4_1)
++/* pnfsflag values */
++#define PNFS_NO_RPC		0x0001   /* non rpc result callback switch */
++
++/* pnfs-specific data needed for read, write, and commit calls */
++struct pnfs_call_data {
++	struct pnfs_layout_segment *lseg;
++	const struct rpc_call_ops *call_ops;
++	u32			orig_count;	/* for retry via MDS */
++	int			pnfs_error;
++	u8			pnfsflags;
++	u8			how;		/* for FLUSH_STABLE */
++};
++
++/* files layout-type specific data for read, write, and commit */
++struct pnfs_fl_call_data {
++	struct nfs_client	*ds_nfs_client;
++	__u64			orig_offset;
++};
++#endif /* CONFIG_NFS_V4_1 */
++
+ struct nfs_read_data {
+ 	int			flags;
+ 	struct rpc_task		task;
+@@ -976,10 +1011,16 @@ struct nfs_read_data {
+ #ifdef CONFIG_NFS_V4
+ 	unsigned long		timestamp;	/* For lease renewal */
+ #endif
++#if defined(CONFIG_NFS_V4_1)
++	struct pnfs_call_data	pdata;
++	struct pnfs_fl_call_data fldata;
++#endif /* CONFIG_NFS_V4_1 */
+ 	struct page		*page_array[NFS_PAGEVEC_SIZE];
+ };
+ 
+ struct nfs_write_data {
++	struct kref		refcount;	/* For pnfs commit splitting */
++	struct nfs_write_data	*parent;	/* For pnfs commit splitting */
+ 	int			flags;
+ 	struct rpc_task		task;
+ 	struct inode		*inode;
+@@ -995,6 +1036,10 @@ struct nfs_write_data {
+ #ifdef CONFIG_NFS_V4
+ 	unsigned long		timestamp;	/* For lease renewal */
+ #endif
++#if defined(CONFIG_NFS_V4_1)
++	struct pnfs_call_data	pdata;
++	struct pnfs_fl_call_data fldata;
++#endif /* CONFIG_NFS_V4_1 */
+ 	struct page		*page_array[NFS_PAGEVEC_SIZE];
+ };
+ 
+@@ -1008,6 +1053,7 @@ struct nfs_rpc_ops {
+ 	const struct dentry_operations *dentry_ops;
+ 	const struct inode_operations *dir_inode_ops;
+ 	const struct inode_operations *file_inode_ops;
++	const struct file_operations *file_ops;
+ 
+ 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
+ 			    struct nfs_fsinfo *);
+@@ -1072,6 +1118,7 @@ struct nfs_rpc_ops {
+ extern const struct nfs_rpc_ops	nfs_v2_clientops;
+ extern const struct nfs_rpc_ops	nfs_v3_clientops;
+ extern const struct nfs_rpc_ops	nfs_v4_clientops;
++extern const struct nfs_rpc_ops	pnfs_v4_clientops;
+ extern struct rpc_version	nfs_version2;
+ extern struct rpc_version	nfs_version3;
+ extern struct rpc_version	nfs_version4;
+diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.noarch/include/linux/panfs_shim_api.h
+--- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-08-23 12:09:03.388491527 -0400
++++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-08-23 12:09:03.388491527 -0400
+@@ -0,0 +1,57 @@
++#ifndef _PANFS_SHIM_API_H
++#define _PANFS_SHIM_API_H
++
++/*
++ * imported panfs functions
++ */
++struct panfs_export_operations {
++	int (*convert_rc)(pan_status_t rc);
++
++	int (*sm_sec_t_get_size_otw)(
++		pan_sm_sec_otw_t *var,
++		pan_size_t *core_sizep,
++		pan_size_t *wire_size,
++		void *buf_end);
++
++	int (*sm_sec_t_unmarshall)(
++		pan_sm_sec_otw_t *in,
++		pan_sm_sec_t *out,
++		void *buf,
++		pan_size_t size,
++		pan_size_t *otw_consumed,
++		pan_size_t *in_core_consumed);
++
++	int (*ucreds_get)(void **ucreds_pp);
++
++	void (*ucreds_put)(void *ucreds);
++
++	int (*sam_read)(
++		pan_sam_access_flags_t  flags,
++		pan_sam_read_args_t    *args_p,
++		pan_sam_obj_sec_t      *obj_sec_p,
++		pan_sg_entry_t         *data_p,
++		void                   *ucreds,
++		pan_sam_read_cb_t       closure,
++		void                   *user_arg1,
++		void                   *user_arg2,
++		pan_sam_read_res_t     *res_p);
++
++	int (*sam_write)(
++		pan_sam_access_flags_t  flags,
++		pan_sam_write_args_t   *args_p,
++		pan_sam_obj_sec_t      *obj_sec_p,
++		pan_sg_entry_t         *data_p,
++		void                   *ucreds,
++		pan_sam_write_cb_t      closure,
++		void                   *user_arg1,
++		void                   *user_arg2,
++		pan_sam_write_res_t    *res_p);
++};
++
++extern int
++panfs_shim_register(struct panfs_export_operations *ops);
++
++extern int
++panfs_shim_unregister(void);
++
++#endif /* _PANFS_SHIM_API_H */
+diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h
+--- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-08-23 12:09:03.390501461 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-08-23 12:09:03.390501461 -0400
+@@ -0,0 +1,440 @@
++/*
++ *  pnfs_osd_xdr.h
++ *
++ *  pNFS-osd on-the-wire data structures
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#ifndef __PNFS_OSD_XDR_H__
++#define __PNFS_OSD_XDR_H__
++
++#include <linux/nfs_fs.h>
++#include <linux/nfs_page.h>
++#include <linux/exp_xdr.h>
++#include <linux/pnfs_xdr.h>
++#include <scsi/osd_protocol.h>
++
++#define PNFS_OSD_OSDNAME_MAXSIZE 256
++
++/*
++ * START OF "GENERIC" DECODE ROUTINES.
++ *   These may look a little ugly since they are imported from a "generic"
++ * set of XDR encode/decode routines which are intended to be shared by
++ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
++ *
++ * If the pain of reading these is too great, it should be a straightforward
++ * task to translate them into Linux-specific versions which are more
++ * consistent with the style used in NFSv2/v3...
++ */
++#define READ32(x)         (x) = ntohl(*p++)
++#define READ64(x)         do {			\
++	(x) = (u64)ntohl(*p++) << 32;		\
++	(x) |= ntohl(*p++);			\
++} while (0)
++#define COPYMEM(x, nbytes) do {			\
++	memcpy((x), p, nbytes);			\
++	p += XDR_QUADLEN(nbytes);		\
++} while (0)
++
++/*
++ * draft-ietf-nfsv4-minorversion-22
++ * draft-ietf-nfsv4-pnfs-obj-12
++ */
++
++/* Layout Structure */
++
++enum pnfs_osd_raid_algorithm4 {
++	PNFS_OSD_RAID_0		= 1,
++	PNFS_OSD_RAID_4		= 2,
++	PNFS_OSD_RAID_5		= 3,
++	PNFS_OSD_RAID_PQ	= 4     /* Reed-Solomon P+Q */
++};
++
++/*   struct pnfs_osd_data_map4 {
++ *       uint32_t                    odm_num_comps;
++ *       length4                     odm_stripe_unit;
++ *       uint32_t                    odm_group_width;
++ *       uint32_t                    odm_group_depth;
++ *       uint32_t                    odm_mirror_cnt;
++ *       pnfs_osd_raid_algorithm4    odm_raid_algorithm;
++ *   };
++ */
++struct pnfs_osd_data_map {
++	u32	odm_num_comps;
++	u64	odm_stripe_unit;
++	u32	odm_group_width;
++	u32	odm_group_depth;
++	u32	odm_mirror_cnt;
++	u32	odm_raid_algorithm;
++};
++
++static inline int
++pnfs_osd_data_map_xdr_sz(void)
++{
++	return 1 + 2 + 1 + 1 + 1 + 1;	/* u32 words per field; u64 odm_stripe_unit is 2 */
++}
++
++static inline size_t
++pnfs_osd_data_map_incore_sz(void)
++{
++	return sizeof(struct pnfs_osd_data_map);
++}
++
++/*   struct pnfs_osd_objid4 {
++ *       deviceid4       oid_device_id;
++ *       uint64_t        oid_partition_id;
++ *       uint64_t        oid_object_id;
++ *   };
++ */
++struct pnfs_osd_objid {
++	struct pnfs_deviceid	oid_device_id;
++	u64			oid_partition_id;
++	u64			oid_object_id;
++};
++
++/* For printout. I use "dev(%llx:%llx)", _DEVID_LO(), _DEVID_HI BE style */
++#define _DEVID_LO(oid_device_id) \
++	(unsigned long long)be64_to_cpup((__be64 *)oid_device_id.data)
++
++#define _DEVID_HI(oid_device_id) \
++	(unsigned long long)be64_to_cpup(((__be64 *)oid_device_id.data) + 1)
++
++static inline int
++pnfs_osd_objid_xdr_sz(void)
++{
++	return (NFS4_PNFS_DEVICEID4_SIZE / 4) + 2 + 2;	/* deviceid + two u64 ids, in u32 words */
++}
++
++static inline size_t
++pnfs_osd_objid_incore_sz(void)
++{
++	return sizeof(struct pnfs_osd_objid);
++}
++
++enum pnfs_osd_version {
++	PNFS_OSD_MISSING              = 0,
++	PNFS_OSD_VERSION_1            = 1,
++	PNFS_OSD_VERSION_2            = 2
++};
++
++struct pnfs_osd_opaque_cred {
++	u32 cred_len;
++	u8 *cred;
++};
++
++static inline int
++pnfs_osd_opaque_cred_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++	u32 n;
++
++	READ32(n);
++	p += XDR_QUADLEN(n);
++	return p - start;
++}
++
++static inline size_t
++pnfs_osd_opaque_cred_incore_sz(u32 *p)
++{
++	u32 n;
++
++	READ32(n);
++	return XDR_QUADLEN(n) * 4;
++}
++
++enum pnfs_osd_cap_key_sec {
++	PNFS_OSD_CAP_KEY_SEC_NONE     = 0,
++	PNFS_OSD_CAP_KEY_SEC_SSV      = 1,
++};
++
++/*   struct pnfs_osd_object_cred4 {
++ *       pnfs_osd_objid4         oc_object_id;
++ *       pnfs_osd_version4       oc_osd_version;
++ *       pnfs_osd_cap_key_sec4   oc_cap_key_sec;
++ *       opaque                  oc_capability_key<>;
++ *       opaque                  oc_capability<>;
++ *   };
++ */
++struct pnfs_osd_object_cred {
++	struct pnfs_osd_objid		oc_object_id;
++	u32				oc_osd_version;
++	u32				oc_cap_key_sec;
++	struct pnfs_osd_opaque_cred	oc_cap_key;
++	struct pnfs_osd_opaque_cred	oc_cap;
++};
++
++static inline int
++pnfs_osd_object_cred_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++
++	p += pnfs_osd_objid_xdr_sz() + 2;	/* oc_object_id, oc_osd_version, oc_cap_key_sec */
++	p += pnfs_osd_opaque_cred_xdr_sz(p);	/* oc_capability_key<> */
++	p += pnfs_osd_opaque_cred_xdr_sz(p);	/* oc_capability<> */
++	return p - start;			/* encoded size in 32-bit words */
++}
++
++static inline size_t
++pnfs_osd_object_cred_incore_sz(u32 *p)
++{
++	size_t sz = sizeof(struct pnfs_osd_object_cred);	/* fixed part, bytes */
++
++	p += pnfs_osd_objid_xdr_sz() + 2;	/* skip to the first opaque */
++	sz += pnfs_osd_opaque_cred_incore_sz(p);	/* oc_capability_key<> body */
++	p += pnfs_osd_opaque_cred_xdr_sz(p);
++	sz += pnfs_osd_opaque_cred_incore_sz(p);	/* oc_capability<> body */
++	return sz;
++}
++
++/*   struct pnfs_osd_layout4 {
++ *       pnfs_osd_data_map4      olo_map;
++ *       uint32_t                olo_comps_index;
++ *       pnfs_osd_object_cred4   olo_components<>;
++ *   };
++ */
++struct pnfs_osd_layout {
++	struct pnfs_osd_data_map	olo_map;
++	u32				olo_comps_index;
++	u32				olo_num_comps;
++	struct pnfs_osd_object_cred	*olo_comps;
++};
++
++static inline int
++pnfs_osd_layout_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++	u32 n;
++
++	p += pnfs_osd_data_map_xdr_sz() + 1;	/* skip olo_map and olo_comps_index */
++	READ32(n);				/* presumably the olo_components<> count */
++	while ((int)(n--) > 0)			/* NOTE(review): n is not bounded here */
++		p += pnfs_osd_object_cred_xdr_sz(p);
++	return p - start;			/* encoded size in 32-bit words */
++}
++
++static inline size_t
++pnfs_osd_layout_incore_sz(u32 *p)
++{
++	u32 n;
++	size_t sz;
++
++	p += pnfs_osd_data_map_xdr_sz() + 1;	/* skip olo_map and olo_comps_index */
++	READ32(n);				/* presumably the olo_components<> count */
++	sz = sizeof(struct pnfs_osd_layout);	/* fixed part, bytes */
++	while ((int)(n--) > 0) {		/* NOTE(review): n is not bounded here */
++		sz += pnfs_osd_object_cred_incore_sz(p);
++		p += pnfs_osd_object_cred_xdr_sz(p);	/* on to the next component */
++	}
++	return sz;
++}
++
++/* Device Address */
++
++enum pnfs_osd_targetid_type {
++	OBJ_TARGET_ANON = 1,
++	OBJ_TARGET_SCSI_NAME = 2,
++	OBJ_TARGET_SCSI_DEVICE_ID = 3,
++};
++
++/*   union pnfs_osd_targetid4 switch (pnfs_osd_targetid_type4 oti_type) {
++ *       case OBJ_TARGET_SCSI_NAME:
++ *           string              oti_scsi_name<>;
++ *
++ *       case OBJ_TARGET_SCSI_DEVICE_ID:
++ *           opaque              oti_scsi_device_id<>;
++ *
++ *       default:
++ *           void;
++ *   };
++ *
++ *   union pnfs_osd_targetaddr4 switch (bool ota_available) {
++ *       case TRUE:
++ *           netaddr4            ota_netaddr;
++ *       case FALSE:
++ *           void;
++ *   };
++ *
++ *   struct pnfs_osd_deviceaddr4 {
++ *       pnfs_osd_targetid4      oda_targetid;
++ *       pnfs_osd_targetaddr4    oda_targetaddr;
++ *       uint64_t                oda_lun;
++ *       opaque                  oda_systemid<>;
++ *       pnfs_osd_object_cred4   oda_root_obj_cred;
++ *       opaque                  oda_osdname<>;
++ *   };
++ */
++struct pnfs_osd_targetid {
++	u32				oti_type;
++	struct nfs4_string		oti_scsi_device_id;
++};
++
++enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 };
++
++/*   struct netaddr4 {
++ *       // see struct rpcb in RFC1833
++ *       string r_netid<>;    // network id
++ *       string r_addr<>;     // universal address
++ *   };
++ */
++struct pnfs_osd_net_addr {
++	struct nfs4_string	r_netid;
++	struct nfs4_string	r_addr;
++};
++
++struct pnfs_osd_targetaddr {
++	u32				ota_available;
++	struct pnfs_osd_net_addr	ota_netaddr;
++};
++
++enum {
++	NETWORK_ID_MAX = 16 / 4,
++	UNIVERSAL_ADDRESS_MAX = 64 / 4,
++	PNFS_OSD_TARGETADDR_MAX = 3 +  NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX,
++};
++
++struct pnfs_osd_deviceaddr {
++	struct pnfs_osd_targetid	oda_targetid;
++	struct pnfs_osd_targetaddr	oda_targetaddr;
++	u8				oda_lun[8];
++	struct nfs4_string		oda_systemid;
++	struct pnfs_osd_object_cred	oda_root_obj_cred;
++	struct nfs4_string		oda_osdname;
++};
++
++enum {
++	ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4,
++	PNFS_OSD_DEVICEADDR_MAX =
++		PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX +
++		2 /*oda_lun*/ +
++		1 + OSD_SYSTEMID_LEN +
++		1 + ODA_OSDNAME_MAX,
++};
++
++/* LAYOUTCOMMIT: layoutupdate */
++
++/*   union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) {
++ *       case TRUE:
++ *           int64_t     dsu_delta;
++ *       case FALSE:
++ *           void;
++ *   };
++ *
++ *   struct pnfs_osd_layoutupdate4 {
++ *       pnfs_osd_deltaspaceused4    olu_delta_space_used;
++ *       bool                        olu_ioerr_flag;
++ *   };
++ */
++struct pnfs_osd_layoutupdate {
++	u32	dsu_valid;
++	s64	dsu_delta;
++	u32	olu_ioerr_flag;
++};
++
++/* LAYOUTRETURN: I/O Error Report */
++
++enum pnfs_osd_errno {
++	PNFS_OSD_ERR_EIO		= 1,
++	PNFS_OSD_ERR_NOT_FOUND		= 2,
++	PNFS_OSD_ERR_NO_SPACE		= 3,
++	PNFS_OSD_ERR_BAD_CRED		= 4,
++	PNFS_OSD_ERR_NO_ACCESS		= 5,
++	PNFS_OSD_ERR_UNREACHABLE	= 6,
++	PNFS_OSD_ERR_RESOURCE		= 7
++};
++
++/*   struct pnfs_osd_ioerr4 {
++ *       pnfs_osd_objid4     oer_component;
++ *       length4             oer_comp_offset;
++ *       length4             oer_comp_length;
++ *       bool                oer_iswrite;
++ *       pnfs_osd_errno4     oer_errno;
++ *   };
++ */
++struct pnfs_osd_ioerr {
++	struct pnfs_osd_objid	oer_component;
++	u64			oer_comp_offset;
++	u64			oer_comp_length;
++	u32			oer_iswrite;
++	u32			oer_errno;
++};
++
++static inline unsigned
++pnfs_osd_ioerr_xdr_sz(void)
++{
++	return pnfs_osd_objid_xdr_sz() + 2 + 2 + 1 + 1;	/* words: objid, 2 x u64, 2 x u32 */
++}
++
++/* OSD XDR API */
++
++/* Layout helpers */
++extern struct pnfs_osd_layout *pnfs_osd_xdr_decode_layout(
++	struct pnfs_osd_layout *layout, u32 *p);
++
++extern int pnfs_osd_xdr_encode_layout(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_layout *layout);
++
++/* Device Info helpers */
++
++/* First pass calculate total size for space needed */
++extern size_t pnfs_osd_xdr_deviceaddr_incore_sz(u32 *p);
++
++/* Note: some strings pointed to inside @deviceaddr might point
++ * to space inside @p. @p should stay valid while @deviceaddr
++ * is in use.
++ * It is assumed that @deviceaddr points to bigger memory of size
++ * calculated in first pass by pnfs_osd_xdr_deviceaddr_incore_sz()
++ */
++extern void pnfs_osd_xdr_decode_deviceaddr(
++	struct pnfs_osd_deviceaddr *deviceaddr, u32 *p);
++
++/* For Servers */
++extern int pnfs_osd_xdr_encode_deviceaddr(
++	struct exp_xdr_stream *xdr, struct pnfs_osd_deviceaddr *devaddr);
++
++/* layoutupdate (layout_commit) xdr helpers */
++extern int
++pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
++				 struct pnfs_osd_layoutupdate *lou);
++extern __be32 *
++pnfs_osd_xdr_decode_layoutupdate(struct pnfs_osd_layoutupdate *lou, __be32 *p);
++
++/* osd_ioerror encoding/decoding (layout_return) */
++extern int
++pnfs_osd_xdr_encode_ioerr(struct xdr_stream *xdr, struct pnfs_osd_ioerr *ioerr);
++extern __be32 *
++pnfs_osd_xdr_decode_ioerr(struct pnfs_osd_ioerr *ioerr, __be32 *p);
++
++#endif /* __PNFS_OSD_XDR_H__ */
+diff -up linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_xdr.h
+--- linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig	2010-08-23 12:09:03.391491550 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_xdr.h	2010-08-23 12:09:03.391491550 -0400
+@@ -0,0 +1,134 @@
++/*
++ *  include/linux/pnfs_xdr.h
++ *
++ *  Common xdr data structures needed by pnfs client.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ * Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef LINUX_PNFS_XDR_H
++#define LINUX_PNFS_XDR_H
++
++#define PNFS_LAYOUT_MAXSIZE 4096
++#define NFS4_PNFS_DEVICEID4_SIZE 16
++
++struct pnfs_deviceid {
++	char data[NFS4_PNFS_DEVICEID4_SIZE];
++};
++
++struct nfs4_pnfs_layout {
++	__u32 len;
++	void *buf;
++};
++
++struct nfs4_pnfs_layout_segment {
++	u32 iomode;
++	u64 offset;
++	u64 length;
++};
++
++struct nfs4_pnfs_layoutget_arg {
++	__u32 type;
++	struct nfs4_pnfs_layout_segment lseg;
++	__u64 minlength;
++	__u32 maxcount;
++	struct inode *inode;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_pnfs_layoutget_res {
++	__u32 return_on_close;
++	struct nfs4_pnfs_layout_segment lseg;
++	__u32 type;
++	nfs4_stateid stateid;
++	struct nfs4_pnfs_layout layout;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct nfs4_pnfs_layoutget {
++	struct nfs4_pnfs_layoutget_arg args;
++	struct nfs4_pnfs_layoutget_res res;
++	struct pnfs_layout_segment **lsegpp;
++	int status;
++};
++
++struct pnfs_layoutcommit_arg {
++	nfs4_stateid stateid;
++	__u64 lastbytewritten;
++	__u32 time_modify_changed;
++	struct timespec time_modify;
++	const u32 *bitmask;
++	struct nfs_fh *fh;
++	struct inode *inode;
++
++	/* Values set by layout driver */
++	struct nfs4_pnfs_layout_segment lseg;
++	__u32 layout_type;
++	void *layoutdriver_data;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct pnfs_layoutcommit_res {
++	__u32 sizechanged;
++	__u64 newsize;
++	struct nfs_fattr *fattr;
++	const struct nfs_server *server;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct pnfs_layoutcommit_data {
++	struct rpc_task task;
++	struct rpc_cred *cred;
++	struct nfs_fattr fattr;
++	struct pnfs_layoutcommit_arg args;
++	struct pnfs_layoutcommit_res res;
++	int status;
++};
++
++struct nfs4_pnfs_layoutreturn_arg {
++	__u32	reclaim;
++	__u32	layout_type;
++	__u32	return_type;
++	struct nfs4_pnfs_layout_segment lseg;
++	struct inode *inode;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_pnfs_layoutreturn_res {
++	struct nfs4_sequence_res seq_res;
++	u32 lrs_present;
++	nfs4_stateid stateid;
++};
++
++struct nfs4_pnfs_layoutreturn {
++	struct nfs4_pnfs_layoutreturn_arg args;
++	struct nfs4_pnfs_layoutreturn_res res;
++	struct rpc_cred *cred;
++	int rpc_status;
++};
++
++struct nfs4_pnfs_getdevicelist_arg {
++	const struct nfs_fh *fh;
++	u32 layoutclass;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_pnfs_getdevicelist_res {
++	struct pnfs_devicelist *devlist;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct nfs4_pnfs_getdeviceinfo_arg {
++	struct pnfs_device *pdev;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_pnfs_getdeviceinfo_res {
++	struct pnfs_device *pdev;
++	struct nfs4_sequence_res seq_res;
++};
++
++#endif /* LINUX_PNFS_XDR_H */
+diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/include/linux/posix_acl.h
+--- linux-2.6.34.noarch/include/linux/posix_acl.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-08-23 12:09:03.393501437 -0400
+@@ -8,6 +8,7 @@
+ #ifndef __LINUX_POSIX_ACL_H
+ #define __LINUX_POSIX_ACL_H
+ 
++#include <linux/fs.h>
+ #include <linux/slab.h>
+ 
+ #define ACL_UNDEFINED_ID	(-1)
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-08-23 12:09:03.393501437 -0400
+@@ -14,6 +14,8 @@
+ /* size of an XDR encoding unit in bytes, i.e. 32bit */
+ #define XDR_UNIT	(4)
+ 
++#include <linux/types.h>
++
+ /* spec defines authentication flavor as an unsigned 32 bit integer */
+ typedef u32	rpc_authflavor_t;
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-08-23 12:09:03.394512138 -0400
+@@ -3,6 +3,7 @@
+ 
+ #ifdef __KERNEL__
+ 
++#include <linux/fs.h>
+ #include <linux/workqueue.h>
+ 
+ struct rpc_pipe_msg {
+@@ -11,6 +12,10 @@ struct rpc_pipe_msg {
+ 	size_t len;
+ 	size_t copied;
+ 	int errno;
++#define PIPEFS_AUTOFREE_RPCMSG       0x01 /* frees rpc_pipe_msg */
++#define PIPEFS_AUTOFREE_RPCMSG_DATA  0x02 /* frees rpc_pipe_msg->data */
++#define PIPEFS_AUTOFREE_UPCALL_MSG   PIPEFS_AUTOFREE_RPCMSG_DATA
++	u8 flags;
+ };
+ 
+ struct rpc_pipe_ops {
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-08-23 12:09:03.394512138 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-08-23 12:09:03.395501822 -0400
+@@ -0,0 +1,111 @@
++/*
++ *  Copyright (c) 2008 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  David M. Richter <richterd@citi.umich.edu>
++ *
++ *  Drawing on work done by Andy Adamson <andros@citi.umich.edu> and
++ *  Marius Eriksen <marius@monkey.org>.  Thanks for the help over the
++ *  years, guys.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *  With thanks to CITI's project sponsor and partner, IBM.
++ */
++
++#ifndef _SIMPLE_RPC_PIPEFS_H_
++#define _SIMPLE_RPC_PIPEFS_H_
++
++#include <linux/fs.h>
++#include <linux/list.h>
++#include <linux/mount.h>
++#include <linux/sched.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++
++
++#define payload_of(headerp)  ((void *)((headerp) + 1))
++
++/*
++ * struct pipefs_hdr -- the generic message format for simple_rpc_pipefs.
++ * Messages may simply be the header itself, although having an optional
++ * data payload follow the header allows much more flexibility.
++ *
++ * Messages are created using pipefs_alloc_init_msg() and
++ * pipefs_alloc_init_msg_padded(), both of which accept a pointer to an
++ * (optional) data payload.
++ *
++ * Given a struct pipefs_hdr *msg that has a struct foo payload, the data
++ * can be accessed using: struct foo *foop = payload_of(msg)
++ */
++struct pipefs_hdr {
++	u32 msgid;
++	u8  type;
++	u8  flags;
++	u16 totallen; /* length of entire message, including hdr itself */
++	u32 status;
++};
++
++/*
++ * struct pipefs_list -- a type of list used for tracking callers who've made an
++ * upcall and are blocked waiting for a reply.
++ *
++ * See pipefs_queue_upcall_waitreply() and pipefs_assign_upcall_reply().
++ */
++struct pipefs_list {
++	struct list_head list;
++	spinlock_t list_lock;
++};
++
++
++/* See net/sunrpc/simple_rpc_pipefs.c for more info on using these functions. */
++extern struct dentry *pipefs_mkpipe(const char *name,
++				    const struct rpc_pipe_ops *ops,
++				    int wait_for_open);
++extern void pipefs_closepipe(struct dentry *pipe);
++extern void pipefs_init_list(struct pipefs_list *list);
++extern struct pipefs_hdr *pipefs_alloc_init_msg(u32 msgid, u8 type, u8 flags,
++						void *data, u16 datalen);
++extern struct pipefs_hdr *pipefs_alloc_init_msg_padded(u32 msgid, u8 type,
++						       u8 flags, void *data,
++						       u16 datalen, u16 padlen);
++extern struct pipefs_hdr *pipefs_queue_upcall_waitreply(struct dentry *pipe,
++							struct pipefs_hdr *msg,
++							struct pipefs_list
++							*uplist, u8 upflags,
++							u32 timeout);
++extern int pipefs_queue_upcall_noreply(struct dentry *pipe,
++				       struct pipefs_hdr *msg, u8 upflags);
++extern int pipefs_assign_upcall_reply(struct pipefs_hdr *reply,
++				      struct pipefs_list *uplist);
++extern struct pipefs_hdr *pipefs_readmsg(struct file *filp,
++					 const char __user *src, size_t len);
++extern ssize_t pipefs_generic_upcall(struct file *filp,
++				     struct rpc_pipe_msg *rpcmsg,
++				     char __user *dst, size_t buflen);
++extern void pipefs_generic_destroy_msg(struct rpc_pipe_msg *rpcmsg);
++
++#endif /* _SIMPLE_RPC_PIPEFS_H_ */
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-08-23 12:09:03.395501822 -0400
+@@ -166,4 +166,41 @@ static inline char *__svc_print_addr(con
+ 
+ 	return buf;
+ }
++
++/* Format @addr into @na->data as a universal address (see rfc1833, nfsv4.1).
++ * Returns snprintf()'s would-be length, or -EINVAL for other address families.
++ * NOTE(review): a result >= @na->len means the text was truncated. */
++static inline int __svc_print_netaddr(struct sockaddr *addr,
++				      struct xdr_netobj *na)
++{
++	u16 port;
++	ssize_t len;
++
++	switch (addr->sa_family) {
++	case AF_INET: {
++		struct sockaddr_in *sin = (struct sockaddr_in *)addr;
++		port = ntohs(sin->sin_port);
++
++		len = snprintf(na->data, na->len, "%pI4.%u.%u",
++				&sin->sin_addr,
++				port >> 8, port & 0xff);	/* port as high.low bytes */
++		break;
++	}
++	case AF_INET6: {
++		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
++		port = ntohs(sin6->sin6_port);
++
++		len = snprintf(na->data, na->len, "%pI6.%u.%u",
++				&sin6->sin6_addr,
++				port >> 8, port & 0xff);	/* port as high.low bytes */
++		break;
++	}
++	default:
++		snprintf(na->data, na->len, "unknown address type: %d",
++			 addr->sa_family);	/* buffer still gets a diagnostic */
++		len = -EINVAL;
++		break;
++	}
++	return len;
++}
+ #endif /* SUNRPC_SVC_XPRT_H */
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xdr.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-08-23 12:08:29.066475323 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-23 12:09:03.396464612 -0400
+@@ -131,6 +131,13 @@ xdr_decode_hyper(__be32 *p, __u64 *valp)
+ 	return p + 2;
+ }
+ 
++static inline __be32 *
++xdr_decode_opaque_fixed(__be32 *p, void *ptr, unsigned int len)
++{
++	memcpy(ptr, p, len);		/* copy out exactly @len bytes */
++	return p + XDR_QUADLEN(len);	/* next item starts after XDR_QUADLEN(len) words */
++}
++
+ /*
+  * Adjust kvec to reflect end of xdr'ed data (RPC client XDR)
+  */
+@@ -197,6 +204,7 @@ struct xdr_stream {
+ 
+ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
++extern __be32 *xdr_rewind_stream(struct xdr_stream *xdr, __be32 *q);
+ extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
+ 		unsigned int base, unsigned int len);
+ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+diff -up linux-2.6.34.noarch/localversion-pnfs.orig linux-2.6.34.noarch/localversion-pnfs
+--- linux-2.6.34.noarch/localversion-pnfs.orig	2010-08-23 12:09:03.396464612 -0400
++++ linux-2.6.34.noarch/localversion-pnfs	2010-08-23 12:09:03.396464612 -0400
+@@ -0,0 +1 @@
++-pnfs
+diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/sunrpc/Makefile
+--- linux-2.6.34.noarch/net/sunrpc/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-08-23 12:09:03.397501662 -0400
+@@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprt
+ 	    svc.o svcsock.o svcauth.o svcauth_unix.o \
+ 	    addr.o rpcb_clnt.o timer.o xdr.o \
+ 	    sunrpc_syms.o cache.o rpc_pipe.o \
+-	    svc_xprt.o
++	    svc_xprt.o simple_rpc_pipefs.o
+ sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o
+ sunrpc-$(CONFIG_PROC_FS) += stats.o
+ sunrpc-$(CONFIG_SYSCTL) += sysctl.o
+diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c
+--- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-08-23 12:09:03.398522348 -0400
++++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-08-23 12:09:03.398522348 -0400
+@@ -0,0 +1,424 @@
++/*
++ *  net/sunrpc/simple_rpc_pipefs.c
++ *
++ *  Copyright (c) 2008 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  David M. Richter <richterd@citi.umich.edu>
++ *
++ *  Drawing on work done by Andy Adamson <andros@citi.umich.edu> and
++ *  Marius Eriksen <marius@monkey.org>.  Thanks for the help over the
++ *  years, guys.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *  With thanks to CITI's project sponsor and partner, IBM.
++ */
++
++#include <linux/completion.h>
++#include <linux/uaccess.h>
++#include <linux/module.h>
++#include <linux/sunrpc/simple_rpc_pipefs.h>
++
++
++/*
++ * Make an rpc_pipefs pipe named @name at the root of the mounted rpc_pipefs
++ * filesystem.  Returns the pipe's dentry, or an ERR_PTR() on failure.
++ *
++ * If @wait_for_open is non-zero and an upcall is later queued but the userland
++ * end of the pipe has not yet been opened, the upcall will remain queued until
++ * the pipe is opened; otherwise, the upcall queueing will return with -EPIPE.
++ */
++struct dentry *pipefs_mkpipe(const char *name, const struct rpc_pipe_ops *ops,
++			     int wait_for_open)
++{
++	struct dentry *pipe;
++	struct vfsmount *mnt;
++
++	mnt = rpc_get_mount();
++	if (IS_ERR(mnt))
++		return ERR_CAST(mnt);
++
++	if (mnt->mnt_root)
++		pipe = rpc_mkpipe(mnt->mnt_root, name, NULL, ops,
++				  wait_for_open ? RPC_PIPE_WAIT_FOR_OPEN : 0);
++	else
++		pipe = ERR_PTR(-ENOENT);
++
++	/* pipefs_closepipe() drops the mount ref; on failure nobody would */
++	if (IS_ERR(pipe))
++		rpc_put_mount();
++	return pipe;
++}
++EXPORT_SYMBOL(pipefs_mkpipe);
++
++/*
++ * Shut down a pipe made by pipefs_mkpipe(): unlink @pipe from rpc_pipefs.
++ * XXX: do we need to retain an extra reference on the mount?
++ */
++void pipefs_closepipe(struct dentry *pipe)
++{
++	rpc_unlink(pipe);
++	rpc_put_mount();	/* pairs with rpc_get_mount() in pipefs_mkpipe() */
++}
++EXPORT_SYMBOL(pipefs_closepipe);
++
++/*
++ * Initialize a struct pipefs_list -- a way to keep track of callers who are
++ * blocked, having made an upcall, and are awaiting a reply.
++ *
++ * See pipefs_queue_upcall_waitreply() and pipefs_assign_upcall_reply() for
++ * how to use them.
++ */
++inline void pipefs_init_list(struct pipefs_list *list)
++{
++	INIT_LIST_HEAD(&list->list);
++	spin_lock_init(&list->list_lock);
++}
++EXPORT_SYMBOL(pipefs_init_list);
++
++/*
++ * Alloc/init a generic pipefs message header and copy into its message body
++ * an arbitrary data payload.
++ *
++ * struct pipefs_hdr's are meant to serve as generic, general-purpose message
++ * headers for easy rpc_pipefs I/O.  When an upcall is made, the
++ * struct pipefs_hdr is assigned to a struct rpc_pipe_msg and delivered
++ * therein.  --And yes, the naming can seem a little confusing at first:
++ *
++ * When one thinks of an upcall "message", in simple_rpc_pipefs that's a
++ * struct pipefs_hdr (possibly with an attached message body).  A
++ * struct rpc_pipe_msg is actually only the -vehicle- by which the "real"
++ * message is delivered and processed.
++ */
++struct pipefs_hdr *pipefs_alloc_init_msg_padded(u32 msgid, u8 type, u8 flags,
++					   void *data, u16 datalen, u16 padlen)
++{
++	/* Sum in size_t: as a u16 the total could wrap past 65535, pass the
++	 * size check and leave memcpy() writing beyond a too-small buffer. */
++	size_t totallen = sizeof(struct pipefs_hdr) + datalen + padlen;
++	struct pipefs_hdr *msg;
++
++	/* must also fit hdr->totallen (u16), even where PAGE_SIZE is 64K */
++	if (totallen > PAGE_SIZE || totallen > USHRT_MAX)
++		return ERR_PTR(-E2BIG);
++
++	msg = kzalloc(totallen, GFP_KERNEL);
++	if (!msg)
++		return ERR_PTR(-ENOMEM);
++
++	msg->msgid = msgid;
++	msg->type = type;
++	msg->flags = flags;
++	msg->totallen = totallen;
++
++	/* the payload is optional; without one the body stays zeroed */
++	if (data && datalen)
++		memcpy(payload_of(msg), data, datalen);
++	return msg;
++}
++EXPORT_SYMBOL(pipefs_alloc_init_msg_padded);
++
++/*
++ * pipefs_alloc_init_msg_padded() with no padding; see its description.
++ */
++struct pipefs_hdr *pipefs_alloc_init_msg(u32 msgid, u8 type, u8 flags,
++				    void *data, u16 datalen)
++{
++	return pipefs_alloc_init_msg_padded(msgid, type, flags, data,
++					    datalen, 0);
++}
++EXPORT_SYMBOL(pipefs_alloc_init_msg);
++
++
++static void pipefs_init_rpcmsg(struct rpc_pipe_msg *rpcmsg,
++			       struct pipefs_hdr *msg, u8 upflags)
++{
++	memset(rpcmsg, 0, sizeof(*rpcmsg));	/* also clears ->copied and ->errno */
++	rpcmsg->data = msg;
++	rpcmsg->len = msg->totallen;		/* whole message, header included */
++	rpcmsg->flags = upflags;		/* PIPEFS_AUTOFREE_* bits */
++}
++
++static struct rpc_pipe_msg *pipefs_alloc_init_rpcmsg(struct pipefs_hdr *msg,
++						     u8 upflags)
++{
++	struct rpc_pipe_msg *rpcmsg;
++
++	rpcmsg = kmalloc(sizeof(*rpcmsg), GFP_KERNEL);	/* zeroed by the init below */
++	if (!rpcmsg)
++		return ERR_PTR(-ENOMEM);	/* callers test with IS_ERR() */
++
++	pipefs_init_rpcmsg(rpcmsg, msg, upflags);
++	return rpcmsg;
++}
++
++
++/* represents an upcall that'll block and wait for a reply */
++struct pipefs_upcall {
++	u32 msgid;			/* copied from the upcall msg; replies match on it */
++	struct rpc_pipe_msg rpcmsg;	/* what actually gets queued on the pipe */
++	struct list_head list;		/* entry on a struct pipefs_list */
++	wait_queue_head_t waitq;	/* the blocked caller sleeps here */
++	struct pipefs_hdr *reply;	/* set by pipefs_assign_upcall_reply() */
++};
++
++
++static void pipefs_init_upcall_waitreply(struct pipefs_upcall *upcall,
++					 struct pipefs_hdr *msg, u8 upflags)
++{
++	upcall->reply = NULL;			/* no answer yet */
++	upcall->msgid = msg->msgid;		/* key used to match the reply */
++	INIT_LIST_HEAD(&upcall->list);
++	init_waitqueue_head(&upcall->waitq);
++	pipefs_init_rpcmsg(&upcall->rpcmsg, msg, upflags);
++}
++
++static int __pipefs_queue_upcall_waitreply(struct dentry *pipe,
++					   struct pipefs_upcall *upcall,
++					   struct pipefs_list *uplist,
++					   u32 timeout)
++{
++	int err;
++	DECLARE_WAITQUEUE(wq, current);
++
++	/*
++	 * Go TASK_UNINTERRUPTIBLE before the upcall can be answered: a reply
++	 * that beats us to schedule() then simply makes us runnable again,
++	 * instead of being a wake-up we miss and sleep straight through.
++	 */
++	add_wait_queue(&upcall->waitq, &wq);
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	spin_lock(&uplist->list_lock);
++	list_add(&upcall->list, &uplist->list);
++	spin_unlock(&uplist->list_lock);
++
++	err = rpc_queue_upcall(pipe->d_inode, &upcall->rpcmsg);
++	if (err < 0)
++		goto out;
++
++	if (!timeout)
++		schedule();
++	else if (schedule_timeout(timeout) == 0 && upcall->reply == NULL)
++		err = -ETIMEDOUT;	/* timer expired and still no reply */
++out:
++	__set_current_state(TASK_RUNNING);
++	spin_lock(&uplist->list_lock);
++	list_del_init(&upcall->list);
++	spin_unlock(&uplist->list_lock);
++	remove_wait_queue(&upcall->waitq, &wq);
++	return err;
++}
++
++/*
++ * Queue a pipefs msg for an upcall to userspace, place the calling thread
++ * on @uplist, and block the thread to wait for a reply.  If @timeout is
++ * nonzero, the thread will be blocked for at most @timeout jiffies.
++ *
++ * (To convert time units into jiffies, consider the functions
++ *  msecs_to_jiffies(), usecs_to_jiffies(), timeval_to_jiffies(), and
++ *  timespec_to_jiffies().)
++ *
++ * Once a reply is received by your downcall handler, call
++ * pipefs_assign_upcall_reply() with @uplist to find the corresponding upcall,
++ * assign the reply, and wake the waiting thread.
++ *
++ * This function's return value pointer may be an error and should be checked
++ * with IS_ERR() before attempting to access the reply message.
++ *
++ * Callers are responsible for freeing @msg, unless pipefs_generic_destroy_msg()
++ * is used as the ->destroy_msg() callback and the PIPEFS_AUTOFREE_UPCALL_MSG
++ * flag is set in @upflags.  See also rpc_pipe_fs.h.
++ */
++struct pipefs_hdr *pipefs_queue_upcall_waitreply(struct dentry *pipe,
++					    struct pipefs_hdr *msg,
++					    struct pipefs_list *uplist,
++					    u8 upflags, u32 timeout)
++{
++	struct pipefs_upcall upcall;
++	int err;
++
++	pipefs_init_upcall_waitreply(&upcall, msg, upflags);
++	err = __pipefs_queue_upcall_waitreply(pipe, &upcall, uplist, timeout);
++	if (err >= 0)
++		return upcall.reply;
++
++	/* free any reply that was assigned despite the failure */
++	kfree(upcall.reply);
++	return ERR_PTR(err);
++}
++EXPORT_SYMBOL(pipefs_queue_upcall_waitreply);
++
++/*
++ * Queue a pipefs msg for an upcall to userspace and immediately return (i.e.,
++ * no reply is expected).
++ *
++ * Callers are responsible for freeing @msg, unless pipefs_generic_destroy_msg()
++ * is used as the ->destroy_msg() callback and the PIPEFS_AUTOFREE_UPCALL_MSG
++ * flag is set in @upflags.  See also rpc_pipe_fs.h.
++ */
++int pipefs_queue_upcall_noreply(struct dentry *pipe, struct pipefs_hdr *msg,
++				u8 upflags)
++{
++	int err;
++	struct rpc_pipe_msg *rpcmsg;
++
++	/* the wrapper is ours: let pipefs_generic_destroy_msg() free it */
++	upflags |= PIPEFS_AUTOFREE_RPCMSG;
++	rpcmsg = pipefs_alloc_init_rpcmsg(msg, upflags);
++	if (IS_ERR(rpcmsg))
++		return PTR_ERR(rpcmsg);
++	err = rpc_queue_upcall(pipe->d_inode, rpcmsg);
++	if (err < 0)
++		kfree(rpcmsg);	/* never queued, so ->destroy_msg() won't run */
++	return err;
++}
++EXPORT_SYMBOL(pipefs_queue_upcall_noreply);
++
++
++/* Find the waiter for @msgid; the caller must hold @uplist->list_lock. */
++static struct pipefs_upcall *pipefs_find_upcall_msgid(u32 msgid,
++						 struct pipefs_list *uplist)
++{
++	struct pipefs_upcall *upcall;
++
++	list_for_each_entry(upcall, &uplist->list, list)
++		if (upcall->msgid == msgid)
++			return upcall;
++	return NULL;
++}
++
++/*
++ * In your rpc_pipe_ops->downcall() handler, once you've read in a downcall
++ * message and have determined that it is a reply to a waiting upcall,
++ * you can use this function to find the appropriate upcall, assign the result,
++ * and wake the upcall thread.
++ *
++ * The reply message must have the same msgid as the original upcall message's.
++ * Returns 0 once @reply is handed to the waiter, or -ENOENT if nobody waits.
++ * See also pipefs_queue_upcall_waitreply() and pipefs_readmsg().
++ */
++int pipefs_assign_upcall_reply(struct pipefs_hdr *reply,
++			       struct pipefs_list *uplist)
++{
++	struct pipefs_upcall *upcall;
++
++	/* The upcall lives on the waiter's stack and is unlinked under
++	 * list_lock before the waiter returns, so hold the lock across the
++	 * assignment and wake-up: the upcall cannot vanish underneath us. */
++	spin_lock(&uplist->list_lock);
++	upcall = pipefs_find_upcall_msgid(reply->msgid, uplist);
++	if (upcall) {
++		upcall->reply = reply;
++		wake_up(&upcall->waitq);
++	}
++	spin_unlock(&uplist->list_lock);
++	if (upcall)
++		return 0;
++	printk(KERN_ERR "%s: ERROR: have reply but no matching upcall "
++		"for msgid %u\n", __func__, reply->msgid);
++	return -ENOENT;
++}
++EXPORT_SYMBOL(pipefs_assign_upcall_reply);
++
++/*
++ * Generic method to read-in and return a newly-allocated message which begins
++ * with a struct pipefs_hdr.
++ */
++struct pipefs_hdr *pipefs_readmsg(struct file *filp, const char __user *src,
++			     size_t len)
++{
++	struct pipefs_hdr *msg;
++	int hdrsize = sizeof(*msg);
++
++	/* a message is at least one full header; any payload follows it */
++	if (len < hdrsize) {
++		printk(KERN_ERR "%s: ERROR: header is too short (%d vs %d)\n",
++		       __func__, (int) len, hdrsize);
++		return ERR_PTR(-EINVAL);
++	}
++
++	msg = kzalloc(len, GFP_KERNEL);
++	if (!msg)
++		return ERR_PTR(-ENOMEM);
++
++	if (copy_from_user(msg, src, len)) {
++		/* a partial copy is of no use to the caller: drop the buffer */
++		kfree(msg);
++		return ERR_PTR(-EFAULT);
++	}
++
++	/*
++	 * NOTE(review): msg->totallen is not checked against @len here.
++	 */
++	return msg;
++}
++EXPORT_SYMBOL(pipefs_readmsg);
++
++/*
++ * Generic rpc_pipe_ops->upcall() handler implementation.
++ *
++ * Don't call this directly: to make an upcall, use
++ * pipefs_queue_upcall_waitreply() or pipefs_queue_upcall_noreply().
++ */
++ssize_t pipefs_generic_upcall(struct file *filp, struct rpc_pipe_msg *rpcmsg,
++			      char __user *dst, size_t buflen)
++{
++	char *data = (char *)rpcmsg->data + rpcmsg->copied;
++	size_t len = rpcmsg->len - rpcmsg->copied;
++	unsigned long left;
++
++	if (len > buflen)
++		len = buflen;
++
++	/* copy_to_user() returns the count of bytes NOT copied, never < 0 */
++	left = copy_to_user(dst, data, len);
++	if (left && left == len) {	/* nothing at all got through */
++		rpcmsg->errno = -EFAULT;
++		return -EFAULT;
++	}
++
++	len -= left;			/* a partial copy is a short read */
++	rpcmsg->copied += len;
++	rpcmsg->errno = 0;
++	return len;
++}
++EXPORT_SYMBOL(pipefs_generic_upcall);
++
++/*
++ * Generic rpc_pipe_ops->destroy_msg() handler implementation.
++ *
++ * Items are only freed if @rpcmsg->flags has been set appropriately.
++ * See pipefs_queue_upcall_noreply() and rpc_pipe_fs.h.
++ */
++void pipefs_generic_destroy_msg(struct rpc_pipe_msg *rpcmsg)
++{
++	if (rpcmsg->flags & PIPEFS_AUTOFREE_UPCALL_MSG)
++		kfree(rpcmsg->data);	/* the struct pipefs_hdr message */
++	if (rpcmsg->flags & PIPEFS_AUTOFREE_RPCMSG)
++		kfree(rpcmsg);		/* last: the flags live inside rpcmsg */
++}
++EXPORT_SYMBOL(pipefs_generic_destroy_msg);
+diff -up linux-2.6.34.noarch/net/sunrpc/xdr.c.orig linux-2.6.34.noarch/net/sunrpc/xdr.c
+--- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-08-23 12:08:29.081501640 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-23 12:09:03.399443371 -0400
+@@ -403,16 +403,14 @@ xdr_shrink_pagelen(struct xdr_buf *buf, 
+ 
+ 	/* Shift the tail first */
+ 	if (tail->iov_len != 0) {
+-		p = (char *)tail->iov_base + len;
+-		if (tail->iov_len > len) {
+-			copy = tail->iov_len - len;
+-			memmove(p, tail->iov_base, copy);
+-		} else
+-			buf->buflen -= len;
+-		/* Copy from the inlined pages into the tail */
+ 		copy = len;
+-		if (copy > tail->iov_len)
++		if (tail->iov_len > len) {
++			p = (char *)tail->iov_base + len;
++			memmove(p, tail->iov_base, tail->iov_len - len);
++		} else {
+ 			copy = tail->iov_len;
++		}
++		/* Copy from the inlined pages into the tail */
+ 		_copy_from_pages((char *)tail->iov_base,
+ 				buf->pages, buf->page_base + pglen - len,
+ 				copy);
+@@ -496,6 +494,27 @@ __be32 * xdr_reserve_space(struct xdr_st
+ EXPORT_SYMBOL_GPL(xdr_reserve_space);
+ 
+ /**
++ * xdr_rewind_stream - rewind a stream back to some checkpoint
++ * @xdr: pointer to xdr_stream
++ * @q: some checkpoint at historical place of @xdr
++ *
++ * Restores an xdr stream to some historical point. @q must be
++ * a logical xdr point in the past that was sampled by @q = @xdr->p.
++ */
++__be32 *xdr_rewind_stream(struct xdr_stream *xdr, __be32 *q)
++{
++	size_t nbytes = (xdr->p - q) << 2;	/* words rewound, as bytes */
++
++	BUG_ON(xdr->p < q);	/* @q must not lie ahead of the stream */
++	BUG_ON(nbytes > xdr->iov->iov_len || nbytes > xdr->buf->len);
++	xdr->p = q;
++	xdr->iov->iov_len -= nbytes;
++	xdr->buf->len -= nbytes;
++	return q;
++}
++EXPORT_SYMBOL_GPL(xdr_rewind_stream);
++
++/**
+  * xdr_write_pages - Insert a list of pages into an XDR buffer for sending
+  * @xdr: pointer to xdr_stream
+  * @pages: list of pages

From d38bc48c5fab3bc830566f201c6236a0f18c395e Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Mon, 23 Aug 2010 14:15:46 -0400
Subject: [PATCH 12/20] Fixed a couple compile errors in the server code.

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 nfsd-35-fc.patch | 42 +++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/nfsd-35-fc.patch b/nfsd-35-fc.patch
index ef99b4995..2825464af 100644
--- a/nfsd-35-fc.patch
+++ b/nfsd-35-fc.patch
@@ -1,6 +1,6 @@
 diff -up linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt
 --- linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt	2010-08-23 09:57:18.233564439 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt	2010-08-23 14:12:24.165356789 -0400
 @@ -137,7 +137,7 @@ NS*| OPENATTR             | OPT        |
     | READ                 | REQ        |              | Section 18.22  |
     | READDIR              | REQ        |              | Section 18.23  |
@@ -12,7 +12,7 @@ diff -up linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig
     | RENAME               | REQ        |              | Section 18.26  |
 diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
 --- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 09:57:18.234564075 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 14:12:24.519356675 -0400
 @@ -259,10 +259,9 @@ static struct cache_detail svc_expkey_ca
  	.alloc		= expkey_alloc,
  };
@@ -108,7 +108,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/e
  out_put_clp:
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
 --- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 10:00:37.257414684 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 14:12:52.625429773 -0400
 @@ -79,11 +79,6 @@ enum nfs_cb_opnum4 {
  					cb_sequence_dec_sz +            \
  					op_dec_sz)
@@ -211,7 +211,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/
  	int status;
  
 -	status = rpc_call_async(cb->cb_client, &msg,
-+	status = rpc_call_async(cb->cl_cb_client, &msg,
++	status = rpc_call_async(clp->cl_cb_client, &msg,
  				RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
  				&nfsd4_cb_probe_ops, (void *)clp);
 -	if (status) {
@@ -402,7 +402,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/
  }
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
 --- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 09:57:18.237376763 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 14:12:25.698356909 -0400
 @@ -969,20 +969,36 @@ static struct nfsd4_operation nfsd4_ops[
  static const char *nfsd4_op_name(unsigned opnum);
  
@@ -490,7 +490,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd
  static const char *nfsd4_op_name(unsigned opnum)
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
 --- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 09:57:18.240356512 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 14:12:25.700356284 -0400
 @@ -45,8 +45,8 @@
  #define NFSDDBG_FACILITY                NFSDDBG_PROC
  
@@ -1280,9 +1280,21 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs
 -{
 -	user_lease_time = leasetime;
 -}
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-08-23 14:14:22.882428704 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-23 14:14:33.418376589 -0400
+@@ -1900,7 +1900,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s
+ 	if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
+ 		if ((buflen -= 4) < 0)
+ 			goto out_resource;
+-		WRITE32(NFSD_LEASE_TIME);
++		WRITE32(nfsd4_lease);
+ 	}
+ 	if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
+ 		if ((buflen -= 4) < 0)
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
 --- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 09:57:20.629370282 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 14:12:25.821359224 -0400
 @@ -46,6 +46,7 @@ enum {
  	 */
  #ifdef CONFIG_NFSD_V4
@@ -1403,7 +1415,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/n
  		/* last one */ {""}
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
 --- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 09:57:20.629370282 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 14:12:25.835418441 -0400
 @@ -82,7 +82,6 @@ int nfs4_state_init(void);
  void nfsd4_free_slabs(void);
  int nfs4_state_start(void);
@@ -1440,7 +1452,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfs
  /*
 diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
 --- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 09:57:21.807501619 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 14:12:25.836366516 -0400
 @@ -70,6 +70,16 @@ struct nfsd4_cb_sequence {
  	struct nfs4_client	*cbs_clp;
  };
@@ -1558,7 +1570,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/st
  nfs4_put_stateowner(struct nfs4_stateowner *so)
 diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
 --- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 09:57:23.994379831 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 14:12:25.837387292 -0400
 @@ -381,6 +381,10 @@ struct nfsd4_destroy_session {
  	struct nfs4_sessionid	sessionid;
  };
@@ -1600,7 +1612,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr
  extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h
 --- linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h	2010-08-23 09:57:23.994379831 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h	2010-08-23 14:12:25.838377224 -0400
 @@ -40,12 +40,12 @@ struct nfs_fhbase_old {
   * This is the new flexible, extensible style NFSv2/v3 file handle.
   * by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
@@ -1619,7 +1631,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig linux-2.6.34.noarch
   * This might allow a file to be confirmed to be in a writable part of a
 diff -up linux-2.6.34.noarch/net/sunrpc/cache.c.orig linux-2.6.34.noarch/net/sunrpc/cache.c
 --- linux-2.6.34.noarch/net/sunrpc/cache.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/cache.c	2010-08-23 09:57:23.995376793 -0400
++++ linux-2.6.34.noarch/net/sunrpc/cache.c	2010-08-23 14:12:25.839376838 -0400
 @@ -49,11 +49,17 @@ static void cache_init(struct cache_head
  	h->last_refresh = now;
  }
@@ -1686,7 +1698,7 @@ diff -up linux-2.6.34.noarch/net/sunrpc/cache.c.orig linux-2.6.34.noarch/net/sun
  		/* entry is valid */
 diff -up linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig linux-2.6.34.noarch/net/sunrpc/svcsock.c
 --- linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/svcsock.c	2010-08-23 09:57:23.997368707 -0400
++++ linux-2.6.34.noarch/net/sunrpc/svcsock.c	2010-08-23 14:12:25.840384371 -0400
 @@ -547,7 +547,6 @@ static int svc_udp_recvfrom(struct svc_r
  			dprintk("svc: recvfrom returned error %d\n", -err);
  			set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
@@ -1753,7 +1765,7 @@ diff -up linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig linux-2.6.34.noarch/net/s
  error:
 diff -up linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig linux-2.6.34.noarch/net/sunrpc/svc_xprt.c
 --- linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/svc_xprt.c	2010-08-23 09:57:23.996377209 -0400
++++ linux-2.6.34.noarch/net/sunrpc/svc_xprt.c	2010-08-23 14:12:25.841371223 -0400
 @@ -744,8 +744,10 @@ int svc_recv(struct svc_rqst *rqstp, lon
  		if (rqstp->rq_deferred) {
  			svc_xprt_received(xprt);
@@ -1782,7 +1794,7 @@ diff -up linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig linux-2.6.34.noarch/net/
  void svc_close_xprt(struct svc_xprt *xprt)
 diff -up linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c.orig linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
 --- linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	2010-08-23 09:57:23.998377481 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	2010-08-23 14:12:25.842376584 -0400
 @@ -566,7 +566,6 @@ static int rdma_read_complete(struct svc
  		ret, rqstp->rq_arg.len,	rqstp->rq_arg.head[0].iov_base,
  		rqstp->rq_arg.head[0].iov_len);

From 268a34d036fc07cca40dcb828de2ef224502ce8c Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Tue, 24 Aug 2010 14:49:23 -0400
Subject: [PATCH 13/20] Removed the localversion-pnfs file from the pnfs patch

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 kernel.spec                          |   2 +-
 pnfs-all-2.6.35-2010-08-19-f13.patch | 395 +++++++++++++--------------
 2 files changed, 196 insertions(+), 201 deletions(-)

diff --git a/kernel.spec b/kernel.spec
index 6e4442efc..2a47977aa 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -23,7 +23,7 @@ Summary: The Linux kernel
 #
 # (Uncomment the '#' and both spaces below to set the buildid.)
 #
-%define buildid .pnfs_all_2.6.35_2010_08_19
+%define buildid .pnfs34.2010.08.19
 ###################################################################
 
 # The buildid can also be specified on the rpmbuild command line
diff --git a/pnfs-all-2.6.35-2010-08-19-f13.patch b/pnfs-all-2.6.35-2010-08-19-f13.patch
index a9d78ba0e..10df9b15c 100644
--- a/pnfs-all-2.6.35-2010-08-19-f13.patch
+++ b/pnfs-all-2.6.35-2010-08-19-f13.patch
@@ -1,6 +1,6 @@
 diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arch/um/os-Linux/mem.c
---- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-08-23 12:08:27.310584826 -0400
-+++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-08-23 12:09:03.273553977 -0400
+--- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-08-24 14:14:03.643355000 -0400
++++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-08-24 14:17:48.415730000 -0400
 @@ -13,6 +13,7 @@
  #include <sys/stat.h>
  #include <sys/mman.h>
@@ -11,7 +11,7 @@ diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arc
  #include "os.h"
 diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.c
 --- linux-2.6.34.noarch/block/genhd.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/block/genhd.c	2010-08-23 12:09:03.273553977 -0400
++++ linux-2.6.34.noarch/block/genhd.c	2010-08-24 14:17:48.421730000 -0400
 @@ -1009,6 +1009,7 @@ static void disk_release(struct device *
  struct class block_class = {
  	.name		= "block",
@@ -21,8 +21,8 @@ diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.
  static char *block_devnode(struct device *dev, mode_t *mode)
  {
 diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt
---- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-08-23 12:09:03.274563927 -0400
-+++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-08-23 12:09:03.274563927 -0400
+--- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-08-24 14:17:48.423729000 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-08-24 14:17:48.425730000 -0400
 @@ -0,0 +1,211 @@
 +(c) 2007 Network Appliance Inc.
 +
@@ -237,7 +237,7 @@ diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.
 +
 diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/drivers/md/dm-ioctl.c
 --- linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-08-23 12:09:03.275584050 -0400
++++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-08-24 14:17:48.430730000 -0400
 @@ -651,6 +651,12 @@ static int dev_create(struct dm_ioctl *p
  	return r;
  }
@@ -292,7 +292,7 @@ diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/driv
  	int r;
 diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drivers/scsi/hosts.c
 --- linux-2.6.34.noarch/drivers/scsi/hosts.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-08-23 12:09:03.276563906 -0400
++++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-08-24 14:17:48.435733000 -0400
 @@ -49,7 +49,7 @@ static void scsi_host_cls_release(struct
  	put_device(&class_to_shost(dev)->shost_gendev);
  }
@@ -304,7 +304,7 @@ diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drive
  };
 diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/exofs.h
 --- linux-2.6.34.noarch/fs/exofs/exofs.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-08-23 12:09:03.277563890 -0400
++++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-08-24 14:17:48.440733000 -0400
 @@ -36,13 +36,9 @@
  #include <linux/fs.h>
  #include <linux/time.h>
@@ -360,8 +360,8 @@ diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/
 +
  #endif
 diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs/export.c
---- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-08-23 12:09:03.278386746 -0400
-+++ linux-2.6.34.noarch/fs/exofs/export.c	2010-08-23 12:09:03.278386746 -0400
+--- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-08-24 14:17:48.444731000 -0400
++++ linux-2.6.34.noarch/fs/exofs/export.c	2010-08-24 14:17:48.446730000 -0400
 @@ -0,0 +1,396 @@
 +/*
 + * export.c - Implementation of the pnfs_export_operations
@@ -761,7 +761,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs
 +}
 diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/inode.c
 --- linux-2.6.34.noarch/fs/exofs/inode.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-08-23 12:09:03.279502002 -0400
++++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-08-24 14:17:48.452730000 -0400
 @@ -833,7 +833,7 @@ void exofs_truncate(struct inode *inode)
  	if (unlikely(wait_obj_created(oi)))
  		goto fail;
@@ -781,7 +781,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/
   * Fill in an inode read from the OSD and set it up for use
 diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/Kbuild
 --- linux-2.6.34.noarch/fs/exofs/Kbuild.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-08-23 12:09:03.279502002 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-08-24 14:17:48.457733000 -0400
 @@ -13,4 +13,5 @@
  #
  
@@ -790,7 +790,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/K
  obj-$(CONFIG_EXOFS_FS) += exofs.o
 diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/Kconfig
 --- linux-2.6.34.noarch/fs/exofs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-08-23 12:09:03.280553663 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-08-24 14:17:48.462739000 -0400
 @@ -1,6 +1,7 @@
  config EXOFS_FS
  	tristate "exofs: OSD based file system support"
@@ -801,7 +801,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/
  	  as its backing storage.
 diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/super.c
 --- linux-2.6.34.noarch/fs/exofs/super.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/super.c	2010-08-23 12:09:03.281511951 -0400
++++ linux-2.6.34.noarch/fs/exofs/super.c	2010-08-24 14:17:48.468730000 -0400
 @@ -621,6 +621,7 @@ static int exofs_fill_super(struct super
  	sb->s_fs_info = sbi;
  	sb->s_op = &exofs_sops;
@@ -812,7 +812,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/
  		EXOFS_ERR("ERROR: exofs_iget failed\n");
 diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exportfs/expfs.c
 --- linux-2.6.34.noarch/fs/exportfs/expfs.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-08-23 12:09:03.282511528 -0400
++++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-08-24 14:17:48.473730000 -0400
 @@ -16,6 +16,13 @@
  #include <linux/namei.h>
  #include <linux/sched.h>
@@ -829,7 +829,7 @@ diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exp
  
 diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/exportfs/Makefile
 --- linux-2.6.34.noarch/fs/exportfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-08-23 12:09:03.282511528 -0400
++++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-08-24 14:17:48.478733000 -0400
 @@ -3,4 +3,7 @@
  
  obj-$(CONFIG_EXPORTFS) += exportfs.o
@@ -840,8 +840,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/ex
 +exportfs-$(CONFIG_EXPORTFS_OSD_LAYOUT)	+= pnfs_osd_xdr_srv.o
 +exportfs-$(CONFIG_EXPORTFS_BLOCK_LAYOUT) += nfs4blocklayoutxdr.o
 diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c
---- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-08-23 12:09:03.283511561 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-08-23 12:09:03.283511561 -0400
+--- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-08-24 14:17:48.482731000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-08-24 14:17:48.484734000 -0400
 @@ -0,0 +1,158 @@
 +/*
 + *  linux/fs/nfsd/nfs4blocklayoutxdr.c
@@ -1002,8 +1002,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.
 +}
 +EXPORT_SYMBOL_GPL(blocklayout_encode_layout);
 diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c
---- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-08-23 12:09:03.283511561 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-08-23 12:09:03.283511561 -0400
+--- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-08-24 14:17:48.487733000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-08-24 14:17:48.489734000 -0400
 @@ -0,0 +1,218 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -1224,8 +1224,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.n
 +}
 +EXPORT_SYMBOL(filelayout_encode_layout);
 diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c
---- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-08-23 12:09:03.284511493 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-08-23 12:09:03.284511493 -0400
+--- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-08-24 14:17:48.493729000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-08-24 14:17:48.494735000 -0400
 @@ -0,0 +1,289 @@
 +/*
 + *  pnfs_osd_xdr_enc.c
@@ -1518,7 +1518,7 @@ diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.no
 +EXPORT_SYMBOL(pnfs_osd_xdr_decode_ioerr);
 diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gfs2/ops_fstype.c
 --- linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-08-23 12:09:03.285539075 -0400
++++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-08-24 14:17:48.499730000 -0400
 @@ -19,6 +19,7 @@
  #include <linux/gfs2_ondisk.h>
  #include <linux/slow-work.h>
@@ -1539,7 +1539,7 @@ diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gf
  	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
 diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
 --- linux-2.6.34.noarch/fs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/Kconfig	2010-08-23 12:09:03.286512316 -0400
++++ linux-2.6.34.noarch/fs/Kconfig	2010-08-24 14:17:48.505733000 -0400
 @@ -224,6 +224,31 @@ config LOCKD_V4
  config EXPORTFS
  	tristate
@@ -1573,8 +1573,8 @@ diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
  	tristate
  	select FS_POSIX_ACL
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-08-23 12:09:03.287381619 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-08-23 12:09:03.287381619 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-08-24 14:17:48.509734000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-08-24 14:17:48.511732000 -0400
 @@ -0,0 +1,66 @@
 +#include <linux/module.h>
 +#include <linux/uaccess.h>
@@ -1643,8 +1643,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.or
 +	return;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-08-23 12:09:03.288501648 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-08-23 12:09:03.288501648 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-08-24 14:17:48.514733000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-08-24 14:17:48.516731000 -0400
 @@ -0,0 +1,1160 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.c
@@ -2807,8 +2807,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.
 +module_init(nfs4blocklayout_init);
 +module_exit(nfs4blocklayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-08-23 12:09:03.289501933 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-08-23 12:09:03.289501933 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-08-24 14:17:48.519731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-08-24 14:17:48.521730000 -0400
 @@ -0,0 +1,335 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayoutdev.c
@@ -3146,8 +3146,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.
 +	goto out;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-08-23 12:09:03.290395707 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-08-23 12:09:03.290395707 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-08-24 14:17:48.523733000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-08-24 14:17:48.525730000 -0400
 @@ -0,0 +1,120 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayoutdm.c
@@ -3270,8 +3270,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.3
 +	}
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-08-23 12:09:03.290395707 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-08-23 12:09:03.291501560 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-08-24 14:17:48.528729000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-08-24 14:17:48.529735000 -0400
 @@ -0,0 +1,303 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.h
@@ -3577,8 +3577,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.
 +
 +#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-08-23 12:09:03.292511531 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-08-23 12:09:03.292511531 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-08-24 14:17:48.532731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-08-24 14:17:48.534734000 -0400
 @@ -0,0 +1,948 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.h
@@ -4529,8 +4529,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noar
 +	}
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile
---- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-08-23 12:09:03.292511531 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-08-23 12:09:03.293491476 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-08-24 14:17:48.537729000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-08-24 14:17:48.538739000 -0400
 @@ -0,0 +1,6 @@
 +#
 +# Makefile for the pNFS block layout driver kernel module
@@ -4540,7 +4540,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarc
 +			extents.o block-device-discovery-pipe.o
 diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/callback.h
 --- linux-2.6.34.noarch/fs/nfs/callback.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-08-23 12:09:03.293491476 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-08-24 14:17:48.544730000 -0400
 @@ -8,6 +8,8 @@
  #ifndef __LINUX_FS_NFS_CALLBACK_H
  #define __LINUX_FS_NFS_CALLBACK_H
@@ -4613,7 +4613,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/c
  extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
 diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/nfs/callback_proc.c
 --- linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-08-23 12:09:03.294522414 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-08-24 14:17:48.562731000 -0400
 @@ -8,10 +8,15 @@
  #include <linux/nfs4.h>
  #include <linux/nfs_fs.h>
@@ -5096,7 +5096,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/
  	return status;
 diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/nfs/callback_xdr.c
 --- linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-08-23 12:09:03.295502055 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-08-24 14:17:48.568730000 -0400
 @@ -22,6 +22,8 @@
  #define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
  
@@ -5298,8 +5298,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/n
  		.process_op = (callback_process_op_t)nfs4_callback_sequence,
  		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
 diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/client.c
---- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-08-23 12:08:29.037481540 -0400
-+++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-23 12:09:03.297501650 -0400
+--- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-08-24 14:14:13.062705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-24 14:17:48.575730000 -0400
 @@ -39,6 +39,7 @@
  #include <net/ipv6.h>
  #include <linux/nfs_xdr.h>
@@ -5508,8 +5508,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/cli
  		goto error;
  
 diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/bl_com.c
---- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-08-23 12:09:03.297501650 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-08-23 12:09:03.298501447 -0400
+--- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-08-24 14:17:48.578729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-08-24 14:17:48.579735000 -0400
 @@ -0,0 +1,292 @@
 +#if defined(CONFIG_SPNFS_BLOCK)
 +
@@ -5804,8 +5804,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/b
 +}
 +#endif /* CONFIG_SPNFS_BLOCK */
 diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/bl_ops.c
---- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-08-23 12:09:03.299501445 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-08-23 12:09:03.299501445 -0400
+--- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-08-24 14:17:48.584729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-08-24 14:17:48.586730000 -0400
 @@ -0,0 +1,1672 @@
 +/*
 + *  bl_ops.c
@@ -7480,8 +7480,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/b
 +
 +#endif /* CONFIG_SPNFS_BLOCK */
 diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs/delegation.c
---- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-08-23 12:08:29.037481540 -0400
-+++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-23 12:09:03.300491952 -0400
+--- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-08-24 14:14:13.068705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-24 14:17:48.592730000 -0400
 @@ -104,7 +104,8 @@ again:
  			continue;
  		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
@@ -7558,7 +7558,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs
  	rcu_read_unlock();
 diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs/delegation.h
 --- linux-2.6.34.noarch/fs/nfs/delegation.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-08-23 12:09:03.301431797 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-08-24 14:17:48.597733000 -0400
 @@ -34,9 +34,7 @@ enum {
  int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
  void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
@@ -7571,8 +7571,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs
  
  struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
 diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
---- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-08-23 12:08:29.089481525 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 12:09:03.302511603 -0400
+--- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-08-24 14:14:13.612707000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-24 14:17:48.604730000 -0400
 @@ -17,11 +17,19 @@
  #include <linux/module.h>
  #include <linux/exportfs.h>
@@ -7750,7 +7750,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/e
  	svcauth_unix_purge();
 diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/direct.c
 --- linux-2.6.34.noarch/fs/nfs/direct.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-08-23 12:09:03.303491500 -0400
++++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-08-24 14:17:48.610730000 -0400
 @@ -267,6 +267,38 @@ static const struct rpc_call_ops nfs_rea
  	.rpc_release = nfs_direct_read_release,
  };
@@ -7996,7 +7996,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/dir
  		user_addr += bytes;
 diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kconfig
 --- linux-2.6.34.noarch/fs/nfsd/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-08-23 12:09:03.304505472 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-08-24 14:17:48.616730000 -0400
 @@ -79,3 +79,52 @@ config NFSD_V4
  	  available from http://linux-nfs.org/.
  
@@ -8052,7 +8052,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kc
 +	  If unsure, say N.
 diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/Makefile
 --- linux-2.6.34.noarch/fs/nfsd/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-08-23 12:09:03.304505472 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-08-24 14:17:48.621733000 -0400
 @@ -11,3 +11,7 @@ nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs
  nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
  nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
@@ -8062,8 +8062,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/M
 +nfsd-$(CONFIG_SPNFS)	+= spnfs_com.o spnfs_ops.o
 +nfsd-$(CONFIG_SPNFS_BLOCK) += bl_com.o bl_ops.o
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-08-23 12:08:29.090501507 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 12:09:03.306491345 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-08-24 14:14:13.618705000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-24 14:17:48.628730000 -0400
 @@ -40,7 +40,6 @@
  
  #define NFSPROC4_CB_NULL 0
@@ -8603,8 +8603,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/
 +}
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-08-23 12:09:03.307491492 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-08-23 12:09:03.308491262 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-08-24 14:17:48.633729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-08-24 14:17:48.641730000 -0400
 @@ -0,0 +1,1679 @@
 +/******************************************************************************
 + *
@@ -10286,8 +10286,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfs
 +	return status;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-08-23 12:09:03.309501439 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-08-23 12:09:03.309501439 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-08-24 14:17:48.645731000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-08-24 14:17:48.647730000 -0400
 @@ -0,0 +1,461 @@
 +/******************************************************************************
 + *
@@ -10751,8 +10751,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/n
 +};
 +EXPORT_SYMBOL(pnfs_dlm_export_ops);
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-08-23 12:09:03.310501390 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-08-23 12:09:03.310501390 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-08-24 14:17:48.651729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-08-24 14:17:48.652735000 -0400
 @@ -0,0 +1,620 @@
 +/*
 +*  linux/fs/nfsd/nfs4pnfsds.c
@@ -11375,8 +11375,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nf
 +
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-08-23 12:08:29.091491685 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 12:09:03.311501496 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-08-24 14:14:13.623707000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-24 14:17:48.658733000 -0400
 @@ -34,10 +34,14 @@
   */
  #include <linux/file.h>
@@ -11851,8 +11851,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd
  
  static const char *nfsd4_op_name(unsigned opnum)
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-08-23 12:08:29.093491375 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 12:09:03.313491310 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-08-24 14:14:13.632707000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-24 14:17:48.667732000 -0400
 @@ -42,6 +42,8 @@
  #include "xdr4.h"
  #include "vfs.h"
@@ -12368,8 +12368,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs
  }
  
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-23 12:09:03.315491356 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-08-24 14:14:13.639707000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-24 14:17:48.675730000 -0400
 @@ -47,9 +47,14 @@
  #include <linux/nfsd_idmap.h>
  #include <linux/nfs4_acl.h>
@@ -12988,8 +12988,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/
  	[OP_SEQUENCE]		= (nfsd4_enc)nfsd4_encode_sequence,
  	[OP_SET_SSV]		= (nfsd4_enc)nfsd4_encode_noop,
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
---- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-08-23 12:08:29.094491943 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 12:09:03.317501495 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-08-24 14:14:13.645705000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-24 14:17:48.681730000 -0400
 @@ -13,10 +13,15 @@
  #include <linux/nfsd/syscall.h>
  #include <linux/lockd/lockd.h>
@@ -13166,8 +13166,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/n
  	remove_proc_entry("fs/nfs/exports", NULL);
  	remove_proc_entry("fs/nfs", NULL);
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
---- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-08-23 12:08:29.095491390 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 12:09:03.318355741 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-08-24 14:14:13.651705000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-24 14:17:48.687730000 -0400
 @@ -285,11 +285,17 @@ extern time_t nfsd4_grace;
  #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
  	NFSD4_SUPPORTED_ATTRS_WORD0
@@ -13189,7 +13189,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfs
  {
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.c
 --- linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-08-23 12:09:03.319511586 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-08-24 14:17:48.693730000 -0400
 @@ -10,6 +10,7 @@
  #include <linux/exportfs.h>
  
@@ -13227,7 +13227,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nf
  		__u32 tfh[2];
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.h
 --- linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-08-23 12:09:03.319511586 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-08-24 14:17:48.698733000 -0400
 @@ -14,6 +14,7 @@ enum nfsd_fsid {
  	FSID_UUID8,
  	FSID_UUID16,
@@ -13280,8 +13280,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nf
 +
  #endif /* _LINUX_NFSD_FH_INT_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/nfssvc.c
---- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-08-23 12:08:27.631563969 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-08-23 12:09:03.320416974 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-08-24 14:14:06.365163000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-08-24 14:17:48.704731000 -0400
 @@ -115,7 +115,7 @@ struct svc_program		nfsd_program = {
  
  };
@@ -13292,8 +13292,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/n
  int nfsd_vers(int vers, enum vers_op change)
  {
 diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pnfsd.h
---- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-08-23 12:09:03.321376171 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-08-23 12:09:03.321376171 -0400
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-08-24 14:17:48.708729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-08-24 14:17:48.710730000 -0400
 @@ -0,0 +1,143 @@
 +/*
 + *  Copyright (c) 2005 The Regents of the University of Michigan.
@@ -13439,8 +13439,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pn
 +
 +#endif /* LINUX_NFSD_PNFSD_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c
---- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-08-23 12:09:03.321376171 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-08-23 12:09:03.322501672 -0400
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-08-24 14:17:48.713731000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-08-24 14:17:48.715730000 -0400
 @@ -0,0 +1,225 @@
 +/*
 + * linux/fs/nfsd/pnfs_lexp.c
@@ -13668,8 +13668,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nf
 +	inode->i_sb->s_pnfs_op = &pnfsd_lexp_ops;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_com.c
---- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-08-23 12:09:03.322501672 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-08-23 12:09:03.323511608 -0400
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-08-24 14:17:48.719729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-08-24 14:17:48.720735000 -0400
 @@ -0,0 +1,535 @@
 +/*
 + * fs/nfsd/spnfs_com.c
@@ -14207,8 +14207,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfs
 +}
 +#endif /* CONFIG_PROC_FS */
 diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c
---- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-08-23 12:09:03.324501390 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-08-23 12:09:03.324501390 -0400
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-08-24 14:17:48.724733000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-08-24 14:17:48.726730000 -0400
 @@ -0,0 +1,878 @@
 +/*
 + * fs/nfsd/spnfs_ops.c
@@ -15089,8 +15089,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfs
 +	return 0;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
---- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-08-23 12:08:29.096512142 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 12:09:03.325501424 -0400
+--- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-08-24 14:14:13.656705000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-24 14:17:48.731738000 -0400
 @@ -242,6 +242,12 @@ struct nfs4_client {
  	u32			cl_cb_seq_nr;
  	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
@@ -15207,8 +15207,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/st
 +
  #endif   /* NFSD4_STATE_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.c
---- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-08-23 12:08:27.632564132 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-08-23 12:09:03.326501490 -0400
+--- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-08-24 14:14:06.371160000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-08-24 14:17:48.737742000 -0400
 @@ -37,7 +37,12 @@
  #ifdef CONFIG_NFSD_V4
  #include <linux/nfs4_acl.h>
@@ -15335,8 +15335,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.
  out_nfserr:
  	err = nfserrno(host_err);
 diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
---- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-08-23 12:08:29.097425997 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 12:09:03.327451643 -0400
+--- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-08-24 14:14:13.661705000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-24 14:17:48.743747000 -0400
 @@ -37,6 +37,8 @@
  #ifndef _LINUX_NFSD_XDR4_H
  #define _LINUX_NFSD_XDR4_H
@@ -15413,8 +15413,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr
  	struct nfs4_replay *			replay;
  };
 diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.c
---- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-23 12:08:29.039491912 -0400
-+++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-23 12:09:03.328501680 -0400
+--- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-24 14:14:13.079708000 -0400
++++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-24 14:17:48.749746000 -0400
 @@ -28,6 +28,7 @@
  #include <linux/aio.h>
  #include <linux/gfp.h>
@@ -15540,8 +15540,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.
  	if (!ret)
  		return VM_FAULT_LOCKED;
 diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inode.c
---- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-08-23 12:08:29.042511552 -0400
-+++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-23 12:09:03.329501644 -0400
+--- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-08-24 14:14:13.095705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-24 14:17:48.757730000 -0400
 @@ -48,6 +48,7 @@
  #include "internal.h"
  #include "fscache.h"
@@ -15755,8 +15755,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inod
  	nfs_fs_proc_exit();
  	nfsiod_stop();
 diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/internal.h
---- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-08-23 12:08:29.042511552 -0400
-+++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-23 12:09:03.330502148 -0400
+--- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-08-24 14:14:13.100708000 -0400
++++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-24 14:17:48.763734000 -0400
 @@ -139,6 +139,16 @@ extern struct nfs_server *nfs_clone_serv
  					   struct nfs_fattr *);
  extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
@@ -15817,7 +15817,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/i
  		struct page *, struct page *);
 diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kconfig
 --- linux-2.6.34.noarch/fs/nfs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-08-23 12:09:03.331395814 -0400
++++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-08-24 14:17:48.769730000 -0400
 @@ -79,10 +79,48 @@ config NFS_V4_1
  	depends on NFS_V4 && EXPERIMENTAL
  	help
@@ -15870,7 +15870,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kcon
  	depends on NFS_FS=y && IP_PNP
 diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Makefile
 --- linux-2.6.34.noarch/fs/nfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-08-23 12:09:03.331395814 -0400
++++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-08-24 14:17:48.774730000 -0400
 @@ -15,5 +15,12 @@ nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4x
  			   delegation.o idmap.o \
  			   callback.o callback_xdr.o callback_proc.o \
@@ -15885,8 +15885,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Mak
 +obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
 +obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
 diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs3proc.c
---- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-08-23 12:08:29.045525837 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-23 12:09:03.332511640 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-08-24 14:14:13.119708000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-24 14:17:48.780730000 -0400
 @@ -833,6 +833,7 @@ const struct nfs_rpc_ops nfs_v3_clientop
  	.dentry_ops	= &nfs_dentry_operations,
  	.dir_inode_ops	= &nfs3_dir_inode_operations,
@@ -15896,8 +15896,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/n
  	.getattr	= nfs3_proc_getattr,
  	.setattr	= nfs3_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-08-23 12:09:03.333512111 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-08-23 12:09:03.334491472 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-08-24 14:17:48.784731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-08-24 14:17:48.786730000 -0400
 @@ -0,0 +1,765 @@
 +/*
 + *  linux/fs/nfs/nfs4filelayout.c
@@ -16665,8 +16665,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs
 +module_init(nfs4filelayout_init);
 +module_exit(nfs4filelayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-08-23 12:09:03.334491472 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-08-23 12:09:03.335501543 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-08-24 14:17:48.790731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-08-24 14:17:48.792730000 -0400
 @@ -0,0 +1,636 @@
 +/*
 + *  linux/fs/nfs/nfs4filelayoutdev.c
@@ -17305,8 +17305,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch
 +}
 +
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-08-23 12:09:03.335501543 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-08-23 12:09:03.335501543 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-08-24 14:17:48.795731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-08-24 14:17:48.796742000 -0400
 @@ -0,0 +1,97 @@
 +/*
 + *  pnfs_nfs4filelayout.h
@@ -17406,8 +17406,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs
 +
 +#endif /* FS_NFS_NFS4FILELAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nfs4_fs.h
---- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-08-23 12:08:29.047512264 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-23 12:09:03.336490079 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-08-24 14:14:13.130705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-24 14:17:48.802730000 -0400
 @@ -45,8 +45,28 @@ enum nfs4_client_state {
  	NFS4CLNT_RECLAIM_NOGRACE,
  	NFS4CLNT_DELEGRETURN,
@@ -17556,8 +17556,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nf
  
  /* nfs4xdr.c */
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs4proc.c
---- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-08-23 12:08:29.050481368 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-23 12:09:03.339481253 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-08-24 14:14:13.143709000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-24 14:17:48.811734000 -0400
 @@ -49,12 +49,15 @@
  #include <linux/mount.h>
  #include <linux/module.h>
@@ -19223,7 +19223,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/n
  	.setattr	= nfs4_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs/nfs4renewd.c
 --- linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-08-23 12:09:03.341491726 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-08-24 14:17:48.818733000 -0400
 @@ -54,17 +54,17 @@
  void
  nfs4_renew_state(struct work_struct *work)
@@ -19246,8 +19246,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs
  	spin_lock(&clp->cl_lock);
  	lease = clp->cl_lease_time;
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/nfs4state.c
---- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-08-23 12:08:29.052491341 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-23 12:09:03.342373443 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-08-24 14:14:13.150705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-24 14:17:48.825730000 -0400
 @@ -53,6 +53,9 @@
  #include "callback.h"
  #include "delegation.h"
@@ -19566,8 +19566,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/
  			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
  			    test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs4xdr.c
---- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-23 12:08:29.054481400 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-23 12:09:03.346481283 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-24 14:14:13.159705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-24 14:17:48.834738000 -0400
 @@ -50,8 +50,11 @@
  #include <linux/nfs4.h>
  #include <linux/nfs_fs.h>
@@ -21078,8 +21078,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nf
  };
  
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild
---- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-08-23 12:09:03.348511665 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-08-23 12:09:03.348511665 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-08-24 14:17:48.839734000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-08-24 14:17:48.840742000 -0400
 @@ -0,0 +1,11 @@
 +#
 +# Makefile for the pNFS Objects Layout Driver kernel module
@@ -21093,8 +21093,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs
 +panlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o panfs_shim.o
 +obj-$(CONFIG_PNFS_PANLAYOUT) += panlayoutdriver.o
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-08-23 12:09:03.349501459 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-08-23 12:09:03.349501459 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-08-24 14:17:48.843735000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-08-24 14:17:48.845739000 -0400
 @@ -0,0 +1,1087 @@
 +/*
 + *  objio_osd.c
@@ -22184,8 +22184,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noar
 +module_init(objlayout_init);
 +module_exit(objlayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-08-23 12:09:03.350491564 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-08-23 12:09:03.350491564 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-08-24 14:17:48.848735000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-08-24 14:17:48.851730000 -0400
 @@ -0,0 +1,790 @@
 +/*
 + *  objlayout.c
@@ -22978,8 +22978,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noar
 +	.uninitialize_mountpoint = objlayout_uninitialize_mountpoint,
 +};
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h
---- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-08-23 12:09:03.351434439 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-08-23 12:09:03.351434439 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-08-24 14:17:48.852735000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-08-24 14:17:48.854746000 -0400
 @@ -0,0 +1,171 @@
 +/*
 + *  objlayout.h
@@ -23153,8 +23153,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noar
 +
 +#endif /* _OBJLAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-08-23 12:09:03.352501716 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-08-23 12:09:03.352501716 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-08-24 14:17:48.857735000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-08-24 14:17:48.860740000 -0400
 @@ -0,0 +1,734 @@
 +/*
 + *  panfs_shim.c
@@ -23891,8 +23891,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noa
 +module_init(panlayout_init);
 +module_exit(panlayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h
---- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-08-23 12:09:03.353501685 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-08-23 12:09:03.353501685 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-08-24 14:17:48.863734000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-08-24 14:17:48.864730000 -0400
 @@ -0,0 +1,482 @@
 +/*
 + *  panfs_shim.h
@@ -24377,8 +24377,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noa
 +
 +#endif /* _PANLAYOUT_PANFS_SHIM_H */
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-08-23 12:09:03.354501721 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-08-23 12:09:03.354501721 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-08-24 14:17:48.868731000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-08-24 14:17:48.869739000 -0400
 @@ -0,0 +1,435 @@
 +/*
 + *  pnfs_osd_xdr.c
@@ -24816,8 +24816,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.
 +	return 0;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/pagelist.c
---- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-08-23 12:08:29.056411363 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-23 12:09:03.355511659 -0400
+--- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-08-24 14:14:13.169705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-24 14:17:48.875733000 -0400
 @@ -20,6 +20,7 @@
  #include <linux/nfs_mount.h>
  
@@ -24940,8 +24940,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/p
  				if (res == INT_MAX)
  					goto out;
 diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.c
---- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-08-23 12:09:03.356501413 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-08-23 12:09:03.357481204 -0400
+--- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-08-24 14:17:48.880733000 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-08-24 14:17:48.883730000 -0400
 @@ -0,0 +1,2027 @@
 +/*
 + *  linux/fs/nfs/pnfs.c
@@ -26971,8 +26971,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.
 +}
 +EXPORT_SYMBOL(nfs4_put_deviceid_cache);
 diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.h
---- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-08-23 12:09:03.358501440 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-08-23 12:09:03.358501440 -0400
+--- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-08-24 14:17:48.886733000 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-08-24 14:17:48.887735000 -0400
 @@ -0,0 +1,355 @@
 +/*
 + *  fs/nfs/pnfs.h
@@ -27330,8 +27330,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.
 +
 +#endif /* FS_NFS_PNFS_H */
 diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.c
---- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-08-23 12:08:29.057511533 -0400
-+++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-23 12:09:03.359501471 -0400
+--- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-08-24 14:14:13.174707000 -0400
++++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-24 14:17:48.893730000 -0400
 @@ -443,7 +443,7 @@ nfs_proc_symlink(struct inode *dir, stru
  	fattr = nfs_alloc_fattr();
  	status = -ENOMEM;
@@ -27359,8 +27359,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.
  	.getattr	= nfs_proc_getattr,
  	.setattr	= nfs_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.c
---- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-08-23 12:08:29.057511533 -0400
-+++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-23 12:09:03.359501471 -0400
+--- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-08-24 14:14:13.179708000 -0400
++++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-24 14:17:48.899733000 -0400
 @@ -18,8 +18,12 @@
  #include <linux/sunrpc/clnt.h>
  #include <linux/nfs_fs.h>
@@ -27575,8 +27575,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.
  	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
  read_complete:
 diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/super.c
---- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-23 12:08:29.059491391 -0400
-+++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-23 12:09:03.361501458 -0400
+--- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-24 14:14:13.186707000 -0400
++++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-24 14:17:48.907729000 -0400
 @@ -64,6 +64,7 @@
  #include "iostat.h"
  #include "internal.h"
@@ -27624,8 +27624,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/supe
  #endif
  
 diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unlink.c
---- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-08-23 12:08:29.060501485 -0400
-+++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-23 12:09:03.362419975 -0400
+--- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-08-24 14:14:13.192705000 -0400
++++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-24 14:17:48.913730000 -0400
 @@ -110,7 +110,7 @@ void nfs_unlink_prepare(struct rpc_task 
  	struct nfs_unlinkdata *data = calldata;
  	struct nfs_server *server = NFS_SERVER(data->dir);
@@ -27636,8 +27636,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unl
  		return;
  	rpc_call_start(task);
 diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/write.c
---- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-08-23 12:08:27.630563929 -0400
-+++ linux-2.6.34.noarch/fs/nfs/write.c	2010-08-23 12:09:03.364491337 -0400
+--- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-08-24 14:14:06.360160000 -0400
++++ linux-2.6.34.noarch/fs/nfs/write.c	2010-08-24 14:17:48.921712000 -0400
 @@ -20,6 +20,7 @@
  #include <linux/nfs_mount.h>
  #include <linux/nfs_page.h>
@@ -28326,7 +28326,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/writ
  int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/include/linux/exportfs.h
 --- linux-2.6.34.noarch/include/linux/exportfs.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-08-23 12:09:03.365501459 -0400
++++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-08-24 14:17:48.933713000 -0400
 @@ -2,6 +2,7 @@
  #define LINUX_EXPORTFS_H 1
  
@@ -28399,8 +28399,8 @@ diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/i
 +#endif /* CONFIG_PNFSD */
  #endif /* LINUX_EXPORTFS_H */
 diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/include/linux/exp_xdr.h
---- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-08-23 12:09:03.367491365 -0400
-+++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-08-23 12:09:03.367491365 -0400
+--- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-08-24 14:17:48.945690000 -0400
++++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-08-24 14:17:48.946693000 -0400
 @@ -0,0 +1,141 @@
 +#ifndef _LINUX_EXP_XDR_H
 +#define _LINUX_EXP_XDR_H
@@ -28544,8 +28544,8 @@ diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/in
 +}
 +#endif /* _LINUX_EXP_XDR_H */
 diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include/linux/fs.h
---- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-08-23 12:08:29.021511898 -0400
-+++ linux-2.6.34.noarch/include/linux/fs.h	2010-08-23 12:09:03.369481147 -0400
+--- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-08-24 14:14:13.014707000 -0400
++++ linux-2.6.34.noarch/include/linux/fs.h	2010-08-24 14:17:48.961675000 -0400
 @@ -387,6 +387,7 @@ struct inodes_stat_t {
  #include <asm/byteorder.h>
  
@@ -28564,7 +28564,7 @@ diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include
  	struct dentry		*s_root;
 diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/include/linux/nfs4.h
 --- linux-2.6.34.noarch/include/linux/nfs4.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-08-23 12:09:03.371491472 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-08-24 14:17:48.974681000 -0400
 @@ -17,7 +17,10 @@
  
  #define NFS4_BITMAP_SIZE	2
@@ -28694,8 +28694,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/inclu
  #endif
  
 diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfs4_pnfs.h
---- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-08-23 12:09:03.372501550 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-08-23 12:09:03.372501550 -0400
+--- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-08-24 14:17:48.986670000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-08-24 14:17:48.989666000 -0400
 @@ -0,0 +1,330 @@
 +/*
 + *  include/linux/nfs4_pnfs.h
@@ -29028,8 +29028,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/
 +
 +#endif /* LINUX_NFS4_PNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarch/include/linux/nfsd4_block.h
---- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-08-23 12:09:03.373491892 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-08-23 12:09:03.374491393 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-08-24 14:17:48.998668000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-08-24 14:17:49.000665000 -0400
 @@ -0,0 +1,101 @@
 +#ifndef NFSD4_BLOCK
 +#define NFSD4_BLOCK
@@ -29133,8 +29133,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarc
 +#endif /* NFSD4_BLOCK */
 +
 diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h
---- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-08-23 12:09:03.375501481 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-08-23 12:09:03.375501481 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-08-24 14:17:49.012664000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-08-24 14:17:49.013671000 -0400
 @@ -0,0 +1,345 @@
 +/*
 + * include/linux/nfsd4_spnfs.h
@@ -29483,7 +29483,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarc
 +#endif /* NFS_SPNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch/include/linux/nfsd/const.h
 --- linux-2.6.34.noarch/include/linux/nfsd/const.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-08-23 12:09:03.376401789 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-08-24 14:17:49.018668000 -0400
 @@ -29,6 +29,7 @@
  #ifdef __KERNEL__
  
@@ -29494,7 +29494,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch
   * Largest number of bytes we need to allocate for an NFS
 diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch/include/linux/nfsd/debug.h
 --- linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-08-23 12:09:03.376401789 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-08-24 14:17:49.024673000 -0400
 @@ -32,6 +32,8 @@
  #define NFSDDBG_REPCACHE	0x0080
  #define NFSDDBG_XDR		0x0100
@@ -29506,7 +29506,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch
  
 diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarch/include/linux/nfsd/export.h
 --- linux-2.6.34.noarch/include/linux/nfsd/export.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-08-23 12:09:03.377481954 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-08-24 14:17:49.030665000 -0400
 @@ -100,6 +100,7 @@ struct svc_export {
  	uid_t			ex_anon_uid;
  	gid_t			ex_anon_gid;
@@ -29516,8 +29516,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarc
  	struct nfsd4_fs_locations ex_fslocs;
  	int			ex_nflavors;
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h
---- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-08-23 12:09:03.377481954 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-08-23 12:09:03.378501747 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-08-24 14:17:49.033666000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-08-24 14:17:49.034665000 -0400
 @@ -0,0 +1,132 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -29652,8 +29652,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.3
 +
 +#endif /* NFSD_NFS4LAYOUTXDR_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h
---- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-08-23 12:09:03.378501747 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-08-23 12:09:03.378501747 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-08-24 14:17:49.037666000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-08-24 14:17:49.039665000 -0400
 @@ -0,0 +1,54 @@
 +/******************************************************************************
 + *
@@ -29710,8 +29710,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.
 +
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h
---- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-08-23 12:09:03.379487099 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-08-23 12:09:03.379487099 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-08-24 14:17:49.042666000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-08-24 14:17:49.044665000 -0400
 @@ -0,0 +1,271 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -29986,7 +29986,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.n
 +#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noarch/include/linux/nfsd/syscall.h
 --- linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-08-23 12:09:03.380502500 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-08-24 14:17:49.049665000 -0400
 @@ -29,6 +29,7 @@
  /*#define NFSCTL_GETFH		6	/ * get an fh by ino DISCARDED */
  #define NFSCTL_GETFD		7	/* get an fh by path (used by mountd) */
@@ -30024,8 +30024,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noar
  
  union nfsctl_res {
 diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/include/linux/nfs_fs.h
---- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-23 12:08:29.061494081 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-23 12:09:03.381511751 -0400
+--- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-24 14:14:13.201710000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-24 14:17:49.063666000 -0400
 @@ -72,13 +72,20 @@ struct nfs_access_entry {
  	int			mask;
  };
@@ -30124,8 +30124,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/inc
  
  #ifdef __KERNEL__
 diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/include/linux/nfs_fs_sb.h
---- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-08-23 12:08:29.062501618 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-23 12:09:03.383491395 -0400
+--- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-08-24 14:14:13.206708000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-24 14:17:49.077665000 -0400
 @@ -15,6 +15,7 @@ struct nlm_host;
  struct nfs4_sequence_args;
  struct nfs4_sequence_res;
@@ -30200,7 +30200,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/
  	atomic_t active; /* Keep trace of any activity to this server */
 diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch/include/linux/nfs_iostat.h
 --- linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-08-23 12:09:03.384501540 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-08-24 14:17:49.089668000 -0400
 @@ -113,6 +113,9 @@ enum nfs_stat_eventcounters {
  	NFSIOS_SHORTREAD,
  	NFSIOS_SHORTWRITE,
@@ -30213,7 +30213,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch
  
 diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/include/linux/nfs_page.h
 --- linux-2.6.34.noarch/include/linux/nfs_page.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-08-23 12:09:03.385491518 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-08-24 14:17:49.103665000 -0400
 @@ -39,6 +39,7 @@ struct nfs_page {
  	struct list_head	wb_list;	/* Defines state of page: */
  	struct page		*wb_page;	/* page to read in/write out */
@@ -30262,8 +30262,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/i
  			     struct inode *inode,
  			     int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
 diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/include/linux/nfs_xdr.h
---- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-08-23 12:08:29.062501618 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-23 12:09:03.387491422 -0400
+--- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-08-24 14:14:13.211708000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-24 14:17:49.116665000 -0400
 @@ -3,6 +3,8 @@
  
  #include <linux/nfsacl.h>
@@ -30415,8 +30415,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/in
  extern struct rpc_version	nfs_version3;
  extern struct rpc_version	nfs_version4;
 diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.noarch/include/linux/panfs_shim_api.h
---- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-08-23 12:09:03.388491527 -0400
-+++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-08-23 12:09:03.388491527 -0400
+--- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-08-24 14:17:49.128664000 -0400
++++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-08-24 14:17:49.129670000 -0400
 @@ -0,0 +1,57 @@
 +#ifndef _PANFS_SHIM_API_H
 +#define _PANFS_SHIM_API_H
@@ -30476,8 +30476,8 @@ diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.no
 +
 +#endif /* _PANFS_SHIM_API_H */
 diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h
---- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-08-23 12:09:03.390501461 -0400
-+++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-08-23 12:09:03.390501461 -0400
+--- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-08-24 14:17:49.141664000 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-08-24 14:17:49.142670000 -0400
 @@ -0,0 +1,440 @@
 +/*
 + *  pnfs_osd_xdr.h
@@ -30920,8 +30920,8 @@ diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noar
 +
 +#endif /* __PNFS_OSD_XDR_H__ */
 diff -up linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_xdr.h
---- linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig	2010-08-23 12:09:03.391491550 -0400
-+++ linux-2.6.34.noarch/include/linux/pnfs_xdr.h	2010-08-23 12:09:03.391491550 -0400
+--- linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig	2010-08-24 14:17:49.153666000 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_xdr.h	2010-08-24 14:17:49.155665000 -0400
 @@ -0,0 +1,134 @@
 +/*
 + *  include/linux/pnfs_xdr.h
@@ -31059,7 +31059,7 @@ diff -up linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig linux-2.6.34.noarch/i
 +#endif /* LINUX_PNFS_XDR_H */
 diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/include/linux/posix_acl.h
 --- linux-2.6.34.noarch/include/linux/posix_acl.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-08-23 12:09:03.393501437 -0400
++++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-08-24 14:17:49.168668000 -0400
 @@ -8,6 +8,7 @@
  #ifndef __LINUX_POSIX_ACL_H
  #define __LINUX_POSIX_ACL_H
@@ -31070,7 +31070,7 @@ diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/
  #define ACL_UNDEFINED_ID	(-1)
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-08-23 12:09:03.393501437 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-08-24 14:17:49.174665000 -0400
 @@ -14,6 +14,8 @@
  /* size of an XDR encoding unit in bytes, i.e. 32bit */
  #define XDR_UNIT	(4)
@@ -31082,7 +31082,7 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.n
  
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-08-23 12:09:03.394512138 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-08-24 14:17:49.179667000 -0400
 @@ -3,6 +3,7 @@
  
  #ifdef __KERNEL__
@@ -31103,8 +31103,8 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.3
  
  struct rpc_pipe_ops {
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h
---- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-08-23 12:09:03.394512138 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-08-23 12:09:03.395501822 -0400
+--- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-08-24 14:17:49.183664000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-08-24 14:17:49.184674000 -0400
 @@ -0,0 +1,111 @@
 +/*
 + *  Copyright (c) 2008 The Regents of the University of Michigan.
@@ -31219,7 +31219,7 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux
 +#endif /* _SIMPLE_RPC_PIPEFS_H_ */
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-08-23 12:09:03.395501822 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-08-24 14:17:49.190665000 -0400
 @@ -166,4 +166,41 @@ static inline char *__svc_print_addr(con
  
  	return buf;
@@ -31263,8 +31263,8 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.n
 +}
  #endif /* SUNRPC_SVC_XPRT_H */
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xdr.h
---- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-08-23 12:08:29.066475323 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-23 12:09:03.396464612 -0400
+--- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-08-24 14:14:13.258707000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-24 14:17:49.195672000 -0400
 @@ -131,6 +131,13 @@ xdr_decode_hyper(__be32 *p, __u64 *valp)
  	return p + 2;
  }
@@ -31287,14 +31287,9 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch
  extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
  		unsigned int base, unsigned int len);
  extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
-diff -up linux-2.6.34.noarch/localversion-pnfs.orig linux-2.6.34.noarch/localversion-pnfs
---- linux-2.6.34.noarch/localversion-pnfs.orig	2010-08-23 12:09:03.396464612 -0400
-+++ linux-2.6.34.noarch/localversion-pnfs	2010-08-23 12:09:03.396464612 -0400
-@@ -0,0 +1 @@
-+-pnfs
 diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/sunrpc/Makefile
 --- linux-2.6.34.noarch/net/sunrpc/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-08-23 12:09:03.397501662 -0400
++++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-08-24 14:17:49.204668000 -0400
 @@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprt
  	    svc.o svcsock.o svcauth.o svcauth_unix.o \
  	    addr.o rpcb_clnt.o timer.o xdr.o \
@@ -31305,8 +31300,8 @@ diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/su
  sunrpc-$(CONFIG_PROC_FS) += stats.o
  sunrpc-$(CONFIG_SYSCTL) += sysctl.o
 diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c
---- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-08-23 12:09:03.398522348 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-08-23 12:09:03.398522348 -0400
+--- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-08-24 14:17:49.208664000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-08-24 14:17:49.209670000 -0400
 @@ -0,0 +1,424 @@
 +/*
 + *  net/sunrpc/simple_rpc_pipefs.c
@@ -31733,8 +31728,8 @@ diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.no
 +}
 +EXPORT_SYMBOL(pipefs_generic_destroy_msg);
 diff -up linux-2.6.34.noarch/net/sunrpc/xdr.c.orig linux-2.6.34.noarch/net/sunrpc/xdr.c
---- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-08-23 12:08:29.081501640 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-23 12:09:03.399443371 -0400
+--- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-08-24 14:14:13.447705000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-24 14:17:49.215665000 -0400
 @@ -403,16 +403,14 @@ xdr_shrink_pagelen(struct xdr_buf *buf, 
  
  	/* Shift the tail first */

From 93be1cd0134bac1b112038c2c6376d69e4511197 Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Tue, 24 Aug 2010 15:13:05 -0400
Subject: [PATCH 14/20] set the kernel flags

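Change the default build flags in kernel.spec so that a plain build
behaves as if it had been invoked with:

--with firmware
--with debuginfo
--without vdso_install
--without debug
--without doc
--without headers
--without perftool
--without perf

Note: with_debuginfo now evaluates to 1 on both branches, so passing
--without debuginfo on the rpmbuild command line no longer has any effect.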

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 kernel.spec | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/kernel.spec b/kernel.spec
index 2a47977aa..f3e776e20 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -101,23 +101,23 @@ Summary: The Linux kernel
 # kernel-smp (only valid for ppc 32-bit)
 %define with_smp       %{?_without_smp:       0} %{?!_without_smp:       1}
 # kernel-debug
-%define with_debug     %{?_without_debug:     0} %{?!_without_debug:     1}
+%define with_debug     %{?_without_debug:     0} %{?!_without_debug:     0}
 # kernel-doc
-%define with_doc       %{?_without_doc:       0} %{?!_without_doc:       1}
+%define with_doc       %{?_without_doc:       0} %{?!_without_doc:       0}
 # kernel-headers
-%define with_headers   %{?_without_headers:   0} %{?!_without_headers:   1}
+%define with_headers   %{?_without_headers:   0} %{?!_without_headers:   0}
 # kernel-firmware
 %define with_firmware  %{?_with_firmware:     1} %{?!_with_firmware:     1}
 # tools/perf
-%define with_perftool  %{?_without_perftool:  0} %{?!_without_perftool:  1}
+%define with_perftool  %{?_without_perftool:  0} %{?!_without_perftool:  0}
 # perf noarch subpkg
-%define with_perf      %{?_without_perf:      0} %{?!_without_perf:      1}
+%define with_perf      %{?_without_perf:      0} %{?!_without_perf:      0}
 # kernel-debuginfo
-%define with_debuginfo %{?_without_debuginfo: 0} %{?!_without_debuginfo: 1}
+%define with_debuginfo %{?_without_debuginfo: 1} %{?!_without_debuginfo: 1}
 # kernel-bootwrapper (for creating zImages from kernel + initrd)
 %define with_bootwrapper %{?_without_bootwrapper: 0} %{?!_without_bootwrapper: 1}
 # Want to build a the vsdo directories installed
-%define with_vdso_install %{?_without_vdso_install: 0} %{?!_without_vdso_install: 1}
+%define with_vdso_install %{?_without_vdso_install: 0} %{?!_without_vdso_install: 0}
 
 # Build the kernel-doc package, but don't fail the build if it botches.
 # Here "true" means "continue" and "false" means "fail the build".

From 27f38a2984d252110bc12e5f2938f55701c22493 Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Mon, 23 Aug 2010 12:20:57 -0400
Subject: [PATCH 15/20] Updated to the latest pNFS tag:
 pnfs-all-2.6.35-2010-08-19

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 kernel.spec                          |   2 +-
 nfsd-35-fc.patch                     |  62 +++
 pnfs-all-2.6.35-2010-08-19-f13.patch | 550 +++++++++++++++++++++++++++
 3 files changed, 613 insertions(+), 1 deletion(-)

diff --git a/kernel.spec b/kernel.spec
index f3e776e20..f9c6ff212 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -23,7 +23,7 @@ Summary: The Linux kernel
 #
 # (Uncomment the '#' and both spaces below to set the buildid.)
 #
-%define buildid .pnfs34.2010.08.19
+%define buildid .pnfs_all_2.6.35_2010_08_19
 ###################################################################
 
 # The buildid can also be specified on the rpmbuild command line
diff --git a/nfsd-35-fc.patch b/nfsd-35-fc.patch
index 2825464af..9a97fc6ec 100644
--- a/nfsd-35-fc.patch
+++ b/nfsd-35-fc.patch
@@ -1,6 +1,10 @@
 diff -up linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt
 --- linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt	2010-08-23 14:12:24.165356789 -0400
+=======
++++ linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt	2010-08-23 09:57:18.233564439 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -137,7 +137,7 @@ NS*| OPENATTR             | OPT        |
     | READ                 | REQ        |              | Section 18.22  |
     | READDIR              | REQ        |              | Section 18.23  |
@@ -12,7 +16,11 @@ diff -up linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig
     | RENAME               | REQ        |              | Section 18.26  |
 diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
 --- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 14:12:24.519356675 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 09:57:18.234564075 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -259,10 +259,9 @@ static struct cache_detail svc_expkey_ca
  	.alloc		= expkey_alloc,
  };
@@ -108,7 +116,11 @@ diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/e
  out_put_clp:
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
 --- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 14:12:52.625429773 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 10:00:37.257414684 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -79,11 +79,6 @@ enum nfs_cb_opnum4 {
  					cb_sequence_dec_sz +            \
  					op_dec_sz)
@@ -211,7 +223,11 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/
  	int status;
  
 -	status = rpc_call_async(cb->cb_client, &msg,
+<<<<<<< HEAD
 +	status = rpc_call_async(clp->cl_cb_client, &msg,
+=======
++	status = rpc_call_async(cb->cl_cb_client, &msg,
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
  				RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
  				&nfsd4_cb_probe_ops, (void *)clp);
 -	if (status) {
@@ -402,7 +418,11 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/
  }
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
 --- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 14:12:25.698356909 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 09:57:18.237376763 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -969,20 +969,36 @@ static struct nfsd4_operation nfsd4_ops[
  static const char *nfsd4_op_name(unsigned opnum);
  
@@ -490,7 +510,11 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd
  static const char *nfsd4_op_name(unsigned opnum)
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
 --- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 14:12:25.700356284 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 09:57:18.240356512 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -45,8 +45,8 @@
  #define NFSDDBG_FACILITY                NFSDDBG_PROC
  
@@ -1280,6 +1304,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs
 -{
 -	user_lease_time = leasetime;
 -}
+<<<<<<< HEAD
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
 --- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-08-23 14:14:22.882428704 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-23 14:14:33.418376589 -0400
@@ -1295,6 +1320,11 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
 --- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-05-16 17:17:36.000000000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 14:12:25.821359224 -0400
+=======
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
+--- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 09:57:20.629370282 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -46,6 +46,7 @@ enum {
  	 */
  #ifdef CONFIG_NFSD_V4
@@ -1415,7 +1445,11 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/n
  		/* last one */ {""}
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
 --- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 14:12:25.835418441 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 09:57:20.629370282 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -82,7 +82,6 @@ int nfs4_state_init(void);
  void nfsd4_free_slabs(void);
  int nfs4_state_start(void);
@@ -1452,7 +1486,11 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfs
  /*
 diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
 --- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 14:12:25.836366516 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 09:57:21.807501619 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -70,6 +70,16 @@ struct nfsd4_cb_sequence {
  	struct nfs4_client	*cbs_clp;
  };
@@ -1570,7 +1608,11 @@ diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/st
  nfs4_put_stateowner(struct nfs4_stateowner *so)
 diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
 --- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 14:12:25.837387292 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 09:57:23.994379831 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -381,6 +381,10 @@ struct nfsd4_destroy_session {
  	struct nfs4_sessionid	sessionid;
  };
@@ -1612,7 +1654,11 @@ diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr
  extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h
 --- linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h	2010-08-23 14:12:25.838377224 -0400
+=======
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h	2010-08-23 09:57:23.994379831 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -40,12 +40,12 @@ struct nfs_fhbase_old {
   * This is the new flexible, extensible style NFSv2/v3 file handle.
   * by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
@@ -1631,7 +1677,11 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig linux-2.6.34.noarch
   * This might allow a file to be confirmed to be in a writable part of a
 diff -up linux-2.6.34.noarch/net/sunrpc/cache.c.orig linux-2.6.34.noarch/net/sunrpc/cache.c
 --- linux-2.6.34.noarch/net/sunrpc/cache.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/net/sunrpc/cache.c	2010-08-23 14:12:25.839376838 -0400
+=======
++++ linux-2.6.34.noarch/net/sunrpc/cache.c	2010-08-23 09:57:23.995376793 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -49,11 +49,17 @@ static void cache_init(struct cache_head
  	h->last_refresh = now;
  }
@@ -1698,7 +1748,11 @@ diff -up linux-2.6.34.noarch/net/sunrpc/cache.c.orig linux-2.6.34.noarch/net/sun
  		/* entry is valid */
 diff -up linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig linux-2.6.34.noarch/net/sunrpc/svcsock.c
 --- linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/net/sunrpc/svcsock.c	2010-08-23 14:12:25.840384371 -0400
+=======
++++ linux-2.6.34.noarch/net/sunrpc/svcsock.c	2010-08-23 09:57:23.997368707 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -547,7 +547,6 @@ static int svc_udp_recvfrom(struct svc_r
  			dprintk("svc: recvfrom returned error %d\n", -err);
  			set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
@@ -1765,7 +1819,11 @@ diff -up linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig linux-2.6.34.noarch/net/s
  error:
 diff -up linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig linux-2.6.34.noarch/net/sunrpc/svc_xprt.c
 --- linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/net/sunrpc/svc_xprt.c	2010-08-23 14:12:25.841371223 -0400
+=======
++++ linux-2.6.34.noarch/net/sunrpc/svc_xprt.c	2010-08-23 09:57:23.996377209 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -744,8 +744,10 @@ int svc_recv(struct svc_rqst *rqstp, lon
  		if (rqstp->rq_deferred) {
  			svc_xprt_received(xprt);
@@ -1794,7 +1852,11 @@ diff -up linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig linux-2.6.34.noarch/net/
  void svc_close_xprt(struct svc_xprt *xprt)
 diff -up linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c.orig linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
 --- linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	2010-08-23 14:12:25.842376584 -0400
+=======
++++ linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	2010-08-23 09:57:23.998377481 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -566,7 +566,6 @@ static int rdma_read_complete(struct svc
  		ret, rqstp->rq_arg.len,	rqstp->rq_arg.head[0].iov_base,
  		rqstp->rq_arg.head[0].iov_len);
diff --git a/pnfs-all-2.6.35-2010-08-19-f13.patch b/pnfs-all-2.6.35-2010-08-19-f13.patch
index 10df9b15c..ecc100c30 100644
--- a/pnfs-all-2.6.35-2010-08-19-f13.patch
+++ b/pnfs-all-2.6.35-2010-08-19-f13.patch
@@ -1,6 +1,11 @@
 diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arch/um/os-Linux/mem.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-08-24 14:14:03.643355000 -0400
 +++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-08-24 14:17:48.415730000 -0400
+=======
+--- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-08-23 12:08:27.310584826 -0400
++++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-08-23 12:09:03.273553977 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -13,6 +13,7 @@
  #include <sys/stat.h>
  #include <sys/mman.h>
@@ -11,7 +16,11 @@ diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arc
  #include "os.h"
 diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.c
 --- linux-2.6.34.noarch/block/genhd.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/block/genhd.c	2010-08-24 14:17:48.421730000 -0400
+=======
++++ linux-2.6.34.noarch/block/genhd.c	2010-08-23 12:09:03.273553977 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -1009,6 +1009,7 @@ static void disk_release(struct device *
  struct class block_class = {
  	.name		= "block",
@@ -21,8 +30,13 @@ diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.
  static char *block_devnode(struct device *dev, mode_t *mode)
  {
 diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-08-24 14:17:48.423729000 -0400
 +++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-08-24 14:17:48.425730000 -0400
+=======
+--- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-08-23 12:09:03.274563927 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-08-23 12:09:03.274563927 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,211 @@
 +(c) 2007 Network Appliance Inc.
 +
@@ -237,7 +251,11 @@ diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.
 +
 diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/drivers/md/dm-ioctl.c
 --- linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-08-24 14:17:48.430730000 -0400
+=======
++++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-08-23 12:09:03.275584050 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -651,6 +651,12 @@ static int dev_create(struct dm_ioctl *p
  	return r;
  }
@@ -292,7 +310,11 @@ diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/driv
  	int r;
 diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drivers/scsi/hosts.c
 --- linux-2.6.34.noarch/drivers/scsi/hosts.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-08-24 14:17:48.435733000 -0400
+=======
++++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-08-23 12:09:03.276563906 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -49,7 +49,7 @@ static void scsi_host_cls_release(struct
  	put_device(&class_to_shost(dev)->shost_gendev);
  }
@@ -304,7 +326,11 @@ diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drive
  };
 diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/exofs.h
 --- linux-2.6.34.noarch/fs/exofs/exofs.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-08-24 14:17:48.440733000 -0400
+=======
++++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-08-23 12:09:03.277563890 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -36,13 +36,9 @@
  #include <linux/fs.h>
  #include <linux/time.h>
@@ -360,8 +386,13 @@ diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/
 +
  #endif
 diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs/export.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-08-24 14:17:48.444731000 -0400
 +++ linux-2.6.34.noarch/fs/exofs/export.c	2010-08-24 14:17:48.446730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-08-23 12:09:03.278386746 -0400
++++ linux-2.6.34.noarch/fs/exofs/export.c	2010-08-23 12:09:03.278386746 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,396 @@
 +/*
 + * export.c - Implementation of the pnfs_export_operations
@@ -761,7 +792,11 @@ diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs
 +}
 diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/inode.c
 --- linux-2.6.34.noarch/fs/exofs/inode.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-08-24 14:17:48.452730000 -0400
+=======
++++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-08-23 12:09:03.279502002 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -833,7 +833,7 @@ void exofs_truncate(struct inode *inode)
  	if (unlikely(wait_obj_created(oi)))
  		goto fail;
@@ -781,7 +816,11 @@ diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/
   * Fill in an inode read from the OSD and set it up for use
 diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/Kbuild
 --- linux-2.6.34.noarch/fs/exofs/Kbuild.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-08-24 14:17:48.457733000 -0400
+=======
++++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-08-23 12:09:03.279502002 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -13,4 +13,5 @@
  #
  
@@ -790,7 +829,11 @@ diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/K
  obj-$(CONFIG_EXOFS_FS) += exofs.o
 diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/Kconfig
 --- linux-2.6.34.noarch/fs/exofs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-08-24 14:17:48.462739000 -0400
+=======
++++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-08-23 12:09:03.280553663 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -1,6 +1,7 @@
  config EXOFS_FS
  	tristate "exofs: OSD based file system support"
@@ -801,7 +844,11 @@ diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/
  	  as its backing storage.
 diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/super.c
 --- linux-2.6.34.noarch/fs/exofs/super.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/exofs/super.c	2010-08-24 14:17:48.468730000 -0400
+=======
++++ linux-2.6.34.noarch/fs/exofs/super.c	2010-08-23 12:09:03.281511951 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -621,6 +621,7 @@ static int exofs_fill_super(struct super
  	sb->s_fs_info = sbi;
  	sb->s_op = &exofs_sops;
@@ -812,7 +859,11 @@ diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/
  		EXOFS_ERR("ERROR: exofs_iget failed\n");
 diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exportfs/expfs.c
 --- linux-2.6.34.noarch/fs/exportfs/expfs.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-08-24 14:17:48.473730000 -0400
+=======
++++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-08-23 12:09:03.282511528 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -16,6 +16,13 @@
  #include <linux/namei.h>
  #include <linux/sched.h>
@@ -829,7 +880,11 @@ diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exp
  
 diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/exportfs/Makefile
 --- linux-2.6.34.noarch/fs/exportfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-08-24 14:17:48.478733000 -0400
+=======
++++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-08-23 12:09:03.282511528 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -3,4 +3,7 @@
  
  obj-$(CONFIG_EXPORTFS) += exportfs.o
@@ -840,8 +895,13 @@ diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/ex
 +exportfs-$(CONFIG_EXPORTFS_OSD_LAYOUT)	+= pnfs_osd_xdr_srv.o
 +exportfs-$(CONFIG_EXPORTFS_BLOCK_LAYOUT) += nfs4blocklayoutxdr.o
 diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-08-24 14:17:48.482731000 -0400
 +++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-08-24 14:17:48.484734000 -0400
+=======
+--- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-08-23 12:09:03.283511561 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-08-23 12:09:03.283511561 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,158 @@
 +/*
 + *  linux/fs/nfsd/nfs4blocklayoutxdr.c
@@ -1002,8 +1062,13 @@ diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.
 +}
 +EXPORT_SYMBOL_GPL(blocklayout_encode_layout);
 diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-08-24 14:17:48.487733000 -0400
 +++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-08-24 14:17:48.489734000 -0400
+=======
+--- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-08-23 12:09:03.283511561 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-08-23 12:09:03.283511561 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,218 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -1224,8 +1289,13 @@ diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.n
 +}
 +EXPORT_SYMBOL(filelayout_encode_layout);
 diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-08-24 14:17:48.493729000 -0400
 +++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-08-24 14:17:48.494735000 -0400
+=======
+--- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-08-23 12:09:03.284511493 -0400
++++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-08-23 12:09:03.284511493 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,289 @@
 +/*
 + *  pnfs_osd_xdr_enc.c
@@ -1518,7 +1588,11 @@ diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.no
 +EXPORT_SYMBOL(pnfs_osd_xdr_decode_ioerr);
 diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gfs2/ops_fstype.c
 --- linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-08-24 14:17:48.499730000 -0400
+=======
++++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-08-23 12:09:03.285539075 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -19,6 +19,7 @@
  #include <linux/gfs2_ondisk.h>
  #include <linux/slow-work.h>
@@ -1539,7 +1613,11 @@ diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gf
  	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
 diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
 --- linux-2.6.34.noarch/fs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/Kconfig	2010-08-24 14:17:48.505733000 -0400
+=======
++++ linux-2.6.34.noarch/fs/Kconfig	2010-08-23 12:09:03.286512316 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -224,6 +224,31 @@ config LOCKD_V4
  config EXPORTFS
  	tristate
@@ -1573,8 +1651,13 @@ diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
  	tristate
  	select FS_POSIX_ACL
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-08-24 14:17:48.509734000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-08-24 14:17:48.511732000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-08-23 12:09:03.287381619 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-08-23 12:09:03.287381619 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,66 @@
 +#include <linux/module.h>
 +#include <linux/uaccess.h>
@@ -1643,8 +1726,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.or
 +	return;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-08-24 14:17:48.514733000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-08-24 14:17:48.516731000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-08-23 12:09:03.288501648 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-08-23 12:09:03.288501648 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,1160 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.c
@@ -2807,8 +2895,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.
 +module_init(nfs4blocklayout_init);
 +module_exit(nfs4blocklayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-08-24 14:17:48.519731000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-08-24 14:17:48.521730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-08-23 12:09:03.289501933 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-08-23 12:09:03.289501933 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,335 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayoutdev.c
@@ -3146,8 +3239,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.
 +	goto out;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-08-24 14:17:48.523733000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-08-24 14:17:48.525730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-08-23 12:09:03.290395707 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-08-23 12:09:03.290395707 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,120 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayoutdm.c
@@ -3270,8 +3368,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.3
 +	}
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-08-24 14:17:48.528729000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-08-24 14:17:48.529735000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-08-23 12:09:03.290395707 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-08-23 12:09:03.291501560 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,303 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.h
@@ -3577,8 +3680,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.
 +
 +#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-08-24 14:17:48.532731000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-08-24 14:17:48.534734000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-08-23 12:09:03.292511531 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-08-23 12:09:03.292511531 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,948 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.h
@@ -4529,8 +4637,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noar
 +	}
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-08-24 14:17:48.537729000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-08-24 14:17:48.538739000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-08-23 12:09:03.292511531 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-08-23 12:09:03.293491476 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,6 @@
 +#
 +# Makefile for the pNFS block layout driver kernel module
@@ -4540,7 +4653,11 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarc
 +			extents.o block-device-discovery-pipe.o
 diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/callback.h
 --- linux-2.6.34.noarch/fs/nfs/callback.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-08-24 14:17:48.544730000 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-08-23 12:09:03.293491476 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -8,6 +8,8 @@
  #ifndef __LINUX_FS_NFS_CALLBACK_H
  #define __LINUX_FS_NFS_CALLBACK_H
@@ -4613,7 +4730,11 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/c
  extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
 diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/nfs/callback_proc.c
 --- linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-08-24 14:17:48.562731000 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-08-23 12:09:03.294522414 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -8,10 +8,15 @@
  #include <linux/nfs4.h>
  #include <linux/nfs_fs.h>
@@ -5096,7 +5217,11 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/
  	return status;
 diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/nfs/callback_xdr.c
 --- linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-08-24 14:17:48.568730000 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-08-23 12:09:03.295502055 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -22,6 +22,8 @@
  #define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
  
@@ -5298,8 +5423,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/n
  		.process_op = (callback_process_op_t)nfs4_callback_sequence,
  		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
 diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/client.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-08-24 14:14:13.062705000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-24 14:17:48.575730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-08-23 12:08:29.037481540 -0400
++++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-23 12:09:03.297501650 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -39,6 +39,7 @@
  #include <net/ipv6.h>
  #include <linux/nfs_xdr.h>
@@ -5508,8 +5638,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/cli
  		goto error;
  
 diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/bl_com.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-08-24 14:17:48.578729000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-08-24 14:17:48.579735000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-08-23 12:09:03.297501650 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-08-23 12:09:03.298501447 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,292 @@
 +#if defined(CONFIG_SPNFS_BLOCK)
 +
@@ -5804,8 +5939,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/b
 +}
 +#endif /* CONFIG_SPNFS_BLOCK */
 diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/bl_ops.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-08-24 14:17:48.584729000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-08-24 14:17:48.586730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-08-23 12:09:03.299501445 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-08-23 12:09:03.299501445 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,1672 @@
 +/*
 + *  bl_ops.c
@@ -7480,8 +7620,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/b
 +
 +#endif /* CONFIG_SPNFS_BLOCK */
 diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs/delegation.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-08-24 14:14:13.068705000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-24 14:17:48.592730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-08-23 12:08:29.037481540 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-23 12:09:03.300491952 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -104,7 +104,8 @@ again:
  			continue;
  		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
@@ -7558,7 +7703,11 @@ diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs
  	rcu_read_unlock();
 diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs/delegation.h
 --- linux-2.6.34.noarch/fs/nfs/delegation.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-08-24 14:17:48.597733000 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-08-23 12:09:03.301431797 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -34,9 +34,7 @@ enum {
  int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
  void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
@@ -7571,8 +7720,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs
  
  struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
 diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-08-24 14:14:13.612707000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-24 14:17:48.604730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-08-23 12:08:29.089481525 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 12:09:03.302511603 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -17,11 +17,19 @@
  #include <linux/module.h>
  #include <linux/exportfs.h>
@@ -7750,7 +7904,11 @@ diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/e
  	svcauth_unix_purge();
 diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/direct.c
 --- linux-2.6.34.noarch/fs/nfs/direct.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-08-24 14:17:48.610730000 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-08-23 12:09:03.303491500 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -267,6 +267,38 @@ static const struct rpc_call_ops nfs_rea
  	.rpc_release = nfs_direct_read_release,
  };
@@ -7996,7 +8154,11 @@ diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/dir
  		user_addr += bytes;
 diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kconfig
 --- linux-2.6.34.noarch/fs/nfsd/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-08-24 14:17:48.616730000 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-08-23 12:09:03.304505472 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -79,3 +79,52 @@ config NFSD_V4
  	  available from http://linux-nfs.org/.
  
@@ -8052,7 +8214,11 @@ diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kc
 +	  If unsure, say N.
 diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/Makefile
 --- linux-2.6.34.noarch/fs/nfsd/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-08-24 14:17:48.621733000 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-08-23 12:09:03.304505472 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -11,3 +11,7 @@ nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs
  nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
  nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
@@ -8062,8 +8228,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/M
 +nfsd-$(CONFIG_SPNFS)	+= spnfs_com.o spnfs_ops.o
 +nfsd-$(CONFIG_SPNFS_BLOCK) += bl_com.o bl_ops.o
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-08-24 14:14:13.618705000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-24 14:17:48.628730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-08-23 12:08:29.090501507 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 12:09:03.306491345 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -40,7 +40,6 @@
  
  #define NFSPROC4_CB_NULL 0
@@ -8603,8 +8774,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/
 +}
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-08-24 14:17:48.633729000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-08-24 14:17:48.641730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-08-23 12:09:03.307491492 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-08-23 12:09:03.308491262 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,1679 @@
 +/******************************************************************************
 + *
@@ -10286,8 +10462,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfs
 +	return status;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-08-24 14:17:48.645731000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-08-24 14:17:48.647730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-08-23 12:09:03.309501439 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-08-23 12:09:03.309501439 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,461 @@
 +/******************************************************************************
 + *
@@ -10751,8 +10932,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/n
 +};
 +EXPORT_SYMBOL(pnfs_dlm_export_ops);
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-08-24 14:17:48.651729000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-08-24 14:17:48.652735000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-08-23 12:09:03.310501390 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-08-23 12:09:03.310501390 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,620 @@
 +/*
 +*  linux/fs/nfsd/nfs4pnfsds.c
@@ -11375,8 +11561,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nf
 +
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-08-24 14:14:13.623707000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-24 14:17:48.658733000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-08-23 12:08:29.091491685 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 12:09:03.311501496 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -34,10 +34,14 @@
   */
  #include <linux/file.h>
@@ -11851,8 +12042,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd
  
  static const char *nfsd4_op_name(unsigned opnum)
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-08-24 14:14:13.632707000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-24 14:17:48.667732000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-08-23 12:08:29.093491375 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 12:09:03.313491310 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -42,6 +42,8 @@
  #include "xdr4.h"
  #include "vfs.h"
@@ -12368,8 +12564,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs
  }
  
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-08-24 14:14:13.639707000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-24 14:17:48.675730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-23 12:09:03.315491356 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -47,9 +47,14 @@
  #include <linux/nfsd_idmap.h>
  #include <linux/nfs4_acl.h>
@@ -12988,8 +13189,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/
  	[OP_SEQUENCE]		= (nfsd4_enc)nfsd4_encode_sequence,
  	[OP_SET_SSV]		= (nfsd4_enc)nfsd4_encode_noop,
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-08-24 14:14:13.645705000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-24 14:17:48.681730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-08-23 12:08:29.094491943 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 12:09:03.317501495 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -13,10 +13,15 @@
  #include <linux/nfsd/syscall.h>
  #include <linux/lockd/lockd.h>
@@ -13166,8 +13372,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/n
  	remove_proc_entry("fs/nfs/exports", NULL);
  	remove_proc_entry("fs/nfs", NULL);
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-08-24 14:14:13.651705000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-24 14:17:48.687730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-08-23 12:08:29.095491390 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 12:09:03.318355741 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -285,11 +285,17 @@ extern time_t nfsd4_grace;
  #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
  	NFSD4_SUPPORTED_ATTRS_WORD0
@@ -13189,7 +13400,11 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfs
  {
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.c
 --- linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-08-24 14:17:48.693730000 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-08-23 12:09:03.319511586 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -10,6 +10,7 @@
  #include <linux/exportfs.h>
  
@@ -13227,7 +13442,11 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nf
  		__u32 tfh[2];
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.h
 --- linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-08-24 14:17:48.698733000 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-08-23 12:09:03.319511586 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -14,6 +14,7 @@ enum nfsd_fsid {
  	FSID_UUID8,
  	FSID_UUID16,
@@ -13280,8 +13499,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nf
 +
  #endif /* _LINUX_NFSD_FH_INT_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/nfssvc.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-08-24 14:14:06.365163000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-08-24 14:17:48.704731000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-08-23 12:08:27.631563969 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-08-23 12:09:03.320416974 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -115,7 +115,7 @@ struct svc_program		nfsd_program = {
  
  };
@@ -13292,8 +13516,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/n
  int nfsd_vers(int vers, enum vers_op change)
  {
 diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pnfsd.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-08-24 14:17:48.708729000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-08-24 14:17:48.710730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-08-23 12:09:03.321376171 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-08-23 12:09:03.321376171 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,143 @@
 +/*
 + *  Copyright (c) 2005 The Regents of the University of Michigan.
@@ -13439,8 +13668,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pn
 +
 +#endif /* LINUX_NFSD_PNFSD_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-08-24 14:17:48.713731000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-08-24 14:17:48.715730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-08-23 12:09:03.321376171 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-08-23 12:09:03.322501672 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,225 @@
 +/*
 + * linux/fs/nfsd/pnfs_lexp.c
@@ -13668,8 +13902,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nf
 +	inode->i_sb->s_pnfs_op = &pnfsd_lexp_ops;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_com.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-08-24 14:17:48.719729000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-08-24 14:17:48.720735000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-08-23 12:09:03.322501672 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-08-23 12:09:03.323511608 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,535 @@
 +/*
 + * fs/nfsd/spnfs_com.c
@@ -14207,8 +14446,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfs
 +}
 +#endif /* CONFIG_PROC_FS */
 diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-08-24 14:17:48.724733000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-08-24 14:17:48.726730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-08-23 12:09:03.324501390 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-08-23 12:09:03.324501390 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,878 @@
 +/*
 + * fs/nfsd/spnfs_ops.c
@@ -15089,8 +15333,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfs
 +	return 0;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-08-24 14:14:13.656705000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-24 14:17:48.731738000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-08-23 12:08:29.096512142 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 12:09:03.325501424 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -242,6 +242,12 @@ struct nfs4_client {
  	u32			cl_cb_seq_nr;
  	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
@@ -15207,8 +15456,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/st
 +
  #endif   /* NFSD4_STATE_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-08-24 14:14:06.371160000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-08-24 14:17:48.737742000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-08-23 12:08:27.632564132 -0400
++++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-08-23 12:09:03.326501490 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -37,7 +37,12 @@
  #ifdef CONFIG_NFSD_V4
  #include <linux/nfs4_acl.h>
@@ -15335,8 +15589,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.
  out_nfserr:
  	err = nfserrno(host_err);
 diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-08-24 14:14:13.661705000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-24 14:17:48.743747000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-08-23 12:08:29.097425997 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 12:09:03.327451643 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -37,6 +37,8 @@
  #ifndef _LINUX_NFSD_XDR4_H
  #define _LINUX_NFSD_XDR4_H
@@ -15413,8 +15672,13 @@ diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr
  	struct nfs4_replay *			replay;
  };
 diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-24 14:14:13.079708000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-24 14:17:48.749746000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-23 12:08:29.039491912 -0400
++++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-23 12:09:03.328501680 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -28,6 +28,7 @@
  #include <linux/aio.h>
  #include <linux/gfp.h>
@@ -15540,8 +15804,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.
  	if (!ret)
  		return VM_FAULT_LOCKED;
 diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inode.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-08-24 14:14:13.095705000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-24 14:17:48.757730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-08-23 12:08:29.042511552 -0400
++++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-23 12:09:03.329501644 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -48,6 +48,7 @@
  #include "internal.h"
  #include "fscache.h"
@@ -15755,8 +16024,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inod
  	nfs_fs_proc_exit();
  	nfsiod_stop();
 diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/internal.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-08-24 14:14:13.100708000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-24 14:17:48.763734000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-08-23 12:08:29.042511552 -0400
++++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-23 12:09:03.330502148 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -139,6 +139,16 @@ extern struct nfs_server *nfs_clone_serv
  					   struct nfs_fattr *);
  extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
@@ -15817,7 +16091,11 @@ diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/i
  		struct page *, struct page *);
 diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kconfig
 --- linux-2.6.34.noarch/fs/nfs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-08-24 14:17:48.769730000 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-08-23 12:09:03.331395814 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -79,10 +79,48 @@ config NFS_V4_1
  	depends on NFS_V4 && EXPERIMENTAL
  	help
@@ -15870,7 +16148,11 @@ diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kcon
  	depends on NFS_FS=y && IP_PNP
 diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Makefile
 --- linux-2.6.34.noarch/fs/nfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-08-24 14:17:48.774730000 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-08-23 12:09:03.331395814 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -15,5 +15,12 @@ nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4x
  			   delegation.o idmap.o \
  			   callback.o callback_xdr.o callback_proc.o \
@@ -15885,8 +16167,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Mak
 +obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
 +obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
 diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs3proc.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-08-24 14:14:13.119708000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-24 14:17:48.780730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-08-23 12:08:29.045525837 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-23 12:09:03.332511640 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -833,6 +833,7 @@ const struct nfs_rpc_ops nfs_v3_clientop
  	.dentry_ops	= &nfs_dentry_operations,
  	.dir_inode_ops	= &nfs3_dir_inode_operations,
@@ -15896,8 +16183,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/n
  	.getattr	= nfs3_proc_getattr,
  	.setattr	= nfs3_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-08-24 14:17:48.784731000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-08-24 14:17:48.786730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-08-23 12:09:03.333512111 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-08-23 12:09:03.334491472 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,765 @@
 +/*
 + *  linux/fs/nfs/nfs4filelayout.c
@@ -16665,8 +16957,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs
 +module_init(nfs4filelayout_init);
 +module_exit(nfs4filelayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-08-24 14:17:48.790731000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-08-24 14:17:48.792730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-08-23 12:09:03.334491472 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-08-23 12:09:03.335501543 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,636 @@
 +/*
 + *  linux/fs/nfs/nfs4filelayoutdev.c
@@ -17305,8 +17602,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch
 +}
 +
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-08-24 14:17:48.795731000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-08-24 14:17:48.796742000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-08-23 12:09:03.335501543 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-08-23 12:09:03.335501543 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,97 @@
 +/*
 + *  pnfs_nfs4filelayout.h
@@ -17406,8 +17708,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs
 +
 +#endif /* FS_NFS_NFS4FILELAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nfs4_fs.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-08-24 14:14:13.130705000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-24 14:17:48.802730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-08-23 12:08:29.047512264 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-23 12:09:03.336490079 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -45,8 +45,28 @@ enum nfs4_client_state {
  	NFS4CLNT_RECLAIM_NOGRACE,
  	NFS4CLNT_DELEGRETURN,
@@ -17556,8 +17863,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nf
  
  /* nfs4xdr.c */
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs4proc.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-08-24 14:14:13.143709000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-24 14:17:48.811734000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-08-23 12:08:29.050481368 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-23 12:09:03.339481253 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -49,12 +49,15 @@
  #include <linux/mount.h>
  #include <linux/module.h>
@@ -19223,7 +19535,11 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/n
  	.setattr	= nfs4_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs/nfs4renewd.c
 --- linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-08-24 14:17:48.818733000 -0400
+=======
++++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-08-23 12:09:03.341491726 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -54,17 +54,17 @@
  void
  nfs4_renew_state(struct work_struct *work)
@@ -19246,8 +19562,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs
  	spin_lock(&clp->cl_lock);
  	lease = clp->cl_lease_time;
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/nfs4state.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-08-24 14:14:13.150705000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-24 14:17:48.825730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-08-23 12:08:29.052491341 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-23 12:09:03.342373443 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -53,6 +53,9 @@
  #include "callback.h"
  #include "delegation.h"
@@ -19566,8 +19887,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/
  			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
  			    test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs4xdr.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-24 14:14:13.159705000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-24 14:17:48.834738000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-23 12:08:29.054481400 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-23 12:09:03.346481283 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -50,8 +50,11 @@
  #include <linux/nfs4.h>
  #include <linux/nfs_fs.h>
@@ -21078,8 +21404,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nf
  };
  
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-08-24 14:17:48.839734000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-08-24 14:17:48.840742000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-08-23 12:09:03.348511665 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-08-23 12:09:03.348511665 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,11 @@
 +#
 +# Makefile for the pNFS Objects Layout Driver kernel module
@@ -21093,8 +21424,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs
 +panlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o panfs_shim.o
 +obj-$(CONFIG_PNFS_PANLAYOUT) += panlayoutdriver.o
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-08-24 14:17:48.843735000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-08-24 14:17:48.845739000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-08-23 12:09:03.349501459 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-08-23 12:09:03.349501459 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,1087 @@
 +/*
 + *  objio_osd.c
@@ -22184,8 +22520,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noar
 +module_init(objlayout_init);
 +module_exit(objlayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-08-24 14:17:48.848735000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-08-24 14:17:48.851730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-08-23 12:09:03.350491564 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-08-23 12:09:03.350491564 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,790 @@
 +/*
 + *  objlayout.c
@@ -22978,8 +23319,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noar
 +	.uninitialize_mountpoint = objlayout_uninitialize_mountpoint,
 +};
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-08-24 14:17:48.852735000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-08-24 14:17:48.854746000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-08-23 12:09:03.351434439 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-08-23 12:09:03.351434439 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,171 @@
 +/*
 + *  objlayout.h
@@ -23153,8 +23499,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noar
 +
 +#endif /* _OBJLAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-08-24 14:17:48.857735000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-08-24 14:17:48.860740000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-08-23 12:09:03.352501716 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-08-23 12:09:03.352501716 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,734 @@
 +/*
 + *  panfs_shim.c
@@ -23891,8 +24242,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noa
 +module_init(panlayout_init);
 +module_exit(panlayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-08-24 14:17:48.863734000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-08-24 14:17:48.864730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-08-23 12:09:03.353501685 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-08-23 12:09:03.353501685 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,482 @@
 +/*
 + *  panfs_shim.h
@@ -24377,8 +24733,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noa
 +
 +#endif /* _PANLAYOUT_PANFS_SHIM_H */
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-08-24 14:17:48.868731000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-08-24 14:17:48.869739000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-08-23 12:09:03.354501721 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-08-23 12:09:03.354501721 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,435 @@
 +/*
 + *  pnfs_osd_xdr.c
@@ -24816,8 +25177,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.
 +	return 0;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/pagelist.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-08-24 14:14:13.169705000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-24 14:17:48.875733000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-08-23 12:08:29.056411363 -0400
++++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-23 12:09:03.355511659 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -20,6 +20,7 @@
  #include <linux/nfs_mount.h>
  
@@ -24940,8 +25306,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/p
  				if (res == INT_MAX)
  					goto out;
 diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-08-24 14:17:48.880733000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-08-24 14:17:48.883730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-08-23 12:09:03.356501413 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-08-23 12:09:03.357481204 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,2027 @@
 +/*
 + *  linux/fs/nfs/pnfs.c
@@ -26971,8 +27342,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.
 +}
 +EXPORT_SYMBOL(nfs4_put_deviceid_cache);
 diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-08-24 14:17:48.886733000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-08-24 14:17:48.887735000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-08-23 12:09:03.358501440 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-08-23 12:09:03.358501440 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,355 @@
 +/*
 + *  fs/nfs/pnfs.h
@@ -27330,8 +27706,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.
 +
 +#endif /* FS_NFS_PNFS_H */
 diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-08-24 14:14:13.174707000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-24 14:17:48.893730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-08-23 12:08:29.057511533 -0400
++++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-23 12:09:03.359501471 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -443,7 +443,7 @@ nfs_proc_symlink(struct inode *dir, stru
  	fattr = nfs_alloc_fattr();
  	status = -ENOMEM;
@@ -27359,8 +27740,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.
  	.getattr	= nfs_proc_getattr,
  	.setattr	= nfs_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-08-24 14:14:13.179708000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-24 14:17:48.899733000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-08-23 12:08:29.057511533 -0400
++++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-23 12:09:03.359501471 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -18,8 +18,12 @@
  #include <linux/sunrpc/clnt.h>
  #include <linux/nfs_fs.h>
@@ -27575,8 +27961,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.
  	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
  read_complete:
 diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/super.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-24 14:14:13.186707000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-24 14:17:48.907729000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-23 12:08:29.059491391 -0400
++++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-23 12:09:03.361501458 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -64,6 +64,7 @@
  #include "iostat.h"
  #include "internal.h"
@@ -27624,8 +28015,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/supe
  #endif
  
 diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unlink.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-08-24 14:14:13.192705000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-24 14:17:48.913730000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-08-23 12:08:29.060501485 -0400
++++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-23 12:09:03.362419975 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -110,7 +110,7 @@ void nfs_unlink_prepare(struct rpc_task 
  	struct nfs_unlinkdata *data = calldata;
  	struct nfs_server *server = NFS_SERVER(data->dir);
@@ -27636,8 +28032,13 @@ diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unl
  		return;
  	rpc_call_start(task);
 diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/write.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-08-24 14:14:06.360160000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/write.c	2010-08-24 14:17:48.921712000 -0400
+=======
+--- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-08-23 12:08:27.630563929 -0400
++++ linux-2.6.34.noarch/fs/nfs/write.c	2010-08-23 12:09:03.364491337 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -20,6 +20,7 @@
  #include <linux/nfs_mount.h>
  #include <linux/nfs_page.h>
@@ -28326,7 +28727,11 @@ diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/writ
  int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/include/linux/exportfs.h
 --- linux-2.6.34.noarch/include/linux/exportfs.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-08-24 14:17:48.933713000 -0400
+=======
++++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-08-23 12:09:03.365501459 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -2,6 +2,7 @@
  #define LINUX_EXPORTFS_H 1
  
@@ -28399,8 +28804,13 @@ diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/i
 +#endif /* CONFIG_PNFSD */
  #endif /* LINUX_EXPORTFS_H */
 diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/include/linux/exp_xdr.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-08-24 14:17:48.945690000 -0400
 +++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-08-24 14:17:48.946693000 -0400
+=======
+--- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-08-23 12:09:03.367491365 -0400
++++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-08-23 12:09:03.367491365 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,141 @@
 +#ifndef _LINUX_EXP_XDR_H
 +#define _LINUX_EXP_XDR_H
@@ -28544,8 +28954,13 @@ diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/in
 +}
 +#endif /* _LINUX_EXP_XDR_H */
 diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include/linux/fs.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-08-24 14:14:13.014707000 -0400
 +++ linux-2.6.34.noarch/include/linux/fs.h	2010-08-24 14:17:48.961675000 -0400
+=======
+--- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-08-23 12:08:29.021511898 -0400
++++ linux-2.6.34.noarch/include/linux/fs.h	2010-08-23 12:09:03.369481147 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -387,6 +387,7 @@ struct inodes_stat_t {
  #include <asm/byteorder.h>
  
@@ -28564,7 +28979,11 @@ diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include
  	struct dentry		*s_root;
 diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/include/linux/nfs4.h
 --- linux-2.6.34.noarch/include/linux/nfs4.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-08-24 14:17:48.974681000 -0400
+=======
++++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-08-23 12:09:03.371491472 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -17,7 +17,10 @@
  
  #define NFS4_BITMAP_SIZE	2
@@ -28694,8 +29113,13 @@ diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/inclu
  #endif
  
 diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfs4_pnfs.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-08-24 14:17:48.986670000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-08-24 14:17:48.989666000 -0400
+=======
+--- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-08-23 12:09:03.372501550 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-08-23 12:09:03.372501550 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,330 @@
 +/*
 + *  include/linux/nfs4_pnfs.h
@@ -29028,8 +29452,13 @@ diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/
 +
 +#endif /* LINUX_NFS4_PNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarch/include/linux/nfsd4_block.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-08-24 14:17:48.998668000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-08-24 14:17:49.000665000 -0400
+=======
+--- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-08-23 12:09:03.373491892 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-08-23 12:09:03.374491393 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,101 @@
 +#ifndef NFSD4_BLOCK
 +#define NFSD4_BLOCK
@@ -29133,8 +29562,13 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarc
 +#endif /* NFSD4_BLOCK */
 +
 diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-08-24 14:17:49.012664000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-08-24 14:17:49.013671000 -0400
+=======
+--- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-08-23 12:09:03.375501481 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-08-23 12:09:03.375501481 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,345 @@
 +/*
 + * include/linux/nfsd4_spnfs.h
@@ -29483,7 +29917,11 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarc
 +#endif /* NFS_SPNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch/include/linux/nfsd/const.h
 --- linux-2.6.34.noarch/include/linux/nfsd/const.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-08-24 14:17:49.018668000 -0400
+=======
++++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-08-23 12:09:03.376401789 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -29,6 +29,7 @@
  #ifdef __KERNEL__
  
@@ -29494,7 +29932,11 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch
   * Largest number of bytes we need to allocate for an NFS
 diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch/include/linux/nfsd/debug.h
 --- linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-08-24 14:17:49.024673000 -0400
+=======
++++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-08-23 12:09:03.376401789 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -32,6 +32,8 @@
  #define NFSDDBG_REPCACHE	0x0080
  #define NFSDDBG_XDR		0x0100
@@ -29506,7 +29948,11 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch
  
 diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarch/include/linux/nfsd/export.h
 --- linux-2.6.34.noarch/include/linux/nfsd/export.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-08-24 14:17:49.030665000 -0400
+=======
++++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-08-23 12:09:03.377481954 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -100,6 +100,7 @@ struct svc_export {
  	uid_t			ex_anon_uid;
  	gid_t			ex_anon_gid;
@@ -29516,8 +29962,13 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarc
  	struct nfsd4_fs_locations ex_fslocs;
  	int			ex_nflavors;
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-08-24 14:17:49.033666000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-08-24 14:17:49.034665000 -0400
+=======
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-08-23 12:09:03.377481954 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-08-23 12:09:03.378501747 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,132 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -29652,8 +30103,13 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.3
 +
 +#endif /* NFSD_NFS4LAYOUTXDR_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-08-24 14:17:49.037666000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-08-24 14:17:49.039665000 -0400
+=======
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-08-23 12:09:03.378501747 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-08-23 12:09:03.378501747 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,54 @@
 +/******************************************************************************
 + *
@@ -29710,8 +30166,13 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.
 +
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-08-24 14:17:49.042666000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-08-24 14:17:49.044665000 -0400
+=======
+--- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-08-23 12:09:03.379487099 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-08-23 12:09:03.379487099 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,271 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -29986,7 +30447,11 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.n
 +#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noarch/include/linux/nfsd/syscall.h
 --- linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-08-24 14:17:49.049665000 -0400
+=======
++++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-08-23 12:09:03.380502500 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -29,6 +29,7 @@
  /*#define NFSCTL_GETFH		6	/ * get an fh by ino DISCARDED */
  #define NFSCTL_GETFD		7	/* get an fh by path (used by mountd) */
@@ -30024,8 +30489,13 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noar
  
  union nfsctl_res {
 diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/include/linux/nfs_fs.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-24 14:14:13.201710000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-24 14:17:49.063666000 -0400
+=======
+--- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-23 12:08:29.061494081 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-23 12:09:03.381511751 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -72,13 +72,20 @@ struct nfs_access_entry {
  	int			mask;
  };
@@ -30124,8 +30594,13 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/inc
  
  #ifdef __KERNEL__
 diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/include/linux/nfs_fs_sb.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-08-24 14:14:13.206708000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-24 14:17:49.077665000 -0400
+=======
+--- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-08-23 12:08:29.062501618 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-23 12:09:03.383491395 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -15,6 +15,7 @@ struct nlm_host;
  struct nfs4_sequence_args;
  struct nfs4_sequence_res;
@@ -30200,7 +30675,11 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/
  	atomic_t active; /* Keep trace of any activity to this server */
 diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch/include/linux/nfs_iostat.h
 --- linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-08-24 14:17:49.089668000 -0400
+=======
++++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-08-23 12:09:03.384501540 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -113,6 +113,9 @@ enum nfs_stat_eventcounters {
  	NFSIOS_SHORTREAD,
  	NFSIOS_SHORTWRITE,
@@ -30213,7 +30692,11 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch
  
 diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/include/linux/nfs_page.h
 --- linux-2.6.34.noarch/include/linux/nfs_page.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-08-24 14:17:49.103665000 -0400
+=======
++++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-08-23 12:09:03.385491518 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -39,6 +39,7 @@ struct nfs_page {
  	struct list_head	wb_list;	/* Defines state of page: */
  	struct page		*wb_page;	/* page to read in/write out */
@@ -30262,8 +30745,13 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/i
  			     struct inode *inode,
  			     int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
 diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/include/linux/nfs_xdr.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-08-24 14:14:13.211708000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-24 14:17:49.116665000 -0400
+=======
+--- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-08-23 12:08:29.062501618 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-23 12:09:03.387491422 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -3,6 +3,8 @@
  
  #include <linux/nfsacl.h>
@@ -30415,8 +30903,13 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/in
  extern struct rpc_version	nfs_version3;
  extern struct rpc_version	nfs_version4;
 diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.noarch/include/linux/panfs_shim_api.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-08-24 14:17:49.128664000 -0400
 +++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-08-24 14:17:49.129670000 -0400
+=======
+--- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-08-23 12:09:03.388491527 -0400
++++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-08-23 12:09:03.388491527 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,57 @@
 +#ifndef _PANFS_SHIM_API_H
 +#define _PANFS_SHIM_API_H
@@ -30476,8 +30969,13 @@ diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.no
 +
 +#endif /* _PANFS_SHIM_API_H */
 diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-08-24 14:17:49.141664000 -0400
 +++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-08-24 14:17:49.142670000 -0400
+=======
+--- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-08-23 12:09:03.390501461 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-08-23 12:09:03.390501461 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,440 @@
 +/*
 + *  pnfs_osd_xdr.h
@@ -30920,8 +31418,13 @@ diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noar
 +
 +#endif /* __PNFS_OSD_XDR_H__ */
 diff -up linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_xdr.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig	2010-08-24 14:17:49.153666000 -0400
 +++ linux-2.6.34.noarch/include/linux/pnfs_xdr.h	2010-08-24 14:17:49.155665000 -0400
+=======
+--- linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig	2010-08-23 12:09:03.391491550 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_xdr.h	2010-08-23 12:09:03.391491550 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,134 @@
 +/*
 + *  include/linux/pnfs_xdr.h
@@ -31059,7 +31562,11 @@ diff -up linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig linux-2.6.34.noarch/i
 +#endif /* LINUX_PNFS_XDR_H */
 diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/include/linux/posix_acl.h
 --- linux-2.6.34.noarch/include/linux/posix_acl.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-08-24 14:17:49.168668000 -0400
+=======
++++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-08-23 12:09:03.393501437 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -8,6 +8,7 @@
  #ifndef __LINUX_POSIX_ACL_H
  #define __LINUX_POSIX_ACL_H
@@ -31070,7 +31577,11 @@ diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/
  #define ACL_UNDEFINED_ID	(-1)
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-08-24 14:17:49.174665000 -0400
+=======
++++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-08-23 12:09:03.393501437 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -14,6 +14,8 @@
  /* size of an XDR encoding unit in bytes, i.e. 32bit */
  #define XDR_UNIT	(4)
@@ -31082,7 +31593,11 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.n
  
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-08-24 14:17:49.179667000 -0400
+=======
++++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-08-23 12:09:03.394512138 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -3,6 +3,7 @@
  
  #ifdef __KERNEL__
@@ -31103,8 +31618,13 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.3
  
  struct rpc_pipe_ops {
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-08-24 14:17:49.183664000 -0400
 +++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-08-24 14:17:49.184674000 -0400
+=======
+--- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-08-23 12:09:03.394512138 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-08-23 12:09:03.395501822 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,111 @@
 +/*
 + *  Copyright (c) 2008 The Regents of the University of Michigan.
@@ -31219,7 +31739,11 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux
 +#endif /* _SIMPLE_RPC_PIPEFS_H_ */
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig	2010-05-16 17:17:36.000000000 -0400
+<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-08-24 14:17:49.190665000 -0400
+=======
++++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-08-23 12:09:03.395501822 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -166,4 +166,41 @@ static inline char *__svc_print_addr(con
  
  	return buf;
@@ -31263,8 +31787,13 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.n
 +}
  #endif /* SUNRPC_SVC_XPRT_H */
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xdr.h
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-08-24 14:14:13.258707000 -0400
 +++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-24 14:17:49.195672000 -0400
+=======
+--- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-08-23 12:08:29.066475323 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-23 12:09:03.396464612 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -131,6 +131,13 @@ xdr_decode_hyper(__be32 *p, __u64 *valp)
  	return p + 2;
  }
@@ -31287,9 +31816,20 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch
  extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
  		unsigned int base, unsigned int len);
  extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+<<<<<<< HEAD
 diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/sunrpc/Makefile
 --- linux-2.6.34.noarch/net/sunrpc/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
 +++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-08-24 14:17:49.204668000 -0400
+=======
+diff -up linux-2.6.34.noarch/localversion-pnfs.orig linux-2.6.34.noarch/localversion-pnfs
+--- linux-2.6.34.noarch/localversion-pnfs.orig	2010-08-23 12:09:03.396464612 -0400
++++ linux-2.6.34.noarch/localversion-pnfs	2010-08-23 12:09:03.396464612 -0400
+@@ -0,0 +1 @@
++-pnfs
+diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/sunrpc/Makefile
+--- linux-2.6.34.noarch/net/sunrpc/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-08-23 12:09:03.397501662 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprt
  	    svc.o svcsock.o svcauth.o svcauth_unix.o \
  	    addr.o rpcb_clnt.o timer.o xdr.o \
@@ -31300,8 +31840,13 @@ diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/su
  sunrpc-$(CONFIG_PROC_FS) += stats.o
  sunrpc-$(CONFIG_SYSCTL) += sysctl.o
 diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-08-24 14:17:49.208664000 -0400
 +++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-08-24 14:17:49.209670000 -0400
+=======
+--- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-08-23 12:09:03.398522348 -0400
++++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-08-23 12:09:03.398522348 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,424 @@
 +/*
 + *  net/sunrpc/simple_rpc_pipefs.c
@@ -31728,8 +32273,13 @@ diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.no
 +}
 +EXPORT_SYMBOL(pipefs_generic_destroy_msg);
 diff -up linux-2.6.34.noarch/net/sunrpc/xdr.c.orig linux-2.6.34.noarch/net/sunrpc/xdr.c
+<<<<<<< HEAD
 --- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-08-24 14:14:13.447705000 -0400
 +++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-24 14:17:49.215665000 -0400
+=======
+--- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-08-23 12:08:29.081501640 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-23 12:09:03.399443371 -0400
+>>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -403,16 +403,14 @@ xdr_shrink_pagelen(struct xdr_buf *buf, 
  
  	/* Shift the tail first */

From 7f5ad3008e5cc3c63310ed4b440290adc3c90309 Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Mon, 23 Aug 2010 14:15:46 -0400
Subject: [PATCH 16/20] Fixed a couple compile errors in the server code.

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 nfsd-35-fc.patch | 62 ------------------------------------------------
 1 file changed, 62 deletions(-)

diff --git a/nfsd-35-fc.patch b/nfsd-35-fc.patch
index 9a97fc6ec..2825464af 100644
--- a/nfsd-35-fc.patch
+++ b/nfsd-35-fc.patch
@@ -1,10 +1,6 @@
 diff -up linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt
 --- linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt	2010-08-23 14:12:24.165356789 -0400
-=======
-+++ linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt	2010-08-23 09:57:18.233564439 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -137,7 +137,7 @@ NS*| OPENATTR             | OPT        |
     | READ                 | REQ        |              | Section 18.22  |
     | READDIR              | REQ        |              | Section 18.23  |
@@ -16,11 +12,7 @@ diff -up linux-2.6.34.noarch/Documentation/filesystems/nfs/nfs41-server.txt.orig
     | RENAME               | REQ        |              | Section 18.26  |
 diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
 --- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 14:12:24.519356675 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 09:57:18.234564075 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -259,10 +259,9 @@ static struct cache_detail svc_expkey_ca
  	.alloc		= expkey_alloc,
  };
@@ -116,11 +108,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/e
  out_put_clp:
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
 --- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 14:12:52.625429773 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 10:00:37.257414684 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -79,11 +79,6 @@ enum nfs_cb_opnum4 {
  					cb_sequence_dec_sz +            \
  					op_dec_sz)
@@ -223,11 +211,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/
  	int status;
  
 -	status = rpc_call_async(cb->cb_client, &msg,
-<<<<<<< HEAD
 +	status = rpc_call_async(clp->cl_cb_client, &msg,
-=======
-+	status = rpc_call_async(cb->cl_cb_client, &msg,
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
  				RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
  				&nfsd4_cb_probe_ops, (void *)clp);
 -	if (status) {
@@ -418,11 +402,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/
  }
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
 --- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 14:12:25.698356909 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 09:57:18.237376763 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -969,20 +969,36 @@ static struct nfsd4_operation nfsd4_ops[
  static const char *nfsd4_op_name(unsigned opnum);
  
@@ -510,11 +490,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd
  static const char *nfsd4_op_name(unsigned opnum)
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
 --- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 14:12:25.700356284 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 09:57:18.240356512 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -45,8 +45,8 @@
  #define NFSDDBG_FACILITY                NFSDDBG_PROC
  
@@ -1304,7 +1280,6 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs
 -{
 -	user_lease_time = leasetime;
 -}
-<<<<<<< HEAD
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
 --- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-08-23 14:14:22.882428704 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-23 14:14:33.418376589 -0400
@@ -1320,11 +1295,6 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
 --- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-05-16 17:17:36.000000000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 14:12:25.821359224 -0400
-=======
-diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
---- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 09:57:20.629370282 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -46,6 +46,7 @@ enum {
  	 */
  #ifdef CONFIG_NFSD_V4
@@ -1445,11 +1415,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/n
  		/* last one */ {""}
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
 --- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 14:12:25.835418441 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 09:57:20.629370282 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -82,7 +82,6 @@ int nfs4_state_init(void);
  void nfsd4_free_slabs(void);
  int nfs4_state_start(void);
@@ -1486,11 +1452,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfs
  /*
 diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
 --- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 14:12:25.836366516 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 09:57:21.807501619 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -70,6 +70,16 @@ struct nfsd4_cb_sequence {
  	struct nfs4_client	*cbs_clp;
  };
@@ -1608,11 +1570,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/st
  nfs4_put_stateowner(struct nfs4_stateowner *so)
 diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
 --- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 14:12:25.837387292 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 09:57:23.994379831 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -381,6 +381,10 @@ struct nfsd4_destroy_session {
  	struct nfs4_sessionid	sessionid;
  };
@@ -1654,11 +1612,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr
  extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h
 --- linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h	2010-08-23 14:12:25.838377224 -0400
-=======
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h	2010-08-23 09:57:23.994379831 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -40,12 +40,12 @@ struct nfs_fhbase_old {
   * This is the new flexible, extensible style NFSv2/v3 file handle.
   * by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
@@ -1677,11 +1631,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsfh.h.orig linux-2.6.34.noarch
   * This might allow a file to be confirmed to be in a writable part of a
 diff -up linux-2.6.34.noarch/net/sunrpc/cache.c.orig linux-2.6.34.noarch/net/sunrpc/cache.c
 --- linux-2.6.34.noarch/net/sunrpc/cache.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/net/sunrpc/cache.c	2010-08-23 14:12:25.839376838 -0400
-=======
-+++ linux-2.6.34.noarch/net/sunrpc/cache.c	2010-08-23 09:57:23.995376793 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -49,11 +49,17 @@ static void cache_init(struct cache_head
  	h->last_refresh = now;
  }
@@ -1748,11 +1698,7 @@ diff -up linux-2.6.34.noarch/net/sunrpc/cache.c.orig linux-2.6.34.noarch/net/sun
  		/* entry is valid */
 diff -up linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig linux-2.6.34.noarch/net/sunrpc/svcsock.c
 --- linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/net/sunrpc/svcsock.c	2010-08-23 14:12:25.840384371 -0400
-=======
-+++ linux-2.6.34.noarch/net/sunrpc/svcsock.c	2010-08-23 09:57:23.997368707 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -547,7 +547,6 @@ static int svc_udp_recvfrom(struct svc_r
  			dprintk("svc: recvfrom returned error %d\n", -err);
  			set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
@@ -1819,11 +1765,7 @@ diff -up linux-2.6.34.noarch/net/sunrpc/svcsock.c.orig linux-2.6.34.noarch/net/s
  error:
 diff -up linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig linux-2.6.34.noarch/net/sunrpc/svc_xprt.c
 --- linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/net/sunrpc/svc_xprt.c	2010-08-23 14:12:25.841371223 -0400
-=======
-+++ linux-2.6.34.noarch/net/sunrpc/svc_xprt.c	2010-08-23 09:57:23.996377209 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -744,8 +744,10 @@ int svc_recv(struct svc_rqst *rqstp, lon
  		if (rqstp->rq_deferred) {
  			svc_xprt_received(xprt);
@@ -1852,11 +1794,7 @@ diff -up linux-2.6.34.noarch/net/sunrpc/svc_xprt.c.orig linux-2.6.34.noarch/net/
  void svc_close_xprt(struct svc_xprt *xprt)
 diff -up linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c.orig linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
 --- linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	2010-08-23 14:12:25.842376584 -0400
-=======
-+++ linux-2.6.34.noarch/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c	2010-08-23 09:57:23.998377481 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -566,7 +566,6 @@ static int rdma_read_complete(struct svc
  		ret, rqstp->rq_arg.len,	rqstp->rq_arg.head[0].iov_base,
  		rqstp->rq_arg.head[0].iov_len);

From d7cf8e11508fe3b3aa3fd8bb68768938ae587158 Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Tue, 24 Aug 2010 14:49:23 -0400
Subject: [PATCH 17/20] Removed the localversion-pnfs file from the pnfs patch

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 kernel.spec                          |   2 +-
 pnfs-all-2.6.35-2010-08-19-f13.patch | 550 ---------------------------
 2 files changed, 1 insertion(+), 551 deletions(-)

diff --git a/kernel.spec b/kernel.spec
index f9c6ff212..f3e776e20 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -23,7 +23,7 @@ Summary: The Linux kernel
 #
 # (Uncomment the '#' and both spaces below to set the buildid.)
 #
-%define buildid .pnfs_all_2.6.35_2010_08_19
+%define buildid .pnfs34.2010.08.19
 ###################################################################
 
 # The buildid can also be specified on the rpmbuild command line
diff --git a/pnfs-all-2.6.35-2010-08-19-f13.patch b/pnfs-all-2.6.35-2010-08-19-f13.patch
index ecc100c30..10df9b15c 100644
--- a/pnfs-all-2.6.35-2010-08-19-f13.patch
+++ b/pnfs-all-2.6.35-2010-08-19-f13.patch
@@ -1,11 +1,6 @@
 diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arch/um/os-Linux/mem.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-08-24 14:14:03.643355000 -0400
 +++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-08-24 14:17:48.415730000 -0400
-=======
---- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-08-23 12:08:27.310584826 -0400
-+++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-08-23 12:09:03.273553977 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -13,6 +13,7 @@
  #include <sys/stat.h>
  #include <sys/mman.h>
@@ -16,11 +11,7 @@ diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arc
  #include "os.h"
 diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.c
 --- linux-2.6.34.noarch/block/genhd.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/block/genhd.c	2010-08-24 14:17:48.421730000 -0400
-=======
-+++ linux-2.6.34.noarch/block/genhd.c	2010-08-23 12:09:03.273553977 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -1009,6 +1009,7 @@ static void disk_release(struct device *
  struct class block_class = {
  	.name		= "block",
@@ -30,13 +21,8 @@ diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.
  static char *block_devnode(struct device *dev, mode_t *mode)
  {
 diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-08-24 14:17:48.423729000 -0400
 +++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-08-24 14:17:48.425730000 -0400
-=======
---- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-08-23 12:09:03.274563927 -0400
-+++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-08-23 12:09:03.274563927 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,211 @@
 +(c) 2007 Network Appliance Inc.
 +
@@ -251,11 +237,7 @@ diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.
 +
 diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/drivers/md/dm-ioctl.c
 --- linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-08-24 14:17:48.430730000 -0400
-=======
-+++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-08-23 12:09:03.275584050 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -651,6 +651,12 @@ static int dev_create(struct dm_ioctl *p
  	return r;
  }
@@ -310,11 +292,7 @@ diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/driv
  	int r;
 diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drivers/scsi/hosts.c
 --- linux-2.6.34.noarch/drivers/scsi/hosts.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-08-24 14:17:48.435733000 -0400
-=======
-+++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-08-23 12:09:03.276563906 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -49,7 +49,7 @@ static void scsi_host_cls_release(struct
  	put_device(&class_to_shost(dev)->shost_gendev);
  }
@@ -326,11 +304,7 @@ diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drive
  };
 diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/exofs.h
 --- linux-2.6.34.noarch/fs/exofs/exofs.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-08-24 14:17:48.440733000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-08-23 12:09:03.277563890 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -36,13 +36,9 @@
  #include <linux/fs.h>
  #include <linux/time.h>
@@ -386,13 +360,8 @@ diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/
 +
  #endif
 diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs/export.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-08-24 14:17:48.444731000 -0400
 +++ linux-2.6.34.noarch/fs/exofs/export.c	2010-08-24 14:17:48.446730000 -0400
-=======
---- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-08-23 12:09:03.278386746 -0400
-+++ linux-2.6.34.noarch/fs/exofs/export.c	2010-08-23 12:09:03.278386746 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,396 @@
 +/*
 + * export.c - Implementation of the pnfs_export_operations
@@ -792,11 +761,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs
 +}
 diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/inode.c
 --- linux-2.6.34.noarch/fs/exofs/inode.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-08-24 14:17:48.452730000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-08-23 12:09:03.279502002 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -833,7 +833,7 @@ void exofs_truncate(struct inode *inode)
  	if (unlikely(wait_obj_created(oi)))
  		goto fail;
@@ -816,11 +781,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/
   * Fill in an inode read from the OSD and set it up for use
 diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/Kbuild
 --- linux-2.6.34.noarch/fs/exofs/Kbuild.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-08-24 14:17:48.457733000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-08-23 12:09:03.279502002 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -13,4 +13,5 @@
  #
  
@@ -829,11 +790,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/K
  obj-$(CONFIG_EXOFS_FS) += exofs.o
 diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/Kconfig
 --- linux-2.6.34.noarch/fs/exofs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-08-24 14:17:48.462739000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-08-23 12:09:03.280553663 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -1,6 +1,7 @@
  config EXOFS_FS
  	tristate "exofs: OSD based file system support"
@@ -844,11 +801,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/
  	  as its backing storage.
 diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/super.c
 --- linux-2.6.34.noarch/fs/exofs/super.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/exofs/super.c	2010-08-24 14:17:48.468730000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/exofs/super.c	2010-08-23 12:09:03.281511951 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -621,6 +621,7 @@ static int exofs_fill_super(struct super
  	sb->s_fs_info = sbi;
  	sb->s_op = &exofs_sops;
@@ -859,11 +812,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/
  		EXOFS_ERR("ERROR: exofs_iget failed\n");
 diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exportfs/expfs.c
 --- linux-2.6.34.noarch/fs/exportfs/expfs.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-08-24 14:17:48.473730000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-08-23 12:09:03.282511528 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -16,6 +16,13 @@
  #include <linux/namei.h>
  #include <linux/sched.h>
@@ -880,11 +829,7 @@ diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exp
  
 diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/exportfs/Makefile
 --- linux-2.6.34.noarch/fs/exportfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-08-24 14:17:48.478733000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-08-23 12:09:03.282511528 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -3,4 +3,7 @@
  
  obj-$(CONFIG_EXPORTFS) += exportfs.o
@@ -895,13 +840,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/ex
 +exportfs-$(CONFIG_EXPORTFS_OSD_LAYOUT)	+= pnfs_osd_xdr_srv.o
 +exportfs-$(CONFIG_EXPORTFS_BLOCK_LAYOUT) += nfs4blocklayoutxdr.o
 diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-08-24 14:17:48.482731000 -0400
 +++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-08-24 14:17:48.484734000 -0400
-=======
---- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-08-23 12:09:03.283511561 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-08-23 12:09:03.283511561 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,158 @@
 +/*
 + *  linux/fs/nfsd/nfs4blocklayoutxdr.c
@@ -1062,13 +1002,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.
 +}
 +EXPORT_SYMBOL_GPL(blocklayout_encode_layout);
 diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-08-24 14:17:48.487733000 -0400
 +++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-08-24 14:17:48.489734000 -0400
-=======
---- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-08-23 12:09:03.283511561 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-08-23 12:09:03.283511561 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,218 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -1289,13 +1224,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.n
 +}
 +EXPORT_SYMBOL(filelayout_encode_layout);
 diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-08-24 14:17:48.493729000 -0400
 +++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-08-24 14:17:48.494735000 -0400
-=======
---- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-08-23 12:09:03.284511493 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-08-23 12:09:03.284511493 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,289 @@
 +/*
 + *  pnfs_osd_xdr_enc.c
@@ -1588,11 +1518,7 @@ diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.no
 +EXPORT_SYMBOL(pnfs_osd_xdr_decode_ioerr);
 diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gfs2/ops_fstype.c
 --- linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-08-24 14:17:48.499730000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-08-23 12:09:03.285539075 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -19,6 +19,7 @@
  #include <linux/gfs2_ondisk.h>
  #include <linux/slow-work.h>
@@ -1613,11 +1539,7 @@ diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gf
  	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
 diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
 --- linux-2.6.34.noarch/fs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/Kconfig	2010-08-24 14:17:48.505733000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/Kconfig	2010-08-23 12:09:03.286512316 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -224,6 +224,31 @@ config LOCKD_V4
  config EXPORTFS
  	tristate
@@ -1651,13 +1573,8 @@ diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
  	tristate
  	select FS_POSIX_ACL
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-08-24 14:17:48.509734000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-08-24 14:17:48.511732000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-08-23 12:09:03.287381619 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-08-23 12:09:03.287381619 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,66 @@
 +#include <linux/module.h>
 +#include <linux/uaccess.h>
@@ -1726,13 +1643,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.or
 +	return;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-08-24 14:17:48.514733000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-08-24 14:17:48.516731000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-08-23 12:09:03.288501648 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-08-23 12:09:03.288501648 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,1160 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.c
@@ -2895,13 +2807,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.
 +module_init(nfs4blocklayout_init);
 +module_exit(nfs4blocklayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-08-24 14:17:48.519731000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-08-24 14:17:48.521730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-08-23 12:09:03.289501933 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-08-23 12:09:03.289501933 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,335 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayoutdev.c
@@ -3239,13 +3146,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.
 +	goto out;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-08-24 14:17:48.523733000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-08-24 14:17:48.525730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-08-23 12:09:03.290395707 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-08-23 12:09:03.290395707 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,120 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayoutdm.c
@@ -3368,13 +3270,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.3
 +	}
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-08-24 14:17:48.528729000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-08-24 14:17:48.529735000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-08-23 12:09:03.290395707 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-08-23 12:09:03.291501560 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,303 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.h
@@ -3680,13 +3577,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.
 +
 +#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-08-24 14:17:48.532731000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-08-24 14:17:48.534734000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-08-23 12:09:03.292511531 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-08-23 12:09:03.292511531 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,948 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.h
@@ -4637,13 +4529,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noar
 +	}
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-08-24 14:17:48.537729000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-08-24 14:17:48.538739000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-08-23 12:09:03.292511531 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-08-23 12:09:03.293491476 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,6 @@
 +#
 +# Makefile for the pNFS block layout driver kernel module
@@ -4653,11 +4540,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarc
 +			extents.o block-device-discovery-pipe.o
 diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/callback.h
 --- linux-2.6.34.noarch/fs/nfs/callback.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-08-24 14:17:48.544730000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-08-23 12:09:03.293491476 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -8,6 +8,8 @@
  #ifndef __LINUX_FS_NFS_CALLBACK_H
  #define __LINUX_FS_NFS_CALLBACK_H
@@ -4730,11 +4613,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/c
  extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
 diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/nfs/callback_proc.c
 --- linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-08-24 14:17:48.562731000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-08-23 12:09:03.294522414 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -8,10 +8,15 @@
  #include <linux/nfs4.h>
  #include <linux/nfs_fs.h>
@@ -5217,11 +5096,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/
  	return status;
 diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/nfs/callback_xdr.c
 --- linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-08-24 14:17:48.568730000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-08-23 12:09:03.295502055 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -22,6 +22,8 @@
  #define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
  
@@ -5423,13 +5298,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/n
  		.process_op = (callback_process_op_t)nfs4_callback_sequence,
  		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
 diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/client.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-08-24 14:14:13.062705000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-24 14:17:48.575730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-08-23 12:08:29.037481540 -0400
-+++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-23 12:09:03.297501650 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -39,6 +39,7 @@
  #include <net/ipv6.h>
  #include <linux/nfs_xdr.h>
@@ -5638,13 +5508,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/cli
  		goto error;
  
 diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/bl_com.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-08-24 14:17:48.578729000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-08-24 14:17:48.579735000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-08-23 12:09:03.297501650 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-08-23 12:09:03.298501447 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,292 @@
 +#if defined(CONFIG_SPNFS_BLOCK)
 +
@@ -5939,13 +5804,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/b
 +}
 +#endif /* CONFIG_SPNFS_BLOCK */
 diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/bl_ops.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-08-24 14:17:48.584729000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-08-24 14:17:48.586730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-08-23 12:09:03.299501445 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-08-23 12:09:03.299501445 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,1672 @@
 +/*
 + *  bl_ops.c
@@ -7620,13 +7480,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/b
 +
 +#endif /* CONFIG_SPNFS_BLOCK */
 diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs/delegation.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-08-24 14:14:13.068705000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-24 14:17:48.592730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-08-23 12:08:29.037481540 -0400
-+++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-23 12:09:03.300491952 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -104,7 +104,8 @@ again:
  			continue;
  		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
@@ -7703,11 +7558,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs
  	rcu_read_unlock();
 diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs/delegation.h
 --- linux-2.6.34.noarch/fs/nfs/delegation.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-08-24 14:17:48.597733000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-08-23 12:09:03.301431797 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -34,9 +34,7 @@ enum {
  int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
  void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
@@ -7720,13 +7571,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs
  
  struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
 diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-08-24 14:14:13.612707000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-24 14:17:48.604730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-08-23 12:08:29.089481525 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-23 12:09:03.302511603 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -17,11 +17,19 @@
  #include <linux/module.h>
  #include <linux/exportfs.h>
@@ -7904,11 +7750,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/e
  	svcauth_unix_purge();
 diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/direct.c
 --- linux-2.6.34.noarch/fs/nfs/direct.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-08-24 14:17:48.610730000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-08-23 12:09:03.303491500 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -267,6 +267,38 @@ static const struct rpc_call_ops nfs_rea
  	.rpc_release = nfs_direct_read_release,
  };
@@ -8154,11 +7996,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/dir
  		user_addr += bytes;
 diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kconfig
 --- linux-2.6.34.noarch/fs/nfsd/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-08-24 14:17:48.616730000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-08-23 12:09:03.304505472 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -79,3 +79,52 @@ config NFSD_V4
  	  available from http://linux-nfs.org/.
  
@@ -8214,11 +8052,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kc
 +	  If unsure, say N.
 diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/Makefile
 --- linux-2.6.34.noarch/fs/nfsd/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-08-24 14:17:48.621733000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-08-23 12:09:03.304505472 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -11,3 +11,7 @@ nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs
  nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
  nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
@@ -8228,13 +8062,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/M
 +nfsd-$(CONFIG_SPNFS)	+= spnfs_com.o spnfs_ops.o
 +nfsd-$(CONFIG_SPNFS_BLOCK) += bl_com.o bl_ops.o
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-08-24 14:14:13.618705000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-24 14:17:48.628730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-08-23 12:08:29.090501507 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-23 12:09:03.306491345 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -40,7 +40,6 @@
  
  #define NFSPROC4_CB_NULL 0
@@ -8774,13 +8603,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/
 +}
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-08-24 14:17:48.633729000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-08-24 14:17:48.641730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-08-23 12:09:03.307491492 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-08-23 12:09:03.308491262 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,1679 @@
 +/******************************************************************************
 + *
@@ -10462,13 +10286,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfs
 +	return status;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-08-24 14:17:48.645731000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-08-24 14:17:48.647730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-08-23 12:09:03.309501439 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-08-23 12:09:03.309501439 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,461 @@
 +/******************************************************************************
 + *
@@ -10932,13 +10751,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/n
 +};
 +EXPORT_SYMBOL(pnfs_dlm_export_ops);
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-08-24 14:17:48.651729000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-08-24 14:17:48.652735000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-08-23 12:09:03.310501390 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-08-23 12:09:03.310501390 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,620 @@
 +/*
 +*  linux/fs/nfsd/nfs4pnfsds.c
@@ -11561,13 +11375,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nf
 +
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-08-24 14:14:13.623707000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-24 14:17:48.658733000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-08-23 12:08:29.091491685 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-23 12:09:03.311501496 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -34,10 +34,14 @@
   */
  #include <linux/file.h>
@@ -12042,13 +11851,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd
  
  static const char *nfsd4_op_name(unsigned opnum)
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-08-24 14:14:13.632707000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-24 14:17:48.667732000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-08-23 12:08:29.093491375 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-23 12:09:03.313491310 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -42,6 +42,8 @@
  #include "xdr4.h"
  #include "vfs.h"
@@ -12564,13 +12368,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs
  }
  
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-08-24 14:14:13.639707000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-24 14:17:48.675730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-23 12:09:03.315491356 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -47,9 +47,14 @@
  #include <linux/nfsd_idmap.h>
  #include <linux/nfs4_acl.h>
@@ -13189,13 +12988,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/
  	[OP_SEQUENCE]		= (nfsd4_enc)nfsd4_encode_sequence,
  	[OP_SET_SSV]		= (nfsd4_enc)nfsd4_encode_noop,
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-08-24 14:14:13.645705000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-24 14:17:48.681730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-08-23 12:08:29.094491943 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-23 12:09:03.317501495 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -13,10 +13,15 @@
  #include <linux/nfsd/syscall.h>
  #include <linux/lockd/lockd.h>
@@ -13372,13 +13166,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/n
  	remove_proc_entry("fs/nfs/exports", NULL);
  	remove_proc_entry("fs/nfs", NULL);
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-08-24 14:14:13.651705000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-24 14:17:48.687730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-08-23 12:08:29.095491390 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-23 12:09:03.318355741 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -285,11 +285,17 @@ extern time_t nfsd4_grace;
  #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
  	NFSD4_SUPPORTED_ATTRS_WORD0
@@ -13400,11 +13189,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfs
  {
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.c
 --- linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-08-24 14:17:48.693730000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-08-23 12:09:03.319511586 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -10,6 +10,7 @@
  #include <linux/exportfs.h>
  
@@ -13442,11 +13227,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nf
  		__u32 tfh[2];
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.h
 --- linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-08-24 14:17:48.698733000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-08-23 12:09:03.319511586 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -14,6 +14,7 @@ enum nfsd_fsid {
  	FSID_UUID8,
  	FSID_UUID16,
@@ -13499,13 +13280,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nf
 +
  #endif /* _LINUX_NFSD_FH_INT_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/nfssvc.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-08-24 14:14:06.365163000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-08-24 14:17:48.704731000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-08-23 12:08:27.631563969 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-08-23 12:09:03.320416974 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -115,7 +115,7 @@ struct svc_program		nfsd_program = {
  
  };
@@ -13516,13 +13292,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/n
  int nfsd_vers(int vers, enum vers_op change)
  {
 diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pnfsd.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-08-24 14:17:48.708729000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-08-24 14:17:48.710730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-08-23 12:09:03.321376171 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-08-23 12:09:03.321376171 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,143 @@
 +/*
 + *  Copyright (c) 2005 The Regents of the University of Michigan.
@@ -13668,13 +13439,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pn
 +
 +#endif /* LINUX_NFSD_PNFSD_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-08-24 14:17:48.713731000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-08-24 14:17:48.715730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-08-23 12:09:03.321376171 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-08-23 12:09:03.322501672 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,225 @@
 +/*
 + * linux/fs/nfsd/pnfs_lexp.c
@@ -13902,13 +13668,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nf
 +	inode->i_sb->s_pnfs_op = &pnfsd_lexp_ops;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_com.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-08-24 14:17:48.719729000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-08-24 14:17:48.720735000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-08-23 12:09:03.322501672 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-08-23 12:09:03.323511608 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,535 @@
 +/*
 + * fs/nfsd/spnfs_com.c
@@ -14446,13 +14207,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfs
 +}
 +#endif /* CONFIG_PROC_FS */
 diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-08-24 14:17:48.724733000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-08-24 14:17:48.726730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-08-23 12:09:03.324501390 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-08-23 12:09:03.324501390 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,878 @@
 +/*
 + * fs/nfsd/spnfs_ops.c
@@ -15333,13 +15089,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfs
 +	return 0;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-08-24 14:14:13.656705000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-24 14:17:48.731738000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-08-23 12:08:29.096512142 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-23 12:09:03.325501424 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -242,6 +242,12 @@ struct nfs4_client {
  	u32			cl_cb_seq_nr;
  	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
@@ -15456,13 +15207,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/st
 +
  #endif   /* NFSD4_STATE_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-08-24 14:14:06.371160000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-08-24 14:17:48.737742000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-08-23 12:08:27.632564132 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-08-23 12:09:03.326501490 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -37,7 +37,12 @@
  #ifdef CONFIG_NFSD_V4
  #include <linux/nfs4_acl.h>
@@ -15589,13 +15335,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.
  out_nfserr:
  	err = nfserrno(host_err);
 diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-08-24 14:14:13.661705000 -0400
 +++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-24 14:17:48.743747000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-08-23 12:08:29.097425997 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-23 12:09:03.327451643 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -37,6 +37,8 @@
  #ifndef _LINUX_NFSD_XDR4_H
  #define _LINUX_NFSD_XDR4_H
@@ -15672,13 +15413,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr
  	struct nfs4_replay *			replay;
  };
 diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-24 14:14:13.079708000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-24 14:17:48.749746000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-23 12:08:29.039491912 -0400
-+++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-23 12:09:03.328501680 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -28,6 +28,7 @@
  #include <linux/aio.h>
  #include <linux/gfp.h>
@@ -15804,13 +15540,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.
  	if (!ret)
  		return VM_FAULT_LOCKED;
 diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inode.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-08-24 14:14:13.095705000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-24 14:17:48.757730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-08-23 12:08:29.042511552 -0400
-+++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-23 12:09:03.329501644 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -48,6 +48,7 @@
  #include "internal.h"
  #include "fscache.h"
@@ -16024,13 +15755,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inod
  	nfs_fs_proc_exit();
  	nfsiod_stop();
 diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/internal.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-08-24 14:14:13.100708000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-24 14:17:48.763734000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-08-23 12:08:29.042511552 -0400
-+++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-23 12:09:03.330502148 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -139,6 +139,16 @@ extern struct nfs_server *nfs_clone_serv
  					   struct nfs_fattr *);
  extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
@@ -16091,11 +15817,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/i
  		struct page *, struct page *);
 diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kconfig
 --- linux-2.6.34.noarch/fs/nfs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-08-24 14:17:48.769730000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-08-23 12:09:03.331395814 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -79,10 +79,48 @@ config NFS_V4_1
  	depends on NFS_V4 && EXPERIMENTAL
  	help
@@ -16148,11 +15870,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kcon
  	depends on NFS_FS=y && IP_PNP
 diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Makefile
 --- linux-2.6.34.noarch/fs/nfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-08-24 14:17:48.774730000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-08-23 12:09:03.331395814 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -15,5 +15,12 @@ nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4x
  			   delegation.o idmap.o \
  			   callback.o callback_xdr.o callback_proc.o \
@@ -16167,13 +15885,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Mak
 +obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
 +obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
 diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs3proc.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-08-24 14:14:13.119708000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-24 14:17:48.780730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-08-23 12:08:29.045525837 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-23 12:09:03.332511640 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -833,6 +833,7 @@ const struct nfs_rpc_ops nfs_v3_clientop
  	.dentry_ops	= &nfs_dentry_operations,
  	.dir_inode_ops	= &nfs3_dir_inode_operations,
@@ -16183,13 +15896,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/n
  	.getattr	= nfs3_proc_getattr,
  	.setattr	= nfs3_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-08-24 14:17:48.784731000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-08-24 14:17:48.786730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-08-23 12:09:03.333512111 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-08-23 12:09:03.334491472 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,765 @@
 +/*
 + *  linux/fs/nfs/nfs4filelayout.c
@@ -16957,13 +16665,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs
 +module_init(nfs4filelayout_init);
 +module_exit(nfs4filelayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-08-24 14:17:48.790731000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-08-24 14:17:48.792730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-08-23 12:09:03.334491472 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-08-23 12:09:03.335501543 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,636 @@
 +/*
 + *  linux/fs/nfs/nfs4filelayoutdev.c
@@ -17602,13 +17305,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch
 +}
 +
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-08-24 14:17:48.795731000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-08-24 14:17:48.796742000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-08-23 12:09:03.335501543 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-08-23 12:09:03.335501543 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,97 @@
 +/*
 + *  pnfs_nfs4filelayout.h
@@ -17708,13 +17406,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs
 +
 +#endif /* FS_NFS_NFS4FILELAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nfs4_fs.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-08-24 14:14:13.130705000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-24 14:17:48.802730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-08-23 12:08:29.047512264 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-23 12:09:03.336490079 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -45,8 +45,28 @@ enum nfs4_client_state {
  	NFS4CLNT_RECLAIM_NOGRACE,
  	NFS4CLNT_DELEGRETURN,
@@ -17863,13 +17556,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nf
  
  /* nfs4xdr.c */
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs4proc.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-08-24 14:14:13.143709000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-24 14:17:48.811734000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-08-23 12:08:29.050481368 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-23 12:09:03.339481253 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -49,12 +49,15 @@
  #include <linux/mount.h>
  #include <linux/module.h>
@@ -19535,11 +19223,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/n
  	.setattr	= nfs4_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs/nfs4renewd.c
 --- linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-08-24 14:17:48.818733000 -0400
-=======
-+++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-08-23 12:09:03.341491726 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -54,17 +54,17 @@
  void
  nfs4_renew_state(struct work_struct *work)
@@ -19562,13 +19246,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs
  	spin_lock(&clp->cl_lock);
  	lease = clp->cl_lease_time;
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/nfs4state.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-08-24 14:14:13.150705000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-24 14:17:48.825730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-08-23 12:08:29.052491341 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-23 12:09:03.342373443 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -53,6 +53,9 @@
  #include "callback.h"
  #include "delegation.h"
@@ -19887,13 +19566,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/
  			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
  			    test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs4xdr.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-24 14:14:13.159705000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-24 14:17:48.834738000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-23 12:08:29.054481400 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-23 12:09:03.346481283 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -50,8 +50,11 @@
  #include <linux/nfs4.h>
  #include <linux/nfs_fs.h>
@@ -21404,13 +21078,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nf
  };
  
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-08-24 14:17:48.839734000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-08-24 14:17:48.840742000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-08-23 12:09:03.348511665 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-08-23 12:09:03.348511665 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,11 @@
 +#
 +# Makefile for the pNFS Objects Layout Driver kernel module
@@ -21424,13 +21093,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs
 +panlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o panfs_shim.o
 +obj-$(CONFIG_PNFS_PANLAYOUT) += panlayoutdriver.o
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-08-24 14:17:48.843735000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-08-24 14:17:48.845739000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-08-23 12:09:03.349501459 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-08-23 12:09:03.349501459 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,1087 @@
 +/*
 + *  objio_osd.c
@@ -22520,13 +22184,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noar
 +module_init(objlayout_init);
 +module_exit(objlayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-08-24 14:17:48.848735000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-08-24 14:17:48.851730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-08-23 12:09:03.350491564 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-08-23 12:09:03.350491564 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,790 @@
 +/*
 + *  objlayout.c
@@ -23319,13 +22978,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noar
 +	.uninitialize_mountpoint = objlayout_uninitialize_mountpoint,
 +};
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-08-24 14:17:48.852735000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-08-24 14:17:48.854746000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-08-23 12:09:03.351434439 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-08-23 12:09:03.351434439 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,171 @@
 +/*
 + *  objlayout.h
@@ -23499,13 +23153,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noar
 +
 +#endif /* _OBJLAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-08-24 14:17:48.857735000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-08-24 14:17:48.860740000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-08-23 12:09:03.352501716 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-08-23 12:09:03.352501716 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,734 @@
 +/*
 + *  panfs_shim.c
@@ -24242,13 +23891,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noa
 +module_init(panlayout_init);
 +module_exit(panlayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-08-24 14:17:48.863734000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-08-24 14:17:48.864730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-08-23 12:09:03.353501685 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-08-23 12:09:03.353501685 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,482 @@
 +/*
 + *  panfs_shim.h
@@ -24733,13 +24377,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noa
 +
 +#endif /* _PANLAYOUT_PANFS_SHIM_H */
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-08-24 14:17:48.868731000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-08-24 14:17:48.869739000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-08-23 12:09:03.354501721 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-08-23 12:09:03.354501721 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,435 @@
 +/*
 + *  pnfs_osd_xdr.c
@@ -25177,13 +24816,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.
 +	return 0;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/pagelist.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-08-24 14:14:13.169705000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-24 14:17:48.875733000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-08-23 12:08:29.056411363 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-23 12:09:03.355511659 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -20,6 +20,7 @@
  #include <linux/nfs_mount.h>
  
@@ -25306,13 +24940,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/p
  				if (res == INT_MAX)
  					goto out;
 diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-08-24 14:17:48.880733000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-08-24 14:17:48.883730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-08-23 12:09:03.356501413 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-08-23 12:09:03.357481204 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,2027 @@
 +/*
 + *  linux/fs/nfs/pnfs.c
@@ -27342,13 +26971,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.
 +}
 +EXPORT_SYMBOL(nfs4_put_deviceid_cache);
 diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-08-24 14:17:48.886733000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-08-24 14:17:48.887735000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-08-23 12:09:03.358501440 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-08-23 12:09:03.358501440 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,355 @@
 +/*
 + *  fs/nfs/pnfs.h
@@ -27706,13 +27330,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.
 +
 +#endif /* FS_NFS_PNFS_H */
 diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-08-24 14:14:13.174707000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-24 14:17:48.893730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-08-23 12:08:29.057511533 -0400
-+++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-23 12:09:03.359501471 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -443,7 +443,7 @@ nfs_proc_symlink(struct inode *dir, stru
  	fattr = nfs_alloc_fattr();
  	status = -ENOMEM;
@@ -27740,13 +27359,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.
  	.getattr	= nfs_proc_getattr,
  	.setattr	= nfs_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-08-24 14:14:13.179708000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-24 14:17:48.899733000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-08-23 12:08:29.057511533 -0400
-+++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-23 12:09:03.359501471 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -18,8 +18,12 @@
  #include <linux/sunrpc/clnt.h>
  #include <linux/nfs_fs.h>
@@ -27961,13 +27575,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.
  	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
  read_complete:
 diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/super.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-24 14:14:13.186707000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-24 14:17:48.907729000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-23 12:08:29.059491391 -0400
-+++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-23 12:09:03.361501458 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -64,6 +64,7 @@
  #include "iostat.h"
  #include "internal.h"
@@ -28015,13 +27624,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/supe
  #endif
  
 diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unlink.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-08-24 14:14:13.192705000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-24 14:17:48.913730000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-08-23 12:08:29.060501485 -0400
-+++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-23 12:09:03.362419975 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -110,7 +110,7 @@ void nfs_unlink_prepare(struct rpc_task 
  	struct nfs_unlinkdata *data = calldata;
  	struct nfs_server *server = NFS_SERVER(data->dir);
@@ -28032,13 +27636,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unl
  		return;
  	rpc_call_start(task);
 diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/write.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-08-24 14:14:06.360160000 -0400
 +++ linux-2.6.34.noarch/fs/nfs/write.c	2010-08-24 14:17:48.921712000 -0400
-=======
---- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-08-23 12:08:27.630563929 -0400
-+++ linux-2.6.34.noarch/fs/nfs/write.c	2010-08-23 12:09:03.364491337 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -20,6 +20,7 @@
  #include <linux/nfs_mount.h>
  #include <linux/nfs_page.h>
@@ -28727,11 +28326,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/writ
  int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/include/linux/exportfs.h
 --- linux-2.6.34.noarch/include/linux/exportfs.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-08-24 14:17:48.933713000 -0400
-=======
-+++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-08-23 12:09:03.365501459 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -2,6 +2,7 @@
  #define LINUX_EXPORTFS_H 1
  
@@ -28804,13 +28399,8 @@ diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/i
 +#endif /* CONFIG_PNFSD */
  #endif /* LINUX_EXPORTFS_H */
 diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/include/linux/exp_xdr.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-08-24 14:17:48.945690000 -0400
 +++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-08-24 14:17:48.946693000 -0400
-=======
---- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-08-23 12:09:03.367491365 -0400
-+++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-08-23 12:09:03.367491365 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,141 @@
 +#ifndef _LINUX_EXP_XDR_H
 +#define _LINUX_EXP_XDR_H
@@ -28954,13 +28544,8 @@ diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/in
 +}
 +#endif /* _LINUX_EXP_XDR_H */
 diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include/linux/fs.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-08-24 14:14:13.014707000 -0400
 +++ linux-2.6.34.noarch/include/linux/fs.h	2010-08-24 14:17:48.961675000 -0400
-=======
---- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-08-23 12:08:29.021511898 -0400
-+++ linux-2.6.34.noarch/include/linux/fs.h	2010-08-23 12:09:03.369481147 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -387,6 +387,7 @@ struct inodes_stat_t {
  #include <asm/byteorder.h>
  
@@ -28979,11 +28564,7 @@ diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include
  	struct dentry		*s_root;
 diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/include/linux/nfs4.h
 --- linux-2.6.34.noarch/include/linux/nfs4.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-08-24 14:17:48.974681000 -0400
-=======
-+++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-08-23 12:09:03.371491472 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -17,7 +17,10 @@
  
  #define NFS4_BITMAP_SIZE	2
@@ -29113,13 +28694,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/inclu
  #endif
  
 diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfs4_pnfs.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-08-24 14:17:48.986670000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-08-24 14:17:48.989666000 -0400
-=======
---- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-08-23 12:09:03.372501550 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-08-23 12:09:03.372501550 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,330 @@
 +/*
 + *  include/linux/nfs4_pnfs.h
@@ -29452,13 +29028,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/
 +
 +#endif /* LINUX_NFS4_PNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarch/include/linux/nfsd4_block.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-08-24 14:17:48.998668000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-08-24 14:17:49.000665000 -0400
-=======
---- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-08-23 12:09:03.373491892 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-08-23 12:09:03.374491393 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,101 @@
 +#ifndef NFSD4_BLOCK
 +#define NFSD4_BLOCK
@@ -29562,13 +29133,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarc
 +#endif /* NFSD4_BLOCK */
 +
 diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-08-24 14:17:49.012664000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-08-24 14:17:49.013671000 -0400
-=======
---- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-08-23 12:09:03.375501481 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-08-23 12:09:03.375501481 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,345 @@
 +/*
 + * include/linux/nfsd4_spnfs.h
@@ -29917,11 +29483,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarc
 +#endif /* NFS_SPNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch/include/linux/nfsd/const.h
 --- linux-2.6.34.noarch/include/linux/nfsd/const.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-08-24 14:17:49.018668000 -0400
-=======
-+++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-08-23 12:09:03.376401789 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -29,6 +29,7 @@
  #ifdef __KERNEL__
  
@@ -29932,11 +29494,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch
   * Largest number of bytes we need to allocate for an NFS
 diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch/include/linux/nfsd/debug.h
 --- linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-08-24 14:17:49.024673000 -0400
-=======
-+++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-08-23 12:09:03.376401789 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -32,6 +32,8 @@
  #define NFSDDBG_REPCACHE	0x0080
  #define NFSDDBG_XDR		0x0100
@@ -29948,11 +29506,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch
  
 diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarch/include/linux/nfsd/export.h
 --- linux-2.6.34.noarch/include/linux/nfsd/export.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-08-24 14:17:49.030665000 -0400
-=======
-+++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-08-23 12:09:03.377481954 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -100,6 +100,7 @@ struct svc_export {
  	uid_t			ex_anon_uid;
  	gid_t			ex_anon_gid;
@@ -29962,13 +29516,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarc
  	struct nfsd4_fs_locations ex_fslocs;
  	int			ex_nflavors;
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-08-24 14:17:49.033666000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-08-24 14:17:49.034665000 -0400
-=======
---- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-08-23 12:09:03.377481954 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-08-23 12:09:03.378501747 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,132 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -30103,13 +29652,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.3
 +
 +#endif /* NFSD_NFS4LAYOUTXDR_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-08-24 14:17:49.037666000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-08-24 14:17:49.039665000 -0400
-=======
---- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-08-23 12:09:03.378501747 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-08-23 12:09:03.378501747 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,54 @@
 +/******************************************************************************
 + *
@@ -30166,13 +29710,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.
 +
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-08-24 14:17:49.042666000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-08-24 14:17:49.044665000 -0400
-=======
---- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-08-23 12:09:03.379487099 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-08-23 12:09:03.379487099 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,271 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -30447,11 +29986,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.n
 +#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noarch/include/linux/nfsd/syscall.h
 --- linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-08-24 14:17:49.049665000 -0400
-=======
-+++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-08-23 12:09:03.380502500 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -29,6 +29,7 @@
  /*#define NFSCTL_GETFH		6	/ * get an fh by ino DISCARDED */
  #define NFSCTL_GETFD		7	/* get an fh by path (used by mountd) */
@@ -30489,13 +30024,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noar
  
  union nfsctl_res {
 diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/include/linux/nfs_fs.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-24 14:14:13.201710000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-24 14:17:49.063666000 -0400
-=======
---- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-23 12:08:29.061494081 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-23 12:09:03.381511751 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -72,13 +72,20 @@ struct nfs_access_entry {
  	int			mask;
  };
@@ -30594,13 +30124,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/inc
  
  #ifdef __KERNEL__
 diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/include/linux/nfs_fs_sb.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-08-24 14:14:13.206708000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-24 14:17:49.077665000 -0400
-=======
---- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-08-23 12:08:29.062501618 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-23 12:09:03.383491395 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -15,6 +15,7 @@ struct nlm_host;
  struct nfs4_sequence_args;
  struct nfs4_sequence_res;
@@ -30675,11 +30200,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/
  	atomic_t active; /* Keep trace of any activity to this server */
 diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch/include/linux/nfs_iostat.h
 --- linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-08-24 14:17:49.089668000 -0400
-=======
-+++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-08-23 12:09:03.384501540 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -113,6 +113,9 @@ enum nfs_stat_eventcounters {
  	NFSIOS_SHORTREAD,
  	NFSIOS_SHORTWRITE,
@@ -30692,11 +30213,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch
  
 diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/include/linux/nfs_page.h
 --- linux-2.6.34.noarch/include/linux/nfs_page.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-08-24 14:17:49.103665000 -0400
-=======
-+++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-08-23 12:09:03.385491518 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -39,6 +39,7 @@ struct nfs_page {
  	struct list_head	wb_list;	/* Defines state of page: */
  	struct page		*wb_page;	/* page to read in/write out */
@@ -30745,13 +30262,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/i
  			     struct inode *inode,
  			     int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
 diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/include/linux/nfs_xdr.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-08-24 14:14:13.211708000 -0400
 +++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-24 14:17:49.116665000 -0400
-=======
---- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-08-23 12:08:29.062501618 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-23 12:09:03.387491422 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -3,6 +3,8 @@
  
  #include <linux/nfsacl.h>
@@ -30903,13 +30415,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/in
  extern struct rpc_version	nfs_version3;
  extern struct rpc_version	nfs_version4;
 diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.noarch/include/linux/panfs_shim_api.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-08-24 14:17:49.128664000 -0400
 +++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-08-24 14:17:49.129670000 -0400
-=======
---- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-08-23 12:09:03.388491527 -0400
-+++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-08-23 12:09:03.388491527 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,57 @@
 +#ifndef _PANFS_SHIM_API_H
 +#define _PANFS_SHIM_API_H
@@ -30969,13 +30476,8 @@ diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.no
 +
 +#endif /* _PANFS_SHIM_API_H */
 diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-08-24 14:17:49.141664000 -0400
 +++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-08-24 14:17:49.142670000 -0400
-=======
---- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-08-23 12:09:03.390501461 -0400
-+++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-08-23 12:09:03.390501461 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,440 @@
 +/*
 + *  pnfs_osd_xdr.h
@@ -31418,13 +30920,8 @@ diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noar
 +
 +#endif /* __PNFS_OSD_XDR_H__ */
 diff -up linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_xdr.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig	2010-08-24 14:17:49.153666000 -0400
 +++ linux-2.6.34.noarch/include/linux/pnfs_xdr.h	2010-08-24 14:17:49.155665000 -0400
-=======
---- linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig	2010-08-23 12:09:03.391491550 -0400
-+++ linux-2.6.34.noarch/include/linux/pnfs_xdr.h	2010-08-23 12:09:03.391491550 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,134 @@
 +/*
 + *  include/linux/pnfs_xdr.h
@@ -31562,11 +31059,7 @@ diff -up linux-2.6.34.noarch/include/linux/pnfs_xdr.h.orig linux-2.6.34.noarch/i
 +#endif /* LINUX_PNFS_XDR_H */
 diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/include/linux/posix_acl.h
 --- linux-2.6.34.noarch/include/linux/posix_acl.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-08-24 14:17:49.168668000 -0400
-=======
-+++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-08-23 12:09:03.393501437 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -8,6 +8,7 @@
  #ifndef __LINUX_POSIX_ACL_H
  #define __LINUX_POSIX_ACL_H
@@ -31577,11 +31070,7 @@ diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/
  #define ACL_UNDEFINED_ID	(-1)
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-08-24 14:17:49.174665000 -0400
-=======
-+++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-08-23 12:09:03.393501437 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -14,6 +14,8 @@
  /* size of an XDR encoding unit in bytes, i.e. 32bit */
  #define XDR_UNIT	(4)
@@ -31593,11 +31082,7 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.n
  
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-08-24 14:17:49.179667000 -0400
-=======
-+++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-08-23 12:09:03.394512138 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -3,6 +3,7 @@
  
  #ifdef __KERNEL__
@@ -31618,13 +31103,8 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.3
  
  struct rpc_pipe_ops {
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-08-24 14:17:49.183664000 -0400
 +++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-08-24 14:17:49.184674000 -0400
-=======
---- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-08-23 12:09:03.394512138 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-08-23 12:09:03.395501822 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,111 @@
 +/*
 + *  Copyright (c) 2008 The Regents of the University of Michigan.
@@ -31739,11 +31219,7 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux
 +#endif /* _SIMPLE_RPC_PIPEFS_H_ */
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig	2010-05-16 17:17:36.000000000 -0400
-<<<<<<< HEAD
 +++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-08-24 14:17:49.190665000 -0400
-=======
-+++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-08-23 12:09:03.395501822 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -166,4 +166,41 @@ static inline char *__svc_print_addr(con
  
  	return buf;
@@ -31787,13 +31263,8 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.n
 +}
  #endif /* SUNRPC_SVC_XPRT_H */
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xdr.h
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-08-24 14:14:13.258707000 -0400
 +++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-24 14:17:49.195672000 -0400
-=======
---- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-08-23 12:08:29.066475323 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-23 12:09:03.396464612 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -131,6 +131,13 @@ xdr_decode_hyper(__be32 *p, __u64 *valp)
  	return p + 2;
  }
@@ -31816,20 +31287,9 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch
  extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
  		unsigned int base, unsigned int len);
  extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
-<<<<<<< HEAD
 diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/sunrpc/Makefile
 --- linux-2.6.34.noarch/net/sunrpc/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
 +++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-08-24 14:17:49.204668000 -0400
-=======
-diff -up linux-2.6.34.noarch/localversion-pnfs.orig linux-2.6.34.noarch/localversion-pnfs
---- linux-2.6.34.noarch/localversion-pnfs.orig	2010-08-23 12:09:03.396464612 -0400
-+++ linux-2.6.34.noarch/localversion-pnfs	2010-08-23 12:09:03.396464612 -0400
-@@ -0,0 +1 @@
-+-pnfs
-diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/sunrpc/Makefile
---- linux-2.6.34.noarch/net/sunrpc/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-08-23 12:09:03.397501662 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprt
  	    svc.o svcsock.o svcauth.o svcauth_unix.o \
  	    addr.o rpcb_clnt.o timer.o xdr.o \
@@ -31840,13 +31300,8 @@ diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/su
  sunrpc-$(CONFIG_PROC_FS) += stats.o
  sunrpc-$(CONFIG_SYSCTL) += sysctl.o
 diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-08-24 14:17:49.208664000 -0400
 +++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-08-24 14:17:49.209670000 -0400
-=======
---- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-08-23 12:09:03.398522348 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-08-23 12:09:03.398522348 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -0,0 +1,424 @@
 +/*
 + *  net/sunrpc/simple_rpc_pipefs.c
@@ -32273,13 +31728,8 @@ diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.no
 +}
 +EXPORT_SYMBOL(pipefs_generic_destroy_msg);
 diff -up linux-2.6.34.noarch/net/sunrpc/xdr.c.orig linux-2.6.34.noarch/net/sunrpc/xdr.c
-<<<<<<< HEAD
 --- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-08-24 14:14:13.447705000 -0400
 +++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-24 14:17:49.215665000 -0400
-=======
---- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-08-23 12:08:29.081501640 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-23 12:09:03.399443371 -0400
->>>>>>> Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-19
 @@ -403,16 +403,14 @@ xdr_shrink_pagelen(struct xdr_buf *buf, 
  
  	/* Shift the tail first */

From c9fe5dbdd69fdb2b0598be1491c3f0c938fa359e Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Tue, 31 Aug 2010 20:57:01 -0400
Subject: [PATCH 18/20] - Updated to the latest pNFS tag:
 pnfs-all-2.6.35-2010-08-24

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 kernel.spec                          |     8 +-
 pnfs-all-2.6.35-2010-08-24-f13.patch | 31778 +++++++++++++++++++++++++
 2 files changed, 31783 insertions(+), 3 deletions(-)
 create mode 100644 pnfs-all-2.6.35-2010-08-24-f13.patch

diff --git a/kernel.spec b/kernel.spec
index f3e776e20..6b9632cdd 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -23,7 +23,7 @@ Summary: The Linux kernel
 #
 # (Uncomment the '#' and both spaces below to set the buildid.)
 #
-%define buildid .pnfs34.2010.08.19
+%define buildid .pnfs34.2010.08.24
 ###################################################################
 
 # The buildid can also be specified on the rpmbuild command line
@@ -818,7 +818,7 @@ Patch12581: xen-use-percpu-interrupts-for-ipis-and-virqs.patch
 
 Patch30000: nfs-35-fc.patch
 Patch30001: nfsd-35-fc.patch
-Patch30002: pnfs-all-2.6.35-2010-08-19-f13.patch
+Patch30002: pnfs-all-2.6.35-2010-08-24-f13.patch
 Patch30003: linux-2.6-pnfs-compile.patch
 Patch30004: linux-2.6.35-inline.patch
 
@@ -1551,7 +1551,7 @@ ApplyPatch xen-use-percpu-interrupts-for-ipis-and-virqs.patch
 
 ApplyPatch nfs-35-fc.patch  
 ApplyPatch nfsd-35-fc.patch  
-ApplyPatch pnfs-all-2.6.35-2010-08-19-f13.patch
+ApplyPatch pnfs-all-2.6.35-2010-08-24-f13.patch
 ApplyPatch linux-2.6-pnfs-compile.patch
 ApplyPatch linux-2.6.35-inline.patch
 # END OF PATCH APPLICATIONS
@@ -2256,6 +2256,8 @@ fi
 * Wed Sep 01 2010 Chuck Ebbert <cebbert@redhat.com>  2.6.34.6-48
 - Revert commit 6a1a82df91fa0eb1cc76069a9efe5714d087eccd from 2.6.34.1;
   it breaks ftdi_sio (#613597)
+* Tue Aug 31 2010 Steve Dickson <steved@redhat.com>
+- Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-08-24
 
 * Fri Aug 27 2010 Chuck Ebbert <cebbert@redhat.com>  2.6.34.6-47
 - Linux 2.6.34.6
diff --git a/pnfs-all-2.6.35-2010-08-24-f13.patch b/pnfs-all-2.6.35-2010-08-24-f13.patch
new file mode 100644
index 000000000..17d1c844d
--- /dev/null
+++ b/pnfs-all-2.6.35-2010-08-24-f13.patch
@@ -0,0 +1,31778 @@
+diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arch/um/os-Linux/mem.c
+--- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-08-31 20:41:16.924243041 -0400
++++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-08-31 20:42:05.486160576 -0400
+@@ -13,6 +13,7 @@
+ #include <sys/stat.h>
+ #include <sys/mman.h>
+ #include <sys/param.h>
++#include <sys/stat.h>
+ #include "init.h"
+ #include "kern_constants.h"
+ #include "os.h"
+diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.c
+--- linux-2.6.34.noarch/block/genhd.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/block/genhd.c	2010-08-31 20:42:05.487160201 -0400
+@@ -1009,6 +1009,7 @@ static void disk_release(struct device *
+ struct class block_class = {
+ 	.name		= "block",
+ };
++EXPORT_SYMBOL(block_class);
+ 
+ static char *block_devnode(struct device *dev, mode_t *mode)
+ {
+diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt
+--- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-08-31 20:42:05.486160576 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-08-31 20:42:05.486160576 -0400
+@@ -0,0 +1,211 @@
++(c) 2007 Network Appliance Inc.
++
++spNFS
++-----
++
++An spNFS system consists of a Meta Data Server (MDS), a number of Client machines (C) and a number of Data Servers (DS).
++
++A file system is mounted by the clients from the MDS, and all file data
++is striped across the DSs.
++
++Identify the machines that will be filling each of these roles.
++
++The spnfs kernel will be installed on all machines: clients, the MDS and DSs.
++
++
++Building and installing the spNFS kernel
++----------------------------------------
++
++Get the spNFS kernel from:
++
++	git://linux-nfs.org/~bhalevy/linux-pnfs.git
++
++Use the pnfs-all-latest branch and add these options to your .config file
++
++	CONFIG_NETWORK_FILESYSTEMS=y
++	CONFIG_NFS_FS=m
++	CONFIG_NFS_V4=y
++	CONFIG_NFS_V4_1=y
++	CONFIG_PNFS=y
++	CONFIG_NFSD=m
++	CONFIG_PNFSD=y
++	# CONFIG_PNFSD_LOCAL_EXPORT is not set
++	CONFIG_SPNFS=y
++
++By default, spNFS uses whole-file layouts.  Layout segments can be enabled
++by adding:
++
++	CONFIG_SPNFS_LAYOUTSEGMENTS=y
++
++to your .config file.
++
++Building and installation of kernel+modules is as usual.
++This kernel should be installed and booted on the client, MDS and DSs.
++
++Note that CONFIG_PNFSD_LOCAL_EXPORT must be disabled for spnfs as it
++takes over the pnfs export interface.
++
++Building nfs-utils
++------------------
++
++Get the nfs-utils package containing spnfsd from:
++
++	git://linux-nfs.org/~bhalevy/pnfs-nfs-utils.git
++
++Follow the standard instructions for building nfs-utils.
++
++After building, the spnfsd daemon will be located in utils/spnfsd.  The spnfsd
++daemon will only be needed on the MDS.
++
++
++Installation
++------------
++
++The nfs-utils package contains a default spnfsd.conf file in
++utils/spnfsd/spnfsd.conf.  Copy this file to /etc/spnfsd.conf.
++
++By default, the DS-Mount-Directory is set to /spnfs (see spnfsd.conf).  Under
++this directory, mount points must be created for each DS to
++be used for pNFS data stripes.  These mount points are named by the ip address
++of the corresponding DS.  In the sample spnfsd.conf, there are two
++DSs defined (172.16.28.134 and 172.16.28.141).
++
++Following the sample spnfsd.conf,
++
++	mkdir /spnfs
++
++on the MDS (corresponding to DS-Mount-Directory).  Then
++
++	mkdir /spnfs/172.16.28.134
++	mkdir /spnfs/172.16.28.141
++
++to create the mount points for the DSs.
++
++On the DSs, choose a directory where data stripes will be created by the MDS.
++For the sample file, this directory is /pnfs, so on each DS execute:
++
++	mkdir /pnfs
++
++This directory is specified in the spnfsd.conf file by the DS*_ROOT option
++(where * is replaced by the DS number).  DS_ROOT is specified relative to
++the directory being exported by the DSs.  In our example, our DSs are exporting
++the root directory (/) and therefore our DS_ROOT is /pnfs.  On the DSs, we have
++the following entry in /etc/exports:
++
++	/ *(rw,fsid=0,insecure,no_root_squash,sync,no_subtree_check)
++
++N.B. If we had created a /exports directory and a /pnfs directory under
++/exports, and if we were exporting /exports, then DS_ROOT would still be /pnfs
++(not /exports/pnfs).
++
++It may be useful to add entries to /etc/fstab on the MDS to automatically
++mount the DS_ROOT file systems.  For this example, our MDS fstab would
++contain:
++
++	172.16.28.134:/pnfs /spnfs/172.16.28.134 nfs    defaults        1 2
++	172.16.28.141:/pnfs /spnfs/172.16.28.141 nfs    defaults        1 2
++
++The DS mounts must be performed manually or via fstab at this time (automatic
++mounting, directory creation, etc. are on the todo list).  To perform I/O
++through the MDS, the DS mounts MUST use NFSv3 at this time (this restriction
++will eventually be removed).
++
++
++On the MDS, choose a file system to use with spNFS and export it, e.g.:
++
++	/ *(rw,fsid=0,insecure,no_root_squash,sync,no_subtree_check,pnfs)
++
++Make sure nfsd and all supporting processes are running on the MDS and DSs.
++
++
++Running
++-------
++
++If rpc_pipefs is not already mounted (if you're running idmapd it probably is),
++you may want to add the following line to /etc/fstab:
++
++	rpc_pipefs    /var/lib/nfs/rpc_pipefs rpc_pipefs defaults     0 0
++
++to automatically mount rpc_pipefs.
++
++With spnfsd.conf configured for your environment and the mounts mounted as
++described above, spnfsd can now be started.
++
++On the MDS, execute spnfsd:
++
++	spnfsd
++
++The executable is located in the directory where it was built, and
++may also have been installed elsewhere depending on how you built nfs-utils.
++It will run in the foreground by default, and in fact will do so despite
++any options suggesting the contrary (it's still a debugging build).
++
++On the client, make sure the nfslayoutdriver module is loaded:
++
++	modprobe nfslayoutdriver
++
++Then mount the file system from the MDS:
++
++	mount -t nfs4 -o minorversion=1 mds:/ /mnt
++
++I/O through the MDS is now supported.  To use it, do not load the
++nfslayoutdriver on the client, and mount the MDS using NFSv4 or 4.1
++(NFSv2 and v3 are not yet supported).
++
++You may now use spNFS by performing file system activities in /mnt.
++If you create files in /mnt, you should see stripe files corresponding to
++new files being created on the DSs.  The current implementation names the
++stripe files based on the inode number of the file on the MDS.  For example,
++if you create a file foo in /mnt and do an 'ls -li /mnt/foo':
++
++	# ls -li foo
++	1233 -rw-r--r-- 1 root root 0 Nov 29 15:54 foo
++
++You should see stripe files on each DS under /pnfs (per the sample) named
++1233.  The file /pnfs/1233 on DS1 will contain the first <stripe size> bytes
++of data written to foo, DS2 will contain the next <stripe size> bytes, etc.
++Removing /mnt/foo will remove the corresponding stripe files on the DSs.
++Other file system operations should behave (mostly :-) as expected.
++
++
++Layout Segments
++---------------
++
++If the kernel is compiled to support layout segments, there will
++be two files created under /proc/fs/spnfs for controlling layout
++segment functionality.
++
++To enable layout segments, write a '1' to /proc/fs/spnfs/layoutseg, e.g.:
++
++	echo 1 > /proc/fs/spnfs/layoutseg
++
++Layout segments can be disabled (returning to whole-file layouts) by
++writing a '0' to /proc/fs/spnfs/layoutseg:
++
++	echo 0 > /proc/fs/spnfs/layoutseg
++
++When layout segments are enabled, the size of the layouts returned can
++be specified by writing a decimal number (ascii representation) to
++/proc/fs/spnfs/layoutsegsize:
++
++	echo 1024 > /proc/fs/spnfs/layoutsegsize
++
++The value '0' has a special meaning--it causes the server to return a
++layout that is exactly the size requested by the client:
++
++	echo 0 > /proc/fs/spnfs/layoutsegsize
++
++
++Troubleshooting
++---------------
++
++If you see data being written to the files on the MDS rather than
++the stripe files, make sure the nfslayoutdriver is loaded on the client
++(see above).
++
++If you get a "permission denied" error, make sure mountd is running on the MDS
++(it occasionally fails to start).
++
++Bugs, enhancements, compliments, complaints to: dmuntz@netapp.com
++
++
+diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/drivers/md/dm-ioctl.c
+--- linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig	2010-08-31 20:41:17.063232968 -0400
++++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-08-31 20:42:05.488160560 -0400
+@@ -657,6 +657,12 @@ static int dev_create(struct dm_ioctl *p
+ 	return r;
+ }
+ 
++int dm_dev_create(struct dm_ioctl *param)
++{
++	return dev_create(param, sizeof(*param));
++}
++EXPORT_SYMBOL(dm_dev_create);
++
+ /*
+  * Always use UUID for lookups if it's present, otherwise use name or dev.
+  */
+@@ -751,6 +757,12 @@ static int dev_remove(struct dm_ioctl *p
+ 	return 0;
+ }
+ 
++int dm_dev_remove(struct dm_ioctl *param)
++{
++	return dev_remove(param, sizeof(*param));
++}
++EXPORT_SYMBOL(dm_dev_remove);
++
+ /*
+  * Check a string doesn't overrun the chunk of
+  * memory we copied from userland.
+@@ -923,6 +935,12 @@ static int do_resume(struct dm_ioctl *pa
+ 	return r;
+ }
+ 
++int dm_do_resume(struct dm_ioctl *param)
++{
++	return do_resume(param);
++}
++EXPORT_SYMBOL(dm_do_resume);
++
+ /*
+  * Set or unset the suspension state of a device.
+  * If the device already is in the requested state we just return its status.
+@@ -1200,6 +1218,12 @@ out:
+ 	return r;
+ }
+ 
++int dm_table_load(struct dm_ioctl *param, size_t param_size)
++{
++	return table_load(param, param_size);
++}
++EXPORT_SYMBOL(dm_table_load);
++
+ static int table_clear(struct dm_ioctl *param, size_t param_size)
+ {
+ 	int r;
+diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drivers/scsi/hosts.c
+--- linux-2.6.34.noarch/drivers/scsi/hosts.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-08-31 20:42:05.489160594 -0400
+@@ -49,7 +49,7 @@ static void scsi_host_cls_release(struct
+ 	put_device(&class_to_shost(dev)->shost_gendev);
+ }
+ 
+-static struct class shost_class = {
++struct class shost_class = {
+ 	.name		= "scsi_host",
+ 	.dev_release	= scsi_host_cls_release,
+ };
+diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/exofs.h
+--- linux-2.6.34.noarch/fs/exofs/exofs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-08-31 20:42:05.492243039 -0400
+@@ -36,13 +36,9 @@
+ #include <linux/fs.h>
+ #include <linux/time.h>
+ #include <linux/backing-dev.h>
++#include <linux/pnfs_osd_xdr.h>
+ #include "common.h"
+ 
+-/* FIXME: Remove once pnfs hits mainline
+- * #include <linux/exportfs/pnfs_osd_xdr.h>
+- */
+-#include "pnfs.h"
+-
+ #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
+ 
+ #ifdef CONFIG_EXOFS_DEBUG
+@@ -103,6 +99,7 @@ struct exofs_sb_info {
+ struct exofs_i_info {
+ 	struct inode   vfs_inode;          /* normal in-memory inode          */
+ 	wait_queue_head_t i_wq;            /* wait queue for inode            */
++	spinlock_t     i_layout_lock;      /* lock for layout/return/recall   */
+ 	unsigned long  i_flags;            /* various atomic flags            */
+ 	uint32_t       i_data[EXOFS_IDATA];/*short symlink names and device #s*/
+ 	uint32_t       i_dir_start_lookup; /* which page to start lookup      */
+@@ -166,6 +163,9 @@ static inline unsigned exofs_io_state_si
+  */
+ #define OBJ_2BCREATED	0	/* object will be created soon*/
+ #define OBJ_CREATED	1	/* object has been created on the osd*/
++/* The bits below are not updated atomically but reuse the same i_flags */
++#define OBJ_LAYOUT_IS_GIVEN  2  /* inode has given layouts to clients*/
++#define OBJ_IN_LAYOUT_RECALL 3  /* inode is in the middle of a layout recall*/
+ 
+ static inline int obj_2bcreated(struct exofs_i_info *oi)
+ {
+@@ -304,4 +304,20 @@ extern const struct inode_operations exo
+ extern const struct inode_operations exofs_symlink_inode_operations;
+ extern const struct inode_operations exofs_fast_symlink_inode_operations;
+ 
++/* export.c */
++typedef int (exofs_recall_fn)(struct inode *inode);
++#ifdef CONFIG_PNFSD
++int exofs_inode_recall_layout(struct inode *inode, enum pnfs_iomode iomode,
++			      exofs_recall_fn todo);
++void exofs_init_export(struct super_block *sb);
++#else
++static inline int exofs_inode_recall_layout(struct inode *inode,
++				enum pnfs_iomode iomode, exofs_recall_fn todo)
++{
++	return todo(inode);
++}
++
++static inline void exofs_init_export(struct super_block *sb) {}
++#endif
++
+ #endif
+diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs/export.c
+--- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-08-31 20:42:05.493222759 -0400
++++ linux-2.6.34.noarch/fs/exofs/export.c	2010-08-31 20:42:05.493222759 -0400
+@@ -0,0 +1,396 @@
++/*
++ * export.c - Implementation of the pnfs_export_operations
++ *
++ * Copyright (C) 2009 Panasas Inc.
++ * All rights reserved.
++ *
++ * Boaz Harrosh <bharrosh@panasas.com>
++ *
++ * This file is part of exofs.
++ *
++ * exofs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation.  Since it is based on ext2, and the only
++ * valid version of GPL for the Linux kernel is version 2, the only valid
++ * version of GPL for exofs is version 2.
++ *
++ * exofs is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with exofs; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
++ */
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include "exofs.h"
++
++static int exofs_layout_type(struct super_block *sb)
++{
++	return LAYOUT_OSD2_OBJECTS;
++}
++
++static void set_dev_id(struct pnfs_deviceid *pnfs_devid, u64 sbid, u64 devid)
++{
++	struct nfsd4_pnfs_deviceid *dev_id =
++		(struct nfsd4_pnfs_deviceid *)pnfs_devid;
++
++	dev_id->sbid  = sbid;
++	dev_id->devid = devid;
++}
++
++static int cb_layout_recall(struct inode *inode, enum pnfs_iomode iomode,
++			    u64 offset, u64 length, void *cookie)
++{
++	struct nfsd4_pnfs_cb_layout cbl;
++	struct pnfsd_cb_ctl cb_ctl;
++	int status;
++
++	memset(&cb_ctl, 0, sizeof(cb_ctl));
++	status = pnfsd_get_cb_op(&cb_ctl);
++	if (unlikely(status)) {
++		EXOFS_ERR("%s: nfsd unloaded!! inode (0x%lx) status=%d\n",
++			  __func__, inode->i_ino, status);
++		goto err;
++	}
++
++	memset(&cbl, 0, sizeof(cbl));
++	cbl.cbl_recall_type = RETURN_FILE;
++	cbl.cbl_seg.layout_type = LAYOUT_OSD2_OBJECTS;
++	cbl.cbl_seg.iomode = iomode;
++	cbl.cbl_seg.offset = offset;
++	cbl.cbl_seg.length = length;
++	cbl.cbl_cookie = cookie;
++
++	status = cb_ctl.cb_op->cb_layout_recall(inode->i_sb, inode, &cbl);
++	pnfsd_put_cb_op(&cb_ctl);
++
++err:
++	return status;
++}
++
++static enum nfsstat4 exofs_layout_get(
++	struct inode *inode,
++	struct exp_xdr_stream *xdr,
++	const struct nfsd4_pnfs_layoutget_arg *args,
++	struct nfsd4_pnfs_layoutget_res *res)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
++	struct exofs_layout *el = &sbi->layout;
++	struct pnfs_osd_object_cred *creds = NULL;
++	struct pnfs_osd_layout layout;
++	__be32 *start;
++	bool in_recall;
++	int i, err;
++	enum nfsstat4 nfserr;
++
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;
++	res->lg_seg.iomode = IOMODE_RW;
++	res->lg_return_on_close = true; /* TODO: unused but will be soon */
++
++	/* skip opaque size, will be filled-in later */
++	start = exp_xdr_reserve_qwords(xdr, 1);
++	if (!start) {
++		nfserr = NFS4ERR_TOOSMALL;
++		goto out;
++	}
++
++	creds = kcalloc(el->s_numdevs, sizeof(*creds), GFP_KERNEL);
++	if (!creds) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto out;
++	}
++
++	/* Fill in a pnfs_osd_layout struct */
++	layout.olo_map = sbi->data_map;
++
++	for (i = 0; i < el->s_numdevs; i++) {
++		struct pnfs_osd_object_cred *cred = &creds[i];
++		osd_id id = exofs_oi_objno(oi);
++		unsigned dev = exofs_layout_od_id(el, id, i);
++
++		set_dev_id(&cred->oc_object_id.oid_device_id, args->lg_sbid,
++			   dev);
++		cred->oc_object_id.oid_partition_id = el->s_pid;
++		cred->oc_object_id.oid_object_id = id;
++		cred->oc_osd_version = osd_dev_is_ver1(el->s_ods[dev]) ?
++						PNFS_OSD_VERSION_1 :
++						PNFS_OSD_VERSION_2;
++		cred->oc_cap_key_sec = PNFS_OSD_CAP_KEY_SEC_NONE;
++
++		cred->oc_cap_key.cred_len	= 0;
++		cred->oc_cap_key.cred		= NULL;
++
++		cred->oc_cap.cred_len	= OSD_CAP_LEN;
++		cred->oc_cap.cred	= oi->i_cred;
++	}
++
++	layout.olo_comps_index = 0;
++	layout.olo_num_comps = el->s_numdevs;
++	layout.olo_comps = creds;
++
++	err = pnfs_osd_xdr_encode_layout(xdr, &layout);
++	if (err) {
++		nfserr = NFS4ERR_TOOSMALL; /* FIXME: Change osd_xdr error codes */
++		goto out;
++	}
++
++	exp_xdr_encode_opaque_len(start, xdr->p);
++
++	spin_lock(&oi->i_layout_lock);
++	in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	if (!in_recall) {
++		__set_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++		nfserr = NFS4_OK;
++	} else {
++		nfserr = NFS4ERR_RECALLCONFLICT;
++	}
++	spin_unlock(&oi->i_layout_lock);
++
++out:
++	kfree(creds);
++	EXOFS_DBGMSG("(0x%lx) nfserr=%u xdr_bytes=%zu\n",
++		     inode->i_ino, nfserr, exp_xdr_qbytes(xdr->p - start));
++	return nfserr;
++}
++
++/* Apply a LAYOUTCOMMIT: refresh atime/mtime and grow i_size when the client
++ * wrote past it. Always returns 0; a commit that races a recall is ignored.
++ * NOTE: inode mutex must NOT be held by the caller, it is taken below. */
++static int exofs_layout_commit(
++	struct inode *inode,
++	const struct nfsd4_pnfs_layoutcommit_arg *args,
++	struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	struct timespec mtime;
++	loff_t i_size;
++	int in_recall;
++
++	/* In case of a recall we ignore the new size and mtime since they
++	 * are going to be changed again by truncate, and since we cannot take
++	 * the inode lock in that case.
++	 */
++	spin_lock(&oi->i_layout_lock);
++	in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++	if (in_recall) {
++		EXOFS_DBGMSG("(0x%lx) commit was called during recall\n",
++			     inode->i_ino);
++		return 0;
++	}
++
++	/* NOTE: I would love to call inode_setattr here
++	 *	 but i cannot since this will cause an eventual vmtruncate,
++	 *	 which will cause a layout_recall. So open code the i_size
++	 *	 and mtime/atime changes under i_mutex.
++	 */
++	mutex_lock_nested(&inode->i_mutex, I_MUTEX_NORMAL);
++
++	if (args->lc_mtime.seconds) {
++		mtime.tv_sec = args->lc_mtime.seconds;
++		mtime.tv_nsec = args->lc_mtime.nseconds;
++
++		/* layout commit may only make time bigger, since there might
++		 * be reordering of the notifications and it might arrive after
++		 * a local change; keep i_mtime when the client's is older.
++		 * TODO: if mtime > ctime then we know set_attr did an mtime
++		 * in the future. and we can let this update through
++		 */
++		if (timespec_compare(&mtime, &inode->i_mtime) < 0)
++			mtime = inode->i_mtime;
++	} else {
++		mtime = current_fs_time(inode->i_sb);
++	}
++
++	/* TODO: Will below work? since mark_inode_dirty has its own
++	 *       Time handling
++	 */
++	inode->i_atime = inode->i_mtime = mtime;
++
++	i_size = i_size_read(inode);
++	if (args->lc_newoffset) {
++		loff_t new_size = args->lc_last_wr + 1;
++
++		if (i_size < new_size) {
++			i_size_write(inode, i_size = new_size);
++			res->lc_size_chg = 1;
++			res->lc_newsize = new_size;
++		}
++	}
++	/* TODO: else { i_size = osd_get_object_length() } */
++/* TODO: exofs does not currently use the osd_xdr part of the layout_commit */
++
++	mark_inode_dirty_sync(inode);
++	mutex_unlock(&inode->i_mutex);
++	EXOFS_DBGMSG("(0x%lx) i_size=0x%llx lcp->off=0x%llx\n",
++		     inode->i_ino, i_size, args->lc_last_wr);
++	return 0;
++}
++
++static void exofs_handle_error(struct pnfs_osd_ioerr *ioerr)
++{
++	EXOFS_ERR("exofs_handle_error: errno=%d is_write=%d obj=0x%llx "
++		  "offset=0x%llx length=0x%llx\n",
++		  ioerr->oer_errno, ioerr->oer_iswrite,
++		  _LLU(ioerr->oer_component.oid_object_id),
++		  _LLU(ioerr->oer_comp_offset),
++		  _LLU(ioerr->oer_comp_length));
++}
++
++static int exofs_layout_return(
++	struct inode *inode,
++	const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	__be32 *p = args->lrf_body;
++	unsigned len = exp_xdr_qwords(args->lrf_body_len);
++
++	EXOFS_DBGMSG("(0x%lx) cookie %p xdr_len %d\n",
++		     inode->i_ino, args->lr_cookie, len);
++
++	while (len >= pnfs_osd_ioerr_xdr_sz()) {
++		struct pnfs_osd_ioerr ioerr;
++
++		p = pnfs_osd_xdr_decode_ioerr(&ioerr, p);
++		len -= pnfs_osd_ioerr_xdr_sz();
++		exofs_handle_error(&ioerr);
++	}
++
++	if (args->lr_cookie) {
++		struct exofs_i_info *oi = exofs_i(inode);
++		bool in_recall;
++
++		spin_lock(&oi->i_layout_lock);
++		in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++		__clear_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++		spin_unlock(&oi->i_layout_lock);
++
++		/* TODO: how to communicate cookie with the waiter */
++		if (in_recall)
++			wake_up(&oi->i_wq); /* wakeup any recalls */
++	}
++
++	return 0;
++}
++
++int exofs_get_device_info(struct super_block *sb, struct exp_xdr_stream *xdr,
++			  u32 layout_type,
++			  const struct nfsd4_pnfs_deviceid *devid)
++{
++	struct exofs_sb_info *sbi = sb->s_fs_info;
++	struct pnfs_osd_deviceaddr devaddr;
++	const struct osd_dev_info *odi;
++	u64 devno = devid->devid;
++	__be32 *start;
++	int err;
++
++	memset(&devaddr, 0, sizeof(devaddr));
++
++	if (unlikely(devno >= sbi->layout.s_numdevs))
++		return -ENODEV;
++
++	odi = osduld_device_info(sbi->layout.s_ods[devno]);
++
++	devaddr.oda_systemid.len = odi->systemid_len;
++	devaddr.oda_systemid.data = (void *)odi->systemid; /* !const cast */
++
++	devaddr.oda_osdname.len = odi->osdname_len ;
++	devaddr.oda_osdname.data = (void *)odi->osdname;/* !const cast */
++
++	/* skip opaque size, will be filled-in later */
++	start = exp_xdr_reserve_qwords(xdr, 1);
++	if (!start) {
++		err = -E2BIG;
++		goto err;
++	}
++
++	err = pnfs_osd_xdr_encode_deviceaddr(xdr, &devaddr);
++	if (err)
++		goto err;
++
++	exp_xdr_encode_opaque_len(start, xdr->p);
++
++	EXOFS_DBGMSG("xdr_bytes=%Zu devno=%lld osdname-%s\n",
++		     exp_xdr_qbytes(xdr->p - start), devno, odi->osdname);
++	return 0;
++
++err:
++	EXOFS_DBGMSG("Error: err=%d at_byte=%zu\n",
++		     err, exp_xdr_qbytes(xdr->p - start));
++	return err;
++}
++
++struct pnfs_export_operations exofs_pnfs_ops = {
++	.layout_type	= exofs_layout_type,
++	.layout_get	= exofs_layout_get,
++	.layout_commit	= exofs_layout_commit,
++	.layout_return	= exofs_layout_return,
++	.get_device_info = exofs_get_device_info,
++};
++
++static bool is_layout_returned(struct exofs_i_info *oi)
++{
++	bool layout_given;
++
++	spin_lock(&oi->i_layout_lock);
++	layout_given = test_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++
++	return !layout_given;
++}
++
++int exofs_inode_recall_layout(struct inode *inode, enum pnfs_iomode iomode,
++			      exofs_recall_fn todo)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	int layout_given;
++	int error = 0;
++
++	spin_lock(&oi->i_layout_lock);
++	layout_given = test_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++	__set_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++
++	if (!layout_given)
++		goto exec;
++
++	for (;;) {
++		EXOFS_DBGMSG("(0x%lx) has_layout issue a recall\n",
++			     inode->i_ino);
++		error = cb_layout_recall(inode, iomode, 0, NFS4_MAX_UINT64,
++					 &oi->i_wq);
++		switch (error) {
++		case 0:
++		case -EAGAIN:
++			break;
++		case -ENOENT:
++			goto exec;
++		default:
++			goto err;
++		}
++
++		error = wait_event_interruptible(oi->i_wq,
++						 is_layout_returned(oi));
++		if (error)
++			goto err;
++	}
++
++exec:
++	error = todo(inode);
++
++err:
++	spin_lock(&oi->i_layout_lock);
++	__clear_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++	EXOFS_DBGMSG("(0x%lx) return=>%d\n", inode->i_ino, error);
++	return error;
++}
++
++void exofs_init_export(struct super_block *sb)
++{
++	sb->s_pnfs_op = &exofs_pnfs_ops;
++}
+diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/inode.c
+--- linux-2.6.34.noarch/fs/exofs/inode.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-08-31 20:42:05.494222756 -0400
+@@ -833,7 +833,7 @@ void exofs_truncate(struct inode *inode)
+ 	if (unlikely(wait_obj_created(oi)))
+ 		goto fail;
+ 
+-	ret = _do_truncate(inode);
++	ret = exofs_inode_recall_layout(inode, IOMODE_ANY, _do_truncate);
+ 	if (ret)
+ 		goto fail;
+ 
+@@ -964,6 +964,7 @@ static void __oi_init(struct exofs_i_inf
+ {
+ 	init_waitqueue_head(&oi->i_wq);
+ 	oi->i_flags = 0;
++	spin_lock_init(&oi->i_layout_lock);
+ }
+ /*
+  * Fill in an inode read from the OSD and set it up for use
+diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/Kbuild
+--- linux-2.6.34.noarch/fs/exofs/Kbuild.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-08-31 20:42:05.490222933 -0400
+@@ -13,4 +13,5 @@
+ #
+ 
+ exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o
++exofs-$(CONFIG_PNFSD) +=  export.o
+ obj-$(CONFIG_EXOFS_FS) += exofs.o
+diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/Kconfig
+--- linux-2.6.34.noarch/fs/exofs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-08-31 20:42:05.491232880 -0400
+@@ -1,6 +1,7 @@
+ config EXOFS_FS
+ 	tristate "exofs: OSD based file system support"
+ 	depends on SCSI_OSD_ULD
++	select EXPORTFS_OSD_LAYOUT if PNFSD
+ 	help
+ 	  EXOFS is a file system that uses an OSD storage device,
+ 	  as its backing storage.
+diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/super.c
+--- linux-2.6.34.noarch/fs/exofs/super.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/super.c	2010-08-31 20:42:05.496073173 -0400
+@@ -621,6 +621,7 @@ static int exofs_fill_super(struct super
+ 	sb->s_fs_info = sbi;
+ 	sb->s_op = &exofs_sops;
+ 	sb->s_export_op = &exofs_export_ops;
++	exofs_init_export(sb);
+ 	root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF);
+ 	if (IS_ERR(root)) {
+ 		EXOFS_ERR("ERROR: exofs_iget failed\n");
+diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exportfs/expfs.c
+--- linux-2.6.34.noarch/fs/exportfs/expfs.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-08-31 20:42:05.497212975 -0400
+@@ -16,6 +16,13 @@
+ #include <linux/namei.h>
+ #include <linux/sched.h>
+ 
++#if defined(CONFIG_PNFSD)
++struct pnfsd_cb_ctl pnfsd_cb_ctl = {
++	.lock = __SPIN_LOCK_UNLOCKED(pnfsd_cb_ctl.lock)
++};
++EXPORT_SYMBOL(pnfsd_cb_ctl);
++#endif /* CONFIG_PNFSD */
++
+ #define dprintk(fmt, args...) do{}while(0)
+ 
+ 
+diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/exportfs/Makefile
+--- linux-2.6.34.noarch/fs/exportfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-08-31 20:42:05.496073173 -0400
+@@ -3,4 +3,7 @@
+ 
+ obj-$(CONFIG_EXPORTFS) += exportfs.o
+ 
+-exportfs-objs := expfs.o
++exportfs-y				:= expfs.o
++exportfs-$(CONFIG_EXPORTFS_FILE_LAYOUT)	+= nfs4filelayoutxdr.o
++exportfs-$(CONFIG_EXPORTFS_OSD_LAYOUT)	+= pnfs_osd_xdr_srv.o
++exportfs-$(CONFIG_EXPORTFS_BLOCK_LAYOUT) += nfs4blocklayoutxdr.o
+diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c
+--- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-08-31 20:42:05.497212975 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-08-31 20:42:05.498113655 -0400
+@@ -0,0 +1,158 @@
++/*
++ *  linux/fs/exportfs/nfs4blocklayoutxdr.c
++ *
++ *
++ *  Created by Rick McNeal on 3/31/08.
++ *  Copyright 2008 __MyCompanyName__. All rights reserved.
++ *
++ */
++#include <linux/module.h>
++#include <linux/sunrpc/svc.h>
++#include <linux/nfs4.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++static int
++bl_encode_simple(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	__be32 *p = exp_xdr_reserve_space(xdr,
++					  12 + 4 + bld->u.simple.bld_sig_len);
++
++	if (!p)
++		return -ETOOSMALL;
++
++	p = exp_xdr_encode_u32(p, 1);
++	p = exp_xdr_encode_u64(p, bld->u.simple.bld_offset);
++	exp_xdr_encode_opaque(p, bld->u.simple.bld_sig,
++			      bld->u.simple.bld_sig_len);
++
++	return 0;
++}
++
++static int
++bl_encode_slice(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 2 + 2 + 1);
++
++	if (!p)
++		return -ETOOSMALL;
++
++	p = exp_xdr_encode_u64(p, bld->u.slice.bld_start);
++	p = exp_xdr_encode_u64(p, bld->u.slice.bld_len);
++	exp_xdr_encode_u32(p, bld->u.slice.bld_index);
++
++	return 0;
++}
++
++static int
++bl_encode_concat(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	return -ENOTSUPP;
++}
++
++static int
++bl_encode_stripe(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	int i;
++	/* words: u64 chunk size (2) + u32 stripe count (1) + one u32 per stripe */
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 2 + 1 + bld->u.stripe.bld_stripes);
++	if (!p)
++		return -ETOOSMALL;
++	p = exp_xdr_encode_u64(p, bld->u.stripe.bld_chunk_size);
++	p = exp_xdr_encode_u32(p, bld->u.stripe.bld_stripes);
++	for (i = 0; i < bld->u.stripe.bld_stripes; i++)
++		p = exp_xdr_encode_u32(p, bld->u.stripe.bld_stripe_indexs[i]);
++	return 0;
++}
++
++int
++blocklayout_encode_devinfo(struct exp_xdr_stream *xdr,
++			   const struct list_head *volumes)
++{
++	u32				num_vols	= 0,
++					*layoutlen_p	= xdr->p;
++	pnfs_blocklayout_devinfo_t	*bld;
++	int				status		= 0;
++	__be32 *p;
++
++	p = exp_xdr_reserve_qwords(xdr, 2);
++	if (!p)
++		return -ETOOSMALL;
++	p += 2;
++
++	/*
++	 * All simple volumes with their signature are required to be listed
++	 * first.
++	 */
++	list_for_each_entry(bld, volumes, bld_list) {
++		num_vols++;
++		p = exp_xdr_reserve_qwords(xdr, 1);
++		if (!p)
++			return -ETOOSMALL;
++		p = exp_xdr_encode_u32(p, bld->bld_type);
++		switch (bld->bld_type) {
++			case PNFS_BLOCK_VOLUME_SIMPLE:
++				status = bl_encode_simple(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_SLICE:
++				status = bl_encode_slice(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_CONCAT:
++				status = bl_encode_concat(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_STRIPE:
++				status = bl_encode_stripe(xdr, bld);
++				break;
++			default:
++				BUG();
++		}
++		if (status)
++			goto error;
++	}
++
++	/* ---- Fill in the overall length and number of volumes ---- */
++	p = exp_xdr_encode_u32(layoutlen_p, (xdr->p - layoutlen_p - 1) * 4);
++	exp_xdr_encode_u32(p, num_vols);
++
++error:
++	return status;
++}
++EXPORT_SYMBOL_GPL(blocklayout_encode_devinfo);
++
++enum nfsstat4
++blocklayout_encode_layout(struct exp_xdr_stream *xdr,
++			  const struct list_head *bl_head)
++{
++	struct pnfs_blocklayout_layout *ext;
++	u32 *len_slot = xdr->p;	/* stream position before reserving */
++	u32 nr_extents = 0;
++	__be32 *cur;
++
++	/*
++	 * Reserve two words up front: the opaque layout length and the
++	 * extent count.  Both are backfilled once the list is encoded.
++	 */
++	cur = exp_xdr_reserve_qwords(xdr, 2);
++	if (cur == NULL)
++		return NFS4ERR_TOOSMALL;
++	cur += 2;
++
++	list_for_each_entry(ext, bl_head, bll_list) {
++		nr_extents++;
++		cur = exp_xdr_reserve_qwords(xdr, 5 * 2 + 1);
++		if (cur == NULL)
++			return NFS4ERR_TOOSMALL;
++		cur = exp_xdr_encode_u64(cur, ext->bll_vol_id.sbid);
++		cur = exp_xdr_encode_u64(cur, ext->bll_vol_id.devid);
++		cur = exp_xdr_encode_u64(cur, ext->bll_foff);
++		cur = exp_xdr_encode_u64(cur, ext->bll_len);
++		cur = exp_xdr_encode_u64(cur, ext->bll_soff);
++		cur = exp_xdr_encode_u32(cur, ext->bll_es);
++	}
++
++	/* Length excludes its own word, hence the "- 1" before scaling */
++	cur = exp_xdr_encode_u32(len_slot, (cur - len_slot - 1) * 4);
++	exp_xdr_encode_u32(cur, nr_extents);
++
++	return NFS4_OK;
++}
++EXPORT_SYMBOL_GPL(blocklayout_encode_layout);
+diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c
+--- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-08-31 20:42:05.498113655 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-08-31 20:42:05.498113655 -0400
+@@ -0,0 +1,218 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#include <linux/exp_xdr.h>
++#include <linux/module.h>
++#include <linux/nfs4.h>
++#include <linux/nfsd/nfsfh.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++/* We define our own no-op dprintk so filesystems do not depend on sunrpc */
++#ifdef dprintk
++#undef dprintk
++#endif
++#define dprintk(fmt, args...)	do { } while (0)
++
++/* XDR length, in 32-bit words, of the GETDEVICEINFO4resok structure minus
++ * gdir_notification and da_layout_type.  Each multipath address is sized.
++ */
++static int fl_devinfo_xdr_words(const struct pnfs_filelayout_device *fdev)
++{
++	struct pnfs_filelayout_devaddr *fl_addr;
++	struct pnfs_filelayout_multipath *mp;
++	int i, j, nwords;
++
++	/* da_addr_body length, index count, indices,
++	 * multipath_list4 length */
++	nwords = 1 + 1 + fdev->fl_stripeindices_length + 1;
++	for (i = 0; i < fdev->fl_device_length; i++) {
++		mp = &fdev->fl_device_list[i];
++		nwords++; /* multipath list length */
++		for (j = 0; j < mp->fl_multipath_length; j++) {
++			fl_addr = &mp->fl_multipath_list[j]; /* j-th path */
++			nwords += 1 + exp_xdr_qwords(fl_addr->r_netid.len);
++			nwords += 1 + exp_xdr_qwords(fl_addr->r_addr.len);
++		}
++	}
++	dprintk("<-- %s nwords %d\n", __func__, nwords);
++	return nwords;
++}
++
++/* Encodes the nfsv4_1_file_layout_ds_addr4 structure from draft 13
++ * on the response stream.
++ * Use linux error codes (not nfs) since these values are being
++ * returned to the file system.
++ */
++int
++filelayout_encode_devinfo(struct exp_xdr_stream *xdr,
++			  const struct pnfs_filelayout_device *fdev)
++{
++	unsigned int k, path, len = 0, body_words;
++	u32 *len_slot;
++	u32 nr_indices = fdev->fl_stripeindices_length;
++	u32 nr_devs = fdev->fl_device_length;
++	int error = 0;
++	__be32 *wp;
++
++	body_words = fl_devinfo_xdr_words(fdev);
++	dprintk("%s: Begin indx_cnt: %u dev_cnt: %u total size %u\n",
++		__func__,
++		nr_indices,
++		nr_devs,
++		body_words*4);
++
++	/* a single reservation covers the whole body, length word included */
++	wp = len_slot = exp_xdr_reserve_qwords(xdr, body_words);
++	if (wp == NULL) {
++		error = -ETOOSMALL;
++		goto out;
++	}
++
++	/* step over the length word; it is backfilled below */
++	wp++;
++
++	/* stripe indices: count followed by each index */
++	wp = exp_xdr_encode_u32(wp, nr_indices);
++	for (k = 0; k < nr_indices; k++)
++		wp = exp_xdr_encode_u32(wp, fdev->fl_stripeindices_list[k]);
++
++	/* device list: count, then one multipath list per device */
++	wp = exp_xdr_encode_u32(wp, nr_devs);
++	for (k = 0; k < nr_devs; k++) {
++		struct pnfs_filelayout_multipath *mp = &fdev->fl_device_list[k];
++
++		wp = exp_xdr_encode_u32(wp, mp->fl_multipath_length);
++		for (path = 0; path < mp->fl_multipath_length; path++) {
++			struct pnfs_filelayout_devaddr *addr;
++
++			addr = &mp->fl_multipath_list[path];
++			/* netid first, then the address string */
++			wp = exp_xdr_encode_opaque(wp, addr->r_netid.data,
++						   addr->r_netid.len);
++			wp = exp_xdr_encode_opaque(wp, addr->r_addr.data,
++						   addr->r_addr.len);
++		}
++	}
++
++	/* bytes written, minus the 4-byte length word itself */
++	len = (char *)wp - (char *)len_slot;
++	exp_xdr_encode_u32(len_slot, len - 4);
++
++	error = 0;
++out:
++	dprintk("%s: End err %d xdrlen %d\n",
++		__func__, error, len);
++	return error;
++}
++EXPORT_SYMBOL(filelayout_encode_devinfo);
++
++/* Encodes the loc_body structure from draft 13
++ * on the response stream.
++ * Returns an NFSv4 status: NFS4_OK, NFS4ERR_LAYOUTUNAVAILABLE when the
++ * layout has no file handles, or NFS4ERR_TOOSMALL when space runs out.
++ */
++enum nfsstat4
++filelayout_encode_layout(struct exp_xdr_stream *xdr,
++			 const struct pnfs_filelayout_layout *flp)
++{
++	u32 len = 0, nfl_util, fhlen, i;
++	u32 *layoutlen_p;
++	enum nfsstat4 nfserr;
++	__be32 *p;
++
++	dprintk("%s: device_id %llx:%llx fsi %u, numfh %u\n",
++		__func__,
++		flp->device_id.sbid,
++		flp->device_id.devid,
++		flp->lg_first_stripe_index,
++		flp->lg_fh_length);
++
++	/* Ensure file system added at least one file handle */
++	if (flp->lg_fh_length <= 0) {
++		dprintk("%s: File Layout has no file handles!!\n", __func__);
++		nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		goto out;
++	}
++
++	/* Ensure room for len, devid, util, first_stripe_index,
++	 * pattern_offset, number of filehandles */
++	p = layoutlen_p = exp_xdr_reserve_qwords(xdr, 1+2+2+1+1+2+1);
++	if (!p) {
++		nfserr = NFS4ERR_TOOSMALL;
++		goto out;
++	}
++
++	/* save spot for opaque file layout length, fill-in later*/
++	p++;
++
++	/* encode device id */
++	p = exp_xdr_encode_u64(p, flp->device_id.sbid);
++	p = exp_xdr_encode_u64(p, flp->device_id.devid);
++
++	/* stripe unit and the two flag bits share the nfl_util word */
++	nfl_util = flp->lg_stripe_unit;
++	if (flp->lg_commit_through_mds)
++		nfl_util |= NFL4_UFLG_COMMIT_THRU_MDS;
++	if (flp->lg_stripe_type == STRIPE_DENSE)
++		nfl_util |= NFL4_UFLG_DENSE;
++	p = exp_xdr_encode_u32(p, nfl_util);
++
++	/* encode first stripe index */
++	p = exp_xdr_encode_u32(p, flp->lg_first_stripe_index);
++
++	/* encode striping pattern start */
++	p = exp_xdr_encode_u64(p, flp->lg_pattern_offset);
++
++	/* encode number of file handles */
++	p = exp_xdr_encode_u32(p, flp->lg_fh_length);
++
++	/* encode file handles; each one reserves its own space */
++	for (i = 0; i < flp->lg_fh_length; i++) {
++		fhlen = flp->lg_fh_list[i].fh_size;
++		p = exp_xdr_reserve_space(xdr, 4 + fhlen);
++		if (!p) {
++			nfserr = NFS4ERR_TOOSMALL;
++			goto out;
++		}
++		p = exp_xdr_encode_opaque(p, &flp->lg_fh_list[i].fh_base, fhlen);
++	}
++
++	/* Set number of bytes encoded =  total_bytes_encoded - length var */
++	len = (char *)p - (char *)layoutlen_p;
++	exp_xdr_encode_u32(layoutlen_p, len - 4);
++
++	nfserr = NFS4_OK;
++out:
++	dprintk("%s: End err %u xdrlen %d\n",
++		__func__, nfserr, len);
++	return nfserr;
++}
++EXPORT_SYMBOL(filelayout_encode_layout);
+diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c
+--- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-08-31 20:42:05.499125509 -0400
++++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-08-31 20:42:05.499125509 -0400
+@@ -0,0 +1,289 @@
++/*
++ *  pnfs_osd_xdr_srv.c
++ *
++ *  Object-Based pNFS Layout XDR layer
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/pnfs_osd_xdr.h>
++
++/*
++ * struct pnfs_osd_data_map {
++ * 	u32	odm_num_comps;
++ * 	u64	odm_stripe_unit;
++ * 	u32	odm_group_width;
++ * 	u32	odm_group_depth;
++ * 	u32	odm_mirror_cnt;
++ * 	u32	odm_raid_algorithm;
++ * };
++ */
++static int pnfs_osd_xdr_encode_data_map(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_data_map *data_map)
++{
++	__be32 *wp = exp_xdr_reserve_qwords(xdr, 1+2+1+1+1+1);
++
++	if (wp == NULL)	/* no room for five u32 fields and one u64 */
++		return -E2BIG;
++
++	wp = exp_xdr_encode_u32(wp, data_map->odm_num_comps);
++	wp = exp_xdr_encode_u64(wp, data_map->odm_stripe_unit);
++	wp = exp_xdr_encode_u32(wp, data_map->odm_group_width);
++	wp = exp_xdr_encode_u32(wp, data_map->odm_group_depth);
++	wp = exp_xdr_encode_u32(wp, data_map->odm_mirror_cnt);
++	exp_xdr_encode_u32(wp, data_map->odm_raid_algorithm);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline int pnfs_osd_xdr_encode_objid(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_objid *object_id)
++{
++	struct nfsd4_pnfs_deviceid *devid;
++	__be32 *wp = exp_xdr_reserve_qwords(xdr, 2+2+2+2);
++
++	if (wp == NULL)
++		return -E2BIG;
++
++	/* oid_device_id is reinterpreted as an sbid/devid pair */
++	devid = (struct nfsd4_pnfs_deviceid *)&object_id->oid_device_id;
++	wp = exp_xdr_encode_u64(wp, devid->sbid);
++	wp = exp_xdr_encode_u64(wp, devid->devid);
++	wp = exp_xdr_encode_u64(wp, object_id->oid_partition_id);
++	exp_xdr_encode_u64(wp, object_id->oid_object_id);
++	return 0;
++}
++
++/*
++ * enum pnfs_osd_cap_key_sec4 {
++ * 	PNFS_OSD_CAP_KEY_SEC_NONE = 0,
++ * 	PNFS_OSD_CAP_KEY_SEC_SSV  = 1
++ * };
++ *
++ * struct pnfs_osd_object_cred {
++ * 	struct pnfs_osd_objid		oc_object_id;
++ * 	u32				oc_osd_version;
++ * 	u32				oc_cap_key_sec;
++ * 	struct pnfs_osd_opaque_cred	oc_cap_key
++ * 	struct pnfs_osd_opaque_cred	oc_cap;
++ * };
++ */
++static int pnfs_osd_xdr_encode_object_cred(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_object_cred *olo_comp)
++{
++	__be32 *wp;
++	int rc;
++
++	rc = pnfs_osd_xdr_encode_objid(xdr, &olo_comp->oc_object_id);
++	if (rc != 0)
++		return rc;
++
++	/* three fixed words, then the length-prefixed capability */
++	wp = exp_xdr_reserve_space(xdr, 3*4 + 4+olo_comp->oc_cap.cred_len);
++	if (wp == NULL)
++		return -E2BIG;
++
++	wp = exp_xdr_encode_u32(wp, olo_comp->oc_osd_version);
++	/* No sec for now */
++	wp = exp_xdr_encode_u32(wp, PNFS_OSD_CAP_KEY_SEC_NONE);
++	wp = exp_xdr_encode_u32(wp, 0); /* opaque oc_capability_key<> */
++
++	exp_xdr_encode_opaque(wp, olo_comp->oc_cap.cred,
++			      olo_comp->oc_cap.cred_len);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_layout {
++ * 	struct pnfs_osd_data_map	olo_map;
++ * 	u32				olo_comps_index;
++ * 	u32				olo_num_comps;
++ * 	struct pnfs_osd_object_cred	*olo_comps;
++ * };
++ */
++int pnfs_osd_xdr_encode_layout(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_layout *pol)
++{
++	__be32 *wp;
++	u32 comp;
++	int rc;
++
++	rc = pnfs_osd_xdr_encode_data_map(xdr, &pol->olo_map);
++	if (rc != 0)
++		return rc;
++
++	/* two words: olo_comps_index and olo_num_comps */
++	wp = exp_xdr_reserve_qwords(xdr, 2);
++	if (wp == NULL)
++		return -E2BIG;
++
++	wp = exp_xdr_encode_u32(wp, pol->olo_comps_index);
++	exp_xdr_encode_u32(wp, pol->olo_num_comps);
++
++	/* one object credential per component; stop at the first failure */
++	for (comp = 0; rc == 0 && comp < pol->olo_num_comps; comp++)
++		rc = pnfs_osd_xdr_encode_object_cred(xdr,
++						     &pol->olo_comps[comp]);
++
++	return rc;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_encode_layout);
++
++static int _encode_string(struct exp_xdr_stream *xdr,
++			  const struct nfs4_string *str)
++{
++	__be32 *wp = exp_xdr_reserve_space(xdr, 4 + str->len);
++
++	if (wp == NULL)	/* no room for the length word plus the bytes */
++		return -E2BIG;
++	exp_xdr_encode_opaque(wp, str->data, str->len);
++	return 0;
++}
++
++/* struct pnfs_osd_deviceaddr {
++ * 	struct pnfs_osd_targetid	oda_targetid;
++ * 	struct pnfs_osd_targetaddr	oda_targetaddr;
++ * 	u8				oda_lun[8];
++ * 	struct nfs4_string		oda_systemid;
++ * 	struct pnfs_osd_object_cred	oda_root_obj_cred;
++ * 	struct nfs4_string		oda_osdname;
++ * };
++ */
++int pnfs_osd_xdr_encode_deviceaddr(
++	struct exp_xdr_stream *xdr, struct pnfs_osd_deviceaddr *devaddr)
++{
++	__be32 *wp;
++	int rc;
++
++	/* two u32 words followed by the raw oda_lun bytes */
++	wp = exp_xdr_reserve_space(xdr, 4 + 4 + sizeof(devaddr->oda_lun));
++	if (wp == NULL)
++		return -E2BIG;
++
++	/* Empty oda_targetid */
++	wp = exp_xdr_encode_u32(wp, OBJ_TARGET_ANON);
++
++	/* Empty oda_targetaddr for now */
++	wp = exp_xdr_encode_u32(wp, 0);
++
++	/* oda_lun */
++	exp_xdr_encode_bytes(wp, devaddr->oda_lun, sizeof(devaddr->oda_lun));
++
++	/* the remaining fields reserve their own space as they go */
++	rc = _encode_string(xdr, &devaddr->oda_systemid);
++	if (rc != 0)
++		return rc;
++
++	rc = pnfs_osd_xdr_encode_object_cred(xdr,
++					     &devaddr->oda_root_obj_cred);
++	if (rc != 0)
++		return rc;
++
++	rc = _encode_string(xdr, &devaddr->oda_osdname);
++
++	return rc;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_encode_deviceaddr);
++
++/*
++ * struct pnfs_osd_layoutupdate {
++ * 	u32	dsu_valid;
++ * 	s64	dsu_delta;
++ * 	u32	olu_ioerr_flag;
++ * };
++ */
++__be32 *
++pnfs_osd_xdr_decode_layoutupdate(struct pnfs_osd_layoutupdate *lou, __be32 *p)
++{
++	lou->dsu_valid = be32_to_cpu(p[0]);
++	/* dsu_delta is only decoded when dsu_valid is non-zero */
++	p = lou->dsu_valid ? xdr_decode_hyper(p + 1, &lou->dsu_delta) : p + 1;
++	lou->olu_ioerr_flag = be32_to_cpu(*p);
++	return p + 1;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_decode_layoutupdate);
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline __be32 *
++pnfs_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid)
++{
++	const size_t devid_len = sizeof(objid->oid_device_id.data);
++
++	/* FIXME: p = xdr_decode_fixed(...) */
++	memcpy(objid->oid_device_id.data, p, devid_len);
++	p = xdr_decode_hyper(p + XDR_QUADLEN(devid_len),
++			     &objid->oid_partition_id);
++	return xdr_decode_hyper(p, &objid->oid_object_id);
++}
++
++/*
++ * struct pnfs_osd_ioerr {
++ * 	struct pnfs_osd_objid	oer_component;
++ * 	u64			oer_comp_offset;
++ * 	u64			oer_comp_length;
++ * 	u32			oer_iswrite;
++ * 	u32			oer_errno;
++ * };
++ */
++__be32 *
++pnfs_osd_xdr_decode_ioerr(struct pnfs_osd_ioerr *ioerr, __be32 *p)
++{
++	p = pnfs_osd_xdr_decode_objid(p, &ioerr->oer_component);
++	p = xdr_decode_hyper(p, &ioerr->oer_comp_offset);
++	p = xdr_decode_hyper(p, &ioerr->oer_comp_length);
++	ioerr->oer_iswrite = be32_to_cpu(p[0]);
++	ioerr->oer_errno = be32_to_cpu(p[1]);
++	return p + 2;	/* past oer_iswrite and oer_errno */
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_decode_ioerr);
+diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gfs2/ops_fstype.c
+--- linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-08-31 20:42:05.500123860 -0400
+@@ -19,6 +19,7 @@
+ #include <linux/gfs2_ondisk.h>
+ #include <linux/slow-work.h>
+ #include <linux/quotaops.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
+ 
+ #include "gfs2.h"
+ #include "incore.h"
+@@ -1146,6 +1147,9 @@ static int fill_super(struct super_block
+ 	sb->s_magic = GFS2_MAGIC;
+ 	sb->s_op = &gfs2_super_ops;
+ 	sb->s_export_op = &gfs2_export_ops;
++#if defined(CONFIG_PNFSD)
++	sb->s_pnfs_op = &pnfs_dlm_export_ops;
++#endif /* CONFIG_PNFSD */
+ 	sb->s_xattr = gfs2_xattr_handlers;
+ 	sb->s_qcop = &gfs2_quotactl_ops;
+ 	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
+--- linux-2.6.34.noarch/fs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/Kconfig	2010-08-31 20:42:05.490222933 -0400
+@@ -224,6 +224,31 @@ config LOCKD_V4
+ config EXPORTFS
+ 	tristate
+ 
++config EXPORTFS_FILE_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 files layout type.
++	  Must be automatically selected by supporting filesystems.
++
++config EXPORTFS_OSD_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 objects layout type.
++	  Must be automatically selected by supporting osd
++	  filesystems.
++
++	  If unsure, say N.
++
++config EXPORTFS_BLOCK_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 blocks layout type.
++	  Must be automatically selected by supporting filesystems.
++
++
+ config NFS_ACL_SUPPORT
+ 	tristate
+ 	select FS_POSIX_ACL
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-08-31 20:42:05.503222878 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-08-31 20:42:05.503222878 -0400
+@@ -0,0 +1,66 @@
++#include <linux/module.h>
++#include <linux/uaccess.h>
++#include <linux/proc_fs.h>
++#include <linux/string.h>
++#include <linux/slab.h>
++#include <linux/ctype.h>
++#include <linux/sched.h>
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY NFSDBG_PNFS_LD
++
++struct pipefs_list bl_device_list;	/* passed to pipefs_assign_upcall_reply() */
++struct dentry *bl_device_pipe;		/* assigned in bl_pipe_init() */
++
++ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, size_t len)
++{
++	struct pipefs_hdr *reply;
++
++	dprintk("Entering %s...\n", __func__);
++
++	reply = pipefs_readmsg(filp, src, len);
++	if (IS_ERR(reply)) {
++		dprintk("ERROR: unable to read pipefs message.\n");
++		return PTR_ERR(reply);
++	}
++
++	/* Assign the result, which wakes the blocked thread; when that
++	 * fails the message is freed here and len is still returned. */
++	if (pipefs_assign_upcall_reply(reply, &bl_device_list)) {
++		dprintk("ERROR: failed to assign upcall with id %u\n",
++			reply->msgid);
++		kfree(reply);
++	}
++
++	return len;
++}
++
++static const struct rpc_pipe_ops bl_pipe_ops = {
++	.upcall         = pipefs_generic_upcall,
++	.downcall       = bl_pipe_downcall,	/* takes the __user buffer */
++	.destroy_msg    = pipefs_generic_destroy_msg,
++};
++
++int bl_pipe_init(void)
++{
++	/* Create the block_device pipe and initialise the upcall list. */
++	dprintk("%s: block_device pipefs registering...\n", __func__);
++	bl_device_pipe = pipefs_mkpipe("bl_device_pipe", &bl_pipe_ops, 1);
++	if (IS_ERR(bl_device_pipe))	/* an ERR_PTR is not "created" */
++		dprintk("ERROR, unable to make block_device pipe\n");
++	else if (!bl_device_pipe)
++		dprintk("bl_device_pipe is NULL!\n");
++	else
++		dprintk("bl_device_pipe created!\n");
++	pipefs_init_list(&bl_device_list);
++	return 0;	/* NOTE(review): failure is only logged, as before */
++}
++
++void bl_pipe_exit(void)
++{
++	dprintk("%s: block_device pipefs unregistering...\n", __func__);
++
++	/* bl_pipe_init() may have stored an ERR_PTR; then nothing to close */
++	if (!IS_ERR(bl_device_pipe))
++		pipefs_closepipe(bl_device_pipe);
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-08-31 20:42:05.504232855 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-08-31 20:42:05.504232855 -0400
+@@ -0,0 +1,1160 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayout.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++
++#include <linux/buffer_head.h> /* various write calls */
++#include <linux/bio.h> /* struct bio */
++#include <linux/vmalloc.h>
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
++MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
++
++/* Callback operations to the pNFS client */
++static struct pnfs_client_operations *pnfs_block_callback_ops;
++
++static void print_page(struct page *page)
++{
++	dprintk("PRINTPAGE page %p\n", page);	/* debug dump of page flags */
++	dprintk("        PagePrivate %d\n", PagePrivate(page));
++	dprintk("        PageUptodate %d\n", PageUptodate(page));
++	dprintk("        PageError %d\n", PageError(page));
++	dprintk("        PageDirty %d\n", PageDirty(page));
++	dprintk("        PageReferenced %d\n", PageReferenced(page));
++	dprintk("        PageLocked %d\n", PageLocked(page));
++	dprintk("        PageWriteback %d\n", PageWriteback(page));
++	dprintk("        PageMappedToDisk %d\n", PageMappedToDisk(page));
++	dprintk("\n");
++}
++
++/* Tell whether sector @isect, which lies in extent @be, is a hole, i.e.
++ * whether the page data needs to be initialized.
++ */
++static int is_hole(struct pnfs_block_extent *be, sector_t isect)
++{
++	if (be->be_state == PNFS_BLOCK_NONE_DATA)
++		return 1;
++	if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++		return !is_sector_initialized(be->be_inval, isect);
++	/* any other extent state: not a hole */
++	return 0;
++}
++
++/* Tell whether page data at sector @isect, which lies in extent @be, can
++ * be written to disk.
++ */
++static int is_writable(struct pnfs_block_extent *be, sector_t isect)
++{
++	if (be->be_state == PNFS_BLOCK_READWRITE_DATA)
++		return 1;
++	if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++		return is_sector_initialized(be->be_inval, isect);
++	/* any other extent state: not writable */
++	return 0;
++}
++
++static int
++dont_like_caller(struct nfs_page *req)
++{
++	/*
++	 * A non-zero wb_complete means we were called by _multi (return 1);
++	 * zero means we were called by _one (return 0).
++	 */
++	int from_multi = atomic_read(&req->wb_complete) != 0;
++
++	return from_multi;
++}
++
++/* Stub: logs entry and always reports PNFS_NOT_ATTEMPTED to the caller. */
++static enum pnfs_try_status
++bl_commit(struct nfs_write_data *nfs_data, int sync)
++{
++	dprintk("%s enter\n", __func__);
++	return PNFS_NOT_ATTEMPTED;
++}
++
++/* The data we are handed might be spread across several bios.  We need
++ * to track when the last one is finished; that is what refcnt is for.
++ */
++struct parallel_io {
++	struct kref refcnt;	/* final put calls destroy_parallel() */
++	struct rpc_call_ops call_ops;
++	void (*pnfs_callback) (void *data);	/* called with ->data */
++	void *data;
++};
++
++static inline struct parallel_io *alloc_parallel(void *data)
++{
++	struct parallel_io *par = kmalloc(sizeof(*par), GFP_KERNEL);
++
++	/* call_ops and pnfs_callback are not set here */
++	if (par == NULL)
++		return NULL;
++	par->data = data;
++	kref_init(&par->refcnt);
++	return par;
++}
++
++static inline void get_parallel(struct parallel_io *p)
++{
++	kref_get(&p->refcnt);	/* paired with put_parallel() */
++}
++
++static void destroy_parallel(struct kref *kref)
++{
++	struct parallel_io *par = container_of(kref, struct parallel_io, refcnt);
++
++	dprintk("%s enter\n", __func__);
++	par->pnfs_callback(par->data);	/* run the completion hook first */
++	kfree(par);			/* then release the tracker itself */
++}
++
++static inline void put_parallel(struct parallel_io *p)
++{
++	kref_put(&p->refcnt, destroy_parallel);	/* release fn on final put */
++}
++
++/* Submit @bio (if any) after taking a parallel_io ref; returns NULL. */
++static struct bio *bl_submit_bio(int rw, struct bio *bio)
++{
++	if (bio == NULL)
++		return NULL;
++	get_parallel(bio->bi_private);
++	dprintk("%s submitting %s bio %u@%llu\n", __func__,
++		rw == READ ? "read" : "write",
++		bio->bi_size, (u64)bio->bi_sector);
++	submit_bio(rw, bio);
++	return NULL;	/* callers assign this back to their bio pointer */
++}
++
++/* Flag @page according to whether its read succeeded (@ok non-zero). */
++static inline void bl_done_with_rpage(struct page *page, const int ok)
++{
++	if (!ok) {
++		ClearPageUptodate(page);
++		SetPageError(page);
++		SetPagePnfsErr(page);
++	} else {
++		ClearPagePnfsErr(page);
++		SetPageUptodate(page);
++	}
++	/* Page is unlocked via rpc_release.  Should really be done here. */
++}
++
++/* Read bio completion; basically copied from mpage_end_io_read */
++static void bl_end_io_read(struct bio *bio, int err)
++{
++	void *data = bio->bi_private;	/* fetched before bio_put() below */
++	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
++	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; /* last */
++
++	do {
++		struct page *page = bvec->bv_page;
++
++		if (--bvec >= bio->bi_io_vec)
++			prefetchw(&bvec->bv_page->flags);
++		bl_done_with_rpage(page, uptodate);
++	} while (bvec >= bio->bi_io_vec);
++	bio_put(bio);
++	put_parallel(data);	/* pairs with get_parallel() in bl_submit_bio() */
++}
++
++static void bl_read_cleanup(struct work_struct *work)
++{
++	struct rpc_task *tk = container_of(work, struct rpc_task, u.tk_work);
++	struct nfs_read_data *rd;
++
++	dprintk("%s enter\n", __func__);
++	rd = container_of(tk, struct nfs_read_data, task);
++	pnfs_block_callback_ops->nfs_readlist_complete(rd);
++}
++
++static void bl_end_par_io_read(void *data)
++{
++	struct nfs_read_data *rd = data;
++	struct work_struct *work = &rd->task.u.tk_work;
++
++	INIT_WORK(work, bl_read_cleanup);
++	schedule_work(work);	/* bl_read_cleanup() runs from the workqueue */
++}
++
++/* Stub that replaces the normal .rpc_call_done callback, which we do not
++ * want to be used.
++ */
++static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata)
++{
++	/* intentionally empty */
++}
++
++static enum pnfs_try_status
++bl_read_pagelist(struct nfs_read_data *rdata,
++		 unsigned nr_pages)
++{
++	int i, hole;
++	struct bio *bio = NULL;
++	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
++	sector_t isect, extent_length = 0;
++	struct parallel_io *par;
++	loff_t f_offset = rdata->args.offset;
++	size_t count = rdata->args.count;
++	struct page **pages = rdata->args.pages;
++	int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
++
++	dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__,
++	       nr_pages, f_offset, count);
++
++	if (dont_like_caller(rdata->req)) {
++		dprintk("%s dont_like_caller failed\n", __func__);
++		goto use_mds;
++	}
++	if ((nr_pages == 1) && PagePnfsErr(rdata->req->wb_page)) {
++		/* We want to fall back to mds in case of read_page
++		 * after error on read_pages.
++		 */
++		dprintk("%s PG_pnfserr set\n", __func__);
++		goto use_mds;
++	}
++	par = alloc_parallel(rdata);
++	if (!par)
++		goto use_mds;
++	par->call_ops = *rdata->pdata.call_ops;
++	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
++	par->pnfs_callback = bl_end_par_io_read;
++	/* At this point, we can no longer jump to use_mds */
++
++	isect = (sector_t) (f_offset >> 9);	/* bytes -> 512-byte sectors */
++	/* Code assumes extents are page-aligned */
++	for (i = pg_index; i < nr_pages; i++) {
++		if (!extent_length) {
++			/* We've used up the previous extent */
++			put_extent(be);
++			put_extent(cow_read);
++			bio = bl_submit_bio(READ, bio);
++			/* Get the next one */
++			be = find_get_extent(BLK_LSEG2EXT(rdata->pdata.lseg),
++					     isect, &cow_read);
++			if (!be) {
++				/* Error out this page */
++				bl_done_with_rpage(pages[i], 0);
++				break;
++			}
++			extent_length = be->be_length -
++				(isect - be->be_f_offset);
++			if (cow_read) {
++				sector_t cow_length = cow_read->be_length -
++					(isect - cow_read->be_f_offset);
++				extent_length = min(extent_length, cow_length);
++			}
++		}
++		hole = is_hole(be, isect);
++		if (hole && !cow_read) {
++			bio = bl_submit_bio(READ, bio);
++			/* Fill hole w/ zeroes w/o accessing device */
++			dprintk("%s Zeroing page for hole\n", __func__);
++			zero_user(pages[i], 0,
++				  min_t(int, PAGE_CACHE_SIZE, count));
++			print_page(pages[i]);
++			bl_done_with_rpage(pages[i], 1);
++		} else {
++			struct pnfs_block_extent *be_read;
++
++			be_read = (hole && cow_read) ? cow_read : be;
++			for (;;) {
++				if (!bio) {
++					bio = bio_alloc(GFP_NOIO, nr_pages - i);
++					if (!bio) {
++						/* Error out this page */
++						bl_done_with_rpage(pages[i], 0);
++						break;
++					}
++					bio->bi_sector = isect -
++						be_read->be_f_offset +
++						be_read->be_v_offset;
++					bio->bi_bdev = be_read->be_mdev;
++					bio->bi_end_io = bl_end_io_read;
++					bio->bi_private = par;
++				}
++				if (bio_add_page(bio, pages[i], PAGE_SIZE, 0))
++					break;
++				bio = bl_submit_bio(READ, bio);
++			}
++		}
++		isect += PAGE_CACHE_SIZE >> 9;
++		extent_length -= PAGE_CACHE_SIZE >> 9;
++	}
++	if ((isect << 9) >= rdata->inode->i_size) {
++		rdata->res.eof = 1;
++		rdata->res.count = rdata->inode->i_size - f_offset;
++	} else {
++		rdata->res.count = (isect << 9) - f_offset;
++	}
++	put_extent(be);
++	put_extent(cow_read);
++	bl_submit_bio(READ, bio);	/* submit whatever bio is still pending */
++	put_parallel(par);	/* drop the ref from alloc_parallel() */
++	return PNFS_ATTEMPTED;
++
++ use_mds:
++	dprintk("Giving up and using normal NFS\n");
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static void mark_extents_written(struct pnfs_block_layout *bl,
++				 __u64 offset, __u32 count)
++{
++	sector_t isect, end;	/* page-rounded range, in 512-byte sectors */
++	struct pnfs_block_extent *be;
++
++	dprintk("%s(%llu, %u)\n", __func__, offset, count);
++	if (count == 0)
++		return;
++	isect = (offset & (long)(PAGE_CACHE_MASK)) >> 9;
++	end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK);
++	end >>= 9;
++	while (isect < end) {	/* walk the range one extent at a time */
++		sector_t len;
++		be = find_get_extent(bl, isect, NULL);
++		BUG_ON(!be); /* FIXME */
++		len = min(end, be->be_f_offset + be->be_length) - isect;
++		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++			mark_for_commit(be, isect, len); /* What if fails? */
++		isect += len;
++		put_extent(be);
++	}
++}
++
++/* STUB - this needs thought */
++static inline void bl_done_with_wpage(struct page *page, const int ok)
++{
++	struct inode *inode;
++
++	/* end_page_writeback called in rpc_release.  Should be done here. */
++	if (ok)
++		return;
++	SetPageError(page);
++	SetPagePnfsErr(page);
++	/* Inline copy of nfs_zap_mapping; oh so fishy, needs deep thought */
++	if (page->mapping->nrpages == 0)
++		return;
++	inode = page->mapping->host;
++	spin_lock(&inode->i_lock);
++	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
++	spin_unlock(&inode->i_lock);
++}
++
++/* This is basically copied from mpage_end_io_read */
++static void bl_end_io_write(struct bio *bio, int err)
++{
++	void *data = bio->bi_private;
++	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
++	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
++
++	do {
++		struct page *page = bvec->bv_page;
++
++		if (--bvec >= bio->bi_io_vec)
++			prefetchw(&bvec->bv_page->flags);
++		bl_done_with_wpage(page, uptodate);
++	} while (bvec >= bio->bi_io_vec);
++	bio_put(bio);
++	put_parallel(data);
++}
++
++/* Function scheduled for call during bl_end_par_io_write,
++ * it marks sectors as written and extends the commitlist.
++ */
++static void bl_write_cleanup(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++	if (!wdata->task.tk_status) {
++		/* Marks for LAYOUTCOMMIT */
++		/* BUG - this should be called after each bio, not after
++		 * all finish, unless have some way of storing success/failure
++		 */
++		mark_extents_written(BLK_LSEG2EXT(wdata->pdata.lseg),
++				     wdata->args.offset, wdata->args.count);
++	}
++	pnfs_block_callback_ops->nfs_writelist_complete(wdata);
++}
++
++/* Called when last of bios associated with a bl_write_pagelist call finishes */
++static void
++bl_end_par_io_write(void *data)
++{
++	struct nfs_write_data *wdata = data;
++
++	/* STUB - ignoring error handling */
++	wdata->task.tk_status = 0;
++	wdata->verf.committed = NFS_FILE_SYNC;
++	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
++	schedule_work(&wdata->task.u.tk_work);
++}
++
++static enum pnfs_try_status
++bl_write_pagelist(struct nfs_write_data *wdata,
++		  unsigned nr_pages,
++		  int sync)
++{
++	int i;
++	struct bio *bio = NULL;
++	struct pnfs_block_extent *be = NULL;
++	sector_t isect, extent_length = 0;
++	struct parallel_io *par;
++	loff_t offset = wdata->args.offset;
++	size_t count = wdata->args.count;
++	struct page **pages = wdata->args.pages;
++	int pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
++
++	dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
++	if (!wdata->req->wb_lseg) {
++		dprintk("%s no lseg, falling back to MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	if (dont_like_caller(wdata->req)) {
++		dprintk("%s dont_like_caller failed\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	/* At this point, wdata->pages is a (sequential) list of nfs_pages.
++	 * We want to write each, and if there is an error remove it from
++	 * list and call
++	 * nfs_retry_request(req) to have it redone using nfs.
++	 * QUEST? Do as block or per req?  Think have to do per block
++	 * as part of end_bio
++	 */
++	par = alloc_parallel(wdata);
++	if (!par)
++		return PNFS_NOT_ATTEMPTED;
++	par->call_ops = *wdata->pdata.call_ops;
++	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
++	par->pnfs_callback = bl_end_par_io_write;
++	/* At this point, have to be more careful with error handling */
++
++	isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> 9);
++	for (i = pg_index; i < nr_pages; i++) {
++		if (!extent_length) {
++			/* We've used up the previous extent */
++			put_extent(be);
++			bio = bl_submit_bio(WRITE, bio);
++			/* Get the next one */
++			be = find_get_extent(BLK_LSEG2EXT(wdata->pdata.lseg),
++					     isect, NULL);
++			if (!be || !is_writable(be, isect)) {
++				/* FIXME */
++				bl_done_with_wpage(pages[i], 0);
++				break;
++			}
++			extent_length = be->be_length -
++				(isect - be->be_f_offset);
++		}
++		for (;;) {
++			if (!bio) {
++				bio = bio_alloc(GFP_NOIO, nr_pages - i);
++				if (!bio) {
++					/* Error out this page */
++					/* FIXME */
++					bl_done_with_wpage(pages[i], 0);
++					break;
++				}
++				bio->bi_sector = isect - be->be_f_offset +
++					be->be_v_offset;
++				bio->bi_bdev = be->be_mdev;
++				bio->bi_end_io = bl_end_io_write;
++				bio->bi_private = par;
++			}
++			if (bio_add_page(bio, pages[i], PAGE_SIZE, 0))
++				break;
++			bio = bl_submit_bio(WRITE, bio);
++		}
++		isect += PAGE_CACHE_SIZE >> 9;
++		extent_length -= PAGE_CACHE_SIZE >> 9;
++	}
++	wdata->res.count = (isect << 9) - (offset & (long)PAGE_CACHE_MASK);
++	put_extent(be);
++	bl_submit_bio(WRITE, bio);
++	put_parallel(par);
++	return PNFS_ATTEMPTED;
++}
++
++/* FIXME - range ignored */
++static void
++release_extents(struct pnfs_block_layout *bl,
++		struct pnfs_layout_range *range)
++{
++	int i;
++	struct pnfs_block_extent *be;
++
++	spin_lock(&bl->bl_ext_lock);
++	for (i = 0; i < EXTENT_LISTS; i++) {
++		while (!list_empty(&bl->bl_extents[i])) {
++			be = list_first_entry(&bl->bl_extents[i],
++					      struct pnfs_block_extent,
++					      be_node);
++			list_del(&be->be_node);
++			put_extent(be);
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++}
++
++/* Unlink and free every tracking entry queued on marks->im_tree.mtt_stub. */
++static void
++release_inval_marks(struct pnfs_inval_markings *marks)
++{
++	struct pnfs_inval_tracking *trk, *nxt;
++
++	list_for_each_entry_safe(trk, nxt, &marks->im_tree.mtt_stub, it_link) {
++		list_del(&trk->it_link);
++		kfree(trk);
++	}
++}
++
++/* Note we are relying on caller locking to prevent nasty races. */
++static void
++bl_free_layout(struct pnfs_layout_hdr *lo)
++{
++	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
++
++	dprintk("%s enter\n", __func__);
++	release_extents(bl, NULL);
++	release_inval_marks(&bl->bl_inval);
++	kfree(bl);
++}
++
++static struct pnfs_layout_hdr *
++bl_alloc_layout(struct inode *inode)
++{
++	struct pnfs_block_layout	*bl;
++
++	dprintk("%s enter\n", __func__);
++	bl = kzalloc(sizeof(*bl), GFP_KERNEL);
++	if (!bl)
++		return NULL;
++	spin_lock_init(&bl->bl_ext_lock);
++	INIT_LIST_HEAD(&bl->bl_extents[0]);
++	INIT_LIST_HEAD(&bl->bl_extents[1]);
++	INIT_LIST_HEAD(&bl->bl_commit);
++	bl->bl_count = 0;
++	bl->bl_blocksize = NFS_SERVER(inode)->pnfs_blksize >> 9;
++	INIT_INVAL_MARKS(&bl->bl_inval, bl->bl_blocksize);
++	return &bl->bl_layout;
++}
++
++static void
++bl_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	dprintk("%s enter\n", __func__);
++	kfree(lseg);
++}
++
++/* Because the generic infrastructure does not correctly merge layouts,
++ * we pretty much ignore lseg, and store all data layout wide, so we
++ * can correctly merge.  Eventually we should push some correct merge
++ * behavior up to the generic code, as the current behavior tends to
++ * cause lots of unnecessary overlapping LAYOUTGET requests.
++ */
++static struct pnfs_layout_segment *
++bl_alloc_lseg(struct pnfs_layout_hdr *lo,
++	      struct nfs4_layoutget_res *lgr)
++{
++	struct pnfs_layout_segment *lseg;
++	int status;
++
++	dprintk("%s enter\n", __func__);
++	lseg = kzalloc(sizeof(*lseg) + 0, GFP_KERNEL);
++	if (!lseg)
++		return NULL;
++	status = nfs4_blk_process_layoutget(lo, lgr);
++	if (status) {
++		/* We don't want to call the full-blown bl_free_lseg,
++		 * since on error extents were not touched.
++		 */
++		/* STUB - we really want to distinguish between 2 error
++		 * conditions here.  This lseg failed, but lo data structures
++		 * are OK, or we hosed the lo data structures.  The calling
++		 * code probably needs to distinguish this too.
++		 */
++		kfree(lseg);
++		return ERR_PTR(status);
++	}
++	return lseg;
++}
++
++static int
++bl_setup_layoutcommit(struct pnfs_layout_hdr *lo,
++		      struct nfs4_layoutcommit_args *arg)
++{
++	struct nfs_server *nfss = PNFS_NFS_SERVER(lo);
++	struct bl_layoutupdate_data *layoutupdate_data;
++
++	dprintk("%s enter\n", __func__);
++	/* Need to ensure commit is block-size aligned */
++	if (nfss->pnfs_blksize) {
++		u64 mask = nfss->pnfs_blksize - 1;
++		u64 offset = arg->range.offset & mask;
++
++		arg->range.offset -= offset;
++		arg->range.length += offset + mask;
++		arg->range.length &= ~mask;
++	}
++
++	layoutupdate_data = kmalloc(sizeof(struct bl_layoutupdate_data),
++					 GFP_KERNEL);
++	if (unlikely(!layoutupdate_data))
++		return -ENOMEM;
++	INIT_LIST_HEAD(&layoutupdate_data->ranges);
++	arg->layoutdriver_data = layoutupdate_data;
++
++	return 0;
++}
++
++static void
++bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr,
++		       const struct nfs4_layoutcommit_args *arg)
++{
++	dprintk("%s enter\n", __func__);
++	encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg);
++}
++
++static void
++bl_cleanup_layoutcommit(struct pnfs_layout_hdr *lo,
++			struct nfs4_layoutcommit_args *arg, int status)
++{
++	dprintk("%s enter\n", __func__);
++	clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), arg, status);
++	kfree(arg->layoutdriver_data);
++}
++
++/* Free every device on mid->bm_devlist, then mid itself; NULL is ignored.
++ * NOTE(review): free_block_dev() runs under bm_lock - confirm it cannot sleep.
++ */
++static void free_blk_mountid(struct block_mount_id *mid)
++{
++	struct pnfs_block_dev *dev, *next;
++
++	if (!mid)
++		return;
++	spin_lock(&mid->bm_lock);
++	list_for_each_entry_safe(dev, next, &mid->bm_devlist, bm_node) {
++		list_del(&dev->bm_node);
++		free_block_dev(dev);
++	}
++	spin_unlock(&mid->bm_lock);
++	kfree(mid);
++}
++
++/* GETDEVICEINFO for d_id, decoded into a pnfs_block_dev (NULL on failure).
++ * Mostly copied from filelayout's get_device_info; should be generic pnfs.
++ */
++static struct pnfs_block_dev *
++nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
++			struct pnfs_deviceid *d_id,
++			struct list_head *sdlist)
++{
++	struct pnfs_device *dev;
++	struct pnfs_block_dev *rv = NULL;
++	u32 max_resp_sz;
++	int max_pages;
++	struct page **pages = NULL;
++	int i, rc;
++
++	/*
++	 * Use the session max response size as the basis for setting
++	 * GETDEVICEINFO's maxcount
++	 */
++	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
++	max_pages = max_resp_sz >> PAGE_SHIFT;
++	dprintk("%s max_resp_sz %u max_pages %d\n",
++		__func__, max_resp_sz, max_pages);
++
++	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
++	if (!dev) {
++		dprintk("%s kmalloc failed\n", __func__);
++		return NULL;
++	}
++
++	pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
++	if (pages == NULL) {
++		kfree(dev);
++		return NULL;
++	}
++	for (i = 0; i < max_pages; i++) {
++		pages[i] = alloc_page(GFP_KERNEL);
++		if (!pages[i])
++			goto out_free;
++	}
++
++	/* set dev->area; it is NULL (kzalloc) until vmap succeeds */
++	dev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
++	if (!dev->area)
++		goto out_free;
++
++	memcpy(&dev->dev_id, d_id, sizeof(*d_id));
++	dev->layout_type = LAYOUT_BLOCK_VOLUME;
++	dev->dev_notify_types = 0;
++	dev->pages = pages;
++	dev->pgbase = 0;
++	dev->pglen = PAGE_SIZE * max_pages;
++	dev->mincount = 0;
++
++	dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
++	rc = pnfs_block_callback_ops->nfs_getdeviceinfo(server, dev);
++	dprintk("%s getdevice info returns %d\n", __func__, rc);
++	if (rc)
++		goto out_free;
++
++	rv = nfs4_blk_decode_device(server, dev, sdlist);
++ out_free:
++	if (dev->area != NULL)
++		vunmap(dev->area);
++	for (i = 0; i < max_pages && pages[i]; i++)
++		__free_page(pages[i]);	/* pages[] fills front-to-back */
++	kfree(pages);
++	kfree(dev);
++	return rv;
++}
++
++
++/*
++ * Retrieve the list of available devices for the mountpoint.
++ * On success the device list is stored in server->pnfs_ld_data and 0 is
++ * returned; on any failure a negative errno is returned and nothing is kept.
++ */
++static int
++bl_initialize_mountpoint(struct nfs_server *server, const struct nfs_fh *fh)
++{
++	struct block_mount_id *b_mt_id = NULL;
++	struct pnfs_devicelist *dlist = NULL;
++	struct pnfs_block_dev *bdev;
++	LIST_HEAD(block_disklist);
++	int status = -ENOMEM, i;	/* covers both allocations below */
++
++	dprintk("%s enter\n", __func__);
++
++	if (server->pnfs_blksize == 0) {
++		dprintk("%s Server did not return blksize\n", __func__);
++		return -EINVAL;
++	}
++	b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_KERNEL);
++	if (!b_mt_id)
++		goto out_error;
++	/* Initialize nfs4 block layout mount id */
++	spin_lock_init(&b_mt_id->bm_lock);
++	INIT_LIST_HEAD(&b_mt_id->bm_devlist);
++
++	dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_KERNEL);
++	if (!dlist)
++		goto out_error;
++	dlist->eof = 0;
++	while (!dlist->eof) {
++		status = pnfs_block_callback_ops->nfs_getdevicelist(
++							server, fh, dlist);
++		if (status)
++			goto out_error;
++		dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n",
++			__func__, dlist->num_devs, dlist->eof);
++		/* For each device returned in dlist, call GETDEVICEINFO, and
++		 * decode the opaque topology encoding to create a flat
++		 * volume topology, matching VOLUME_SIMPLE disk signatures
++		 * to disks in the visible block disk list.
++		 * Construct an LVM meta device from the flat volume topology.
++		 */
++		for (i = 0; i < dlist->num_devs; i++) {
++			bdev = nfs4_blk_get_deviceinfo(server, fh,
++						     &dlist->dev_id[i],
++						     &block_disklist);
++			if (!bdev) {
++				status = -EIO;
++				goto out_error;
++			}
++			spin_lock(&b_mt_id->bm_lock);
++			list_add(&bdev->bm_node, &b_mt_id->bm_devlist);
++			spin_unlock(&b_mt_id->bm_lock);
++		}
++	}
++	dprintk("%s SUCCESS\n", __func__);
++	server->pnfs_ld_data = b_mt_id;
++
++ out_return:
++	kfree(dlist);
++	return status;
++
++ out_error:
++	free_blk_mountid(b_mt_id);
++	goto out_return;
++}
++
++static int
++bl_uninitialize_mountpoint(struct nfs_server *server)
++{
++	struct block_mount_id *b_mt_id = server->pnfs_ld_data;
++
++	dprintk("%s enter\n", __func__);
++	free_blk_mountid(b_mt_id);
++	dprintk("%s RETURNS\n", __func__);
++	return 0;
++}
++
++/* STUB - mark intersection of layout and page as bad, so is not
++ * used again.
++ */
++static void mark_bad_read(void)
++{
++	return;
++}
++
++/* Copied from buffer.c */
++static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
++{
++	if (uptodate) {
++		set_buffer_uptodate(bh);
++	} else {
++		/* This happens, due to failed READA attempts. */
++		clear_buffer_uptodate(bh);
++	}
++	unlock_buffer(bh);
++}
++
++/* Copied from buffer.c */
++static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
++{
++	__end_buffer_read_notouch(bh, uptodate);
++}
++
++/*
++ * map_block:  map a requested I/O block (isect) into an offset in the LVM
++ * meta block_device
++ */
++static void
++map_block(sector_t isect, struct pnfs_block_extent *be, struct buffer_head *bh)
++{
++	dprintk("%s enter be=%p\n", __func__, be);
++
++	set_buffer_mapped(bh);
++	bh->b_bdev = be->be_mdev;
++	bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >>
++		(be->be_mdev->bd_inode->i_blkbits - 9);
++
++	dprintk("%s isect %ld, bh->b_blocknr %ld, using bsize %Zd\n",
++				__func__, (long)isect,
++				(long)bh->b_blocknr,
++				bh->b_size);
++	return;
++}
++
++/* Given an unmapped page, zero it (or read in page for COW),
++ * and set appropriate flags/markings, but it is safe to not initialize
++ * the range given in [from, to).
++ */
++/* Returns 0 or -errno.  This is loosely based on nobh_write_begin */
++static int
++init_page_for_write(struct pnfs_block_layout *bl, struct page *page,
++		    unsigned from, unsigned to, sector_t **pages_to_mark)
++{
++	struct buffer_head *bh;
++	int inval, ret = -EIO;
++	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
++	sector_t isect;
++
++	dprintk("%s enter, %p\n", __func__, page);
++	bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0);
++	if (!bh) {	/* nothing to undo; cleanup must not see a NULL bh */
++		mark_bad_read();
++		return -ENOMEM;
++	}
++
++	isect = (sector_t)page->index << (PAGE_CACHE_SHIFT - 9);
++	be = find_get_extent(bl, isect, &cow_read);
++	if (!be)
++		goto cleanup;
++	inval = is_hole(be, isect);
++	dprintk("%s inval=%i, from=%u, to=%u\n", __func__, inval, from, to);
++	if (inval) {
++		if (be->be_state == PNFS_BLOCK_NONE_DATA) {
++			dprintk("%s PANIC - got NONE_DATA extent %p\n",
++				__func__, be);
++			goto cleanup;
++		}
++		map_block(isect, be, bh);
++		unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
++	}
++	if (PageUptodate(page)) {
++		/* Do nothing */
++	} else if (inval && !cow_read) {
++		zero_user_segments(page, 0, from, to, PAGE_CACHE_SIZE);
++	} else if (0 < from || PAGE_CACHE_SIZE > to) {
++		struct pnfs_block_extent *read_extent;
++
++		read_extent = (inval && cow_read) ? cow_read : be;
++		map_block(isect, read_extent, bh);
++		lock_buffer(bh);
++		bh->b_end_io = end_buffer_read_nobh;
++		submit_bh(READ, bh);
++		dprintk("%s: Waiting for buffer read\n", __func__);
++		/* XXX Don't really want to hold layout lock here */
++		wait_on_buffer(bh);
++		if (!buffer_uptodate(bh))
++			goto cleanup;
++	}
++	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
++		/* There is a BUG here if is a short copy after write_begin,
++		 * but I think this is a generic fs bug.  The problem is that
++		 * we have marked the page as initialized, but it is possible
++		 * that the section not copied may never get copied.
++		 */
++		ret = mark_initialized_sectors(be->be_inval, isect,
++					       PAGE_CACHE_SECTORS,
++					       pages_to_mark);
++		/* Want to preallocate mem so above can't fail */
++		if (ret)
++			goto cleanup;
++	}
++	SetPageMappedToDisk(page);
++	ret = 0;
++
++cleanup:
++	free_buffer_head(bh);
++	put_extent(be);
++	put_extent(cow_read);
++	if (ret) {
++		/* Need to mark layout with bad read...should now
++		 * just use nfs4 for reads and writes.
++		 */
++		mark_bad_read();
++	}
++	return ret;
++}
++
++static int
++bl_write_begin(struct pnfs_layout_segment *lseg, struct page *page, loff_t pos,
++	       unsigned count, struct pnfs_fsdata *fsdata)
++{
++	unsigned from, to;
++	int ret;
++	sector_t *pages_to_mark = NULL;
++	struct pnfs_block_layout *bl = BLK_LSEG2EXT(lseg);
++
++	dprintk("%s enter, %u@%lld\n", __func__, count, pos);
++	print_page(page);
++	/* The following code assumes blocksize >= PAGE_CACHE_SIZE */
++	if (bl->bl_blocksize < (PAGE_CACHE_SIZE >> 9)) {
++		dprintk("%s Can't handle blocksize %llu\n", __func__,
++			(u64)bl->bl_blocksize);
++		put_lseg(fsdata->lseg);
++		fsdata->lseg = NULL;
++		return 0;
++	}
++	if (PageMappedToDisk(page)) {
++		/* Basically, this is a flag that says we have
++		 * successfully called write_begin already on this page.
++		 */
++		/* NOTE - there are cache consistency issues here.
++		 * For example, what if the layout is recalled, then regained?
++		 * If the file is closed and reopened, will the page flags
++		 * be reset?  If not, we'll have to use layout info instead of
++		 * the page flag.
++		 */
++		return 0;
++	}
++	from = pos & (PAGE_CACHE_SIZE - 1);
++	to = from + count;
++	ret = init_page_for_write(bl, page, from, to, &pages_to_mark);
++	if (ret) {
++		dprintk("%s init page failed with %i", __func__, ret);
++		/* Revert back to plain NFS and just continue on with
++		 * write.  This assumes there is no request attached, which
++		 * should be true if we get here.
++		 */
++		BUG_ON(PagePrivate(page));
++		put_lseg(fsdata->lseg);
++		fsdata->lseg = NULL;
++		kfree(pages_to_mark);
++		ret = 0;
++	} else {
++		fsdata->private = pages_to_mark;
++	}
++	return ret;
++}
++
++/* CAREFUL - what happens if copied < count??? */
++static int
++bl_write_end(struct inode *inode, struct page *page, loff_t pos,
++	     unsigned count, unsigned copied, struct pnfs_layout_segment *lseg)
++{
++	dprintk("%s enter, %u@%lld, lseg=%p\n", __func__, count, pos, lseg);
++	print_page(page);
++	if (lseg)
++		SetPageUptodate(page);
++	return 0;
++}
++
++/* Return any memory allocated to fsdata->private, and take advantage
++ * of no page locks to mark pages noted in write_begin as needing
++ * initialization.  Every page grabbed here is unlocked and released again.
++ */
++static void
++bl_write_end_cleanup(struct file *filp, struct pnfs_fsdata *fsdata)
++{
++	struct page *page;
++	pgoff_t index;
++	sector_t *pos;
++	struct address_space *mapping = filp->f_mapping;
++	struct pnfs_fsdata *fake_data;
++	struct pnfs_layout_segment *lseg;
++
++	if (!fsdata)
++		return;
++	lseg = fsdata->lseg;
++	if (!lseg)
++		return;
++	pos = fsdata->private;
++	if (!pos)
++		return;
++	dprintk("%s enter with pos=%llu\n", __func__, (u64)(*pos));
++	for (; *pos != ~0; pos++) {
++		index = *pos >> (PAGE_CACHE_SHIFT - 9);
++		/* XXX How do we properly deal with failures here??? */
++		page = grab_cache_page_write_begin(mapping, index, 0);
++		if (!page) {
++			printk(KERN_ERR "%s BUG BUG BUG NoMem\n", __func__);
++			continue;
++		}
++		dprintk("%s: Examining block page\n", __func__);
++		print_page(page);
++		if (PageMappedToDisk(page)) {
++			unlock_page(page);
++			page_cache_release(page); /* drop grab_cache_page ref */
++			continue;
++		}
++		/* XXX How do we properly deal with failures here??? */
++		dprintk("%s Marking block page\n", __func__);
++		init_page_for_write(BLK_LSEG2EXT(fsdata->lseg), page,
++				    PAGE_CACHE_SIZE, PAGE_CACHE_SIZE, NULL);
++		print_page(page);
++		fake_data = kzalloc(sizeof(*fake_data), GFP_KERNEL);
++		if (!fake_data) {
++			printk(KERN_ERR "%s BUG BUG BUG NoMem\n", __func__);
++			unlock_page(page);
++			page_cache_release(page); /* drop grab_cache_page ref */
++			continue;
++		}
++		get_lseg(lseg);
++		fake_data->lseg = lseg;
++		fake_data->bypass_eof = 1;
++		mapping->a_ops->write_end(filp, mapping,
++					  (loff_t)index << PAGE_CACHE_SHIFT,
++					  PAGE_CACHE_SIZE, PAGE_CACHE_SIZE,
++					  page, fake_data);
++		/* Note fake_data is freed by nfs_write_end */
++	}
++	kfree(fsdata->private);
++	fsdata->private = NULL;
++}
++
++static ssize_t
++bl_get_stripesize(struct pnfs_layout_hdr *lo)
++{
++	dprintk("%s enter\n", __func__);
++	return 0;
++}
++
++/* Coalescing test, reached from nfs_can_coalesce_requests through
++ * nfs_pageio_do_add_request.  Returns nonzero when prev and req may be
++ * merged into one I/O, zero when something forbids it.
++ */
++static int
++bl_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
++	   struct nfs_page *req)
++{
++	dprintk("%s enter\n", __func__);
++	if (!pgio->pg_iswrite)
++		return 1;
++	/* Writes are merged only when both requests share one lseg */
++	return prev->wb_lseg == req->wb_lseg;
++}
++
++static struct layoutdriver_io_operations blocklayout_io_operations = {
++	.commit				= bl_commit,
++	.read_pagelist			= bl_read_pagelist,
++	.write_pagelist			= bl_write_pagelist,
++	.write_begin			= bl_write_begin,
++	.write_end			= bl_write_end,
++	.write_end_cleanup		= bl_write_end_cleanup,
++	.alloc_layout			= bl_alloc_layout,
++	.free_layout			= bl_free_layout,
++	.alloc_lseg			= bl_alloc_lseg,
++	.free_lseg			= bl_free_lseg,
++	.setup_layoutcommit		= bl_setup_layoutcommit,
++	.encode_layoutcommit		= bl_encode_layoutcommit,
++	.cleanup_layoutcommit		= bl_cleanup_layoutcommit,
++	.initialize_mountpoint		= bl_initialize_mountpoint,
++	.uninitialize_mountpoint	= bl_uninitialize_mountpoint,
++};
++
++static struct layoutdriver_policy_operations blocklayout_policy_operations = {
++	.get_stripesize			= bl_get_stripesize,
++	.pg_test			= bl_pg_test,
++};
++
++static struct pnfs_layoutdriver_type blocklayout_type = {
++	.id = LAYOUT_BLOCK_VOLUME,
++	.name = "LAYOUT_BLOCK_VOLUME",
++	.ld_io_ops = &blocklayout_io_operations,
++	.ld_policy_ops = &blocklayout_policy_operations,
++};
++
++static int __init nfs4blocklayout_init(void)
++{
++	dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
++
++	pnfs_block_callback_ops = pnfs_register_layoutdriver(&blocklayout_type);
++	bl_pipe_init();
++	return 0;
++}
++
++static void __exit nfs4blocklayout_exit(void)
++{
++	dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
++	       __func__);
++
++	pnfs_unregister_layoutdriver(&blocklayout_type);
++	bl_pipe_exit();
++}
++
++module_init(nfs4blocklayout_init);
++module_exit(nfs4blocklayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-08-31 20:42:05.506119071 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-08-31 20:42:05.506119071 -0400
+@@ -0,0 +1,335 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayoutdev.c
++ *
++ *  Device operations for the pnfs nfs4 block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#include <linux/module.h>
++#include <linux/buffer_head.h> /* __bread */
++
++#include <linux/genhd.h>
++#include <linux/blkdev.h>
++#include <linux/hash.h>
++
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++uint32_t *blk_overflow(uint32_t *p, uint32_t *end, size_t nbytes)
++{
++	/* First word past the nbytes (rounded up to XDR quads) starting at p */
++	uint32_t *limit = p + XDR_QUADLEN(nbytes);
++
++	return unlikely(limit > end || limit < p) ? NULL : p;
++}
++EXPORT_SYMBOL(blk_overflow);
++
++/* Open a block_device by device number.
++ * Returns the device, or NULL (never an ERR_PTR) if it cannot be opened.
++ */
++struct block_device *nfs4_blkdev_get(dev_t dev)
++{
++	struct block_device *bdev;
++
++	dprintk("%s enter\n", __func__);
++	bdev = open_by_devnum(dev, FMODE_READ);
++	if (!IS_ERR(bdev))
++		return bdev;
++	dprintk("%s failed to open device : %ld\n",
++			__func__, PTR_ERR(bdev));
++	return NULL;
++}
++
++/*
++ * Release the block device
++ */
++int nfs4_blkdev_put(struct block_device *bdev)
++{
++	dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev),
++			MINOR(bdev->bd_dev));
++	bd_release(bdev);
++	return blkdev_put(bdev, FMODE_READ);
++}
++
++/* Hands the XDR encoded pnfs_block_deviceaddr4 (draft-8) held in dev->area to
++ * the userspace daemon and opens the block device it names; NULL on failure.
++ */
++struct pnfs_block_dev *
++nfs4_blk_decode_device(struct nfs_server *server,
++		       struct pnfs_device *dev,
++		       struct list_head *sdlist)
++{
++	struct pnfs_block_dev *rv = NULL;
++	struct block_device *bd = NULL;
++	struct pipefs_hdr *msg = NULL, *reply = NULL;
++	uint32_t major, minor;
++
++	dprintk("%s enter\n", __func__);
++
++	if (IS_ERR(bl_device_pipe))
++		return NULL;
++	dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
++	dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
++		dev->mincount);
++	msg = pipefs_alloc_init_msg(0, BL_DEVICE_MOUNT, 0, dev->area,
++				    dev->mincount);
++	if (IS_ERR(msg)) {
++		dprintk("ERROR: couldn't make pipefs message.\n");
++		goto out_err;
++	}
++	msg->msgid = hash_ptr(&msg, sizeof(msg->msgid) * 8);
++	msg->status = BL_DEVICE_REQUEST_INIT;
++
++	dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
++	reply = pipefs_queue_upcall_waitreply(bl_device_pipe, msg,
++					      &bl_device_list, 0, 0);
++
++	if (IS_ERR(reply)) {
++		dprintk("ERROR: upcall_waitreply failed\n");
++		goto out_err;
++	}
++	if (reply->status != BL_DEVICE_REQUEST_PROC) {
++		dprintk("%s daemon failed to process device, status %d\n",
++			__func__, (int)reply->status);
++		goto out_err;
++	}
++	memcpy(&major, (uint32_t *)(payload_of(reply)), sizeof(uint32_t));
++	memcpy(&minor, (uint32_t *)(payload_of(reply) + sizeof(uint32_t)),
++		sizeof(uint32_t));
++	bd = nfs4_blkdev_get(MKDEV(major, minor));
++	if (!bd) {	/* nfs4_blkdev_get() reports failure as NULL */
++		dprintk("%s failed to open device %u:%u\n",
++			__func__, major, minor);
++		goto out_err;
++	}
++
++	rv = kzalloc(sizeof(*rv), GFP_KERNEL);
++	if (!rv)
++		goto out_err;
++
++	rv->bm_mdev = bd;
++	memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct pnfs_deviceid));
++	dprintk("%s Created device %s with bd_block_size %u\n",
++		__func__,
++		bd->bd_disk->disk_name,
++		bd->bd_block_size);
++	kfree(reply);
++	kfree(msg);
++	return rv;
++
++out_err:
++	kfree(rv);
++	if (!IS_ERR(reply))
++		kfree(reply);
++	if (!IS_ERR(msg))
++		kfree(msg);
++	return NULL;
++}
++
++/* Look up the block_device recorded on this mount for a server deviceid;
++ * returns NULL when no entry of the mount's device list carries that id. */
++static struct block_device *translate_devid(struct pnfs_layout_hdr *lo,
++					    struct pnfs_deviceid *id)
++{
++	struct block_mount_id *mount_id;
++	struct block_device *found = NULL;
++	struct pnfs_block_dev *cur;
++
++	dprintk("%s enter, lo=%p, id=%p\n", __func__, lo, id);
++	mount_id = BLK_ID(lo);
++	spin_lock(&mount_id->bm_lock);
++	list_for_each_entry(cur, &mount_id->bm_devlist, bm_node) {
++		if (memcmp(id->data, cur->bm_mdevid.data,
++			   NFS4_PNFS_DEVICEID4_SIZE))
++			continue;
++		found = cur->bm_mdev;
++		break;
++	}
++	spin_unlock(&mount_id->bm_lock);
++	dprintk("%s returning %p\n", __func__, found);
++	return found;
++}
++
++/* Tracks info needed to ensure extents in layout obey constraints of spec */
++struct layout_verification {
++	u32 mode;	/* R or RW */
++	u64 start;	/* Expected start of next non-COW extent */
++	u64 inval;	/* Start of INVAL coverage */
++	u64 cowread;	/* End of COW read coverage */
++};
++
++/* Verify the extent meets the layout requirements of the pnfs-block draft,
++ * section 2.3.1.
++ */
++static int verify_extent(struct pnfs_block_extent *be,
++			 struct layout_verification *lv)
++{
++	if (lv->mode == IOMODE_READ) {
++		if (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
++		    be->be_state == PNFS_BLOCK_INVALID_DATA)
++			return -EIO;
++		if (be->be_f_offset != lv->start)
++			return -EIO;
++		lv->start += be->be_length;
++		return 0;
++	}
++	/* lv->mode == IOMODE_RW */
++	if (be->be_state == PNFS_BLOCK_READWRITE_DATA) {
++		if (be->be_f_offset != lv->start)
++			return -EIO;
++		if (lv->cowread > lv->start)
++			return -EIO;
++		lv->start += be->be_length;
++		lv->inval = lv->start;
++		return 0;
++	} else if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
++		if (be->be_f_offset != lv->start)
++			return -EIO;
++		lv->start += be->be_length;
++		return 0;
++	} else if (be->be_state == PNFS_BLOCK_READ_DATA) {
++		if (be->be_f_offset > lv->start)
++			return -EIO;
++		if (be->be_f_offset < lv->inval)
++			return -EIO;
++		if (be->be_f_offset < lv->cowread)
++			return -EIO;
++		/* It looks like you might want to min this with lv->start,
++		 * but you really don't.
++		 */
++		lv->inval = lv->inval + be->be_length;
++		lv->cowread = be->be_f_offset + be->be_length;
++		return 0;
++	} else
++		return -EIO;
++}
++
++/* XDR decode pnfs_block_layout4 structure: stage and verify_extent() every
++ * extent in lgr->layout, then merge them into lo under bl_ext_lock.
++ * Returns 0 on success, -ENOMEM if alloc_extent() fails, -EIO on a
++ * malformed layout, or the error from add_and_merge_extent().
++ */
++int
++nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
++			   struct nfs4_layoutget_res *lgr)
++{
++	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
++	uint32_t *p = (uint32_t *)lgr->layout.buf;
++	uint32_t *end = (uint32_t *)((char *)lgr->layout.buf + lgr->layout.len);
++	int i, status = -EIO;
++	uint32_t count;
++	struct pnfs_block_extent *be = NULL, *save;
++	uint64_t tmp; /* Used by READSECTOR */
++	struct layout_verification lv = {
++		.mode = lgr->range.iomode,
++		.start = lgr->range.offset >> 9,
++		.inval = lgr->range.offset >> 9,
++		.cowread = lgr->range.offset >> 9,
++	};
++	LIST_HEAD(extents);
++
++	BLK_READBUF(p, end, 4);
++	READ32(count);
++
++	dprintk("%s enter, number of extents %i\n", __func__, count);
++	/* Reject counts the buffer cannot hold so the product cannot wrap */
++	if (count > (size_t)((char *)end - (char *)p) /
++		    (28 + NFS4_PNFS_DEVICEID4_SIZE))
++		goto out_err;
++	BLK_READBUF(p, end, (28 + NFS4_PNFS_DEVICEID4_SIZE) * count);
++
++	/* Decode individual extents into a temporary staging list until the
++	 * whole layout is decoded, to make error recovery easier.
++	 */
++	for (i = 0; i < count; i++) {
++		be = alloc_extent();
++		if (!be) {
++			status = -ENOMEM;
++			goto out_err;
++		}
++		READ_DEVID(&be->be_devid);
++		be->be_mdev = translate_devid(lo, &be->be_devid);
++		if (!be->be_mdev)
++			goto out_err;
++		/* Next three values: bytes on the wire, 512-byte sectors here */
++		READ_SECTOR(be->be_f_offset);
++		READ_SECTOR(be->be_length);
++		READ_SECTOR(be->be_v_offset);
++		READ32(be->be_state);
++		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++			be->be_inval = &bl->bl_inval;
++		if (verify_extent(be, &lv)) {
++			dprintk("%s verify failed\n", __func__);
++			goto out_err;
++		}
++		list_add_tail(&be->be_node, &extents);
++	}
++	be = NULL; /* every decoded extent is now owned by the staging list */
++	if (p != end) {
++		dprintk("%s Undecoded cruft at end of opaque\n", __func__);
++		goto out_err;
++	}
++	if (lgr->range.offset + lgr->range.length != lv.start << 9) {
++		dprintk("%s Final length mismatch\n", __func__);
++		goto out_err;
++	}
++	if (lv.start < lv.cowread) {
++		dprintk("%s Final uncovered COW extent\n", __func__);
++		goto out_err;
++	}
++	/* Extents decoded properly; merge them into the existing extents */
++	spin_lock(&bl->bl_ext_lock);
++	list_for_each_entry_safe(be, save, &extents, be_node) {
++		list_del(&be->be_node);
++		status = add_and_merge_extent(bl, be);
++		if (status) {
++			spin_unlock(&bl->bl_ext_lock);
++			/* This is a fairly catastrophic error, as the
++			 * entire layout extent lists are now corrupted.
++			 * We should have some way to distinguish this.
++			 */
++			be = NULL;
++			goto out_err;
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++	status = 0;
++ out:
++	dprintk("%s returns %i\n", __func__, status);
++	return status;
++
++ out_err:
++	put_extent(be);
++	while (!list_empty(&extents)) {
++		be = list_first_entry(&extents, struct pnfs_block_extent,
++				      be_node);
++		list_del(&be->be_node);
++		put_extent(be);
++	}
++	goto out;
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-08-31 20:42:05.506119071 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-08-31 20:42:05.506119071 -0400
+@@ -0,0 +1,120 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayoutdm.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2007 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Fred Isaman <iisaman@umich.edu>
++ *  Andy Adamson <andros@citi.umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++
++#include <linux/genhd.h> /* gendisk - used in a dprintk*/
++#include <linux/sched.h>
++#include <linux/hash.h>
++
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/* Sizes for nfs4_blk_flatten(); struct sizes are rounded up to 8 bytes */
++#define ARGSIZE   24    /* Max bytes needed for linear target arg string */
++#define SPECSIZE (sizeof8(struct dm_target_spec) + ARGSIZE)
++#define SPECS_PER_PAGE (PAGE_SIZE / SPECSIZE)
++#define SPEC_HEADER_ADJUST (SPECS_PER_PAGE - \
++			    (PAGE_SIZE - sizeof8(struct dm_ioctl)) / SPECSIZE)
++#define roundup8(x) (((x)+7) & ~7)	/* x rounded up to a multiple of 8 */
++#define sizeof8(x) roundup8(sizeof(x))	/* sizeof(x), rounded up to 8 */
++
++/* Send a BL_DEVICE_UMOUNT upcall for @dev over bl_device_pipe and wait for
++ * the reply.  Returns 0 if the reply status is BL_DEVICE_REQUEST_PROC, else 1.
++ */
++static int dev_remove(dev_t dev)
++{
++	/* Payload layout: 32-bit major followed by 32-bit minor */
++	uint32_t devnum[2] = { MAJOR(dev), MINOR(dev) };
++	struct pipefs_hdr *msg = NULL, *reply = NULL;
++	int ret = 1;
++
++	dprintk("Entering %s\n", __func__);
++	if (IS_ERR(bl_device_pipe))
++		return ret;
++
++	msg = pipefs_alloc_init_msg(0, BL_DEVICE_UMOUNT, 0, (void *)devnum,
++				    sizeof(devnum));
++	if (IS_ERR(msg)) {
++		dprintk("ERROR: couldn't make pipefs message.\n");
++		goto out;
++	}
++	msg->status = BL_DEVICE_REQUEST_INIT;
++	msg->msgid = hash_ptr(&msg, sizeof(msg->msgid) * 8);
++
++	reply = pipefs_queue_upcall_waitreply(bl_device_pipe, msg,
++					      &bl_device_list, 0, 0);
++	if (IS_ERR(reply)) {
++		dprintk("ERROR: upcall_waitreply failed\n");
++		goto out;
++	}
++
++	if (reply->status == BL_DEVICE_REQUEST_PROC)
++		ret = 0; /*TODO: what to return*/
++out:
++	if (!IS_ERR(reply))
++		kfree(reply);
++	if (!IS_ERR(msg))
++		kfree(msg);
++	return ret;
++}
++
++/*
++ * Release meta device.  Returns dev_remove()'s result (0 ok, 1 failed).
++ */
++static int nfs4_blk_metadev_release(struct pnfs_block_dev *bdev)
++{
++	dev_t dev = bdev->bm_mdev->bd_dev; /* saved: bm_mdev is put below */
++	int rv;
++
++	dprintk("%s Releasing\n", __func__);
++	/* XXX Check return? */
++	rv = nfs4_blkdev_put(bdev->bm_mdev);
++	dprintk("%s nfs4_blkdev_put returns %d\n", __func__, rv);
++	rv = dev_remove(dev);
++	dprintk("%s Returns %d\n", __func__, rv);
++	return rv;
++}
++
++/* Release @bdev's meta device, if any, then free @bdev (NULL is a no-op) */
++void free_block_dev(struct pnfs_block_dev *bdev)
++{
++	if (!bdev)
++		return;
++	if (bdev->bm_mdev) {
++		dprintk("%s Removing DM device: %d:%d\n", __func__,
++			MAJOR(bdev->bm_mdev->bd_dev),
++			MINOR(bdev->bm_mdev->bd_dev));
++		/* XXX Check status ?? */
++		nfs4_blk_metadev_release(bdev);
++	}
++	kfree(bdev);
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-08-31 20:42:05.505169618 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-08-31 20:42:05.505169618 -0400
+@@ -0,0 +1,302 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayout.h
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#ifndef FS_NFS_NFS4BLOCKLAYOUT_H
++#define FS_NFS_NFS4BLOCKLAYOUT_H
++
++#include <linux/nfs_fs.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/dm-ioctl.h> /* Needed for struct dm_ioctl*/
++
++#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> 9) /* sectors in one page */
++
++#define PG_pnfserr PG_owner_priv_1 /* alias of the owner_priv_1 page flag */
++#define PagePnfsErr(page)	test_bit(PG_pnfserr, &(page)->flags)
++#define SetPagePnfsErr(page)	set_bit(PG_pnfserr, &(page)->flags)
++#define ClearPagePnfsErr(page)	clear_bit(PG_pnfserr, &(page)->flags)
++
++extern int dm_dev_create(struct dm_ioctl *param); /* from dm-ioctl.c */
++extern int dm_dev_remove(struct dm_ioctl *param); /* from dm-ioctl.c */
++extern int dm_do_resume(struct dm_ioctl *param);
++extern int dm_table_load(struct dm_ioctl *param, size_t param_size);
++
++struct block_mount_id {
++	spinlock_t			bm_lock;    /* protects bm_devlist */
++	struct list_head		bm_devlist; /* holds pnfs_block_dev */
++};
++
++struct pnfs_block_dev {
++	struct list_head		bm_node;      /* presumably on bm_devlist */
++	struct pnfs_deviceid		bm_mdevid;    /* associated devid */
++	struct block_device		*bm_mdev;     /* meta device itself */
++};
++
++/* Holds visible disks that can be matched against VOLUME_SIMPLE signatures */
++struct visible_block_device {
++	struct list_head	vi_node;	/* list linkage */
++	struct block_device	*vi_bdev;	/* the visible disk */
++	int			vi_mapped;	/* TODO: document */
++	int			vi_put_done;	/* TODO: document */
++};
++
++enum blk_vol_type {
++	PNFS_BLOCK_VOLUME_SIMPLE   = 0,	/* maps to a single logical unit (LU) */
++	PNFS_BLOCK_VOLUME_SLICE    = 1,	/* slice of another volume */
++	PNFS_BLOCK_VOLUME_CONCAT   = 2,	/* concatenation of multiple volumes */
++	PNFS_BLOCK_VOLUME_STRIPE   = 3	/* striped across multiple volumes */
++};
++
++/* All disk offsets/lengths are stored in 512-byte sectors */
++struct pnfs_blk_volume {
++	uint32_t		bv_type; /* presumably enum blk_vol_type */
++	sector_t 		bv_size;
++	struct pnfs_blk_volume 	**bv_vols;
++	int 			bv_vol_n; /* presumably entries in bv_vols */
++	union { /* NOTE(review): member in use presumably follows bv_type */
++		dev_t			bv_dev;
++		sector_t		bv_stripe_unit;
++		sector_t 		bv_offset;
++	};
++};
++
++/* Since components need not be sector aligned, cannot use sector_t */
++struct pnfs_blk_sig_comp {
++	int64_t 	bs_offset;  /* In bytes */
++	uint32_t   	bs_length;  /* In bytes */
++	char 		*bs_string; /* presumably bs_length bytes - confirm */
++};
++
++/* Maximum number of signature components in a simple volume */
++# define PNFS_BLOCK_MAX_SIG_COMP 16
++
++struct pnfs_blk_sig {
++	int 				si_num_comps; /* presumably used entries */
++	struct pnfs_blk_sig_comp	si_comps[PNFS_BLOCK_MAX_SIG_COMP];
++};
++
++enum exstate4 {
++	PNFS_BLOCK_READWRITE_DATA	= 0, /* kept on the RW_EXTENT list */
++	PNFS_BLOCK_READ_DATA		= 1, /* kept on the RO_EXTENT list */
++	PNFS_BLOCK_INVALID_DATA		= 2, /* mapped, but data is invalid */
++	PNFS_BLOCK_NONE_DATA		= 3  /* unmapped, it's a hole */
++};
++
++#define MY_MAX_TAGS (15) /* tag bitnums used must be less than this */
++
++struct my_tree_t { /* per-step tag store; currently a sorted list stub */
++	sector_t		mtt_step_size;	/* Internal sector alignment */
++	struct list_head	mtt_stub; /* Should be a radix tree */
++};
++
++struct pnfs_inval_markings {
++	spinlock_t	im_lock;	/* taken around im_tree lookups/updates */
++	struct my_tree_t im_tree;	/* Sectors that need LAYOUTCOMMIT */
++	sector_t	im_block_size;	/* Server blocksize in sectors */
++};
++
++struct pnfs_inval_tracking { /* one entry of my_tree_t's sorted list */
++	struct list_head it_link;
++	u64		 it_sector;	/* 64-bit: an int wraps past 2^31 sectors */
++	int		 it_tags;	/* bitmask of (1 << tag) */
++};
++
++/* sector_t fields are all in 512-byte sectors */
++struct pnfs_block_extent {
++	struct kref	be_refcnt;	/* released via put_extent() */
++	struct list_head be_node;	/* link into lseg list */
++	struct pnfs_deviceid be_devid;  /* STUB - removable??? */
++	struct block_device *be_mdev;
++	sector_t	be_f_offset;	/* the starting offset in the file */
++	sector_t	be_length;	/* the size of the extent */
++	sector_t	be_v_offset;	/* the starting offset in the volume */
++	enum exstate4	be_state;	/* the state of this extent */
++	struct pnfs_inval_markings *be_inval; /* tracks INVAL->RW transition */
++};
++
++/* Shortened extent used by LAYOUTCOMMIT */
++struct pnfs_block_short_extent {
++	struct list_head bse_node;	/* link into the bl_commit list */
++	struct pnfs_deviceid bse_devid;	/* STUB - removable??? */
++	struct block_device *bse_mdev;
++	sector_t	bse_f_offset;	/* the starting offset in the file */
++	sector_t	bse_length;	/* the size of the extent */
++};
++
++static inline void
++INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
++{
++	sector_t step = min((sector_t)PAGE_CACHE_SECTORS, blocksize);
++	marks->im_tree.mtt_step_size = step; /* smaller of page and block */
++	marks->im_block_size = blocksize;
++	INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
++	spin_lock_init(&marks->im_lock);
++}
++
++enum extentclass4 {
++	RW_EXTENT	= 0, /* READWRITE and INVAL */
++	RO_EXTENT	= 1, /* READ and NONE */
++	EXTENT_LISTS	= 2, /* number of lists; sizes bl_extents[] */
++};
++
++static inline int choose_list(enum exstate4 state)
++{
++	/* READ and NONE extents go on the RO list; all other states on RW */
++	if (state != PNFS_BLOCK_READ_DATA && state != PNFS_BLOCK_NONE_DATA)
++		return RW_EXTENT;
++	return RO_EXTENT;
++}
++
++struct pnfs_block_layout {
++	struct pnfs_layout_hdr bl_layout; /* embedded; see BLK_LO2EXT() */
++	struct pnfs_inval_markings bl_inval; /* tracks INVAL->RW transition */
++	spinlock_t		bl_ext_lock;   /* Protects list manipulation */
++	struct list_head	bl_extents[EXTENT_LISTS]; /* R and RW extents */
++	struct list_head	bl_commit;	/* Needs layout commit */
++	unsigned int		bl_count;	/* entries in bl_commit */
++	sector_t		bl_blocksize;  /* Server blocksize in sectors */
++};
++
++/* This struct is communicated between:
++ * bl_setup_layoutcommit && bl_encode_layoutcommit && bl_cleanup_layoutcommit
++ */
++struct bl_layoutupdate_data {
++	struct list_head ranges;
++};
++
++#define BLK_ID(lo) ((struct block_mount_id *)(PNFS_NFS_SERVER(lo)->pnfs_ld_data))
++
++/* Map a generic layout header to the block layout that embeds it */
++static inline struct pnfs_block_layout *BLK_LO2EXT(struct pnfs_layout_hdr *lo)
++{
++	return container_of(lo, struct pnfs_block_layout, bl_layout);
++}
++
++static inline struct pnfs_block_layout *
++BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
++{
++	return container_of(lseg->layout, struct pnfs_block_layout, bl_layout);
++}
++
++uint32_t *blk_overflow(uint32_t *p, uint32_t *end, size_t nbytes);
++
++#define BLK_READBUF(p, e, nbytes)  do { \
++	p = blk_overflow(p, e, nbytes); /* NULL is treated as overflow */ \
++	if (!p) { \
++		printk(KERN_WARNING \
++			"%s: reply buffer overflowed in line %d.\n", \
++			__func__, __LINE__); \
++		goto out_err; /* caller must provide this label */ \
++	} \
++} while (0)
++
++#define READ32(x)         (x) = ntohl(*p++) /* needs a local cursor p */
++#define READ64(x)         do {                  \
++	(x) = (uint64_t)ntohl(*p++) << 32;           \
++	(x) |= ntohl(*p++);                     \
++} while (0)
++#define COPYMEM(x, nbytes) do {                 \
++	memcpy((x), p, nbytes);                 \
++	p += XDR_QUADLEN(nbytes);               \
++} while (0)
++#define READ_DEVID(x)	COPYMEM((x)->data, NFS4_PNFS_DEVICEID4_SIZE)
++#define READ_SECTOR(x)     do { \
++	READ64(tmp); /* needs a local uint64_t tmp */ \
++	if (tmp & 0x1ff) { \
++		printk(KERN_WARNING \
++		       "%s Value not 512-byte aligned at line %d\n", \
++		       __func__, __LINE__);			     \
++		goto out_err; \
++	} \
++	(x) = tmp >> 9; /* bytes -> 512-byte sectors */ \
++} while (0)
++
++#define WRITE32(n)               do { \
++	*p++ = htonl(n); /* needs a local cursor p */ \
++	} while (0)
++#define WRITE64(n)               do {                           \
++	*p++ = htonl((uint32_t)((n) >> 32)); /* high word first */	\
++	*p++ = htonl((uint32_t)(n));				\
++} while (0)
++#define WRITEMEM(ptr, nbytes)     do {                          \
++	p = xdr_encode_opaque_fixed(p, ptr, nbytes);	\
++} while (0)
++#define WRITE_DEVID(x)  WRITEMEM((x)->data, NFS4_PNFS_DEVICEID4_SIZE)
++
++/* blocklayoutdev.c */
++struct block_device *nfs4_blkdev_get(dev_t dev);
++int nfs4_blkdev_put(struct block_device *bdev);
++struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server,
++					      struct pnfs_device *dev,
++					      struct list_head *sdlist);
++int nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
++			       struct nfs4_layoutget_res *lgr);
++int nfs4_blk_create_block_disk_list(struct list_head *);
++void nfs4_blk_destroy_disk_list(struct list_head *);
++/* blocklayoutdm.c */
++int nfs4_blk_flatten(struct pnfs_blk_volume *, int, struct pnfs_block_dev *);
++void free_block_dev(struct pnfs_block_dev *bdev);
++/* extents.c */
++struct pnfs_block_extent *
++find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
++		struct pnfs_block_extent **cow_read);
++int mark_initialized_sectors(struct pnfs_inval_markings *marks,
++			     sector_t offset, sector_t length,
++			     sector_t **pages);
++void put_extent(struct pnfs_block_extent *be);
++struct pnfs_block_extent *alloc_extent(void);
++struct pnfs_block_extent *get_extent(struct pnfs_block_extent *be);
++int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect);
++int encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++				   struct xdr_stream *xdr,
++				   const struct nfs4_layoutcommit_args *arg);
++void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++				   const struct nfs4_layoutcommit_args *arg,
++				   int status);
++int add_and_merge_extent(struct pnfs_block_layout *bl,
++			 struct pnfs_block_extent *new);
++int mark_for_commit(struct pnfs_block_extent *be,
++		    sector_t offset, sector_t length);
++
++#include <linux/sunrpc/simple_rpc_pipefs.h>
++
++extern struct pipefs_list bl_device_list;
++extern struct dentry *bl_device_pipe;
++
++int bl_pipe_init(void);
++void bl_pipe_exit(void);
++
++#define BL_DEVICE_UMOUNT               0x0 /* Umount--delete devices */
++#define BL_DEVICE_MOUNT                0x1 /* Mount--create devices*/
++#define BL_DEVICE_REQUEST_INIT         0x0 /* Start request */
++#define BL_DEVICE_REQUEST_PROC         0x1 /* User level process succeeds */
++#define BL_DEVICE_REQUEST_ERR          0x2 /* User level process fails */
++
++#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-08-31 20:42:05.507113260 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-08-31 20:42:05.508119925 -0400
+@@ -0,0 +1,948 @@
++/*
++ *  linux/fs/nfs/blocklayout/extents.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++
++#include "blocklayout.h"
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/* Bit numbers within pnfs_inval_tracking.it_tags */
++#define EXTENT_INITIALIZED 0
++#define EXTENT_WRITTEN     1
++#define EXTENT_IN_COMMIT   2
++#define INTERNAL_EXISTS    MY_MAX_TAGS /* set by _preload_range() */
++#define INTERNAL_MASK      ((1 << INTERNAL_EXISTS) - 1) /* bits below it */
++
++/* Rounds s down to a multiple of base: largest t<=s s.t. t%base==0 */
++static inline sector_t normalize(sector_t s, int base)
++{
++	sector_t quot = s; /* do_div() divides in place, returns remainder */
++	return s - do_div(quot, base);
++}
++
++static inline sector_t normalize_up(sector_t s, int base)
++{
++	return normalize(s + base - 1, base); /* smallest multiple >= s */
++}
++
++/* Complete stub using a list until the wanted API is determined */
++
++/* Looks up the entry for sector s in the list, which is kept sorted by
++ * ascending sector.  Returns its tags (internal bit masked off) or -ENOENT. */
++static int32_t _find_entry(struct my_tree_t *tree, u64 s)
++{
++	struct pnfs_inval_tracking *ent;
++
++	dprintk("%s(%llu) enter\n", __func__, s);
++	/* Walk down from the highest sector until we reach or pass s */
++	list_for_each_entry_reverse(ent, &tree->mtt_stub, it_link) {
++		if (ent->it_sector < s)
++			break;
++		if (ent->it_sector == s)
++			return ent->it_tags & INTERNAL_MASK;
++	}
++	return -ENOENT;
++}
++
++/* Tests whether the tracking entry covering sector s has bit 'tag' set.
++ * s is first rounded down to the tree's step size.  Returns 1 if the
++ * entry exists with the tag, else 0.
++ */
++static inline
++int _has_tag(struct my_tree_t *tree, u64 s, int32_t tag)
++{
++	int32_t tags;
++
++	dprintk("%s(%llu, %i) enter\n", __func__, s, tag);
++	tags = _find_entry(tree, normalize(s, tree->mtt_step_size));
++	return tags >= 0 && (tags & (1 << tag));
++}
++
++/* Creates entry with tag, or if entry already exists, unions tag to it.
++ * If storage is not NULL, newly created entry will use it.
++ * Returns number of entries added (0 or 1), or -ENOMEM.
++ */
++static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag,
++		      struct pnfs_inval_tracking *storage)
++{
++	int found = 0;
++	struct pnfs_inval_tracking *pos;
++
++	dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage);
++	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
++		if (pos->it_sector > s)
++			continue;
++		else if (pos->it_sector == s) {
++			found = 1;
++			break;
++		} else
++			break;
++	}
++	if (found) {
++		pos->it_tags |= (1 << tag);
++		return 0;
++	} else {
++		struct pnfs_inval_tracking *new = storage;
++		/* Without storage we are reached via _set_range(), whose
++		 * callers hold the im_lock spinlock: must not sleep. */
++		if (!new) {
++			new = kmalloc(sizeof(*new), GFP_ATOMIC);
++			if (!new)
++				return -ENOMEM;
++		}
++		new->it_sector = s;
++		new->it_tags = (1 << tag);
++		list_add(&new->it_link, &pos->it_link); /* keeps list sorted */
++		return 1;
++	}
++}
++
++/* Over range, unions tag with existing entries, else creates entry with tag
++ * (XXXX really want option to not create).  Returns 0 or -ENOMEM. */
++static int _set_range(struct my_tree_t *tree, int32_t tag, u64 s, u64 length)
++{
++	u64 i;
++
++	dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length);
++	for (i = normalize(s, tree->mtt_step_size); i < s + length;
++	     i += tree->mtt_step_size)
++		if (_add_entry(tree, i, tag, NULL) < 0) /* 1 == created */
++			return -ENOMEM;
++	return 0;
++}
++
++/* Pre-create entries so later ops on the range never malloc; 0 or -ENOMEM */
++static int _preload_range(struct my_tree_t *tree, u64 offset, u64 length)
++{
++	u64 start, end, s;
++	int count, i, used = 0, status = -ENOMEM;
++	struct pnfs_inval_tracking **storage;
++
++	dprintk("%s(%llu, %llu) enter\n", __func__, offset, length);
++	start = normalize(offset, tree->mtt_step_size);
++	end = normalize_up(offset + length, tree->mtt_step_size);
++	count = (int)(end - start) / (int)tree->mtt_step_size;
++	/* Pre-malloc what memory we might need.  kcalloc() checks the
++	 * count * size product for overflow and zeroes unused slots. */
++	storage = kcalloc(count, sizeof(*storage), GFP_KERNEL);
++	if (!storage)
++		return -ENOMEM;
++	for (i = 0; i < count; i++) {
++		storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking),
++				     GFP_KERNEL);
++		if (!storage[i])
++			goto out_cleanup;
++	}
++
++	/* Now need lock - HOW??? */
++
++	for (s = start; s < end; s += tree->mtt_step_size)
++		used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);
++
++	/* Unlock - HOW??? */
++	status = 0;
++
++ out_cleanup: /* free the preallocations the list did not take */
++	for (i = used; i < count; i++) {
++		if (!storage[i])
++			break;
++		kfree(storage[i]);
++	}
++	kfree(storage);
++	return status;
++}
++
++static void set_needs_init(sector_t *array, sector_t offset)
++{
++	sector_t *p = array;
++	/* Insert offset into the sorted, ~0-terminated array (no duplicates) */
++	dprintk("%s enter\n", __func__);
++	if (!p)
++		return; /* no array was allocated by the caller */
++	while (*p < offset) /* stops at the ~0 sentinel at the latest */
++		p++;
++	if (*p == offset)
++		return; /* already recorded */
++	else if (*p == ~0) { /* append, then re-terminate */
++		*p++ = offset;
++		*p = ~0;
++		return;
++	} else { /* insert: shift the tail, sentinel included, up by one */
++		sector_t *save = p;
++		dprintk("%s Adding %llu\n", __func__, (u64)offset);
++		while (*p != ~0)
++			p++;
++		p++;
++		memmove(save + 1, save, (char *)p - (char *)save);
++		*save = offset;
++		return;
++	}
++}
++
++/* 1 if isect is tagged EXTENT_INITIALIZED, else 0.  Relies on page lock. */
++int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect)
++{
++	int initialized;
++
++	spin_lock(&marks->im_lock);
++	initialized = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
++	spin_unlock(&marks->im_lock);
++	return initialized;
++}
++
++/* Assume start, end already sector aligned.
++ * Returns 1 iff every step-sized entry in [start, end) exists and has tag:
++ * entries are walked downward from end and must be contiguous.
++ */
++static int
++_range_has_tag(struct my_tree_t *tree, u64 start, u64 end, int32_t tag)
++{
++	struct pnfs_inval_tracking *pos;
++	u64 step = tree->mtt_step_size;
++	u64 expect = end; /* start of the part of the range verified so far */
++
++	dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag);
++	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
++		if (pos->it_sector >= end)
++			continue;
++		/* Next entry must sit exactly one step below the last one */
++		if (expect < step || pos->it_sector != expect - step ||
++		    !(pos->it_tags & (1 << tag)))
++			return 0;
++		expect = pos->it_sector;
++		/* Done once the entry one step further down would lie below
++		 * start.  Testing expect < step first avoids the unsigned
++		 * wrap that made ranges starting at sector 0 always fail,
++		 * and 0 is no longer overloaded as "nothing seen yet".
++		 */
++		if (expect < step || expect - step < start)
++			return 1;
++	}
++	return 0;
++}
++
++static int is_range_written(struct pnfs_inval_markings *marks,
++			    sector_t start, sector_t end)
++{
++	int written; /* 1 iff all of [start, end) carries EXTENT_WRITTEN */
++
++	spin_lock(&marks->im_lock);
++	written = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN);
++	spin_unlock(&marks->im_lock);
++	return written;
++}
++
++/* Marks sectors in [offset, offset+length) as having been initialized.
++ * All lengths are step-aligned, where step is min(pagesize, blocksize).
++ * Notes where partial block is initialized: if pages is non-NULL, *pages
++ * is set to a kmalloc'ed ~0-terminated array of those sectors, or NULL.
++ */
++/* Currently assumes offset is page-aligned.  Returns 0 or -ENOMEM. */
++int mark_initialized_sectors(struct pnfs_inval_markings *marks,
++			     sector_t offset, sector_t length,
++			     sector_t **pages)
++{
++	sector_t s, start, end;
++	sector_t *array = NULL; /* Pages to mark */
++
++	dprintk("%s(offset=%llu,len=%llu) enter\n",
++		__func__, (u64)offset, (u64)length);
++	s = max((sector_t) 3,
++		2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
++	dprintk("%s set max=%llu\n", __func__, (u64)s);
++	if (pages) {
++		array = kmalloc(s * sizeof(sector_t), GFP_KERNEL);
++		if (!array)
++			goto outerr;
++		array[0] = ~0; /* empty list: sentinel only */
++	}
++	/* Widen to whole server blocks and preallocate the tree entries */
++	start = normalize(offset, marks->im_block_size);
++	end = normalize_up(offset + length, marks->im_block_size);
++	if (_preload_range(&marks->im_tree, start, end - start))
++		goto outerr;
++
++	spin_lock(&marks->im_lock);
++
++	for (s = normalize_up(start, PAGE_CACHE_SECTORS);
++	     s < offset; s += PAGE_CACHE_SECTORS) {
++		dprintk("%s pre-area pages\n", __func__);
++		/* Portion of used block is not initialized */
++		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
++			set_needs_init(array, s);
++	}
++	if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
++		goto out_unlock;
++	for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS);
++	     s < end; s += PAGE_CACHE_SECTORS) {
++		dprintk("%s post-area pages\n", __func__);
++		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
++			set_needs_init(array, s);
++	}
++
++	spin_unlock(&marks->im_lock);
++
++	if (pages) {
++		if (array[0] == ~0) { /* nothing recorded: hand back NULL */
++			kfree(array);
++			*pages = NULL;
++		} else
++			*pages = array;
++	}
++	return 0;
++
++ out_unlock:
++	spin_unlock(&marks->im_lock);
++ outerr:
++	if (pages) {
++		kfree(array);
++		*pages = NULL;
++	}
++	return -ENOMEM;
++}
++
++/* Marks sectors in [offset, offset+length) as having been written to disk.
++ * All lengths should be block aligned.  Returns _set_range()'s 0 or -ENOMEM.
++ */
++int mark_written_sectors(struct pnfs_inval_markings *marks,
++			 sector_t offset, sector_t length)
++{
++	int err;
++
++	dprintk("%s(offset=%llu,len=%llu) enter\n",
++		__func__, (u64)offset, (u64)length);
++	spin_lock(&marks->im_lock);
++	err = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length);
++	spin_unlock(&marks->im_lock);
++	return err;
++}
++
++static void print_short_extent(struct pnfs_block_short_extent *be)
++{
++	dprintk("PRINT SHORT EXTENT extent %p\n", be);
++	if (!be)
++		return;
++	dprintk("        be_f_offset %llu\n", (u64)be->bse_f_offset);
++	dprintk("        be_length   %llu\n", (u64)be->bse_length);
++}
++
++void print_clist(struct list_head *list, unsigned int count)
++{
++	struct pnfs_block_short_extent *ext;
++	unsigned int seen = 0;
++
++	dprintk("****************\n");
++	dprintk("Extent list looks like:\n");
++	list_for_each_entry(ext, list, bse_node) {
++		print_short_extent(ext);
++		seen++;
++	}
++	if (seen != count) /* list length disagrees with the caller's count */
++		dprintk("\n\nExpected %u entries\n\n\n", count);
++	dprintk("****************\n");
++}
++
++/* Note: In theory, we should do more checking that devid's match between
++ * old and new, but if they don't, the lists are too corrupt to salvage anyway.
++ */
++/* Like add_and_merge_extent.  Caller holds bl_ext_lock; consumes new. */
++static void add_to_commitlist(struct pnfs_block_layout *bl,
++			      struct pnfs_block_short_extent *new)
++{
++	struct list_head *clist = &bl->bl_commit;
++	struct pnfs_block_short_extent *old, *save;
++	sector_t end = new->bse_f_offset + new->bse_length;
++
++	dprintk("%s enter\n", __func__);
++	print_short_extent(new);
++	print_clist(clist, bl->bl_count);
++	bl->bl_count++; /* adjusted below as entries are dropped */
++	/* Scan for proper place to insert, extending new to the left
++	 * as much as possible.
++	 */
++	list_for_each_entry_safe(old, save, clist, bse_node) {
++		if (new->bse_f_offset < old->bse_f_offset)
++			break;
++		if (end <= old->bse_f_offset + old->bse_length) {
++			/* Range is already in list */
++			bl->bl_count--;
++			kfree(new);
++			return;
++		} else if (new->bse_f_offset <=
++				old->bse_f_offset + old->bse_length) {
++			/* new overlaps or abuts existing be */
++			if (new->bse_mdev == old->bse_mdev) {
++				/* extend new to fully replace old */
++				new->bse_length += new->bse_f_offset -
++						old->bse_f_offset;
++				new->bse_f_offset = old->bse_f_offset;
++				list_del(&old->bse_node);
++				bl->bl_count--;
++				kfree(old);
++			}
++		}
++	}
++	/* Note that if we never hit the above break, old will not point to a
++	 * valid extent.  However, in that case &old->bse_node==list.
++	 */
++	list_add_tail(&new->bse_node, &old->bse_node); /* insert before old */
++	/* Scan forward for overlaps.  If we find any, extend new and
++	 * remove the overlapped extent.
++	 */
++	old = list_prepare_entry(new, clist, bse_node);
++	list_for_each_entry_safe_continue(old, save, clist, bse_node) {
++		if (end < old->bse_f_offset)
++			break;
++		/* new overlaps or abuts old */
++		if (new->bse_mdev == old->bse_mdev) {
++			if (end < old->bse_f_offset + old->bse_length) {
++				/* extend new to fully cover old */
++				end = old->bse_f_offset + old->bse_length;
++				new->bse_length = end - new->bse_f_offset;
++			}
++			list_del(&old->bse_node);
++			bl->bl_count--;
++			kfree(old);
++		}
++	}
++	dprintk("%s: after merging\n", __func__);
++	print_clist(clist, bl->bl_count);
++}
++
++/* Note the range described by offset, length is guaranteed to be contained
++ * within be.  Returns 0, or -ENOMEM if the commit entry cannot be allocated.
++ */
++int mark_for_commit(struct pnfs_block_extent *be,
++		    sector_t offset, sector_t length)
++{
++	sector_t new_end, end = offset + length;
++	struct pnfs_block_short_extent *new;
++	struct pnfs_block_layout *bl = container_of(be->be_inval,
++						    struct pnfs_block_layout,
++						    bl_inval);
++
++	new = kmalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return -ENOMEM;
++	/* NOTE(review): mark_written_sectors() result is ignored here */
++	mark_written_sectors(be->be_inval, offset, length);
++	/* We want to add the range to commit list, but it must be
++	 * block-normalized, and verified that the normalized range has
++	 * been entirely written to disk.
++	 */
++	new->bse_f_offset = offset;
++	offset = normalize(offset, bl->bl_blocksize);
++	if (offset < new->bse_f_offset) {
++		if (is_range_written(be->be_inval, offset, new->bse_f_offset))
++			new->bse_f_offset = offset;
++		else /* leading partial block not fully written: skip it */
++			new->bse_f_offset = offset + bl->bl_blocksize;
++	}
++	new_end = normalize_up(end, bl->bl_blocksize);
++	if (end < new_end) {
++		if (is_range_written(be->be_inval, end, new_end))
++			end = new_end;
++		else /* trailing partial block not fully written: drop it */
++			end = new_end - bl->bl_blocksize;
++	}
++	if (end <= new->bse_f_offset) { /* no whole block left to commit */
++		kfree(new);
++		return 0;
++	}
++	new->bse_length = end - new->bse_f_offset;
++	new->bse_devid = be->be_devid;
++	new->bse_mdev = be->be_mdev;
++
++	spin_lock(&bl->bl_ext_lock);
++	/* new will be freed, either by add_to_commitlist if it decides not
++	 * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
++	 */
++	add_to_commitlist(bl, new);
++	spin_unlock(&bl->bl_ext_lock);
++	return 0;
++}
++
++static void print_bl_extent(struct pnfs_block_extent *be)
++{
++	dprintk("PRINT EXTENT extent %p\n", be);
++	if (be) {
++		dprintk("        be_f_offset %llu\n", (u64)be->be_f_offset);
++		dprintk("        be_length   %llu\n", (u64)be->be_length);
++		dprintk("        be_v_offset %llu\n", (u64)be->be_v_offset);
++		dprintk("        be_state    %d\n", be->be_state);
++	}
++}
++
++static void
++destroy_extent(struct kref *kref)
++{
++	struct pnfs_block_extent *be;
++
++	be = container_of(kref, struct pnfs_block_extent, be_refcnt);
++	dprintk("%s be=%p\n", __func__, be);
++	kfree(be);
++}
++
++void
++put_extent(struct pnfs_block_extent *be)
++{
++	if (be) {
++		dprintk("%s enter %p (%i)\n", __func__, be,
++			atomic_read(&be->be_refcnt.refcount));
++		kref_put(&be->be_refcnt, destroy_extent);
++	}
++}
++
++struct pnfs_block_extent *alloc_extent(void)
++{
++	struct pnfs_block_extent *be;
++
++	be = kmalloc(sizeof(struct pnfs_block_extent), GFP_KERNEL);
++	if (!be)
++		return NULL;
++	INIT_LIST_HEAD(&be->be_node);
++	kref_init(&be->be_refcnt);
++	be->be_inval = NULL;
++	return be;
++}
++
++struct pnfs_block_extent *
++get_extent(struct pnfs_block_extent *be)
++{
++	if (be)
++		kref_get(&be->be_refcnt);
++	return be;
++}
++
++void print_elist(struct list_head *list)
++{
++	struct pnfs_block_extent *be;
++	dprintk("****************\n");
++	dprintk("Extent list looks like:\n");
++	list_for_each_entry(be, list, be_node) {
++		print_bl_extent(be);
++	}
++	dprintk("****************\n");
++}
++
++static inline int
++extents_consistent(struct pnfs_block_extent *old, struct pnfs_block_extent *new)
++{
++	/* Note this assumes new->be_f_offset >= old->be_f_offset */
++	return (new->be_state == old->be_state) &&
++		((new->be_state == PNFS_BLOCK_NONE_DATA) ||
++		 ((new->be_v_offset - old->be_v_offset ==
++		   new->be_f_offset - old->be_f_offset) &&
++		  new->be_mdev == old->be_mdev));
++}
++
++/* Adds new to appropriate list in bl, modifying new and removing existing
++ * extents as appropriate to deal with overlaps.
++ *
++ * See find_get_extent for list constraints.
++ *
++ * Refcount on new is already set.  If end up not using it, or error out,
++ * need to put the reference.
++ *
++ * Lock is held by caller.
++ */
++int
++add_and_merge_extent(struct pnfs_block_layout *bl,
++		     struct pnfs_block_extent *new)
++{
++	struct pnfs_block_extent *be, *tmp;
++	sector_t end = new->be_f_offset + new->be_length;
++	struct list_head *list;
++
++	dprintk("%s enter with be=%p\n", __func__, new);
++	print_bl_extent(new);
++	list = &bl->bl_extents[choose_list(new->be_state)];
++	print_elist(list);
++
++	/* Scan for proper place to insert, extending new to the left
++	 * as much as possible.
++	 */
++	list_for_each_entry_safe_reverse(be, tmp, list, be_node) {
++		if (new->be_f_offset >= be->be_f_offset + be->be_length)
++			break;
++		if (new->be_f_offset >= be->be_f_offset) {
++			if (end <= be->be_f_offset + be->be_length) {
++				/* new is a subset of existing be*/
++				if (extents_consistent(be, new)) {
++					dprintk("%s: new is subset, ignoring\n",
++						__func__);
++					put_extent(new);
++					return 0;
++				} else {
++					goto out_err;
++				}
++			} else {
++				/* |<--   be   -->|
++				 *          |<--   new   -->| */
++				if (extents_consistent(be, new)) {
++					/* extend new to fully replace be */
++					new->be_length += new->be_f_offset -
++						be->be_f_offset;
++					new->be_f_offset = be->be_f_offset;
++					new->be_v_offset = be->be_v_offset;
++					dprintk("%s: removing %p\n", __func__, be);
++					list_del(&be->be_node);
++					put_extent(be);
++				} else {
++					goto out_err;
++				}
++			}
++		} else if (end >= be->be_f_offset + be->be_length) {
++			/* new extent overlap existing be */
++			if (extents_consistent(be, new)) {
++				/* extend new to fully replace be */
++				dprintk("%s: removing %p\n", __func__, be);
++				list_del(&be->be_node);
++				put_extent(be);
++			} else {
++				goto out_err;
++			}
++		} else if (end > be->be_f_offset) {
++			/*           |<--   be   -->|
++			 *|<--   new   -->| */
++			if (extents_consistent(new, be)) {
++				/* extend new to fully replace be */
++				new->be_length += be->be_f_offset + be->be_length -
++					new->be_f_offset - new->be_length;
++				dprintk("%s: removing %p\n", __func__, be);
++				list_del(&be->be_node);
++				put_extent(be);
++			} else {
++				goto out_err;
++			}
++		}
++	}
++	/* Note that if we never hit the above break, be will not point to a
++	 * valid extent.  However, in that case &be->be_node==list.
++	 */
++	list_add(&new->be_node, &be->be_node);
++	dprintk("%s: inserting new\n", __func__);
++	print_elist(list);
++	/* STUB - The per-list consistency checks have all been done,
++	 * should now check cross-list consistency.
++	 */
++	return 0;
++
++ out_err:
++	put_extent(new);
++	return -EIO;
++}
++
++/* Returns extent, or NULL.  If a second READ extent exists, it is returned
++ * in cow_read, if given.
++ *
++ * The extents are kept in two separate ordered lists, one for READ and NONE,
++ * one for READWRITE and INVALID.  Within each list, we assume:
++ * 1. Extents are ordered by file offset.
++ * 2. For any given isect, there is at most one extent that matches.
++ */
++struct pnfs_block_extent *
++find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
++	    struct pnfs_block_extent **cow_read)
++{
++	struct pnfs_block_extent *be, *cow, *ret;
++	int i;
++
++	dprintk("%s enter with isect %llu\n", __func__, (u64)isect);
++	cow = ret = NULL;
++	spin_lock(&bl->bl_ext_lock);
++	for (i = 0; i < EXTENT_LISTS; i++) {
++		if (ret &&
++		    (!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA))
++			break;
++		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
++			if (isect >= be->be_f_offset + be->be_length)
++				break;
++			if (isect >= be->be_f_offset) {
++				/* We have found an extent */
++				dprintk("%s Get %p (%i)\n", __func__, be,
++					atomic_read(&be->be_refcnt.refcount));
++				kref_get(&be->be_refcnt);
++				if (!ret)
++					ret = be;
++				else if (be->be_state != PNFS_BLOCK_READ_DATA)
++					put_extent(be);
++				else
++					cow = be;
++				break;
++			}
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++	if (cow_read)
++		*cow_read = cow;
++	print_bl_extent(ret);
++	return ret;
++}
++
++/* Similar to find_get_extent, but called with lock held, and ignores cow */
++static struct pnfs_block_extent *
++find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect)
++{
++	struct pnfs_block_extent *be, *ret = NULL;
++	int i;
++
++	dprintk("%s enter with isect %llu\n", __func__, (u64)isect);
++	for (i = 0; i < EXTENT_LISTS; i++) {
++		if (ret)
++			break;
++		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
++			if (isect >= be->be_f_offset + be->be_length)
++				break;
++			if (isect >= be->be_f_offset) {
++				/* We have found an extent */
++				dprintk("%s Get %p (%i)\n", __func__, be,
++					atomic_read(&be->be_refcnt.refcount));
++				kref_get(&be->be_refcnt);
++				ret = be;
++				break;
++			}
++		}
++	}
++	print_bl_extent(ret);
++	return ret;
++}
++
++int
++encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++			       struct xdr_stream *xdr,
++			       const struct nfs4_layoutcommit_args *arg)
++{
++	sector_t start, end;
++	struct pnfs_block_short_extent *lce, *save;
++	unsigned int count = 0;
++	struct bl_layoutupdate_data *bld = arg->layoutdriver_data;
++	struct list_head *ranges = &bld->ranges;
++	__be32 *p, *xdr_start;
++
++	dprintk("%s enter\n", __func__);
++	start = arg->range.offset >> 9;
++	end = start + (arg->range.length >> 9);
++	dprintk("%s set start=%llu, end=%llu\n",
++		__func__, (u64)start, (u64)end);
++
++	/* BUG - creation of bl_commit is buggy - need to wait for
++	 * entire block to be marked WRITTEN before it can be added.
++	 */
++	spin_lock(&bl->bl_ext_lock);
++	/* Want to adjust for possible truncate */
++	/* We now want to adjust argument range */
++
++	/* XDR encode the ranges found */
++	xdr_start = xdr_reserve_space(xdr, 8);
++	if (!xdr_start)
++		goto out;
++	list_for_each_entry_safe(lce, save, &bl->bl_commit, bse_node) {
++		p = xdr_reserve_space(xdr, 7 * 4 + sizeof(lce->bse_devid.data));
++		if (!p)
++			break;
++		WRITE_DEVID(&lce->bse_devid);
++		WRITE64(lce->bse_f_offset << 9);
++		WRITE64(lce->bse_length << 9);
++		WRITE64(0LL);
++		WRITE32(PNFS_BLOCK_READWRITE_DATA);
++		list_del(&lce->bse_node);
++		list_add_tail(&lce->bse_node, ranges);
++		bl->bl_count--;
++		count++;
++	}
++	xdr_start[0] = cpu_to_be32((xdr->p - xdr_start - 1) * 4);
++	xdr_start[1] = cpu_to_be32(count);
++out:
++	spin_unlock(&bl->bl_ext_lock);
++	dprintk("%s found %i ranges\n", __func__, count);
++	return 0;
++}
++
++/* Helper function to set_to_rw that initialize a new extent */
++static void
++_prep_new_extent(struct pnfs_block_extent *new,
++		 struct pnfs_block_extent *orig,
++		 sector_t offset, sector_t length, int state)
++{
++	kref_init(&new->be_refcnt);
++	/* don't need to INIT_LIST_HEAD(&new->be_node) */
++	memcpy(&new->be_devid, &orig->be_devid, sizeof(struct pnfs_deviceid));
++	new->be_mdev = orig->be_mdev;
++	new->be_f_offset = offset;
++	new->be_length = length;
++	new->be_v_offset = orig->be_v_offset - orig->be_f_offset + offset;
++	new->be_state = state;
++	new->be_inval = orig->be_inval;
++}
++
++/* Tries to merge be with extent in front of it in list.
++ * Frees storage if not used.
++ */
++static struct pnfs_block_extent *
++_front_merge(struct pnfs_block_extent *be, struct list_head *head,
++	     struct pnfs_block_extent *storage)
++{
++	struct pnfs_block_extent *prev;
++
++	if (!storage)
++		goto no_merge;
++	if (&be->be_node == head || be->be_node.prev == head)
++		goto no_merge;
++	prev = list_entry(be->be_node.prev, struct pnfs_block_extent, be_node);
++	if ((prev->be_f_offset + prev->be_length != be->be_f_offset) ||
++	    !extents_consistent(prev, be))
++		goto no_merge;
++	_prep_new_extent(storage, prev, prev->be_f_offset,
++			 prev->be_length + be->be_length, prev->be_state);
++	list_replace(&prev->be_node, &storage->be_node);
++	put_extent(prev);
++	list_del(&be->be_node);
++	put_extent(be);
++	return storage;
++
++ no_merge:
++	kfree(storage);
++	return be;
++}
++
++/* Split the INVALID extent at offset so the written range becomes READWRITE.
++ * Returns the end of the extent found at offset, else offset + length. */
++static u64
++set_to_rw(struct pnfs_block_layout *bl, u64 offset, u64 length)
++{
++	u64 rv = offset + length;
++	struct pnfs_block_extent *be, *e1, *e2, *e3, *new, *old;
++	struct pnfs_block_extent *children[3];
++	struct pnfs_block_extent *merge1 = NULL, *merge2 = NULL;
++	int i = 0, j;
++
++	dprintk("%s(%llu, %llu)\n", __func__, offset, length);
++	/* Create storage for up to three new extents e1, e2, e3 */
++	e1 = kmalloc(sizeof(*e1), GFP_KERNEL);
++	e2 = kmalloc(sizeof(*e2), GFP_KERNEL);
++	e3 = kmalloc(sizeof(*e3), GFP_KERNEL);
++	/* BUG - we are ignoring any failure */
++	if (!e1 || !e2 || !e3)
++		goto out_nosplit;
++
++	spin_lock(&bl->bl_ext_lock);
++	be = find_get_extent_locked(bl, offset);
++	if (be)
++		rv = be->be_f_offset + be->be_length;
++	if (!be || be->be_state != PNFS_BLOCK_INVALID_DATA) {
++		spin_unlock(&bl->bl_ext_lock);
++		put_extent(be);	/* drop the lookup ref; NULL is ignored */
++		goto out_nosplit;
++	}
++	/* Add e* to children, bumping e*'s krefs */
++	if (be->be_f_offset != offset) {
++		_prep_new_extent(e1, be, be->be_f_offset,
++				 offset - be->be_f_offset,
++				 PNFS_BLOCK_INVALID_DATA);
++		children[i++] = e1;
++		print_bl_extent(e1);
++	} else
++		merge1 = e1;
++	_prep_new_extent(e2, be, offset,
++			 min(length, be->be_f_offset + be->be_length - offset),
++			 PNFS_BLOCK_READWRITE_DATA);
++	children[i++] = e2;
++	print_bl_extent(e2);
++	if (offset + length < be->be_f_offset + be->be_length) {
++		_prep_new_extent(e3, be, e2->be_f_offset + e2->be_length,
++				 be->be_f_offset + be->be_length -
++				 offset - length,
++				 PNFS_BLOCK_INVALID_DATA);
++		children[i++] = e3;
++		print_bl_extent(e3);
++	} else
++		merge2 = e3;
++
++	/* Remove be from list, and insert the e*.  We don't get refs on e*,
++	 * since this list is the base reference set when init'ed. */
++	if (i < 3)
++		children[i] = NULL;
++	new = children[0];
++	list_replace(&be->be_node, &new->be_node);
++	put_extent(be);
++	new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge1);
++	for (j = 1; j < i; j++) {
++		old = new;
++		new = children[j];
++		list_add(&new->be_node, &old->be_node);
++	}
++	if (merge2) {
++		/* This is a HACK, should just create a _back_merge function */
++		new = list_entry(new->be_node.next,
++				 struct pnfs_block_extent, be_node);
++		new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge2);
++	}
++	spin_unlock(&bl->bl_ext_lock);
++
++	/* List ref was dropped above; now drop the ref from the lookup. */
++	put_extent(be);
++	dprintk("%s returns %llu after split\n", __func__, rv);
++	return rv;
++
++ out_nosplit:
++	kfree(e1);
++	kfree(e2);
++	kfree(e3);
++	dprintk("%s returns %llu without splitting\n", __func__, rv);
++	return rv;
++}
++
++void
++clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++			      const struct nfs4_layoutcommit_args *arg,
++			      int status)
++{
++	struct bl_layoutupdate_data *bld = arg->layoutdriver_data;
++	struct pnfs_block_short_extent *lce, *save;
++
++	dprintk("%s status %d\n", __func__, status);
++	list_for_each_entry_safe_reverse(lce, save, &bld->ranges, bse_node) {
++		if (likely(!status)) {
++			u64 offset = lce->bse_f_offset;
++			u64 end = offset + lce->bse_length;
++
++			do {
++				offset = set_to_rw(bl, offset, end - offset);
++			} while (offset < end);
++
++			kfree(lce);
++		} else {
++			spin_lock(&bl->bl_ext_lock);
++			add_to_commitlist(bl, lce);
++			spin_unlock(&bl->bl_ext_lock);
++		}
++	}
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-08-31 20:42:05.502212803 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-08-31 20:42:05.502212803 -0400
+@@ -0,0 +1,6 @@
++#
++# Makefile for the pNFS block layout driver kernel module
++#
++obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o
++blocklayoutdriver-objs := blocklayout.o blocklayoutdev.o blocklayoutdm.o \
++			extents.o block-device-discovery-pipe.o
+diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/callback.h
+--- linux-2.6.34.noarch/fs/nfs/callback.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-08-31 20:42:05.508119925 -0400
+@@ -111,6 +111,13 @@ extern int nfs41_validate_delegation_sta
+ 
+ #define RCA4_TYPE_MASK_RDATA_DLG	0
+ #define RCA4_TYPE_MASK_WDATA_DLG	1
++#define RCA4_TYPE_MASK_DIR_DLG         2
++#define RCA4_TYPE_MASK_FILE_LAYOUT     3
++#define RCA4_TYPE_MASK_BLK_LAYOUT      4
++#define RCA4_TYPE_MASK_OBJ_LAYOUT_MIN  8
++#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX  9
++#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12
++#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15
+ 
+ struct cb_recallanyargs {
+ 	struct sockaddr	*craa_addr;
+@@ -127,6 +134,39 @@ struct cb_recallslotargs {
+ extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args,
+ 					  void *dummy);
+ 
++struct cb_layoutrecallargs {
++	struct sockaddr		*cbl_addr;
++	struct nfs_fh		cbl_fh;
++	struct pnfs_layout_range cbl_seg;
++	struct nfs_fsid		cbl_fsid;
++	uint32_t		cbl_recall_type;
++	uint32_t		cbl_layout_type;
++	uint32_t		cbl_layoutchanged;
++	nfs4_stateid		cbl_stateid;
++};
++
++extern unsigned nfs4_callback_layoutrecall(
++	struct cb_layoutrecallargs *args,
++	void *dummy);
++
++struct cb_devicenotifyitem {
++	uint32_t		cbd_notify_type;
++	uint32_t		cbd_layout_type;
++	struct pnfs_deviceid	cbd_dev_id;
++	uint32_t		cbd_immediate;
++};
++
++/* XXX: Should be dynamic up to max compound size */
++#define NFS4_DEV_NOTIFY_MAXENTRIES 10
++struct cb_devicenotifyargs {
++	struct sockaddr			*addr;
++	int				 ndevs;
++	struct cb_devicenotifyitem	 devs[NFS4_DEV_NOTIFY_MAXENTRIES];
++};
++
++extern unsigned nfs4_callback_devicenotify(
++	struct cb_devicenotifyargs *args,
++	void *dummy);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
+diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/nfs/callback_proc.c
+--- linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-08-31 20:42:05.509093330 -0400
+@@ -8,10 +8,15 @@
+ #include <linux/nfs4.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/slab.h>
++#include <linux/kthread.h>
++#include <linux/module.h>
++#include <linux/writeback.h>
++#include <linux/nfs4_pnfs.h>
+ #include "nfs4_fs.h"
+ #include "callback.h"
+ #include "delegation.h"
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ #ifdef NFS_DEBUG
+ #define NFSDBG_FACILITY NFSDBG_CALLBACK
+@@ -62,16 +67,6 @@ out:
+ 	return res->status;
+ }
+ 
+-static int (*nfs_validate_delegation_stateid(struct nfs_client *clp))(struct nfs_delegation *, const nfs4_stateid *)
+-{
+-#if defined(CONFIG_NFS_V4_1)
+-	if (clp->cl_minorversion > 0)
+-		return nfs41_validate_delegation_stateid;
+-#endif
+-	return nfs4_validate_delegation_stateid;
+-}
+-
+-
+ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
+ {
+ 	struct nfs_client *clp;
+@@ -92,8 +87,7 @@ __be32 nfs4_callback_recall(struct cb_re
+ 		inode = nfs_delegation_find_inode(clp, &args->fh);
+ 		if (inode != NULL) {
+ 			/* Set up a helper thread to actually return the delegation */
+-			switch (nfs_async_inode_return_delegation(inode, &args->stateid,
+-								  nfs_validate_delegation_stateid(clp))) {
++			switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
+ 				case 0:
+ 					res = 0;
+ 					break;
+@@ -116,24 +110,364 @@ out:
+ 
+ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
+ {
+-	if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data,
+-					 sizeof(delegation->stateid.data)) != 0)
++	if (delegation == NULL || memcmp(delegation->stateid.u.data,
++					 stateid->u.data,
++					 sizeof(delegation->stateid.u.data)))
+ 		return 0;
+ 	return 1;
+ }
+ 
+ #if defined(CONFIG_NFS_V4_1)
+ 
++static bool
++pnfs_is_next_layout_stateid(const struct pnfs_layout_hdr *lo,
++			    const nfs4_stateid stateid)
++{
++	int seqlock;
++	bool res;
++	u32 oldseqid, newseqid;
++
++	do {
++		seqlock = read_seqbegin(&lo->seqlock);
++		oldseqid = be32_to_cpu(lo->stateid.u.stateid.seqid);
++		newseqid = be32_to_cpu(stateid.u.stateid.seqid);
++		res = !memcmp(lo->stateid.u.stateid.other,
++			      stateid.u.stateid.other,
++			      NFS4_STATEID_OTHER_SIZE);
++		if (res) { /* comparing layout stateids */
++			if (oldseqid == ~0)
++				res = (newseqid == 1);
++			else
++				res = (newseqid == oldseqid + 1);
++		} else { /* open stateid */
++			res = !memcmp(lo->stateid.u.data,
++				      &zero_stateid,
++				      NFS4_STATEID_SIZE);
++			if (res)
++				res = (newseqid == 1);
++		}
++	} while (read_seqretry(&lo->seqlock, seqlock));
++
++	return res;
++}
++
++/*
++ * Retrieve an inode based on layout recall parameters
++ *
++ * Note: caller must iput(inode) to dereference the inode.
++ */
++static struct inode *
++nfs_layoutrecall_find_inode(struct nfs_client *clp,
++			    const struct cb_layoutrecallargs *args)
++{
++	struct nfs_inode *nfsi;
++	struct pnfs_layout_hdr *lo;
++	struct nfs_server *server;
++	struct inode *ino = NULL;
++
++	dprintk("%s: Begin recall_type=%d clp %p\n",
++		__func__, args->cbl_recall_type, clp);
++
++	spin_lock(&clp->cl_lock);
++	list_for_each_entry(lo, &clp->cl_layouts, layouts) {
++		nfsi = PNFS_NFS_INODE(lo);
++		if (!nfsi)
++			continue;
++
++		dprintk("%s: Searching inode=%lu\n",
++			__func__, nfsi->vfs_inode.i_ino);
++
++		if (args->cbl_recall_type == RETURN_FILE) {
++		    if (nfs_compare_fh(&args->cbl_fh, &nfsi->fh))
++			continue;
++		} else if (args->cbl_recall_type == RETURN_FSID) {
++			server = NFS_SERVER(&nfsi->vfs_inode);
++			if (server->fsid.major != args->cbl_fsid.major ||
++			    server->fsid.minor != args->cbl_fsid.minor)
++				continue;
++		}
++
++		/* Make sure client didn't clean up layout without
++		 * telling the server */
++		if (!has_layout(nfsi))
++			continue;
++
++		ino = igrab(&nfsi->vfs_inode);
++		dprintk("%s: Found inode=%p\n", __func__, ino);
++		break;
++	}
++	spin_unlock(&clp->cl_lock);
++	return ino;
++}
++
++struct recall_layout_threadargs {
++	struct inode *inode;
++	struct nfs_client *clp;
++	struct completion started;
++	struct cb_layoutrecallargs *rl;
++	int result;
++};
++
++/* Thread body for a layout recall: returns layouts, reports via args->result */
++static int pnfs_recall_layout(void *data)
++{
++	struct inode *inode, *ino;
++	struct nfs_client *clp;
++	struct cb_layoutrecallargs rl;
++	struct nfs4_layoutreturn *lrp;
++	struct recall_layout_threadargs *args = data;
++	int status = 0;
++
++	daemonize("nfsv4-layoutreturn");
++
++	dprintk("%s: recall_type=%d fsid 0x%llx-0x%llx start\n",
++		__func__, args->rl->cbl_recall_type,
++		args->rl->cbl_fsid.major, args->rl->cbl_fsid.minor);
++
++	clp = args->clp;
++	inode = args->inode;
++	rl = *args->rl;
++
++	/* support whole file layouts only */
++	rl.cbl_seg.offset = 0;
++	rl.cbl_seg.length = NFS4_MAX_UINT64;
++
++	if (rl.cbl_recall_type == RETURN_FILE) {
++		if (pnfs_is_next_layout_stateid(NFS_I(inode)->layout,
++						rl.cbl_stateid))
++			status = pnfs_return_layout(inode, &rl.cbl_seg,
++						    &rl.cbl_stateid, RETURN_FILE,
++						    false);
++		else
++			status = cpu_to_be32(NFS4ERR_DELAY);
++		if (status)
++			dprintk("%s RETURN_FILE error: %d\n", __func__, status);
++		else
++			status =  cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
++		args->result = status;
++		complete(&args->started);
++		goto out;
++	}
++
++	status = cpu_to_be32(NFS4_OK);
++	args->result = status;
++	complete(&args->started);
++	args = NULL;
++
++	/* IMPROVEME: This loop is inefficient, running in O(|s_inodes|^2) */
++	while ((ino = nfs_layoutrecall_find_inode(clp, &rl)) != NULL) {
++		/* FIXME: need to check status on pnfs_return_layout */
++		pnfs_return_layout(ino, &rl.cbl_seg, NULL, RETURN_FILE, false);
++		iput(ino);
++	}
++
++	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
++	if (!lrp) {
++		dprintk("%s: allocation failed. Cannot send last LAYOUTRETURN\n",
++			__func__);
++		goto out;
++	}
++
++	/* send final layoutreturn */
++	lrp->args.reclaim = 0;
++	lrp->args.layout_type = rl.cbl_layout_type;
++	lrp->args.return_type = rl.cbl_recall_type;
++	lrp->args.range = rl.cbl_seg;
++	lrp->args.inode = inode;
++	nfs4_proc_layoutreturn(lrp, true);
++
++out:
++	clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
++	nfs_put_client(clp);
++	dprintk("%s: exit status %d\n", __func__, 0);
++	module_put_and_exit(0);	/* does not return */
++	return 0;
++}
++
++/*
++ * Asynchronous layout recall!
++ */
++static int pnfs_async_return_layout(struct nfs_client *clp, struct inode *inode,
++				    struct cb_layoutrecallargs *rl)
++{
++	struct recall_layout_threadargs data = {
++		.clp = clp,
++		.inode = inode,
++		.rl = rl,
++	};
++	struct task_struct *t;
++	int status = -EAGAIN;
++
++	dprintk("%s: -->\n", __func__);
++
++	/* FIXME: do not allow two concurrent layout recalls */
++	if (test_and_set_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state))
++		return status;
++
++	init_completion(&data.started);
++	__module_get(THIS_MODULE);
++	if (!atomic_inc_not_zero(&clp->cl_count))
++		goto out_put_no_client;
++
++	t = kthread_run(pnfs_recall_layout, &data, "%s", "pnfs_recall_layout");
++	if (IS_ERR(t)) {
++		printk(KERN_INFO "NFS: Layout recall callback thread failed "
++			"for client (clientid %08x/%08x)\n",
++			(unsigned)(clp->cl_clientid >> 32),
++			(unsigned)(clp->cl_clientid));
++		status = PTR_ERR(t);
++		goto out_module_put;
++	}
++	wait_for_completion(&data.started);
++	return data.result;
++out_module_put:
++	nfs_put_client(clp);
++out_put_no_client:
++	clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
++	module_put(THIS_MODULE);
++	return status;
++}
++
++static int pnfs_recall_all_layouts(struct nfs_client *clp)
++{
++	/* Fields not named here are zeroed; the recall thread copies all of rl */
++	struct cb_layoutrecallargs rl = {
++		.cbl_recall_type = RETURN_ALL,
++		.cbl_seg.iomode = IOMODE_ANY,
++		.cbl_seg.length = NFS4_MAX_UINT64,
++	};
++	struct inode *inode;
++	int status = 0;
++
++	/* we need the inode to get the nfs_server struct */
++	inode = nfs_layoutrecall_find_inode(clp, &rl);
++	if (!inode)
++		return status;
++	status = pnfs_async_return_layout(clp, inode, &rl);
++	iput(inode);
++
++	return status;
++}
++
++__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
++				  void *dummy)
++{
++	struct nfs_client *clp;
++	struct inode *inode = NULL;
++	__be32 res;
++	int status;
++	unsigned int num_client = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
++	clp  = nfs_find_client(args->cbl_addr, 4);
++	if (clp == NULL) {
++		dprintk("%s: no client for addr %u.%u.%u.%u\n",
++			__func__, NIPQUAD(args->cbl_addr));
++		goto out;
++	}
++
++	res = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
++	do {
++		struct nfs_client *prev = clp;
++		num_client++;
++		/* the callback must come from the MDS personality */
++		if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
++			goto loop;
++		if (args->cbl_recall_type == RETURN_FILE) {
++			inode = nfs_layoutrecall_find_inode(clp, args);
++			if (inode != NULL) {
++				status = pnfs_async_return_layout(clp, inode,
++								  args);
++				if (status)
++					res = cpu_to_be32(NFS4ERR_DELAY);
++				iput(inode);
++			}
++		} else { /* _ALL or _FSID */
++			/* we need the inode to get the nfs_server struct */
++			inode = nfs_layoutrecall_find_inode(clp, args);
++			if (!inode)
++				goto loop;
++			status = pnfs_async_return_layout(clp, inode, args);
++			if (status)
++				res = cpu_to_be32(NFS4ERR_DELAY);
++			iput(inode);
++		}
++loop:
++		clp = nfs_find_client_next(prev);
++		nfs_put_client(prev);
++	} while (clp != NULL);
++
++out:
++	dprintk("%s: exit with status = %d numclient %u\n",
++		__func__, ntohl(res), num_client);
++	return res;
++}
++
++/* Remove the deviceid(s) from the nfs_client deviceid cache */
++static __be32 pnfs_devicenotify_client(struct nfs_client *clp,
++				       struct cb_devicenotifyargs *args)
++{
++	uint32_t type;
++	int i;
++
++	dprintk("%s: --> clp %p\n", __func__, clp);
++
++	for (i = 0; i < args->ndevs; i++) {
++		struct cb_devicenotifyitem *dev = &args->devs[i];
++		type = dev->cbd_notify_type;
++		if (type == NOTIFY_DEVICEID4_DELETE && clp->cl_devid_cache)
++			nfs4_delete_device(clp->cl_devid_cache,
++					   &dev->cbd_dev_id);
++		else if (type == NOTIFY_DEVICEID4_CHANGE)
++			printk(KERN_ERR "%s: NOTIFY_DEVICEID4_CHANGE "
++					"not supported\n", __func__);
++	}
++	return 0;
++}
++
++__be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
++				  void *dummy)
++{
++	struct nfs_client *clp;
++	__be32 res = 0;
++	unsigned int num_client = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	res = __constant_htonl(NFS4ERR_INVAL);
++	clp = nfs_find_client(args->addr, 4);
++	if (clp == NULL) {
++		dprintk("%s: no client for addr %u.%u.%u.%u\n",
++			__func__, NIPQUAD(args->addr));
++		goto out;
++	}
++
++	do {
++		struct nfs_client *prev = clp;
++		num_client++;
++		res = pnfs_devicenotify_client(clp, args);
++		clp = nfs_find_client_next(prev);
++		nfs_put_client(prev);
++	} while (clp != NULL);
++
++out:
++	dprintk("%s: exit with status = %d numclient %u\n",
++		__func__, ntohl(res), num_client);
++	return res;
++}
++
+ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
+ {
+ 	if (delegation == NULL)
+ 		return 0;
+ 
+-	/* seqid is 4-bytes long */
+-	if (((u32 *) &stateid->data)[0] != 0)
++	if (stateid->u.stateid.seqid != 0)
+ 		return 0;
+-	if (memcmp(&delegation->stateid.data[4], &stateid->data[4],
+-		   sizeof(stateid->data)-4))
++	if (memcmp(&delegation->stateid.u.stateid.other,
++		   &stateid->u.stateid.other,
++		   NFS4_STATEID_OTHER_SIZE))
+ 		return 0;
+ 
+ 	return 1;
+@@ -335,13 +669,37 @@ out:
+ 	return status;
+ }
+ 
++static inline bool
++validate_bitmap_values(const unsigned long *mask)
++{
++	int i;
++
++	if (*mask == 0)
++		return true;
++	if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, mask) ||
++	    test_bit(RCA4_TYPE_MASK_WDATA_DLG, mask) ||
++	    test_bit(RCA4_TYPE_MASK_DIR_DLG, mask) ||
++	    test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, mask) ||
++	    test_bit(RCA4_TYPE_MASK_BLK_LAYOUT, mask))
++		return true;
++	for (i = RCA4_TYPE_MASK_OBJ_LAYOUT_MIN;
++	     i <= RCA4_TYPE_MASK_OBJ_LAYOUT_MAX; i++)
++		if (test_bit(i, mask))
++			return true;
++	for (i = RCA4_TYPE_MASK_OTHER_LAYOUT_MIN;
++	     i <= RCA4_TYPE_MASK_OTHER_LAYOUT_MAX; i++)
++		if (test_bit(i, mask))
++			return true;
++	return false;
++}
++
+ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
+ {
+ 	struct nfs_client *clp;
+ 	__be32 status;
+ 	fmode_t flags = 0;
+ 
+-	status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
++	status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
+ 	clp = nfs_find_client(args->craa_addr, 4);
+ 	if (clp == NULL)
+ 		goto out;
+@@ -349,16 +707,25 @@ __be32 nfs4_callback_recallany(struct cb
+ 	dprintk("NFS: RECALL_ANY callback request from %s\n",
+ 		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+ 
++	status = cpu_to_be32(NFS4ERR_INVAL);
++	if (!validate_bitmap_values((const unsigned long *)
++				    &args->craa_type_mask))
++		return status;
++
++	status = cpu_to_be32(NFS4_OK);
+ 	if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
+ 		     &args->craa_type_mask))
+ 		flags = FMODE_READ;
+ 	if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
+ 		     &args->craa_type_mask))
+ 		flags |= FMODE_WRITE;
++	if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
++		     &args->craa_type_mask))
++		if (pnfs_recall_all_layouts(clp) == -EAGAIN)
++			status = cpu_to_be32(NFS4ERR_DELAY);
+ 
+ 	if (flags)
+ 		nfs_expire_all_delegation_types(clp, flags);
+-	status = htonl(NFS4_OK);
+ out:
+ 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
+ 	return status;
+diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/nfs/callback_xdr.c
+--- linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-08-31 20:42:05.510143651 -0400
+@@ -22,6 +22,8 @@
+ #define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+ 
+ #if defined(CONFIG_NFS_V4_1)
++#define CB_OP_LAYOUTRECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
++#define CB_OP_DEVICENOTIFY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+ #define CB_OP_SEQUENCE_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ + \
+ 					4 + 1 + 3)
+ #define CB_OP_RECALLANY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+@@ -136,7 +138,7 @@ static __be32 decode_stateid(struct xdr_
+ 	p = read_buf(xdr, 16);
+ 	if (unlikely(p == NULL))
+ 		return htonl(NFS4ERR_RESOURCE);
+-	memcpy(stateid->data, p, 16);
++	memcpy(stateid->u.data, p, 16);
+ 	return 0;
+ }
+ 
+@@ -220,6 +222,148 @@ out:
+ 
+ #if defined(CONFIG_NFS_V4_1)
+ 
++static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
++				       struct xdr_stream *xdr,
++				       struct cb_layoutrecallargs *args)
++{
++	__be32 *p;
++	__be32 status = 0;
++
++	args->cbl_addr = svc_addr(rqstp);
++	p = read_buf(xdr, 4 * sizeof(uint32_t));
++	if (unlikely(p == NULL)) {
++		status = htonl(NFS4ERR_BADXDR);
++		goto out;
++	}
++
++	args->cbl_layout_type = ntohl(*p++);
++	args->cbl_seg.iomode = ntohl(*p++);
++	args->cbl_layoutchanged = ntohl(*p++);
++	args->cbl_recall_type = ntohl(*p++);
++
++	if (likely(args->cbl_recall_type == RETURN_FILE)) {
++		status = decode_fh(xdr, &args->cbl_fh);
++		if (unlikely(status != 0))
++			goto out;
++
++		p = read_buf(xdr, 2 * sizeof(uint64_t));
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_BADXDR);
++			goto out;
++		}
++		p = xdr_decode_hyper(p, &args->cbl_seg.offset);
++		p = xdr_decode_hyper(p, &args->cbl_seg.length);
++		status = decode_stateid(xdr, &args->cbl_stateid);
++		if (unlikely(status != 0))
++			goto out;
++	} else if (args->cbl_recall_type == RETURN_FSID) {
++		p = read_buf(xdr, 2 * sizeof(uint64_t));
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_BADXDR);
++			goto out;
++		}
++		p = xdr_decode_hyper(p, &args->cbl_fsid.major);
++		p = xdr_decode_hyper(p, &args->cbl_fsid.minor);
++	}
++	dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d "
++		"fsid %llx-%llx fhsize %d\n", __func__,
++		args->cbl_layout_type, args->cbl_seg.iomode,
++		args->cbl_layoutchanged, args->cbl_recall_type,
++		args->cbl_fsid.major, args->cbl_fsid.minor,
++		args->cbl_fh.size);
++out:
++	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
++	return status;
++}
++
++/* Decode CB_NOTIFY_DEVICEID args: up to NFS4_DEV_NOTIFY_MAXENTRIES items. */
++static __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
++				       struct xdr_stream *xdr,
++				       struct cb_devicenotifyargs *args)
++{
++	__be32 *p;
++	__be32 status = 0;
++	u32 tmp;
++	int n, i;
++	args->ndevs = 0;
++
++	args->addr = svc_addr(rqstp);
++
++	/* Num of device notifications */
++	p = read_buf(xdr, sizeof(uint32_t));
++	if (unlikely(p == NULL)) {
++		status = htonl(NFS4ERR_RESOURCE);
++		goto out;
++	}
++	n = ntohl(*p++);
++	if (n <= 0)
++		goto out;
++
++	/* XXX: need to possibly return error in this case */
++	if (n > NFS4_DEV_NOTIFY_MAXENTRIES) {
++		dprintk("%s: Processing (%d) notifications out of (%d)\n",
++			__func__, NFS4_DEV_NOTIFY_MAXENTRIES, n);
++		n = NFS4_DEV_NOTIFY_MAXENTRIES;
++	}
++
++	/* Decode each dev notification */
++	for (i = 0; i < n; i++) {
++		struct cb_devicenotifyitem *dev = &args->devs[i];
++
++		p = read_buf(xdr, (4 * sizeof(uint32_t))
++			     + NFS4_PNFS_DEVICEID4_SIZE);
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_RESOURCE);
++			goto out;
++		}
++
++		tmp = ntohl(*p++);	/* bitmap size */
++		if (tmp != 1) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++		dev->cbd_notify_type = ntohl(*p++);
++		if (dev->cbd_notify_type != NOTIFY_DEVICEID4_CHANGE &&
++		    dev->cbd_notify_type != NOTIFY_DEVICEID4_DELETE) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++
++		tmp = ntohl(*p++);	/* opaque size */
++		if (((dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) &&
++		     (tmp != NFS4_PNFS_DEVICEID4_SIZE + 8)) ||
++		    ((dev->cbd_notify_type == NOTIFY_DEVICEID4_DELETE) &&
++		     (tmp != NFS4_PNFS_DEVICEID4_SIZE + 4))) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++		dev->cbd_layout_type = ntohl(*p++);
++		memcpy(dev->cbd_dev_id.data, p, NFS4_PNFS_DEVICEID4_SIZE);
++		p += XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE);
++
++		/* Only a CHANGE notification carries the extra "immediate" word. */
++		dev->cbd_immediate = 0;
++		if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) {
++			p = read_buf(xdr, sizeof(uint32_t));
++			if (unlikely(p == NULL)) {
++				status = htonl(NFS4ERR_DELAY);
++				goto out;
++			}
++			dev->cbd_immediate = ntohl(*p++);
++		}
++
++		args->ndevs++;
++
++		dprintk("%s: type %d layout 0x%x immediate %d\n",
++			__func__, dev->cbd_notify_type, dev->cbd_layout_type,
++			dev->cbd_immediate);
++	}
++out:
++	dprintk("%s: status %d ndevs %d\n",
++		__func__, ntohl(status), args->ndevs);
++	return status;
++}
++
+ static __be32 decode_sessionid(struct xdr_stream *xdr,
+ 				 struct nfs4_sessionid *sid)
+ {
+@@ -574,11 +718,11 @@ preprocess_nfs41_op(int nop, unsigned in
+ 	case OP_CB_SEQUENCE:
+ 	case OP_CB_RECALL_ANY:
+ 	case OP_CB_RECALL_SLOT:
++	case OP_CB_LAYOUTRECALL:
++	case OP_CB_NOTIFY_DEVICEID:
+ 		*op = &callback_ops[op_nr];
+ 		break;
+ 
+-	case OP_CB_LAYOUTRECALL:
+-	case OP_CB_NOTIFY_DEVICEID:
+ 	case OP_CB_NOTIFY:
+ 	case OP_CB_PUSH_DELEG:
+ 	case OP_CB_RECALLABLE_OBJ_AVAIL:
+@@ -739,6 +883,18 @@ static struct callback_op callback_ops[]
+ 		.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
+ 	},
+ #if defined(CONFIG_NFS_V4_1)
++	[OP_CB_LAYOUTRECALL] = {
++		.process_op = (callback_process_op_t)nfs4_callback_layoutrecall,
++		.decode_args =
++			(callback_decode_arg_t)decode_layoutrecall_args,
++		.res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
++	},
++	[OP_CB_NOTIFY_DEVICEID] = {
++		.process_op = (callback_process_op_t)nfs4_callback_devicenotify,
++		.decode_args =
++			(callback_decode_arg_t)decode_devicenotify_args,
++		.res_maxsize = CB_OP_DEVICENOTIFY_RES_MAXSZ,
++	},
+ 	[OP_CB_SEQUENCE] = {
+ 		.process_op = (callback_process_op_t)nfs4_callback_sequence,
+ 		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
+diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/client.c
+--- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-08-31 20:41:19.144140225 -0400
++++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-31 20:42:05.511222861 -0400
+@@ -39,6 +39,7 @@
+ #include <net/ipv6.h>
+ #include <linux/nfs_xdr.h>
+ #include <linux/sunrpc/bc_xprt.h>
++#include <linux/nfs4_pnfs.h>
+ 
+ #include <asm/system.h>
+ 
+@@ -48,6 +49,7 @@
+ #include "iostat.h"
+ #include "internal.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_CLIENT
+ 
+@@ -150,11 +152,14 @@ static struct nfs_client *nfs_alloc_clie
+ 	clp->cl_boot_time = CURRENT_TIME;
+ 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
+ 	clp->cl_minorversion = cl_init->minorversion;
++	clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
+ #endif
+ 	cred = rpc_lookup_machine_cred();
+ 	if (!IS_ERR(cred))
+ 		clp->cl_machine_cred = cred;
+-
++#if defined(CONFIG_NFS_V4_1)
++	INIT_LIST_HEAD(&clp->cl_layouts);
++#endif
+ 	nfs_fscache_get_client_cookie(clp);
+ 
+ 	return clp;
+@@ -178,7 +183,7 @@ static void nfs4_clear_client_minor_vers
+ 		clp->cl_session = NULL;
+ 	}
+ 
+-	clp->cl_call_sync = _nfs4_call_sync;
++	clp->cl_mvops = nfs_v4_minor_ops[0];
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
+@@ -188,7 +193,7 @@ static void nfs4_clear_client_minor_vers
+ static void nfs4_destroy_callback(struct nfs_client *clp)
+ {
+ 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+-		nfs_callback_down(clp->cl_minorversion);
++		nfs_callback_down(clp->cl_mvops->minor_version);
+ }
+ 
+ static void nfs4_shutdown_client(struct nfs_client *clp)
+@@ -251,6 +256,7 @@ void nfs_put_client(struct nfs_client *c
+ 		nfs_free_client(clp);
+ 	}
+ }
++EXPORT_SYMBOL(nfs_put_client);
+ 
+ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ /*
+@@ -343,7 +349,7 @@ static int nfs_sockaddr_match_ipaddr(con
+  * Test if two socket addresses represent the same actual socket,
+  * by comparing (only) relevant fields, including the port number.
+  */
+-static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
++int nfs_sockaddr_cmp(const struct sockaddr *sa1,
+ 			    const struct sockaddr *sa2)
+ {
+ 	if (sa1->sa_family != sa2->sa_family)
+@@ -357,6 +363,7 @@ static int nfs_sockaddr_cmp(const struct
+ 	}
+ 	return 0;
+ }
++EXPORT_SYMBOL(nfs_sockaddr_cmp);
+ 
+ /*
+  * Find a client by IP address and protocol version
+@@ -548,6 +555,7 @@ int nfs4_check_client_ready(struct nfs_c
+ 		return -EPROTONOSUPPORT;
+ 	return 0;
+ }
++EXPORT_SYMBOL(nfs4_check_client_ready);
+ 
+ /*
+  * Initialise the timeout values for a connection
+@@ -865,9 +873,34 @@ error:
+ }
+ 
+ /*
++ * Initialize the pNFS layout driver and setup pNFS related parameters
++ */
++static void nfs4_init_pnfs(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fsinfo *fsinfo)
++{
++#if defined(CONFIG_NFS_V4_1)
++	struct nfs_client *client = server->nfs_client;
++
++	/* Nothing to do without a session or when the server is not a pNFS MDS. */
++	if (!nfs4_has_session(client) ||
++	    !(client->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
++		return;
++	server->pnfs_blksize = fsinfo->blksize;
++	set_pnfs_layoutdriver(server, mntfh, fsinfo->layouttype);
++	pnfs_set_ds_iosize(server);
++#endif /* CONFIG_NFS_V4_1 */
++}
++
++static void nfs4_uninit_pnfs(struct nfs_server *server)
++{
++#if defined(CONFIG_NFS_V4_1)
++	if (server->nfs_client && nfs4_has_session(server->nfs_client))
++		unmount_pnfs_layoutdriver(server);
++#endif /* CONFIG_NFS_V4_1 */
++}
++
++/*
+  * Load up the server record from information gained in an fsinfo record
+  */
+-static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo)
++static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fsinfo *fsinfo)
+ {
+ 	unsigned long max_rpc_payload;
+ 
+@@ -897,6 +930,8 @@ static void nfs_server_set_fsinfo(struct
+ 	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
+ 		server->wsize = NFS_MAX_FILE_IO_SIZE;
+ 	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
++	nfs4_init_pnfs(server, mntfh, fsinfo);
++
+ 	server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
+ 
+ 	server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
+@@ -938,7 +973,7 @@ static int nfs_probe_fsinfo(struct nfs_s
+ 	if (error < 0)
+ 		goto out_error;
+ 
+-	nfs_server_set_fsinfo(server, &fsinfo);
++	nfs_server_set_fsinfo(server, mntfh, &fsinfo);
+ 
+ 	/* Get some general file system info */
+ 	if (server->namelen == 0) {
+@@ -1016,6 +1051,7 @@ void nfs_free_server(struct nfs_server *
+ {
+ 	dprintk("--> nfs_free_server()\n");
+ 
++	nfs4_uninit_pnfs(server);
+ 	spin_lock(&nfs_client_lock);
+ 	list_del(&server->client_link);
+ 	list_del(&server->master_link);
+@@ -1126,7 +1162,7 @@ static int nfs4_init_callback(struct nfs
+ 				return error;
+ 		}
+ 
+-		error = nfs_callback_up(clp->cl_minorversion,
++		error = nfs_callback_up(clp->cl_mvops->minor_version,
+ 					clp->cl_rpcclient->cl_xprt);
+ 		if (error < 0) {
+ 			dprintk("%s: failed to start callback. Error = %d\n",
+@@ -1143,10 +1179,8 @@ static int nfs4_init_callback(struct nfs
+  */
+ static int nfs4_init_client_minor_version(struct nfs_client *clp)
+ {
+-	clp->cl_call_sync = _nfs4_call_sync;
+-
+ #if defined(CONFIG_NFS_V4_1)
+-	if (clp->cl_minorversion) {
++	if (clp->cl_mvops->minor_version) {
+ 		struct nfs4_session *session = NULL;
+ 		/*
+ 		 * Create the session and mark it expired.
+@@ -1158,7 +1192,13 @@ static int nfs4_init_client_minor_versio
+ 			return -ENOMEM;
+ 
+ 		clp->cl_session = session;
+-		clp->cl_call_sync = _nfs4_call_sync_session;
++		/*
++		 * The create session reply races with the server back
++		 * channel probe. Mark the client NFS_CS_SESSION_INITING
++		 * so that the client back channel can find the
++		 * nfs_client struct
++		 */
++		clp->cl_cons_state = NFS_CS_SESSION_INITING;
+ 	}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+@@ -1216,7 +1256,7 @@ error:
+ /*
+  * Set up an NFS4 client
+  */
+-static int nfs4_set_client(struct nfs_server *server,
++int nfs4_set_client(struct nfs_server *server,
+ 		const char *hostname,
+ 		const struct sockaddr *addr,
+ 		const size_t addrlen,
+@@ -1259,6 +1299,7 @@ error:
+ 	dprintk("<-- nfs4_set_client() = xerror %d\n", error);
+ 	return error;
+ }
++EXPORT_SYMBOL(nfs4_set_client);
+ 
+ 
+ /*
+@@ -1448,7 +1489,7 @@ struct nfs_server *nfs4_create_referral_
+ 				data->authflavor,
+ 				parent_server->client->cl_xprt->prot,
+ 				parent_server->client->cl_timeout,
+-				parent_client->cl_minorversion);
++				parent_client->cl_mvops->minor_version);
+ 	if (error < 0)
+ 		goto error;
+ 
+diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/bl_com.c
+--- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-08-31 20:42:05.550110844 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-08-31 20:42:05.550110844 -0400
+@@ -0,0 +1,292 @@
++#if defined(CONFIG_SPNFS_BLOCK)
++
++#include <linux/module.h>
++#include <linux/mutex.h>
++#include <linux/init.h>
++#include <linux/types.h>
++#include <linux/slab.h>
++#include <linux/socket.h>
++#include <linux/in.h>
++#include <linux/sched.h>
++#include <linux/exportfs.h>
++#include <linux/namei.h>
++#include <linux/mount.h>
++#include <linux/path.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/workqueue.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++#include <linux/proc_fs.h>
++#include <linux/nfs_fs.h>
++
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd4_block.h>
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++static ssize_t bl_pipe_upcall(struct file *, struct rpc_pipe_msg *,
++    char __user *, size_t);
++static ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t);
++static void bl_pipe_destroy_msg(struct rpc_pipe_msg *);
++
++static struct rpc_pipe_ops bl_upcall_ops = {
++	.upcall		= bl_pipe_upcall,
++	.downcall	= bl_pipe_downcall,
++	.destroy_msg	= bl_pipe_destroy_msg,
++};
++
++bl_comm_t	*bl_comm_global;
++
++/* Create the "pnfs_block" pipe under rpc_pipefs "/nfs"; 0 or a negative errno. */
++int
++nfsd_bl_start(void)
++{
++	bl_comm_t	*bl_comm = NULL;
++	struct path path;
++	struct nameidata nd;
++	int rc;
++
++	dprintk("%s: starting pipe\n", __func__);
++	if (bl_comm_global)
++		return -EEXIST;
++
++	path.mnt = rpc_get_mount();
++	if (IS_ERR(path.mnt))
++		return PTR_ERR(path.mnt);
++
++	bl_comm = kzalloc(sizeof (*bl_comm), GFP_KERNEL);
++	if (!bl_comm) {
++		rc = -ENOMEM;
++		goto err;
++	}
++	mutex_init(&bl_comm->lock);	/* ready before the pipe is visible */
++	mutex_init(&bl_comm->pipe_lock);
++	init_waitqueue_head(&bl_comm->pipe_wq);
++
++	/* FIXME: do not abuse rpc_pipefs/nfs */
++	rc = vfs_path_lookup(path.mnt->mnt_root, path.mnt, "/nfs", 0, &nd);
++	if (rc)
++		goto err;
++	/* FIXME: rename to "spnfs_block" */
++	bl_comm->pipe_dentry = rpc_mkpipe(nd.path.dentry, "pnfs_block", bl_comm,
++					 &bl_upcall_ops, 0);
++	path_put(&nd.path);	/* drop the reference taken by the lookup */
++	if (IS_ERR(bl_comm->pipe_dentry)) {
++		rc = -EPIPE;
++		goto err;
++	}
++	bl_comm_global = bl_comm;
++	return 0;
++err:
++	rpc_put_mount();
++	kfree(bl_comm);
++	return rc;
++}
++
++void
++nfsd_bl_stop(void)
++{
++	bl_comm_t	*comm = bl_comm_global;	/* NULL when never started */
++
++	dprintk("%s: stopping pipe\n", __func__);
++	if (comm != NULL) {
++		rpc_unlink(comm->pipe_dentry);
++		rpc_put_mount();
++		bl_comm_global = NULL;
++		kfree(comm);
++	}
++}
++
++/* Copy the not-yet-read part of an upcall message out to user space. */
++static ssize_t
++bl_pipe_upcall(struct file *file, struct rpc_pipe_msg *msg, char __user *dst,
++    size_t buflen)
++{
++	char	*data	= (char *)msg->data + msg->copied;
++	ssize_t	mlen	= msg->len - msg->copied, left;
++
++	if (mlen > buflen)
++		mlen = buflen;
++
++	/* copy_to_user() returns the number of bytes NOT copied, never < 0. */
++	left = copy_to_user(dst, data, mlen);
++	if (mlen > 0 && left == mlen) {
++		msg->errno = -EFAULT;
++		return -EFAULT;
++	}
++	mlen		-= left;
++	msg->copied	+= mlen;
++	msg->errno	= 0;
++	return mlen;
++}
++
++/* Copy the user-space reply in, publish it in bc->msg and wake pipe_wq. */
++static ssize_t
++bl_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
++{
++	struct rpc_inode	*rpci	= RPC_I(filp->f_dentry->d_inode);
++	bl_comm_t		*bc	= (bl_comm_t *)rpci->private;
++	bl_comm_msg_t		*im	= &bc->msg;
++	int			ret;
++	bl_comm_res_t		*res;
++
++	if (mlen == 0) {
++		im->msg_status = PNFS_BLOCK_FAILURE;
++		im->msg_res = NULL;
++		wake_up(&bc->pipe_wq);
++		return -EFAULT;
++	}
++	
++	if ((res = kmalloc(mlen, GFP_KERNEL)) == NULL)
++		return -ENOMEM;
++	
++	if (copy_from_user(res, src, mlen)) {
++		kfree(res);
++		return -EFAULT;
++	}
++	/* NOTE(review): mlen is never checked against sizeof (*res). */
++	mutex_lock(&bc->pipe_lock);
++	
++	ret		= mlen;
++	im->msg_status	= res->res_status;
++	im->msg_res	= res;
++	
++	wake_up(&bc->pipe_wq);
++	mutex_unlock(&bc->pipe_lock);
++	return ret;
++}
++
++/* Fail the pending upcall when its message is destroyed with an error. */
++static void
++bl_pipe_destroy_msg(struct rpc_pipe_msg *msg)
++{
++	bl_comm_msg_t	*upmsg = msg->data;
++	bl_comm_t	*comm = container_of(upmsg, struct bl_comm, msg);
++
++	if (msg->errno < 0) {
++		mutex_lock(&comm->pipe_lock);
++		upmsg->msg_status = PNFS_BLOCK_FAILURE;
++		wake_up(&comm->pipe_wq);
++		mutex_unlock(&comm->pipe_lock);
++	}
++}
++
++/* Queue @upmsg on the pipe and sleep until woken; returns 0 on success. */
++int
++bl_upcall(bl_comm_t *bc, bl_comm_msg_t *upmsg, bl_comm_res_t **res)
++{
++	struct rpc_pipe_msg	msg;
++	DECLARE_WAITQUEUE(wq, current);
++	int			rval	= 1;
++	bl_comm_msg_t		*m;
++
++	*res = NULL;	/* callers kfree() *res even when the upcall fails */
++	if (bc == NULL) {
++		dprintk("%s: No pNFS block daemon available\n", __func__);
++		return 1;
++	}
++	m = &bc->msg;
++	mutex_lock(&bc->lock);
++	mutex_lock(&bc->pipe_lock);
++	memcpy(m, upmsg, sizeof (*m));
++	memset(&msg, 0, sizeof (msg));
++	msg.data = m;
++	msg.len = sizeof (*m);
++
++	add_wait_queue(&bc->pipe_wq, &wq);
++	rval = rpc_queue_upcall(bc->pipe_dentry->d_inode, &msg);
++	if (rval < 0) {
++		remove_wait_queue(&bc->pipe_wq, &wq);
++		goto out;
++	}
++
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	mutex_unlock(&bc->pipe_lock);
++	schedule();
++	__set_current_state(TASK_RUNNING);
++	remove_wait_queue(&bc->pipe_wq, &wq);
++	mutex_lock(&bc->pipe_lock);
++
++	if (m->msg_status == PNFS_BLOCK_SUCCESS) {
++		*res = m->msg_res;
++		rval = 0;
++	} else
++		rval = 1;
++
++out:
++	mutex_unlock(&bc->pipe_lock);
++	mutex_unlock(&bc->lock);
++	return rval;
++}
++
++static ssize_t ctl_write(struct file *file, const char __user *buf, size_t len,
++    loff_t *offset)
++{
++	int		cmd, rc;
++	bl_comm_t	*bc	= bl_comm_global;
++	bl_comm_msg_t	msg;
++	bl_comm_res_t	*res	= NULL;	/* bl_upcall() only sets it on success */
++
++	if (copy_from_user((int *)&cmd, (int *)buf, sizeof (int)))
++		return -EFAULT;
++	memset(&msg, 0, sizeof (msg));	/* bl_upcall() queues the whole struct */
++	switch (cmd) {
++	case PNFS_BLOCK_CTL_STOP:
++		msg.msg_type = PNFS_UPCALL_MSG_STOP;
++		(void) bl_upcall(bc, &msg, &res);
++		kfree(res);
++		nfsd_bl_stop();
++		break;
++
++	case PNFS_BLOCK_CTL_START:
++		rc = nfsd_bl_start();
++		if (rc != 0)
++			return rc;
++		break;
++
++	case PNFS_BLOCK_CTL_VERS:
++		msg.msg_type = PNFS_UPCALL_MSG_VERS;
++		msg.u.msg_vers = PNFS_UPCALL_VERS;
++		if (bl_upcall(bc, &msg, &res)) {
++			dprintk("%s: Failed to contact pNFS block daemon\n",
++			    __func__);
++			return 0;
++		}
++		kfree(res);
++		break;
++
++	default:
++		dprintk("%s: unknown ctl command %d\n", __func__, cmd);
++		break;
++	}
++	return len;
++}
++
++static struct file_operations ctl_ops = {
++	.write	= ctl_write,
++};
++
++/*
++ * bl_init_proc -- set up proc interfaces
++ *
++ * Creating a pnfs_block directory isn't really required at this point
++ * since we've only got a single node in that directory. If the need for
++ * more nodes doesn't present itself shortly this code should revert
++ * to a single top level node. McNeal 11-Aug-2008.
++ */
++int
++bl_init_proc(void)
++{
++	struct proc_dir_entry *e;
++
++	e = proc_mkdir("fs/pnfs_block", NULL);
++	if (!e)
++		return -ENOMEM;
++
++	e = create_proc_entry("fs/pnfs_block/ctl", 0, NULL);
++	if (!e)
++		return -ENOMEM;
++	e->proc_fops = &ctl_ops;
++
++	return 0;
++}
++#endif /* CONFIG_SPNFS_BLOCK */
+diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/bl_ops.c
+--- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-08-31 20:42:05.551222888 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-08-31 20:42:05.551222888 -0400
+@@ -0,0 +1,1672 @@
++/*
++ *  bl_ops.c
++ *  spNFS
++ *
++ *  Created by Rick McNeal on 4/1/08.
++ *  Copyright 2008 __MyCompanyName__. All rights reserved.
++ *
++ */
++
++/*
++ * Block layout operations.
++ *
++ * These functions, with the exception of pnfs_block_enabled, are assigned to
++ * the super block s_export_op structure.
++ */
++#if defined(CONFIG_SPNFS_BLOCK)
++
++#include <linux/module.h>
++#include <linux/genhd.h>
++#include <linux/fs.h>
++#include <linux/exportfs.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd/export.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/nfsd/debug.h>
++#include <linux/spinlock_types.h>
++#include <linux/dm-ioctl.h>
++#include <asm/uaccess.h>
++#include <linux/falloc.h>
++#include <linux/nfsd4_block.h>
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY	NFSDDBG_PNFS
++
++#define MIN(a, b) ((a) < (b) ? (a) : (b))
++
++#define BL_LAYOUT_HASH_BITS	4
++#define BL_LAYOUT_HASH_SIZE	(1 << BL_LAYOUT_HASH_BITS)
++#define BL_LAYOUT_HASH_MASK	(BL_LAYOUT_HASH_SIZE - 1)
++#define BL_LIST_REQ	(sizeof (struct dm_ioctl) + 256)
++
++#define bl_layout_hashval(id) \
++	((id) & BL_LAYOUT_HASH_MASK)
++
++#define BLL_F_END(p) ((p)->bll_foff + (p)->bll_len)
++#define BLL_S_END(p) ((p)->bll_soff + (p)->bll_len)
++#define _2SECTS(v) ((v) >> 9)
++
++#ifndef READ32
++#define READ32(x)	(x) = ntohl(*p++)
++#define READ64(x)	do {			\
++(x) = (u64)ntohl(*p++) << 32;	\
++(x) |= ntohl(*p++);		\
++} while (0)
++#endif
++
++
++typedef enum {True, False} boolean_t;
++/* ---- block layoutget and commit structure ---- */
++typedef struct bl_layout_rec {
++	struct list_head	blr_hash,
++				blr_layouts;
++	dev_t			blr_rdev;
++	struct inode		*blr_inode;
++	int			blr_recalled;	// debug
++	u64			blr_orig_size,
++				blr_commit_size,
++				blr_ext_size;
++	spinlock_t		blr_lock;	// Protects blr_layouts
++} bl_layout_rec_t;
++
++static struct list_head layout_hash;
++static struct list_head layout_hashtbl[BL_LAYOUT_HASH_SIZE];
++static spinlock_t layout_hashtbl_lock;
++
++/* ---- prototypes ---- */
++static boolean_t device_slice(dev_t devid);
++static boolean_t device_dm(dev_t devid);
++static boolean_t layout_inode_add(struct inode *i, bl_layout_rec_t **);
++static bl_layout_rec_t *layout_inode_find(struct inode *i);
++static void layout_inode_del(struct inode *i);
++static char *map_state2name(enum pnfs_block_extent_state4 s);
++static pnfs_blocklayout_devinfo_t *bld_alloc(struct list_head *volume, int type);
++static void bld_free(pnfs_blocklayout_devinfo_t *bld);
++static pnfs_blocklayout_devinfo_t *bld_simple(struct list_head *volumes,
++    dev_t devid, int local_index);
++static pnfs_blocklayout_devinfo_t *bld_slice(struct list_head *volumes,
++    dev_t devid, int my_loc, int idx);
++static int layout_cache_fill_from(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg);
++struct list_head *layout_cache_iter(bl_layout_rec_t *r,
++    struct list_head *bl_possible, struct nfsd4_layout_seg *seg);
++static void layout_cache_merge(bl_layout_rec_t *r, struct list_head *h);
++static int layout_cache_update(bl_layout_rec_t *r, struct list_head *h);
++static void layout_cache_del(bl_layout_rec_t *r, const struct nfsd4_layout_seg *seg);
++static void print_bll(pnfs_blocklayout_layout_t *b, char *);
++static inline boolean_t layout_cache_fill_from_list(bl_layout_rec_t *r,
++    struct list_head *h, struct nfsd4_layout_seg *seg);
++static inline void bll_collapse(bl_layout_rec_t *r,
++    pnfs_blocklayout_layout_t *c);
++static pnfs_blocklayout_layout_t *bll_alloc(u64 offset, u64 len,
++    enum bl_cache_state state, struct list_head *h);
++static pnfs_blocklayout_layout_t *bll_alloc_dup(pnfs_blocklayout_layout_t *b,
++    enum bl_cache_state c, struct list_head *h);
++static inline boolean_t layout_conflict(pnfs_blocklayout_layout_t *b, u32 iomode,
++    enum pnfs_block_extent_state4 *s);
++static void extents_setup(struct fiemap_extent_info *fei);
++static void extents_count(struct fiemap_extent_info *fei, struct inode *i,
++    u64 foff, u64 len);
++static boolean_t extents_get(struct fiemap_extent_info *fei, struct inode *i,
++    u64 foff, u64 len);
++static boolean_t extents_process(struct fiemap_extent_info *fei,
++    struct list_head *bl_candidates, struct nfsd4_layout_seg *, dev_t dev,
++    pnfs_blocklayout_layout_t *b);
++static void extents_cleanup(struct fiemap_extent_info *fei);
++
++void
++nfsd_bl_init(void)
++{
++	int	i;
++	dprintk("%s loaded\n", __func__);
++
++	spin_lock_init(&layout_hashtbl_lock);
++	INIT_LIST_HEAD(&layout_hash);
++	for (i = 0; i < BL_LAYOUT_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&layout_hashtbl[i]);
++	bl_init_proc();
++}
++
++/*
++ * pnfs_block_enabled -- check to see if this file system should be exported
++ * as block pnfs
++ */
++int
++pnfs_block_enabled(struct inode *inode, int ex_flags)
++{
++	bl_comm_msg_t	msg;
++	bl_comm_res_t	*res	= NULL;
++	static int bl_comm_once	= 0;
++	
++	dprintk("--> %s\n", __func__);
++	/*
++	 * FIXME: Figure out method to determine if this file system should
++	 * be exported. The following areas need to be checked.
++	 * (1) Validate that this file system was exported as a pNFS
++	 *     block-layout
++	 * (2) Has there been successful communication with the
++	 *     volume daemon?
++	 */
++	/* Check #1 */
++#ifdef notyet
++	if (!(ex_flags & NFSEXP_PNFS_BLOCK)) {
++		dprintk("%s: pnfs_block not set in export\n", __func__);
++		return 0;
++	}
++#endif
++	
++	/* Check #2 */
++	if (!bl_comm_once) {
++		msg.msg_type = PNFS_UPCALL_MSG_VERS;
++		msg.u.msg_vers = PNFS_UPCALL_VERS;
++		if (bl_upcall(bl_comm_global, &msg, &res)) {
++			dprintk("%s: Failed to contact pNFS block daemon\n",
++				__func__);
++			return 0;
++		}
++		if (msg.u.msg_vers != res->u.vers) {
++			dprintk("%s: vers mismatch, kernel != daemon\n",
++				__func__);
++			kfree(res);
++			return 0;
++		}
++	}
++	bl_comm_once = 1;
++
++	kfree(res);
++	
++	dprintk("<-- %s okay\n", __func__);
++	return 1;
++}
++
++int
++bl_layout_type(struct super_block *sb)
++{
++	return LAYOUT_BLOCK_VOLUME;
++}
++
++/* Single-device iterator: report sb->s_dev once, then return -ENOENT. */
++int
++bl_getdeviceiter(struct super_block *sb, u32 layout_type,
++		 struct nfsd4_pnfs_dev_iter_res *res)
++{
++	res->gd_eof = 1;
++	if (res->gd_cookie != 0)
++		return -ENOENT;
++	res->gd_cookie	= 1;
++	res->gd_verf	= 1;
++	res->gd_devid	= sb->s_dev;
++	return 0;
++}
++
++static int
++bl_getdeviceinfo_slice(struct super_block *sb, struct exp_xdr_stream *xdr,
++		       const struct nfsd4_pnfs_deviceid *devid)
++{
++	pnfs_blocklayout_devinfo_t	*bld_slice_p,
++					*bld_simple_p,
++					*bld;
++	int				status		= -EIO,
++					location	= 0;
++	struct list_head		volumes;
++	
++	dprintk("--> %s\n", __func__);
++	INIT_LIST_HEAD(&volumes);
++
++	bld_simple_p = bld_simple(&volumes, devid->devid,
++				  location++);
++	if (!bld_simple_p)
++		goto out;
++	bld_slice_p = bld_slice(&volumes, devid->devid, location++,
++	    bld_simple_p->bld_index_loc);
++
++	if (!bld_slice_p)
++		goto out;
++	
++	status = blocklayout_encode_devinfo(xdr, &volumes);
++
++out:
++	while (!list_empty(&volumes)) {
++		bld = list_entry(volumes.next, pnfs_blocklayout_devinfo_t,
++		    bld_list);
++		if (bld->bld_type == PNFS_BLOCK_VOLUME_SIMPLE)
++			kfree(bld->u.simple.bld_sig);
++		bld_free(bld);
++	}
++	
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++/* Encode a dm stripe device tree; -EIO on any failure before encoding. */
++static int
++bl_getdeviceinfo_dm(struct super_block *sb, struct exp_xdr_stream *xdr,
++		    const struct nfsd4_pnfs_deviceid *devid)
++{
++	pnfs_blocklayout_devinfo_t	*bld = NULL, *vol;	// bld: stripe volume
++	int				status = -EIO, i, location = 0;
++	struct list_head		volumes;
++	bl_comm_msg_t			msg;
++	bl_comm_res_t			*res = NULL;	// freed at out: even on failure
++
++	dprintk("--> %s\n", __func__);
++	INIT_LIST_HEAD(&volumes);
++
++	msg.msg_type = PNFS_UPCALL_MSG_DMGET;
++	msg.u.msg_dev = devid->devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("%s: upcall for DMGET failed\n", __func__);
++		goto out;
++	}
++
++	/*
++	 * Don't use bld_alloc() here. If used this will be the first volume
++	 * type added to the list whereas the protocol requires it to be the
++	 * last.
++	 */
++	bld = kzalloc(sizeof (*bld), GFP_KERNEL);
++	if (!bld)
++		goto out;
++	bld->bld_type			= PNFS_BLOCK_VOLUME_STRIPE;
++	bld->u.stripe.bld_stripes	= res->u.stripe.num_stripes;
++	bld->u.stripe.bld_chunk_size	= res->u.stripe.stripe_size * 512LL;
++	dprintk("%s: stripes %d, chunk_size %Lu\n", __func__,
++	    bld->u.stripe.bld_stripes, bld->u.stripe.bld_chunk_size / 512LL);
++
++	bld->u.stripe.bld_stripe_indexs = kmalloc(bld->u.stripe.bld_stripes *
++						  sizeof (int), GFP_KERNEL);
++	if (!bld->u.stripe.bld_stripe_indexs)
++		goto out;
++
++	for (i = 0; i < bld->u.stripe.bld_stripes; i++) {
++		dev_t			dev;
++		pnfs_blocklayout_devinfo_t	*bldp;
++
++		dev = MKDEV(res->u.stripe.devs[i].major,
++			    res->u.stripe.devs[i].minor);
++		if (dev == 0)
++			goto out;
++
++		bldp = bld_simple(&volumes, dev, location++);
++		if (!bldp) {
++			dprintk("%s: bld_simple failed\n", __func__);
++			goto out;
++		}
++		bldp = bld_slice(&volumes, dev, location++, bldp->bld_index_loc);
++		if (!bldp) {
++			dprintk("%s: bld_slice failed\n", __func__);
++			goto out;
++		}
++		bld->u.stripe.bld_stripe_indexs[i] = bldp->bld_index_loc;
++	}
++	list_add_tail(&bld->bld_list, &volumes);
++	bld = NULL;	/* now owned by the volumes list */
++	status = blocklayout_encode_devinfo(xdr, &volumes);
++
++out:
++	while (!list_empty(&volumes)) {
++		vol = list_entry(volumes.next, pnfs_blocklayout_devinfo_t,
++		    bld_list);
++		switch (vol->bld_type) {
++			case PNFS_BLOCK_VOLUME_SLICE:
++			case PNFS_BLOCK_VOLUME_CONCAT:
++				// No memory to release for these
++				break;
++			case PNFS_BLOCK_VOLUME_SIMPLE:
++				kfree(vol->u.simple.bld_sig);
++				break;
++			case PNFS_BLOCK_VOLUME_STRIPE:
++				kfree(vol->u.stripe.bld_stripe_indexs);
++				break;
++		}
++		bld_free(vol);
++	}
++	if (bld)	/* failed before the stripe volume was queued */
++		kfree(bld->u.stripe.bld_stripe_indexs);
++	kfree(bld);
++	kfree(res);
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++/*
++ * bl_getdeviceinfo -- determine device tree for requested devid
++ */
++int
++bl_getdeviceinfo(struct super_block *sb, struct exp_xdr_stream *xdr,
++		 u32 layout_type,
++		 const struct nfsd4_pnfs_deviceid *devid)
++{
++	if (device_slice(devid->devid) == True)
++		return bl_getdeviceinfo_slice(sb, xdr, devid);
++	else if (device_dm(devid->devid) == True)
++		return bl_getdeviceinfo_dm(sb, xdr, devid);
++	return -EINVAL;
++}
++
++enum nfsstat4
++bl_layoutget(struct inode *i, struct exp_xdr_stream *xdr,
++	     const struct nfsd4_pnfs_layoutget_arg *arg,
++	     struct nfsd4_pnfs_layoutget_res *res)
++{
++	pnfs_blocklayout_layout_t	*b;
++	bl_layout_rec_t			*r;
++	struct list_head		bl_possible,
++					*bl_candidates	= NULL;
++	boolean_t			del_on_error	= False;
++	int				adj;
++	enum nfsstat4			nfserr		= NFS4_OK;
++	
++	dprintk("--> %s (inode=[0x%x:%lu], offset=%Lu, len=%Lu, iomode=%d)\n",
++	    __func__, i->i_sb->s_dev, i->i_ino, _2SECTS(res->lg_seg.offset),
++	    _2SECTS(res->lg_seg.length), res->lg_seg.iomode);
++
++	if (res->lg_seg.length == 0) {
++		printk("%s: request length of 0, error condition\n", __func__);
++		return NFS4ERR_BADLAYOUT;
++	}
++	
++	/*
++	 * Adjust the length as required per spec.
++	 * - First case is where the length is set to (u64)-1. Cheap means to
++	 *   define the end of the file.
++	 * - Second case is where the I/O mode is read-only, but the request is
++	 *   past the end of the file so the request needs to be trimmed.
++	 */
++	if ((res->lg_seg.length == NFS4_MAX_UINT64) ||
++	    (((res->lg_seg.offset + res->lg_seg.length) > i->i_size) &&
++	     (res->lg_seg.iomode == IOMODE_READ)))
++		res->lg_seg.length = i->i_size - res->lg_seg.offset;
++	
++	adj = (res->lg_seg.offset & 511) ? res->lg_seg.offset & 511 : 0;
++	res->lg_seg.offset -= adj;
++	res->lg_seg.length = (res->lg_seg.length + adj + 511) & ~511;
++	
++	if (res->lg_seg.iomode != IOMODE_READ)
++		if (i->i_op->fallocate(i, FALLOC_FL_KEEP_SIZE,
++				       res->lg_seg.offset, res->lg_seg.length))
++			return NFS4ERR_IO;
++		
++	INIT_LIST_HEAD(&bl_possible);
++	
++	if ((r = layout_inode_find(i)) == NULL) {
++		if (layout_inode_add(i, &r) == False) {
++			printk("%s: layout_inode_add failed\n", __func__);
++			return NFS4ERR_IO;
++		}
++		del_on_error = True;
++	}
++	BUG_ON(!r);
++	
++	spin_lock(&r->blr_lock);
++	
++	if (layout_cache_fill_from(r, &bl_possible, &res->lg_seg)) {
++		/*
++		 * This will send NFS4ERR_LAYOUTTRYLATER to the client.
++		 */
++		dprintk("%s: layout_cache_fill_from() failed\n", __func__);
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	
++	res->lg_return_on_close	= 1;
++	res->lg_seg.length	= 0;
++	
++	bl_candidates = layout_cache_iter(r, &bl_possible, &res->lg_seg);
++	if (!bl_candidates) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	
++	layout_cache_merge(r, bl_candidates);
++	if (layout_cache_update(r, bl_candidates)) {
++		/* ---- Failed to allocate memory. ---- */
++		dprintk("%s: layout_cache_update() failed\n", __func__);
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	
++	nfserr = blocklayout_encode_layout(xdr, bl_candidates);
++	if (nfserr)
++		dprintk("%s: layoutget xdr routine failed\n", __func__);
++	
++layoutget_cleanup:
++	if (bl_candidates) {
++		while (!list_empty(bl_candidates)) {
++			b = list_entry(bl_candidates->next,
++			    struct pnfs_blocklayout_layout, bll_list);
++			list_del(&b->bll_list);
++			kfree(b);
++		}
++	}
++
++	spin_unlock(&r->blr_lock);
++	if (unlikely(nfserr)) {
++		if (del_on_error == True)
++			layout_inode_del(i);
++		res->lg_seg.length = 0;
++		res->lg_seg.offset = 0;
++	}
++	
++	dprintk("<-- %s (rval %u)\n", __func__, nfserr);
++	return nfserr;
++}
++
++/*
++ * bl_layoutcommit -- commit changes, especially size, to the file system
++ *
++ * Currently this routine isn't called and everything is handled within
++ * nfsd4_layoutcommit(). By not calling this routine the server doesn't
++ * handle a partial return, a set of extents, of the layout. The extents
++ * are decoded here, but nothing is done with them. If this routine is to
++ * be called the interface must change to pass the 'dentry' pointer such
++ * that notify_change() can be called. Returns 0 or a negative errno.
++ */
++int
++bl_layoutcommit(struct inode *i,
++		const struct nfsd4_pnfs_layoutcommit_arg *args,
++		struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	bl_layout_rec_t			*r;
++	int				status	= 0;
++	u64				lw_plus;
++
++	dprintk("--> %s (ino [0x%x:%lu])\n", __func__, i->i_sb->s_dev, i->i_ino);
++	r = layout_inode_find(i);
++	if (r) {
++		lw_plus = args->lc_last_wr + 1;
++		if (args->lc_newoffset) {
++			dprintk("  lc_last_wr %Lu\n", lw_plus);
++			if (r->blr_orig_size < lw_plus) {
++				r->blr_orig_size	= lw_plus;
++				res->lc_size_chg	= 1;
++				res->lc_newsize		= lw_plus;
++			}
++		}
++
++		if (args->lc_up_len) {
++			int	extents = -1, idx;
++			struct pnfs_blocklayout_layout *b;
++			__be32 *p = args->lc_up_layout;
++			/*
++			 * Client returns extents which should/could be used to update
++			 * the file system (draft-ietf-nfsv4-pnfs-block-08, 2.3.2). The
++			 * count is untrusted: 4 + 44 * extents must fit in lc_up_len.
++			 */
++			if (args->lc_up_len >= 4)
++				READ32(extents);
++			dprintk("  Client returning %d extents: data size %d\n",
++			    extents, args->lc_up_len);
++			if (extents < 0 ||
++			    extents > (args->lc_up_len - 4) / 44) {
++				status = -EINVAL;
++			} else if (!(b = kcalloc(extents, sizeof (*b), GFP_KERNEL))) {
++				status = -ENOMEM;
++			} else {
++				for (idx = 0; idx < extents; idx++) {
++					READ64(b[idx].bll_vol_id.sbid);
++					READ64(b[idx].bll_vol_id.devid);
++					READ64(b[idx].bll_foff);
++					READ64(b[idx].bll_len);
++					READ64(b[idx].bll_soff);
++					READ32(b[idx].bll_es);
++					dprintk("  %d: foff %Lu, len %Lu, soff %Lu "
++					    "state %s\n",
++					    idx, _2SECTS(b[idx].bll_foff),
++					    _2SECTS(b[idx].bll_len),
++					    _2SECTS(b[idx].bll_soff),
++					    map_state2name(b[idx].bll_es));
++				}
++				kfree(b);
++			}
++		}
++	} else
++		dprintk("%s: Unexpected commit to inode %p\n", __func__, i);
++
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++int
++bl_layoutreturn(struct inode *i,
++		const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	bl_layout_rec_t			*rec;
++	int				rval	= 0;
++
++	/* Drop the returned segment, then let layout_inode_del() reap the record. */
++	dprintk("--> %s (ino [0x%x:%lu])\n", __func__, i->i_sb->s_dev, i->i_ino);
++	rec = layout_inode_find(i);
++	if (!rec)
++		goto reap;
++	spin_lock(&rec->blr_lock);
++	layout_cache_del(rec, &args->lr_seg);
++	spin_unlock(&rec->blr_lock);
++	dprintk("    ext_size %Lu, i_size %Lu, orig_size %Lu\n",
++	    rec->blr_ext_size, i->i_size, rec->blr_orig_size);
++reap:
++	layout_inode_del(i);
++	dprintk("<-- %s (rval %d)\n", __func__, rval);
++	return rval;
++}
++
++/*
++ * bl_layoutrecall -- ask the client to return the layout of a file
++ *
++ * Only RETURN_FILE is implemented; RETURN_FSID and RETURN_ALL just log and
++ * return 0. Returns -EINVAL for an unknown recall type, otherwise 0.
++ */
++int
++bl_layoutrecall(struct inode *inode, int type, u64 offset, u64 len)
++{
++	struct super_block		*sb;
++	struct nfsd4_pnfs_cb_layout	lr;
++	bl_layout_rec_t			*r;
++	pnfs_blocklayout_layout_t	*b;
++	u64				adj;
++
++	dprintk("--> %s\n", __func__);
++	BUG_ON(!len);
++	switch (type) {
++		case RETURN_FILE:
++			sb = inode->i_sb;
++			dprintk("  recalling layout [0x%x:%lu], %Lu:%Lu\n",
++			    inode->i_sb->s_dev, inode->i_ino,
++				_2SECTS(offset), _2SECTS(len));
++			break;
++		case RETURN_FSID:
++			dprintk("%s: recalling layout for fsid x (unimplemented)\n",
++				__func__);
++			return 0;
++		case RETURN_ALL:
++			/*
++			 * XXX figure out how to get a sb since there's no
++			 * inode ptr
++			 */
++			dprintk("%s: recalling all layouts (unimplemented)\n",
++				__func__);
++			return 0;
++		default:
++			return -EINVAL;
++	}
++
++restart:
++	r = layout_inode_find(inode);
++	if (r && len && !r->blr_recalled) {
++		spin_lock(&r->blr_lock);
++		list_for_each_entry(b, &r->blr_layouts, bll_list) {
++			if (!r->blr_recalled && !b->bll_recalled &&
++			    (offset >= b->bll_foff) && (offset < BLL_F_END(b))) {
++				b->bll_recalled		= 1;
++				lr.cbl_recall_type	= type;
++				lr.cbl_seg.layout_type	= LAYOUT_BLOCK_VOLUME;
++				lr.cbl_seg.clientid	= 0;
++				lr.cbl_seg.offset	= 0;
++				lr.cbl_seg.length	= NFS4_MAX_UINT64;
++				r->blr_recalled		= 1;
++				dprintk("  FULL LAYOUTRECALL\n");
++				lr.cbl_seg.iomode = IOMODE_ANY;
++				/*
++				 * Layouts are only recalled because of (1) an
++				 * NFS_WRITE to the layout or (2) a truncate,
++				 * which invalidates it immediately. In both
++				 * cases the client must push uncommitted
++				 * modifications to the server via NFS_WRITE.
++				 */
++				lr.cbl_layoutchanged = 1;
++				/*
++				 * Compute the span covered by this entry now:
++				 * once the lock is dropped the layoutreturn
++				 * triggered by the callback may free 'b'.
++				 * The lock must be dropped because that
++				 * layoutreturn arrives on this same thread
++				 * and would deadlock waiting for it.
++				 */
++				adj = MIN(b->bll_len - (offset - b->bll_foff),
++				    len);
++				spin_unlock(&r->blr_lock);
++				nfsd_layout_recall_cb(sb, inode, &lr);
++				offset += adj;
++				len -= adj;
++				if (!len) {
++					spin_lock(&r->blr_lock);
++					break;
++				}
++				/*
++				 * layoutreturn will have been called, so
++				 * blr_layouts may have changed: restart.
++				 */
++				goto restart;
++			}
++		}
++		spin_unlock(&r->blr_lock);
++	}
++
++	dprintk("<-- %s\n", __func__);
++	return 0;
++}
++
++/*
++ * []------------------------------------------------------------------[]
++ * | Support functions from here on down.				|
++ * []------------------------------------------------------------------[]
++ */
++
++/*
++ * bld_simple -- given a dev_t build a simple volume structure
++ *
++ * Simple volume contains the device signature and offset to that data in
++ * the storage volume. Returns the new entry, which bld_alloc() has queued
++ * on 'volumes', or NULL if the upcall or an allocation fails.
++ */
++static pnfs_blocklayout_devinfo_t *
++bld_simple(struct list_head *volumes, dev_t devid, int local_index)
++{
++	pnfs_blocklayout_devinfo_t	*bld	= NULL;
++	bl_comm_msg_t			msg;
++	bl_comm_res_t			*res	= NULL;
++
++	msg.msg_type = PNFS_UPCALL_MSG_GETSIG;
++	msg.u.msg_dev = devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("%s: Failed to get signature information\n", __func__);
++		goto error;
++	}
++
++	bld = bld_alloc(volumes, PNFS_BLOCK_VOLUME_SIMPLE);
++	if (!bld)
++		goto error;		/* the upcall result must still be freed */
++
++	bld->u.simple.bld_offset = (res->u.sig.sector * 512LL) + res->u.sig.offset;
++	bld->u.simple.bld_sig_len = res->u.sig.len;
++	bld->u.simple.bld_sig = kmalloc(res->u.sig.len, GFP_KERNEL);
++	if (!bld->u.simple.bld_sig)
++		goto error;
++
++	memcpy(bld->u.simple.bld_sig, res->u.sig.sig, res->u.sig.len);
++	kfree(res);
++	return bld;
++
++error:
++	if (bld)
++		bld_free(bld);
++	kfree(res);
++	dprintk("%s: error in bld_simple\n", __func__);
++	return NULL;
++}
++
++/*
++ * bld_slice -- build a slice volume structure for the given dev_t
++ *
++ * The slice records the length of the slice/partition and its offset from
++ * the start of the storage volume (the upcall values are multiplied by 512)
++ * plus 'simple_loc', the index of the "simple" volume containing it.
++ * Returns the new entry, queued on 'volumes' by bld_alloc(), or NULL.
++ */
++static pnfs_blocklayout_devinfo_t *
++bld_slice(struct list_head *volumes, dev_t devid, int my_loc, int simple_loc)
++{
++	bl_comm_msg_t			req;
++	bl_comm_res_t			*reply;
++	pnfs_blocklayout_devinfo_t	*vol;
++
++	dprintk("--> %s\n", __func__);
++	vol = bld_alloc(volumes, PNFS_BLOCK_VOLUME_SLICE);
++	if (vol == NULL)
++		return NULL;
++
++	/* Upcall for the start and length of the slice. */
++	req.u.msg_dev	= devid;
++	req.msg_type	= PNFS_UPCALL_MSG_GETSLICE;
++	if (bl_upcall(bl_comm_global, &req, &reply) != 0) {
++		dprintk("Upcall to get slice info failed\n");
++		bld_free(vol);
++		return NULL;
++	}
++
++	vol->bld_index_loc	= my_loc;
++	vol->bld_devid.devid	= devid;
++	vol->u.slice.bld_index	= simple_loc;
++	vol->u.slice.bld_start	= reply->u.slice.start * 512LL;
++	vol->u.slice.bld_len	= reply->u.slice.length * 512LL;
++	dprintk("%s: start %Lu, len %Lu\n", __func__,
++		vol->u.slice.bld_start / 512LL, vol->u.slice.bld_len / 512LL);
++	kfree(reply);
++	dprintk("<-- %s (rval %p)\n", __func__, vol);
++	return vol;
++}
++
++static int
++layout_cache_fill_from(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*rest;
++
++	/* Cover what we can from the cache; this consumes 'seg' as it goes. */
++	dprintk("--> %s\n", __func__);
++	if (!list_empty(&r->blr_layouts) &&
++	    layout_cache_fill_from_list(r, h, seg) == False)
++		return -EIO;
++
++	/*
++	 * Whatever is left of the request gets a brand new entry. That is
++	 * either the whole range, when nothing is cached yet, or the
++	 * residual lying beyond the layouts already in the cache.
++	 */
++	if (seg->length == 0)
++		goto done;
++
++	rest = bll_alloc(seg->offset, seg->length, BLOCK_LAYOUT_NEW, h);
++	if (rest == NULL)
++		return -ENOMEM;
++	dprintk("  remaining at %Lu, len %Lu\n", _2SECTS(rest->bll_foff),
++		_2SECTS(rest->bll_len));
++done:
++	dprintk("<-- %s\n", __func__);
++	return 0;
++}
++
++/*
++ * layout_cache_iter -- turn the ranges on 'bl_possible' into pNFS extents
++ *
++ * New ranges are resolved with fiemap, cached ones are copied. seg->length
++ * accumulates what was mapped and seg->offset becomes the start of the first
++ * extent. Returns a kmalloc'ed list head holding the extents, or NULL on
++ * failure. 'bl_possible' is emptied in both cases.
++ */
++struct list_head *
++layout_cache_iter(bl_layout_rec_t *r, struct list_head *bl_possible,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*n;
++	struct list_head		*bl_candidates;
++	struct fiemap_extent_info	fei;
++	struct inode			*i	= r->blr_inode;
++	dev_t				dev	= r->blr_rdev;
++
++	dprintk("--> %s\n", __func__);
++	extents_setup(&fei);
++	bl_candidates = kmalloc(sizeof (*bl_candidates), GFP_KERNEL);
++	if (!bl_candidates)
++		goto cleanup;
++	INIT_LIST_HEAD(bl_candidates);
++
++	list_for_each_entry(b, bl_possible, bll_list) {
++		if (b->bll_cache_state == BLOCK_LAYOUT_NEW) {
++			extents_count(&fei, i, b->bll_foff, b->bll_len);
++			if (fei.fi_extents_mapped) {
++				/* Common case: turn the extents into pNFS extents. */
++				if (extents_get(&fei, i, b->bll_foff,
++				    b->bll_len) == False)
++					goto cleanup;
++				if (extents_process(&fei, bl_candidates,
++				    seg, dev, b) == False)
++					goto cleanup;
++				extents_cleanup(&fei);
++			} else if (seg->iomode == IOMODE_READ) {
++				/*
++				 * A hole while reading is fine: tell the
++				 * client the range has no backing store.
++				 */
++				n = bll_alloc(b->bll_foff, b->bll_len,
++				    BLOCK_LAYOUT_NEW, bl_candidates);
++				if (!n)
++					goto cleanup;
++				n->bll_es = PNFS_BLOCK_NONE_DATA;
++				n->bll_vol_id.sbid = 0;
++				n->bll_vol_id.devid = dev;
++				seg->length += b->bll_len;
++			} else {
++				/*
++				 * For read/write fallocate should already
++				 * have allocated storage for the range.
++				 */
++				dprintk("    Extent count for RW is 0\n");
++				goto cleanup;
++			}
++		} else {
++			n = bll_alloc_dup(b, b->bll_cache_state, bl_candidates);
++			if (!n)
++				goto cleanup;
++			seg->length += n->bll_len;
++		}
++
++		if (r->blr_ext_size < (b->bll_foff + b->bll_len))
++			r->blr_ext_size = b->bll_foff + b->bll_len;
++	}
++
++	/* An empty candidate list has no first extent to report. */
++	if (!list_empty(bl_candidates)) {
++		b = list_first_entry(bl_candidates,
++		    struct pnfs_blocklayout_layout, bll_list);
++		seg->offset = b->bll_foff;
++	}
++	dprintk("<-- %s okay\n", __func__);
++	goto out;
++cleanup:
++	extents_cleanup(&fei);
++	if (bl_candidates) {
++		list_for_each_entry_safe(b, n, bl_candidates, bll_list)
++			kfree(b);
++		kfree(bl_candidates);
++		bl_candidates = NULL;
++	}
++	dprintk("<-- %s, error occurred\n", __func__);
++out:
++	list_for_each_entry_safe(b, n, bl_possible, bll_list) {
++		list_del(&b->bll_list);
++		kfree(b);
++	}
++	return bl_candidates;
++}
++
++/*
++ * layout_cache_merge -- collapse layouts which make up a contiguous range.
++ *
++ * Walks 'h' once and folds an entry into its predecessor when both have the
++ * same extent state and are adjacent: on storage for data extents, in the
++ * file for PNFS_BLOCK_NONE_DATA holes. 'r' is not used.
++ */
++static void
++layout_cache_merge(bl_layout_rec_t *r, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*next,
++					*p	= NULL;
++
++	dprintk("--> %s\n", __func__);
++	/* 'b' may be freed inside the loop, hence the _safe iterator. */
++	list_for_each_entry_safe(b, next, h, bll_list) {
++		if (p && (BLL_S_END(p) == b->bll_soff) &&
++		    (p->bll_es == b->bll_es) &&
++		    (b->bll_es != PNFS_BLOCK_NONE_DATA)) {
++			/* Contiguous on storage: we've got a candidate. */
++#ifdef too_verbose
++			dprintk("  merge %Lu(f):%Lu(l):%Lu(s) into %Lu(f):%Lu(l):%Lu(s)\n",
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				_2SECTS(b->bll_soff),
++				_2SECTS(p->bll_foff), _2SECTS(p->bll_len),
++				_2SECTS(p->bll_soff));
++#endif
++			if (p->bll_cache_state == BLOCK_LAYOUT_CACHE)
++				p->bll_cache_state = BLOCK_LAYOUT_UPDATE;
++		} else if (!p || (BLL_F_END(p) != b->bll_foff) ||
++			   (p->bll_es != b->bll_es) ||
++			   (b->bll_es != PNFS_BLOCK_NONE_DATA)) {
++			/* Nothing to merge; 'b' is the next predecessor. */
++			p = b;
++			continue;
++		}
++
++		/* Absorb 'b' into 'p'; 'p' stays put for the next entry. */
++		p->bll_len += b->bll_len;
++		list_del(&b->bll_list);
++		kfree(b);
++	}
++	dprintk("<-- %s\n", __func__);
++}
++
++/*
++ * layout_cache_update -- fold the layouts on 'h' into the cache of 'r'
++ *
++ * BLOCK_LAYOUT_UPDATE entries grow the cached entry they extend and
++ * BLOCK_LAYOUT_NEW entries are inserted in file offset order; an empty
++ * cache simply receives a copy of every entry. Returns 0 or -ENOMEM.
++ */
++static int
++layout_cache_update(bl_layout_rec_t *r, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*c,
++					*n;
++	int				status = 0;
++
++	dprintk("--> %s\n", __func__);
++	if (list_empty(&r->blr_layouts)) {
++		/* ---- Just add entries and return ---- */
++		dprintk("  cache empty for inode 0x%x:%ld\n", r->blr_rdev,
++			r->blr_inode->i_ino);
++		list_for_each_entry(b, h, bll_list) {
++			c = bll_alloc_dup(b, BLOCK_LAYOUT_CACHE,
++					  &r->blr_layouts);
++			if (!c) {
++				status = -ENOMEM;
++				break;
++			}
++			dprintk("    adding %Lu(f):%Lu(l):%Lu(s):%d\n",
++				_2SECTS(c->bll_foff), _2SECTS(c->bll_len),
++				_2SECTS(c->bll_soff), c->bll_es);
++		}
++		return status;
++	}
++
++	list_for_each_entry(b, h, bll_list) {
++		BUG_ON(!b->bll_vol_id.devid);
++		if (b->bll_cache_state == BLOCK_LAYOUT_UPDATE) {
++			boolean_t found = False;
++			list_for_each_entry(c, &r->blr_layouts, bll_list) {
++				if ((b->bll_soff >= c->bll_soff) &&
++				    (b->bll_soff < BLL_S_END(c)) &&
++				    (b->bll_es != PNFS_BLOCK_NONE_DATA)) {
++					u64	u;
++
++					if ((b->bll_foff < c->bll_foff) ||
++					    (b->bll_foff > BLL_F_END(c)))
++						BUG();
++
++					u = BLL_S_END(b) - BLL_S_END(c);
++					/*
++					 * The updated cache entry has to be
++					 * different than the current.
++					 * Otherwise the cache state for 'b'
++					 * should be BLOCK_LAYOUT_CACHE.
++					 */
++					BUG_ON(BLL_S_END(b) < BLL_S_END(c));
++
++					dprintk("  "
++						"updating %Lu(f):%Lu(l):%Lu(s) to len %Lu\n",
++						_2SECTS(c->bll_foff),
++						_2SECTS(c->bll_len),
++						_2SECTS(c->bll_soff),
++						_2SECTS(c->bll_len + u));
++					c->bll_len += u;
++					bll_collapse(r, c);
++					found = True;
++					break;
++				}
++			}
++
++			if (found == False) {
++				dprintk("  ERROR Expected to find"
++				    " %Lu(f):%Lu(l):%Lu(s), but didn't\n",
++				    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				    _2SECTS(b->bll_soff));
++				list_for_each_entry(c, &r->blr_layouts, bll_list)
++					print_bll(c, "Cached");
++				BUG();
++			}
++		} else if (b->bll_cache_state == BLOCK_LAYOUT_NEW) {
++			c = list_first_entry(&r->blr_layouts,
++			    struct pnfs_blocklayout_layout, bll_list);
++			if (b->bll_foff < c->bll_foff) {
++				/*
++				 * Special case where new entry is before
++				 * first cached entry.
++				 */
++				c = bll_alloc_dup(b, BLOCK_LAYOUT_CACHE, NULL);
++				if (!c)
++					return -ENOMEM;
++				list_add(&c->bll_list, &r->blr_layouts);
++				dprintk("  new entry at head of list at %Lu, "
++					"len %Lu\n",
++					_2SECTS(c->bll_foff), _2SECTS(c->bll_len));
++			} else {
++				list_for_each_entry(c, &r->blr_layouts,
++				    bll_list) {
++					n = list_entry(c->bll_list.next,
++					    struct pnfs_blocklayout_layout,
++					    bll_list);
++					/*
++					 * Insert after 'c' when the new entry
++					 * starts at or past the end of 'c' and
++					 * before the next entry. E.g. with
++					 * cached starts 10, 30 and 50, each of
++					 * length 5, an entry starting at 40 is
++					 * added after 30. When 'c' is the last
++					 * entry 'n' is really the list head and
++					 * must not be examined.
++					 */
++					if (((b->bll_foff >=
++					      BLL_F_END(c)) &&
++					     (c->bll_list.next == &r->blr_layouts)) ||
++					    ((b->bll_foff >=
++					      BLL_F_END(c)) &&
++					     (b->bll_foff < n->bll_foff))) {
++						n = bll_alloc_dup(b,
++								  BLOCK_LAYOUT_CACHE, NULL);
++						if (!n)
++							return -ENOMEM;
++						dprintk("  adding new %Lu:%Lu"
++							" after %Lu:%Lu\n",
++							_2SECTS(n->bll_foff),
++							_2SECTS(n->bll_len),
++							_2SECTS(c->bll_foff),
++							_2SECTS(c->bll_len));
++						list_add(&n->bll_list,
++							 &c->bll_list);
++						break;
++					}
++				}
++			}
++		}
++	}
++	dprintk("<-- %s\n", __func__);
++	return status;
++}
++
++/*
++ * layout_cache_del -- remove a returned segment from the cache of 'r'
++ *
++ * A length of NFS4_MAX_UINT64 drops every cached layout; otherwise cache
++ * lines are trimmed, removed or split. bl_layoutreturn() holds blr_lock.
++ */
++static void
++layout_cache_del(bl_layout_rec_t *r, const struct nfsd4_layout_seg *seg_in)
++{
++	struct pnfs_blocklayout_layout	*b,
++					*n;
++	u64				len;
++	struct nfsd4_layout_seg		seg = *seg_in;
++
++	dprintk("--> %s\n", __func__);
++	if (seg.length == NFS4_MAX_UINT64) {
++		r->blr_recalled = 0;
++		dprintk("  Fast return of all layouts\n");
++		while (!list_empty(&r->blr_layouts)) {
++			b = list_entry(r->blr_layouts.next,
++				       struct pnfs_blocklayout_layout, bll_list);
++			dprintk("    foff %Lu, len %Lu, soff %Lu\n",
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				_2SECTS(b->bll_soff));
++			list_del(&b->bll_list);
++			kfree(b);
++		}
++		dprintk("<-- %s\n", __func__);
++		return;
++	}
++
++restart:
++	list_for_each_entry(b, &r->blr_layouts, bll_list) {
++		if (seg.offset == b->bll_foff) {
++			/*
++			 * The returned layout starts where this cache line
++			 * does and covers (1) all of it, (2) only its leading
++			 * portion or (3) all of it and more of the next line.
++			 */
++			dprintk("  match on offsets, %Lu:%Lu\n",
++				_2SECTS(seg.offset), _2SECTS(seg.length));
++			len = MIN(seg.length, b->bll_len);
++			b->bll_foff	+= len;
++			b->bll_soff	+= len;
++			b->bll_len	-= len;
++			seg.length	-= len;
++			seg.offset	+= len;
++			if (!b->bll_len) {
++				list_del(&b->bll_list);
++				kfree(b);
++				dprintk("    removing cache line\n");
++				if (!seg.length) {
++					dprintk("    also finished\n");
++					goto complete;
++				}
++				/*
++				 * 'b' was freed so the iterator can't step to
++				 * the next entry: restart the loop.
++				 */
++				goto restart;
++			}
++			if (!seg.length) {
++				dprintk("    finished, but cache line not "
++					"empty\n");
++				goto complete;
++			}
++		} else if ((seg.offset >= b->bll_foff) &&
++		    (seg.offset < BLL_F_END(b))) {
++			/* layout being returned is within this cache line. */
++			dprintk("  layout %Lu:%Lu within cache line %Lu:%Lu\n",
++				_2SECTS(seg.offset), _2SECTS(seg.length),
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len));
++			BUG_ON(!seg.length);
++			if ((seg.offset + seg.length) >= BLL_F_END(b)) {
++				/*
++				 * Starts mid entry, reaches its end or beyond.
++				 */
++				dprintk("    trim back cache line\n");
++				len = seg.offset - b->bll_foff;
++				seg.offset += b->bll_len - len;
++				seg.length -= b->bll_len - len;
++				b->bll_len = len;
++				if (!seg.length)
++					return;
++			} else {
++				/*
++				 * A chunk is removed from the middle: split off
++				 * a tail entry which inherits state and volume.
++				 */
++				dprintk("    split cache line\n");
++				len = seg.offset + seg.length;
++				n = bll_alloc(len,
++					      (b->bll_foff + b->bll_len) - len,
++					      BLOCK_LAYOUT_CACHE, NULL);
++				if (!n)
++					return;	/* no memory: leave 'b' whole */
++				n->bll_es = b->bll_es;
++				n->bll_vol_id.devid = b->bll_vol_id.devid;
++				n->bll_soff = b->bll_soff + (len - b->bll_foff);
++				list_add(&n->bll_list, &b->bll_list);
++				b->bll_len = seg.offset - b->bll_foff;
++				return;
++			}
++		}
++	}
++complete:
++	if (list_empty(&r->blr_layouts))
++		r->blr_recalled = 0;
++	dprintk("<-- %s\n", __func__);
++}
++
++/*
++ * layout_cache_fill_from_list -- fills from cache list
++ *
++ * Gaps in front of a cached layout get a BLOCK_LAYOUT_NEW entry on 'h',
++ * overlaps get a BLOCK_LAYOUT_CACHE one; 'seg' shrinks as it is covered.
++ * Returns False on an iomode conflict or allocation failure. This routine
++ * was only separated out from layout_cache_fill_from() to reduce nesting.
++ */
++static inline boolean_t
++layout_cache_fill_from_list(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*n;
++	enum pnfs_block_extent_state4	s;
++
++	list_for_each_entry(b, &r->blr_layouts, bll_list) {
++		if (seg->offset < b->bll_foff) {
++			n = bll_alloc(seg->offset,
++			    MIN(seg->length, b->bll_foff - seg->offset),
++			    BLOCK_LAYOUT_NEW, NULL);
++			if (!n)
++				return False;
++			list_add(&n->bll_list, h->prev);
++			dprintk("  new: %Lu:%Lu, added before %Lu:%Lu\n",
++			    _2SECTS(n->bll_foff), _2SECTS(n->bll_len),
++			    _2SECTS(b->bll_foff), _2SECTS(b->bll_len));
++			seg->offset += n->bll_len;
++			seg->length -= n->bll_len;
++			if (!seg->length)
++				break;
++		}
++
++		if ((seg->offset >= b->bll_foff) &&
++		    (seg->offset < BLL_F_END(b))) {
++			if (layout_conflict(b, seg->iomode, &s) == False) {
++				dprintk("  CONFLICT FOUND: "
++				    "%Lu(f):%Lu(l):%Lu(s) state %d, iomode %d\n",
++				    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				    _2SECTS(b->bll_soff), b->bll_es,
++				    seg->iomode);
++				return False;
++			}
++			n = bll_alloc(seg->offset,
++			    MIN(seg->length, BLL_F_END(b) - seg->offset),
++			    BLOCK_LAYOUT_CACHE, h);
++			if (!n)		/* check before the trace below uses 'n' */
++				return False;
++			dprintk("  CACHE hit: Found %Lu(f):%Lu(l): "
++			    "in %Lu(f):%Lu(l):%Lu(s):%d\n",
++			    _2SECTS(n->bll_foff), _2SECTS(n->bll_len),
++			    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++			    _2SECTS(b->bll_soff), b->bll_es);
++			n->bll_soff = b->bll_soff + seg->offset - b->bll_foff;
++			n->bll_vol_id.sbid = 0;
++			n->bll_vol_id.devid = b->bll_vol_id.devid;
++			n->bll_es = s;
++			seg->offset += n->bll_len;
++			seg->length -= n->bll_len;
++			if (!seg->length)
++				break;
++		}
++	}
++	return True;
++}
++
++static u64
++bll_alloc_holey(struct list_head *bl_candidates, u64 offset, u64 length,
++    dev_t dev)
++{
++	pnfs_blocklayout_layout_t	*hole;
++
++	/* Queue a NONE_DATA extent for the hole; 0 means the allocation failed. */
++	hole = bll_alloc(offset, length, BLOCK_LAYOUT_NEW, bl_candidates);
++	if (hole != NULL) {
++		hole->bll_vol_id.devid = dev;
++		hole->bll_vol_id.sbid = 0;
++		hole->bll_es = PNFS_BLOCK_NONE_DATA;
++	}
++	return hole ? hole->bll_len : 0;
++}
++
++static void extents_setup(struct fiemap_extent_info *fei)
++{
++	/* No extent array yet, so extents_cleanup() has nothing to free. */
++	fei->fi_extents_start = NULL;
++}
++
++/*
++ * extents_count -- count the extents backing a range of the file
++ *
++ * fi_extents_max is 0, so fiemap only counts (into fi_extents_mapped) and
++ * copies nothing out; hence no set_fs(). Its return value is not examined.
++ */
++static void
++extents_count(struct fiemap_extent_info *fei, struct inode *i, u64 foff, u64 len)
++{
++	u64	span = len + (1 << i->i_sb->s_blocksize_bits) - 1;
++
++	dprintk("    Need fiemap of %Ld:%Ld\n", _2SECTS(foff), _2SECTS(len));
++	fei->fi_extents_start	= NULL;
++	fei->fi_extents_max	= fei->fi_extents_mapped = 0;
++	fei->fi_flags		= FIEMAP_FLAG_SYNC;
++	i->i_op->fiemap(i, fei, foff, span);
++}
++
++/*
++ * extents_get -- Get list of extents for range
++ *
++ * extents_count() must have been called before this routine such that
++ * fi_extents_mapped is known. On success the array is left in
++ * fi_extents_start for extents_cleanup() to free; on failure it is freed.
++ */
++static boolean_t
++extents_get(struct fiemap_extent_info *fei, struct inode *i, u64 foff, u64 len)
++{
++	int			rval;
++	struct fiemap_extent	*fe;
++	mm_segment_t		old_fs = get_fs();
++
++	/*
++	 * Allocate zeroed space for the number of extents counted (kcalloc
++	 * also guards the size multiplication). It's possible for the file
++	 * to have changed between calls which would require more space for
++	 * the extents. If that occurs the last extent will not have
++	 * FIEMAP_EXTENT_LAST set; extents_process() does not check for that.
++	 */
++	fe = kcalloc(fei->fi_extents_mapped, sizeof (*fe), GFP_KERNEL);
++	if (!fe)
++		return False;
++
++	fei->fi_extents_max	= fei->fi_extents_mapped;
++	fei->fi_extents_mapped	= 0;
++	fei->fi_extents_start	= fe;
++
++	/*
++	 * fiemap copies out with copy_to_user(); let it reach kernel memory.
++	 */
++	set_fs(KERNEL_DS);
++	rval = i->i_op->fiemap(i, fei, foff, len +
++	    (1 << i->i_sb->s_blocksize_bits) - 1);
++	set_fs(old_fs);
++
++	if (rval || !fei->fi_extents_mapped) {
++		dprintk("    No extents. Wanted %d, got %d\n",
++			fei->fi_extents_max, fei->fi_extents_mapped);
++		kfree(fe);
++		fei->fi_extents_start = NULL;
++		return False;
++	}
++	return True;
++}
++
++/*
++ * extents_process -- runs through the extents returned from the file system
++ *	 and creates block layout entries.
++ *
++ * One entry is queued on 'bl_candidates' per extent, plus a NONE_DATA entry
++ * for every hole between two extents; seg->length grows by what was queued.
++ * Returns False when an entry can't be allocated. 'b' is not used.
++ */
++static boolean_t
++extents_process(struct fiemap_extent_info *fei, struct list_head *bl_candidates,
++    struct nfsd4_layout_seg *seg, dev_t dev, pnfs_blocklayout_layout_t *b)
++{
++	struct fiemap_extent		*cur,
++					*prev	= NULL;
++	pnfs_blocklayout_layout_t	*ext;
++	u64				gap_start,
++					queued;
++	int				i;
++
++	dprintk("--> %s\n", __func__);
++	for (i = 0; i < fei->fi_extents_mapped; i++, prev = cur) {
++		cur = &fei->fi_extents_start[i];
++		BUG_ON(!cur->fe_physical);
++
++		/*
++		 * When the previous extent doesn't end logically where this
++		 * one begins the file has a hole there; describe it with a
++		 * pNFS extent of its own.
++		 */
++		gap_start = prev ? prev->fe_logical + prev->fe_length : 0;
++		if (prev && (gap_start != cur->fe_logical)) {
++			dprintk("    Got a hole at %Ld:%Ld \n", 
++			    _2SECTS(prev->fe_logical),
++			    _2SECTS(prev->fe_length));
++			queued = bll_alloc_holey(bl_candidates, gap_start,
++			    cur->fe_logical - gap_start, dev);
++			if (!queued)
++				return False;
++			seg->length += queued;
++		}
++
++		ext = bll_alloc(cur->fe_logical, cur->fe_length,
++		    BLOCK_LAYOUT_NEW, bl_candidates);
++		if (unlikely(ext == NULL)) {
++			dprintk("%s: bll_alloc failed\n", __func__);
++			return False;
++		}
++
++		ext->bll_vol_id.devid = dev;
++		ext->bll_vol_id.sbid = 0;
++		ext->bll_soff = cur->fe_physical;
++		if (seg->iomode == IOMODE_READ)
++			ext->bll_es = PNFS_BLOCK_READ_DATA;
++		else
++			ext->bll_es = PNFS_BLOCK_READWRITE_DATA;
++		seg->length += cur->fe_length;
++		print_bll(ext, "New extent");
++	}
++	dprintk("<-- %s (i=%d)\n", __func__, i);
++
++	return True;
++}
++
++static void
++extents_cleanup(struct fiemap_extent_info *fei)
++{
++	/* Release the extent array, if any; kfree() ignores NULL. */
++	kfree(fei->fi_extents_start);
++	/* Clearing the pointer keeps a repeated cleanup harmless. */
++	fei->fi_extents_start = NULL;
++}
++
++/*
++ * device_slice -- check to see if device is a slice or DM, i.e. whether
++ * the disk behind 'devid' has more than one minor. False if open fails.
++ */
++static boolean_t
++device_slice(dev_t devid)
++{
++	struct block_device	*bd	= open_by_devnum(devid, FMODE_READ);
++	boolean_t		rval	= False;
++	/* open_by_devnum() reports failure with ERR_PTR(), not NULL. */
++	if (bd && !IS_ERR(bd)) {
++		if (bd->bd_disk->minors > 1)
++			rval = True;
++		blkdev_put(bd, FMODE_READ);
++	}
++	return rval;
++}
++
++/*
++ * device_dm -- check to see if device is a Device Mapper volume.
++ *
++ * Returns True for a DM volume, False if not or if the upcall fails.
++ */
++static boolean_t
++device_dm(dev_t devid)
++{
++	boolean_t		rval = False;
++	bl_comm_msg_t		msg;
++	bl_comm_res_t		*res = NULL;	/* freed below on every path */
++
++	msg.msg_type	= PNFS_UPCALL_MSG_DMCHK;
++	msg.u.msg_dev	= devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("Failed upcall to check on DM status\n");
++	} else if (res->u.dm_vol) {
++		rval = True;
++		dprintk("Device is DM volume\n");
++	} else
++		dprintk("Device is not DM volume\n");
++	kfree(res);
++
++	return rval;
++}
++
++/*
++ * layout_inode_add -- allocate a layout record for 'i' and add it to
++ * layout_hash. The file system must implement fiemap and fallocate.
++ * On success '*p' is the new record; returns False on any failure.
++ */
++static boolean_t
++layout_inode_add(struct inode *i, bl_layout_rec_t **p)
++{
++	bl_layout_rec_t		*r;
++
++	if (!i->i_op->fiemap || !i->i_op->fallocate) {
++		printk("pNFS: file system doesn't support required fiemap or "
++		    "fallocate methods\n");
++		return False;
++	}
++
++	r = kmalloc(sizeof (*r), GFP_KERNEL);
++	if (!r)
++		return False;
++	/* Initialise the record fully before it becomes visible in the hash. */
++	r->blr_rdev	= i->i_sb->s_dev;
++	r->blr_inode	= i;
++	r->blr_orig_size = i->i_size;
++	r->blr_ext_size	= 0;
++	r->blr_recalled	= 0;
++	INIT_LIST_HEAD(&r->blr_layouts);
++	spin_lock_init(&r->blr_lock);
++	spin_lock(&layout_hashtbl_lock);
++	list_add_tail(&r->blr_hash, &layout_hash);
++	spin_unlock(&layout_hashtbl_lock);
++	*p = r;
++	return True;
++}
++
++static bl_layout_rec_t *
++__layout_inode_find(struct inode *i)
++{
++	bl_layout_rec_t	*rec;
++
++	/*
++	 * Linear search of layout_hash by inode number and device.
++	 */
++	list_for_each_entry(rec, &layout_hash, blr_hash) {
++		if ((rec->blr_inode->i_ino == i->i_ino) &&
++		    (rec->blr_rdev == i->i_sb->s_dev))
++			return rec;
++	}
++	return NULL;
++}
++
++static bl_layout_rec_t *
++layout_inode_find(struct inode *i)
++{
++	bl_layout_rec_t	*found;
++
++	/* Locked lookup; the lock is dropped before the record is returned. */
++	spin_lock(&layout_hashtbl_lock);
++	found = __layout_inode_find(i);
++	spin_unlock(&layout_hashtbl_lock);
++	return found;
++}
++
++static void
++layout_inode_del(struct inode *i)
++{
++	bl_layout_rec_t	*rec;
++	int		unused;
++
++	spin_lock(&layout_hashtbl_lock);
++	rec = __layout_inode_find(i);
++	if (rec) {
++		/* Only a record with no layouts left is unhashed and freed. */
++		spin_lock(&rec->blr_lock);
++		unused = list_empty(&rec->blr_layouts);
++		if (unused)
++			list_del(&rec->blr_hash);
++		spin_unlock(&rec->blr_lock);
++		if (unused)
++			kfree(rec);
++	} else
++		dprintk("%s: failed to find inode [0x%x:%lu] in table for delete\n",
++			__func__, i->i_sb->s_dev, i->i_ino);
++	spin_unlock(&layout_hashtbl_lock);
++}
++
++/*
++ * map_state2name -- fixed width name of an extent state, for debug
++ * messages only. A state that is not listed is a BUG().
++ */
++static char *
++map_state2name(enum pnfs_block_extent_state4 s)
++{
++	if (s == PNFS_BLOCK_READWRITE_DATA)
++		return "     RW";
++	if (s == PNFS_BLOCK_READ_DATA)
++		return "     RO";
++	if (s == PNFS_BLOCK_INVALID_DATA)
++		return "INVALID";
++	if (s == PNFS_BLOCK_NONE_DATA)
++		return "   NONE";
++	BUG();
++}
++
++static pnfs_blocklayout_devinfo_t *
++bld_alloc(struct list_head *volumes, int type)
++{
++	pnfs_blocklayout_devinfo_t *vol;
++
++	/*
++	 * Zeroed entry of the given type, queued at the tail of 'volumes'.
++	 */
++	vol = kzalloc(sizeof (*vol), GFP_KERNEL);
++	if (vol != NULL) {
++		vol->bld_type = type;
++		list_add_tail(&vol->bld_list, volumes);
++	}
++	return vol;
++}
++
++static void bld_free(pnfs_blocklayout_devinfo_t *bld)
++{
++	/* Unlink from its volume list, then free; bld_sig is not released here. */
++	list_del(&bld->bld_list);
++	kfree(bld);
++}
++
++static void print_bll(pnfs_blocklayout_layout_t *b, char *text)
++{
++	/* Debug aid: 'text' labels the entry, values are shown via _2SECTS(). */
++	dprintk("    BLL: %s\n", text);
++	dprintk("    foff %Lu, soff %Lu, len %Lu, state %s\n",
++	    _2SECTS(b->bll_foff), _2SECTS(b->bll_soff), _2SECTS(b->bll_len),
++	    map_state2name(b->bll_es));
++}
++
++static inline void
++bll_collapse(bl_layout_rec_t *r, pnfs_blocklayout_layout_t *c)
++{
++	pnfs_blocklayout_layout_t	*nxt;
++	u64				c_end,
++					nxt_end;
++	int				absorbed	= 0;
++
++	/*
++	 * 'c' has just grown. Swallow every following cache entry whose
++	 * storage offset now falls inside 'c'. When such an entry reaches
++	 * beyond the end of 'c', e.g.
++	 *
++	 * Existing: +---+                 +---+
++	 *      New: +-----------------------+
++	 *
++	 * the client's request merged entries without needing all of the
++	 * last one; it is still deleted and the space it adds is given
++	 * to 'c'.
++	 */
++	BUG_ON(c->bll_es == PNFS_BLOCK_NONE_DATA);
++	while (c->bll_list.next != &r->blr_layouts) {
++		nxt = list_entry(c->bll_list.next,
++				 struct pnfs_blocklayout_layout, bll_list);
++		c_end = BLL_S_END(c);
++		if ((nxt->bll_soff < c->bll_soff) || (nxt->bll_soff >= c_end))
++			break;
++
++		nxt_end = BLL_S_END(nxt);
++		if (nxt_end > c_end)
++			c->bll_len += nxt_end - c_end;
++		list_del(&nxt->bll_list);
++		kfree(nxt);
++		absorbed++;
++	}
++
++	/* ---- Debug only, remove before integration ---- */
++	if (absorbed)
++		dprintk("  Collapsed %d cache entries between %Lu(s) and %Lu(s)\n",
++			absorbed, _2SECTS(c->bll_soff), _2SECTS(BLL_S_END(c)));
++}
++
++/* Allocate a zeroed (kzalloc) layout entry; queue it on @h when @h is set. */
++static pnfs_blocklayout_layout_t *
++bll_alloc(u64 offset, u64 len, enum bl_cache_state state, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*n;
++
++	n = kzalloc(sizeof (*n), GFP_KERNEL);
++	if (n) {
++		n->bll_foff		= offset;
++		n->bll_len		= len;
++		n->bll_cache_state	= state;
++		if (h)
++			list_add_tail(&n->bll_list, h);
++	}
++	return n;	/* NULL when the allocation failed */
++}
++
++static pnfs_blocklayout_layout_t *
++bll_alloc_dup(pnfs_blocklayout_layout_t *b, enum bl_cache_state c,
++	      struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*n	= NULL;
++	
++	n = bll_alloc(b->bll_foff, b->bll_len, c, h);	/* copies foff/len, sets cache state c */
++	if (n) {
++		n->bll_es			= b->bll_es;	/* remaining fields come from b */
++		n->bll_soff			= b->bll_soff;
++		n->bll_vol_id.devid		= b->bll_vol_id.devid;	/* only devid is copied */
++	}
++	return n;	/* NULL when bll_alloc() failed */
++}
++
++static inline boolean_t
++layout_conflict(pnfs_blocklayout_layout_t *b, u32 iomode,
++		enum pnfs_block_extent_state4 *s)
++{
++	/* ---- Normal case ---- */
++	*s = b->bll_es;	/* *s is written even when False is returned below */
++	
++	switch (b->bll_es) {
++	case PNFS_BLOCK_READWRITE_DATA:
++		if (iomode == IOMODE_READ)
++			*s = PNFS_BLOCK_READ_DATA;	/* report RW extent as RO to a reader */
++		/* ---- Any use is permitted. ---- */
++		break;
++	case PNFS_BLOCK_READ_DATA:
++		/* ---- Committed as read only data. ---- */
++		if (iomode == IOMODE_RW)
++			return False;	/* RW request on a read-only extent */
++		break;
++	case PNFS_BLOCK_INVALID_DATA:
++		/* ---- Blocks have been allocated, but not initialized ---- */
++		if (iomode == IOMODE_READ)
++			*s = PNFS_BLOCK_NONE_DATA;	/* reader sees the extent as NONE */
++		break;
++	case PNFS_BLOCK_NONE_DATA:
++		/* ---- Hole-y file. No backing store avail. ---- */
++		if (iomode != IOMODE_READ)
++			return False;	/* only reads are allowed on a hole */
++		break;
++	default:
++		BUG();
++	}
++	return True;
++}
++
++#endif /* CONFIG_SPNFS_BLOCK */
+diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs/delegation.c
+--- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-08-31 20:41:19.144140225 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-31 20:42:05.512106042 -0400
+@@ -104,7 +104,8 @@ again:
+ 			continue;
+ 		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+ 			continue;
+-		if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
++		if (memcmp(state->stateid.u.data, stateid->u.data,
++			   sizeof(state->stateid.u.data)) != 0)
+ 			continue;
+ 		get_nfs_open_context(ctx);
+ 		spin_unlock(&inode->i_lock);
+@@ -133,8 +134,8 @@ void nfs_inode_reclaim_delegation(struct
+ 	if (delegation != NULL) {
+ 		spin_lock(&delegation->lock);
+ 		if (delegation->inode != NULL) {
+-			memcpy(delegation->stateid.data, res->delegation.data,
+-			       sizeof(delegation->stateid.data));
++			memcpy(delegation->stateid.u.data, res->delegation.u.data,
++			       sizeof(delegation->stateid.u.data));
+ 			delegation->type = res->delegation_type;
+ 			delegation->maxsize = res->maxsize;
+ 			oldcred = delegation->cred;
+@@ -187,8 +188,9 @@ static struct nfs_delegation *nfs_detach
+ 	if (delegation == NULL)
+ 		goto nomatch;
+ 	spin_lock(&delegation->lock);
+-	if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
+-				sizeof(delegation->stateid.data)) != 0)
++	if (stateid != NULL && memcmp(delegation->stateid.u.data,
++				      stateid->u.data,
++				      sizeof(delegation->stateid.u.data)) != 0)
+ 		goto nomatch_unlock;
+ 	list_del_rcu(&delegation->super_list);
+ 	delegation->inode = NULL;
+@@ -216,8 +218,8 @@ int nfs_inode_set_delegation(struct inod
+ 	delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
+ 	if (delegation == NULL)
+ 		return -ENOMEM;
+-	memcpy(delegation->stateid.data, res->delegation.data,
+-			sizeof(delegation->stateid.data));
++	memcpy(delegation->stateid.u.data, res->delegation.u.data,
++			sizeof(delegation->stateid.u.data));
+ 	delegation->type = res->delegation_type;
+ 	delegation->maxsize = res->maxsize;
+ 	delegation->change_attr = nfsi->change_attr;
+@@ -471,9 +473,7 @@ void nfs_expire_unreferenced_delegations
+ /*
+  * Asynchronous delegation recall!
+  */
+-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
+-				      int (*validate_stateid)(struct nfs_delegation *delegation,
+-							      const nfs4_stateid *stateid))
++int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
+ {
+ 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ 	struct nfs_delegation *delegation;
+@@ -481,7 +481,7 @@ int nfs_async_inode_return_delegation(st
+ 	rcu_read_lock();
+ 	delegation = rcu_dereference(NFS_I(inode)->delegation);
+ 
+-	if (!validate_stateid(delegation, stateid)) {
++	if (!clp->cl_mvops->validate_stateid(delegation, stateid)) {
+ 		rcu_read_unlock();
+ 		return -ENOENT;
+ 	}
+@@ -562,7 +562,8 @@ int nfs4_copy_delegation_stateid(nfs4_st
+ 	rcu_read_lock();
+ 	delegation = rcu_dereference(nfsi->delegation);
+ 	if (delegation != NULL) {
+-		memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
++		memcpy(dst->u.data, delegation->stateid.u.data,
++		       sizeof(dst->u.data));
+ 		ret = 1;
+ 	}
+ 	rcu_read_unlock();
+diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs/delegation.h
+--- linux-2.6.34.noarch/fs/nfs/delegation.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-08-31 20:42:05.513114811 -0400
+@@ -34,9 +34,7 @@ enum {
+ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+ int nfs_inode_return_delegation(struct inode *inode);
+-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
+-				      int (*validate_stateid)(struct nfs_delegation *delegation,
+-							      const nfs4_stateid *stateid));
++int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
+ void nfs_inode_return_delegation_noreclaim(struct inode *inode);
+ 
+ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
+diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
+--- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-08-31 20:41:19.196140434 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-31 20:42:05.553222784 -0400
+@@ -17,11 +17,19 @@
+ #include <linux/module.h>
+ #include <linux/exportfs.h>
+ 
++#include <linux/nfsd/nfsd4_pnfs.h>
++#if defined(CONFIG_SPNFS)
++#include <linux/nfsd4_spnfs.h>
++#if defined(CONFIG_SPNFS_BLOCK)
++#include <linux/nfsd4_block.h>
++#endif
++#endif
+ #include <linux/nfsd/syscall.h>
+ #include <net/ipv6.h>
+ 
+ #include "nfsd.h"
+ #include "nfsfh.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY	NFSDDBG_EXPORT
+ 
+@@ -352,6 +360,40 @@ static int svc_export_upcall(struct cach
+ 	return sunrpc_cache_pipe_upcall(cd, h, svc_export_request);
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static struct pnfsd_cb_operations pnfsd_cb_op = {	/* published via pnfsd_cb_ctl in nfsd_export_init() */
++	.cb_layout_recall = nfsd_layout_recall_cb,
++	.cb_device_notify = nfsd_device_notify_cb,
++
++	.cb_get_state = nfs4_pnfs_cb_get_state,
++	.cb_change_state = nfs4_pnfs_cb_change_state,
++};
++
++#if defined(CONFIG_SPNFS)
++static struct pnfs_export_operations spnfs_export_ops = {	/* installed by check_export() when spnfs_enabled() */
++	.layout_type = spnfs_layout_type,
++	.get_device_info = spnfs_getdeviceinfo,
++	.get_device_iter = spnfs_getdeviceiter,
++	.layout_get = spnfs_layoutget,
++	.layout_return = spnfs_layoutreturn,
++};
++
++static struct pnfs_export_operations spnfs_ds_export_ops = {	/* check_export() fallback when spnfs is not enabled */
++	.get_state = spnfs_get_state,	/* the only op provided; all others stay NULL */
++};
++
++#if defined(CONFIG_SPNFS_BLOCK)
++static struct pnfs_export_operations bl_export_ops = {	/* installed by check_export() when pnfs_block_enabled() */
++	.layout_type = bl_layout_type,
++	.get_device_info = bl_getdeviceinfo,
++	.get_device_iter = bl_getdeviceiter,
++	.layout_get = bl_layoutget,
++	.layout_return = bl_layoutreturn,
++};
++#endif /* CONFIG_SPNFS_BLOCK */
++#endif /* CONFIG_SPNFS */
++#endif /* CONFIG_PNFSD */
++
+ static struct svc_export *svc_export_update(struct svc_export *new,
+ 					    struct svc_export *old);
+ static struct svc_export *svc_export_lookup(struct svc_export *);
+@@ -395,6 +437,47 @@ static int check_export(struct inode *in
+ 		return -EINVAL;
+ 	}
+ 
++#if !defined(CONFIG_SPNFS)
++	if (inode->i_sb->s_pnfs_op &&
++	    (!inode->i_sb->s_pnfs_op->layout_type ||
++	     !inode->i_sb->s_pnfs_op->get_device_info ||
++	     !inode->i_sb->s_pnfs_op->layout_get)) {
++		dprintk("exp_export: export of invalid fs pnfs export ops.\n");
++		return -EINVAL;
++	}
++#endif /* CONFIG_SPNFS */
++
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++	if (!inode->i_sb->s_pnfs_op)
++		pnfsd_lexp_init(inode);
++	return 0;
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
++#if defined(CONFIG_SPNFS)
++#if defined(CONFIG_SPNFS_BLOCK)
++	if (pnfs_block_enabled(inode, *flags)) {
++		dprintk("set pnfs block export structure... \n");
++		inode->i_sb->s_pnfs_op = &bl_export_ops;
++	} else
++#endif /* CONFIG_SPNFS_BLOCK */
++	/*
++	 * spnfs_enabled() indicates we're an MDS.
++	 * XXX Better to check an export time option as well.
++	 */
++	if (spnfs_enabled()) {
++		dprintk("set spnfs export structure...\n");
++		inode->i_sb->s_pnfs_op = &spnfs_export_ops;
++	} else {
++		dprintk("%s spnfs not in use\n", __func__);
++
++		/*
++		 * get_state is needed if we're a DS using spnfs.
++		 * XXX Better to check an export time option instead.
++		 */
++		inode->i_sb->s_pnfs_op = &spnfs_ds_export_ops;
++	}
++#endif /* CONFIG_SPNFS */
++
+ 	return 0;
+ 
+ }
+@@ -586,6 +669,8 @@ static int svc_export_parse(struct cache
+ 					if (exp.ex_uuid == NULL)
+ 						err = -ENOMEM;
+ 				}
++			} else if (strcmp(buf, "pnfs") == 0) {
++				exp.ex_pnfs = 1;
+ 			} else if (strcmp(buf, "secinfo") == 0)
+ 				err = secinfo_parse(&mesg, buf, &exp);
+ 			else
+@@ -660,6 +745,8 @@ static int svc_export_show(struct seq_fi
+ 				seq_printf(m, "%02x", exp->ex_uuid[i]);
+ 			}
+ 		}
++		if (exp->ex_pnfs)
++			seq_puts(m, ",pnfs");
+ 		show_secinfo(m, exp);
+ 	}
+ 	seq_puts(m, ")\n");
+@@ -687,6 +774,7 @@ static void svc_export_init(struct cache
+ 	new->ex_fslocs.locations = NULL;
+ 	new->ex_fslocs.locations_count = 0;
+ 	new->ex_fslocs.migrated = 0;
++	new->ex_pnfs = 0;
+ }
+ 
+ static void export_update(struct cache_head *cnew, struct cache_head *citem)
+@@ -699,6 +787,7 @@ static void export_update(struct cache_h
+ 	new->ex_anon_uid = item->ex_anon_uid;
+ 	new->ex_anon_gid = item->ex_anon_gid;
+ 	new->ex_fsid = item->ex_fsid;
++	new->ex_pnfs = item->ex_pnfs;
+ 	new->ex_uuid = item->ex_uuid;
+ 	item->ex_uuid = NULL;
+ 	new->ex_pathname = item->ex_pathname;
+@@ -1635,8 +1724,17 @@ nfsd_export_init(void)
+ 	if (rv)
+ 		return rv;
+ 	rv = cache_register(&svc_expkey_cache);
+-	if (rv)
++	if (rv) {
+ 		cache_unregister(&svc_export_cache);
++		goto out;
++	}
++#if defined(CONFIG_PNFSD)
++	spin_lock(&pnfsd_cb_ctl.lock);
++	pnfsd_cb_ctl.module = THIS_MODULE;
++	pnfsd_cb_ctl.cb_op = &pnfsd_cb_op;
++	spin_unlock(&pnfsd_cb_ctl.lock);
++#endif /* CONFIG_PNFSD */
++out:
+ 	return rv;
+ 
+ }
+@@ -1664,6 +1762,12 @@ nfsd_export_shutdown(void)
+ 
+ 	exp_writelock();
+ 
++#if defined(CONFIG_PNFSD)
++	spin_lock(&pnfsd_cb_ctl.lock);
++	pnfsd_cb_ctl.module = NULL;
++	pnfsd_cb_ctl.cb_op = NULL;
++	spin_unlock(&pnfsd_cb_ctl.lock);
++#endif /* CONFIG_PNFSD */
+ 	cache_unregister(&svc_expkey_cache);
+ 	cache_unregister(&svc_export_cache);
+ 	svcauth_unix_purge();
+diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/direct.c
+--- linux-2.6.34.noarch/fs/nfs/direct.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-08-31 20:42:05.514196343 -0400
+@@ -267,6 +267,38 @@ static const struct rpc_call_ops nfs_rea
+ 	.rpc_release = nfs_direct_read_release,
+ };
+ 
++static long nfs_direct_read_execute(struct nfs_read_data *data,
++				    struct rpc_task_setup *task_setup_data,
++				    struct rpc_message *msg)
++{
++	struct inode *inode = data->inode;
++	struct rpc_task *task;
++
++	nfs_fattr_init(&data->fattr);
++	msg->rpc_argp = &data->args;	/* point the shared message at this chunk */
++	msg->rpc_resp = &data->res;
++
++	task_setup_data->task = &data->task;	/* use the task embedded in data */
++	task_setup_data->callback_data = data;
++	NFS_PROTO(inode)->read_setup(data, msg);
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);	/* non-zero: the scheduling loop breaks on it */
++
++	rpc_put_task(task);
++
++	dprintk("NFS: %5u initiated direct read call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++
++	return 0;	/* task was started successfully */
++}
++
+ /*
+  * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
+  * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
+@@ -283,7 +315,6 @@ static ssize_t nfs_direct_read_schedule_
+ 	unsigned long user_addr = (unsigned long)iov->iov_base;
+ 	size_t count = iov->iov_len;
+ 	size_t rsize = NFS_SERVER(inode)->rsize;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = ctx->cred,
+ 	};
+@@ -343,26 +374,9 @@ static ssize_t nfs_direct_read_schedule_
+ 		data->res.fattr = &data->fattr;
+ 		data->res.eof = 0;
+ 		data->res.count = bytes;
+-		nfs_fattr_init(&data->fattr);
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+ 
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		NFS_PROTO(inode)->read_setup(data, &msg);
+-
+-		task = rpc_run_task(&task_setup_data);
+-		if (IS_ERR(task))
+-			break;
+-		rpc_put_task(task);
+-
+-		dprintk("NFS: %5u initiated direct read call "
+-			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				bytes,
+-				(unsigned long long)data->args.offset);
++		if (nfs_direct_read_execute(data, &task_setup_data, &msg))
++			break;
+ 
+ 		started += bytes;
+ 		user_addr += bytes;
+@@ -448,12 +462,15 @@ static void nfs_direct_free_writedata(st
+ }
+ 
+ #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
++static long nfs_direct_write_execute(struct nfs_write_data *data,
++				     struct rpc_task_setup *task_setup_data,
++				     struct rpc_message *msg);
++
+ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+ {
+ 	struct inode *inode = dreq->inode;
+ 	struct list_head *p;
+ 	struct nfs_write_data *data;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = dreq->ctx->cred,
+ 	};
+@@ -487,25 +504,7 @@ static void nfs_direct_write_reschedule(
+ 		 * Reuse data->task; data->args should not have changed
+ 		 * since the original request was sent.
+ 		 */
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+-		NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-		/*
+-		 * We're called via an RPC callback, so BKL is already held.
+-		 */
+-		task = rpc_run_task(&task_setup_data);
+-		if (!IS_ERR(task))
+-			rpc_put_task(task);
+-
+-		dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				data->args.count,
+-				(unsigned long long)data->args.offset);
++		nfs_direct_write_execute(data, &task_setup_data, &msg);
+ 	}
+ 
+ 	if (put_dreq(dreq))
+@@ -548,10 +547,31 @@ static const struct rpc_call_ops nfs_com
+ 	.rpc_release = nfs_direct_commit_release,
+ };
+ 
++static long nfs_direct_commit_execute(struct nfs_direct_req *dreq,
++				      struct nfs_write_data *data,
++				      struct rpc_task_setup *task_setup_data,
++				      struct rpc_message *msg)
++{
++	struct rpc_task *task;
++
++	NFS_PROTO(data->inode)->commit_setup(data, msg);
++
++	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
++	dreq->commit_data = NULL;
++
++	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);	/* logged before the task runs */
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);	/* nfs_direct_commit_schedule() ignores this value */
++
++	rpc_put_task(task);
++	return 0;
++}
++
+ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
+ {
+ 	struct nfs_write_data *data = dreq->commit_data;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+@@ -579,16 +599,7 @@ static void nfs_direct_commit_schedule(s
+ 	data->res.verf = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	NFS_PROTO(data->inode)->commit_setup(data, &msg);
+-
+-	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
+-	dreq->commit_data = NULL;
+-
+-	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (!IS_ERR(task))
+-		rpc_put_task(task);
++	nfs_direct_commit_execute(dreq, data, &task_setup_data, &msg);
+ }
+ 
+ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+@@ -690,6 +701,36 @@ static const struct rpc_call_ops nfs_wri
+ 	.rpc_release = nfs_direct_write_release,
+ };
+ 
++static long nfs_direct_write_execute(struct nfs_write_data *data,
++				     struct rpc_task_setup *task_setup_data,
++				     struct rpc_message *msg)
++{
++	struct inode *inode = data->inode;
++	struct rpc_task *task;
++
++	task_setup_data->task = &data->task;	/* use the task embedded in data */
++	task_setup_data->callback_data = data;
++	msg->rpc_argp = &data->args;	/* point the shared message at this chunk */
++	msg->rpc_resp = &data->res;
++	NFS_PROTO(inode)->write_setup(data, msg);
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);	/* schedule loop breaks on it; reschedule path ignores it */
++
++	rpc_put_task(task);
++
++	dprintk("NFS: %5u initiated direct write call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++
++	return 0;	/* task was started successfully */
++}
++
+ /*
+  * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+  * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
+@@ -705,7 +746,6 @@ static ssize_t nfs_direct_write_schedule
+ 	struct inode *inode = ctx->path.dentry->d_inode;
+ 	unsigned long user_addr = (unsigned long)iov->iov_base;
+ 	size_t count = iov->iov_len;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = ctx->cred,
+ 	};
+@@ -771,24 +811,8 @@ static ssize_t nfs_direct_write_schedule
+ 		data->res.verf = &data->verf;
+ 		nfs_fattr_init(&data->fattr);
+ 
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+-		NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-		task = rpc_run_task(&task_setup_data);
+-		if (IS_ERR(task))
+-			break;
+-		rpc_put_task(task);
+-
+-		dprintk("NFS: %5u initiated direct write call "
+-			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				bytes,
+-				(unsigned long long)data->args.offset);
++		if (nfs_direct_write_execute(data, &task_setup_data, &msg))
++			break;
+ 
+ 		started += bytes;
+ 		user_addr += bytes;
+diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kconfig
+--- linux-2.6.34.noarch/fs/nfsd/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-08-31 20:42:05.549222922 -0400
+@@ -79,3 +79,52 @@ config NFSD_V4
+ 	  available from http://linux-nfs.org/.
+ 
+ 	  If unsure, say N.
++
++config PNFSD
++	bool "NFSv4.1 server support for Parallel NFS (pNFS) (DEVELOPER ONLY)"
++	depends on NFSD_V4 && EXPERIMENTAL
++	select EXPORTFS_FILE_LAYOUT
++	help
++	  This option enables support for the parallel NFS features of the
++	  minor version 1 of the NFSv4 protocol (draft-ietf-nfsv4-minorversion1)
++	  in the kernel's NFS server.
++
++	  Unless you're an NFS developer, say N.
++
++config PNFSD_LOCAL_EXPORT
++	bool "Enable pNFS support for exporting local filesystems for debugging purposes"
++	depends on PNFSD
++	help
++	  Say Y here if you want your pNFS server to export local file systems
++	  over the files layout type.  With this option the MDS (metadata
++	  server) functions also as a single DS (data server).  This is mostly
++	  useful for development and debugging purposes.
++
++	  If unsure, say N.
++
++config SPNFS
++	bool "Provide spNFS server support (EXPERIMENTAL)"
++	depends on PNFSD
++	select RPCSEC_GSS_KRB5
++	help
++	  Say Y here if you want spNFS server support.
++
++	  If unsure, say N.
++
++config SPNFS_LAYOUTSEGMENTS
++	bool "Allow spNFS to return partial file layouts (EXPERIMENTAL)"
++	depends on SPNFS
++	select RPCSEC_GSS_KRB5
++	help
++	  Say Y here if you want spNFS to be able to return layout segments.
++
++	  If unsure, say N.
++
++config SPNFS_BLOCK
++	bool "Provide Block Layout server support (EXPERIMENTAL)"
++	depends on SPNFS
++	select EXPORTFS_BLOCK_LAYOUT
++	help
++	  Say Y here if you want spNFS block layout support.
++
++	  If unsure, say N.
+diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/Makefile
+--- linux-2.6.34.noarch/fs/nfsd/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-08-31 20:42:05.549222922 -0400
+@@ -11,3 +11,7 @@ nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs
+ nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
+ nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
+ 			   nfs4acl.o nfs4callback.o nfs4recover.o
++nfsd-$(CONFIG_PNFSD)	+= nfs4pnfsd.o nfs4pnfsdlm.o nfs4pnfsds.o
++nfsd-$(CONFIG_PNFSD_LOCAL_EXPORT) += pnfsd_lexp.o
++nfsd-$(CONFIG_SPNFS)	+= spnfs_com.o spnfs_ops.o
++nfsd-$(CONFIG_SPNFS_BLOCK) += bl_com.o bl_ops.o
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-08-31 20:41:19.197150385 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-31 20:42:05.554114789 -0400
+@@ -40,7 +40,6 @@
+ 
+ #define NFSPROC4_CB_NULL 0
+ #define NFSPROC4_CB_COMPOUND 1
+-#define NFS4_STATEID_SIZE 16
+ 
+ /* Index of predefined Linux callback client operations */
+ 
+@@ -48,11 +47,17 @@ enum {
+ 	NFSPROC4_CLNT_CB_NULL = 0,
+ 	NFSPROC4_CLNT_CB_RECALL,
+ 	NFSPROC4_CLNT_CB_SEQUENCE,
++#if defined(CONFIG_PNFSD)
++	NFSPROC4_CLNT_CB_LAYOUT,
++	NFSPROC4_CLNT_CB_DEVICE,
++#endif
+ };
+ 
+ enum nfs_cb_opnum4 {
+ 	OP_CB_RECALL            = 4,
++	OP_CB_LAYOUT            = 5,
+ 	OP_CB_SEQUENCE          = 11,
++	OP_CB_DEVICE            = 14,
+ };
+ 
+ #define NFS4_MAXTAGLEN		20
+@@ -78,6 +83,19 @@ enum nfs_cb_opnum4 {
+ #define NFS4_dec_cb_recall_sz		(cb_compound_dec_hdr_sz  +      \
+ 					cb_sequence_dec_sz +            \
+ 					op_dec_sz)
++#define NFS4_enc_cb_layout_sz		(cb_compound_enc_hdr_sz +       \
++					cb_sequence_enc_sz +            \
++					1 + 3 +                         \
++					enc_nfs4_fh_sz + 4)
++#define NFS4_dec_cb_layout_sz		(cb_compound_dec_hdr_sz  +      \
++					cb_sequence_dec_sz +            \
++					op_dec_sz)
++#define NFS4_enc_cb_device_sz		(cb_compound_enc_hdr_sz +       \
++					cb_sequence_enc_sz +            \
++					1 + 6)
++#define NFS4_dec_cb_device_sz		(cb_compound_dec_hdr_sz  +      \
++					cb_sequence_dec_sz +            \
++					op_dec_sz)
+ 
+ /*
+ * Generic encode routines from fs/nfs/nfs4xdr.c
+@@ -94,6 +112,10 @@ xdr_writemem(__be32 *p, const void *ptr,
+ }
+ 
+ #define WRITE32(n)               *p++ = htonl(n)
++#define WRITE64(n)               do {				\
++	*p++ = htonl((u32)((n) >> 32));				\
++	*p++ = htonl((u32)(n));					\
++} while (0)
+ #define WRITEMEM(ptr,nbytes)     do {                           \
+ 	p = xdr_writemem(p, ptr, nbytes);                       \
+ } while (0)
+@@ -204,6 +226,16 @@ nfs_cb_stat_to_errno(int stat)
+  */
+ 
+ static void
++encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
++{
++	__be32 *p;	/* XDR cursor set by RESERVE_SPACE() */
++
++	RESERVE_SPACE(sizeof(stateid_t));	/* presumably generation word + opaque part; confirm layout */
++	WRITE32(sid->si_generation);
++	WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
++}
++
++static void
+ encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
+ {
+ 	__be32 * p;
+@@ -228,10 +260,10 @@ encode_cb_recall(struct xdr_stream *xdr,
+ 	__be32 *p;
+ 	int len = dp->dl_fh.fh_size;
+ 
+-	RESERVE_SPACE(12+sizeof(dp->dl_stateid) + len);
++	RESERVE_SPACE(4);
+ 	WRITE32(OP_CB_RECALL);
+-	WRITE32(dp->dl_stateid.si_generation);
+-	WRITEMEM(&dp->dl_stateid.si_opaque, sizeof(stateid_opaque_t));
++	encode_stateid(xdr, &dp->dl_stateid);
++	RESERVE_SPACE(8 + (XDR_QUADLEN(len) << 2));
+ 	WRITE32(0); /* truncate optimization not implemented */
+ 	WRITE32(len);
+ 	WRITEMEM(&dp->dl_fh.fh_base, len);
+@@ -259,6 +291,111 @@ encode_cb_sequence(struct xdr_stream *xd
+ 	hdr->nops++;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++#include "pnfsd.h"
++
++static void
++encode_cb_layout(struct xdr_stream *xdr, struct nfs4_layoutrecall *clr,
++		 struct nfs4_cb_compound_hdr *hdr)
++{
++	__be32 *p;	/* XDR cursor: WRITE32()/WRITE64() store htonl() values through it */
++
++	BUG_ON(hdr->minorversion == 0);
++
++	RESERVE_SPACE(20);	/* opcode + the four 32-bit recall fields below */
++	WRITE32(OP_CB_LAYOUT);
++	WRITE32(clr->cb.cbl_seg.layout_type);
++	WRITE32(clr->cb.cbl_seg.iomode);
++	WRITE32(clr->cb.cbl_layoutchanged);
++	WRITE32(clr->cb.cbl_recall_type);
++	if (unlikely(clr->cb.cbl_recall_type == RETURN_FSID)) {
++		struct nfs4_fsid fsid = clr->cb.cbl_fsid;
++
++		RESERVE_SPACE(16);	/* two 64-bit fsid halves */
++		WRITE64(fsid.major);
++		WRITE64(fsid.minor);
++		dprintk("%s: type %x iomode %d changed %d recall_type %d "
++			"fsid 0x%llx-0x%llx\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type, fsid.major, fsid.minor);
++	} else if (clr->cb.cbl_recall_type == RETURN_FILE) {
++		int len = clr->clr_file->fi_fhlen;
++		stateid_t *cbl_sid = (stateid_t *)&clr->cb.cbl_sid;
++
++		RESERVE_SPACE(20 + len);	/* fh length word + fh + offset + length */
++		WRITE32(len);
++		WRITEMEM(clr->clr_file->fi_fhval, len);
++		WRITE64(clr->cb.cbl_seg.offset);
++		WRITE64(clr->cb.cbl_seg.length);
++		encode_stateid(xdr, cbl_sid);	/* reserves its own space */
++		dprintk("%s: type %x iomode %d changed %d recall_type %d "
++			"offset %lld length %lld stateid " STATEID_FMT "\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type,
++			clr->cb.cbl_seg.offset, clr->cb.cbl_seg.length,
++			STATEID_VAL(cbl_sid));
++	} else {
++		dprintk("%s: type %x iomode %d changed %d recall_type %d\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type);
++	}
++	hdr->nops++;	/* one more operation in this compound */
++}
++
++static void
++encode_cb_device(struct xdr_stream *xdr, struct nfs4_notify_device *nd,
++		 struct nfs4_cb_compound_hdr *hdr)
++{
++	__be32 *p;	/* XDR cursor: WRITE32()/WRITE64() store htonl() values through it */
++	int i;
++	int len					= nd->nd_list->cbd_len;
++	struct nfsd4_pnfs_cb_dev_item *cbd	= nd->nd_list->cbd_list;
++
++	dprintk("NFSD %s: --> num %d\n", __func__, len);
++
++	BUG_ON(hdr->minorversion == 0);
++
++	RESERVE_SPACE(8);	/* opcode + element count */
++	WRITE32(OP_CB_DEVICE);
++
++	/* notify4 cnda_changes<>; */
++	WRITE32(len);
++	for (i = 0; i < len; i++) {
++		dprintk("%s: nt %d lt %d devid x%llx-x%llx im %d i %d\n",
++			__func__, cbd[i].cbd_notify_type,
++			cbd[i].cbd_layout_type,
++			cbd[i].cbd_devid.sbid,
++			cbd[i].cbd_devid.devid,
++			cbd[i].cbd_immediate, i);
++
++		BUG_ON(cbd[i].cbd_notify_type != NOTIFY_DEVICEID4_CHANGE &&
++		       cbd[i].cbd_notify_type != NOTIFY_DEVICEID4_DELETE);
++		RESERVE_SPACE(32);	/* eight words: the six writes below up to the devid */
++		/* bitmap4         notify_mask; */
++		WRITE32(1);
++		WRITE32(cbd[i].cbd_notify_type);
++		/* opaque     notify_vals<>; */
++		if (cbd[i].cbd_notify_type == NOTIFY_DEVICEID4_CHANGE)
++			WRITE32(24);	/* layout type + devid + immediate flag */
++		else
++			WRITE32(20);	/* layout type + devid */
++		WRITE32(cbd[i].cbd_layout_type);
++		WRITE64(cbd[i].cbd_devid.sbid);
++		WRITE64(cbd[i].cbd_devid.devid);
++
++		if (cbd[i].cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) {
++			RESERVE_SPACE(4);
++			WRITE32(cbd[i].cbd_immediate);
++		}
++	}
++	hdr->nops++;	/* one more operation in this compound */
++}
++#endif /* CONFIG_PNFSD */
++
+ static int
+ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
+ {
+@@ -288,6 +425,45 @@ nfs4_xdr_enc_cb_recall(struct rpc_rqst *
+ 	return 0;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static int
++nfs4_xdr_enc_cb_layout(struct rpc_rqst *req, __be32 *p,
++		       struct nfs4_rpc_args *rpc_args)
++{
++	struct xdr_stream xdr;
++	struct nfs4_layoutrecall *args = rpc_args->args_op;
++	struct nfs4_cb_compound_hdr hdr = {
++		.ident = 0,
++		.minorversion = rpc_args->args_seq.cbs_minorversion,
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);	/* p is big-endian XDR, as in nfs4_xdr_enc_cb_null() */
++	encode_cb_compound_hdr(&xdr, &hdr);
++	encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
++	encode_cb_layout(&xdr, args, &hdr);
++	encode_cb_nops(&hdr);	/* called last, after every op has bumped hdr.nops */
++	return 0;
++}
++
++static int
++nfs4_xdr_enc_cb_device(struct rpc_rqst *req, __be32 *p,
++		       struct nfs4_rpc_args *rpc_args)
++{
++	struct xdr_stream xdr;
++	struct nfs4_notify_device *args = rpc_args->args_op;
++	struct nfs4_cb_compound_hdr hdr = {
++		.ident = 0,
++		.minorversion = rpc_args->args_seq.cbs_minorversion,
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);	/* p is big-endian XDR, as in nfs4_xdr_enc_cb_null() */
++	encode_cb_compound_hdr(&xdr, &hdr);
++	encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
++	encode_cb_device(&xdr, args, &hdr);
++	encode_cb_nops(&hdr);	/* called last, after every op has bumped hdr.nops */
++	return 0;
++}
++#endif /* CONFIG_PNFSD */
+ 
+ static int
+ decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){
+@@ -403,6 +579,48 @@ out:
+ 	return status;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static int
++nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp, __be32 *p,
++		       struct nfsd4_cb_sequence *seq)
++{
++	struct xdr_stream xdr;
++	struct nfs4_cb_compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);	/* p points at big-endian reply words */
++	status = decode_cb_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_cb_sequence(&xdr, seq, rqstp);
++	if (status)
++		goto out;
++	status = decode_cb_op_hdr(&xdr, OP_CB_LAYOUT);	/* reply carries only the op header */
++out:
++	return status;
++}
++
++static int
++nfs4_xdr_dec_cb_device(struct rpc_rqst *rqstp, __be32 *p,
++		       struct nfsd4_cb_sequence *seq)
++{
++	struct xdr_stream xdr;
++	struct nfs4_cb_compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);	/* p points at big-endian reply words */
++	status = decode_cb_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_cb_sequence(&xdr, seq, rqstp);
++	if (status)
++		goto out;
++	status = decode_cb_op_hdr(&xdr, OP_CB_DEVICE);	/* reply carries only the op header */
++out:
++	return status;
++}
++#endif /* CONFIG_PNFSD */
++
+ /*
+  * RPC procedure tables
+  */
+@@ -420,6 +638,10 @@ out:
+ static struct rpc_procinfo     nfs4_cb_procedures[] = {
+     PROC(CB_NULL,      NULL,     enc_cb_null,     dec_cb_null),
+     PROC(CB_RECALL,    COMPOUND,   enc_cb_recall,      dec_cb_recall),
++#if defined(CONFIG_PNFSD)
++    PROC(CB_LAYOUT,    COMPOUND,   enc_cb_layout,      dec_cb_layout),
++    PROC(CB_DEVICE,    COMPOUND,   enc_cb_device,      dec_cb_device),
++#endif
+ };
+ 
+ static struct rpc_version       nfs_cb_version4 = {
+@@ -606,10 +828,9 @@ out:
+  * TODO: cb_sequence should support referring call lists, cachethis, multiple
+  * slots, and mark callback channel down on communication errors.
+  */
+-static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
++static void nfsd4_cb_prepare_sequence(struct rpc_task *task,
++				      struct nfs4_client *clp)
+ {
+-	struct nfs4_delegation *dp = calldata;
+-	struct nfs4_client *clp = dp->dl_client;
+ 	struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
+ 	u32 minorversion = clp->cl_cb_conn.cb_minorversion;
+ 	int status = 0;
+@@ -629,11 +850,15 @@ static void nfsd4_cb_prepare(struct rpc_
+ 	rpc_call_start(task);
+ }
+ 
+-static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
++static void nfsd4_cb_recall_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs4_delegation *dp = calldata;
+-	struct nfs4_client *clp = dp->dl_client;
++	nfsd4_cb_prepare_sequence(task, dp->dl_client);
++}
+ 
++static void nfsd4_cb_done_sequence(struct rpc_task *task,
++				   struct nfs4_client *clp)
++{
+ 	dprintk("%s: minorversion=%d\n", __func__,
+ 		clp->cl_cb_conn.cb_minorversion);
+ 
+@@ -657,7 +882,7 @@ static void nfsd4_cb_recall_done(struct 
+ 	struct nfs4_client *clp = dp->dl_client;
+ 	struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
+ 
+-	nfsd4_cb_done(task, calldata);
++	nfsd4_cb_done_sequence(task, clp);
+ 
+ 	if (current_rpc_client == NULL) {
+ 		/* We're shutting down; give up. */
+@@ -688,7 +913,7 @@ static void nfsd4_cb_recall_done(struct 
+ 	if (dp->dl_retries--) {
+ 		rpc_delay(task, 2*HZ);
+ 		task->tk_status = 0;
+-		rpc_restart_call(task);
++		rpc_restart_call_prepare(task);
+ 		return;
+ 	} else {
+ 		atomic_set(&clp->cl_cb_set, 0);
+@@ -704,7 +929,7 @@ static void nfsd4_cb_recall_release(void
+ }
+ 
+ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
+-	.rpc_call_prepare = nfsd4_cb_prepare,
++	.rpc_call_prepare = nfsd4_cb_recall_prepare,
+ 	.rpc_call_done = nfsd4_cb_recall_done,
+ 	.rpc_release = nfsd4_cb_recall_release,
+ };
+@@ -781,3 +1006,173 @@ void nfsd4_cb_recall(struct nfs4_delegat
+ {
+ 	queue_work(callback_wq, &dp->dl_recall.cb_work);
+ }
++
++#if defined(CONFIG_PNFSD)
++static void nfsd4_cb_layout_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	nfsd4_cb_prepare_sequence(task, clr->clr_client);
++}
++
++static void nfsd4_cb_layout_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	struct nfs4_client *clp = clr->clr_client;
++
++	nfsd4_cb_done_sequence(task, clp);
++
++	if (!task->tk_status)
++		return;
++
++	printk("%s: clp %p cb_client %p fp %p failed with status %d\n",
++	       __func__,
++	       clp,
++	       clp->cl_cb_client,
++	       clr->clr_file,
++	       task->tk_status);
++
++	switch (task->tk_status) {
++	case -EIO:
++		/* Network partition? */
++		atomic_set(&clp->cl_cb_set, 0);
++		warn_no_callback_path(clp, task->tk_status);
++		/* FIXME:
++		 * The pNFS standard states that we may only expire the
++		 * client after at least "lease time", e.g. lease-time * 2,
++		 * when failing to communicate a recall.
++		 */
++		break;
++	case -NFS4ERR_DELAY:
++		/* Poll the client until it's done with the layout */
++		rpc_delay(task, HZ/100); /* 10 milliseconds */
++		task->tk_status = 0;
++		rpc_restart_call_prepare(task);
++		break;
++	case -NFS4ERR_NOMATCHING_LAYOUT:
++		task->tk_status = 0;
++		nomatching_layout(clr);
++	}
++}
++
++static void nfsd4_cb_layout_release(void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	kfree(clr->clr_args);
++	clr->clr_args = NULL;
++	put_layoutrecall(clr);
++}
++
++static const struct rpc_call_ops nfsd4_cb_layout_ops = {
++	.rpc_call_prepare = nfsd4_cb_layout_prepare,
++	.rpc_call_done = nfsd4_cb_layout_done,
++	.rpc_release = nfsd4_cb_layout_release,
++};
++
++/*
++ * Called with state lock.
++ */
++int
++nfsd4_cb_layout(struct nfs4_layoutrecall *clr)
++{
++	struct nfs4_client *clp = clr->clr_client;
++	struct rpc_clnt *clnt = clp->cl_cb_client;
++	struct nfs4_rpc_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_LAYOUT],
++		.rpc_cred = callback_cred
++	};
++	int status;
++
++	args = kzalloc(sizeof(*args), GFP_KERNEL);
++	if (!args) {
++		status = -ENOMEM;
++		goto out;
++	}
++	clr->clr_args = args;
++	args->args_op = clr;
++	msg.rpc_argp = args;
++	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
++				&nfsd4_cb_layout_ops, clr);
++out:
++	if (status) {
++		kfree(args);
++		put_layoutrecall(clr);
++	}
++	dprintk("NFSD: nfsd4_cb_layout: status %d\n", status);
++	return status;
++}
++
++static void nfsd4_cb_device_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	nfsd4_cb_prepare_sequence(task, cbnd->nd_client);
++}
++
++static void nfsd4_cb_device_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	struct nfs4_client *clp = cbnd->nd_client;
++
++	nfsd4_cb_done_sequence(task, clp);
++
++	dprintk("%s: clp %p cb_client %p: status %d\n",
++	       __func__,
++	       clp,
++	       clp->cl_cb_client,
++	       task->tk_status);
++
++	if (task->tk_status == -EIO) {
++		/* Network partition? */
++		atomic_set(&clp->cl_cb_set, 0);
++		warn_no_callback_path(clp, task->tk_status);
++	}
++}
++
++static void nfsd4_cb_device_release(void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	kfree(cbnd->nd_args);
++	cbnd->nd_args = NULL;
++	kfree(cbnd);
++}
++
++static const struct rpc_call_ops nfsd4_cb_device_ops = {
++	.rpc_call_prepare = nfsd4_cb_device_prepare,
++	.rpc_call_done = nfsd4_cb_device_done,
++	.rpc_release = nfsd4_cb_device_release,
++};
++
++/* Called with state lock.
++ * NOTE(review): nothing here stores args in cbnd->nd_args, which is what
++ * nfsd4_cb_device_release() frees -- confirm args is not leaked on success. */
++int
++nfsd4_cb_notify_device(struct nfs4_notify_device *cbnd)
++{
++	struct nfs4_client *clp = cbnd->nd_client;
++	struct rpc_clnt *clnt = clp->cl_cb_client;
++	struct nfs4_rpc_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_DEVICE],
++		.rpc_cred = callback_cred
++	};
++	int status = -EIO;
++
++	dprintk("%s: clp %p\n", __func__, clp);
++
++	args = kzalloc(sizeof(*args), GFP_KERNEL);
++	if (!args) {
++		status = -ENOMEM;
++		goto out;
++	}
++	args->args_op = cbnd;
++	msg.rpc_argp = args;
++
++	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
++				&nfsd4_cb_device_ops, cbnd);
++out:
++	if (status)
++		kfree(args);
++	dprintk("%s: status %d\n", __func__, status);
++	return status;
++}
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-08-31 20:42:05.556172071 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-08-31 20:42:05.556172071 -0400
+@@ -0,0 +1,1679 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *****************************************************************************/
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY                NFSDDBG_PROC
++
++/* Globals */
++static u32 current_layoutid = 1;
++
++/*
++ * Currently used for manipulating the layout state.
++ */
++static DEFINE_SPINLOCK(layout_lock);
++
++#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_SMP)
++#  define BUG_ON_UNLOCKED_LAYOUT() BUG_ON(!spin_is_locked(&layout_lock))
++#else
++#  define BUG_ON_UNLOCKED_LAYOUT()
++#endif
++
++/*
++ * Layout state - NFSv4.1 pNFS
++ */
++static struct kmem_cache *pnfs_layout_slab;
++static struct kmem_cache *pnfs_layoutrecall_slab;
++
++/* hash table for nfsd4_pnfs_deviceid.sbid */
++#define SBID_HASH_BITS	8
++#define SBID_HASH_SIZE	(1 << SBID_HASH_BITS)
++#define SBID_HASH_MASK	(SBID_HASH_SIZE - 1)
++
++struct sbid_tracker {
++	u64 id;
++	struct super_block *sb;
++	struct list_head hash;
++};
++
++static u64 current_sbid;
++static struct list_head sbid_hashtbl[SBID_HASH_SIZE];
++
++static inline unsigned long
++sbid_hashval(struct super_block *sb)
++{
++	return hash_ptr(sb, SBID_HASH_BITS);
++}
++
++static inline struct sbid_tracker *
++alloc_sbid(void)
++{
++	return kmalloc(sizeof(struct sbid_tracker), GFP_KERNEL);
++}
++
++static void
++destroy_sbid(struct sbid_tracker *sbid)
++{
++	spin_lock(&layout_lock);
++	list_del(&sbid->hash);
++	spin_unlock(&layout_lock);
++	kfree(sbid);
++}
++
++void
++nfsd4_free_pnfs_slabs(void)
++{
++	int i;
++	struct sbid_tracker *sbid;
++
++	nfsd4_free_slab(&pnfs_layout_slab);
++	nfsd4_free_slab(&pnfs_layoutrecall_slab);
++
++	for (i = 0; i < SBID_HASH_SIZE; i++) {
++		while (!list_empty(&sbid_hashtbl[i])) {
++			sbid = list_first_entry(&sbid_hashtbl[i],
++						struct sbid_tracker,
++						hash);
++			destroy_sbid(sbid);
++		}
++	}
++}
++
++int
++nfsd4_init_pnfs_slabs(void)
++{
++	int i;
++
++	/* Buckets first: nfsd4_free_pnfs_slabs() walks them, even after -ENOMEM */
++	for (i = 0; i < SBID_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&sbid_hashtbl[i]);
++
++	pnfs_layout_slab = kmem_cache_create("pnfs_layouts",
++			sizeof(struct nfs4_layout), 0, 0, NULL);
++	if (pnfs_layout_slab == NULL)
++		return -ENOMEM;
++	pnfs_layoutrecall_slab = kmem_cache_create("pnfs_layoutrecalls",
++			sizeof(struct nfs4_layoutrecall), 0, 0, NULL);
++	if (pnfs_layoutrecall_slab == NULL)
++		return -ENOMEM;
++
++	return 0;
++}
++
++/* XXX: Need to implement the notify types and track which
++ * clients have which devices. */
++void pnfs_set_device_notify(clientid_t *clid, unsigned int types)
++{
++	struct nfs4_client *clp;
++	dprintk("%s: -->\n", __func__);
++
++	nfs4_lock_state();
++	/* Indicate that client has a device so we can only notify
++	 * the correct clients */
++	clp = find_confirmed_client(clid);
++	if (clp) {
++		atomic_inc(&clp->cl_deviceref);
++		dprintk("%s: Incr device count (clnt %p) to %d\n",
++			__func__, clp, atomic_read(&clp->cl_deviceref));
++	}
++	nfs4_unlock_state();
++}
++
++/* Clear notifications for this client
++ * XXX: Do we need to loop through and clean up all
++ *      krefs when nfsd cleans up the client? */
++void pnfs_clear_device_notify(struct nfs4_client *clp)
++{
++	atomic_dec(&clp->cl_deviceref);
++	dprintk("%s: Decr device count (clnt %p) to %d\n",
++		__func__, clp, atomic_read(&clp->cl_deviceref));
++}
++
++static struct nfs4_layout_state *
++alloc_init_layout_state(struct nfs4_client *clp, struct nfs4_file *fp,
++			stateid_t *stateid)
++{
++	struct nfs4_layout_state *new;
++
++	/* FIXME: use a kmem_cache */
++	new = kzalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return new;
++	get_nfs4_file(fp);
++	INIT_LIST_HEAD(&new->ls_perfile);
++	INIT_LIST_HEAD(&new->ls_layouts);
++	kref_init(&new->ls_ref);
++	new->ls_client = clp;
++	new->ls_file = fp;
++	new->ls_stateid.si_boot = stateid->si_boot;
++	new->ls_stateid.si_stateownerid = 0; /* identifies layout stateid */
++	new->ls_stateid.si_generation = 1;
++	spin_lock(&layout_lock);
++	new->ls_stateid.si_fileid = current_layoutid++;
++	list_add(&new->ls_perfile, &fp->fi_layout_states);
++	spin_unlock(&layout_lock);
++	return new;
++}
++
++static inline void
++get_layout_state(struct nfs4_layout_state *ls)
++{
++	kref_get(&ls->ls_ref);
++}
++
++static void
++destroy_layout_state_common(struct nfs4_layout_state *ls)
++{
++	struct nfs4_file *fp = ls->ls_file;
++
++	dprintk("pNFS %s: ls %p fp %p clp %p\n", __func__, ls, fp,
++		ls->ls_client);
++	BUG_ON(!list_empty(&ls->ls_layouts));
++	kfree(ls);
++	put_nfs4_file(fp);
++}
++
++static void
++destroy_layout_state(struct kref *kref)
++{
++	struct nfs4_layout_state *ls =
++			container_of(kref, struct nfs4_layout_state, ls_ref);
++
++	spin_lock(&layout_lock);
++	list_del(&ls->ls_perfile);
++	spin_unlock(&layout_lock);
++	destroy_layout_state_common(ls);
++}
++
++static void
++destroy_layout_state_locked(struct kref *kref)
++{
++	struct nfs4_layout_state *ls =
++			container_of(kref, struct nfs4_layout_state, ls_ref);
++
++	list_del(&ls->ls_perfile);
++	destroy_layout_state_common(ls);
++}
++
++static inline void
++put_layout_state(struct nfs4_layout_state *ls)
++{
++	dprintk("pNFS %s: ls %p ls_ref %d\n", __func__, ls,
++		atomic_read(&ls->ls_ref.refcount));
++	kref_put(&ls->ls_ref, destroy_layout_state);
++}
++
++static inline void
++put_layout_state_locked(struct nfs4_layout_state *ls)
++{
++	dprintk("pNFS %s: ls %p ls_ref %d\n", __func__, ls,
++		atomic_read(&ls->ls_ref.refcount));
++	kref_put(&ls->ls_ref, destroy_layout_state_locked);
++}
++
++/*
++ * Search the fp->fi_layout_states list for a layout state with the clientid.
++ * If not found, then this is a 'first open/delegation/lock stateid' from
++ * the client for this file.
++ * Called under the layout_lock.
++ */
++static struct nfs4_layout_state *
++find_get_layout_state(struct nfs4_client *clp, struct nfs4_file *fp)
++{
++	struct nfs4_layout_state *ls;
++
++	BUG_ON_UNLOCKED_LAYOUT();
++	list_for_each_entry(ls, &fp->fi_layout_states, ls_perfile) {
++		if (ls->ls_client == clp) {
++			dprintk("pNFS %s: before GET ls %p ls_ref %d\n",
++				__func__, ls,
++				atomic_read(&ls->ls_ref.refcount));
++			get_layout_state(ls);
++			return ls;
++		}
++	}
++	return NULL;
++}
++
++static __be32
++verify_stateid(struct nfs4_file *fp, stateid_t *stateid)
++{
++	struct nfs4_stateid *local = NULL;
++	struct nfs4_delegation *temp = NULL;
++
++	/* check if open or lock stateid */
++	local = find_stateid(stateid, RD_STATE);
++	if (local)
++		return 0;
++	temp = find_delegation_stateid(fp->fi_inode, stateid);
++	if (temp)
++		return 0;
++	return nfserr_bad_stateid;
++}
++
++/*
++ * nfs4_process_layout_stateid()
++ *
++ * We have looked up the nfs4_file corresponding to the current_fh, and
++ * confirmed the clientid. Pull the few tests from nfs4_preprocess_stateid_op()
++ * that make sense with a layout stateid.
++ *
++ * Called with the state_lock held
++ * Returns zero and stateid is updated, or error.
++ *
++ * Note: the struct nfs4_layout_state pointer is only set by layoutget.
++ */
++static __be32
++nfs4_process_layout_stateid(struct nfs4_client *clp, struct nfs4_file *fp,
++			    stateid_t *stateid, struct nfs4_layout_state **lsp)
++{
++	struct nfs4_layout_state *ls = NULL;
++	__be32 status = 0;
++
++	dprintk("--> %s clp %p fp %p \n", __func__, clp, fp);
++
++	dprintk("%s: operation stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(stateid));
++
++	status = nfs4_check_stateid(stateid);
++	if (status)
++		goto out;
++
++	/* Is this the first use of this layout ? */
++	spin_lock(&layout_lock);
++	ls = find_get_layout_state(clp, fp);
++	spin_unlock(&layout_lock);
++	if (!ls) {
++		/* Only alloc layout state on layoutget (which sets lsp). */
++		if (!lsp) {
++			dprintk("%s ERROR: Not layoutget & no layout stateid\n",
++				__func__);
++			status = nfserr_bad_stateid;
++			goto out;
++		}
++		dprintk("%s Initial stateid for layout: file %p client %p\n",
++			__func__, fp, clp);
++
++		/* verify input stateid */
++		status = verify_stateid(fp, stateid);
++		if (status) {
++			dprintk("%s ERROR: invalid open/deleg/lock stateid\n",
++				__func__);
++			goto out;
++		}
++		ls = alloc_init_layout_state(clp, fp, stateid);
++		if (!ls) {
++			dprintk("%s pNFS ERROR: no memory for layout state\n",
++				__func__);
++			status = nfserr_resource;
++			goto out;
++		}
++	} else {
++		dprintk("%s Not initial stateid. Layout state %p file %p\n",
++			__func__, ls, fp);
++
++		/* BAD STATEID */
++		status = nfserr_bad_stateid;
++		if (memcmp(&ls->ls_stateid.si_opaque, &stateid->si_opaque,
++			sizeof(stateid_opaque_t)) != 0) {
++
++			/* if a LAYOUTGET operation and stateid is a valid
++			 * open/deleg/lock stateid, accept it as a parallel
++			 * initial layout stateid
++			 */
++			if (lsp && ((verify_stateid(fp, stateid)) == 0)) {
++				dprintk("%s parallel initial layout state\n",
++					__func__);
++				goto update;
++			}
++
++			dprintk("%s ERROR bad opaque in stateid 1\n", __func__);
++			goto out_put;
++		}
++
++		/* stateid is a valid layout stateid for this file. */
++		if (stateid->si_generation > ls->ls_stateid.si_generation) {
++			dprintk("%s bad stateid 1\n", __func__);
++			goto out_put;
++		}
++update:
++		update_stateid(&ls->ls_stateid);
++		dprintk("%s Updated ls_stateid to %d on layoutstate %p\n",
++			__func__, ls->ls_stateid.si_generation, ls);
++	}
++	status = 0;
++	/* Set the stateid to be encoded */
++	memcpy(stateid, &ls->ls_stateid, sizeof(stateid_t));
++
++	/* Return the layout state if requested */
++	if (lsp) {
++		get_layout_state(ls);
++		*lsp = ls;
++	}
++	dprintk("%s: layout stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(&ls->ls_stateid));
++out_put:
++	dprintk("%s PUT LO STATE:\n", __func__);
++	put_layout_state(ls);
++out:
++	dprintk("<-- %s status %d\n", __func__, htonl(status));
++
++	return status;
++}
++
++static inline struct nfs4_layout *
++alloc_layout(void)
++{
++	return kmem_cache_alloc(pnfs_layout_slab, GFP_KERNEL);
++}
++
++static inline void
++free_layout(struct nfs4_layout *lp)
++{
++	kmem_cache_free(pnfs_layout_slab, lp);
++}
++
++static void
++init_layout(struct nfs4_layout_state *ls,
++	    struct nfs4_layout *lp,
++	    struct nfs4_file *fp,
++	    struct nfs4_client *clp,
++	    struct svc_fh *current_fh,
++	    struct nfsd4_layout_seg *seg)
++{
++	dprintk("pNFS %s: ls %p lp %p clp %p fp %p ino %p\n", __func__,
++		ls, lp, clp, fp, fp->fi_inode);
++
++	get_nfs4_file(fp);
++	lp->lo_client = clp;
++	lp->lo_file = fp;
++	get_layout_state(ls);
++	lp->lo_state = ls;
++	memcpy(&lp->lo_seg, seg, sizeof(lp->lo_seg));
++	spin_lock(&layout_lock);
++	list_add_tail(&lp->lo_perstate, &ls->ls_layouts);
++	list_add_tail(&lp->lo_perclnt, &clp->cl_layouts);
++	list_add_tail(&lp->lo_perfile, &fp->fi_layouts);
++	spin_unlock(&layout_lock);
++	dprintk("pNFS %s end\n", __func__);
++}
++
++static void
++dequeue_layout(struct nfs4_layout *lp)
++{
++	BUG_ON_UNLOCKED_LAYOUT();
++	list_del(&lp->lo_perclnt);
++	list_del(&lp->lo_perfile);
++	list_del(&lp->lo_perstate);
++}
++
++static void
++destroy_layout(struct nfs4_layout *lp)
++{
++	struct nfs4_client *clp;
++	struct nfs4_file *fp;
++	struct nfs4_layout_state *ls;
++
++	BUG_ON_UNLOCKED_LAYOUT();
++	clp = lp->lo_client;
++	fp = lp->lo_file;
++	ls = lp->lo_state;
++	dprintk("pNFS %s: lp %p clp %p fp %p ino %p ls_layouts empty %d\n",
++		__func__, lp, clp, fp, fp->fi_inode,
++		list_empty(&ls->ls_layouts));
++
++	kmem_cache_free(pnfs_layout_slab, lp);
++	/* release references taken by init_layout */
++	put_layout_state_locked(ls);
++	put_nfs4_file(fp);
++}
++
++void fs_layout_return(struct super_block *sb, struct inode *ino,
++		      struct nfsd4_pnfs_layoutreturn *lrp, int flags,
++		      void *recall_cookie)
++{
++	int ret;
++
++	if (unlikely(!sb->s_pnfs_op->layout_return))
++		return;
++
++	lrp->lr_flags = flags;
++	lrp->args.lr_cookie = recall_cookie;
++
++	if (!ino) /* FSID or ALL */
++		ino = sb->s_root->d_inode;
++
++	ret = sb->s_pnfs_op->layout_return(ino, &lrp->args);
++	dprintk("%s: inode %lu iomode=%d offset=0x%llx length=0x%llx "
++		"cookie = %p flags 0x%x status=%d\n",
++		__func__, ino->i_ino, lrp->args.lr_seg.iomode,
++		lrp->args.lr_seg.offset, lrp->args.lr_seg.length,
++		recall_cookie, flags, ret);
++}
++
++static u64
++alloc_init_sbid(struct super_block *sb)
++{
++	struct sbid_tracker *sbid;
++	struct sbid_tracker *new = alloc_sbid();
++	unsigned long hash_idx = sbid_hashval(sb);
++	u64 id = 0;
++
++	if (likely(new)) {
++		spin_lock(&layout_lock);
++		id = ++current_sbid;
++		new->id = (id << SBID_HASH_BITS) | (hash_idx & SBID_HASH_MASK);
++		id = new->id;
++		BUG_ON(id == 0);
++		new->sb = sb;
++
++		list_for_each_entry (sbid, &sbid_hashtbl[hash_idx], hash)
++			if (sbid->sb == sb) {
++				kfree(new);
++				id = sbid->id;
++				spin_unlock(&layout_lock);
++				return id;
++			}
++		list_add(&new->hash, &sbid_hashtbl[hash_idx]);
++		spin_unlock(&layout_lock);
++	}
++	return id;
++}
++
++struct super_block *
++find_sbid_id(u64 id)
++{
++	struct sbid_tracker *sbid;
++	struct super_block *sb = NULL;
++	unsigned long hash_idx = id & SBID_HASH_MASK;
++	int pos = 0;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (sbid, &sbid_hashtbl[hash_idx], hash) {
++		pos++;
++		if (sbid->id != id)
++			continue;
++		if (pos > 1)
++			list_move(&sbid->hash, &sbid_hashtbl[hash_idx]);
++		sb = sbid->sb;
++		break;
++	}
++	spin_unlock(&layout_lock);
++	return sb;
++}
++
++u64
++find_create_sbid(struct super_block *sb)
++{
++	struct sbid_tracker *sbid;
++	unsigned long hash_idx = sbid_hashval(sb);
++	int pos = 0;
++	u64 id = 0;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (sbid, &sbid_hashtbl[hash_idx], hash) {
++		pos++;
++		if (sbid->sb != sb)
++			continue;
++		if (pos > 1)
++			list_move(&sbid->hash, &sbid_hashtbl[hash_idx]);
++		id = sbid->id;
++		break;
++	}
++	spin_unlock(&layout_lock);
++
++	if (!id)
++		id = alloc_init_sbid(sb);
++
++	return id;
++}
++
++/*
++ * Create a layoutrecall structure
++ * An optional layoutrecall can be cloned (except for the layoutrecall lists)
++ */
++static struct nfs4_layoutrecall *
++alloc_init_layoutrecall(struct nfsd4_pnfs_cb_layout *cbl,
++			struct nfs4_client *clp,
++			struct nfs4_file *lrfile)
++{
++	struct nfs4_layoutrecall *clr;
++
++	dprintk("NFSD %s\n", __func__);
++	clr = kmem_cache_alloc(pnfs_layoutrecall_slab, GFP_KERNEL);
++	if (clr == NULL)
++		return clr;
++
++	dprintk("NFSD %s -->\n", __func__);
++
++	memset(clr, 0, sizeof(*clr));
++	if (lrfile)
++		get_nfs4_file(lrfile);
++	clr->clr_client = clp;
++	clr->clr_file = lrfile;
++	clr->cb = *cbl;
++
++	kref_init(&clr->clr_ref);
++	INIT_LIST_HEAD(&clr->clr_perclnt);
++
++	dprintk("NFSD %s return %p\n", __func__, clr);
++	return clr;
++}
++
++static void
++get_layoutrecall(struct nfs4_layoutrecall *clr)
++{
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));
++	kref_get(&clr->clr_ref);
++}
++
++static void
++destroy_layoutrecall(struct kref *kref)
++{
++	struct nfs4_layoutrecall *clr =
++			container_of(kref, struct nfs4_layoutrecall, clr_ref);
++	dprintk("pNFS %s: clr %p fp %p clp %p\n", __func__, clr,
++		clr->clr_file, clr->clr_client);
++	BUG_ON(!list_empty(&clr->clr_perclnt));
++	if (clr->clr_file)
++		put_nfs4_file(clr->clr_file);
++	kmem_cache_free(pnfs_layoutrecall_slab, clr);
++}
++
++int
++put_layoutrecall(struct nfs4_layoutrecall *clr)
++{
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));
++	return kref_put(&clr->clr_ref, destroy_layoutrecall);
++}
++
++void *
++layoutrecall_done(struct nfs4_layoutrecall *clr)
++{
++	void *recall_cookie = clr->cb.cbl_cookie;
++	struct nfs4_layoutrecall *parent = clr->parent;
++
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));
++	BUG_ON_UNLOCKED_LAYOUT();
++	list_del_init(&clr->clr_perclnt);
++	put_layoutrecall(clr);
++
++	if (parent && !put_layoutrecall(parent))
++		recall_cookie = NULL;
++
++	return recall_cookie;
++}
++
++/*
++ * get_state() and cb_get_state() are
++ */
++void
++release_pnfs_ds_dev_list(struct nfs4_stateid *stp)
++{
++	struct pnfs_ds_dev_entry *ddp;
++
++	while (!list_empty(&stp->st_pnfs_ds_id)) {
++		ddp = list_entry(stp->st_pnfs_ds_id.next,
++				 struct pnfs_ds_dev_entry, dd_dev_entry);
++		list_del(&ddp->dd_dev_entry);
++		kfree(ddp);
++	}
++}
++
++static int
++nfs4_add_pnfs_ds_dev(struct nfs4_stateid *stp, u32 dsid)
++{
++	struct pnfs_ds_dev_entry *ddp;
++
++	ddp = kmalloc(sizeof(*ddp), GFP_KERNEL);
++	if (!ddp)
++		return -ENOMEM;
++
++	INIT_LIST_HEAD(&ddp->dd_dev_entry);
++	list_add(&ddp->dd_dev_entry, &stp->st_pnfs_ds_id);
++	ddp->dd_dsid = dsid;
++	return 0;
++}
++
++/*
++ * are two octet ranges overlapping?
++ * start1            last1
++ *   |-----------------|
++ *                start2            last2
++ *                  |----------------|
++ */
++static inline int
++lo_seg_overlapping(struct nfsd4_layout_seg *l1, struct nfsd4_layout_seg *l2)
++{
++	u64 start1 = l1->offset;
++	u64 last1 = last_byte_offset(start1, l1->length);
++	u64 start2 = l2->offset;
++	u64 last2 = last_byte_offset(start2, l2->length);
++	int ret;
++
++	/* if last1 == start2 there's a single byte overlap */
++	ret = (last2 >= start1) && (last1 >= start2);
++	dprintk("%s: l1 %llu:%lld l2 %llu:%lld ret=%d\n", __func__,
++		l1->offset, l1->length, l2->offset, l2->length, ret);
++	return ret;
++}
++
++static inline int
++same_fsid_major(struct nfs4_fsid *fsid, u64 major)
++{
++	return fsid->major == major;
++}
++
++static inline int
++same_fsid(struct nfs4_fsid *fsid, struct svc_fh *current_fh)
++{
++	return same_fsid_major(fsid, current_fh->fh_export->ex_fsid);
++}
++
++/*
++ * find a layout recall conflicting with the specified layoutget
++ */
++static int
++is_layout_recalled(struct nfs4_client *clp,
++		   struct svc_fh *current_fh,
++		   struct nfsd4_layout_seg *seg)
++{
++	struct nfs4_layoutrecall *clr;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (clr, &clp->cl_layoutrecalls, clr_perclnt) {
++		if (clr->cb.cbl_seg.layout_type != seg->layout_type)
++			continue;
++		if (clr->cb.cbl_recall_type == RETURN_ALL)
++			goto found;
++		if (clr->cb.cbl_recall_type == RETURN_FSID) {
++			if (same_fsid(&clr->cb.cbl_fsid, current_fh))
++				goto found;
++			else
++				continue;
++		}
++		BUG_ON(clr->cb.cbl_recall_type != RETURN_FILE);
++		if (clr->cb.cbl_seg.clientid == seg->clientid &&
++		    lo_seg_overlapping(&clr->cb.cbl_seg, seg))
++			goto found;
++	}
++	spin_unlock(&layout_lock);
++	return 0;
++found:
++	spin_unlock(&layout_lock);
++	return 1;
++}
++
++/*
++ * are two octet ranges overlapping or adjacent?
++ */
++static inline int
++lo_seg_mergeable(struct nfsd4_layout_seg *l1, struct nfsd4_layout_seg *l2)
++{
++	u64 start1 = l1->offset;
++	u64 end1 = end_offset(start1, l1->length);
++	u64 start2 = l2->offset;
++	u64 end2 = end_offset(start2, l2->length);
++
++	/* if end1 == start2 the ranges are adjacent */
++	return (end2 >= start1) && (end1 >= start2);
++}
++
++static void
++extend_layout(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *lg)
++{
++	u64 lo_start = lo->offset;
++	u64 lo_end = end_offset(lo_start, lo->length);
++	u64 lg_start = lg->offset;
++	u64 lg_end = end_offset(lg_start, lg->length);
++
++	/* lo already covers lg? */
++	if (lo_start <= lg_start && lg_end <= lo_end)
++		return;
++
++	/* extend start offset */
++	if (lo_start > lg_start)
++		lo_start = lg_start;
++
++	/* extend end offset */
++	if (lo_end < lg_end)
++		lo_end = lg_end;
++
++	lo->offset = lo_start;
++	lo->length = (lo_end == NFS4_MAX_UINT64) ?
++		      lo_end : lo_end - lo_start;
++}
++
++static struct nfs4_layout *
++merge_layout(struct nfs4_file *fp,
++	     struct nfs4_client *clp,
++	     struct nfsd4_layout_seg *seg)
++{
++	struct nfs4_layout *lp, *found = NULL;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (lp, &fp->fi_layouts, lo_perfile)
++		if (lp->lo_seg.layout_type == seg->layout_type &&
++		    lp->lo_seg.clientid == seg->clientid &&
++		    lp->lo_seg.iomode == seg->iomode &&
++		    lo_seg_mergeable(&lp->lo_seg, seg)) {
++			extend_layout(&lp->lo_seg, seg);
++			found = lp;	/* cursor != NULL even with no match */
++			break;
++		}
++	spin_unlock(&layout_lock);
++	return found;
++}
++
++__be32
++nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *lgp,
++		     struct exp_xdr_stream *xdr)
++{
++	u32 status;
++	__be32 nfserr;
++	struct inode *ino = lgp->lg_fhp->fh_dentry->d_inode;
++	struct super_block *sb = ino->i_sb;
++	int can_merge;
++	struct nfs4_file *fp;
++	struct nfs4_client *clp;
++	struct nfs4_layout *lp = NULL;
++	struct nfs4_layout_state *ls = NULL;
++	struct nfsd4_pnfs_layoutget_arg args = {
++		.lg_minlength = lgp->lg_minlength,
++		.lg_fh = &lgp->lg_fhp->fh_handle,
++	};
++	struct nfsd4_pnfs_layoutget_res res = {
++		.lg_seg = lgp->lg_seg,
++	};
++
++	dprintk("NFSD: %s Begin\n", __func__);
++
++	args.lg_sbid = find_create_sbid(sb);
++	if (!args.lg_sbid) {
++		nfserr = nfserr_layouttrylater;
++		goto out;
++	}
++
++	can_merge = sb->s_pnfs_op->can_merge_layouts != NULL &&
++		    sb->s_pnfs_op->can_merge_layouts(lgp->lg_seg.layout_type);
++
++	nfs4_lock_state();
++	fp = find_alloc_file(ino, lgp->lg_fhp);
++	clp = find_confirmed_client((clientid_t *)&lgp->lg_seg.clientid);
++	dprintk("pNFS %s: fp %p clp %p \n", __func__, fp, clp);
++	if (!fp || !clp) {
++		nfserr = nfserr_inval;
++		goto out_unlock;
++	}
++
++	/* Check decoded layout stateid */
++	nfserr = nfs4_process_layout_stateid(clp, fp, &lgp->lg_sid, &ls);
++	if (nfserr)
++		goto out_unlock;
++
++	if (is_layout_recalled(clp, lgp->lg_fhp, &lgp->lg_seg)) {
++		nfserr = nfserr_recallconflict;
++		goto out_unlock;	/* still hold state lock, ls, fp */
++	}
++
++	/* pre-alloc layout in case we can't merge after we call
++	 * the file system
++	 */
++	lp = alloc_layout();
++	if (!lp) {
++		nfserr = nfserr_layouttrylater;
++		goto out_unlock;
++	}
++
++	dprintk("pNFS %s: pre-export type 0x%x maxcount %Zd "
++		"iomode %u offset %llu length %llu\n",
++		__func__, lgp->lg_seg.layout_type,
++		exp_xdr_qbytes(xdr->end - xdr->p),
++		lgp->lg_seg.iomode, lgp->lg_seg.offset, lgp->lg_seg.length);
++
++	/* FIXME: need to eliminate the use of the state lock */
++	nfs4_unlock_state();
++	status = sb->s_pnfs_op->layout_get(ino, xdr, &args, &res);
++	nfs4_lock_state();
++
++	dprintk("pNFS %s: post-export status %u "
++		"iomode %u offset %llu length %llu\n",
++		__func__, status, res.lg_seg.iomode,
++		res.lg_seg.offset, res.lg_seg.length);
++
++	/*
++	 * The allowable error codes for the layout_get pNFS export
++	 * operations vector function (from the file system) can be
++	 * expanded as needed to include other errors defined for
++	 * the RFC 5661 LAYOUTGET operation.
++	 */
++	switch (status) {
++	case 0:
++		nfserr = NFS4_OK;
++		break;
++	case NFS4ERR_ACCESS:
++	case NFS4ERR_BADIOMODE:
++		/* No support for LAYOUTIOMODE4_RW layouts */
++	case NFS4ERR_BADLAYOUT:
++		/* No layout matching loga_minlength rules */
++	case NFS4ERR_INVAL:
++	case NFS4ERR_IO:
++	case NFS4ERR_LAYOUTTRYLATER:
++	case NFS4ERR_LAYOUTUNAVAILABLE:
++	case NFS4ERR_LOCKED:
++	case NFS4ERR_NOSPC:
++	case NFS4ERR_RECALLCONFLICT:
++	case NFS4ERR_SERVERFAULT:
++	case NFS4ERR_TOOSMALL:
++		/* Requested layout too big for loga_maxcount */
++	case NFS4ERR_WRONG_TYPE:
++		/* Not a regular file */
++		nfserr = cpu_to_be32(status);
++		goto out_freelayout;
++	default:
++		BUG();
++		nfserr = nfserr_serverfault;
++	}
++
++	lgp->lg_seg = res.lg_seg;
++	lgp->lg_roc = res.lg_return_on_close;
++
++	/* SUCCESS!
++	 * Can the new layout be merged into an existing one?
++	 * If so, free unused layout struct
++	 */
++	if (can_merge && merge_layout(fp, clp, &res.lg_seg))
++		goto out_freelayout;
++
++	/* Can't merge, so let's initialize this new layout */
++	init_layout(ls, lp, fp, clp, lgp->lg_fhp, &res.lg_seg);
++out_unlock:
++	if (ls)
++		put_layout_state(ls);
++	if (fp)
++		put_nfs4_file(fp);
++	nfs4_unlock_state();
++out:
++	dprintk("pNFS %s: lp %p exit nfserr %u\n", __func__, lp,
++		be32_to_cpu(nfserr));
++	return nfserr;
++out_freelayout:
++	free_layout(lp);
++	goto out_unlock;
++}
++
++static void
++trim_layout(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *lr)
++{
++	u64 lo_start = lo->offset;
++	u64 lo_end = end_offset(lo_start, lo->length);
++	u64 lr_start = lr->offset;
++	u64 lr_end = end_offset(lr_start, lr->length);
++
++	dprintk("%s:Begin lo %llu:%lld lr %llu:%lld\n", __func__,
++		lo->offset, lo->length, lr->offset, lr->length);
++
++	/* lr fully covers lo? */
++	if (lr_start <= lo_start && lo_end <= lr_end) {
++		lo->length = 0;
++		goto out;
++	}
++
++	/*
++	 * split not supported yet. retain layout segment.
++	 * remains must be returned by the client
++	 * on the final layout return.
++	 */
++	if (lo_start < lr_start && lr_end < lo_end) {
++		dprintk("%s: split not supported\n", __func__);
++		goto out;
++	}
++
++	if (lo_start < lr_start)
++		lo_end = lr_start;	/* exclusive end of what is kept */
++	else /* lr_end < lo_end */
++		lo_start = lr_end;	/* first byte past the return */
++
++	lo->offset = lo_start;
++	lo->length = (lo_end == NFS4_MAX_UINT64) ? lo_end : lo_end - lo_start;
++out:
++	dprintk("%s:End lo %llu:%lld\n", __func__, lo->offset, lo->length);
++}
++
++static int
++pnfs_return_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp,
++			 struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	struct nfsd4_layout_seg *ret = &lrp->args.lr_seg;
++	struct nfs4_layout *cur, *tmp;
++	int nr_matched = 0;
++
++	dprintk("%s: clp %p fp %p\n", __func__, clp, fp);
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (cur, tmp, &fp->fi_layouts, lo_perfile) {
++		dprintk("%s: lp %p client %p,%p lo_type %x,%x iomode %d,%d\n",
++			__func__, cur, cur->lo_client, clp,
++			cur->lo_seg.layout_type, ret->layout_type,
++			cur->lo_seg.iomode, ret->iomode);
++		if (cur->lo_client != clp ||
++		    cur->lo_seg.layout_type != ret->layout_type)
++			continue;
++		if ((ret->iomode != IOMODE_ANY &&	/* ANY matches all */
++		     cur->lo_seg.iomode != ret->iomode) ||
++		    !lo_seg_overlapping(&cur->lo_seg, ret))
++			continue;
++		nr_matched++;
++		trim_layout(&cur->lo_seg, ret);
++		if (cur->lo_seg.length)
++			continue;	/* part of the layout survives */
++		lrp->lrs_present = 0;
++		dequeue_layout(cur);
++		destroy_layout(cur);
++	}
++	spin_unlock(&layout_lock);
++	return nr_matched;
++}
++
++static int
++pnfs_return_client_layouts(struct nfs4_client *clp,
++			   struct nfsd4_pnfs_layoutreturn *lrp, u64 ex_fsid)
++{
++	struct nfsd4_layout_seg *ret = &lrp->args.lr_seg;
++	bool by_fsid = lrp->args.lr_return_type == RETURN_FSID;
++	struct nfs4_layout *cur, *tmp;
++	int nr_returned = 0;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (cur, tmp, &clp->cl_layouts, lo_perclnt) {
++		if (ret->layout_type != cur->lo_seg.layout_type)
++			continue;
++		if (ret->iomode != IOMODE_ANY &&
++		    ret->iomode != cur->lo_seg.iomode)
++			continue;
++		if (by_fsid &&
++		    !same_fsid_major(&cur->lo_file->fi_fsid, ex_fsid))
++			continue;	/* RETURN_FSID: layout is on another fsid */
++		nr_returned++;
++		dequeue_layout(cur);
++		destroy_layout(cur);
++	}
++	spin_unlock(&layout_lock);
++	return nr_returned;
++}
++
++static int
++recall_return_perfect_match(struct nfs4_layoutrecall *clr,
++			    struct nfsd4_pnfs_layoutreturn *lrp,
++			    struct nfs4_file *fp,
++			    struct svc_fh *current_fh)
++{
++	if (clr->cb.cbl_seg.iomode != lrp->args.lr_seg.iomode ||
++	    clr->cb.cbl_recall_type != lrp->args.lr_return_type)
++		return 0;
++	switch (clr->cb.cbl_recall_type) {
++	case RETURN_FILE:	/* same file and identical byte range */
++		return clr->clr_file == fp &&
++		       clr->cb.cbl_seg.offset == lrp->args.lr_seg.offset &&
++		       clr->cb.cbl_seg.length == lrp->args.lr_seg.length;
++	case RETURN_FSID:
++		return same_fsid(&clr->cb.cbl_fsid, current_fh) != 0;
++	default:
++		return clr->cb.cbl_recall_type == RETURN_ALL;
++	}
++}
++
++static int
++recall_return_partial_match(struct nfs4_layoutrecall *clr,
++			    struct nfsd4_pnfs_layoutreturn *lrp,
++			    struct nfs4_file *fp,
++			    struct svc_fh *current_fh)
++{
++	struct nfsd4_layout_seg *recalled = &clr->cb.cbl_seg;
++	struct nfsd4_layout_seg *returned = &lrp->args.lr_seg;
++
++	/* Differing iomodes are compatible only if one side is IOMODE_ANY. */
++	if (recalled->iomode != returned->iomode &&
++	    recalled->iomode != IOMODE_ANY && returned->iomode != IOMODE_ANY)
++		return 0;
++	/* A RETURN_ALL on either side overlaps everything. */
++	if (clr->cb.cbl_recall_type == RETURN_ALL ||
++	    lrp->args.lr_return_type == RETURN_ALL)
++		return 1;
++	/* With an fsid-wide recall or return, compare filesystems only. */
++	if (clr->cb.cbl_recall_type == RETURN_FSID ||
++	    lrp->args.lr_return_type == RETURN_FSID)
++		return same_fsid(&clr->cb.cbl_fsid, current_fh);
++
++	/* Otherwise: need the same file and overlapping ranges. */
++	return clr->clr_file == fp && lo_seg_overlapping(recalled, returned);
++}
++
++int nfs4_pnfs_return_layout(struct super_block *sb, struct svc_fh *current_fh,
++			    struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	int status = 0;
++	int layouts_found = 0;
++	struct inode *ino = current_fh->fh_dentry->d_inode;
++	struct nfs4_file *fp = NULL;
++	struct nfs4_client *clp;
++	struct nfs4_layoutrecall *clr, *nextclr;
++	u64 ex_fsid = current_fh->fh_export->ex_fsid;
++	void *recall_cookie = NULL;
++
++	dprintk("NFSD: %s\n", __func__);
++
++	nfs4_lock_state();
++	clp = find_confirmed_client((clientid_t *)&lrp->args.lr_seg.clientid);
++	if (!clp)
++		goto out;	/* unknown client: status is still 0 here */
++
++	if (lrp->args.lr_return_type == RETURN_FILE) {
++		fp = find_file(ino);
++		if (!fp) {
++			printk(KERN_ERR "%s: RETURN_FILE: no nfs4_file for "
++				"ino %p:%lu\n",
++				__func__, ino, ino ? ino->i_ino : 0L);
++			goto out;	/* status is still 0 here as well */
++		}
++
++		/* Check the stateid */
++		dprintk("%s PROCESS LO_STATEID inode %p\n", __func__, ino);
++		status = nfs4_process_layout_stateid(clp, fp, &lrp->lr_sid,
++						     NULL);
++		if (status)
++			goto out_put_file;
++
++		/* update layouts */
++		layouts_found = pnfs_return_file_layouts(clp, fp, lrp);
++		/* optimize for the all-empty case */
++		if (list_empty(&fp->fi_layouts))
++			recall_cookie = PNFS_LAST_LAYOUT_NO_RECALLS;
++	} else {
++		/* RETURN_FSID / RETURN_ALL: walk the client's layouts */
++		layouts_found = pnfs_return_client_layouts(clp, lrp, ex_fsid);
++	}
++
++	dprintk("pNFS %s: clp %p fp %p layout_type 0x%x iomode %d "
++		"return_type %d fsid 0x%llx offset %llu length %llu: "
++		"layouts_found %d\n",
++		__func__, clp, fp, lrp->args.lr_seg.layout_type,
++		lrp->args.lr_seg.iomode, lrp->args.lr_return_type,
++		ex_fsid,
++		lrp->args.lr_seg.offset, lrp->args.lr_seg.length, layouts_found);
++
++	/* update layoutrecalls
++	 * note: for RETURN_{FSID,ALL}, fp may be NULL
++	 */
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (clr, nextclr, &clp->cl_layoutrecalls,
++				  clr_perclnt) {
++		if (clr->cb.cbl_seg.layout_type != lrp->args.lr_seg.layout_type)
++			continue;
++
++		if (recall_return_perfect_match(clr, lrp, fp, current_fh))
++			recall_cookie = layoutrecall_done(clr);
++		else if (layouts_found &&
++			 recall_return_partial_match(clr, lrp, fp, current_fh))
++			clr->clr_time = CURRENT_TIME;	/* restamp the recall */
++	}
++	spin_unlock(&layout_lock);
++
++out_put_file:
++	if (fp)
++		put_nfs4_file(fp);
++out:
++	nfs4_unlock_state();
++
++	/* always call the exported fs layout_return (return code ignored) */
++	fs_layout_return(sb, ino, lrp, 0, recall_cookie);
++
++	dprintk("pNFS %s: exit status %d \n", __func__, status);
++	return status;
++}
++
++/*
++ * PNFS Metadata server export operations callback for get_state.
++ *
++ * Called by the cluster fs when it receives a get_state() from a data
++ * server.  Returns 0 when @arg->stid names a known open/lock stateid (then
++ * @arg->access and @arg->clid are filled in) or a delegation stateid on
++ * inode @arg->ino; otherwise -EINVAL or the nfs4_add_pnfs_ds_dev() error.
++ */
++int
++nfs4_pnfs_cb_get_state(struct super_block *sb, struct pnfs_get_state *arg)
++{
++	struct nfs4_stateid *stp;
++	int flags = LOCK_STATE | OPEN_STATE; /* search both hash tables */
++	int status = -EINVAL;
++	struct inode *ino;
++	struct nfs4_delegation *dl;
++	stateid_t *stid = (stateid_t *)&arg->stid;
++
++	dprintk("NFSD: %s sid=" STATEID_FMT " ino %llu\n", __func__,
++		STATEID_VAL(stid), arg->ino);
++
++	nfs4_lock_state();
++	stp = find_stateid(stid, flags);
++	if (!stp) {
++		/* not an open/lock stateid: try it as a delegation stateid */
++		ino = iget_locked(sb, arg->ino);
++		if (!ino)
++			goto out;
++
++		if (ino->i_state & I_NEW) {
++			iget_failed(ino);
++			goto out;
++		}
++
++		dl = find_delegation_stateid(ino, stid);
++		if (dl)
++			status = 0;
++
++		iput(ino);
++	} else {
++		/* XXX ANDROS: marc removed nfs4_check_fh - how come? */
++
++		/* arg->dsid is the Data server id, set by the cluster fs */
++		status = nfs4_add_pnfs_ds_dev(stp, arg->dsid);
++		if (status)
++			goto out;
++
++		arg->access = stp->st_access_bmap;
++		*(clientid_t *)&arg->clid =
++			stp->st_stateowner->so_client->cl_clientid;
++	}
++out:
++	nfs4_unlock_state();
++	return status;
++}
++
++static int
++cl_has_file_layout(struct nfs4_client *clp, struct nfs4_file *lrfile,
++		   stateid_t *lsid)
++{
++	int found = 0;
++	struct nfs4_layout *lp;
++	struct nfs4_layout_state *ls;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry(lp, &clp->cl_layouts, lo_perclnt) {
++		if (lp->lo_file != lrfile)
++			continue;	/* layout is for some other file */
++
++		ls = find_get_layout_state(clp, lrfile);
++		if (!ls) {
++			/* This shouldn't happen as the file should have a
++			 * layout stateid if it has a layout.
++			 */
++			printk(KERN_ERR "%s: file %p has no layout stateid\n",
++				__func__, lrfile);
++			WARN_ON(1);
++			break;	/* found stays 0 */
++		}
++		update_stateid(&ls->ls_stateid);
++		memcpy(lsid, &ls->ls_stateid, sizeof(stateid_t));
++		put_layout_state_locked(ls);
++		found = 1;
++		break;	/* the first layout on lrfile settles it */
++	}
++	spin_unlock(&layout_lock);
++
++	return found;
++}
++
++static int
++cl_has_fsid_layout(struct nfs4_client *clp, struct nfs4_fsid *fsid)
++{
++	struct nfs4_layout *cur;
++	int ret = 0;
++
++	/* Only the fsid major number is compared; the minor is unused. */
++	spin_lock(&layout_lock);
++	list_for_each_entry(cur, &clp->cl_layouts, lo_perclnt) {
++		ret = cur->lo_file->fi_fsid.major == fsid->major;
++		if (ret)
++			break;
++	}
++	spin_unlock(&layout_lock);
++	return ret;
++}
++
++static int
++cl_has_any_layout(struct nfs4_client *clp)
++{
++	return list_empty(&clp->cl_layouts) ? 0 : 1;	/* no layout_lock taken here */
++}
++
++static int
++cl_has_layout(struct nfs4_client *clp, struct nfsd4_pnfs_cb_layout *cbl,
++	      struct nfs4_file *lrfile, stateid_t *lsid)
++{
++	/* Dispatch on the recall scope; only RETURN_FILE passes @lsid on. */
++	if (cbl->cbl_recall_type == RETURN_FILE)
++		return cl_has_file_layout(clp, lrfile, lsid);
++	if (cbl->cbl_recall_type == RETURN_FSID)
++		return cl_has_fsid_layout(clp, &cbl->cbl_fsid);
++
++	/* Any other recall type: holding any layout at all is enough. */
++	return cl_has_any_layout(clp);
++}
++
++/*
++ * Simulate the layoutreturn for recall @clr. Called without the layout_lock.
++ */
++void
++nomatching_layout(struct nfs4_layoutrecall *clr)
++{
++	struct nfsd4_pnfs_layoutreturn lr = {
++		.args.lr_return_type = clr->cb.cbl_recall_type,
++		.args.lr_seg = clr->cb.cbl_seg,
++	};
++	struct inode *inode;
++	void *recall_cookie;
++
++	if (clr->clr_file) {
++		inode = igrab(clr->clr_file->fi_inode);
++		if (WARN_ON(!inode))
++			return;	/* NOTE: the recall is left pending on this path */
++	} else {
++		inode = NULL;
++	}
++
++	dprintk("%s: clp %p fp %p: simulating layout_return\n", __func__,
++		clr->clr_client, clr->clr_file);
++
++	if (clr->cb.cbl_recall_type == RETURN_FILE)
++		pnfs_return_file_layouts(clr->clr_client, clr->clr_file, &lr);
++	else
++		pnfs_return_client_layouts(clr->clr_client, &lr,
++					   clr->cb.cbl_fsid.major);
++
++	spin_lock(&layout_lock);
++	recall_cookie = layoutrecall_done(clr);
++	spin_unlock(&layout_lock);
++
++	fs_layout_return(clr->clr_sb, inode, &lr, LR_FLAG_INTERN,
++			 recall_cookie);
++	iput(inode);	/* inode is NULL when the recall has no file */
++}
++
++void pnfs_expire_client(struct nfs4_client *clp)
++{
++	for (;;) {	/* pass 1: finish every pending layout recall */
++		struct nfs4_layoutrecall *lrp = NULL;
++
++		spin_lock(&layout_lock);
++		if (!list_empty(&clp->cl_layoutrecalls)) {
++			lrp = list_entry(clp->cl_layoutrecalls.next,
++					 struct nfs4_layoutrecall, clr_perclnt);
++			get_layoutrecall(lrp);
++		}
++		spin_unlock(&layout_lock);
++		if (!lrp)
++			break;
++
++		dprintk("%s: lrp %p, fp %p\n", __func__, lrp, lrp->clr_file);
++		BUG_ON(lrp->clr_client != clp);
++		nomatching_layout(lrp);
++		put_layoutrecall(lrp);
++	}
++
++	for (;;) {	/* pass 2: return every layout the client still holds */
++		struct nfs4_layout *lp = NULL;
++		struct inode *inode = NULL;
++		struct nfsd4_pnfs_layoutreturn lr;
++		bool empty = false;
++
++		spin_lock(&layout_lock);
++		if (!list_empty(&clp->cl_layouts)) {
++			lp = list_entry(clp->cl_layouts.next,
++					struct nfs4_layout, lo_perclnt);
++			inode = igrab(lp->lo_file->fi_inode);
++			memset(&lr, 0, sizeof(lr));
++			lr.args.lr_return_type = RETURN_FILE;
++			lr.args.lr_seg = lp->lo_seg;
++			BUG_ON(lp->lo_client != clp);
++			dequeue_layout(lp);
++			/* sample only after lp is unlinked, else never empty */
++			empty = list_empty(&lp->lo_file->fi_layouts);
++			destroy_layout(lp); /* do not access lp after this */
++		}
++		spin_unlock(&layout_lock);
++		if (!lp)
++			break;
++		if (WARN_ON(!inode))
++			break;
++
++		dprintk("%s: inode %lu lp %p clp %p\n", __func__, inode->i_ino,
++			lp, clp);
++
++		fs_layout_return(inode->i_sb, inode, &lr, LR_FLAG_EXPIRE,
++				 empty ? PNFS_LAST_LAYOUT_NO_RECALLS : NULL);
++		iput(inode);
++	}
++}
++
++struct create_recall_list_arg {
++	struct nfsd4_pnfs_cb_layout *cbl;	/* recall to fan out per client */
++	struct nfs4_file *lrfile;		/* file being recalled, may be NULL */
++	struct list_head *todolist;		/* collects the per-client recalls */
++	unsigned todo_count;			/* entries added to todolist */
++};
++
++/*
++ * Look for a matching layout for the given client and, if any is
++ * found, add a pending layout recall to the todo list.
++ * returns:
++ *   0 if a recall was queued or the client holds no matching layout,
++ *   -ENOMEM if the recall could not be allocated.
++ */
++static int
++lo_recall_per_client(struct nfs4_client *clp, void *p)
++{
++	stateid_t lsid;
++	struct nfs4_layoutrecall *pending;
++	struct create_recall_list_arg *arg = p;
++
++	memset(&lsid, 0, sizeof(lsid));
++	if (!cl_has_layout(clp, arg->cbl, arg->lrfile, &lsid))
++		return 0;
++
++	/* Matching put done by layoutreturn */
++	pending = alloc_init_layoutrecall(arg->cbl, clp, arg->lrfile);
++	/* out of memory, drain todo queue */
++	if (!pending)
++		return -ENOMEM;
++
++	*(stateid_t *)&pending->cb.cbl_sid = lsid;
++	list_add(&pending->clr_perclnt, arg->todolist);
++	arg->todo_count++;
++	return 0;
++}
++
++/* Create a layoutrecall structure for each client based on the
++ * original structure. *todo_len is set even when an error is returned. */
++int
++create_layout_recall_list(struct list_head *todolist, unsigned *todo_len,
++			  struct nfsd4_pnfs_cb_layout *cbl,
++			  struct nfs4_file *lrfile)
++{
++	struct nfs4_client *clp;
++	struct create_recall_list_arg arg = {
++		.cbl = cbl,
++		.lrfile = lrfile,
++		.todolist = todolist,
++	};
++	int status = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	/* If client given by fs, just do single client */
++	if (cbl->cbl_seg.clientid) {
++		clp = find_confirmed_client(
++				(clientid_t *)&cbl->cbl_seg.clientid);
++		if (!clp) {
++			status = -ENOENT;
++			dprintk("%s: clientid %llx not found\n", __func__,
++				(unsigned long long)cbl->cbl_seg.clientid);
++			goto out;
++		}
++
++		status = lo_recall_per_client(clp, &arg);
++	} else {
++		/* Check all clients for layout matches */
++		status = filter_confirmed_clients(lo_recall_per_client, &arg);
++	}
++
++out:
++	*todo_len = arg.todo_count;	/* 0 if nothing was queued */
++	dprintk("%s: <-- list len %u status %d\n", __func__, *todo_len, status);
++	return status;
++}
++
++/*
++ * Recall layouts asynchronously: nfsd4_cb_layout() per todolist entry.
++ * Called with state lock.
++ */
++static int
++spawn_layout_recall(struct super_block *sb, struct list_head *todolist,
++		    unsigned todo_len)
++{
++	struct nfs4_layoutrecall *pending;
++	struct nfs4_layoutrecall *parent = NULL;
++	int status = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	if (todo_len > 1) {
++		pending = list_entry(todolist->next, struct nfs4_layoutrecall,
++				     clr_perclnt);
++
++		parent = alloc_init_layoutrecall(&pending->cb, NULL,
++						 pending->clr_file);
++		if (unlikely(!parent)) {
++			/* We want forward progress. If parent cannot be
++			 * allocated take the first one as parent but don't
++			 * execute it (with only two queued it is dropped).
++			 * This path returns -ENOMEM; once the partial recalls
++			 * return, call nfsd_layout_recall_cb again.
++			 */
++			list_del_init(&pending->clr_perclnt);
++			if (todo_len > 2) {
++				parent = pending;
++			} else {
++				parent = NULL;
++				put_layoutrecall(pending);
++			}
++			--todo_len;
++				status = -ENOMEM;
++		}
++	}
++
++	while (!list_empty(todolist)) {
++		pending = list_entry(todolist->next, struct nfs4_layoutrecall,
++				     clr_perclnt);
++		list_del_init(&pending->clr_perclnt);
++		dprintk("%s: clp %p cb_client %p fp %p\n", __func__,
++			pending->clr_client,
++			pending->clr_client->cl_cb_client,
++			pending->clr_file);
++		if (unlikely(!pending->clr_client->cl_cb_client)) {
++			printk(KERN_INFO
++				"%s: clientid %08x/%08x has no callback path\n",
++				__func__,
++				pending->clr_client->cl_clientid.cl_boot,
++				pending->clr_client->cl_clientid.cl_id);
++			put_layoutrecall(pending);
++			continue;	/* note: todo_len is not decremented here */
++		}
++
++		pending->clr_time = CURRENT_TIME;
++		pending->clr_sb = sb;
++		if (parent) {
++			/* If we created a parent its initial ref count is 1.
++			 * We will need to de-ref it eventually. So we just
++			 * don't increment on behalf of the last one.
++			 */
++			if (todo_len != 1)
++				get_layoutrecall(parent);
++		}
++		pending->parent = parent;
++		get_layoutrecall(pending);
++		/* Add to list so corresponding layoutreturn can find req */
++		list_add(&pending->clr_perclnt,
++			 &pending->clr_client->cl_layoutrecalls);
++
++		nfsd4_cb_layout(pending);
++		--todo_len;
++	}
++
++	return status;
++}
++
++/*
++ * Build the per-client layout recalls for @cbl and hand them off.  Returns 0,
++ * -ENOENT if nothing to recall, -EAGAIN if recalls were queued with an error.
++ */
++int nfsd_layout_recall_cb(struct super_block *sb, struct inode *inode,
++			  struct nfsd4_pnfs_cb_layout *cbl)
++{
++	int status;
++	struct nfs4_file *lrfile = NULL;
++	struct list_head todolist;
++	unsigned todo_len = 0;
++
++	dprintk("NFSD nfsd_layout_recall_cb: inode %p cbl %p\n", inode, cbl);
++	BUG_ON(!cbl);
++	BUG_ON(cbl->cbl_recall_type != RETURN_FILE &&
++	       cbl->cbl_recall_type != RETURN_FSID &&
++	       cbl->cbl_recall_type != RETURN_ALL);
++	BUG_ON(cbl->cbl_recall_type == RETURN_FILE && !inode);
++	BUG_ON(cbl->cbl_seg.iomode != IOMODE_READ &&
++	       cbl->cbl_seg.iomode != IOMODE_RW &&
++	       cbl->cbl_seg.iomode != IOMODE_ANY);
++
++	if (nfsd_serv == NULL) {
++		dprintk("NFSD nfsd_layout_recall_cb: nfsd_serv == NULL\n");
++		return -ENOENT;
++	}
++
++	nfs4_lock_state();
++	status = -ENOENT;
++	if (inode) {
++		lrfile = find_file(inode);
++		if (!lrfile) {
++			dprintk("NFSD nfsd_layout_recall_cb: "
++				"nfs4_file not found\n");
++			goto err;
++		}
++		if (cbl->cbl_recall_type == RETURN_FSID)
++			cbl->cbl_fsid = lrfile->fi_fsid;
++	}
++
++	INIT_LIST_HEAD(&todolist);
++
++	/* If no cookie provided by FS, return a default one */
++	if (!cbl->cbl_cookie)
++		cbl->cbl_cookie = PNFS_LAST_LAYOUT_NO_RECALLS;
++
++	status = create_layout_recall_list(&todolist, &todo_len, cbl, lrfile);
++	if (list_empty(&todolist)) {
++		status = -ENOENT;	/* overrides any error from the list build */
++	} else {
++		/* process todolist even if create_layout_recall_list
++		 * returned an error */
++		int status2 = spawn_layout_recall(sb, &todolist, todo_len);
++		if (status2)
++			status = status2;
++	}
++
++err:
++	nfs4_unlock_state();
++	if (lrfile)
++		put_nfs4_file(lrfile);
++	return (todo_len && status) ? -EAGAIN : status;
++}
++
++struct create_device_notify_list_arg {
++	struct list_head *todolist;	/* collects one nfs4_notify_device per client */
++	struct nfsd4_pnfs_cb_dev_list *ndl;	/* stored in each entry's nd_list */
++};
++
++static int
++create_device_notify_per_cl(struct nfs4_client *clp, void *p)
++{
++	struct create_device_notify_list_arg *ctx = p;
++	struct nfs4_notify_device *note;
++
++	/* Skip clients whose cl_deviceref count is not positive. */
++	if (atomic_read(&clp->cl_deviceref) <= 0)
++		return 0;
++
++	note = kmalloc(sizeof(*note), GFP_KERNEL);
++	if (!note)
++		return -ENOMEM;
++	note->nd_client = clp;
++	note->nd_list = ctx->ndl;
++	list_add(&note->nd_perclnt, ctx->todolist);
++	return 0;
++}
++
++/* Queue one nfs4_notify_device on @todolist per eligible confirmed client. */
++int
++create_device_notify_list(struct list_head *todolist,
++			  struct nfsd4_pnfs_cb_dev_list *ndl)
++{
++	struct create_device_notify_list_arg ctx;
++	int err;
++
++	ctx.todolist = todolist;
++	ctx.ndl = ndl;
++
++	/* The client walk runs under the nfs4 state lock. */
++	nfs4_lock_state();
++	err = filter_confirmed_clients(create_device_notify_per_cl, &ctx);
++	nfs4_unlock_state();
++	return err;
++}
++
++/*
++ * For each client that has a device, send a device notification.
++ * XXX: Need to track which clients have which devices.
++ */
++int nfsd_device_notify_cb(struct super_block *sb,
++			  struct nfsd4_pnfs_cb_dev_list *ndl)
++{
++	struct nfs4_notify_device *cbnd;
++	unsigned int notify_num = 0;
++	int status2, status = 0;
++	struct list_head todolist;
++
++	BUG_ON(!ndl || ndl->cbd_len == 0 || !ndl->cbd_list);
++
++	dprintk("NFSD %s: cbl %p len %u\n", __func__, ndl, ndl->cbd_len);
++
++	if (nfsd_serv == NULL)
++		return -ENOENT;
++
++	INIT_LIST_HEAD(&todolist);
++
++	status = create_device_notify_list(&todolist, ndl);
++
++	while (!list_empty(&todolist)) {	/* runs even if the build failed */
++		cbnd = list_entry(todolist.next, struct nfs4_notify_device,
++				  nd_perclnt);
++		list_del_init(&cbnd->nd_perclnt);
++		status2 = nfsd4_cb_notify_device(cbnd);
++		/* NOTE(review): cbnd is read after the call - confirm lifetime */
++		pnfs_clear_device_notify(cbnd->nd_client);
++		if (status2) {
++			kfree(cbnd);	/* freed here only on failure */
++			status = status2;
++		}
++		notify_num++;
++	}
++
++	dprintk("NFSD %s: status %d clients %u\n",
++		__func__, status, notify_num);
++	return status;
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-08-31 20:42:05.557222774 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-08-31 20:42:05.557222774 -0400
+@@ -0,0 +1,461 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ ******************************************************************************/
++
++#include <linux/nfs4.h>
++#include <linux/nfsd/const.h>
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/sunrpc/clnt.h>
++
++#include "nfsfh.h"
++#include "nfsd.h"
++
++#define NFSDDBG_FACILITY                NFSDDBG_PROC
++
++/* Just use a linked list. Do not expect more than 32 dlm_device_entries
++ * the first implementation will just use one device per cluster file system
++ */
++
++static LIST_HEAD(dlm_device_list);
++static DEFINE_SPINLOCK(dlm_device_list_lock);
++
++struct dlm_device_entry {
++	struct list_head	dlm_dev_list;	/* link on dlm_device_list */
++	char			disk_name[DISK_NAME_LEN];	/* lookup key */
++	int			num_ds;		/* addresses counted in ds_list */
++	char			ds_list[NFSD_DLM_DS_LIST_MAX];	/* "ip,ip,..." */
++};
++
++static struct dlm_device_entry *
++_nfsd4_find_pnfs_dlm_device(char *disk_name)
++{
++	struct dlm_device_entry *dlm_pdev, *match = NULL;
++
++	dprintk("--> %s  disk name %s\n", __func__, disk_name);
++	spin_lock(&dlm_device_list_lock);
++	list_for_each_entry(dlm_pdev, &dlm_device_list, dlm_dev_list) {
++		dprintk("%s Look for dlm_pdev %s\n", __func__,
++			dlm_pdev->disk_name);
++		if (!strncmp(dlm_pdev->disk_name, disk_name, DISK_NAME_LEN)) {
++			match = dlm_pdev;	/* whole name, not a prefix */
++			break;
++		}
++	}
++	spin_unlock(&dlm_device_list_lock);
++	return match;	/* NULL when no entry carries this disk name */
++}
++
++static struct dlm_device_entry *
++nfsd4_find_pnfs_dlm_device(struct super_block *sb) {
++	char dname[BDEVNAME_SIZE];
++
++	bdevname(sb->s_bdev, dname);	/* entries are keyed by this name */
++	return _nfsd4_find_pnfs_dlm_device(dname);
++}
++
++ssize_t
++nfsd4_get_pnfs_dlm_device_list(char *buf, ssize_t buflen)
++{
++	struct dlm_device_entry *ent;
++	ssize_t used = 0;
++	int ret = -EINVAL;
++
++	/* Emit one "disk:ds_list" line per entry; fail if @buf is too small. */
++	spin_lock(&dlm_device_list_lock);
++	list_for_each_entry(ent, &dlm_device_list, dlm_dev_list) {
++		ssize_t room = buflen - used;
++		int n = snprintf(buf + used, room, "%s:%s\n",
++				 ent->disk_name, ent->ds_list);
++
++		if (n >= room)
++			goto unlock;
++		used += n;
++	}
++	ret = used;
++
++unlock:
++	spin_unlock(&dlm_device_list_lock);
++	return ret;
++}
++
++bool nfsd4_validate_pnfs_dlm_device(char *ds_list, int *num_ds)
++{
++	char *start = ds_list;
++
++	/* Count the comma-separated addresses; fail on the first bad one. */
++	*num_ds = 0;
++	while (*start) {
++		struct sockaddr_storage tempAddr;
++		int ipLen = strcspn(start, ",");
++
++		if (!rpc_pton(start, ipLen, (struct sockaddr *)&tempAddr, sizeof(tempAddr)))
++			return false;
++		(*num_ds)++;
++		start += ipLen + (start[ipLen] == ',');	/* never pass NUL */
++	}
++	return true;
++}
++
++/*
++ * pnfs_dlm_device string format:
++ *     block-device-path:<ds1 ipv4 address>,<ds2 ipv4 address>
++ *
++ * Examples
++ *     /dev/sda:192.168.1.96,192.168.1.97 creates a data server list with
++ *     two data servers for the dlm cluster file system mounted on /dev/sda.
++ *
++ *     /dev/sda:192.168.1.96,192.168.1.100
++ *     replaces the data server list for /dev/sda
++ *
++ *     Only the deviceid == 1 is supported. Can add device id to
++ *     pnfs_dlm_device string when needed.
++ *
++ *     Only the round robin each data server once stripe index is supported.
++ */
++int
++nfsd4_set_pnfs_dlm_device(char *pnfs_dlm_device, int len)
++
++{
++	struct dlm_device_entry *new, *found;
++	char *bufp = pnfs_dlm_device;
++	char *endp = bufp + strlen(bufp);
++	int err = -ENOMEM;
++
++	dprintk("--> %s len %d\n", __func__, len);
++
++	new = kzalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return err;
++
++	err = -EINVAL;
++	/* disk_name */
++	/* FIXME: need to check for valid disk_name. search superblocks?
++	 * check for slash dev slash ?
++	 */
++	len = strcspn(bufp, ":");
++	if (len >= DISK_NAME_LEN)	/* leave room for the NUL from kzalloc */
++		goto out_free;
++	memcpy(new->disk_name, bufp, len);
++
++	err = -EINVAL;
++	bufp += len + 1;
++	if (bufp >= endp)
++		goto out_free;
++
++	/* data server list */
++	/* FIXME: need to check for comma separated valid ip format */
++	len = strcspn(bufp, ":");
++	if (len >= NFSD_DLM_DS_LIST_MAX)	/* keep ds_list NUL-terminated */
++		goto out_free;
++	memcpy(new->ds_list, bufp, len);
++
++
++	/*  validate the ips */
++	if (!nfsd4_validate_pnfs_dlm_device(new->ds_list, &(new->num_ds)))
++		goto out_free;
++
++	dprintk("%s disk_name %s num_ds %d ds_list %s\n", __func__,
++		new->disk_name, new->num_ds, new->ds_list);
++
++	found = _nfsd4_find_pnfs_dlm_device(new->disk_name);
++	if (found) {
++		/* FIXME: should compare found->ds_list with new->ds_list
++		 * and if it is different, kick off a CB_NOTIFY change
++		 * deviceid.
++		 */
++		dprintk("%s pnfs_dlm_device %s:%s already in cache "
++			" replace ds_list with new ds_list %s\n", __func__,
++			found->disk_name, found->ds_list, new->ds_list);
++		memset(found->ds_list, 0, sizeof(found->ds_list));
++		memcpy(found->ds_list, new->ds_list, strlen(new->ds_list));
++		found->num_ds = new->num_ds;
++		kfree(new);
++	} else {
++		dprintk("%s Adding pnfs_dlm_device %s:%s\n", __func__,
++				new->disk_name, new->ds_list);
++		spin_lock(&dlm_device_list_lock);
++		list_add(&new->dlm_dev_list, &dlm_device_list);
++		spin_unlock(&dlm_device_list_lock);
++	}
++	dprintk("<-- %s Success\n", __func__);
++	return 0;
++
++out_free:
++	kfree(new);
++	dprintk("<-- %s returns %d\n", __func__, err);
++	return err;
++}
++
++void nfsd4_pnfs_dlm_shutdown(void)
++{
++	struct dlm_device_entry *ent;
++
++	dprintk("--> %s\n", __func__);
++	spin_lock(&dlm_device_list_lock);
++	while (!list_empty(&dlm_device_list)) {	/* pop and free each entry */
++		ent = list_entry(dlm_device_list.next,
++				 struct dlm_device_entry, dlm_dev_list);
++		list_del(&ent->dlm_dev_list);
++		kfree(ent);
++	}
++	spin_unlock(&dlm_device_list_lock);
++}
++
++static int nfsd4_pnfs_dlm_getdeviter(struct super_block *sb,
++				     u32 layout_type,
++				     struct nfsd4_pnfs_dev_iter_res *res)
++{
++	if (layout_type != LAYOUT_NFSV4_1_FILES) {
++		printk(KERN_ERR "%s: ERROR: layout type isn't 'file' "
++			"(type: %x)\n", __func__, layout_type);
++		return -ENOTSUPP;
++	}
++
++	res->gd_eof = 1;	/* set on every call that passes the type check */
++	if (res->gd_cookie)	/* nonzero cookie: nothing further to report */
++		return -ENOENT;
++
++	res->gd_cookie = 1;
++	res->gd_verf = 1;
++	res->gd_devid = 1;	/* the one and only device id handed out */
++	return 0;
++}
++
++/* Encode the single file-layout device (deviceid 1) built from @sb's DS
++ * list into @xdr.  Returns filelayout_encode_devinfo()'s result or -errno.
++ */
++static int nfsd4_pnfs_dlm_getdevinfo(struct super_block *sb,
++				     struct exp_xdr_stream *xdr,
++				     u32 layout_type,
++				     const struct nfsd4_pnfs_deviceid *devid)
++{
++	int err, len, i = 0;
++	struct pnfs_filelayout_device fdev;
++	struct pnfs_filelayout_devaddr *daddr;
++	struct dlm_device_entry *dlm_pdev;
++	char   *bufp;
++
++	if (layout_type != LAYOUT_NFSV4_1_FILES) {
++		dprintk("%s: ERROR: layout type isn't 'file' "
++			"(type: %x)\n", __func__, layout_type);
++		return -ENOTSUPP;
++	}
++
++	/* We only hand out a deviceid of 1 in LAYOUTGET, so a GETDEVICEINFO
++	 * with a gdia_device_id != 1 is invalid.
++	 */
++	if (devid->devid != 1) {
++		dprintk("%s: WARNING: didn't receive a deviceid of "
++			"1 (got: 0x%llx)\n", __func__, devid->devid);
++		return -EINVAL;
++	}
++
++	/* If the DS list has not been established, return -EINVAL */
++	dlm_pdev = nfsd4_find_pnfs_dlm_device(sb);
++	if (!dlm_pdev) {
++		dprintk("%s: DEBUG: disk %s Not Found\n", __func__,
++			sb->s_bdev->bd_disk->disk_name);
++		return -EINVAL;
++	}
++
++	dprintk("%s: Found disk %s with DS list |%s|\n",
++		__func__, dlm_pdev->disk_name, dlm_pdev->ds_list);
++
++	memset(&fdev, '\0', sizeof(fdev));
++	fdev.fl_device_length = dlm_pdev->num_ds;
++
++	err = -ENOMEM;
++	len = sizeof(*fdev.fl_device_list) * fdev.fl_device_length;
++	fdev.fl_device_list = kzalloc(len, GFP_KERNEL);
++	if (!fdev.fl_device_list) {
++		printk(KERN_ERR "%s: ERROR: unable to kmalloc a device list "
++			"buffer for %d DSes.\n", __func__, dlm_pdev->num_ds);
++		fdev.fl_device_length = 0;
++		goto out;
++	}
++
++	/* Set a simple stripe index list: 0, 1, ..., n - 1 */
++	fdev.fl_stripeindices_length = fdev.fl_device_length;
++	fdev.fl_stripeindices_list = kzalloc(sizeof(u32) *
++				     fdev.fl_stripeindices_length, GFP_KERNEL);
++	if (!fdev.fl_stripeindices_list) {
++		printk(KERN_ERR "%s: ERROR: unable to kmalloc a stripeindices "
++			"list buffer for %d DSes.\n", __func__, dlm_pdev->num_ds);
++		goto out;
++	}
++	for (i = 0; i < fdev.fl_stripeindices_length; i++)
++		fdev.fl_stripeindices_list[i] = i;
++
++	/* Transfer the data server list with a single multipath entry */
++	bufp = dlm_pdev->ds_list;
++	for (i = 0; i < fdev.fl_device_length; i++) {
++		daddr = kmalloc(sizeof(*daddr), GFP_KERNEL);
++		if (!daddr) {
++			printk(KERN_ERR "%s: ERROR: unable to kmalloc a device "
++				"addr buffer.\n", __func__);
++			goto out;
++		}
++		/* Hand daddr to fdev right away so "out" frees it on error. */
++		fdev.fl_device_list[i].fl_multipath_length = 1;
++		fdev.fl_device_list[i].fl_multipath_list = daddr;
++		daddr->r_netid.data = "tcp";
++		daddr->r_netid.len = 3;
++		len = strcspn(bufp, ",");
++		daddr->r_addr.data = kmalloc(len + 4, GFP_KERNEL);
++		if (!daddr->r_addr.data)
++			goto out;
++		memcpy(daddr->r_addr.data, bufp, len);
++		/*
++		 * append the port number.  interpreted as two more bytes
++		 * beyond the quad: ".8.1" -> 0x08.0x01 -> 0x0801 = port 2049.
++		 */
++		memcpy(daddr->r_addr.data + len, ".8.1", 4);
++		daddr->r_addr.len = len + 4;
++		dprintk("%s: encoding DS |%s|\n", __func__, bufp);
++		bufp += len + 1;
++	}
++
++	/* have nfsd encode the device info */
++	err = filelayout_encode_devinfo(xdr, &fdev);
++out:
++	for (i = 0; i < fdev.fl_device_length; i++) {
++		daddr = fdev.fl_device_list[i].fl_multipath_list;
++		if (daddr)
++			kfree(daddr->r_addr.data);
++		kfree(daddr);
++	}
++	kfree(fdev.fl_device_list);
++	kfree(fdev.fl_stripeindices_list);
++	dprintk("<-- %s returns %d\n", __func__, err);
++	return err;
++}
++
++static int get_stripe_unit(int blocksize)
++{
++	/* Largest blocksize multiple <= NFSSVC_MAXBLKSIZE, or blocksize if larger. */
++	return blocksize >= NFSSVC_MAXBLKSIZE ? blocksize :
++	       NFSSVC_MAXBLKSIZE - (NFSSVC_MAXBLKSIZE % blocksize);
++}
++
++/*
++ * Look up inode block device in pnfs_dlm_device list.
++ * Hash on the inode->i_ino and number of data servers.
++ */
++static int dlm_ino_hash(struct inode *ino)
++{
++	struct dlm_device_entry *de = nfsd4_find_pnfs_dlm_device(ino->i_sb);
++	u32 mask;
++
++	/*
++	 * No pnfs_dlm_device entry for the inode's block device:
++	 * don't hand out a layout.
++	 */
++	if (!de)
++		return -1;
++	mask = de->num_ds - 1;
++	return ino->i_ino & mask;
++}
++
++static enum nfsstat4 nfsd4_pnfs_dlm_layoutget(struct inode *inode,
++			   struct exp_xdr_stream *xdr,
++			   const struct nfsd4_pnfs_layoutget_arg *args,
++			   struct nfsd4_pnfs_layoutget_res *res)
++{
++	struct pnfs_filelayout_layout *layout = NULL;
++	struct knfsd_fh *fhp = NULL;
++	int index;
++	enum nfsstat4 rc = NFS4_OK;
++
++	dprintk("%s: LAYOUT_GET\n", __func__);
++
++	/* DLM exported file systems only support layouts for READ */
++	if (res->lg_seg.iomode == IOMODE_RW)
++		return NFS4ERR_BADIOMODE;
++
++	index = dlm_ino_hash(inode);
++	dprintk("%s first stripe index %d i_ino %lu\n", __func__, index,
++		inode->i_ino);
++	if (index < 0)	/* no DS list registered for this block device */
++		return NFS4ERR_LAYOUTUNAVAILABLE;
++
++	res->lg_seg.layout_type = LAYOUT_NFSV4_1_FILES;
++	/* Always give out whole file layouts */
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;
++	/* Always give out READ ONLY layouts */
++	res->lg_seg.iomode = IOMODE_READ;
++
++	layout = kzalloc(sizeof(*layout), GFP_KERNEL);
++	if (layout == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;
++		goto error;
++	}
++
++	/* Set file layout response args */
++	layout->lg_layout_type = LAYOUT_NFSV4_1_FILES;
++	layout->lg_stripe_type = STRIPE_SPARSE;
++	layout->lg_commit_through_mds = false;
++	layout->lg_stripe_unit = get_stripe_unit(inode->i_sb->s_blocksize);
++	layout->lg_fh_length = 1;
++	layout->device_id.sbid = args->lg_sbid;
++	layout->device_id.devid = 1;                                /*FSFTEMP*/
++	layout->lg_first_stripe_index = index;                      /*FSFTEMP*/
++	layout->lg_pattern_offset = 0;
++
++	fhp = kmalloc(sizeof(*fhp), GFP_KERNEL);
++	if (fhp == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;
++		goto error;
++	}
++
++	memcpy(fhp, args->lg_fh, sizeof(*fhp));
++	pnfs_fh_mark_ds(fhp);	/* operates on the private copy, not args->lg_fh */
++	layout->lg_fh_list = fhp;
++
++	/* Call nfsd to encode layout */
++	rc = filelayout_encode_layout(xdr, layout);
++exit:
++	kfree(layout);	/* layout and fhp may still be NULL on the error path */
++	kfree(fhp);
++	return rc;
++
++error:
++	res->lg_seg.length = 0;	/* allocation failed: report an empty segment */
++	goto exit;
++}
++
++static int
++nfsd4_pnfs_dlm_layouttype(struct super_block *sb)
++{
++	return LAYOUT_NFSV4_1_FILES;
++}
++
++/* For use by DLM cluster file systems exported by pNFSD */
++const struct pnfs_export_operations pnfs_dlm_export_ops = {
++	.layout_type = nfsd4_pnfs_dlm_layouttype,
++	.get_device_info = nfsd4_pnfs_dlm_getdevinfo,
++	.get_device_iter = nfsd4_pnfs_dlm_getdeviter,
++	.layout_get = nfsd4_pnfs_dlm_layoutget,
++};
++EXPORT_SYMBOL(pnfs_dlm_export_ops);
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-08-31 20:42:05.558141620 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-08-31 20:42:05.558141620 -0400
+@@ -0,0 +1,620 @@
++/*
++*  linux/fs/nfsd/nfs4pnfsds.c
++*
++*  Copyright (c) 2005 The Regents of the University of Michigan.
++*  All rights reserved.
++*
++*  Andy Adamson <andros@umich.edu>
++*
++*  Redistribution and use in source and binary forms, with or without
++*  modification, are permitted provided that the following conditions
++*  are met:
++*
++*  1. Redistributions of source code must retain the above copyright
++*     notice, this list of conditions and the following disclaimer.
++*  2. Redistributions in binary form must reproduce the above copyright
++*     notice, this list of conditions and the following disclaimer in the
++*     documentation and/or other materials provided with the distribution.
++*  3. Neither the name of the University nor the names of its
++*     contributors may be used to endorse or promote products derived
++*     from this software without specific prior written permission.
++*
++*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++*
++*/
++#if defined(CONFIG_PNFSD)
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++#include <linux/param.h>
++#include <linux/sunrpc/svc.h>
++#include <linux/sunrpc/debug.h>
++#include <linux/nfs4.h>
++#include <linux/exportfs.h>
++#include <linux/sched.h>
++
++#include "nfsd.h"
++#include "pnfsd.h"
++#include "state.h"
++
++/*
++ *******************
++ *   	 PNFS
++ *******************
++ */
++/*
++ * Hash tables for pNFS Data Server state
++ *
++ * mds_nodeid:	list of struct pnfs_mds_id one per Metadata server (MDS) using
++ *		this data server (DS).
++ *
++ * mds_clid_hashtbl[]: uses clientid_hashval(), hash of all clientids obtained
++ *			from any MDS.
++ *
++ * ds_stid_hashtbl[]: uses stateid_hashval(), hash of all stateids obtained
++ *			from any MDS.
++ *
++ */
++/* Hash tables for clientid state */
++#define CLIENT_HASH_BITS                 4
++#define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
++#define CLIENT_HASH_MASK                (CLIENT_HASH_SIZE - 1)
++
++#define clientid_hashval(id) \
++	((id) & CLIENT_HASH_MASK)
++
++/* hash table for pnfs_ds_stateid */
++#define STATEID_HASH_BITS              10
++#define STATEID_HASH_SIZE              (1 << STATEID_HASH_BITS)
++#define STATEID_HASH_MASK              (STATEID_HASH_SIZE - 1)
++
++#define stateid_hashval(owner_id, file_id)  \
++	(((owner_id) + (file_id)) & STATEID_HASH_MASK)
++
++static struct list_head mds_id_tbl;
++static struct list_head mds_clid_hashtbl[CLIENT_HASH_SIZE];
++static struct list_head ds_stid_hashtbl[STATEID_HASH_SIZE];
++
++static inline void put_ds_clientid(struct pnfs_ds_clientid *dcp);
++static inline void put_ds_mdsid(struct pnfs_mds_id *mdp);
++
++/* Mutex for data server state.  Needs to be separate from
++ * mds state mutex since a node can be both mds and ds */
++static DEFINE_MUTEX(ds_mutex);
++static struct thread_info *ds_mutex_owner;
++
++static void
++ds_lock_state(void)
++{
++	mutex_lock(&ds_mutex);
++	ds_mutex_owner = current_thread_info();
++}
++
++static void
++ds_unlock_state(void)
++{
++	BUG_ON(ds_mutex_owner != current_thread_info());
++	ds_mutex_owner = NULL;
++	mutex_unlock(&ds_mutex);
++}
++
++static int
++cmp_clid(const clientid_t *cl1, const clientid_t *cl2)
++{
++	return (cl1->cl_boot == cl2->cl_boot) &&
++	       (cl1->cl_id == cl2->cl_id);
++}
++
++void
++nfs4_pnfs_state_init(void)
++{
++	int i;
++
++	for (i = 0; i < CLIENT_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&mds_clid_hashtbl[i]);
++
++	for (i = 0; i < STATEID_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&ds_stid_hashtbl[i]);
++
++	INIT_LIST_HEAD(&mds_id_tbl);
++}
++
++static struct pnfs_mds_id *
++find_pnfs_mds_id(u32 mdsid)
++{
++	struct pnfs_mds_id *local = NULL;
++
++	dprintk("pNFSD: %s\n", __func__);
++	list_for_each_entry(local, &mds_id_tbl, di_hash) {
++		if (local->di_mdsid == mdsid)
++			return local;
++	}
++	return NULL;
++}
++
++static struct pnfs_ds_clientid *
++find_pnfs_ds_clientid(const clientid_t *clid)
++{
++	struct pnfs_ds_clientid *local = NULL;
++	unsigned int hashval;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	hashval = clientid_hashval(clid->cl_id);
++	list_for_each_entry(local, &mds_clid_hashtbl[hashval], dc_hash) {
++		if (cmp_clid(&local->dc_mdsclid, clid))
++			return local;
++	}
++	return NULL;
++}
++
++static struct pnfs_ds_stateid *
++find_pnfs_ds_stateid(stateid_t *stid)
++{
++	struct pnfs_ds_stateid *local = NULL;
++	u32 st_id = stid->si_stateownerid;
++	u32 f_id = stid->si_fileid;
++	unsigned int hashval;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	hashval = stateid_hashval(st_id, f_id);
++	list_for_each_entry(local, &ds_stid_hashtbl[hashval], ds_hash)
++		if ((local->ds_stid.si_stateownerid == st_id) &&
++				(local->ds_stid.si_fileid == f_id) &&
++				(local->ds_stid.si_boot == stid->si_boot)) {
++			stateid_t *sid = &local->ds_stid;
++			dprintk("NFSD: %s <-- %p ds_flags %lx " STATEID_FMT "\n",
++				__func__, local, local->ds_flags,
++				STATEID_VAL(sid));
++			return local;
++		}
++	return NULL;
++}
++
++static void
++release_ds_mdsid(struct kref *kref)
++{
++	struct pnfs_mds_id *mdp =
++		container_of(kref, struct pnfs_mds_id, di_ref);
++	dprintk("pNFSD: %s\n", __func__);
++
++	list_del(&mdp->di_hash);
++	list_del(&mdp->di_mdsclid);
++	kfree(mdp);
++}
++
++static void
++release_ds_clientid(struct kref *kref)
++{
++	struct pnfs_ds_clientid *dcp =
++		container_of(kref, struct pnfs_ds_clientid, dc_ref);
++	struct pnfs_mds_id *mdp;
++	dprintk("pNFSD: %s\n", __func__);
++
++	mdp = find_pnfs_mds_id(dcp->dc_mdsid);
++	if (mdp)
++		put_ds_mdsid(mdp);
++
++	list_del(&dcp->dc_hash);
++	list_del(&dcp->dc_stateid);
++	list_del(&dcp->dc_permdsid);
++	kfree(dcp);
++}
++
++static void
++release_ds_stateid(struct kref *kref)
++{
++	struct pnfs_ds_stateid *dsp =
++		container_of(kref, struct pnfs_ds_stateid, ds_ref);
++	struct pnfs_ds_clientid *dcp;
++	dprintk("pNFS %s: dsp %p\n", __func__, dsp);
++
++	dcp = find_pnfs_ds_clientid(&dsp->ds_mdsclid);
++	if (dcp)
++		put_ds_clientid(dcp);
++
++	list_del(&dsp->ds_hash);
++	list_del(&dsp->ds_perclid);
++	kfree(dsp);
++}
++
++static inline void
++put_ds_clientid(struct pnfs_ds_clientid *dcp)
++{
++	dprintk("pNFS %s: dcp %p ref %d\n", __func__, dcp,
++		atomic_read(&dcp->dc_ref.refcount));
++	kref_put(&dcp->dc_ref, release_ds_clientid);
++}
++
++static inline void
++get_ds_clientid(struct pnfs_ds_clientid *dcp)
++{
++	dprintk("pNFS %s: dcp %p ref %d\n", __func__, dcp,
++		atomic_read(&dcp->dc_ref.refcount));
++	kref_get(&dcp->dc_ref);
++}
++
++static inline void
++put_ds_mdsid(struct pnfs_mds_id *mdp)
++{
++	dprintk("pNFS %s: mdp %p ref %d\n", __func__, mdp,
++		atomic_read(&mdp->di_ref.refcount));
++	kref_put(&mdp->di_ref, release_ds_mdsid);
++}
++
++static inline void
++get_ds_mdsid(struct pnfs_mds_id *mdp)
++{
++	dprintk("pNFS %s: mdp %p ref %d\n", __func__, mdp,
++		atomic_read(&mdp->di_ref.refcount));
++	kref_get(&mdp->di_ref);
++}
++
++static inline void
++put_ds_stateid(struct pnfs_ds_stateid *dsp)
++{
++	dprintk("pNFS %s: dsp %p ref %d\n", __func__, dsp,
++		atomic_read(&dsp->ds_ref.refcount));
++	kref_put(&dsp->ds_ref, release_ds_stateid);
++}
++
++static inline void
++get_ds_stateid(struct pnfs_ds_stateid *dsp)
++{
++	dprintk("pNFS %s: dsp %p ref %d\n", __func__, dsp,
++		atomic_read(&dsp->ds_ref.refcount));
++	kref_get(&dsp->ds_ref);
++}
++
++void
++nfs4_pnfs_state_shutdown(void)
++{
++	struct pnfs_ds_stateid *dsp;
++	int i;
++
++	dprintk("pNFSD %s: -->\n", __func__);
++	/* Under the DS state lock, put hashed stateids until every bucket is empty */
++	ds_lock_state();
++	for (i = 0; i < STATEID_HASH_SIZE; i++) {
++		while (!list_empty(&ds_stid_hashtbl[i])) {
++			dsp = list_entry(ds_stid_hashtbl[i].next,
++					 struct pnfs_ds_stateid, ds_hash);
++			put_ds_stateid(dsp);	/* the final put unhashes and frees it */
++		}
++	}
++	ds_unlock_state();
++}
++
++static struct pnfs_mds_id *
++alloc_init_mds_id(struct pnfs_get_state *gsp)
++{
++	struct pnfs_mds_id *mdp;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	mdp = kmalloc(sizeof(*mdp), GFP_KERNEL);
++	if (!mdp)
++		return NULL;
++	INIT_LIST_HEAD(&mdp->di_hash);
++	INIT_LIST_HEAD(&mdp->di_mdsclid);
++	list_add(&mdp->di_hash, &mds_id_tbl);
++	mdp->di_mdsid = gsp->dsid;
++	mdp->di_mdsboot = 0;
++	kref_init(&mdp->di_ref);
++	return mdp;
++}
++
++static struct pnfs_ds_clientid *
++alloc_init_ds_clientid(struct pnfs_get_state *gsp)
++{
++	struct pnfs_mds_id *mdp;
++	struct pnfs_ds_clientid *dcp;
++	clientid_t *clid = (clientid_t *)&gsp->clid;
++	unsigned int hashval = clientid_hashval(clid->cl_id);
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	mdp = find_pnfs_mds_id(gsp->dsid);
++	if (mdp)
++		get_ds_mdsid(mdp);	/* existing MDS entry: take our own reference */
++	else
++		mdp = alloc_init_mds_id(gsp);	/* new entry starts with one reference */
++	if (!mdp)
++		return NULL;
++
++	dcp = kmalloc(sizeof(*dcp), GFP_KERNEL);
++	if (!dcp) {
++		put_ds_mdsid(mdp);	/* don't leak the MDS reference taken above */
++		return NULL;
++	}
++	INIT_LIST_HEAD(&dcp->dc_hash);
++	INIT_LIST_HEAD(&dcp->dc_stateid);
++	INIT_LIST_HEAD(&dcp->dc_permdsid);
++	list_add(&dcp->dc_hash, &mds_clid_hashtbl[hashval]);
++	list_add(&dcp->dc_permdsid, &mdp->di_mdsclid);
++	dcp->dc_mdsclid = *clid;
++	kref_init(&dcp->dc_ref);
++	dcp->dc_mdsid = gsp->dsid;
++	return dcp;
++}
++
++static struct pnfs_ds_stateid *
++alloc_init_ds_stateid(struct svc_fh *cfh, stateid_t *stidp)
++{
++	struct pnfs_ds_stateid *dsp;
++	u32 st_id = stidp->si_stateownerid;
++	u32 f_id  = stidp->si_fileid;
++	unsigned int hashval;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	dsp = kmalloc(sizeof(*dsp), GFP_KERNEL);
++	if (!dsp)
++		return dsp;
++
++	INIT_LIST_HEAD(&dsp->ds_hash);
++	INIT_LIST_HEAD(&dsp->ds_perclid);
++	memcpy(&dsp->ds_stid, stidp, sizeof(stateid_t));
++	fh_copy_shallow(&dsp->ds_fh, &cfh->fh_handle);
++	dsp->ds_access = 0;
++	dsp->ds_status = 0;
++	dsp->ds_flags = 0L;
++	kref_init(&dsp->ds_ref);
++	set_bit(DS_STATEID_NEW, &dsp->ds_flags);
++	clear_bit(DS_STATEID_VALID, &dsp->ds_flags);
++	clear_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++	init_waitqueue_head(&dsp->ds_waitq);
++
++	hashval = stateid_hashval(st_id, f_id);
++	list_add(&dsp->ds_hash, &ds_stid_hashtbl[hashval]);
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, dsp);
++	return dsp;
++}
++
++static int
++update_ds_stateid(struct pnfs_ds_stateid *dsp, struct svc_fh *cfh,
++		  struct pnfs_get_state *gsp)
++{
++	struct pnfs_ds_clientid *dcp;
++	int new = 0;
++
++	dprintk("pNFSD: %s dsp %p\n", __func__, dsp);
++	/* Find or create the clientid entry; return 1 if allocation fails */
++	dcp = find_pnfs_ds_clientid((clientid_t *)&gsp->clid);
++	if (!dcp) {
++		dcp = alloc_init_ds_clientid(gsp);
++		if (!dcp)
++			return 1;
++		new = 1;
++	}
++	if (test_bit(DS_STATEID_NEW, &dsp->ds_flags)) {
++		list_add(&dsp->ds_perclid, &dcp->dc_stateid);
++		if (!new)	/* a freshly allocated dcp already holds its initial ref */
++			get_ds_clientid(dcp);
++	}
++	/* Copy the state reported in gsp into dsp, then mark dsp valid */
++	memcpy(&dsp->ds_stid, &gsp->stid, sizeof(stateid_t));
++	dsp->ds_access = gsp->access;
++	dsp->ds_status = 0;
++	dsp->ds_verifier[0] = gsp->verifier[0];
++	dsp->ds_verifier[1] = gsp->verifier[1];
++	memcpy(&dsp->ds_mdsclid, &gsp->clid, sizeof(clientid_t));
++	set_bit(DS_STATEID_VALID, &dsp->ds_flags);
++	clear_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++	clear_bit(DS_STATEID_NEW, &dsp->ds_flags);
++	return 0;
++}
++
++int
++nfs4_pnfs_cb_change_state(struct pnfs_get_state *gs)
++{
++	stateid_t *stid = (stateid_t *)&gs->stid;
++	struct pnfs_ds_stateid *dsp;
++
++	dprintk("pNFSD: %s stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(stid));
++
++	ds_lock_state();
++	dsp = find_pnfs_ds_stateid(stid);
++	if (dsp)
++		put_ds_stateid(dsp);
++	ds_unlock_state();
++
++	dprintk("pNFSD: %s dsp %p\n", __func__, dsp);
++
++	if (dsp)
++		return 0;
++	return -ENOENT;
++}
++
++/* Retrieves and validates stateid.
++ * If stateid exists and its fields match, return it.
++ * If stateid exists but either the generation or
++ * ownerids don't match, check with mds to see if it is valid.
++ * If the stateid doesn't exist, the first thread creates a
++ * invalid *marker* stateid, then checks to see if the
++ * stateid exists on the mds.  If so, it validates the *marker*
++ * stateid and updates its fields.  Subsequent threads that
++ * find the *marker* stateid wait until it is valid or an error
++ * occurs.
++ * Called with ds_state_lock.
++ */
++static struct pnfs_ds_stateid *
++nfsv4_ds_get_state(struct svc_fh *cfh, stateid_t *stidp)
++{
++	struct inode *ino = cfh->fh_dentry->d_inode;
++	struct super_block *sb;
++	struct pnfs_ds_stateid *dsp = NULL;
++	struct pnfs_get_state gs = { .access = 0 };
++	int status = 0, waiter = 0;
++
++	dprintk("pNFSD: %s -->\n", __func__);
++
++	dsp = find_pnfs_ds_stateid(stidp);
++	if (dsp && test_bit(DS_STATEID_VALID, &dsp->ds_flags) &&
++	    (stidp->si_generation == dsp->ds_stid.si_generation))
++		goto out_noput;
++
++	sb = ino->i_sb;
++	if (!sb || !sb->s_pnfs_op || !sb->s_pnfs_op->get_state)
++		goto out_noput;
++
++	/* Uninitialize current state if it exists yet it doesn't match.
++	 * If it is already invalid, another thread is checking state */
++	if (dsp) {
++		if (!test_and_clear_bit(DS_STATEID_VALID, &dsp->ds_flags))
++			waiter = 1;
++	} else {
++		dsp = alloc_init_ds_stateid(cfh, stidp);
++		if (!dsp)
++			goto out_noput;
++	}
++
++	dprintk("pNFSD: %s Starting loop\n", __func__);
++	get_ds_stateid(dsp);
++	while (!test_bit(DS_STATEID_VALID, &dsp->ds_flags)) {
++		ds_unlock_state();
++
++		/* Another thread is checking the state */
++		if (waiter) {
++			dprintk("pNFSD: %s waiting\n", __func__);
++			wait_event_interruptible_timeout(dsp->ds_waitq,
++				(test_bit(DS_STATEID_VALID, &dsp->ds_flags) ||
++				 test_bit(DS_STATEID_ERROR, &dsp->ds_flags)),
++				 msecs_to_jiffies(1024));
++			dprintk("pNFSD: %s awake\n", __func__);
++			ds_lock_state();
++			if (test_bit(DS_STATEID_ERROR, &dsp->ds_flags))
++				goto out;
++			continue;
++		}
++
++		/* Validate stateid on mds */
++		dprintk("pNFSD: %s Checking state on MDS\n", __func__);
++		memcpy(&gs.stid, stidp, sizeof(stateid_t));
++		status = sb->s_pnfs_op->get_state(ino, &cfh->fh_handle, &gs);
++		dprintk("pNFSD: %s from MDS status %d\n", __func__, status);
++		ds_lock_state();
++		/* if !status and stateid is valid, update id and mark valid */
++		if (status || update_ds_stateid(dsp, cfh, &gs)) {
++			set_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++			/* remove invalid stateid from list */
++			put_ds_stateid(dsp);
++			wake_up(&dsp->ds_waitq);
++			goto out;
++		}
++
++		wake_up(&dsp->ds_waitq);
++	}
++out:
++	/* Read the error bit first: this put may drop the last reference */
++	status = test_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++	put_ds_stateid(dsp);
++	if (status)
++		dsp = NULL;
++out_noput:
++	if (dsp)
++		dprintk("pNFSD: %s <-- dsp %p ds_flags %lx " STATEID_FMT "\n",
++			__func__, dsp, dsp->ds_flags, STATEID_VAL(&dsp->ds_stid));
++	/* If error, return null */
++	if (dsp && test_bit(DS_STATEID_ERROR, &dsp->ds_flags))
++		dsp = NULL;
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, dsp);
++	return dsp;
++}
++
++int
++nfs4_preprocess_pnfs_ds_stateid(struct svc_fh *cfh, stateid_t *stateid)
++{
++	struct pnfs_ds_stateid *dsp;
++	int status = 0;
++
++	dprintk("pNFSD: %s --> " STATEID_FMT "\n", __func__,
++		STATEID_VAL(stateid));
++
++	/* Must release state lock while verifying stateid on mds */
++	nfs4_unlock_state();
++	ds_lock_state();
++	dsp = nfsv4_ds_get_state(cfh, stateid);
++	if (dsp) {
++		get_ds_stateid(dsp);
++		dprintk("pNFSD: %s Found " STATEID_FMT "\n", __func__,
++			STATEID_VAL(&dsp->ds_stid));
++
++		dprintk("NFSD: %s: dsp %p fh_size %u:%u "
++			"fh [%08x:%08x:%08x:%08x]:[%08x:%08x:%08x:%08x] "
++			"gen %x:%x\n",
++			__func__, dsp,
++			cfh->fh_handle.fh_size, dsp->ds_fh.fh_size,
++			((unsigned *)&cfh->fh_handle.fh_base)[0],
++			((unsigned *)&cfh->fh_handle.fh_base)[1],
++			((unsigned *)&cfh->fh_handle.fh_base)[2],
++			((unsigned *)&cfh->fh_handle.fh_base)[3],
++			((unsigned *)&dsp->ds_fh.fh_base)[0],
++			((unsigned *)&dsp->ds_fh.fh_base)[1],
++			((unsigned *)&dsp->ds_fh.fh_base)[2],
++			((unsigned *)&dsp->ds_fh.fh_base)[3],
++			stateid->si_generation, dsp->ds_stid.si_generation);
++	}
++
++	if (!dsp ||
++	    (cfh->fh_handle.fh_size != dsp->ds_fh.fh_size) ||
++	    (memcmp(&cfh->fh_handle.fh_base, &dsp->ds_fh.fh_base,
++		    dsp->ds_fh.fh_size) != 0) ||
++	    (stateid->si_generation > dsp->ds_stid.si_generation))
++		status = nfserr_bad_stateid;
++	else if (stateid->si_generation < dsp->ds_stid.si_generation)
++		status = nfserr_old_stateid;
++
++	if (dsp)
++		put_ds_stateid(dsp);
++	ds_unlock_state();
++	nfs4_lock_state();
++	dprintk("pNFSD: %s <-- status %d\n", __func__, be32_to_cpu(status));
++	return status;
++}
++
++void
++nfs4_ds_get_verifier(stateid_t *stateid, struct super_block *sb, u32 *p)
++{
++	struct pnfs_ds_stateid *dsp = NULL;
++
++	dprintk("pNFSD: %s --> stid %p\n", __func__, stateid);
++
++	ds_lock_state();
++	if (stateid != NULL) {
++		dsp = find_pnfs_ds_stateid(stateid);
++		if (dsp)
++			get_ds_stateid(dsp);
++	}
++
++	/* XXX: Should we fetch the stateid or wait if some other
++	 * thread is currently retrieving the stateid ? */
++	if (dsp && test_bit(DS_STATEID_VALID, &dsp->ds_flags)) {
++		*p++ = dsp->ds_verifier[0];
++		*p++ = dsp->ds_verifier[1];
++		put_ds_stateid(dsp);
++	} else {
++		/* must be on MDS; drop our ref on a not-yet-valid stateid */
++		if (dsp)
++			put_ds_stateid(dsp);
++		ds_unlock_state();
++		sb->s_pnfs_op->get_verifier(sb, p);
++		ds_lock_state();
++	}
++	ds_unlock_state();
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, dsp);
++}
++
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-08-31 20:41:19.198160463 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-31 20:42:05.559129617 -0400
+@@ -34,10 +34,14 @@
+  */
+ #include <linux/file.h>
+ #include <linux/slab.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd4_block.h>
+ 
+ #include "cache.h"
+ #include "xdr4.h"
+ #include "vfs.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY		NFSDDBG_PROC
+ 
+@@ -372,6 +376,24 @@ nfsd4_open(struct svc_rqst *rqstp, struc
+ 	 * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
+ 	 */
+ 	status = nfsd4_process_open2(rqstp, &cstate->current_fh, open);
++#if defined(CONFIG_SPNFS)
++	if (!status && spnfs_enabled()) {
++		struct inode *inode = cstate->current_fh.fh_dentry->d_inode;
++
++		status = spnfs_open(inode, open);
++		if (status) {
++			dprintk(
++			     "nfsd: pNFS could not be enabled for inode: %lu\n",
++			     inode->i_ino);
++			/*
++			 * XXX When there's a failure then need to indicate to
++			 * future ops that no pNFS is available.  Should I save
++			 * the status in the inode?  It's kind of a big hammer.
++			 * But there may be no stripes available?
++			 */
++		}
++	}
++#endif /* CONFIG_SPNFS */
+ out:
+ 	if (open->op_stateowner) {
+ 		nfs4_get_stateowner(open->op_stateowner);
+@@ -454,16 +476,30 @@ nfsd4_access(struct svc_rqst *rqstp, str
+ 			   &access->ac_supported);
+ }
+ 
++static void
++nfsd4_get_verifier(struct super_block *sb, nfs4_verifier *verf)
++{
++	u32 *p = (u32 *)verf->data;
++
++#if defined(CONFIG_PNFSD)
++	if (sb->s_pnfs_op && sb->s_pnfs_op->get_verifier) {
++		nfs4_ds_get_verifier(NULL, sb, p);
++		return;
++	}
++#endif /* CONFIG_PNFSD */
++
++	*p++ = nfssvc_boot.tv_sec;
++	*p++ = nfssvc_boot.tv_usec;
++}
++
+ static __be32
+ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ 	     struct nfsd4_commit *commit)
+ {
+ 	__be32 status;
+ 
+-	u32 *p = (u32 *)commit->co_verf.data;
+-	*p++ = nfssvc_boot.tv_sec;
+-	*p++ = nfssvc_boot.tv_usec;
+-
++	nfsd4_get_verifier(cstate->current_fh.fh_dentry->d_inode->i_sb,
++			   &commit->co_verf);
+ 	status = nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
+ 			     commit->co_count);
+ 	if (status == nfserr_symlink)
+@@ -816,7 +852,6 @@ nfsd4_write(struct svc_rqst *rqstp, stru
+ {
+ 	stateid_t *stateid = &write->wr_stateid;
+ 	struct file *filp = NULL;
+-	u32 *p;
+ 	__be32 status = nfs_ok;
+ 	unsigned long cnt;
+ 
+@@ -838,13 +873,49 @@ nfsd4_write(struct svc_rqst *rqstp, stru
+ 
+ 	cnt = write->wr_buflen;
+ 	write->wr_how_written = write->wr_stable_how;
+-	p = (u32 *)write->wr_verifier.data;
+-	*p++ = nfssvc_boot.tv_sec;
+-	*p++ = nfssvc_boot.tv_usec;
+ 
++	nfsd4_get_verifier(cstate->current_fh.fh_dentry->d_inode->i_sb,
++			   &write->wr_verifier);
++#if defined(CONFIG_SPNFS)
++#if defined(CONFIG_SPNFS_BLOCK)
++	if (pnfs_block_enabled(cstate->current_fh.fh_dentry->d_inode, 0)) {
++                status = bl_layoutrecall(cstate->current_fh.fh_dentry->d_inode,
++		    RETURN_FILE, write->wr_offset, write->wr_buflen);
++                if (!status) {
++                        status =  nfsd_write(rqstp, &cstate->current_fh, filp,
++			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
++			     &cnt, &write->wr_how_written);
++                }
++        } else
++#endif
++		
++	if (spnfs_enabled()) {
++		status = spnfs_write(cstate->current_fh.fh_dentry->d_inode,
++			write->wr_offset, write->wr_buflen, write->wr_vlen,
++			rqstp);
++		if (status == nfs_ok) {
++			/* DMXXX: HACK to get filesize set */
++			/* write one byte at offset+length-1 */
++			struct kvec k[1];
++			char zero = 0;
++			unsigned long cnt = 1;
++
++			k[0].iov_base = (void *)&zero;
++			k[0].iov_len = 1;
++			nfsd_write(rqstp, &cstate->current_fh, filp,
++				   write->wr_offset+write->wr_buflen-1, k, 1,
++				   &cnt, &write->wr_how_written);
++		}
++	} else /* we're not an MDS */
++		status =  nfsd_write(rqstp, &cstate->current_fh, filp,
++			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
++			     &cnt, &write->wr_how_written);
++#else
+ 	status =  nfsd_write(rqstp, &cstate->current_fh, filp,
+ 			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
+ 			     &cnt, &write->wr_how_written);
++#endif /* CONFIG_SPNFS */
++
+ 	if (filp)
+ 		fput(filp);
+ 
+@@ -935,6 +1006,306 @@ nfsd4_verify(struct svc_rqst *rqstp, str
+ 	return status == nfserr_same ? nfs_ok : status;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++static __be32
++nfsd4_layout_verify(struct super_block *sb, struct svc_export *exp,
++		    unsigned int layout_type)
++{
++	int status, type;
++
++	/* check to see if pNFS  is supported. */
++	status = nfserr_layoutunavailable;
++	if (exp && exp->ex_pnfs == 0) {
++		dprintk("%s: Underlying file system "
++			"is not exported over pNFS\n", __func__);
++		goto out;
++	}
++	if (!sb->s_pnfs_op || !sb->s_pnfs_op->layout_type) {
++		dprintk("%s: Underlying file system "
++			"does not support pNFS\n", __func__);
++		goto out;
++	}
++
++	type = sb->s_pnfs_op->layout_type(sb);
++
++	/* check to see if requested layout type is supported. */
++	status = nfserr_unknown_layouttype;
++	if (!type)
++		dprintk("BUG: %s: layout_type 0 is reserved and must not be "
++			"used by filesystem\n", __func__);
++	else if (type != layout_type)
++		dprintk("%s: requested layout type %d "
++		       "does not match supported type %d\n",
++			__func__, layout_type, type);
++	else
++		status = nfs_ok;
++out:
++	return status;
++}
++
++static __be32
++nfsd4_getdevlist(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_getdevlist *gdlp)
++{
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++	int status;
++
++	dprintk("%s: type %u maxdevices %u cookie %llu verf %llu\n",
++		__func__, gdlp->gd_layout_type, gdlp->gd_maxdevices,
++		gdlp->gd_cookie, gdlp->gd_verf);
++
++
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* We must be able to encode at least one device */
++	if (!gdlp->gd_maxdevices)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     gdlp->gd_layout_type);
++	if (status)
++		goto out;
++
++	/* Do nothing if underlying file system does not support
++	 * getdevicelist */
++	if (!sb->s_pnfs_op->get_device_iter) {
++		status = nfserr_notsupp;
++		goto out;
++	}
++
++	/* Set up arguments so device can be retrieved at encode time */
++	gdlp->gd_fhp = &cstate->current_fh;
++out:
++	return status;
++}
++
++static __be32
++nfsd4_getdevinfo(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_getdevinfo *gdp)
++{
++	struct super_block *sb;
++	int status;
++	clientid_t clid;
++
++	dprintk("%s: layout_type %u dev_id %llx:%llx maxcnt %u\n",
++	       __func__, gdp->gd_layout_type, gdp->gd_devid.sbid,
++	       gdp->gd_devid.devid, gdp->gd_maxcount);
++
++	status = nfserr_inval;
++	sb = find_sbid_id(gdp->gd_devid.sbid);
++	dprintk("%s: sb %p\n", __func__, sb);
++	if (!sb) {
++		status = nfserr_noent;
++		goto out;
++	}
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, NULL, gdp->gd_layout_type);
++	if (status)
++		goto out;
++
++	/* Set up arguments so device can be retrieved at encode time */
++	gdp->gd_sb = sb;
++
++	/* Update notifications */
++	copy_clientid(&clid, cstate->session);
++	pnfs_set_device_notify(&clid, gdp->gd_notify_types);
++out:
++	return status;
++}
++
++static __be32
++nfsd4_layoutget(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutget *lgp)
++{
++	int status;
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lgp->lg_seg.layout_type);
++	if (status)
++		goto out;
++
++	status = nfserr_badiomode;
++	if (lgp->lg_seg.iomode != IOMODE_READ &&
++	    lgp->lg_seg.iomode != IOMODE_RW) {
++		dprintk("pNFS %s: invalid iomode %d\n", __func__,
++			lgp->lg_seg.iomode);
++		goto out;
++	}
++
++	/* Set up arguments so layout can be retrieved at encode time */
++	lgp->lg_fhp = current_fh;
++	copy_clientid((clientid_t *)&lgp->lg_seg.clientid, cstate->session);
++	status = nfs_ok;
++out:
++	return status;
++}
++
++static __be32
++nfsd4_layoutcommit(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	int status;
++	struct inode *ino = NULL;
++	struct iattr ia;
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	dprintk("NFSD: nfsd4_layoutcommit \n");
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	ino = current_fh->fh_dentry->d_inode;
++	if (!ino)
++		goto out;
++
++	sb = ino->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lcp->args.lc_seg.layout_type);
++	if (status)
++		goto out;
++
++	/* This will only extend the file length.  Do a quick
++	 * check to see if there is any point in waiting for the update
++	 * locks.
++	 * TODO: Is this correct for all back ends?
++	 */
++	dprintk("%s:new offset: %d new size: %llu old size: %lld\n",
++		__func__, lcp->args.lc_newoffset, lcp->args.lc_last_wr + 1,
++		ino->i_size);
++
++	/* Set clientid from sessionid */
++	copy_clientid((clientid_t *)&lcp->args.lc_seg.clientid, cstate->session);
++	lcp->res.lc_size_chg = 0;
++	if (sb->s_pnfs_op->layout_commit) {
++		status = sb->s_pnfs_op->layout_commit(ino, &lcp->args, &lcp->res);
++		dprintk("%s:layout_commit result %d\n", __func__, status);
++	} else {
++		fh_lock(current_fh);
++		if ((lcp->args.lc_newoffset == 0) ||
++		    ((lcp->args.lc_last_wr + 1) <= ino->i_size)) {
++			status = 0;
++			lcp->res.lc_size_chg = 0;
++			fh_unlock(current_fh);
++			goto out;
++		}
++
++		/* Try our best to update the file size */
++		dprintk("%s: Modifying file size\n", __func__);
++		ia.ia_valid = ATTR_SIZE;
++		ia.ia_size = lcp->args.lc_last_wr + 1;
++		status = notify_change(current_fh->fh_dentry, &ia);
++		fh_unlock(current_fh);
++		dprintk("%s:notify_change result %d\n", __func__, status);
++		status = nfserrno(status);	/* notify_change() returned -errno */
++	}
++
++	if (!status && lcp->res.lc_size_chg &&
++	    EX_ISSYNC(current_fh->fh_export)) {
++		dprintk("%s: Synchronously writing inode size %llu\n",
++			__func__, ino->i_size);
++		write_inode_now(ino, 1);
++		lcp->res.lc_newsize = i_size_read(ino);
++	}
++out:
++	return status;
++}
++
++static __be32
++nfsd4_layoutreturn(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	int status;	/* returned as this function's __be32 result */
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lrp->args.lr_seg.layout_type);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;	/* result of both argument checks below */
++	if (lrp->args.lr_return_type != RETURN_FILE &&
++	    lrp->args.lr_return_type != RETURN_FSID &&
++	    lrp->args.lr_return_type != RETURN_ALL) {
++		dprintk("pNFS %s: invalid return_type %d\n", __func__,
++			lrp->args.lr_return_type);
++		goto out;
++	}
++
++	status = nfserr_inval;	/* (already nfserr_inval at this point) */
++	if (lrp->args.lr_seg.iomode != IOMODE_READ &&
++	    lrp->args.lr_seg.iomode != IOMODE_RW &&
++	    lrp->args.lr_seg.iomode != IOMODE_ANY) {
++		dprintk("pNFS %s: invalid iomode %d\n", __func__,
++			lrp->args.lr_seg.iomode);
++		goto out;
++	}
++
++	/* Set clientid from sessionid; flag a stateid reply for RETURN_FILE */
++	copy_clientid((clientid_t *)&lrp->args.lr_seg.clientid, cstate->session);
++	lrp->lrs_present = (lrp->args.lr_return_type == RETURN_FILE);
++	status = nfs4_pnfs_return_layout(sb, current_fh, lrp);
++out:	/* NOTE(review): error exits arrive before lrs_present is assigned */
++	dprintk("pNFS %s: status %d return_type 0x%x lrs_present %d\n",
++		__func__, status, lrp->args.lr_return_type, lrp->lrs_present);
++	return status;
++}
++#endif /* CONFIG_PNFSD */
++
+ /*
+  * NULL call.
+  */
+@@ -1317,6 +1688,29 @@ static struct nfsd4_operation nfsd4_ops[
+ 		.op_flags = ALLOWED_WITHOUT_FH,
+ 		.op_name = "OP_RECLAIM_COMPLETE",
+ 	},
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICELIST] = {
++		.op_func = (nfsd4op_func)nfsd4_getdevlist,
++		.op_name = "OP_GETDEVICELIST",
++	},
++	[OP_GETDEVICEINFO] = {
++		.op_func = (nfsd4op_func)nfsd4_getdevinfo,
++		.op_flags = ALLOWED_WITHOUT_FH,
++		.op_name = "OP_GETDEVICEINFO",
++	},
++	[OP_LAYOUTGET] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutget,
++		.op_name = "OP_LAYOUTGET",
++	},
++	[OP_LAYOUTCOMMIT] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutcommit,
++		.op_name = "OP_LAYOUTCOMMIT",
++	},
++	[OP_LAYOUTRETURN] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutreturn,
++		.op_name = "OP_LAYOUTRETURN",
++	},
++#endif /* CONFIG_PNFSD */
+ };
+ 
+ static const char *nfsd4_op_name(unsigned opnum)
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-08-31 20:41:19.200150153 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-31 20:42:05.561202607 -0400
+@@ -42,6 +42,8 @@
+ #include "xdr4.h"
+ #include "vfs.h"
+ 
++#include "pnfsd.h"
++
+ #define NFSDDBG_FACILITY                NFSDDBG_PROC
+ 
+ /* Globals */
+@@ -60,8 +62,6 @@ static u64 current_sessionid = 1;
+ #define ONE_STATEID(stateid)  (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
+ 
+ /* forward declarations */
+-static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
+-static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
+ static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
+ static void nfs4_set_recdir(char *recdir);
+ 
+@@ -69,6 +69,7 @@ static void nfs4_set_recdir(char *recdir
+ 
+ /* Currently used for almost all code touching nfsv4 state: */
+ static DEFINE_MUTEX(client_mutex);
++struct task_struct *client_mutex_owner;
+ 
+ /*
+  * Currently used for the del_recall_lru and file hash table.  In an
+@@ -86,11 +87,21 @@ void
+ nfs4_lock_state(void)
+ {
+ 	mutex_lock(&client_mutex);
++	client_mutex_owner = current;
++}
++
++#define BUG_ON_UNLOCKED_STATE() BUG_ON(client_mutex_owner != current)
++
++void
++nfs4_bug_on_unlocked_state(void)
++{
++	BUG_ON_UNLOCKED_STATE();	/* function form of the macro check */
++}
+ 
+ void
+ nfs4_unlock_state(void)
+ {
++	client_mutex_owner = NULL;
+ 	mutex_unlock(&client_mutex);
+ }
+ 
+@@ -109,7 +120,7 @@ opaque_hashval(const void *ptr, int nbyt
+ 
+ static struct list_head del_recall_lru;
+ 
+-static inline void
++inline void
+ put_nfs4_file(struct nfs4_file *fi)
+ {
+ 	if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) {
+@@ -120,7 +131,7 @@ put_nfs4_file(struct nfs4_file *fi)
+ 	}
+ }
+ 
+-static inline void
++inline void
+ get_nfs4_file(struct nfs4_file *fi)
+ {
+ 	atomic_inc(&fi->fi_ref);
+@@ -230,7 +241,10 @@ nfs4_close_delegation(struct nfs4_delega
+ 	 * but we want to remove the lease in any case. */
+ 	if (dp->dl_flock)
+ 		vfs_setlease(filp, F_UNLCK, &dp->dl_flock);
++	BUG_ON_UNLOCKED_STATE();
++	nfs4_unlock_state();	/* allow nested layout recall/return */
+ 	nfsd_close(filp);
++	nfs4_lock_state();
+ }
+ 
+ /* Called under the state lock. */
+@@ -266,8 +280,8 @@ static DEFINE_SPINLOCK(client_lock);
+  * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
+  * used in reboot/reset lease grace period processing
+  *
+- * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed
+- * setclientid_confirmed info. 
++ * conf_id_hashtbl[], and conf_str_hashtbl[] hold
++ * confirmed setclientid_confirmed info.
+  *
+  * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed 
+  * setclientid info.
+@@ -292,6 +306,7 @@ static void unhash_generic_stateid(struc
+ 	list_del(&stp->st_hash);
+ 	list_del(&stp->st_perfile);
+ 	list_del(&stp->st_perstateowner);
++	release_pnfs_ds_dev_list(stp);
+ }
+ 
+ static void free_generic_stateid(struct nfs4_stateid *stp)
+@@ -345,7 +360,10 @@ static void release_open_stateid(struct 
+ {
+ 	unhash_generic_stateid(stp);
+ 	release_stateid_lockowners(stp);
++	BUG_ON_UNLOCKED_STATE();
++	nfs4_unlock_state();	/* allow nested layout recall/return */
+ 	nfsd_close(stp->st_vfs_file);
++	nfs4_lock_state();
+ 	free_generic_stateid(stp);
+ }
+ 
+@@ -739,6 +757,8 @@ expire_client(struct nfs4_client *clp)
+ 	struct nfs4_delegation *dp;
+ 	struct list_head reaplist;
+ 
++	BUG_ON_UNLOCKED_STATE();
++
+ 	INIT_LIST_HEAD(&reaplist);
+ 	spin_lock(&recall_lock);
+ 	while (!list_empty(&clp->cl_delegations)) {
+@@ -758,6 +778,7 @@ expire_client(struct nfs4_client *clp)
+ 		sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
+ 		release_openowner(sop);
+ 	}
++	pnfs_expire_client(clp);
+ 	nfsd4_set_callback_client(clp, NULL);
+ 	if (clp->cl_cb_conn.cb_xprt)
+ 		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
+@@ -770,6 +791,13 @@ expire_client(struct nfs4_client *clp)
+ 	spin_unlock(&client_lock);
+ }
+ 
++void expire_client_lock(struct nfs4_client *clp)
++{
++	nfs4_lock_state();	/* expire_client() BUG()s without this lock */
++	expire_client(clp);
++	nfs4_unlock_state();
++}
++
+ static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
+ {
+ 	memcpy(target->cl_verifier.data, source->data,
+@@ -859,6 +887,11 @@ static struct nfs4_client *create_client
+ 	INIT_LIST_HEAD(&clp->cl_strhash);
+ 	INIT_LIST_HEAD(&clp->cl_openowners);
+ 	INIT_LIST_HEAD(&clp->cl_delegations);
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&clp->cl_layouts);
++	INIT_LIST_HEAD(&clp->cl_layoutrecalls);
++	atomic_set(&clp->cl_deviceref, 0);
++#endif /* CONFIG_PNFSD */
+ 	INIT_LIST_HEAD(&clp->cl_sessions);
+ 	INIT_LIST_HEAD(&clp->cl_lru);
+ 	clp->cl_time = get_seconds();
+@@ -908,7 +941,7 @@ move_to_confirmed(struct nfs4_client *cl
+ 	renew_client(clp);
+ }
+ 
+-static struct nfs4_client *
++struct nfs4_client *
+ find_confirmed_client(clientid_t *clid)
+ {
+ 	struct nfs4_client *clp;
+@@ -978,6 +1011,24 @@ find_unconfirmed_client_by_str(const cha
+ 	return NULL;
+ }
+ 
++int	/* 0, or the first non-zero value returned by func */
++filter_confirmed_clients(int (* func)(struct nfs4_client *, void *),
++			 void *arg)
++{
++	struct nfs4_client *clp, *next;
++	int i, status = 0;
++
++	for (i = 0; i < CLIENT_HASH_SIZE; i++)	/* each conf_str_hashtbl bucket */
++		list_for_each_entry_safe (clp, next, &conf_str_hashtbl[i],
++					  cl_strhash) {
++			status = func(clp, arg);
++			if (status)	/* stop the whole walk, not just this bucket */
++				return status;
++		}
++
++	return status;
++}
++
+ static void
+ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
+ {
+@@ -1110,8 +1161,12 @@ nfsd4_replay_cache_entry(struct nfsd4_co
+ static void
+ nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
+ {
+-	/* pNFS is not supported */
++#if defined(CONFIG_PNFSD)
++	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS |
++				  EXCHGID4_FLAG_USE_PNFS_DS;
++#else  /* CONFIG_PNFSD */
+ 	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
++#endif /* CONFIG_PNFSD */
+ 
+ 	/* Referrals are supported, Migration is not. */
+ 	new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
+@@ -1301,6 +1356,13 @@ nfsd4_create_session(struct svc_rqst *rq
+ 	struct nfsd4_clid_slot *cs_slot = NULL;
+ 	int status = 0;
+ 
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++	/* XXX hack to get local ip address */
++	memcpy(&pnfsd_lexp_addr, &rqstp->rq_xprt->xpt_local,
++		sizeof(pnfsd_lexp_addr));
++	pnfs_lexp_addr_len = rqstp->rq_xprt->xpt_locallen;
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
+ 	nfs4_lock_state();
+ 	unconf = find_unconfirmed_client(&cr_ses->clientid);
+ 	conf = find_confirmed_client(&cr_ses->clientid);
+@@ -1340,25 +1402,26 @@ nfsd4_create_session(struct svc_rqst *rq
+ 		cs_slot->sl_seqid++; /* from 0 to 1 */
+ 		move_to_confirmed(unconf);
+ 
+-		if (cr_ses->flags & SESSION4_BACK_CHAN) {
+-			unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
+-			svc_xprt_get(rqstp->rq_xprt);
+-			rpc_copy_addr(
+-				(struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
+-				sa);
+-			unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
+-			unconf->cl_cb_conn.cb_minorversion =
+-				cstate->minorversion;
+-			unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
+-			unconf->cl_cb_seq_nr = 1;
+-			nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
+-		}
++		if (is_ds_only_session(unconf->cl_exchange_flags))
++			cr_ses->flags &= ~SESSION4_BACK_CHAN;
++
+ 		conf = unconf;
+ 	} else {
+ 		status = nfserr_stale_clientid;
+ 		goto out;
+ 	}
+ 
++	if (cr_ses->flags & SESSION4_BACK_CHAN) {
++		conf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
++		svc_xprt_get(rqstp->rq_xprt);
++		rpc_copy_addr((struct sockaddr *)&conf->cl_cb_conn.cb_addr, sa);
++		conf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
++		conf->cl_cb_conn.cb_minorversion = cstate->minorversion;
++		conf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
++		conf->cl_cb_seq_nr = 1;
++		nfsd4_probe_callback(conf, &conf->cl_cb_conn);
++	}
++
+ 	/*
+ 	 * We do not support RDMA or persistent sessions
+ 	 */
+@@ -1746,7 +1809,7 @@ out:
+ 
+ /* OPEN Share state helper functions */
+ static inline struct nfs4_file *
+-alloc_init_file(struct inode *ino)
++alloc_init_file(struct inode *ino, struct svc_fh *current_fh)
+ {
+ 	struct nfs4_file *fp;
+ 	unsigned int hashval = file_hashval(ino);
+@@ -1760,6 +1823,16 @@ alloc_init_file(struct inode *ino)
+ 		fp->fi_inode = igrab(ino);
+ 		fp->fi_id = current_fileid++;
+ 		fp->fi_had_conflict = false;
++#if defined(CONFIG_PNFSD)
++		INIT_LIST_HEAD(&fp->fi_layouts);
++		INIT_LIST_HEAD(&fp->fi_layout_states);
++		fp->fi_fsid.major = current_fh->fh_export->ex_fsid;
++		fp->fi_fsid.minor = 0;
++		fp->fi_fhlen = current_fh->fh_handle.fh_size;
++		BUG_ON(fp->fi_fhlen > sizeof(fp->fi_fhval));
++		memcpy(fp->fi_fhval, &current_fh->fh_handle.fh_base,
++		       fp->fi_fhlen);
++#endif /* CONFIG_PNFSD */
+ 		spin_lock(&recall_lock);
+ 		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
+ 		spin_unlock(&recall_lock);
+@@ -1768,7 +1841,7 @@ alloc_init_file(struct inode *ino)
+ 	return NULL;
+ }
+ 
+-static void
++void
+ nfsd4_free_slab(struct kmem_cache **slab)
+ {
+ 	if (*slab == NULL)
+@@ -1784,6 +1857,7 @@ nfsd4_free_slabs(void)
+ 	nfsd4_free_slab(&file_slab);
+ 	nfsd4_free_slab(&stateid_slab);
+ 	nfsd4_free_slab(&deleg_slab);
++	nfsd4_free_pnfs_slabs();
+ }
+ 
+ static int
+@@ -1805,6 +1879,8 @@ nfsd4_init_slabs(void)
+ 			sizeof(struct nfs4_delegation), 0, 0, NULL);
+ 	if (deleg_slab == NULL)
+ 		goto out_nomem;
++	if (nfsd4_init_pnfs_slabs())
++		goto out_nomem;
+ 	return 0;
+ out_nomem:
+ 	nfsd4_free_slabs();
+@@ -1878,6 +1954,9 @@ init_stateid(struct nfs4_stateid *stp, s
+ 	INIT_LIST_HEAD(&stp->st_perstateowner);
+ 	INIT_LIST_HEAD(&stp->st_lockowners);
+ 	INIT_LIST_HEAD(&stp->st_perfile);
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&stp->st_pnfs_ds_id);
++#endif /* CONFIG_PNFSD */
+ 	list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
+ 	list_add(&stp->st_perstateowner, &sop->so_stateids);
+ 	list_add(&stp->st_perfile, &fp->fi_stateids);
+@@ -1919,6 +1998,7 @@ find_openstateowner_str(unsigned int has
+ {
+ 	struct nfs4_stateowner *so = NULL;
+ 
++	BUG_ON_UNLOCKED_STATE();
+ 	list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
+ 		if (same_owner_str(so, &open->op_owner, &open->op_clientid))
+ 			return so;
+@@ -1927,7 +2007,7 @@ find_openstateowner_str(unsigned int has
+ }
+ 
+ /* search file_hashtbl[] for file */
+-static struct nfs4_file *
++struct nfs4_file *
+ find_file(struct inode *ino)
+ {
+ 	unsigned int hashval = file_hashval(ino);
+@@ -1945,6 +2025,18 @@ find_file(struct inode *ino)
+ 	return NULL;
+ }
+ 
++struct nfs4_file *
++find_alloc_file(struct inode *ino, struct svc_fh *current_fh)
++{
++	struct nfs4_file *fp = find_file(ino);
++
++	/* nothing hashed for this inode yet: set one up (NULL on failure) */
++	if (!fp)
++		fp = alloc_init_file(ino, current_fh);
++
++	return fp;
++}
++
+ static inline int access_valid(u32 x, u32 minorversion)
+ {
+ 	if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ)
+@@ -2503,7 +2595,7 @@ nfsd4_process_open2(struct svc_rqst *rqs
+ 		if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
+ 			goto out;
+ 		status = nfserr_resource;
+-		fp = alloc_init_file(ino);
++		fp = alloc_init_file(ino, current_fh);
+ 		if (fp == NULL)
+ 			goto out;
+ 	}
+@@ -2730,7 +2822,7 @@ nfs4_check_fh(struct svc_fh *fhp, struct
+ 	return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_path.dentry->d_inode;
+ }
+ 
+-static int
++int
+ STALE_STATEID(stateid_t *stateid)
+ {
+ 	if (stateid->si_boot == boot_time)
+@@ -2740,6 +2832,16 @@ STALE_STATEID(stateid_t *stateid)
+ 	return 1;
+ }
+ 
++__be32
++nfs4_check_stateid(stateid_t *stateid)
++{
++	/* the reserved zero/one stateids are refused before the stale test */
++	if (ONE_STATEID(stateid) || ZERO_STATEID(stateid))
++		return nfserr_bad_stateid;
++
++	return STALE_STATEID(stateid) ? nfserr_stale_stateid : 0;
++}
++
+ static inline int
+ access_permit_read(unsigned long access_bmap)
+ {
+@@ -2848,6 +2950,24 @@ nfs4_preprocess_stateid_op(struct nfsd4_
+ 	if (grace_disallows_io(ino))
+ 		return nfserr_grace;
+ 
++#if defined(CONFIG_PNFSD)
++	if (pnfs_fh_is_ds(&current_fh->fh_handle)) {
++		if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
++			status = nfserr_bad_stateid;
++		else
++#ifdef CONFIG_GFS2_FS_LOCKING_DLM
++		{
++			dprintk("%s Don't check DS stateid\n", __func__);
++			return 0;
++		}
++#else /* CONFIG_GFS2_FS_LOCKING_DLM */
++			status = nfs4_preprocess_pnfs_ds_stateid(current_fh,
++								 stateid);
++#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
++		goto out;
++	}
++#endif /* CONFIG_PNFSD */
++
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+ 
+@@ -2924,13 +3044,9 @@ nfs4_preprocess_seqid_op(struct nfsd4_co
+ 	*stpp = NULL;
+ 	*sopp = NULL;
+ 
+-	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
+-		dprintk("NFSD: preprocess_seqid_op: magic stateid!\n");
+-		return nfserr_bad_stateid;
+-	}
+-
+-	if (STALE_STATEID(stateid))
+-		return nfserr_stale_stateid;
++	status = nfs4_check_stateid(stateid);
++	if (status)
++		return status;
+ 
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+@@ -3205,11 +3321,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+ 	nfs4_lock_state();
+-	status = nfserr_bad_stateid;
+-	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
+-		goto out;
+-	status = nfserr_stale_stateid;
+-	if (STALE_STATEID(stateid))
++	status = nfs4_check_stateid(stateid);
++	if (status)
+ 		goto out;
+ 	status = nfserr_bad_stateid;
+ 	if (!is_delegation_stateid(stateid))
+@@ -3238,26 +3351,6 @@ out:
+ #define LOCK_HASH_SIZE             (1 << LOCK_HASH_BITS)
+ #define LOCK_HASH_MASK             (LOCK_HASH_SIZE - 1)
+ 
+-static inline u64
+-end_offset(u64 start, u64 len)
+-{
+-	u64 end;
+-
+-	end = start + len;
+-	return end >= start ? end: NFS4_MAX_UINT64;
+-}
+-
+-/* last octet in a range */
+-static inline u64
+-last_byte_offset(u64 start, u64 len)
+-{
+-	u64 end;
+-
+-	BUG_ON(!len);
+-	end = start + len;
+-	return end > start ? end - 1: NFS4_MAX_UINT64;
+-}
+-
+ #define lockownerid_hashval(id) \
+         ((id) & LOCK_HASH_MASK)
+ 
+@@ -3274,7 +3367,7 @@ static struct list_head lock_ownerid_has
+ static struct list_head	lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
+ static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
+ 
+-static struct nfs4_stateid *
++struct nfs4_stateid *
+ find_stateid(stateid_t *stid, int flags)
+ {
+ 	struct nfs4_stateid *local;
+@@ -3303,7 +3396,7 @@ find_stateid(stateid_t *stid, int flags)
+ 	return NULL;
+ }
+ 
+-static struct nfs4_delegation *
++struct nfs4_delegation *
+ find_delegation_stateid(struct inode *ino, stateid_t *stid)
+ {
+ 	struct nfs4_file *fp;
+@@ -3436,6 +3529,9 @@ alloc_init_lock_stateid(struct nfs4_stat
+ 	INIT_LIST_HEAD(&stp->st_perfile);
+ 	INIT_LIST_HEAD(&stp->st_perstateowner);
+ 	INIT_LIST_HEAD(&stp->st_lockowners); /* not used */
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&stp->st_pnfs_ds_id);
++#endif /* CONFIG_PNFSD */
+ 	list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
+ 	list_add(&stp->st_perfile, &fp->fi_stateids);
+ 	list_add(&stp->st_perstateowner, &sop->so_stateids);
+@@ -3998,6 +4094,9 @@ nfs4_state_init(void)
+ 	INIT_LIST_HEAD(&client_lru);
+ 	INIT_LIST_HEAD(&del_recall_lru);
+ 	reclaim_str_hashtbl_size = 0;
++#if defined(CONFIG_PNFSD)
++	nfs4_pnfs_state_init();
++#endif /* CONFIG_PNFSD */
+ 	return 0;
+ }
+ 
+@@ -4110,6 +4209,7 @@ __nfs4_state_shutdown(void)
+ 	}
+ 
+ 	nfsd4_shutdown_recdir();
++	nfs4_pnfs_state_shutdown();
+ 	nfs4_init = 0;
+ }
+ 
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-08-31 20:41:19.202150173 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-31 20:42:05.563232916 -0400
+@@ -47,9 +47,14 @@
+ #include <linux/nfsd_idmap.h>
+ #include <linux/nfs4_acl.h>
+ #include <linux/sunrpc/svcauth_gss.h>
++#include <linux/exportfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd4_block.h>
+ 
+ #include "xdr4.h"
+ #include "vfs.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY		NFSDDBG_XDR
+ 
+@@ -1234,6 +1239,138 @@ nfsd4_decode_sequence(struct nfsd4_compo
+ 	DECODE_TAIL;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static __be32
++nfsd4_decode_getdevlist(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_getdevlist *gdevl)
++{
++	DECODE_HEAD;
++
++	READ_BUF(16 + sizeof(nfs4_verifier));	/* 4 + 4 + 8 + verifier */
++	READ32(gdevl->gd_layout_type);
++	READ32(gdevl->gd_maxdevices);	/* bounds the encoder's device loop */
++	READ64(gdevl->gd_cookie);	/* with gd_verf, seeds the iterator */
++	COPYMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
++
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_getdevinfo(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_getdevinfo *gdev)
++{
++	u32 num;	/* count word that precedes gd_notify_types */
++	DECODE_HEAD;
++
++	READ_BUF(12 + sizeof(struct nfsd4_pnfs_deviceid)); /* devid + 3 words */
++	READ64(gdev->gd_devid.sbid);
++	READ64(gdev->gd_devid.devid);
++	READ32(gdev->gd_layout_type);
++	READ32(gdev->gd_maxcount);
++	READ32(num);
++	if (num) {	/* NOTE(review): one word is read even when num > 1 */
++		READ_BUF(4);
++		READ32(gdev->gd_notify_types);
++	} else {
++		gdev->gd_notify_types = 0;
++	}
++
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_layoutget *lgp)
++{
++	DECODE_HEAD;
++
++	READ_BUF(36);	/* 3 x 4 + 3 x 8 bytes, consumed just below */
++	READ32(lgp->lg_signal);
++	READ32(lgp->lg_seg.layout_type);
++	READ32(lgp->lg_seg.iomode);
++	READ64(lgp->lg_seg.offset);
++	READ64(lgp->lg_seg.length);
++	READ64(lgp->lg_minlength);
++	nfsd4_decode_stateid(argp, &lgp->lg_sid); /* XXX: result unchecked */
++	READ_BUF(4);
++	READ32(lgp->lg_maxcount);	/* caps the encoded layout size */
++
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
++			  struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	DECODE_HEAD;
++	u32 timechange;	/* flag: does a new mtime follow? */
++
++	READ_BUF(20);	/* offset + length + reclaim */
++	READ64(lcp->args.lc_seg.offset);
++	READ64(lcp->args.lc_seg.length);
++	READ32(lcp->args.lc_reclaim);
++	nfsd4_decode_stateid(argp, &lcp->lc_sid); /* XXX: result unchecked */
++	READ_BUF(4);
++	READ32(lcp->args.lc_newoffset);
++	if (lcp->args.lc_newoffset) {	/* last-write offset is optional */
++		READ_BUF(8);
++		READ64(lcp->args.lc_last_wr);
++	} else
++		lcp->args.lc_last_wr = 0;
++	READ_BUF(4);
++	READ32(timechange);
++	if (timechange) {
++		READ_BUF(12);	/* 8 bytes of seconds + 4 of nseconds */
++		READ64(lcp->args.lc_mtime.seconds);
++		READ32(lcp->args.lc_mtime.nseconds);
++	} else {
++		lcp->args.lc_mtime.seconds = 0;
++		lcp->args.lc_mtime.nseconds = 0;
++	}
++	READ_BUF(8);	/* layout type + layout update length */
++	READ32(lcp->args.lc_seg.layout_type);
++	/* XXX: saving XDR'ed layout update. Since we don't have the
++	 * current_fh yet, and therefore no export_ops, we can't call
++	 * the layout specific decode routines. File and pVFS2
++	 * do not use the layout update....
++	 */
++	READ32(lcp->args.lc_up_len);
++	if (lcp->args.lc_up_len > 0) {
++		READ_BUF(lcp->args.lc_up_len);
++		READMEM(lcp->args.lc_up_layout, lcp->args.lc_up_len);
++	}
++
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
++			  struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	DECODE_HEAD;
++
++	READ_BUF(16);	/* reclaim, layout type, iomode, return type */
++	READ32(lrp->args.lr_reclaim);
++	READ32(lrp->args.lr_seg.layout_type);
++	READ32(lrp->args.lr_seg.iomode);
++	READ32(lrp->args.lr_return_type);
++	if (lrp->args.lr_return_type == RETURN_FILE) { /* else nothing more */
++		READ_BUF(16);	/* offset + length */
++		READ64(lrp->args.lr_seg.offset);
++		READ64(lrp->args.lr_seg.length);
++		nfsd4_decode_stateid(argp, &lrp->lr_sid); /* XXX: unchecked */
++		READ_BUF(4);
++		READ32(lrp->args.lrf_body_len);
++		if (lrp->args.lrf_body_len > 0) {
++			READ_BUF(lrp->args.lrf_body_len);
++			READMEM(lrp->args.lrf_body, lrp->args.lrf_body_len);
++		}
++	}
++
++	DECODE_TAIL;
++}
++#endif /* CONFIG_PNFSD */
++
+ static __be32
+ nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
+ {
+@@ -1335,11 +1472,19 @@ static nfsd4_dec nfsd41_dec_ops[] = {
+ 	[OP_DESTROY_SESSION]	= (nfsd4_dec)nfsd4_decode_destroy_session,
+ 	[OP_FREE_STATEID]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_GET_DIR_DELEGATION]	= (nfsd4_dec)nfsd4_decode_notsupp,
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICEINFO]	= (nfsd4_dec)nfsd4_decode_getdevinfo,
++	[OP_GETDEVICELIST]	= (nfsd4_dec)nfsd4_decode_getdevlist,
++	[OP_LAYOUTCOMMIT]	= (nfsd4_dec)nfsd4_decode_layoutcommit,
++	[OP_LAYOUTGET]		= (nfsd4_dec)nfsd4_decode_layoutget,
++	[OP_LAYOUTRETURN]	= (nfsd4_dec)nfsd4_decode_layoutreturn,
++#else  /* CONFIG_PNFSD */
+ 	[OP_GETDEVICEINFO]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_GETDEVICELIST]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTCOMMIT]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTGET]		= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTRETURN]	= (nfsd4_dec)nfsd4_decode_notsupp,
++#endif /* CONFIG_PNFSD */
+ 	[OP_SECINFO_NO_NAME]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_SEQUENCE]		= (nfsd4_dec)nfsd4_decode_sequence,
+ 	[OP_SET_SSV]		= (nfsd4_dec)nfsd4_decode_notsupp,
+@@ -2136,6 +2281,36 @@ out_acl:
+ 		}
+ 		WRITE64(stat.ino);
+ 	}
++#if defined(CONFIG_PNFSD)
++	if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) {
++		struct super_block *sb = dentry->d_inode->i_sb;
++		int type = 0;
++
++		/* Query the filesystem for supported pNFS layout types.
++		 * Currently, we only support one layout type per file system.
++		 * The export_ops->layout_type() returns the pnfs_layouttype4.
++		 */
++		buflen -= 4;
++		if (buflen < 0)		/* length */
++			goto out_resource;
++
++		if (sb && sb->s_pnfs_op && sb->s_pnfs_op->layout_type)
++			type = sb->s_pnfs_op->layout_type(sb);
++		if (type) {
++			if ((buflen -= 4) < 0)	/* type */
++				goto out_resource;
++			WRITE32(1); 	/* length */
++			WRITE32(type);  /* type */
++		} else
++			WRITE32(0);  /* length */
++	}
++
++	if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) {
++		if ((buflen -= 4) < 0)
++			goto out_resource;
++		WRITE32(stat.blksize);
++	}
++#endif /* CONFIG_PNFSD */
+ 	if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
+ 		WRITE32(3);
+ 		WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
+@@ -2366,6 +2541,10 @@ nfsd4_encode_commit(struct nfsd4_compoun
+ 	if (!nfserr) {
+ 		RESERVE_SPACE(8);
+ 		WRITEMEM(commit->co_verf.data, 8);
++		dprintk("NFSD: nfsd4_encode_commit: verifier %x:%x\n",
++			((u32 *)(&commit->co_verf.data))[0],
++			((u32 *)(&commit->co_verf.data))[1]);
++
+ 		ADJUST_ARGS();
+ 	}
+ 	return nfserr;
+@@ -2620,9 +2799,20 @@ nfsd4_encode_read(struct nfsd4_compoundr
+ 	}
+ 	read->rd_vlen = v;
+ 
++#if defined(CONFIG_SPNFS)
++	if (spnfs_enabled())
++		nfserr = spnfs_read(read->rd_fhp->fh_dentry->d_inode,
++				    read->rd_offset, &maxcount, read->rd_vlen,
++				    resp->rqstp);
++	else /* we're not an MDS */
++		nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
++			read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
++			&maxcount);
++#else
+ 	nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
+ 			read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
+ 			&maxcount);
++#endif /* CONFIG_SPNFS */
+ 
+ 	if (nfserr == nfserr_symlink)
+ 		nfserr = nfserr_inval;
+@@ -2926,6 +3116,9 @@ nfsd4_encode_write(struct nfsd4_compound
+ 		WRITE32(write->wr_bytes_written);
+ 		WRITE32(write->wr_how_written);
+ 		WRITEMEM(write->wr_verifier.data, 8);
++		dprintk("NFSD: nfsd4_encode_write: verifier %x:%x\n",
++			((u32 *)(&write->wr_verifier.data))[0],
++			((u32 *)(&write->wr_verifier.data))[1]);
+ 		ADJUST_ARGS();
+ 	}
+ 	return nfserr;
+@@ -3069,6 +3262,343 @@ nfsd4_encode_sequence(struct nfsd4_compo
+ 	return 0;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++/* Uses the export interface to iterate through the available devices
++ * and encodes them on the response stream; *dev_count gets the total.
++ */
++static  __be32
++nfsd4_encode_devlist_iterator(struct nfsd4_compoundres *resp,
++			      struct nfsd4_pnfs_getdevlist *gdevl,
++			      unsigned int *dev_count)
++{
++	struct super_block *sb = gdevl->gd_fhp->fh_dentry->d_inode->i_sb;
++	__be32 nfserr;
++	int status;	/* errno-style result of get_device_iter() */
++	__be32 *p;
++	struct nfsd4_pnfs_dev_iter_res res = {
++		.gd_cookie = gdevl->gd_cookie,
++		.gd_verf = gdevl->gd_verf,
++		.gd_eof = 0
++	};
++	u64 sbid;
++
++	dprintk("%s: Begin\n", __func__);
++
++	sbid = find_create_sbid(sb);
++	*dev_count = 0;
++	do {
++		status = sb->s_pnfs_op->get_device_iter(sb,
++							gdevl->gd_layout_type,
++							&res);
++		if (status) {
++			if (status == -ENOENT) {
++				res.gd_eof = 1;
++				/* return success */
++				break;
++			}
++			nfserr = nfserrno(status);
++			goto out_err;
++		}
++
++		/* Encode the device id: sbid followed by the fs devid */
++		RESERVE_SPACE(sizeof(struct nfsd4_pnfs_deviceid));
++		WRITE64((__be64)sbid);
++		WRITE64(res.gd_devid);	/* devid minor */
++		ADJUST_ARGS();
++		(*dev_count)++;
++	} while (*dev_count < gdevl->gd_maxdevices && !res.gd_eof);
++	gdevl->gd_cookie = res.gd_cookie;	/* hand iterator state back */
++	gdevl->gd_verf = res.gd_verf;
++	gdevl->gd_eof = res.gd_eof;
++	nfserr = nfs_ok;
++out_err:	/* on error gdevl keeps the cookie/verf it came in with */
++	dprintk("%s: Encoded %u devices\n", __func__, *dev_count);
++	return nfserr;
++}
++
++/* Encodes the response of get device list: cookie, verifier, device id
++ * count, the device ids themselves and the eof flag. */
++static __be32
++nfsd4_encode_getdevlist(struct nfsd4_compoundres *resp, __be32 nfserr,
++			struct nfsd4_pnfs_getdevlist *gdevl)
++{
++	unsigned int dev_count = 0, lead_count;
++	u32 *p_in = resp->p;	/* start of our reply, for backfill/rewind */
++	__be32 *p;
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	/* Ensure we have room for cookie, verifier, and devlist len,
++	 * which we will backfill in after we encode as many devices as possible
++	 */
++	lead_count = 8 + sizeof(nfs4_verifier) + 4;
++	RESERVE_SPACE(lead_count);
++	/* skip past these values */
++	p += XDR_QUADLEN(lead_count);
++	ADJUST_ARGS();
++
++	/* Iterate over as many device ids as possible on the xdr stream */
++	nfserr = nfsd4_encode_devlist_iterator(resp, gdevl, &dev_count);
++	if (nfserr)
++		goto out_err;
++
++	/* Backfill in cookie, verf and number of devices encoded */
++	p = p_in;
++	WRITE64(gdevl->gd_cookie);
++	WRITEMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
++	WRITE32(dev_count);
++
++	/* Skip over devices */
++	p += XDR_QUADLEN(dev_count * sizeof(struct nfsd4_pnfs_deviceid));
++	ADJUST_ARGS();
++
++	/* are we at the end of devices? */
++	RESERVE_SPACE(4);
++	WRITE32(gdevl->gd_eof);
++	ADJUST_ARGS();
++
++	dprintk("%s: done.\n", __func__);
++
++	nfserr = nfs_ok;
++out:
++	return nfserr;
++out_err:	/* rewind so nothing of this op stays in the reply */
++	p = p_in;
++	ADJUST_ARGS();
++	goto out;
++}
++
++/* For a given device id, have the file system retrieve and encode the
++ * associated device.  For file layout, the encoding function is
++ * passed down to the file system.  The file system then has the option
++ * of using this encoding function or one of its own.
++ *
++ * Note: the file system must return the XDR size of struct device_addr4
++ * da_addr_body in pnfs_xdr_info.bytes_written on NFS4ERR_TOOSMALL for the
++ * gdir_mincount calculation.
++ */
++static __be32
++nfsd4_encode_getdevinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
++			struct nfsd4_pnfs_getdevinfo *gdev)
++{
++	struct super_block *sb;
++	int maxcount = 0, type_notify_len = 12;
++	__be32 *p, *p_save = NULL, *p_in = resp->p;
++	struct exp_xdr_stream xdr;
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	sb = gdev->gd_sb;
++
++	if (gdev->gd_maxcount != 0) {
++		/* FIXME: this will be bound by the session max response */
++		maxcount = svc_max_payload(resp->rqstp);
++		if (maxcount > gdev->gd_maxcount)
++			maxcount = gdev->gd_maxcount;
++
++		/* Ensure have room for type and notify field */
++		maxcount -= type_notify_len;
++		if (maxcount < 0) {
++			nfserr = -ETOOSMALL;	/* errno; mapped at out: */
++			goto toosmall;
++		}
++	}
++
++	RESERVE_SPACE(4);
++	WRITE32(gdev->gd_layout_type);
++	ADJUST_ARGS();
++
++	/* If gd_maxcount is 0 then just update notifications */
++	if (gdev->gd_maxcount == 0)
++		goto handle_notifications;
++
++	xdr.p = p_save = resp->p;
++	xdr.end = resp->end;
++	if (xdr.end - xdr.p > exp_xdr_qwords(maxcount & ~3))
++		xdr.end = xdr.p + exp_xdr_qwords(maxcount & ~3);
++
++	nfserr = sb->s_pnfs_op->get_device_info(sb, &xdr, gdev->gd_layout_type,
++						&gdev->gd_devid);
++	if (nfserr)
++		goto err;
++
++	/* The file system should never write 0 bytes without
++	 * returning an error
++	 */
++	BUG_ON(xdr.p == p_save);
++	BUG_ON(xdr.p > xdr.end);
++
++	/* Update the xdr stream with the number of bytes encoded
++	 * by the file system.
++	 */
++	p = xdr.p;
++	ADJUST_ARGS();
++
++handle_notifications:
++	/* Encode supported device notifications */
++	RESERVE_SPACE(4);
++	if (sb->s_pnfs_op->set_device_notify) {
++		struct pnfs_devnotify_arg dn_args;
++
++		dn_args.dn_layout_type = gdev->gd_layout_type;
++		dn_args.dn_devid = gdev->gd_devid;
++		dn_args.dn_notify_types = gdev->gd_notify_types;
++		nfserr = sb->s_pnfs_op->set_device_notify(sb, &dn_args);
++		if (nfserr)
++			goto err;
++		WRITE32(dn_args.dn_notify_types);
++	} else {
++		WRITE32(0);
++	}
++	ADJUST_ARGS();
++
++out:
++	return nfserrno(nfserr);	/* errno from above -> nfs status */
++toosmall:	/* encode the byte count for gdir_mincount instead */
++	dprintk("%s: maxcount too small\n", __func__);
++	RESERVE_SPACE(4);
++	WRITE32((p_save ? (xdr.p - p_save) * 4 : 0) + type_notify_len);
++	ADJUST_ARGS();
++	goto out;
++err:
++	/* Rewind to the beginning */
++	p = p_in;
++	ADJUST_ARGS();
++	if (nfserr == -ETOOSMALL)
++		goto toosmall;
++	printk(KERN_ERR "%s: export ERROR %d\n", __func__, nfserr);
++	goto out;
++}
++
++static __be32
++nfsd4_encode_layoutget(struct nfsd4_compoundres *resp,
++		       __be32 nfserr,
++		       struct nfsd4_pnfs_layoutget *lgp)
++{
++	int maxcount, leadcount;
++	struct super_block *sb;	/* NOTE(review): assigned but never used */
++	struct exp_xdr_stream xdr;
++	__be32 *p, *p_save, *p_start = resp->p;
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	sb = lgp->lg_fhp->fh_dentry->d_inode->i_sb;
++	maxcount = PAGE_SIZE;	/* hard cap; lowered to lg_maxcount below */
++	if (maxcount > lgp->lg_maxcount)
++		maxcount = lgp->lg_maxcount;
++
++	/* Check for space on xdr stream */
++	leadcount = 36 + sizeof(stateid_opaque_t);
++	RESERVE_SPACE(leadcount);
++	/* encode layout metadata after file system encodes layout */
++	p += XDR_QUADLEN(leadcount);
++	ADJUST_ARGS();
++
++	/* Ensure have room for ret_on_close, off, len, iomode, type */
++	maxcount -= leadcount;
++	if (maxcount < 0) {
++		printk(KERN_ERR "%s: buffer too small\n", __func__);
++		nfserr = nfserr_toosmall;
++		goto err;
++	}
++
++	/* Set xdr info so file system can encode layout */
++	xdr.p = p_save = resp->p;
++	xdr.end = resp->end;
++	if (xdr.end - xdr.p > exp_xdr_qwords(maxcount & ~3))
++		xdr.end = xdr.p + exp_xdr_qwords(maxcount & ~3);
++
++	/* Retrieve, encode, and merge layout; process stateid */
++	nfserr = nfs4_pnfs_get_layout(lgp, &xdr);
++	if (nfserr)
++		goto err;
++
++	/* Ensure file system returned enough bytes for the client
++	 * to access.
++	 */
++	if (lgp->lg_seg.length < lgp->lg_minlength) {
++		nfserr = nfserr_badlayout;
++		goto err;
++	}
++
++	/* The file system should never write 0 bytes without
++	 * returning an error
++	 */
++	BUG_ON(xdr.p == p_save);
++
++	/* Rewind to beginning and encode attrs */
++	resp->p = p_start;
++	RESERVE_SPACE(4);
++	WRITE32(lgp->lg_roc);	/* return on close */
++	ADJUST_ARGS();
++	nfsd4_encode_stateid(resp, &lgp->lg_sid);
++	RESERVE_SPACE(28);	/* count, offset, length, iomode, type */
++	/* Note: response logr_layout array count, always one for now */
++	WRITE32(1);
++	WRITE64(lgp->lg_seg.offset);
++	WRITE64(lgp->lg_seg.length);
++	WRITE32(lgp->lg_seg.iomode);
++	WRITE32(lgp->lg_seg.layout_type);
++
++	/* Update the xdr stream with the number of bytes written
++	 * by the file system
++	 */
++	p = xdr.p;
++	ADJUST_ARGS();
++
++	return nfs_ok;
++err:
++	resp->p = p_start;	/* drop everything encoded for this op */
++	return nfserr;
++}
++
++static __be32
++nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
++			  struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	__be32 *p;
++
++	if (nfserr)
++		goto out;
++
++	RESERVE_SPACE(4);
++	WRITE32(lcp->res.lc_size_chg);
++	ADJUST_ARGS();
++	if (lcp->res.lc_size_chg) {
++		RESERVE_SPACE(8);
++		WRITE64(lcp->res.lc_newsize);
++		ADJUST_ARGS();
++	}
++out:
++	return nfserr;
++}
++
++static __be32
++nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
++			  struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	__be32 *p;
++
++	if (nfserr)
++		goto out;
++
++	RESERVE_SPACE(4);
++	WRITE32(lrp->lrs_present != 0);    /* got stateid? */
++	ADJUST_ARGS();
++	if (lrp->lrs_present)
++		nfsd4_encode_stateid(resp, &lrp->lr_sid);
++out:
++	return nfserr;
++}
++#endif /* CONFIG_PNFSD */
++
+ static __be32
+ nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
+ {
+@@ -3129,11 +3659,19 @@ static nfsd4_enc nfsd4_enc_ops[] = {
+ 	[OP_DESTROY_SESSION]	= (nfsd4_enc)nfsd4_encode_destroy_session,
+ 	[OP_FREE_STATEID]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_GET_DIR_DELEGATION]	= (nfsd4_enc)nfsd4_encode_noop,
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_getdevinfo,
++	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_getdevlist,
++	[OP_LAYOUTCOMMIT]	= (nfsd4_enc)nfsd4_encode_layoutcommit,
++	[OP_LAYOUTGET]		= (nfsd4_enc)nfsd4_encode_layoutget,
++	[OP_LAYOUTRETURN]	= (nfsd4_enc)nfsd4_encode_layoutreturn,
++#else  /* CONFIG_PNFSD */
+ 	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTCOMMIT]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTGET]		= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTRETURN]	= (nfsd4_enc)nfsd4_encode_noop,
++#endif /* CONFIG_PNFSD */
+ 	[OP_SECINFO_NO_NAME]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_SEQUENCE]		= (nfsd4_enc)nfsd4_encode_sequence,
+ 	[OP_SET_SSV]		= (nfsd4_enc)nfsd4_encode_noop,
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
+--- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-08-31 20:41:19.203150982 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-31 20:42:05.565212801 -0400
+@@ -13,10 +13,15 @@
+ #include <linux/nfsd/syscall.h>
+ #include <linux/lockd/lockd.h>
+ #include <linux/sunrpc/clnt.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
+ 
+ #include "nfsd.h"
+ #include "cache.h"
+ 
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++#include <linux/nfsd4_spnfs.h>
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
+ /*
+  *	We have a single directory with 9 nodes in it.
+  */
+@@ -49,6 +54,9 @@ enum {
+ 	NFSD_Gracetime,
+ 	NFSD_RecoveryDir,
+ #endif
++#ifdef CONFIG_PNFSD
++	NFSD_pnfs_dlm_device,
++#endif
+ };
+ 
+ /*
+@@ -74,6 +82,9 @@ static ssize_t write_leasetime(struct fi
+ static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
+ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
+ #endif
++#ifdef CONFIG_PNFSD
++static ssize_t write_pnfs_dlm_device(struct file *file, char *buf, size_t size);
++#endif
+ 
+ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
+ 	[NFSD_Svc] = write_svc,
+@@ -96,6 +107,9 @@ static ssize_t (*write_op[])(struct file
+ 	[NFSD_Gracetime] = write_gracetime,
+ 	[NFSD_RecoveryDir] = write_recoverydir,
+ #endif
++#ifdef CONFIG_PNFSD
++	[NFSD_pnfs_dlm_device] = write_pnfs_dlm_device,
++#endif
+ };
+ 
+ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
+@@ -1349,6 +1363,68 @@ static ssize_t write_recoverydir(struct 
+ 
+ #endif
+ 
++#ifdef CONFIG_PNFSD
++
++static ssize_t __write_pnfs_dlm_device(struct file *file, char *buf,
++				       size_t size)
++{
++	char *mesg = buf;
++	char *pnfs_dlm_device;
++	int max_size = NFSD_PNFS_DLM_DEVICE_MAX;
++	int len, ret = 0;
++
++	if (size > 0) {
++		ret = -EINVAL;
++		if (size > max_size || buf[size-1] != '\n')
++			return ret;
++		buf[size-1] = 0;
++
++		pnfs_dlm_device = mesg;
++		len = qword_get(&mesg, pnfs_dlm_device, size);
++		if (len <= 0)
++			return ret;
++
++		ret = nfsd4_set_pnfs_dlm_device(pnfs_dlm_device, len);
++	} else
++		return nfsd4_get_pnfs_dlm_device_list(buf, SIMPLE_TRANSACTION_LIMIT);
++
++	return ret <= 0 ? ret : strlen(buf);
++}
++
++/**
++ * write_pnfs_dlm_device - Set or report the current pNFS data server list
++ *
++ * Input:
++ *			buf:		ignored
++ *			size:		zero
++ *
++ * OR
++ *
++ * Input:
++ *			buf:		C string containing a block device name,
++ *					a colon, and then a comma separated
++ *					list of pNFS data server IPv4 addresses
++ *			size:		non-zero length of C string in @buf
++ * Output:
++ *	On success:	passed-in buffer filled with '\n'-terminated C
++ *			string containing a block device name, a colon, and
++ *			then a comma separated list of pNFS
++ *			data server IPv4 addresses.
++ *			return code is the size in bytes of the string
++ *	On error:	return code is a negative errno value
++ */
++static ssize_t write_pnfs_dlm_device(struct file *file, char *buf, size_t size)
++{
++	ssize_t rv;
++
++	mutex_lock(&nfsd_mutex);
++	rv = __write_pnfs_dlm_device(file, buf, size);
++	mutex_unlock(&nfsd_mutex);
++	return rv;
++}
++
++#endif /* CONFIG_PNFSD */
++
+ /*----------------------------------------------------------------------------*/
+ /*
+  *	populating the filesystem.
+@@ -1383,6 +1459,10 @@ static int nfsd_fill_super(struct super_
+ 		[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
+ 		[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
+ #endif
++#ifdef CONFIG_PNFSD
++		[NFSD_pnfs_dlm_device] = {"pnfs_dlm_device", &transaction_ops,
++					   S_IWUSR|S_IRUSR},
++#endif
+ 		/* last one */ {""}
+ 	};
+ 	return simple_fill_super(sb, 0x6e667364, nfsd_files);
+@@ -1421,6 +1501,9 @@ static int create_proc_exports_entry(voi
+ }
+ #endif
+ 
++#if defined(CONFIG_SPNFS_BLOCK)
++int nfsd_bl_init(void);
++#endif
+ static int __init init_nfsd(void)
+ {
+ 	int retval;
+@@ -1443,6 +1526,15 @@ static int __init init_nfsd(void)
+ 	retval = create_proc_exports_entry();
+ 	if (retval)
+ 		goto out_free_idmap;
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++	retval = spnfs_init_proc();
++	if (retval != 0)	/* exports entry exists: take the full unwind */
++		goto out_free_all;
++#if defined(CONFIG_SPNFS_BLOCK)
++	nfsd_bl_init();
++#endif /* CONFIG_SPNFS_BLOCK */
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
+ 	retval = register_filesystem(&nfsd_fs_type);
+ 	if (retval)
+ 		goto out_free_all;
+@@ -1465,7 +1557,22 @@ out_free_stat:
+ 
+ static void __exit exit_nfsd(void)
+ {
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS_LAYOUTSEGMENTS)
++	/* Remove entries before their parent directory "fs/nfs/spnfs". */
++	remove_proc_entry("fs/nfs/spnfs/layoutseg", NULL);
++	remove_proc_entry("fs/nfs/spnfs/layoutsegsize", NULL);
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS_LAYOUTSEGMENTS */
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++	remove_proc_entry("fs/nfs/spnfs/recall", NULL);
++	remove_proc_entry("fs/nfs/spnfs/layoutseg", NULL);
++	remove_proc_entry("fs/nfs/spnfs/getfh", NULL);
++	remove_proc_entry("fs/nfs/spnfs/config", NULL);
++	remove_proc_entry("fs/nfs/spnfs/ctl", NULL);
++	remove_proc_entry("fs/nfs/spnfs", NULL);
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
+ 	nfsd_export_shutdown();
++	nfsd4_pnfs_dlm_shutdown();
+ 	nfsd_reply_cache_shutdown();
+ 	remove_proc_entry("fs/nfs/exports", NULL);
+ 	remove_proc_entry("fs/nfs", NULL);
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
+--- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-08-31 20:41:19.204160960 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-31 20:42:05.565212801 -0400
+@@ -285,11 +285,17 @@ extern time_t nfsd4_grace;
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
+ 	NFSD4_SUPPORTED_ATTRS_WORD0
+ 
++#if defined(CONFIG_PNFSD)
++#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
++	(NFSD4_SUPPORTED_ATTRS_WORD1 | FATTR4_WORD1_FS_LAYOUT_TYPES)
++#else /* CONFIG_PNFSD */
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
+ 	NFSD4_SUPPORTED_ATTRS_WORD1
++#endif /* CONFIG_PNFSD */
+ 
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
+-	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
++	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT | \
++	 FATTR4_WORD2_LAYOUT_BLKSIZE)
+ 
+ static inline u32 nfsd_suppattrs0(u32 minorversion)
+ {
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.c
+--- linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-08-31 20:42:05.566222921 -0400
+@@ -10,6 +10,7 @@
+ #include <linux/exportfs.h>
+ 
+ #include <linux/sunrpc/svcauth_gss.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
+ #include "nfsd.h"
+ #include "vfs.h"
+ #include "auth.h"
+@@ -139,6 +140,7 @@ static inline __be32 check_pseudo_root(s
+ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
+ {
+ 	struct knfsd_fh	*fh = &fhp->fh_handle;
++	int fsid_type;
+ 	struct fid *fid = NULL, sfid;
+ 	struct svc_export *exp;
+ 	struct dentry *dentry;
+@@ -159,7 +161,8 @@ static __be32 nfsd_set_fh_dentry(struct 
+ 			return error;
+ 		if (fh->fh_auth_type != 0)
+ 			return error;
+-		len = key_len(fh->fh_fsid_type) / 4;
++		fsid_type = pnfs_fh_fsid_type(fh);
++		len = key_len(fsid_type) / 4;
+ 		if (len == 0)
+ 			return error;
+ 		if  (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
+@@ -172,7 +175,7 @@ static __be32 nfsd_set_fh_dentry(struct 
+ 		data_left -= len;
+ 		if (data_left < 0)
+ 			return error;
+-		exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth);
++		exp = rqst_exp_find(rqstp, fsid_type, fh->fh_auth);
+ 		fid = (struct fid *)(fh->fh_auth + len);
+ 	} else {
+ 		__u32 tfh[2];
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.h
+--- linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-08-31 20:42:05.567233002 -0400
+@@ -14,6 +14,7 @@ enum nfsd_fsid {
+ 	FSID_UUID8,
+ 	FSID_UUID16,
+ 	FSID_UUID16_INUM,
++	FSID_MAX
+ };
+ 
+ enum fsid_source {
+@@ -205,4 +206,42 @@ fh_unlock(struct svc_fh *fhp)
+ 	}
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++/*
++ * fh_fsid_type is overloaded to indicate whether a filehandle was one supplied
++ * to a DS by LAYOUTGET.  nfs4_preprocess_stateid_op() uses this to decide how
++ * to handle a given stateid.
++ */
++static inline int pnfs_fh_is_ds(struct knfsd_fh *fh)
++{
++	return fh->fh_fsid_type >= FSID_MAX;
++}
++
++static inline void pnfs_fh_mark_ds(struct knfsd_fh *fh)
++{
++	BUG_ON(fh->fh_version != 1);
++	BUG_ON(pnfs_fh_is_ds(fh));
++	fh->fh_fsid_type += FSID_MAX;
++}
++
++#else  /* CONFIG_PNFSD */
++
++static inline int pnfs_fh_is_ds(struct knfsd_fh *fh)
++{
++	return 0;
++}
++
++#endif /* CONFIG_PNFSD */
++
++/* allows fh_verify() to check the real fsid_type (i.e., not overloaded). */
++static inline int pnfs_fh_fsid_type(struct knfsd_fh *fh)
++{
++	int fsid_type = fh->fh_fsid_type;
++
++	if (pnfs_fh_is_ds(fh))
++		return fsid_type - FSID_MAX;
++	return fsid_type;
++}
++
+ #endif /* _LINUX_NFSD_FH_INT_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/nfssvc.c
+--- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-08-31 20:41:17.274232911 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-08-31 20:42:05.568144414 -0400
+@@ -115,7 +115,7 @@ struct svc_program		nfsd_program = {
+ 
+ };
+ 
+-u32 nfsd_supported_minorversion;
++u32 nfsd_supported_minorversion = NFSD_SUPPORTED_MINOR_VERSION;
+ 
+ int nfsd_vers(int vers, enum vers_op change)
+ {
+diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pnfsd.h
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-08-31 20:42:05.569090615 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-08-31 20:42:05.569090615 -0400
+@@ -0,0 +1,143 @@
++/*
++ *  Copyright (c) 2005 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef LINUX_NFSD_PNFSD_H
++#define LINUX_NFSD_PNFSD_H
++
++#include <linux/list.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++#include "state.h"
++#include "xdr4.h"
++
++/* outstanding layout stateid */
++struct nfs4_layout_state {
++	struct list_head	ls_perfile;
++	struct list_head	ls_layouts; /* list of nfs4_layouts */
++	struct kref		ls_ref;
++	struct nfs4_client	*ls_client;
++	struct nfs4_file	*ls_file;
++	stateid_t		ls_stateid;
++};
++
++/* outstanding layout */
++struct nfs4_layout {
++	struct list_head		lo_perfile;	/* hash by f_id */
++	struct list_head		lo_perclnt;	/* hash by clientid */
++	struct list_head		lo_perstate;
++	struct nfs4_file		*lo_file;	/* backpointer */
++	struct nfs4_client		*lo_client;
++	struct nfs4_layout_state	*lo_state;
++	struct nfsd4_layout_seg 	lo_seg;
++};
++
++struct pnfs_inval_state {
++	struct knfsd_fh		mdsfh; /* needed only by invalidate all */
++	stateid_t		stid;
++	clientid_t		clid;
++	u32			status;
++};
++
++/* pNFS Data Server state */
++#define DS_STATEID_VALID   0
++#define DS_STATEID_ERROR   1
++#define DS_STATEID_NEW     2
++
++struct pnfs_ds_stateid {
++	struct list_head	ds_hash;        /* ds_stateid hash entry */
++	struct list_head	ds_perclid;     /* per client hash entry */
++	stateid_t		ds_stid;
++	struct knfsd_fh		ds_fh;
++	unsigned long		ds_access;
++	u32			ds_status;      /* from MDS */
++	u32			ds_verifier[2]; /* from MDS */
++	wait_queue_head_t	ds_waitq;
++	unsigned long		ds_flags;
++	struct kref		ds_ref;
++	clientid_t		ds_mdsclid;
++};
++
++struct pnfs_ds_clientid {
++	struct list_head	dc_hash;        /* mds_clid_hashtbl entry */
++	struct list_head	dc_stateid;     /* ds_stateid head */
++	struct list_head	dc_permdsid;    /* per mdsid hash entry */
++	clientid_t		dc_mdsclid;
++	struct kref		dc_ref;
++	uint32_t		dc_mdsid;
++};
++
++struct pnfs_mds_id {
++	struct list_head	di_hash;        /* mds_nodeid list entry */
++	struct list_head	di_mdsclid;     /* mds_clientid head */
++	uint32_t		di_mdsid;
++	time_t			di_mdsboot;	/* mds boot time */
++	struct kref		di_ref;
++};
++
++/* notify device request (from exported filesystem) */
++struct nfs4_notify_device {
++	struct nfsd4_pnfs_cb_dev_list  *nd_list;
++	struct nfs4_client	       *nd_client;
++	struct list_head	        nd_perclnt;
++
++	void				*nd_args;	/* nfsd internal */
++};
++
++u64 find_create_sbid(struct super_block *);
++struct super_block *find_sbid_id(u64);
++__be32 nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *, struct exp_xdr_stream *);
++int nfs4_pnfs_return_layout(struct super_block *, struct svc_fh *,
++					struct nfsd4_pnfs_layoutreturn *);
++int nfs4_pnfs_cb_get_state(struct super_block *, struct pnfs_get_state *);
++int nfs4_pnfs_cb_change_state(struct pnfs_get_state *);
++void nfs4_ds_get_verifier(stateid_t *, struct super_block *, u32 *);
++int put_layoutrecall(struct nfs4_layoutrecall *);
++void nomatching_layout(struct nfs4_layoutrecall *);
++void *layoutrecall_done(struct nfs4_layoutrecall *);
++int nfsd4_cb_layout(struct nfs4_layoutrecall *);
++int nfsd_layout_recall_cb(struct super_block *, struct inode *,
++			  struct nfsd4_pnfs_cb_layout *);
++int nfsd_device_notify_cb(struct super_block *,
++			  struct nfsd4_pnfs_cb_dev_list *);
++int nfsd4_cb_notify_device(struct nfs4_notify_device *);
++void pnfs_set_device_notify(clientid_t *, unsigned int types);
++void pnfs_clear_device_notify(struct nfs4_client *);
++
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++extern struct sockaddr pnfsd_lexp_addr;
++extern size_t pnfs_lexp_addr_len;
++
++extern void pnfsd_lexp_init(struct inode *);
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
++#endif /* LINUX_NFSD_PNFSD_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-08-31 20:42:05.569090615 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-08-31 20:42:05.569090615 -0400
+@@ -0,0 +1,225 @@
++/*
++ * linux/fs/nfsd/pnfsd_lexp.c
++ *
++ * pNFS export of local filesystems.
++ *
++ * Export local file systems over the files layout type.
++ * The MDS (metadata server) functions also as a single DS (data server).
++ * This is mostly useful for development and debugging purposes.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * Copyright (C) 2008 Benny Halevy, <bhalevy@panasas.com>
++ *
++ * Initial implementation was based on the pnfs-gfs2 patches done
++ * by David M. Richter <richterd@citi.umich.edu>
++ */
++
++#include <linux/sunrpc/svc_xprt.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++struct sockaddr pnfsd_lexp_addr;
++size_t pnfs_lexp_addr_len;
++
++static int
++pnfsd_lexp_layout_type(struct super_block *sb)
++{
++	int ret = LAYOUT_NFSV4_1_FILES;
++	dprintk("<-- %s: return %d\n", __func__, ret);
++	return ret;
++}
++
++static int
++pnfsd_lexp_get_device_iter(struct super_block *sb,
++			   u32 layout_type,
++			   struct nfsd4_pnfs_dev_iter_res *res)
++{
++	dprintk("--> %s: sb=%p\n", __func__, sb);
++
++	BUG_ON(layout_type != LAYOUT_NFSV4_1_FILES);
++
++	res->gd_eof = 1;
++	if (res->gd_cookie)
++		return -ENOENT;
++	res->gd_cookie = 1;
++	res->gd_verf = 1;
++	res->gd_devid = 1;
++
++	dprintk("<-- %s: return 0\n", __func__);
++	return 0;
++}
++
++static int
++pnfsd_lexp_get_device_info(struct super_block *sb,
++			   struct exp_xdr_stream *xdr,
++			   u32 layout_type,
++			   const struct nfsd4_pnfs_deviceid *devid)
++{
++	int err;
++	struct pnfs_filelayout_device fdev;
++	struct pnfs_filelayout_multipath fl_devices[1];
++	u32 fl_stripe_indices[1] = { 0 };
++	struct pnfs_filelayout_devaddr daddr;
++	/* %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x.%03u.%03u */
++	char daddr_buf[8*4 + 2*3 + 10];
++
++	dprintk("--> %s: sb=%p\n", __func__, sb);
++
++	BUG_ON(layout_type != LAYOUT_NFSV4_1_FILES);
++
++	memset(&fdev, '\0', sizeof(fdev));
++
++	if (devid->devid != 1) {
++		printk(KERN_ERR "%s: WARNING: didn't receive a deviceid of 1 "
++			"(got: 0x%llx)\n", __func__, devid->devid);
++		err = -EINVAL;
++		goto out;
++	}
++
++	/* exactly one device: this server doubles as the only DS */
++	fdev.fl_device_length = 1;
++	fdev.fl_device_list = fl_devices;
++
++	fdev.fl_stripeindices_length = fdev.fl_device_length;
++	fdev.fl_stripeindices_list = fl_stripe_indices;
++
++	daddr.r_addr.data = daddr_buf;
++	daddr.r_addr.len = sizeof(daddr_buf);
++	err = __svc_print_netaddr(&pnfsd_lexp_addr, &daddr.r_addr);
++	if (err < 0)
++		goto out;
++	daddr.r_addr.len = err;
++	switch (pnfsd_lexp_addr.sa_family) {
++	case AF_INET:
++		daddr.r_netid.data = "tcp";
++		daddr.r_netid.len = 3;
++		break;
++	case AF_INET6:
++		daddr.r_netid.data = "tcp6";
++		daddr.r_netid.len = 4;
++		break;
++	default:
++		BUG();
++	}
++	fdev.fl_device_list[0].fl_multipath_length = 1;
++	fdev.fl_device_list[0].fl_multipath_list = &daddr;
++
++	/* have nfsd encode the device info */
++	err = filelayout_encode_devinfo(xdr, &fdev);
++out:
++	dprintk("<-- %s: return %d\n", __func__, err);
++	return err;
++}
++
++static int get_stripe_unit(int blocksize)
++{
++	if (blocksize < NFSSVC_MAXBLKSIZE)
++		blocksize = NFSSVC_MAXBLKSIZE - (NFSSVC_MAXBLKSIZE % blocksize);
++	dprintk("%s: return %d\n", __func__, blocksize);
++	return blocksize;
++}
++
++static enum nfsstat4
++pnfsd_lexp_layout_get(struct inode *inode,
++		      struct exp_xdr_stream *xdr,
++		      const struct nfsd4_pnfs_layoutget_arg *arg,
++		      struct nfsd4_pnfs_layoutget_res *res)
++{
++	enum nfsstat4 rc = NFS4_OK;
++	struct pnfs_filelayout_layout *layout = NULL;
++	struct knfsd_fh *fhp = NULL;
++
++	dprintk("--> %s: inode=%p\n", __func__, inode);
++
++	res->lg_seg.layout_type = LAYOUT_NFSV4_1_FILES;
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;
++
++	layout = kzalloc(sizeof(*layout), GFP_KERNEL);
++	if (layout == NULL) {
++		rc = -ENOMEM;
++		goto error;
++	}
++
++	/* Set file layout response args */
++	layout->lg_layout_type = LAYOUT_NFSV4_1_FILES;
++	layout->lg_stripe_type = STRIPE_SPARSE;
++	layout->lg_commit_through_mds = true;
++	layout->lg_stripe_unit = get_stripe_unit(inode->i_sb->s_blocksize);
++	layout->lg_fh_length = 1;
++	layout->device_id.sbid = arg->lg_sbid;
++	layout->device_id.devid = 1;				/*FSFTEMP*/
++	layout->lg_first_stripe_index = 0;			/*FSFTEMP*/
++	layout->lg_pattern_offset = 0;
++
++	fhp = kmalloc(sizeof(*fhp), GFP_KERNEL);
++	if (fhp == NULL) {
++		rc = -ENOMEM;
++		goto error;
++	}
++
++	memcpy(fhp, arg->lg_fh, sizeof(*fhp));
++	pnfs_fh_mark_ds(fhp);
++	layout->lg_fh_list = fhp;
++
++	/* Call nfsd to encode layout */
++	rc = filelayout_encode_layout(xdr, layout);
++exit:
++	kfree(layout);
++	kfree(fhp);
++	dprintk("<-- %s: return %d\n", __func__, rc);
++	return rc;
++
++error:
++	res->lg_seg.length = 0;
++	goto exit;
++}
++
++static int
++pnfsd_lexp_layout_commit(struct inode *inode,
++			 const struct nfsd4_pnfs_layoutcommit_arg *args,
++			 struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	dprintk("%s: (unimplemented)\n", __func__);
++
++	return 0;
++}
++
++static int
++pnfsd_lexp_layout_return(struct inode *inode,
++			 const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	dprintk("%s: (unimplemented)\n", __func__);
++
++	return 0;
++}
++
++static int pnfsd_lexp_get_state(struct inode *inode, struct knfsd_fh *fh,
++				struct pnfs_get_state *p)
++{
++	return 0;	/* just use the current stateid */
++}
++
++static struct pnfs_export_operations pnfsd_lexp_ops = {
++	.layout_type = pnfsd_lexp_layout_type,
++	.get_device_info = pnfsd_lexp_get_device_info,
++	.get_device_iter = pnfsd_lexp_get_device_iter,
++	.layout_get = pnfsd_lexp_layout_get,
++	.layout_commit = pnfsd_lexp_layout_commit,
++	.layout_return = pnfsd_lexp_layout_return,
++	.get_state = pnfsd_lexp_get_state,
++};
++
++void
++pnfsd_lexp_init(struct inode *inode)
++{
++	dprintk("%s: &pnfsd_lexp_ops=%p\n", __func__, &pnfsd_lexp_ops);
++	inode->i_sb->s_pnfs_op = &pnfsd_lexp_ops;
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_com.c
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-08-31 20:42:05.570119170 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-08-31 20:42:05.570119170 -0400
+@@ -0,0 +1,535 @@
++/*
++ * fs/nfsd/spnfs_com.c
++ *
++ * Communication layer between spNFS kernel and userspace
++ * Based heavily on idmap.c
++ *
++ */
++
++/*
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Marius Aamodt Eriksen <marius@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#include <linux/namei.h>
++#include <linux/mount.h>
++#include <linux/path.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++#include <linux/nfsd/debug.h>
++
++#include <linux/nfsd4_spnfs.h>
++
++#define	NFSDDBG_FACILITY		NFSDDBG_PROC
++
++static ssize_t   spnfs_pipe_upcall(struct file *, struct rpc_pipe_msg *,
++		     char __user *, size_t);
++static ssize_t   spnfs_pipe_downcall(struct file *, const char __user *,
++		     size_t);
++static void      spnfs_pipe_destroy_msg(struct rpc_pipe_msg *);
++
++static struct rpc_pipe_ops spnfs_upcall_ops = {
++	.upcall		= spnfs_pipe_upcall,
++	.downcall	= spnfs_pipe_downcall,
++	.destroy_msg	= spnfs_pipe_destroy_msg,
++};
++
++/* evil global variable */
++struct spnfs *global_spnfs;
++struct spnfs_config *spnfs_config;
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++int spnfs_use_layoutsegments;
++uint64_t layoutsegment_size;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++/*
++ * Used by spnfs_enabled()
++ * Tracks if the subsystem has been initialized at some point.  It doesn't
++ * matter if it's not currently initialized.
++ */
++static int spnfs_enabled_at_some_point;
++
++/* Create the rpc_pipefs "spnfs" pipe and publish it as global_spnfs. */
++/* Returns 0 on success, -EEXIST if already set up, else a negative errno. */
++int
++nfsd_spnfs_new(void)
++{
++	struct spnfs *spnfs;
++	struct vfsmount *mnt;
++	struct nameidata nd;
++	int rc;
++
++	if (global_spnfs != NULL)
++		return -EEXIST;
++
++	mnt = rpc_get_mount();
++	if (IS_ERR(mnt))
++		return PTR_ERR(mnt);
++
++	spnfs = kzalloc(sizeof(*spnfs), GFP_KERNEL);
++	if (spnfs == NULL) {
++		rc = -ENOMEM;
++		goto err;
++	}
++	/* Ready the locks before the pipe becomes visible to userspace. */
++	mutex_init(&spnfs->spnfs_lock);
++	mutex_init(&spnfs->spnfs_plock);
++	init_waitqueue_head(&spnfs->spnfs_wq);
++
++	/* FIXME: do not abuse rpc_pipefs/nfs */
++	rc = vfs_path_lookup(mnt->mnt_root, mnt, "/nfs", 0, &nd);
++	if (rc)
++		goto err;
++
++	spnfs->spnfs_dentry = rpc_mkpipe(nd.path.dentry, "spnfs", spnfs,
++					 &spnfs_upcall_ops, 0);
++	path_put(&nd.path);	/* drop the reference taken by the lookup */
++	if (IS_ERR(spnfs->spnfs_dentry)) {
++		rc = -EPIPE;
++		goto err;
++	}
++
++	global_spnfs = spnfs;
++	spnfs_enabled_at_some_point = 1;
++	return 0;
++err:
++	rpc_put_mount();
++	kfree(spnfs);
++	return rc;
++}
++
++/* again, code it like we're going to remove the global variable */
++void
++nfsd_spnfs_delete(void)
++{
++	struct spnfs *spnfs = global_spnfs;
++
++	if (!spnfs)
++		return;
++	rpc_unlink(spnfs->spnfs_dentry);
++	rpc_put_mount();
++	global_spnfs = NULL;
++	kfree(spnfs);
++}
++
++/* RPC pipefs upcall/downcall routines */
++/* Upcall: invoked by the rpc_pipe code for messages queued elsewhere */
++/* (see nfs_idmap_id for an example of enqueueing).  Returns the number */
++/* of bytes copied to @dst, or -EFAULT when nothing could be copied. */
++static ssize_t
++spnfs_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
++    char __user *dst, size_t buflen)
++{
++	char *data = (char *)msg->data + msg->copied;
++	ssize_t mlen = msg->len - msg->copied;
++	ssize_t left;
++
++	if (mlen > buflen)
++		mlen = buflen;
++	/* copy_to_user() returns the count NOT copied; it is never negative */
++	left = copy_to_user(dst, data, mlen);
++	if (left > 0 && left == mlen) {
++		msg->errno = -EFAULT;
++		return -EFAULT;
++	}
++	mlen -= left;
++	msg->copied += mlen;
++	msg->errno = 0;
++	return mlen;
++}
++
++/* Downcall: userspace writes one whole struct spnfs_msg as the reply to */
++/* the pending upcall; the result is stored in spnfs->spnfs_im and waiters */
++/* are woken.  Returns @mlen, or -ENOSPC, -ENOMEM, -EFAULT or -EINVAL. */
++static ssize_t
++spnfs_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
++{
++	struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
++	struct spnfs *spnfs = (struct spnfs *)rpci->private;
++	struct spnfs_msg *im_in = NULL, *im = &spnfs->spnfs_im;
++	int ret;
++
++	if (mlen != sizeof(struct spnfs_msg))
++		return -ENOSPC;
++
++	im_in = kmalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im_in == NULL)
++		return -ENOMEM;
++	if (copy_from_user(im_in, src, mlen) != 0) {
++		kfree(im_in);	/* don't leak the bounce buffer */
++		return -EFAULT;
++	}
++
++	mutex_lock(&spnfs->spnfs_plock);
++	ret = mlen;
++	im->im_status = im_in->im_status;
++	/* If we got an error, terminate now, and wake up pending upcalls */
++	if (!(im_in->im_status & SPNFS_STATUS_SUCCESS)) {
++		wake_up(&spnfs->spnfs_wq);
++		goto out;
++	}
++
++	ret = -EINVAL;
++	/* Did we match the current upcall?  DMXXX: unclear when we would */
++	/* _not_ match it; anyway, let's do a simplistic check on the type */
++	if (im_in->im_type == im->im_type) {
++		/* copy the response into the spnfs struct */
++		memcpy(&im->im_res, &im_in->im_res, sizeof(im->im_res));
++		ret = mlen;
++	} else
++		dprintk("spnfs: downcall type != upcall type\n");
++
++	wake_up(&spnfs->spnfs_wq);
++/* DMXXX handle rval processing */
++out:
++	mutex_unlock(&spnfs->spnfs_plock);
++	kfree(im_in);
++	return ret;
++}
++
++static void
++spnfs_pipe_destroy_msg(struct rpc_pipe_msg *msg)
++{
++	struct spnfs_msg *im = msg->data;
++	struct spnfs *spnfs = container_of(im, struct spnfs, spnfs_im);
++
++	if (msg->errno >= 0)
++		return;
++	mutex_lock(&spnfs->spnfs_plock);
++	im->im_status = SPNFS_STATUS_FAIL;  /* DMXXX */
++	wake_up(&spnfs->spnfs_wq);
++	mutex_unlock(&spnfs->spnfs_plock);
++}
++
++/* Generic upcall used by spnfs_ops.c: queue upmsg and sleep for the reply */
++int
++spnfs_upcall(struct spnfs *spnfs, struct spnfs_msg *upmsg,
++		union spnfs_msg_res *res)
++{
++	struct spnfs_msg *slot = &spnfs->spnfs_im;
++	struct rpc_pipe_msg pipe_msg;
++	DECLARE_WAITQUEUE(waiter, current);
++	int status = -EIO;
++
++	/*
++	 * spnfs_lock is held from here to return; spnfs_plock is dropped
++	 * only around schedule() below.
++	 */
++	mutex_lock(&spnfs->spnfs_lock);
++	mutex_lock(&spnfs->spnfs_plock);
++
++	memset(slot, 0, sizeof(*slot));
++	memcpy(slot, upmsg, sizeof(*upmsg));
++	memset(&pipe_msg, 0, sizeof(pipe_msg));
++	pipe_msg.data = slot;
++	pipe_msg.len = sizeof(*slot);
++
++	add_wait_queue(&spnfs->spnfs_wq, &waiter);
++	if (rpc_queue_upcall(spnfs->spnfs_dentry->d_inode, &pipe_msg) < 0) {
++		remove_wait_queue(&spnfs->spnfs_wq, &waiter);
++		goto out;
++	}
++
++	/* sleep until spnfs_wq is woken, without holding spnfs_plock */
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	mutex_unlock(&spnfs->spnfs_plock);
++	schedule();
++	current->state = TASK_RUNNING;
++	remove_wait_queue(&spnfs->spnfs_wq, &waiter);
++	mutex_lock(&spnfs->spnfs_plock);
++
++	if (slot->im_status & SPNFS_STATUS_SUCCESS) {
++		/* hand the reply payload back to the caller */
++		memcpy(res, &slot->im_res, sizeof(*res));
++		status = 0;
++	}
++
++out:
++	memset(slot, 0, sizeof(*slot));
++	mutex_unlock(&spnfs->spnfs_plock);
++	mutex_unlock(&spnfs->spnfs_lock);
++	return status;
++}
++
++/*
++ * This is used to determine if the spnfsd daemon has been started at
++ * least once since the system came up.  It is used by the export
++ * mechanism to decide if spnfs is in use.
++ *
++ * Returns non-zero if the spnfsd has initialized the communication pipe
++ * at least once.
++ */
++int spnfs_enabled(void)
++{
++	return spnfs_enabled_at_some_point;
++}
++
++#ifdef CONFIG_PROC_FS
++
++/*
++ * procfs virtual files for user/kernel space communication:
++ *
++ * ctl - currently just an on/off switch...can be expanded
++ * getfh - fd to fh conversion
++ * recall - recall a layout from the command line, for example:
++ *		echo <path> > /proc/fs/spnfs/recall
++ * config - configuration info, e.g., stripe size, num ds, etc.
++ */
++
++/*-------------- start ctl -------------------------*/
++/* ctl: writing a non-zero int calls nfsd_spnfs_new(), zero calls _delete() */
++static ssize_t ctl_write(struct file *file, const char __user *buf,
++			 size_t count, loff_t *offset)
++{
++	int enable, err = 0;
++
++	if (copy_from_user(&enable, buf, sizeof(enable)))
++		return -EFAULT;
++
++	if (enable)
++		err = nfsd_spnfs_new();
++	else
++		nfsd_spnfs_delete();
++
++	return err != 0 ? err : (ssize_t)count;
++}
++
++static const struct file_operations ctl_ops = {
++	.write		= ctl_write,
++};
++/*-------------- end ctl ---------------------------*/
++
++/*-------------- start config -------------------------*/
++static ssize_t config_write(struct file *file, const char __user *buf,
++			    size_t count, loff_t *offset)
++{
++	static struct spnfs_config cfg;
++
++	/* count comes from userspace: never copy more than cfg can hold */
++	if (count > sizeof(cfg) || copy_from_user(&cfg, buf, count))
++		return -EFAULT;
++	spnfs_config = &cfg;
++	return 0;
++}
++
++static const struct file_operations config_ops = {
++	.write		= config_write,
++};
++/*-------------- end config ---------------------------*/
++
++/*-------------- start getfh -----------------------*/
++static int getfh_open(struct inode *inode, struct file *file)
++{
++	/* zeroed, so a read before any write cannot expose stale heap data */
++	file->private_data = kzalloc(sizeof(struct nfs_fh), GFP_KERNEL);
++	if (file->private_data == NULL)
++		return -ENOMEM;
++	return 0;
++}
++
++static ssize_t getfh_read(struct file *file, char __user *buf, size_t count,
++			  loff_t *offset)
++{
++	/* NOTE(review): copies sizeof(struct nfs_fh) bytes whatever count is */
++	if (copy_to_user(buf, file->private_data, sizeof(struct nfs_fh)))
++		return -EFAULT;
++	return count;
++}
++
++/* getfh: read an fd from userspace and store that file's nfs_fh for read() */
++static ssize_t getfh_write(struct file *file, const char __user *buf,
++			   size_t count, loff_t *offset)
++{
++	struct nfs_fh *fh = file->private_data;
++	int fd;
++
++	if (copy_from_user(&fd, buf, sizeof(fd)))
++		return -EFAULT;
++
++	return spnfs_getfh(fd, fh) != 0 ? -EIO : (ssize_t)count;
++}
++
++static int getfh_release(struct inode *inode, struct file *file)
++{
++	kfree(file->private_data);	/* buffer allocated in getfh_open() */
++	return 0;
++}
++
++static const struct file_operations getfh_ops = {
++	.open		= getfh_open,
++	.read		= getfh_read,
++	.write		= getfh_write,
++	.release	= getfh_release,
++};
++/*-------------- end getfh ------------------------*/
++
++
++/*-------------- start recall layout --------------*/
++/* recall: parse "<path> [<offset> [<length>]]\n" and recall that layout */
++static ssize_t recall_write(struct file *file, const char __user *buf,
++			    size_t count, loff_t *offset)
++{
++	char input[128];
++	char *cursor, *path, *field;
++	u64 off = 0, len = 0;
++	int rc;
++
++	if (count > sizeof(input))
++		return -EINVAL;
++
++	if (copy_from_user(input, buf, count))
++		return -EFAULT;
++
++	/* assumes newline-terminated path */
++	cursor = memchr(input, '\n', count);
++	if (cursor == NULL)
++		return -EINVAL;
++	*cursor = '\0';
++
++	/*
++	 * Scan for path and, optionally, an offset and length
++	 * of a layout segment to be recalled; if there are two
++	 * fields, they're assumed to be path and offset.
++	 */
++	cursor = input;
++	path = strsep(&cursor, " ");
++	if (path == NULL)
++		return -EINVAL;
++
++	/* optional decimal offset */
++	field = strsep(&cursor, " ");
++	if (field != NULL) {
++		if (strict_strtoull(field, 10, &off) != 0)
++			return -EINVAL;
++
++		/* optional decimal length, only looked for after an offset */
++		field = strsep(&cursor, " ");
++		if (field != NULL && strict_strtoull(field, 10, &len) != 0)
++			return -EINVAL;
++	}
++
++	/* hand the parsed request to spnfs_test_layoutrecall() */
++	rc = spnfs_test_layoutrecall(path, off, len);
++	if (rc != 0)
++		return rc;
++
++	return count;
++}
++
++static const struct file_operations recall_ops = {
++	.write		= recall_write,
++};
++/*-------------- end recall layout --------------*/
++
++
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++/*-------------- start layoutseg -------------------------*/
++/* layoutseg: a leading '0' disables layout segments, anything else enables */
++static ssize_t layoutseg_write(struct file *file, const char __user *buf,
++			       size_t count, loff_t *offset)
++{
++	char cmd[3];
++
++	/* only the first byte of the write is examined */
++	if (copy_from_user(cmd, buf, 1))
++		return -EFAULT;
++
++	spnfs_use_layoutsegments = (cmd[0] != '0');
++
++	return count;
++}
++
++static const struct file_operations layoutseg_ops = {
++	.write		= layoutseg_write,
++};
++/*-------------- end layoutseg ---------------------------*/
++
++/*-------------- start layoutsegsize -------------------------*/
++static ssize_t layoutsegsize_write(struct file *file, const char __user *buf,
++				   size_t count, loff_t *offset)
++{
++	char cmd[50] = "";	/* zero-filled: the copy stays NUL-terminated */
++	size_t n = count < sizeof(cmd) ? count : sizeof(cmd) - 1;
++
++	if (copy_from_user(cmd, buf, n))
++		return -EFAULT;
++	layoutsegment_size = simple_strtoull(cmd, NULL, 10);
++	return count;
++}
++
++static const struct file_operations layoutsegsize_ops = {
++	.write		= layoutsegsize_write,
++};
++/*-------------- end layoutsegsize ---------------------------*/
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++/*
++ * Create the /proc/fs/spnfs directory and one entry per control file,
++ * pointing each entry at its file_operations.
++ *
++ * The files are ctl, config, getfh and recall, plus layoutseg and
++ * layoutsegsize when CONFIG_SPNFS_LAYOUTSEGMENTS is set.
++ *
++ * Returns 0 on success, or -ENOMEM as soon as any creation fails; whatever
++ * was created before the failure is left in place.
++ */
++int
++spnfs_init_proc(void)
++{
++	static const struct {
++		const char *path;
++		const struct file_operations *fops;
++	} files[] = {
++		{ "fs/spnfs/ctl",		&ctl_ops },
++		{ "fs/spnfs/config",		&config_ops },
++		{ "fs/spnfs/getfh",		&getfh_ops },
++		{ "fs/spnfs/recall",		&recall_ops },
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++		{ "fs/spnfs/layoutseg",		&layoutseg_ops },
++		{ "fs/spnfs/layoutsegsize",	&layoutsegsize_ops },
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++	};
++	struct proc_dir_entry *entry;
++	unsigned int i;
++
++	entry = proc_mkdir("fs/spnfs", NULL);
++	if (!entry)
++		return -ENOMEM;
++
++	/* entries are created in table order, stopping at the first failure */
++	for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
++		entry = create_proc_entry(files[i].path, 0, NULL);
++		if (!entry)
++			return -ENOMEM;
++		entry->proc_fops = files[i].fops;
++	}
++
++	return 0;
++}
++#endif /* CONFIG_PROC_FS */
+diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-08-31 20:42:05.571097807 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-08-31 20:42:05.572091128 -0400
+@@ -0,0 +1,878 @@
++/*
++ * fs/nfsd/spnfs_ops.c
++ *
++ * Communication layer between spNFS kernel and userspace
++ *
++ */
++/******************************************************************************
++
++(c) 2007 Network Appliance, Inc.  All Rights Reserved.
++
++Network Appliance provides this source code under the GPL v2 License.
++The GPL v2 license is available at
++http://opensource.org/licenses/gpl-license.php.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++******************************************************************************/
++
++#include <linux/sched.h>
++#include <linux/file.h>
++#include <linux/namei.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++#include "pnfsd.h"
++
++/* comment out CONFIG_SPNFS_TEST for non-test behaviour */
++/* #define CONFIG_SPNFS_TEST 1 */
++
++#define	NFSDDBG_FACILITY		NFSDDBG_PNFS
++
++/*
++ * The functions that are called from elsewhere in the kernel
++ * to perform tasks in userspace
++ *
++ */
++
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++extern int spnfs_use_layoutsegments;
++extern uint64_t layoutsegment_size;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++extern struct spnfs *global_spnfs;
++
++/* Always reports the NFSv4.1 files layout type; sb is not consulted */
++int spnfs_layout_type(struct super_block *sb)
++{
++	return LAYOUT_NFSV4_1_FILES;
++}
++
++/* LAYOUTGET: ask spnfsd how the file is striped and encode a file layout */
++enum nfsstat4
++spnfs_layoutget(struct inode *inode, struct exp_xdr_stream *xdr,
++		const struct nfsd4_pnfs_layoutget_arg *lg_arg,
++		struct nfsd4_pnfs_layoutget_res *lg_res)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	struct pnfs_filelayout_layout *flp = NULL;
++	int status, i;
++	enum nfsstat4 nfserr;
++
++	/* zeroed: spnfs_upcall() queues the entire struct on the pipe */
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	im->im_type = SPNFS_TYPE_LAYOUTGET;
++	im->im_args.layoutget_args.inode = inode->i_ino;
++	im->im_args.layoutget_args.generation = inode->i_generation;
++
++	/* call function to queue the msg for upcall */
++	if (spnfs_upcall(spnfs, im, res) != 0) {
++		dprintk("failed spnfs upcall: layoutget\n");
++		nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		goto layoutget_cleanup;
++	}
++	status = res->layoutget_res.status;
++	if (status != 0) {
++		/* FIXME? until user mode is fixed, translate system error */
++		switch (status) {
++		case -E2BIG:
++		case -ETOOSMALL:
++			nfserr = NFS4ERR_TOOSMALL;
++			break;
++		case -ENOMEM:
++		case -EAGAIN:
++		case -EINTR:
++			nfserr = NFS4ERR_LAYOUTTRYLATER;
++			break;
++		case -ENOENT:
++			nfserr = NFS4ERR_BADLAYOUT;
++			break;
++		default:
++			nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		}
++		dprintk("spnfs layout_get upcall: status=%d nfserr=%u\n",
++			status, nfserr);
++		goto layoutget_cleanup;
++	}
++
++	lg_res->lg_return_on_close = 0;
++#if defined(CONFIG_SPNFS_LAYOUTSEGMENTS)
++	/* if spnfs_use_layoutsegments & layoutsegment_size == 0, use */
++	/* the amount requested by the client.			      */
++	if (spnfs_use_layoutsegments) {
++		if (layoutsegment_size != 0)
++			lg_res->lg_seg.length = layoutsegment_size;
++	} else
++		lg_res->lg_seg.length = NFS4_MAX_UINT64;
++#else
++	lg_res->lg_seg.length = NFS4_MAX_UINT64;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++	flp = kmalloc(sizeof(struct pnfs_filelayout_layout), GFP_KERNEL);
++	if (flp == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	flp->device_id.sbid = lg_arg->lg_sbid;
++	flp->device_id.devid = res->layoutget_res.devid;
++	flp->lg_layout_type = 1; /* XXX */
++	flp->lg_stripe_type = res->layoutget_res.stripe_type;
++	flp->lg_commit_through_mds = 0;
++	flp->lg_stripe_unit = res->layoutget_res.stripe_size;
++	flp->lg_first_stripe_index = 0;
++	flp->lg_pattern_offset = 0;
++	flp->lg_fh_length = res->layoutget_res.stripe_count;
++
++	flp->lg_fh_list = kmalloc(flp->lg_fh_length * sizeof(struct knfsd_fh),
++				  GFP_KERNEL);
++	if (flp->lg_fh_list == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	/*
++	 * FIX: Doing an extra copy here.  Should group res.flist's fh_len
++	 * and fh_val into a knfsd_fh structure.
++	 */
++	for (i = 0; i < flp->lg_fh_length; i++) {
++		flp->lg_fh_list[i].fh_size = res->layoutget_res.flist[i].fh_len;
++		memcpy(&flp->lg_fh_list[i].fh_base,
++		       res->layoutget_res.flist[i].fh_val,
++		       res->layoutget_res.flist[i].fh_len);
++	}
++
++	/* encode the layoutget body */
++	nfserr = filelayout_encode_layout(xdr, flp);
++
++layoutget_cleanup:
++	if (flp) {
++		kfree(flp->lg_fh_list);	/* kfree(NULL) is a no-op */
++		kfree(flp);
++	}
++	kfree(im);
++	kfree(res);
++	return nfserr;
++}
++
++/* Placeholder: does no work and always returns 0 */
++int spnfs_layoutcommit(void)
++{
++	return 0;
++}
++
++/* Placeholder: both arguments are ignored and 0 is always returned */
++int spnfs_layoutreturn(struct inode *inode,
++		       const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	return 0;
++}
++
++/* Issue a layout recall callback; only RETURN_FILE is implemented so far */
++int
++spnfs_layoutrecall(struct inode *inode, int type, u64 offset, u64 len)
++{
++	struct nfsd4_pnfs_cb_layout lr;
++
++	switch (type) {
++	case RETURN_FILE:
++		dprintk("%s: recalling layout for ino = %lu\n",
++			__func__, inode->i_ino);
++		break;
++	case RETURN_FSID:
++		dprintk("%s: recalling layout for fsid x (unimplemented)\n",
++			__func__);
++		return 0;
++	case RETURN_ALL:
++		/* XXX figure out how to get a sb since there's no inode ptr */
++		dprintk("%s: recalling all layouts (unimplemented)\n",
++			__func__);
++		return 0;
++	default:
++		return -EINVAL;
++	}
++
++	/* start from zeroes so members not set below are not stack garbage */
++	memset(&lr, 0, sizeof(lr));
++	lr.cbl_recall_type = type;
++	lr.cbl_seg.layout_type = LAYOUT_NFSV4_1_FILES;
++	lr.cbl_seg.clientid = 0;
++	lr.cbl_seg.offset = offset;
++	lr.cbl_seg.length = len;
++	lr.cbl_seg.iomode = IOMODE_ANY;
++	lr.cbl_layoutchanged = 0;
++
++	nfsd_layout_recall_cb(inode->i_sb, inode, &lr);
++
++	return 0;
++}
++
++
++/*
++ * Layout recall by path name: the literal path "all" selects RETURN_ALL,
++ * any other path is looked up and recalled with RETURN_FILE.
++ * Returns -ENOENT if the lookup fails, else spnfs_layoutrecall()'s result.
++ */
++int
++spnfs_test_layoutrecall(char *path, u64 offset, u64 len)
++{
++	struct nameidata lookup;
++	int status;
++
++	dprintk("%s: path=%s, offset=%llu, len=%llu\n",
++		__func__, path, offset, len);
++
++	/* a zero length is replaced by NFS4_MAX_UINT64 */
++	if (len == 0)
++		len = NFS4_MAX_UINT64;
++
++	/* "all" carries no inode, so there is nothing to look up or put */
++	if (strcmp(path, "all") == 0)
++		return spnfs_layoutrecall(NULL, RETURN_ALL, offset, len);
++
++	if (path_lookup(path, 0, &lookup) != 0)
++		return -ENOENT;
++
++	/*
++	 * XXX todo: add a RETURN_FSID scenario here...maybe if
++	 * inode is a dir...
++	 */
++	status = spnfs_layoutrecall(lookup.path.dentry->d_inode, RETURN_FILE,
++				    offset, len);
++
++	/* drop the reference path_lookup() took */
++	path_put(&lookup.path);
++	return status;
++}
++
++/* Upcall with gd_res's cookie/verf; store the next devid or EOF back in it */
++int
++spnfs_getdeviceiter(struct super_block *sb,
++		    u32 layout_type,
++		    struct nfsd4_pnfs_dev_iter_res *gd_res)
++{
++	struct spnfs *spnfs = global_spnfs;   /* XXX keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++
++	/* zeroed: spnfs_upcall() queues the entire struct on the pipe */
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto getdeviceiter_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto getdeviceiter_out;
++	}
++
++	im->im_type = SPNFS_TYPE_GETDEVICEITER;
++	im->im_args.getdeviceiter_args.cookie = gd_res->gd_cookie;
++	im->im_args.getdeviceiter_args.verf = gd_res->gd_verf;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto getdeviceiter_out;
++	}
++	status = res->getdeviceiter_res.status;
++	if (res->getdeviceiter_res.eof)
++		gd_res->gd_eof = 1;
++	else {
++		gd_res->gd_devid = res->getdeviceiter_res.devid;
++		gd_res->gd_cookie = res->getdeviceiter_res.cookie;
++		gd_res->gd_verf = res->getdeviceiter_res.verf;
++		gd_res->gd_eof = 0;
++	}
++
++getdeviceiter_out:
++	kfree(im);
++	kfree(res);
++	return status;
++}
++
++#ifdef CONFIG_SPNFS_TEST
++/*
++ * Setup the rq_res xdr_buf.  The svc_rqst rq_respages[1] page contains the
++ * 1024 encoded stripe indices.
++ *
++ * Skip the devaddr4 length and encode the indices count (1024) in the
++ * rq_res.head and set the rq_res.head length.
++ *
++ * Set the rq_res page_len to 4096 (for the 1024 stripe indices).
++ * Set the rq_res xdr_buf tail base to rq_respages[0] just after the
++ * rq_res head to hold the rest of the getdeviceinfo return.
++ *
++ * So rq_respages[rq_resused - 1] contains the rq_res.head and rq_res.tail and
++ * rq_respages[rq_resused] contains the rq_res.pages.
++ */
++static int spnfs_test_indices_xdr(struct pnfs_xdr_info *info,
++				  const struct pnfs_filelayout_device *fdev)
++{
++	struct nfsd4_compoundres *resp = info->resp;
++	struct svc_rqst *rqstp = resp->rqstp;
++	struct xdr_buf *xb = &resp->rqstp->rq_res;
++	__be32 *p;
++
++	p = nfsd4_xdr_reserve_space(resp, 8);
++	p++; /* Fill in length later */
++	*p++ = cpu_to_be32(fdev->fl_stripeindices_length); /* 1024 */
++	resp->p = p;
++
++	xb->head[0].iov_len = (char *)resp->p - (char *)xb->head[0].iov_base;
++	xb->pages = &rqstp->rq_respages[rqstp->rq_resused];
++	xb->page_base = 0;
++	xb->page_len = PAGE_SIZE; /* page of 1024 encoded indices */
++	xb->tail[0].iov_base = resp->p;
++	resp->end = xb->head[0].iov_base + PAGE_SIZE;
++	xb->tail[0].iov_len = (char *)resp->end - (char *)resp->p;
++	return 0;
++}
++/*
++ * Return a stripeindices of length 1024 to test
++ * the pNFS client multipage getdeviceinfo implementation.
++ *
++ * Encode a page of stripe indices.
++ */
++static void spnfs_set_test_indices(struct pnfs_filelayout_device *fldev,
++				  struct spnfs_device *dev,
++				  struct pnfs_devinfo_arg *info)
++{
++	struct svc_rqst *rqstp = info->xdr.resp->rqstp;
++	__be32 *p;
++	int i, j = 0;
++
++	p = (__be32 *)page_address(rqstp->rq_respages[rqstp->rq_resused]);
++	fldev->fl_stripeindices_length = 1024;
++	/* round-robin the data server device indices into the stripe indices */
++	for (i = 0; i < 1024; i++) {
++		*p++ = cpu_to_be32(j);
++		if (j < dev->dscount - 1)
++			j++;
++		else
++			j = 0;
++	}
++	fldev->fl_stripeindices_list = NULL;
++}
++#endif /* CONFIG_SPNFS_TEST */
++
++/* GETDEVICEINFO: fetch the data server list from spnfsd and encode it */
++int
++spnfs_getdeviceinfo(struct super_block *sb, struct exp_xdr_stream *xdr,
++		    u32 layout_type,
++		    const struct nfsd4_pnfs_deviceid *devid)
++{
++	struct spnfs *spnfs = global_spnfs;
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	struct spnfs_device *dev;
++	struct pnfs_filelayout_device *fldev = NULL;
++	struct pnfs_filelayout_multipath *mp = NULL;
++	struct pnfs_filelayout_devaddr *fldap = NULL;
++	int status = 0, i, len;
++
++	/* zeroed: spnfs_upcall() queues the entire struct on the pipe */
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++
++	im->im_type = SPNFS_TYPE_GETDEVICEINFO;
++	/* XXX FIX: figure out what to do about fsid */
++	im->im_args.getdeviceinfo_args.devid = devid->devid;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto getdeviceinfo_out;
++	}
++	status = res->getdeviceinfo_res.status;
++	if (status != 0)
++		goto getdeviceinfo_out;
++
++	dev = &res->getdeviceinfo_res.devinfo;
++
++	/* Fill in the device data, i.e., nfs4_1_file_layout_ds_addr4 */
++	fldev = kzalloc(sizeof(struct pnfs_filelayout_device), GFP_KERNEL);
++	if (fldev == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++
++	/*
++	 * Stripe count is the same as data server count for our purposes
++	 */
++	fldev->fl_stripeindices_length = dev->dscount;
++	fldev->fl_device_length = dev->dscount;
++
++	/* Set stripe indices */
++#ifdef CONFIG_SPNFS_TEST
++	spnfs_set_test_indices(fldev, dev, info);
++	fldev->fl_enc_stripe_indices = spnfs_test_indices_xdr;
++#else /* CONFIG_SPNFS_TEST */
++	fldev->fl_stripeindices_list =
++		kmalloc(fldev->fl_stripeindices_length * sizeof(u32),
++			GFP_KERNEL);
++	if (fldev->fl_stripeindices_list == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++	for (i = 0; i < fldev->fl_stripeindices_length; i++)
++		fldev->fl_stripeindices_list[i] = i;
++#endif /* CONFIG_SPNFS_TEST */
++
++	/*
++	 * Set the device's data server addresses.  No multipath for spnfs,
++	 * so mp length is always 1.  The list and its entries are zeroed so
++	 * the cleanup code can tell which ones were never populated.
++	 */
++	fldev->fl_device_list =
++		kzalloc(fldev->fl_device_length *
++			sizeof(struct pnfs_filelayout_multipath),
++			GFP_KERNEL);
++	if (fldev->fl_device_list == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++	for (i = 0; i < fldev->fl_device_length; i++) {
++		mp = &fldev->fl_device_list[i];
++		mp->fl_multipath_length = 1;
++		mp->fl_multipath_list =
++			kzalloc(sizeof(struct pnfs_filelayout_devaddr),
++				GFP_KERNEL);
++		if (mp->fl_multipath_list == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		fldap = mp->fl_multipath_list;
++
++		/* Copy the netid into the device address, for example: "tcp" */
++		len = strlen(dev->dslist[i].netid);
++		fldap->r_netid.data = kmalloc(len, GFP_KERNEL);
++		if (fldap->r_netid.data == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		memcpy(fldap->r_netid.data, dev->dslist[i].netid, len);
++		fldap->r_netid.len = len;
++
++		/* Copy the network address into the device address, */
++		/* for example: "10.35.9.16.08.01" */
++		len = strlen(dev->dslist[i].addr);
++		fldap->r_addr.data = kmalloc(len, GFP_KERNEL);
++		if (fldap->r_addr.data == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		memcpy(fldap->r_addr.data, dev->dslist[i].addr, len);
++		fldap->r_addr.len = len;
++	}
++
++	/* encode the device data */
++	status = filelayout_encode_devinfo(xdr, fldev);
++
++getdeviceinfo_out:
++	if (fldev) {
++		kfree(fldev->fl_stripeindices_list);
++		if (fldev->fl_device_list) {
++			for (i = 0; i < fldev->fl_device_length; i++) {
++				fldap =
++				    fldev->fl_device_list[i].fl_multipath_list;
++				if (fldap == NULL)
++					break;	/* filled in order: rest unset */
++				kfree(fldap->r_netid.data);
++				kfree(fldap->r_addr.data);
++				kfree(fldap);
++			}
++			kfree(fldev->fl_device_list);
++		}
++		kfree(fldev);
++	}
++
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++/* Placeholder: does no work and always returns 0 */
++int spnfs_setattr(void)
++{
++	return 0;
++}
++
++/* Report an open to spnfsd; returns its status, or -ENOMEM/-EIO locally */
++int spnfs_open(struct inode *inode, struct nfsd4_open *open)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++
++	/* zeroed: spnfs_upcall() queues the entire struct on the pipe */
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto open_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto open_out;
++	}
++
++	im->im_type = SPNFS_TYPE_OPEN;
++	im->im_args.open_args.inode = inode->i_ino;
++	im->im_args.open_args.generation = inode->i_generation;
++	im->im_args.open_args.create = open->op_create;
++	im->im_args.open_args.createmode = open->op_createmode;
++	im->im_args.open_args.truncate = open->op_truncate;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto open_out;
++	}
++	status = res->open_res.status;
++
++open_out:
++	kfree(im);
++	kfree(res);
++	return status;
++}
++
++/* Placeholder: does no work and always returns 0 */
++int spnfs_create(void)
++{
++	return 0;
++}
++
++/*
++ * Invokes the spnfsd with the inode number of the object to remove.
++ * The file has already been removed on the MDS, so all the spnfsd
++ * daemon does is remove the stripes.
++ * Returns 0 on success otherwise error code
++ */
++int
++spnfs_remove(unsigned long ino, unsigned long generation)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++
++	/* zeroed: spnfs_upcall() queues the entire struct on the pipe */
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto remove_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto remove_out;
++	}
++
++	im->im_type = SPNFS_TYPE_REMOVE;
++	im->im_args.remove_args.inode = ino;
++	im->im_args.remove_args.generation = generation;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto remove_out;
++	}
++	status = res->remove_res.status;
++
++remove_out:
++	kfree(im);
++	kfree(res);
++	return status;
++}
++
++/* Read len bytes at offset into buf, stripe by stripe; bytes read or -EIO */
++static int
++read_one(struct inode *inode, loff_t offset, size_t len, char *buf,
++	 struct file **filp)
++{
++	loff_t copied = 0, ds_off, pos, in_stripe, tmp;
++	size_t chunk;
++	int total = 0, ds, nread;
++
++	while (len > 0) {
++		/* in_stripe: byte offset inside the stripe unit holding offset */
++		tmp = offset;
++		in_stripe = do_div(tmp, spnfs_config->stripe_size);
++		/* do_div leaves tmp = stripe number / num_ds, returns ds index */
++		ds = do_div(tmp, spnfs_config->num_ds);
++		if (spnfs_config->dense_striping == 0)
++			ds_off = offset;
++		else
++			ds_off = tmp * spnfs_config->stripe_size + in_stripe;
++		/* clamp the chunk to the end of the current stripe unit */
++		if (len < spnfs_config->stripe_size - in_stripe)
++			chunk = len;
++		else
++			chunk = spnfs_config->stripe_size - in_stripe;
++
++		pos = ds_off;
++		nread = vfs_read(filp[ds], buf + copied, chunk, &pos);
++		if (nread < 0)
++			return -EIO;
++		if (nread == 0)
++			break;
++		filp[ds]->f_pos = pos;
++		chunk = nread;
++		total += chunk;
++		len -= chunk;
++		offset += chunk;
++		copied += chunk;
++	}
++
++	return total;
++}
++
++/* Open each data server's file, then fill rq_vec[] via read_one() */
++static __be32
++read(struct inode *inode, loff_t offset, unsigned long *lenp, int vlen,
++     struct svc_rqst *rqstp)
++{
++	int i, vnum, err, bytecount = 0;
++	char path[128];
++	struct file *filp[SPNFS_MAX_DATA_SERVERS];
++	size_t iolen;
++	__be32 status = nfs_ok;
++
++	/*
++	 * XXX We should just be doing this at open time, but it gets
++	 * kind of messy storing this info in nfsd's state structures
++	 * and piggybacking its path through the various state handling
++	 * functions.  Revisit this.
++	 */
++	memset(filp, 0, sizeof(filp));
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		snprintf(path, sizeof(path), "%s/%ld.%u",
++			 spnfs_config->ds_dir[i], inode->i_ino,
++			 inode->i_generation);
++		filp[i] = filp_open(path, O_RDONLY | O_LARGEFILE, 0);
++		if (IS_ERR(filp[i])) {
++			filp[i] = NULL;	/* ERR_PTR must not reach cleanup */
++			status = nfserr_io;
++			goto read_out;
++		}
++		get_file(filp[i]);
++	}
++
++	for (vnum = 0 ; vnum < vlen ; vnum++) {
++		iolen = rqstp->rq_vec[vnum].iov_len;
++		err = read_one(inode, offset + bytecount, iolen,
++			       (char *)rqstp->rq_vec[vnum].iov_base, filp);
++		if (err < 0) {
++			status = nfserr_io;
++			goto read_out;
++		}
++		bytecount += err;
++		if (err < iolen)	/* short read: stop here */
++			goto read_out;
++	}
++
++read_out:
++	*lenp = bytecount;
++	/* files were opened in order, so the first NULL slot ends the list */
++	for (i = 0; i < spnfs_config->num_ds && filp[i]; i++) {
++		filp_close(filp[i], current->files);
++		fput(filp[i]);
++	}
++	return status;
++}
++
++/* Entry point for reads; refuses with nfserr_notsupp until spnfs_config is set */
++__be32
++spnfs_read(struct inode *inode, loff_t offset, unsigned long *lenp, int vlen,
++	   struct svc_rqst *rqstp)
++{
++	if (!spnfs_config) {
++		printk(KERN_ERR "Please upgrade to latest spnfsd\n");
++		return nfserr_notsupp;
++	}
++	return read(inode, offset, lenp, vlen, rqstp);
++}
++
++/* Write len bytes from buf at offset, stripe by stripe; bytes or -EIO */
++static int
++write_one(struct inode *inode, loff_t offset, size_t len, char *buf,
++	  struct file **filp)
++{
++	loff_t bufoffset = 0, soffset, pos, soff, tmp;
++	size_t iolen;
++	int completed = 0, ds, err;
++
++	while (len > 0) {
++		tmp = offset;
++		soff = do_div(tmp, spnfs_config->stripe_size);
++		/* do_div leaves tmp = stripe number / num_ds, returns ds index */
++		ds = do_div(tmp, spnfs_config->num_ds);
++		if (spnfs_config->dense_striping == 0)
++			soffset = offset;
++		else
++			soffset = tmp * spnfs_config->stripe_size + soff;
++		/* clamp the chunk to the end of the current stripe unit */
++		if (len < spnfs_config->stripe_size - soff)
++			iolen = len;
++		else
++			iolen = spnfs_config->stripe_size - soff;
++
++		pos = soffset;
++		err = vfs_write(filp[ds], buf + bufoffset, iolen, &pos);
++		/* a zero-byte write would otherwise spin in this loop forever */
++		if (err <= 0)
++			return -EIO;
++		filp[ds]->f_pos = pos;
++		iolen = err;
++		completed += iolen;
++		len -= iolen;
++		offset += iolen;
++		bufoffset += iolen;
++	}
++
++	return completed;
++}
++
++/* Open each data server's file, then drain rq_vec[] via write_one() */
++static __be32
++write(struct inode *inode, loff_t offset, size_t len, int vlen,
++      struct svc_rqst *rqstp)
++{
++	int i, vnum, err, bytecount = 0;
++	char path[128];
++	struct file *filp[SPNFS_MAX_DATA_SERVERS];
++	size_t iolen;
++	__be32 status = nfs_ok;
++
++	/*
++	 * XXX We should just be doing this at open time, but it gets
++	 * kind of messy storing this info in nfsd's state structures
++	 * and piggybacking its path through the various state handling
++	 * functions.  Revisit this.
++	 */
++	memset(filp, 0, sizeof(filp));
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		snprintf(path, sizeof(path), "%s/%ld.%u",
++			 spnfs_config->ds_dir[i], inode->i_ino,
++			 inode->i_generation);
++		filp[i] = filp_open(path, O_RDWR | O_LARGEFILE, 0);
++		if (IS_ERR(filp[i])) {
++			filp[i] = NULL;	/* ERR_PTR must not reach cleanup */
++			status = nfserr_io;
++			goto write_out;
++		}
++		get_file(filp[i]);
++	}
++
++	for (vnum = 0; vnum < vlen; vnum++) {
++		iolen = rqstp->rq_vec[vnum].iov_len;
++		err = write_one(inode, offset + bytecount, iolen,
++				(char *)rqstp->rq_vec[vnum].iov_base, filp);
++		if (err != iolen) {
++			dprintk("spnfs_write: err=%d expected %Zd\n", err, iolen);
++			status = nfserr_io;
++			goto write_out;
++		}
++		bytecount += rqstp->rq_vec[vnum].iov_len;
++	}
++
++write_out:
++	for (i = 0; i < spnfs_config->num_ds && filp[i]; i++) {
++		filp_close(filp[i], current->files);
++		fput(filp[i]);
++	}
++	return status;
++}
++
++/* Entry point for writes; refuses with nfserr_notsupp until spnfs_config is set */
++__be32
++spnfs_write(struct inode *inode, loff_t offset, size_t len, int vlen,
++	    struct svc_rqst *rqstp)
++{
++	if (!spnfs_config) {
++		printk(KERN_ERR "Please upgrade to latest spnfsd\n");
++		return nfserr_notsupp;
++	}
++	return write(inode, offset, len, vlen, rqstp);
++}
++
++/* Placeholder: does no work and always returns 0 */
++int spnfs_commit(void)
++{
++	return 0;
++}
++
++/*
++ * Return the state for this object.  For now this always reports success
++ * (0) without touching arg, so the existing state stays in use.
++ */
++int spnfs_get_state(struct inode *inode, struct knfsd_fh *fh,
++		    struct pnfs_get_state *arg)
++{
++	return 0;
++}
++
++/*
++ * Copy the NFS filehandle of the file open on descriptor fd into *fh.
++ * Returns 0, or -EIO when fd does not name an open file.
++ * NOTE(review): the inode is not checked to be an NFS inode before NFS_FH().
++ */
++int
++spnfs_getfh(int fd, struct nfs_fh *fh)
++{
++	struct file *src = fget(fd);
++
++	if (src == NULL)
++		return -EIO;
++	memcpy(fh, NFS_FH(src->f_dentry->d_inode), sizeof(*fh));
++	fput(src);
++	return 0;
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
+--- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-08-31 20:41:19.205016844 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-31 20:42:05.572091128 -0400
+@@ -242,6 +242,12 @@ struct nfs4_client {
+ 	u32			cl_cb_seq_nr;
+ 	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
+ 						/* wait here for slots */
++#if defined(CONFIG_PNFSD)
++	struct list_head	cl_layouts;	/* outstanding layouts */
++	struct list_head	cl_layoutrecalls; /* outstanding layoutrecall
++						     callbacks */
++	atomic_t		cl_deviceref;	/* Num outstanding devs */
++#endif /* CONFIG_PNFSD */
+ };
+ 
+ static inline void
+@@ -342,12 +348,31 @@ struct nfs4_file {
+ 	struct list_head        fi_hash;    /* hash by "struct inode *" */
+ 	struct list_head        fi_stateids;
+ 	struct list_head	fi_delegations;
++#if defined(CONFIG_PNFSD)
++	struct list_head	fi_layouts;
++	struct list_head	fi_layout_states;
++#endif /* CONFIG_PNFSD */
+ 	struct inode		*fi_inode;
+ 	u32                     fi_id;      /* used with stateowner->so_id 
+ 					     * for stateid_hashtbl hash */
+ 	bool			fi_had_conflict;
++#if defined(CONFIG_PNFSD)
++	/* used by layoutget / layoutrecall */
++	struct nfs4_fsid	fi_fsid;
++	u32			fi_fhlen;
++	u8			fi_fhval[NFS4_FHSIZE];
++#endif /* CONFIG_PNFSD */
+ };
+ 
++#if defined(CONFIG_PNFSD)
++/* pNFS Metadata server state */
++
++struct pnfs_ds_dev_entry {
++	struct list_head	dd_dev_entry; /* st_pnfs_ds_id entry */
++	u32			dd_dsid;
++};
++#endif /* CONFIG_PNFSD */
++
+ /*
+ * nfs4_stateid can either be an open stateid or (eventually) a lock stateid
+ *
+@@ -370,6 +395,9 @@ struct nfs4_stateid {
+ 	struct list_head              st_perfile;
+ 	struct list_head              st_perstateowner;
+ 	struct list_head              st_lockowners;
++#if defined(CONFIG_PNFSD)
++	struct list_head              st_pnfs_ds_id;
++#endif /* CONFIG_PNFSD */
+ 	struct nfs4_stateowner      * st_stateowner;
+ 	struct nfs4_file            * st_file;
+ 	stateid_t                     st_stateid;
+@@ -421,6 +449,34 @@ extern void nfsd4_recdir_purge_old(void)
+ extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
+ extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
+ extern void release_session_client(struct nfsd4_session *);
++extern void nfsd4_free_slab(struct kmem_cache **);
++extern struct nfs4_file *find_file(struct inode *);
++extern struct nfs4_file *find_alloc_file(struct inode *, struct svc_fh *);
++extern void put_nfs4_file(struct nfs4_file *);
++extern void get_nfs4_file(struct nfs4_file *);
++extern struct nfs4_client *find_confirmed_client(clientid_t *);
++extern struct nfs4_stateid *find_stateid(stateid_t *, int flags);
++extern struct nfs4_delegation *find_delegation_stateid(struct inode *, stateid_t *);
++extern __be32 nfs4_check_stateid(stateid_t *);
++extern void expire_client_lock(struct nfs4_client *);
++extern int filter_confirmed_clients(int (* func)(struct nfs4_client *, void *), void *);
++
++#if defined(CONFIG_PNFSD)
++extern int nfsd4_init_pnfs_slabs(void);
++extern void nfsd4_free_pnfs_slabs(void);
++extern void pnfs_expire_client(struct nfs4_client *);
++extern void release_pnfs_ds_dev_list(struct nfs4_stateid *);
++extern void nfs4_pnfs_state_init(void);
++extern void nfs4_pnfs_state_shutdown(void);
++extern void nfs4_ds_get_verifier(stateid_t *, struct super_block *, u32 *);
++extern int nfs4_preprocess_pnfs_ds_stateid(struct svc_fh *, stateid_t *);
++#else /* CONFIG_PNFSD */
++static inline void nfsd4_free_pnfs_slabs(void) {}
++static inline int nfsd4_init_pnfs_slabs(void) { return 0; }
++static inline void pnfs_expire_client(struct nfs4_client *clp) {}
++static inline void release_pnfs_ds_dev_list(struct nfs4_stateid *stp) {}
++static inline void nfs4_pnfs_state_shutdown(void) {}
++#endif /* CONFIG_PNFSD */
+ 
+ static inline void
+ nfs4_put_stateowner(struct nfs4_stateowner *so)
+@@ -434,4 +490,24 @@ nfs4_get_stateowner(struct nfs4_stateown
+ 	kref_get(&so->so_ref);
+ }
+ 
++static inline u64
++end_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	end = start + len;
++	return end >= start ? end : NFS4_MAX_UINT64;
++}
++
++/* last octet in a range */
++static inline u64
++last_byte_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	BUG_ON(!len);
++	end = start + len;
++	return end > start ? end - 1 : NFS4_MAX_UINT64;
++}
++
+ #endif   /* NFSD4_STATE_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.c
+--- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-08-31 20:41:17.275233561 -0400
++++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-08-31 20:42:05.573121119 -0400
+@@ -37,7 +37,12 @@
+ #ifdef CONFIG_NFSD_V4
+ #include <linux/nfs4_acl.h>
+ #include <linux/nfsd_idmap.h>
++#include <linux/security.h>
++#include <linux/nfsd4_spnfs.h>
+ #endif /* CONFIG_NFSD_V4 */
++#if defined(CONFIG_SPNFS_BLOCK)
++#include <linux/nfsd4_block.h>
++#endif
+ 
+ #include "nfsd.h"
+ #include "vfs.h"
+@@ -383,6 +388,12 @@ nfsd_setattr(struct svc_rqst *rqstp, str
+ 					NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
+ 			if (err)
+ 				goto out;
++#if defined(CONFIG_SPNFS_BLOCK)
++			if (pnfs_block_enabled(inode, 0)) {
++				err = bl_layoutrecall(inode, RETURN_FILE,
++				    iap->ia_size, inode->i_size - iap->ia_size);
++			}
++#endif /* CONFIG_SPNFS_BLOCK */
+ 		}
+ 
+ 		/*
+@@ -1703,6 +1714,11 @@ nfsd_rename(struct svc_rqst *rqstp, stru
+ 	struct inode	*fdir, *tdir;
+ 	__be32		err;
+ 	int		host_err;
++#ifdef CONFIG_SPNFS
++	unsigned long ino = 0;
++	unsigned long generation = 0;
++	unsigned int nlink = 0;
++#endif /* CONFIG_SPNFS */
+ 
+ 	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
+ 	if (err)
+@@ -1766,7 +1782,26 @@ nfsd_rename(struct svc_rqst *rqstp, stru
+ 	if (host_err)
+ 		goto out_dput_new;
+ 
++#ifdef CONFIG_SPNFS
++	/*
++	 * if the target is a preexisting regular file, remember the
++	 * inode number and generation so we can delete the stripes;
++	 * save the link count as well so that the stripes only get
++	 * get deleted when the last link is deleted
++	 */
++	if (ndentry && ndentry->d_inode && S_ISREG(ndentry->d_inode->i_mode)) {
++		ino = ndentry->d_inode->i_ino;
++		generation = ndentry->d_inode->i_generation;
++		nlink = ndentry->d_inode->i_nlink;
++	}
++#endif /* CONFIG_SPNFS */
++
+ 	host_err = vfs_rename(fdir, odentry, tdir, ndentry);
++#ifdef CONFIG_SPNFS
++	if (spnfs_enabled() && (!host_err && ino && nlink == 1))
++		spnfs_remove(ino, generation);
++#endif /* CONFIG_SPNFS */
++
+ 	if (!host_err) {
+ 		host_err = commit_metadata(tfhp);
+ 		if (!host_err)
+@@ -1807,6 +1842,11 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 	struct inode	*dirp;
+ 	__be32		err;
+ 	int		host_err;
++#if defined(CONFIG_SPNFS)
++	unsigned long	ino;
++	unsigned long	generation;
++	unsigned int	nlink;
++#endif /* defined(CONFIG_SPNFS) */
+ 
+ 	err = nfserr_acces;
+ 	if (!flen || isdotent(fname, flen))
+@@ -1830,6 +1870,17 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 		goto out;
+ 	}
+ 
++#if defined(CONFIG_SPNFS)
++	/*
++	 * Remember the inode number to communicate to the spnfsd
++	 * for removal of stripes; save the link count as well so that
++	 * the stripes only get deleted when the last link is deleted
++	 */
++	ino = rdentry->d_inode->i_ino;
++	generation = rdentry->d_inode->i_generation;
++	nlink = rdentry->d_inode->i_nlink;
++#endif /* defined(CONFIG_SPNFS) */
++
+ 	if (!type)
+ 		type = rdentry->d_inode->i_mode & S_IFMT;
+ 
+@@ -1854,6 +1905,29 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 	if (!host_err)
+ 		host_err = commit_metadata(fhp);
+ 
++#if defined(CONFIG_SPNFS)
++	/*
++	 * spnfs: notify spnfsd of removal to destroy stripes
++	 */
++/*
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (sb->s_export_op->spnfs_remove) {
++*/
++	dprintk("%s check if spnfs_enabled\n", __FUNCTION__);
++	if (spnfs_enabled() && nlink == 1) {
++		BUG_ON(ino == 0);
++		dprintk("%s calling spnfs_remove inumber=%ld\n",
++			__FUNCTION__, ino);
++		if (spnfs_remove(ino, generation) == 0) {
++			dprintk("%s spnfs_remove success\n", __FUNCTION__);
++		} else {
++			/* XXX How do we make this atomic? */
++			printk(KERN_WARNING "nfsd: pNFS could not "
++				"remove stripes for inode: %ld\n", ino);
++		}
++	}
++#endif /* defined(CONFIG_SPNFS) */
++
+ 	mnt_drop_write(fhp->fh_export->ex_path.mnt);
+ out_nfserr:
+ 	err = nfserrno(host_err);
+diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
+--- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-08-31 20:41:19.206170424 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-31 20:42:05.575139084 -0400
+@@ -37,6 +37,8 @@
+ #ifndef _LINUX_NFSD_XDR4_H
+ #define _LINUX_NFSD_XDR4_H
+ 
++#include <linux/nfsd/nfsd4_pnfs.h>
++
+ #include "state.h"
+ #include "nfsd.h"
+ 
+@@ -385,6 +387,51 @@ struct nfsd4_reclaim_complete {
+ 	u32 rca_one_fs;
+ };
+ 
++struct nfsd4_pnfs_getdevinfo {
++	struct nfsd4_pnfs_deviceid gd_devid;	/* request */
++	u32			gd_layout_type;	/* request */
++	u32			gd_maxcount;	/* request */
++	u32			gd_notify_types;/* request */
++	struct super_block	*gd_sb;
++};
++
++struct nfsd4_pnfs_getdevlist {
++	u32             gd_layout_type;	/* request */
++	u32		gd_maxdevices;	/* request */
++	u64		gd_cookie;	/* request - response */
++	u64		gd_verf;	/* request - response */
++	struct svc_fh 	*gd_fhp;	/* response */
++	u32		gd_eof;		/* response */
++};
++
++struct nfsd4_pnfs_layoutget {
++	u64			lg_minlength;	/* request */
++	u32			lg_signal;	/* request */
++	u32			lg_maxcount;	/* request */
++	struct svc_fh		*lg_fhp;	/* request */
++	stateid_t		lg_sid;		/* request/response */
++	struct nfsd4_layout_seg	lg_seg;		/* request/response */
++	u32			lg_roc;		/* response */
++};
++
++struct nfsd4_pnfs_layoutcommit {
++	struct nfsd4_pnfs_layoutcommit_arg args;
++	stateid_t		lc_sid;		/* request */
++	struct nfsd4_pnfs_layoutcommit_res res;
++};
++
++enum layoutreturn_flags {
++	LR_FLAG_INTERN = 1 << 0,	/* internal return */
++	LR_FLAG_EXPIRE = 1 << 1,	/* return on client expiration */
++};
++
++struct nfsd4_pnfs_layoutreturn {
++	struct nfsd4_pnfs_layoutreturn_arg args;
++	u32			lr_flags;
++	stateid_t		lr_sid;		/* request/response */
++	u32			lrs_present;	/* response */
++};
++
+ struct nfsd4_op {
+ 	int					opnum;
+ 	__be32					status;
+@@ -426,6 +473,13 @@ struct nfsd4_op {
+ 		struct nfsd4_destroy_session	destroy_session;
+ 		struct nfsd4_sequence		sequence;
+ 		struct nfsd4_reclaim_complete	reclaim_complete;
++#if defined(CONFIG_PNFSD)
++		struct nfsd4_pnfs_getdevlist	pnfs_getdevlist;
++		struct nfsd4_pnfs_getdevinfo	pnfs_getdevinfo;
++		struct nfsd4_pnfs_layoutget	pnfs_layoutget;
++		struct nfsd4_pnfs_layoutcommit	pnfs_layoutcommit;
++		struct nfsd4_pnfs_layoutreturn	pnfs_layoutreturn;
++#endif /* CONFIG_PNFSD */
+ 	} u;
+ 	struct nfs4_replay *			replay;
+ };
+diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.c
+--- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-31 20:41:19.146161064 -0400
++++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-31 20:42:05.515139585 -0400
+@@ -36,6 +36,7 @@
+ #include "internal.h"
+ #include "iostat.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_FILE
+ 
+@@ -388,12 +389,17 @@ static int nfs_write_begin(struct file *
+ 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ 	struct page *page;
+ 	int once_thru = 0;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
+ 		file->f_path.dentry->d_parent->d_name.name,
+ 		file->f_path.dentry->d_name.name,
+ 		mapping->host->i_ino, len, (long long) pos);
+ 
++	pnfs_update_layout(mapping->host,
++			   nfs_file_open_context(file),
++			   0, NFS4_MAX_UINT64, IOMODE_RW,
++			   &lseg);
+ start:
+ 	/*
+ 	 * Prevent starvation issues if someone is doing a consistency
+@@ -402,17 +408,22 @@ start:
+ 	ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
+ 			nfs_wait_bit_killable, TASK_KILLABLE);
+ 	if (ret)
+-		return ret;
++		goto out;
+ 
+ 	page = grab_cache_page_write_begin(mapping, index, flags);
+-	if (!page)
+-		return -ENOMEM;
++	if (!page) {
++		ret = -ENOMEM;
++		goto out;
++	}
+ 	*pagep = page;
+ 
+-	ret = nfs_flush_incompatible(file, page);
++	ret = nfs_flush_incompatible(file, page, lseg);
+ 	if (ret) {
+ 		unlock_page(page);
+ 		page_cache_release(page);
++		*pagep = NULL;
++		*fsdata = NULL;
++		goto out;
+ 	} else if (!once_thru &&
+ 		   nfs_want_read_modify_write(file, page, pos, len)) {
+ 		once_thru = 1;
+@@ -421,6 +432,12 @@ start:
+ 		if (!ret)
+ 			goto start;
+ 	}
++	ret = pnfs_write_begin(file, page, pos, len, lseg, fsdata);
++ out:
++	if (ret) {
++		put_lseg(lseg);
++		*fsdata = NULL;
++	}
+ 	return ret;
+ }
+ 
+@@ -430,6 +447,7 @@ static int nfs_write_end(struct file *fi
+ {
+ 	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+ 	int status;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n",
+ 		file->f_path.dentry->d_parent->d_name.name,
+@@ -456,10 +474,17 @@ static int nfs_write_end(struct file *fi
+ 			zero_user_segment(page, pglen, PAGE_CACHE_SIZE);
+ 	}
+ 
+-	status = nfs_updatepage(file, page, offset, copied);
++	lseg = nfs4_pull_lseg_from_fsdata(file, fsdata);
++	status = pnfs_write_end(file, page, pos, len, copied, lseg);
++	if (status)
++		goto out;
++	status = nfs_updatepage(file, page, offset, copied, lseg, fsdata);
+ 
++ out:
+ 	unlock_page(page);
+ 	page_cache_release(page);
++	pnfs_write_end_cleanup(file, fsdata);
++	put_lseg(lseg);
+ 
+ 	if (status < 0)
+ 		return status;
+@@ -570,6 +595,8 @@ static int nfs_vm_page_mkwrite(struct vm
+ 	/* make sure the cache has finished storing the page */
+ 	nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page);
+ 
++	/* XXX Do we want to call pnfs_update_layout here? */
++
+ 	lock_page(page);
+ 	mapping = page->mapping;
+ 	if (mapping != dentry->d_inode->i_mapping)
+@@ -580,11 +607,11 @@ static int nfs_vm_page_mkwrite(struct vm
+ 	if (pagelen == 0)
+ 		goto out_unlock;
+ 
+-	ret = nfs_flush_incompatible(filp, page);
++	ret = nfs_flush_incompatible(filp, page, NULL);
+ 	if (ret != 0)
+ 		goto out_unlock;
+ 
+-	ret = nfs_updatepage(filp, page, 0, pagelen);
++	ret = nfs_updatepage(filp, page, 0, pagelen, NULL, NULL);
+ out_unlock:
+ 	if (!ret)
+ 		return VM_FAULT_LOCKED;
+diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inode.c
+--- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-08-31 20:41:19.149170418 -0400
++++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-31 20:42:05.516222809 -0400
+@@ -48,6 +48,7 @@
+ #include "internal.h"
+ #include "fscache.h"
+ #include "dns_resolve.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_VFS
+ 
+@@ -278,7 +279,7 @@ nfs_fhget(struct super_block *sb, struct
+ 		 */
+ 		inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
+ 		if (S_ISREG(inode->i_mode)) {
+-			inode->i_fop = &nfs_file_operations;
++			inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
+ 			inode->i_data.a_ops = &nfs_file_aops;
+ 			inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
+ 		} else if (S_ISDIR(inode->i_mode)) {
+@@ -530,6 +531,68 @@ out:
+ 	return err;
+ }
+ 
++static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
++{
++	atomic_set(&l_ctx->count, 1);
++	l_ctx->lockowner = current->files;
++	l_ctx->pid = current->tgid;
++	INIT_LIST_HEAD(&l_ctx->list);
++}
++
++static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
++{
++	struct nfs_lock_context *pos;
++
++	list_for_each_entry(pos, &ctx->lock_context.list, list) {
++		if (pos->lockowner != current->files)
++			continue;
++		if (pos->pid != current->tgid)
++			continue;
++		atomic_inc(&pos->count);
++		return pos;
++	}
++	return NULL;
++}
++
++struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
++{
++	struct nfs_lock_context *res, *new = NULL;
++	struct inode *inode = ctx->path.dentry->d_inode;
++
++	spin_lock(&inode->i_lock);
++	res = __nfs_find_lock_context(ctx);
++	if (res == NULL) {
++		spin_unlock(&inode->i_lock);
++		new = kmalloc(sizeof(*new), GFP_KERNEL);
++		if (new == NULL)
++			return NULL;
++		nfs_init_lock_context(new);
++		spin_lock(&inode->i_lock);
++		res = __nfs_find_lock_context(ctx);
++		if (res == NULL) {
++			list_add_tail(&new->list, &ctx->lock_context.list);
++			new->open_context = ctx;
++			res = new;
++			new = NULL;
++		}
++	}
++	spin_unlock(&inode->i_lock);
++	kfree(new);
++	return res;
++}
++
++void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
++{
++	struct nfs_open_context *ctx = l_ctx->open_context;
++	struct inode *inode = ctx->path.dentry->d_inode;
++
++	if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock))
++		return;
++	list_del(&l_ctx->list);
++	spin_unlock(&inode->i_lock);
++	kfree(l_ctx);
++}
++
+ /**
+  * nfs_close_context - Common close_context() routine NFSv2/v3
+  * @ctx: pointer to context
+@@ -566,11 +629,11 @@ static struct nfs_open_context *alloc_nf
+ 		path_get(&ctx->path);
+ 		ctx->cred = get_rpccred(cred);
+ 		ctx->state = NULL;
+-		ctx->lockowner = current->files;
+ 		ctx->flags = 0;
+ 		ctx->error = 0;
+ 		ctx->dir_cookie = 0;
+-		atomic_set(&ctx->count, 1);
++		nfs_init_lock_context(&ctx->lock_context);
++		ctx->lock_context.open_context = ctx;
+ 	}
+ 	return ctx;
+ }
+@@ -578,15 +641,16 @@ static struct nfs_open_context *alloc_nf
+ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
+ {
+ 	if (ctx != NULL)
+-		atomic_inc(&ctx->count);
++		atomic_inc(&ctx->lock_context.count);
+ 	return ctx;
+ }
++EXPORT_SYMBOL(get_nfs_open_context);
+ 
+ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
+ {
+ 	struct inode *inode = ctx->path.dentry->d_inode;
+ 
+-	if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
++	if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
+ 		return;
+ 	list_del(&ctx->list);
+ 	spin_unlock(&inode->i_lock);
+@@ -933,6 +997,7 @@ void nfs_fattr_init(struct nfs_fattr *fa
+ 	fattr->time_start = jiffies;
+ 	fattr->gencount = nfs_inc_attr_generation_counter();
+ }
++EXPORT_SYMBOL(nfs_fattr_init);
+ 
+ struct nfs_fattr *nfs_alloc_fattr(void)
+ {
+@@ -1142,6 +1207,14 @@ static int nfs_update_inode(struct inode
+ 		server->fsid = fattr->fsid;
+ 
+ 	/*
++	 * file needs layout commit, server attributes may be stale
++	 */
++	if (layoutcommit_needed(nfsi) && nfsi->change_attr >= fattr->change_attr) {
++		dprintk("NFS: %s: layoutcommit is needed for file %s/%ld\n",
++			__func__, inode->i_sb->s_id, inode->i_ino);
++		return 0;
++	}
++	/*
+ 	 * Update the read time so we don't revalidate too often.
+ 	 */
+ 	nfsi->read_cache_jiffies = fattr->time_start;
+@@ -1340,9 +1413,10 @@ static int nfs_update_inode(struct inode
+  */
+ void nfs4_clear_inode(struct inode *inode)
+ {
++	pnfs_return_layout(inode, NULL, NULL, RETURN_FILE, true);
++
+ 	/* If we are holding a delegation, return it! */
+ 	nfs_inode_return_delegation_noreclaim(inode);
+-	/* First call standard NFS clear_inode() code */
+ 	nfs_clear_inode(inode);
+ }
+ #endif
+@@ -1367,7 +1441,10 @@ struct inode *nfs_alloc_inode(struct sup
+ 
+ void nfs_destroy_inode(struct inode *inode)
+ {
+-	kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
++	struct nfs_inode *nfsi = NFS_I(inode);
++
++	pnfs_destroy_layout(nfsi);
++	kmem_cache_free(nfs_inode_cachep, nfsi);
+ }
+ 
+ static inline void nfs4_init_once(struct nfs_inode *nfsi)
+@@ -1377,6 +1454,11 @@ static inline void nfs4_init_once(struct
+ 	nfsi->delegation = NULL;
+ 	nfsi->delegation_state = 0;
+ 	init_rwsem(&nfsi->rwsem);
++#ifdef CONFIG_NFS_V4_1
++	init_waitqueue_head(&nfsi->lo_waitq);
++	nfsi->pnfs_layout_suspend = 0;
++	nfsi->layout = NULL;
++#endif /* CONFIG_NFS_V4_1 */
+ #endif
+ }
+ 
+@@ -1488,6 +1570,12 @@ static int __init init_nfs_fs(void)
+ 	if (err)
+ 		goto out0;
+ 
++#ifdef CONFIG_NFS_V4_1
++	err = pnfs_initialize();
++	if (err)
++		goto out00;
++#endif /* CONFIG_NFS_V4_1 */
++
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_register(&nfs_rpcstat);
+ #endif
+@@ -1498,6 +1586,10 @@ out:
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_unregister("nfs");
+ #endif
++#ifdef CONFIG_NFS_V4_1
++out00:
++	pnfs_uninitialize();
++#endif /* CONFIG_NFS_V4_1 */
+ 	nfs_destroy_directcache();
+ out0:
+ 	nfs_destroy_writepagecache();
+@@ -1531,6 +1623,9 @@ static void __exit exit_nfs_fs(void)
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_unregister("nfs");
+ #endif
++#ifdef CONFIG_NFS_V4_1
++	pnfs_uninitialize();
++#endif
+ 	unregister_nfs_fs();
+ 	nfs_fs_proc_exit();
+ 	nfsiod_stop();
+diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/internal.h
+--- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-08-31 20:41:19.149170418 -0400
++++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-31 20:42:05.517099944 -0400
+@@ -139,6 +139,16 @@ extern struct nfs_server *nfs_clone_serv
+ 					   struct nfs_fattr *);
+ extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
+ extern int nfs4_check_client_ready(struct nfs_client *clp);
++extern int nfs_sockaddr_cmp(const struct sockaddr *sa1,
++		const struct sockaddr *sa2);
++extern int nfs4_set_client(struct nfs_server *server,
++		const char *hostname,
++		const struct sockaddr *addr,
++		const size_t addrlen,
++		const char *ip_addr,
++		rpc_authflavor_t authflavour,
++		int proto, const struct rpc_timeout *timeparms,
++		u32 minorversion);
+ #ifdef CONFIG_PROC_FS
+ extern int __init nfs_fs_proc_init(void);
+ extern void nfs_fs_proc_exit(void);
+@@ -201,6 +211,8 @@ extern const u32 nfs41_maxwrite_overhead
+ extern struct rpc_procinfo nfs4_procedures[];
+ #endif
+ 
++extern int nfs4_recover_expired_lease(struct nfs_client *clp);
++
+ /* proc.c */
+ void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
+ 
+@@ -248,10 +260,31 @@ extern int nfs4_get_rootfh(struct nfs_se
+ #endif
+ 
+ /* read.c */
++extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++			     const struct rpc_call_ops *call_ops);
++extern int pnfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++			     const struct rpc_call_ops *call_ops);
+ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
+ 
+ /* write.c */
++extern int nfs_initiate_write(struct nfs_write_data *data,
++			      struct rpc_clnt *clnt,
++			      const struct rpc_call_ops *call_ops,
++			      int how);
++extern int pnfs_initiate_write(struct nfs_write_data *data,
++			      struct rpc_clnt *clnt,
++			      const struct rpc_call_ops *call_ops,
++			      int how);
++extern int nfs_initiate_commit(struct nfs_write_data *data,
++			       struct rpc_clnt *clnt,
++			       const struct rpc_call_ops *call_ops,
++			       int how);
++extern int pnfs_initiate_commit(struct nfs_write_data *data,
++			       struct rpc_clnt *clnt,
++			       const struct rpc_call_ops *call_ops,
++				int how, int pnfs);
+ extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
++extern void nfs_mark_list_commit(struct list_head *head);
+ #ifdef CONFIG_MIGRATION
+ extern int nfs_migrate_page(struct address_space *,
+ 		struct page *, struct page *);
+diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kconfig
+--- linux-2.6.34.noarch/fs/nfs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-08-31 20:42:05.500123860 -0400
+@@ -79,10 +79,48 @@ config NFS_V4_1
+ 	depends on NFS_V4 && EXPERIMENTAL
+ 	help
+ 	  This option enables support for minor version 1 of the NFSv4 protocol
+-	  (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client.
++	  (RFC5661) including support for the parallel NFS (pNFS) features
++	  in the kernel's NFS client.
+ 
+ 	  Unless you're an NFS developer, say N.
+ 
++config PNFS_FILE_LAYOUT
++	tristate "NFS client support for the pNFS nfs-files layout (DEVELOPER ONLY)"
++	depends on NFS_FS && NFS_V4_1
++	default y
++	help
++	  This option enables support for the pNFS nfs-files layout.
++
++	  Unless you're an NFS developer, say N.
++
++config PNFS_OBJLAYOUT
++	tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
++	depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD
++	help
++	  Say M here if you want your pNFS client to support the Objects Layout Driver.
++	  Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and
++	  upper level driver (SCSI_OSD_ULD).
++
++	  If unsure, say N.
++
++config PNFS_PANLAYOUT
++	tristate "Provide support for the Panasas OSD Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
++	depends on PNFS_OBJLAYOUT
++	help
++	  Say M or y here if you want your pNFS client to support the Panasas OSD Layout Driver.
++
++	  If unsure, say N.
++
++config PNFS_BLOCK
++	tristate "Provide a pNFS block client (EXPERIMENTAL)"
++	depends on NFS_FS && NFS_V4_1
++	select MD
++	select BLK_DEV_DM
++	help
++	  Say M or y here if you want your pNFS client to support the block protocol.
++
++	  If unsure, say N.
++
+ config ROOT_NFS
+ 	bool "Root file system on NFS"
+ 	depends on NFS_FS=y && IP_PNP
+diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Makefile
+--- linux-2.6.34.noarch/fs/nfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-08-31 20:42:05.501268752 -0400
+@@ -15,5 +15,12 @@ nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4x
+ 			   delegation.o idmap.o \
+ 			   callback.o callback_xdr.o callback_proc.o \
+ 			   nfs4namespace.o
++nfs-$(CONFIG_NFS_V4_1)	+= pnfs.o
+ nfs-$(CONFIG_SYSCTL) += sysctl.o
+ nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
++
++obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
++nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
++
++obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
++obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
+diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs3proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-08-31 20:41:19.152180625 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-31 20:42:05.518232887 -0400
+@@ -833,6 +833,7 @@ const struct nfs_rpc_ops nfs_v3_clientop
+ 	.dentry_ops	= &nfs_dentry_operations,
+ 	.dir_inode_ops	= &nfs3_dir_inode_operations,
+ 	.file_inode_ops	= &nfs3_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs3_proc_get_root,
+ 	.getattr	= nfs3_proc_getattr,
+ 	.setattr	= nfs3_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-08-31 20:42:05.519163219 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-08-31 20:42:05.520222923 -0400
+@@ -0,0 +1,768 @@
++/*
++ *  linux/fs/nfs/nfs4filelayout.c
++ *
++ *  Module for the pnfs nfs4 file layout driver.
++ *  Defines all I/O and Policy interface operations, plus code
++ *  to register itself with the pNFS client.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/time.h>
++#include <linux/kernel.h>
++#include <linux/mm.h>
++#include <linux/string.h>
++#include <linux/vmalloc.h>
++#include <linux/stat.h>
++#include <linux/errno.h>
++#include <linux/unistd.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_page.h>
++#include <linux/nfs4_pnfs.h>
++
++#include "nfs4filelayout.h"
++#include "nfs4_fs.h"
++#include "internal.h"
++#include "pnfs.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Dean Hildebrand <dhildebz@eecs.umich.edu>");
++MODULE_DESCRIPTION("The NFSv4 file layout driver");
++
++/* Callback operations to the pNFS client */
++struct pnfs_client_operations *pnfs_callback_ops;
++
++/* Forward declaration */
++struct layoutdriver_io_operations filelayout_io_operations;
++
++int
++filelayout_initialize_mountpoint(struct nfs_server *nfss,
++				 const struct nfs_fh *mntfh)
++{
++	int status = nfs4_alloc_init_deviceid_cache(nfss->nfs_client,
++						nfs4_fl_free_deviceid_callback);
++	if (status) {
++		printk(KERN_WARNING "%s: deviceid cache could not be "
++			"initialized\n", __func__);
++		return status;
++	}
++	dprintk("%s: deviceid cache has been initialized successfully\n",
++		__func__);
++	return 0;
++}
++
++/* Uninitialize a mountpoint by destroying its device list */
++int
++filelayout_uninitialize_mountpoint(struct nfs_server *nfss)
++{
++	dprintk("--> %s\n", __func__);
++
++	if (nfss->pnfs_curr_ld && nfss->nfs_client->cl_devid_cache)
++		nfs4_put_deviceid_cache(nfss->nfs_client);
++	return 0;
++}
++
++/* This function is used by the layout driver to calculate the
++ * offset of the file on the dserver based on whether the
++ * layout type is STRIPE_DENSE or STRIPE_SPARSE
++ */
++static loff_t
++filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++
++	switch (flseg->stripe_type) {
++	case STRIPE_SPARSE:
++		return offset;
++
++	case STRIPE_DENSE:
++	{
++		u32 stripe_width;
++		u64 tmp, off;
++		u32 unit = flseg->stripe_unit;
++
++		stripe_width = unit * FILE_DSADDR(lseg)->stripe_count;
++		tmp = off = offset - flseg->pattern_offset;
++		do_div(tmp, stripe_width);
++		return tmp * unit + do_div(off, unit);
++	}
++	default:
++		BUG();
++	}
++
++	/* We should never get here... just to stop the gcc warning */
++	return 0;
++}
++
++/*
++ * Call ops for the async read/write cases
++ * In the case of dense layouts, the offset needs to be reset to its
++ * original value.
++ */
++static void filelayout_read_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_read_data *rdata = (struct nfs_read_data *)data;
++
++	if (rdata->fldata.orig_offset) {
++		dprintk("%s new off %llu orig offset %llu\n", __func__,
++			rdata->args.offset, rdata->fldata.orig_offset);
++		rdata->args.offset = rdata->fldata.orig_offset;
++	}
++
++	/* Note this may cause RPC to be resent */
++	rdata->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static void filelayout_read_release(void *data)
++{
++	struct nfs_read_data *rdata = (struct nfs_read_data *)data;
++
++	put_lseg(rdata->pdata.lseg);
++	rdata->pdata.lseg = NULL;
++	rdata->pdata.call_ops->rpc_release(data);
++}
++
++static void filelayout_write_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
++
++	if (wdata->fldata.orig_offset) {
++		dprintk("%s new off %llu orig offset %llu\n", __func__,
++			wdata->args.offset, wdata->fldata.orig_offset);
++		wdata->args.offset = wdata->fldata.orig_offset;
++	}
++
++	/* Note this may cause RPC to be resent */
++	wdata->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static void filelayout_write_release(void *data)
++{
++	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
++
++	put_lseg(wdata->pdata.lseg);
++	wdata->pdata.lseg = NULL;
++	wdata->pdata.call_ops->rpc_release(data);
++}
++
++struct rpc_call_ops filelayout_read_call_ops = {
++	.rpc_call_prepare = nfs_read_prepare,
++	.rpc_call_done = filelayout_read_call_done,
++	.rpc_release = filelayout_read_release,
++};
++
++struct rpc_call_ops filelayout_write_call_ops = {
++	.rpc_call_prepare = nfs_write_prepare,
++	.rpc_call_done = filelayout_write_call_done,
++	.rpc_release = filelayout_write_release,
++};
++
++/*
++ * Issue the read described by @data to a data server.
++ *
++ * Returns PNFS_NOT_ATTEMPTED when nfs4_fl_prepare_ds() yields no data
++ * server (nothing has been sent at that point), otherwise PNFS_ATTEMPTED
++ * once the asynchronous read has been initiated.
++ * TODO: join with write_pagelist?
++ */
++static enum pnfs_try_status
++filelayout_read_pagelist(struct nfs_read_data *data, unsigned nr_pages)
++{
++	struct pnfs_layout_segment *lseg = data->pdata.lseg;
++	struct nfs4_pnfs_ds *ds;
++	loff_t offset = data->args.offset;
++	u32 idx;
++	struct nfs_fh *fh;
++
++	dprintk("--> %s ino %lu nr_pages %d pgbase %u req %Zu@%llu\n",
++		__func__, data->inode->i_ino, nr_pages,
++		data->args.pgbase, (size_t)data->args.count, offset);
++
++	/* Retrieve the correct rpc_client for the byte range */
++	idx = nfs4_fl_calc_ds_index(lseg, offset);
++	ds = nfs4_fl_prepare_ds(lseg, idx);
++	if (!ds) {
++		printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	dprintk("%s USE DS:ip %x %s\n", __func__,
++		htonl(ds->ds_ip_addr), ds->r_addr);
++
++	/* just try the first data server for the index..*/
++	data->fldata.ds_nfs_client = ds->ds_clp;
++	fh = nfs4_fl_select_ds_fh(lseg, offset);
++	if (fh)
++		data->args.fh = fh;
++
++	/*
++	 * Translate the file offset into the offset on the dserver and
++	 * use that for the read, saving the original offset in
++	 * orig_offset.  filelayout_read_call_done() restores args.offset
++	 * from orig_offset (when it is non-zero) before invoking the
++	 * original rpc_call_done().
++	 */
++	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
++	data->fldata.orig_offset = offset;
++
++	/* Perform an asynchronous read */
++	nfs_initiate_read(data, ds->ds_clp->cl_rpcclient,
++			  &filelayout_read_call_ops);
++
++	data->pdata.pnfs_error = 0;
++
++	return PNFS_ATTEMPTED;
++}
++
++/* Send the write described by @data to a data server (asynchronously). */
++static enum pnfs_try_status
++filelayout_write_pagelist(struct nfs_write_data *data, unsigned nr_pages, int sync)
++{
++	struct pnfs_layout_segment *lseg = data->pdata.lseg;
++	struct nfs4_pnfs_ds *ds;
++	loff_t offset = data->args.offset;
++	u32 idx;
++	struct nfs_fh *fh;
++
++	/* Retrieve the correct rpc_client for the byte range */
++	idx = nfs4_fl_calc_ds_index(lseg, offset);
++	ds = nfs4_fl_prepare_ds(lseg, idx);
++	if (!ds) {
++		printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu %s\n", __func__,
++		data->inode->i_ino, sync, (size_t) data->args.count, offset,
++		htonl(ds->ds_ip_addr), ntohs(ds->ds_port), ds->r_addr);
++
++	data->fldata.ds_nfs_client = ds->ds_clp;
++	fh = nfs4_fl_select_ds_fh(lseg, offset);
++	if (fh)
++		data->args.fh = fh;
++	/*
++	 * Get the file offset on the dserver.  Set the write offset to
++	 * this offset and save the original offset in orig_offset.
++	 */
++	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
++	data->fldata.orig_offset = offset;
++
++	/*
++	 * Perform an asynchronous write.  The offset will be reset in
++	 * filelayout_write_call_done() (when orig_offset is non-zero).
++	 */
++	nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
++			   &filelayout_write_call_ops, sync);
++
++	data->pdata.pnfs_error = 0;
++	return PNFS_ATTEMPTED;
++}
++
++/*
++ * Create a filelayout layout structure and return it.  The pNFS client
++ * will use the pnfs_layout_hdr type to refer to the layout for this
++ * inode from now on.
++ */
++static struct pnfs_layout_hdr *
++filelayout_alloc_layout(struct inode *inode)
++{
++	struct nfs4_filelayout *flp;
++
++	dprintk("NFS_FILELAYOUT: allocating layout\n");
++	flp =  kzalloc(sizeof(struct nfs4_filelayout), GFP_KERNEL);
++	return flp ? &flp->fl_layout : NULL;
++}
++
++/* Free a filelayout layout structure */
++static void
++filelayout_free_layout(struct pnfs_layout_hdr *lo)
++{
++	dprintk("NFS_FILELAYOUT: freeing layout\n");
++	kfree(FILE_LO(lo));
++}
++
++/*
++ * filelayout_check_layout()
++ *
++ * Make sure layout segment parameters are sane WRT the device.  Returns 0
++ * with the deviceid set on @lseg, else -EINVAL with no reference kept.
++ *
++ * Notes:
++ * 1) current code insists that # stripe index = # data servers in ds_list
++ *    which is wrong.
++ * 2) pattern_offset is ignored and must == 0 which is wrong;
++ * 3) the pattern_offset needs to be a multiple of the stripe unit.
++ * 4) stripe unit is a non-zero multiple of page size (it is a divisor)
++ */
++static int
++filelayout_check_layout(struct pnfs_layout_hdr *lo,
++			struct pnfs_layout_segment *lseg)
++{
++	struct nfs4_filelayout_segment *fl = LSEG_LD_DATA(lseg);
++	struct nfs4_file_layout_dsaddr *dsaddr;
++	int status = -EINVAL;
++	struct nfs_server *nfss = NFS_SERVER(PNFS_INODE(lo));
++
++	dprintk("--> %s\n", __func__);
++	/* find in list or get from server and reference the deviceid */
++	dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, &fl->dev_id);
++	if (dsaddr == NULL) {
++		dsaddr = get_device_info(PNFS_INODE(lo), &fl->dev_id);
++		if (dsaddr == NULL) {
++			dprintk("%s NO device for dev_id %s\n",
++				__func__, deviceid_fmt(&fl->dev_id));
++			goto out;
++		}
++	}
++	if (fl->first_stripe_index < 0 ||
++	    fl->first_stripe_index >= dsaddr->stripe_count) {
++		dprintk("%s Bad first_stripe_index %d\n",
++				__func__, fl->first_stripe_index);
++		goto out_put;
++	}
++
++	if (fl->pattern_offset != 0) {
++		dprintk("%s Unsupported no-zero pattern_offset %Ld\n",
++				__func__, fl->pattern_offset);
++		goto out_put;
++	}
++
++	if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) {
++		dprintk("%s Stripe unit (%u) not page aligned\n",
++			__func__, fl->stripe_unit);
++		goto out_put;
++	}
++
++	/* XXX only support SPARSE packing. Don't support use MDS open fh */
++	if (!(fl->num_fh == 1 || fl->num_fh == dsaddr->ds_num)) {
++		dprintk("%s num_fh %u not equal to 1 or ds_num %u\n",
++			__func__, fl->num_fh, dsaddr->ds_num);
++		goto out_put;
++	}
++
++	if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
++		dprintk("%s Stripe unit (%u) not aligned with rsize %u "
++			"wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
++			nfss->wsize);
++	}
++
++	nfs4_set_layout_deviceid(lseg, &dsaddr->deviceid);
++
++	status = 0;
++out:
++	dprintk("--> %s returns %d\n", __func__, status);
++	return status;
++out_put:
++	nfs4_put_unset_layout_deviceid(lseg, &dsaddr->deviceid,
++				       nfs4_fl_free_deviceid_callback);
++	goto out;
++}
++
++static void _filelayout_free_lseg(struct pnfs_layout_segment *lseg);
++static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl);
++
++/* Decode the layout in lgr->layout.buf into @fl and record its stripe unit
++ * in @flo.  Returns 0, -ENOMEM, or -EINVAL when num_fh is out of range.
++ */
++static int
++filelayout_set_layout(struct nfs4_filelayout *flo,
++		      struct nfs4_filelayout_segment *fl,
++		      struct nfs4_layoutget_res *lgr)
++{
++	uint32_t *p = (uint32_t *)lgr->layout.buf;
++	uint32_t nfl_util;
++	int i;
++
++	dprintk("%s: set_layout_map Begin\n", __func__);
++	memcpy(&fl->dev_id, p, NFS4_PNFS_DEVICEID4_SIZE);
++	p += XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE);
++	nfl_util = be32_to_cpup(p++);
++	if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
++		fl->commit_through_mds = 1;
++	if (nfl_util & NFL4_UFLG_DENSE)
++		fl->stripe_type = STRIPE_DENSE;
++	else
++		fl->stripe_type = STRIPE_SPARSE;
++	fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;
++
++	if (flo->stripe_unit && flo->stripe_unit != fl->stripe_unit)
++		printk(KERN_NOTICE "%s: updating strip_unit from %u to %u\n",
++			__func__, flo->stripe_unit, fl->stripe_unit);
++	flo->stripe_unit = fl->stripe_unit;
++
++	fl->first_stripe_index = be32_to_cpup(p++);
++	p = xdr_decode_hyper(p, &fl->pattern_offset);
++	fl->num_fh = be32_to_cpup(p++);
++
++	dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu dev_id %s\n",
++		__func__, nfl_util, fl->num_fh, fl->first_stripe_index,
++		fl->pattern_offset, deviceid_fmt(&fl->dev_id));
++
++	/* Only 1 or ds_num fhs pass filelayout_check_layout(), so reject more
++	 * than the ds limit now; this also keeps the sizes below in range. */
++	if (fl->num_fh > NFS4_PNFS_MAX_MULTI_CNT)
++		return -EINVAL;
++	if (fl->num_fh * sizeof(struct nfs_fh) > 2*PAGE_SIZE) {
++		fl->fh_array = vmalloc(fl->num_fh * sizeof(struct nfs_fh));
++		if (fl->fh_array)
++			memset(fl->fh_array, 0,
++				fl->num_fh * sizeof(struct nfs_fh));
++	} else {
++		fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh),
++					GFP_KERNEL);
++	}
++	if (!fl->fh_array)
++		return -ENOMEM;
++
++	for (i = 0; i < fl->num_fh; i++) {
++		/* fh: must fit data[], which is smaller than struct nfs_fh */
++		fl->fh_array[i].size = be32_to_cpup(p++);
++		if (sizeof(fl->fh_array[i].data) < fl->fh_array[i].size) {
++			printk(KERN_ERR "Too big fh %d received %d\n",
++				i, fl->fh_array[i].size);
++			/* Layout is now invalid, pretend it doesn't exist */
++			filelayout_free_fh_array(fl);
++			fl->num_fh = 0;
++			break;
++		}
++		memcpy(fl->fh_array[i].data, p, fl->fh_array[i].size);
++		p += XDR_QUADLEN(fl->fh_array[i].size);
++		dprintk("DEBUG: %s: fh len %d\n", __func__,
++					fl->fh_array[i].size);
++	}
++
++	return 0;
++}
++
++static struct pnfs_layout_segment *
++filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
++		      struct nfs4_layoutget_res *lgr)
++{
++	struct nfs4_filelayout *flo = FILE_LO(layoutid);
++	struct pnfs_layout_segment *lseg;
++	int rc;
++
++	dprintk("--> %s\n", __func__);
++	lseg = kzalloc(sizeof(struct pnfs_layout_segment) +
++		       sizeof(struct nfs4_filelayout_segment), GFP_KERNEL);
++	if (!lseg)
++		return NULL;
++
++	rc = filelayout_set_layout(flo, LSEG_LD_DATA(lseg), lgr);
++
++	if (rc != 0 || filelayout_check_layout(layoutid, lseg)) {
++		_filelayout_free_lseg(lseg);
++		lseg = NULL;
++	}
++	return lseg;
++}
++
++static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl)
++{
++	if (fl->num_fh * sizeof(struct nfs_fh) > 2*PAGE_SIZE)
++		vfree(fl->fh_array);
++	else
++		kfree(fl->fh_array);
++
++	fl->fh_array = NULL;
++}
++
++static void
++_filelayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	filelayout_free_fh_array(LSEG_LD_DATA(lseg));
++	kfree(lseg);
++}
++
++static void
++filelayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	dprintk("--> %s\n", __func__);
++	nfs4_put_unset_layout_deviceid(lseg, lseg->deviceid,
++				   nfs4_fl_free_deviceid_callback);
++	_filelayout_free_lseg(lseg);
++}
++
++/* Allocate a clone of @old holding a ref on it; NULL if allocation fails */
++static struct nfs_write_data *
++filelayout_clone_write_data(struct nfs_write_data *old)
++{
++	struct nfs_write_data *new;
++
++	new = nfs_commitdata_alloc();
++	if (!new)
++		goto out;
++	kref_init(&new->refcount);
++	new->parent      = old;
++	kref_get(&old->refcount);
++	new->inode       = old->inode;
++	new->cred        = old->cred;
++	new->args.offset = 0;
++	new->args.count  = 0;
++	new->res.count   = 0;
++	new->res.fattr   = &new->fattr;
++	nfs_fattr_init(&new->fattr);
++	new->res.verf    = &new->verf;
++	new->args.context = get_nfs_open_context(old->args.context);
++	new->pdata.lseg = NULL;
++	new->pdata.call_ops = old->pdata.call_ops;
++	new->pdata.how = old->pdata.how;
++out:
++	return new;
++}
++
++static void filelayout_commit_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
++
++	wdata->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static struct rpc_call_ops filelayout_commit_call_ops = {
++	.rpc_call_prepare = nfs_write_prepare,
++	.rpc_call_done = filelayout_commit_call_done,
++	.rpc_release = filelayout_write_release,
++};
++
++/*
++ * Execute a COMMIT op to the MDS or to each data server on which a page
++ * in 'pages' exists.
++ * Invoke the pnfs_commit_complete callback.
++ */
++enum pnfs_try_status
++filelayout_commit(struct nfs_write_data *data, int sync)
++{
++	LIST_HEAD(head);
++	struct nfs_page *req;
++	loff_t file_offset = 0;
++	u16 idx, i;
++	struct list_head **ds_page_list = NULL;
++	u16 *indices_used;
++	int num_indices_seen = 0;
++	const struct rpc_call_ops *call_ops;
++	struct rpc_clnt *clnt;
++	struct nfs_write_data **clone_list = NULL;
++	struct nfs_write_data *dsdata;
++	struct nfs4_pnfs_ds *ds;
++
++	dprintk("%s data %p sync %d\n", __func__, data, sync);
++
++	/* Alloc room for both in one go */
++	ds_page_list = kzalloc((NFS4_PNFS_MAX_MULTI_CNT + 1) *
++			       (sizeof(u16) + sizeof(struct list_head *)),
++			       GFP_KERNEL);
++	if (!ds_page_list)
++		goto mem_error;
++	indices_used = (u16 *) (ds_page_list + NFS4_PNFS_MAX_MULTI_CNT + 1);
++	/*
++	 * Sort pages based on which ds to send to.
++	 * MDS is given index equal to NFS4_PNFS_MAX_MULTI_CNT.
++	 * Note we are assuming there is only a single lseg in play.
++	 * When that is not true, we could first sort on lseg, then
++	 * sort within each as we do here.
++	 */
++	while (!list_empty(&data->pages)) {
++		req = nfs_list_entry(data->pages.next);
++		nfs_list_remove_request(req);
++		if (!req->wb_lseg ||
++		    ((struct nfs4_filelayout_segment *)
++		     LSEG_LD_DATA(req->wb_lseg))->commit_through_mds)
++			idx = NFS4_PNFS_MAX_MULTI_CNT;
++		else {
++			file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
++			idx = nfs4_fl_calc_ds_index(req->wb_lseg, file_offset);
++		}
++		if (ds_page_list[idx]) {
++			/* Already seen this idx */
++			list_add(&req->wb_list, ds_page_list[idx]);
++		} else {
++			/* New idx not seen so far */
++			list_add_tail(&req->wb_list, &head);
++			indices_used[num_indices_seen++] = idx;
++		}
++		ds_page_list[idx] = &req->wb_list;
++	}
++	/* Once created, clone must be released via call_op */
++	clone_list = kzalloc(num_indices_seen *
++			     sizeof(struct nfs_write_data *), GFP_KERNEL);
++	if (!clone_list)
++		goto mem_error;
++	for (i = 0; i < num_indices_seen - 1; i++) {
++		clone_list[i] = filelayout_clone_write_data(data);
++		if (!clone_list[i])
++			goto mem_error;
++	}
++	clone_list[i] = data;
++	/*
++	 * Now send off the RPCs to each ds.  Note that it is important
++	 * that any RPC to the MDS be sent last (or at least after all
++	 * clones have been made.)
++	 */
++	for (i = 0; i < num_indices_seen; i++) {
++		dsdata = clone_list[i];
++		idx = indices_used[i];
++		list_cut_position(&dsdata->pages, &head, ds_page_list[idx]);
++		if (idx == NFS4_PNFS_MAX_MULTI_CNT) {
++			call_ops = data->pdata.call_ops;;
++			clnt = NFS_CLIENT(dsdata->inode);
++			ds = NULL;
++		} else {
++			struct nfs_fh *fh;
++
++			call_ops = &filelayout_commit_call_ops;
++			req = nfs_list_entry(dsdata->pages.next);
++			ds = nfs4_fl_prepare_ds(req->wb_lseg, idx);
++			if (!ds) {
++				/* Trigger retry of this chunk through MDS */
++				dsdata->task.tk_status = -EIO;
++				data->pdata.call_ops->rpc_release(dsdata);
++				continue;
++			}
++			clnt = ds->ds_clp->cl_rpcclient;
++			dsdata->fldata.ds_nfs_client = ds->ds_clp;
++			file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
++			fh = nfs4_fl_select_ds_fh(req->wb_lseg, file_offset);
++			if (fh)
++				dsdata->args.fh = fh;
++		}
++		dprintk("%s: Initiating commit: %llu USE DS:\n",
++			__func__, file_offset);
++		print_ds(ds);
++
++		/* Send COMMIT to data server */
++		nfs_initiate_commit(dsdata, clnt, call_ops, sync);
++	}
++	kfree(clone_list);
++	kfree(ds_page_list);
++	data->pdata.pnfs_error = 0;
++	return PNFS_ATTEMPTED;
++
++ mem_error:
++	if (clone_list) {
++		for (i = 0; i < num_indices_seen - 1; i++) {
++			if (!clone_list[i])
++				break;
++			data->pdata.call_ops->rpc_release(clone_list[i]);
++		}
++		kfree(clone_list);
++	}
++	kfree(ds_page_list);
++	/* One of these will be empty, but doesn't hurt to do both */
++	nfs_mark_list_commit(&head);
++	nfs_mark_list_commit(&data->pages);
++	data->pdata.call_ops->rpc_release(data);
++	return PNFS_ATTEMPTED;
++}
++
++/* Return the stripesize for the specified file */
++ssize_t
++filelayout_get_stripesize(struct pnfs_layout_hdr *lo)
++{
++	struct nfs4_filelayout *flo = FILE_LO(lo);
++
++	return flo->stripe_unit;
++}
++
++/*
++ * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
++ *
++ * return 1 :  coalesce page
++ * return 0 :  don't coalesce page
++ */
++int
++filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
++		   struct nfs_page *req)
++{
++	u64 p_stripe, r_stripe;
++
++	if (pgio->pg_boundary == 0)
++		return 1;
++	p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
++	r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
++
++	do_div(p_stripe, pgio->pg_boundary);
++	do_div(r_stripe, pgio->pg_boundary);
++
++	return (p_stripe == r_stripe);
++}
++
++struct layoutdriver_io_operations filelayout_io_operations = {
++	.commit                  = filelayout_commit,
++	.read_pagelist           = filelayout_read_pagelist,
++	.write_pagelist          = filelayout_write_pagelist,
++	.alloc_layout            = filelayout_alloc_layout,
++	.free_layout             = filelayout_free_layout,
++	.alloc_lseg              = filelayout_alloc_lseg,
++	.free_lseg               = filelayout_free_lseg,
++	.initialize_mountpoint   = filelayout_initialize_mountpoint,
++	.uninitialize_mountpoint = filelayout_uninitialize_mountpoint,
++};
++
++struct layoutdriver_policy_operations filelayout_policy_operations = {
++	.flags                 = PNFS_USE_RPC_CODE,
++	.get_stripesize        = filelayout_get_stripesize,
++	.pg_test               = filelayout_pg_test,
++};
++
++struct pnfs_layoutdriver_type filelayout_type = {
++	.id = LAYOUT_NFSV4_1_FILES,
++	.name = "LAYOUT_NFSV4_1_FILES",
++	.ld_io_ops = &filelayout_io_operations,
++	.ld_policy_ops = &filelayout_policy_operations,
++};
++
++static int __init nfs4filelayout_init(void)
++{
++	printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n",
++	       __func__);
++
++	/*
++	 * Need to register file_operations struct with global list to indicate
++	 * that NFS4 file layout is a possible pNFS I/O module
++	 */
++	pnfs_callback_ops = pnfs_register_layoutdriver(&filelayout_type);
++
++	return 0;
++}
++
++static void __exit nfs4filelayout_exit(void)
++{
++	printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n",
++	       __func__);
++
++	/* Unregister NFS4 file layout driver with pNFS client*/
++	pnfs_unregister_layoutdriver(&filelayout_type);
++}
++
++module_init(nfs4filelayout_init);
++module_exit(nfs4filelayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-08-31 20:42:05.521233147 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-08-31 20:42:05.521233147 -0400
+@@ -0,0 +1,635 @@
++/*
++ *  linux/fs/nfs/nfs4filelayoutdev.c
++ *
++ *  Device operations for the pnfs nfs4 file layout driver.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *  Garth Goodson   <Garth.Goodson@netapp.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/hash.h>
++
++#include <linux/nfs4.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_xdr.h>
++
++#include <asm/div64.h>
++
++#include <linux/utsname.h>
++#include <linux/vmalloc.h>
++#include <linux/nfs4_pnfs.h>
++#include "nfs4filelayout.h"
++#include "internal.h"
++#include "nfs4_fs.h"
++
++#define NFSDBG_FACILITY		NFSDBG_PNFS_LD
++
++DEFINE_SPINLOCK(nfs4_ds_cache_lock);
++static LIST_HEAD(nfs4_data_server_cache);
++
++void
++print_ds(struct nfs4_pnfs_ds *ds)
++{
++	if (ds == NULL) {
++		dprintk("%s NULL device \n", __func__);
++		return;
++	}
++	dprintk("        ip_addr %x\n", ntohl(ds->ds_ip_addr));
++	dprintk("        port %hu\n", ntohs(ds->ds_port));
++	dprintk("        client %p\n", ds->ds_clp);
++	dprintk("        ref count %d\n", atomic_read(&ds->ds_count));
++	if (ds->ds_clp)
++		dprintk("        cl_exchange_flags %x\n",
++					    ds->ds_clp->cl_exchange_flags);
++	dprintk("        ip:port %s\n", ds->r_addr);
++}
++
++void
++print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
++{
++	int i;
++
++	dprintk("%s dsaddr->ds_num %d\n", __func__,
++		dsaddr->ds_num);
++	for (i = 0; i < dsaddr->ds_num; i++)
++		print_ds(dsaddr->ds_list[i]);
++}
++
++/* Debugging function assuming a 64bit major/minor split of the deviceid */
++char *
++deviceid_fmt(const struct pnfs_deviceid *dev_id)
++{
++	/* shared static buffer: two 20-digit u64, a space and the NUL */
++	static char buf[2 * 20 + 2];
++	uint32_t *p = (uint32_t *)dev_id->data;
++	uint64_t major, minor;
++
++	p = xdr_decode_hyper(p, &major);
++	p = xdr_decode_hyper(p, &minor);
++	snprintf(buf, sizeof(buf), "%08llu %08llu", major, minor);
++	return buf;
++}
++
++/* nfs4_ds_cache_lock is held */
++static inline struct nfs4_pnfs_ds *
++_data_server_lookup(u32 ip_addr, u32 port)
++{
++	struct nfs4_pnfs_ds *ds;
++
++	dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
++			ntohl(ip_addr), ntohs(port));
++
++	list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
++		if (ds->ds_ip_addr == ip_addr &&
++		    ds->ds_port == port) {
++			return ds;
++		}
++	}
++	return NULL;
++}
++
++/* Set up ds->ds_clp for data server @ds; returns 0 or an error code */
++static int
++nfs4_pnfs_ds_create(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
++{
++	struct nfs_server	*tmp;
++	struct sockaddr_in	sin;
++	struct rpc_clnt 	*mds_clnt = mds_srv->client;
++	struct nfs_client	*clp = mds_srv->nfs_client;
++	struct sockaddr		*mds_addr;
++	int err = 0;
++
++	dprintk("--> %s ip:port %s au_flavor %d\n", __func__,
++		ds->r_addr, mds_clnt->cl_auth->au_flavor);
++
++	sin.sin_family = AF_INET;
++	sin.sin_addr.s_addr = ds->ds_ip_addr;
++	sin.sin_port = ds->ds_port;
++
++	/*
++	 * If this DS is also the MDS, use the MDS session only if the
++	 * MDS exchangeid flags show the EXCHGID4_FLAG_USE_PNFS_DS pNFS role.
++	 */
++	mds_addr = (struct sockaddr *)&clp->cl_addr;
++	if (nfs_sockaddr_cmp((struct sockaddr *)&sin, mds_addr)) {
++		if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS)) {
++			printk(KERN_INFO "ip:port %s is not a pNFS Data "
++				"Server\n", ds->r_addr);
++			err = -ENODEV;
++		} else {
++			atomic_inc(&clp->cl_count);
++			ds->ds_clp = clp;
++			dprintk("%s Using MDS Session for DS\n", __func__);
++		}
++		goto out;
++	}
++
++	/* Temporary server for nfs4_set_client */
++	err = -ENOMEM;
++	tmp = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
++	if (!tmp)
++		goto out;
++
++	/*
++	 * Set a retrans, timeout interval, and authflavor equal to the MDS
++	 * values. Use the MDS nfs_client cl_ipaddr field so as to use the
++	 * same co_ownerid as the MDS.
++	 */
++	err = nfs4_set_client(tmp,
++			      mds_srv->nfs_client->cl_hostname,
++			      (struct sockaddr *)&sin,
++			      sizeof(struct sockaddr),
++			      mds_srv->nfs_client->cl_ipaddr,
++			      mds_clnt->cl_auth->au_flavor,
++			      IPPROTO_TCP,
++			      mds_clnt->cl_xprt->timeout,
++			      1 /* minorversion */);
++	if (err < 0)
++		goto out_free;
++
++	clp = tmp->nfs_client;
++	/* Ask for only the EXCHGID4_FLAG_USE_PNFS_DS pNFS role */
++	dprintk("%s EXCHANGE_ID for clp %p\n", __func__, clp);
++	clp->cl_exchange_flags = EXCHGID4_FLAG_USE_PNFS_DS;
++
++	err = nfs4_recover_expired_lease(clp);
++	if (!err)
++		err = nfs4_check_client_ready(clp);
++	if (err)
++		goto out_put;
++
++	if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS)) {
++		printk(KERN_INFO "ip:port %s is not a pNFS Data Server\n",
++			ds->r_addr);
++		err = -ENODEV;
++		goto out_put;
++	}
++	/*
++	 * Mask the (possibly) returned EXCHGID4_FLAG_USE_PNFS_MDS pNFS role
++	 * The is_ds_only_session depends on this.
++	 */
++	clp->cl_exchange_flags &= ~EXCHGID4_FLAG_USE_PNFS_MDS;
++	/*
++	 * Set DS lease equal to the MDS lease, renewal is scheduled in
++	 * create_session
++	 */
++	spin_lock(&mds_srv->nfs_client->cl_lock);
++	clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
++	spin_unlock(&mds_srv->nfs_client->cl_lock);
++	clp->cl_last_renewal = jiffies;
++
++	clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
++	ds->ds_clp = clp;
++
++	dprintk("%s: ip=%x, port=%hu, rpcclient %p\n", __func__,
++				ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
++				clp->cl_rpcclient);
++out_free:
++	kfree(tmp);
++out:
++	dprintk("%s Returns %d\n", __func__, err);
++	return err;
++out_put:
++	nfs_put_client(clp);
++	goto out_free;
++}
++
++static void
++destroy_ds(struct nfs4_pnfs_ds *ds)
++{
++	dprintk("--> %s\n", __func__);
++	print_ds(ds);
++
++	if (ds->ds_clp)
++		nfs_put_client(ds->ds_clp);
++	kfree(ds);
++}
++
++static void
++nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
++{
++	struct nfs4_pnfs_ds *ds;
++	int i;
++
++	dprintk("%s: device id=%s\n", __func__,
++		deviceid_fmt(&dsaddr->deviceid.de_id));
++
++	for (i = 0; i < dsaddr->ds_num; i++) {
++		ds = dsaddr->ds_list[i];
++		if (ds != NULL) {
++			if (atomic_dec_and_lock(&ds->ds_count,
++						&nfs4_ds_cache_lock)) {
++				list_del_init(&ds->ds_node);
++				spin_unlock(&nfs4_ds_cache_lock);
++				destroy_ds(ds);
++			}
++		}
++	}
++	kfree(dsaddr->stripe_indices);
++	kfree(dsaddr);
++}
++
++void
++nfs4_fl_free_deviceid_callback(struct kref *kref)
++{
++	struct nfs4_deviceid *device =
++		container_of(kref, struct nfs4_deviceid, de_kref);
++	struct nfs4_file_layout_dsaddr *dsaddr =
++		container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
++
++	nfs4_fl_free_deviceid(dsaddr);
++}
++
++static void
++nfs4_pnfs_ds_add(struct inode *inode, struct nfs4_pnfs_ds **dsp,
++		 u32 ip_addr, u32 port, char *r_addr, int len)
++{
++	struct nfs4_pnfs_ds *tmp_ds, *ds;
++
++	*dsp = NULL;
++
++	ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL);
++	if (!ds)
++		return;
++
++	spin_lock(&nfs4_ds_cache_lock);
++	tmp_ds = _data_server_lookup(ip_addr, port);
++	if (tmp_ds == NULL) {
++		ds->ds_ip_addr = ip_addr;
++		ds->ds_port = port;
++		strncpy(ds->r_addr, r_addr, len);
++		atomic_set(&ds->ds_count, 1);
++		INIT_LIST_HEAD(&ds->ds_node);
++		ds->ds_clp = NULL;
++		list_add(&ds->ds_node, &nfs4_data_server_cache);
++		*dsp = ds;
++		dprintk("%s add new data server ip 0x%x\n", __func__,
++				ds->ds_ip_addr);
++		spin_unlock(&nfs4_ds_cache_lock);
++	} else {
++		atomic_inc(&tmp_ds->ds_count);
++		*dsp = tmp_ds;
++		dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
++				__func__, tmp_ds->ds_ip_addr,
++				atomic_read(&tmp_ds->ds_count));
++		spin_unlock(&nfs4_ds_cache_lock);
++		kfree(ds);
++	}
++}
++
++/*
++ * Decode one r_netid/r_addr pair at *pp and look up or add the matching
++ * data server.  Returns NULL on a decode or allocation failure.
++ */
++static struct nfs4_pnfs_ds *
++decode_and_add_ds(uint32_t **pp, struct inode *inode)
++{
++	struct nfs4_pnfs_ds *ds = NULL;
++	char r_addr[29]; /* max size of ip/port string */
++	int len;
++	u32 ip_addr, port;
++	int tmp[6];
++	uint32_t *p = *pp;
++
++	dprintk("%s enter\n", __func__);
++	/* check and skip r_netid */
++	len = be32_to_cpup(p++);
++	/* "tcp" */
++	if (len != 3) {
++		printk("%s: ERROR: non TCP r_netid len %d\n",
++			__func__, len);
++		goto out_err;
++	}
++	p += XDR_QUADLEN(len);	/* XXX: the netid bytes are not inspected */
++	len = be32_to_cpup(p++);
++	if (len >= sizeof(r_addr)) {
++		printk("%s: ERROR: Device ip/port too long (%d)\n",
++			__func__, len);
++		goto out_err;
++	}
++	memcpy(r_addr, p, len);
++	p += XDR_QUADLEN(len);
++	*pp = p;
++	r_addr[len] = '\0';
++	/* all six fields are needed; tmp[] is otherwise uninitialized */
++	if (sscanf(r_addr, "%d.%d.%d.%d.%d.%d", &tmp[0], &tmp[1],
++		   &tmp[2], &tmp[3], &tmp[4], &tmp[5]) != 6)
++		goto out_err;
++	ip_addr = htonl((tmp[0]<<24) | (tmp[1]<<16) | (tmp[2]<<8) | (tmp[3]));
++	port = htons((tmp[4] << 8) | (tmp[5]));
++
++	nfs4_pnfs_ds_add(inode, &ds, ip_addr, port, r_addr, len);
++	dprintk("%s: addr:port string = %s\n", __func__, r_addr);
++	return ds;
++out_err:
++	dprintk("%s returned NULL\n", __func__);
++	return NULL;
++}
++
++/* Decode the opaque device in pdev->area into a new dsaddr; NULL on error */
++static struct nfs4_file_layout_dsaddr*
++decode_device(struct inode *ino, struct pnfs_device *pdev)
++{
++	int i, dummy;
++	u32 cnt, num;
++	uint32_t *p = (u32 *)pdev->area, *indicesp;
++	struct nfs4_file_layout_dsaddr *dsaddr;
++
++	/* Get the stripe count (number of stripe index) */
++	cnt = be32_to_cpup(p++);
++	dprintk("%s stripe count  %d\n", __func__, cnt);
++	if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
++		printk(KERN_WARNING "%s: stripe count %d greater than "
++		       "supported maximum %d\n", __func__,
++			cnt, NFS4_PNFS_MAX_STRIPE_CNT);
++		goto out_err;
++	}
++
++	/* Check the multipath list count */
++	indicesp = p;
++	p += XDR_QUADLEN(cnt << 2);
++	num = be32_to_cpup(p++);
++	dprintk("%s ds_num %u\n", __func__, num);
++	if (num > NFS4_PNFS_MAX_MULTI_CNT) {
++		printk(KERN_WARNING "%s: multipath count %d greater than "
++			"supported maximum %d\n", __func__,
++			num, NFS4_PNFS_MAX_MULTI_CNT);
++		goto out_err;
++	}
++	dsaddr = kzalloc(sizeof(*dsaddr) +
++			(sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
++			GFP_KERNEL);
++	if (!dsaddr)
++		goto out_err;
++
++	dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
++	if (!dsaddr->stripe_indices)
++		goto out_err_free;
++
++	dsaddr->stripe_count = cnt;
++	dsaddr->ds_num = num;
++
++	memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id,
++	       NFS4_PNFS_DEVICEID4_SIZE);
++
++	/* Go back and read the stripe indices, now that ds_num is known */
++	p = indicesp;
++	for (i = 0; i < dsaddr->stripe_count; i++) {
++		dummy = be32_to_cpup(p++);
++		/* a stripe index must name one of the ds_num list entries */
++		if ((u32)dummy >= num)
++			goto out_err_free;
++		dsaddr->stripe_indices[i] = dummy;
++	}
++	/* Skip already read multipath list count */
++	p++;
++
++	for (i = 0; i < dsaddr->ds_num; i++) {
++		int j;
++
++		dummy = be32_to_cpup(p++); /* multipath count */
++		if (dummy > 1) {
++			printk(KERN_WARNING
++			       "%s: Multipath count %d not supported, "
++			       "skipping all greater than 1\n", __func__,
++				dummy);
++		}
++		for (j = 0; j < dummy; j++) {
++			if (j == 0) {
++				dsaddr->ds_list[i] = decode_and_add_ds(&p, ino);
++				if (dsaddr->ds_list[i] == NULL)
++					goto out_err_free;
++			} else {
++				u32 len;
++				/* skip extra multipath */
++				len = be32_to_cpup(p++);
++				p += XDR_QUADLEN(len);
++				len = be32_to_cpup(p++);
++				p += XDR_QUADLEN(len);
++				continue;
++			}
++		}
++	}
++	nfs4_init_deviceid_node(&dsaddr->deviceid);
++
++	return dsaddr;
++
++out_err_free:
++	nfs4_fl_free_deviceid(dsaddr);
++out_err:
++	dprintk("%s ERROR: returning NULL\n", __func__);
++	return NULL;
++}
++
++/*
++ * Decode the opaque device specified in 'dev'
++ * and add it to the list of available devices.
++ * If the deviceid is already cached, nfs4_add_deviceid will return
++ * a pointer to the cached struct and throw away the new.
++ */
++static struct nfs4_file_layout_dsaddr*
++decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
++{
++	struct nfs4_file_layout_dsaddr *dsaddr;
++	struct nfs4_deviceid *d;
++
++	dsaddr = decode_device(inode, dev);
++	if (!dsaddr) {
++		printk(KERN_WARNING "%s: Could not decode or add device\n",
++			__func__);
++		return NULL;
++	}
++
++	d = nfs4_add_get_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
++			      &dsaddr->deviceid);
++
++	return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
++}
++
++/*
++ * Fetch the device info for dev_id with GETDEVICEINFO, decode it, add it
++ * to the client's deviceid cache and return it (NULL on any failure).
++ */
++struct nfs4_file_layout_dsaddr *
++get_device_info(struct inode *inode, struct pnfs_deviceid *dev_id)
++{
++	struct pnfs_device *pdev = NULL;
++	u32 max_resp_sz;
++	int max_pages;
++	struct page **pages = NULL;
++	struct nfs4_file_layout_dsaddr *dsaddr = NULL;
++	int rc, i;
++	struct nfs_server *server = NFS_SERVER(inode);
++
++	/*
++	 * Use the session max response size as the basis for setting
++	 * GETDEVICEINFO's maxcount
++	 */
++	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
++	max_pages = max_resp_sz >> PAGE_SHIFT;
++	dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
++		__func__, inode, max_resp_sz, max_pages);
++
++	pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
++	if (pdev == NULL)
++		return NULL;
++
++	pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
++	if (pages == NULL) {
++		kfree(pdev);
++		return NULL;
++	}
++	for (i = 0; i < max_pages; i++) {
++		pages[i] = alloc_page(GFP_KERNEL);
++		if (!pages[i])
++			goto out_free;
++	}
++
++	/* set pdev->area */
++	pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
++	if (!pdev->area)
++		goto out_free;
++
++	memcpy(&pdev->dev_id, dev_id, NFS4_PNFS_DEVICEID4_SIZE);
++	pdev->layout_type = LAYOUT_NFSV4_1_FILES;
++	pdev->pages = pages;
++	pdev->pgbase = 0;
++	pdev->pglen = PAGE_SIZE * max_pages;
++	pdev->mincount = 0;
++	/* TODO: Update types when CB_NOTIFY_DEVICEID is available */
++	pdev->dev_notify_types = 0;
++
++	rc = pnfs_callback_ops->nfs_getdeviceinfo(server, pdev);
++	dprintk("%s getdevice info returns %d\n", __func__, rc);
++	if (rc)
++		goto out_free;
++
++	/*
++	 * Found new device, need to decode it and then add it to the
++	 * list of known devices for this mountpoint.
++	 */
++	dsaddr = decode_and_add_device(inode, pdev);
++out_free:
++	if (pdev->area != NULL)
++		vunmap(pdev->area);
++	for (i = 0; i < max_pages && pages[i]; i++) /* stop at first NULL */
++		__free_page(pages[i]);
++	kfree(pages);
++	kfree(pdev);
++	dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
++	return dsaddr;
++}
++
++struct nfs4_file_layout_dsaddr *
++nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *d;
++
++	d = nfs4_find_get_deviceid(clp->cl_devid_cache, id);
++	dprintk("%s device id (%s) nfs4_deviceid %p\n", __func__,
++		deviceid_fmt(id), d);
++	return (d == NULL) ? NULL :
++		container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
++}
++
++/*
++ * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit
++ * Then: ((res + fsi) % dsaddr->stripe_count)
++ */
++static inline u32
++_nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	u64 tmp;
++
++	tmp = offset - flseg->pattern_offset;
++	do_div(tmp, flseg->stripe_unit);
++	tmp += flseg->first_stripe_index;
++	return do_div(tmp, FILE_DSADDR(lseg)->stripe_count);
++}
++
++u32
++nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	u32 j;
++
++	j = _nfs4_fl_calc_j_index(lseg, offset);
++	return FILE_DSADDR(lseg)->stripe_indices[j];
++}
++
++struct nfs_fh *
++nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	u32 i;
++
++	if (flseg->stripe_type == STRIPE_SPARSE) {
++		if (flseg->num_fh == 1)
++			i = 0;
++		else if (flseg->num_fh == 0)
++			return NULL;
++		else
++			i = nfs4_fl_calc_ds_index(lseg, offset);
++	} else
++		i = _nfs4_fl_calc_j_index(lseg, offset);
++	return &flseg->fh_array[i];
++}
++
++struct nfs4_pnfs_ds *
++nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	struct nfs4_file_layout_dsaddr *dsaddr;
++
++	dsaddr = FILE_DSADDR(lseg);
++	if (dsaddr->ds_list[ds_idx] == NULL) {
++		printk(KERN_ERR "%s: No data server for device id (%s)!!\n",
++			__func__, deviceid_fmt(&flseg->dev_id));
++		return NULL;
++	}
++
++	if (!dsaddr->ds_list[ds_idx]->ds_clp) {
++		int err;
++
++		err = nfs4_pnfs_ds_create(PNFS_NFS_SERVER(lseg->layout),
++					  dsaddr->ds_list[ds_idx]);
++		if (err) {
++			printk(KERN_ERR "%s nfs4_pnfs_ds_create error %d\n",
++			       __func__, err);
++			return NULL;
++		}
++	}
++	dprintk("%s: dev_id=%s, ds_idx=%u\n",
++		__func__, deviceid_fmt(&flseg->dev_id), ds_idx);
++
++	return dsaddr->ds_list[ds_idx];
++}
++
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-08-31 20:42:05.520222923 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-08-31 20:42:05.520222923 -0400
+@@ -0,0 +1,96 @@
++/*
++ *  nfs4filelayout.h
++ *
++ *  NFSv4 file layout driver data structures.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef FS_NFS_NFS4FILELAYOUT_H
++#define FS_NFS_NFS4FILELAYOUT_H
++
++#include <linux/kref.h>
++#include <linux/nfs4_pnfs.h>
++
++#define NFS4_PNFS_DEV_HASH_BITS 5
++#define NFS4_PNFS_DEV_HASH_SIZE (1 << NFS4_PNFS_DEV_HASH_BITS)
++#define NFS4_PNFS_DEV_HASH_MASK (NFS4_PNFS_DEV_HASH_SIZE - 1)
++
++#define NFS4_PNFS_MAX_STRIPE_CNT 4096
++#define NFS4_PNFS_MAX_MULTI_CNT  64 /* 256 fit into a u8 stripe_index */
++#define NFS4_PNFS_MAX_MULTI_DS   2
++
++#define FILE_DSADDR(lseg) (container_of((lseg)->deviceid, \
++					struct nfs4_file_layout_dsaddr, \
++					deviceid))
++
++enum stripetype4 {
++	STRIPE_SPARSE = 1,
++	STRIPE_DENSE = 2
++};
++
++/* Individual ip address */
++struct nfs4_pnfs_ds {
++	struct list_head	ds_node;  /* nfs4_pnfs_dev_hlist dev_dslist */
++	u32 			ds_ip_addr;
++	u32 			ds_port;
++	struct nfs_client	*ds_clp;
++	atomic_t		ds_count;
++	char r_addr[29];
++};
++
++struct nfs4_file_layout_dsaddr {
++	struct nfs4_deviceid	deviceid;
++	u32 			stripe_count;
++	u8			*stripe_indices;
++	u32			ds_num;
++	struct nfs4_pnfs_ds	*ds_list[1];
++};
++
++struct nfs4_pnfs_dev_hlist {
++	rwlock_t		dev_lock;
++	struct hlist_head	dev_list[NFS4_PNFS_DEV_HASH_SIZE];
++};
++
++struct nfs4_filelayout_segment {
++	u32 stripe_type;
++	u32 commit_through_mds;
++	u32 stripe_unit;
++	u32 first_stripe_index;
++	u64 pattern_offset;
++	struct pnfs_deviceid dev_id;
++	unsigned int num_fh;
++	struct nfs_fh *fh_array;
++};
++
++struct nfs4_filelayout {
++	struct pnfs_layout_hdr fl_layout;
++	u32 stripe_unit;
++};
++
++extern struct nfs_fh *
++nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, loff_t offset);
++
++static inline struct nfs4_filelayout *
++FILE_LO(struct pnfs_layout_hdr *lo)
++{
++	return container_of(lo, struct nfs4_filelayout, fl_layout);
++}
++
++extern struct pnfs_client_operations *pnfs_callback_ops;
++
++extern void nfs4_fl_free_deviceid_callback(struct kref *);
++extern void print_ds(struct nfs4_pnfs_ds *ds);
++char *deviceid_fmt(const struct pnfs_deviceid *dev_id);
++u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, loff_t offset);
++struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
++					u32 ds_idx);
++extern struct nfs4_file_layout_dsaddr *
++nfs4_fl_find_get_deviceid(struct nfs_client *, struct pnfs_deviceid *dev_id);
++struct nfs4_file_layout_dsaddr *
++get_device_info(struct inode *inode, struct pnfs_deviceid *dev_id);
++
++#endif /* FS_NFS_NFS4FILELAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nfs4_fs.h
+--- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-08-31 20:41:19.154160465 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-31 20:42:05.519163219 -0400
+@@ -45,8 +45,28 @@ enum nfs4_client_state {
+ 	NFS4CLNT_RECLAIM_NOGRACE,
+ 	NFS4CLNT_DELEGRETURN,
+ 	NFS4CLNT_SESSION_RESET,
+-	NFS4CLNT_SESSION_DRAINING,
+ 	NFS4CLNT_RECALL_SLOT,
++	NFS4CLNT_LAYOUT_RECALL,
++};
++
++enum nfs4_session_state {
++	NFS4_SESSION_INITING,
++	NFS4_SESSION_DRAINING,
++};
++
++struct nfs4_minor_version_ops {
++	u32	minor_version;
++
++	int	(*call_sync)(struct nfs_server *server,
++			struct rpc_message *msg,
++			struct nfs4_sequence_args *args,
++			struct nfs4_sequence_res *res,
++			int cache_reply);
++	int	(*validate_stateid)(struct nfs_delegation *,
++			const nfs4_stateid *);
++	const struct nfs4_state_recovery_ops *reboot_recovery_ops;
++	const struct nfs4_state_recovery_ops *nograce_recovery_ops;
++	const struct nfs4_state_maintenance_ops *state_renewal_ops;
+ };
+ 
+ /*
+@@ -89,7 +109,6 @@ struct nfs_unique_id {
+  */
+ struct nfs4_state_owner {
+ 	struct nfs_unique_id so_owner_id;
+-	struct nfs_client    *so_client;
+ 	struct nfs_server    *so_server;
+ 	struct rb_node	     so_client_node;
+ 
+@@ -99,7 +118,6 @@ struct nfs4_state_owner {
+ 	atomic_t	     so_count;
+ 	unsigned long	     so_flags;
+ 	struct list_head     so_states;
+-	struct list_head     so_delegations;
+ 	struct nfs_seqid_counter so_seqid;
+ 	struct rpc_sequence  so_sequence;
+ };
+@@ -125,10 +143,20 @@ enum {
+  * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
+  */
+ 
++struct nfs4_lock_owner {
++	unsigned int lo_type;
++#define NFS4_ANY_LOCK_TYPE	(0U)
++#define NFS4_FLOCK_LOCK_TYPE	(1U << 0)
++#define NFS4_POSIX_LOCK_TYPE	(1U << 1)
++	union {
++		fl_owner_t posix_owner;
++		pid_t flock_owner;
++	} lo_u;
++};
++
+ struct nfs4_lock_state {
+ 	struct list_head	ls_locks;	/* Other lock stateids */
+ 	struct nfs4_state *	ls_state;	/* Pointer to open state */
+-	fl_owner_t		ls_owner;	/* POSIX lock owner */
+ #define NFS_LOCK_INITIALIZED 1
+ 	int			ls_flags;
+ 	struct nfs_seqid_counter	ls_seqid;
+@@ -136,6 +164,7 @@ struct nfs4_lock_state {
+ 	struct nfs_unique_id	ls_id;
+ 	nfs4_stateid		ls_stateid;
+ 	atomic_t		ls_count;
++	struct nfs4_lock_owner	ls_owner;
+ };
+ 
+ /* bits for nfs4_state->flags */
+@@ -219,22 +248,34 @@ extern int nfs4_open_revalidate(struct i
+ extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
+ extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
+ 		struct nfs4_fs_locations *fs_locations, struct page *page);
++extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
+ 
+-extern struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[];
+-extern struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[];
+ #if defined(CONFIG_NFS_V4_1)
+-extern int nfs4_setup_sequence(struct nfs_client *clp,
++static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
++{
++	return server->nfs_client->cl_session;
++}
++
++extern int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
+ 		int cache_reply, struct rpc_task *task);
+ extern void nfs4_destroy_session(struct nfs4_session *session);
+ extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
++extern int nfs4_proc_exchange_id(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_proc_create_session(struct nfs_client *);
+ extern int nfs4_proc_destroy_session(struct nfs4_session *);
+ extern int nfs4_init_session(struct nfs_server *server);
+ extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
+ 		struct nfs_fsinfo *fsinfo);
+ #else /* CONFIG_NFS_v4_1 */
+-static inline int nfs4_setup_sequence(struct nfs_client *clp,
++static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
++{
++	return NULL;
++}
++
++static inline int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
+ 		int cache_reply, struct rpc_task *task)
+ {
+@@ -247,12 +288,12 @@ static inline int nfs4_init_session(stru
+ }
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+-extern struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[];
++extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
+ 
+ extern const u32 nfs4_fattr_bitmap[2];
+ extern const u32 nfs4_statfs_bitmap[2];
+ extern const u32 nfs4_pathconf_bitmap[2];
+-extern const u32 nfs4_fsinfo_bitmap[2];
++extern const u32 nfs4_fsinfo_bitmap[3];
+ extern const u32 nfs4_fs_locations_bitmap[2];
+ 
+ /* nfs4renewd.c */
+@@ -284,7 +325,7 @@ extern void nfs41_handle_sequence_flag_e
+ extern void nfs41_handle_recall_slot(struct nfs_client *clp);
+ extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
+ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
+-extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
++extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
+ 
+ extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
+ extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
+@@ -293,6 +334,7 @@ extern void nfs_increment_lock_seqid(int
+ extern void nfs_release_seqid(struct nfs_seqid *seqid);
+ extern void nfs_free_seqid(struct nfs_seqid *seqid);
+ 
++/* write.c */
+ extern const nfs4_stateid zero_stateid;
+ 
+ /* nfs4xdr.c */
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-08-31 20:41:19.157140145 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-31 20:42:05.524099925 -0400
+@@ -49,12 +49,14 @@
+ #include <linux/mount.h>
+ #include <linux/module.h>
+ #include <linux/sunrpc/bc_xprt.h>
++#include <linux/nfs4_pnfs.h>
+ 
+ #include "nfs4_fs.h"
+ #include "delegation.h"
+ #include "internal.h"
+ #include "iostat.h"
+ #include "callback.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_PROC
+ 
+@@ -67,7 +69,7 @@ struct nfs4_opendata;
+ static int _nfs4_proc_open(struct nfs4_opendata *data);
+ static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
+ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
+-static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
++static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, struct nfs_client *);
+ static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
+ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
+ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
+@@ -125,11 +127,16 @@ const u32 nfs4_pathconf_bitmap[2] = {
+ 	0
+ };
+ 
+-const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
++const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE
+ 			| FATTR4_WORD0_MAXREAD
+ 			| FATTR4_WORD0_MAXWRITE
+ 			| FATTR4_WORD0_LEASE_TIME,
++#ifdef CONFIG_NFS_V4_1
++			FATTR4_WORD1_FS_LAYOUT_TYPES,
++			FATTR4_WORD2_LAYOUT_BLKSIZE
++#else /* CONFIG_NFS_V4_1 */
+ 			0
++#endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+ const u32 nfs4_fs_locations_bitmap[2] = {
+@@ -356,7 +363,7 @@ static void nfs41_check_drain_session_co
+ {
+ 	struct rpc_task *task;
+ 
+-	if (!test_bit(NFS4CLNT_SESSION_DRAINING, &ses->clp->cl_state)) {
++	if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
+ 		task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq);
+ 		if (task)
+ 			rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+@@ -370,12 +377,11 @@ static void nfs41_check_drain_session_co
+ 	complete(&ses->complete);
+ }
+ 
+-static void nfs41_sequence_free_slot(const struct nfs_client *clp,
+-			      struct nfs4_sequence_res *res)
++static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
+ {
+ 	struct nfs4_slot_table *tbl;
+ 
+-	tbl = &clp->cl_session->fc_slot_table;
++	tbl = &res->sr_session->fc_slot_table;
+ 	if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) {
+ 		/* just wake up the next guy waiting since
+ 		 * we may have not consumed a slot after all */
+@@ -385,18 +391,17 @@ static void nfs41_sequence_free_slot(con
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+ 	nfs4_free_slot(tbl, res->sr_slotid);
+-	nfs41_check_drain_session_complete(clp->cl_session);
++	nfs41_check_drain_session_complete(res->sr_session);
+ 	spin_unlock(&tbl->slot_tbl_lock);
+ 	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+ }
+ 
+-static void nfs41_sequence_done(struct nfs_client *clp,
+-				struct nfs4_sequence_res *res,
+-				int rpc_status)
++static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
+ {
+ 	unsigned long timestamp;
+ 	struct nfs4_slot_table *tbl;
+ 	struct nfs4_slot *slot;
++	struct nfs_client *clp;
+ 
+ 	/*
+ 	 * sr_status remains 1 if an RPC level error occurred. The server
+@@ -411,13 +416,16 @@ static void nfs41_sequence_done(struct n
+ 	if (res->sr_slotid == NFS4_MAX_SLOT_TABLE)
+ 		goto out;
+ 
++	tbl = &res->sr_session->fc_slot_table;
++	slot = tbl->slots + res->sr_slotid;
++
+ 	/* Check the SEQUENCE operation status */
+-	if (res->sr_status == 0) {
+-		tbl = &clp->cl_session->fc_slot_table;
+-		slot = tbl->slots + res->sr_slotid;
++	switch (res->sr_status) {
++	case 0:
+ 		/* Update the slot's sequence and clientid lease timer */
+ 		++slot->seq_nr;
+ 		timestamp = res->sr_renewal_time;
++		clp = res->sr_session->clp;
+ 		spin_lock(&clp->cl_lock);
+ 		if (time_before(clp->cl_last_renewal, timestamp))
+ 			clp->cl_last_renewal = timestamp;
+@@ -425,11 +433,39 @@ static void nfs41_sequence_done(struct n
+ 		/* Check sequence flags */
+ 		if (atomic_read(&clp->cl_count) > 1)
+ 			nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
++		break;
++	case -NFS4ERR_DELAY:
++		/* The server detected a resend of the RPC call and
++		 * returned NFS4ERR_DELAY as per Section 2.10.6.2
++		 * of RFC5661.
++		 */
++		dprintk("%s: slot=%d seq=%d: Operation in progress\n",
++				__func__, res->sr_slotid, slot->seq_nr);
++		goto out_retry;
++	default:
++		/* Just update the slot sequence no. */
++		++slot->seq_nr;
+ 	}
+ out:
+ 	/* The session may be reset by one of the error handlers. */
+ 	dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
+-	nfs41_sequence_free_slot(clp, res);
++	nfs41_sequence_free_slot(res);
++	return 1;
++out_retry:
++	rpc_delay(task, NFS4_POLL_RETRY_MAX);
++	rpc_restart_call(task);
++	/* FIXME: rpc_restart_call() should be made to return success/fail */
++	if (RPC_ASSASSINATED(task))
++		goto out;
++	return 0;
++}
++
++static int nfs4_sequence_done(struct rpc_task *task,
++			       struct nfs4_sequence_res *res)
++{
++	if (res->sr_session == NULL)
++		return 1;
++	return nfs41_sequence_done(task, res);
+ }
+ 
+ /*
+@@ -480,12 +516,11 @@ static int nfs41_setup_sequence(struct n
+ 	if (res->sr_slotid != NFS4_MAX_SLOT_TABLE)
+ 		return 0;
+ 
+-	memset(res, 0, sizeof(*res));
+ 	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+ 	tbl = &session->fc_slot_table;
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+-	if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state) &&
++	if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
+ 	    !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
+ 		/*
+ 		 * The state manager will wait until the slot table is empty.
+@@ -525,6 +560,7 @@ static int nfs41_setup_sequence(struct n
+ 	res->sr_session = session;
+ 	res->sr_slotid = slotid;
+ 	res->sr_renewal_time = jiffies;
++	res->sr_status_flags = 0;
+ 	/*
+ 	 * sr_status is only set in decode_sequence, and so will remain
+ 	 * set to 1 if an rpc level failure occurs.
+@@ -533,33 +569,36 @@ static int nfs41_setup_sequence(struct n
+ 	return 0;
+ }
+ 
+-int nfs4_setup_sequence(struct nfs_client *clp,
++int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 			struct nfs4_sequence_args *args,
+ 			struct nfs4_sequence_res *res,
+ 			int cache_reply,
+ 			struct rpc_task *task)
+ {
++	struct nfs4_session *session = nfs4_get_session(server);
+ 	int ret = 0;
+ 
++	if (ds_session)
++		session = ds_session;
++	if (session == NULL) {
++		args->sa_session = NULL;
++		res->sr_session = NULL;
++		goto out;
++	}
++
+ 	dprintk("--> %s clp %p session %p sr_slotid %d\n",
+-		__func__, clp, clp->cl_session, res->sr_slotid);
++		__func__, session->clp, session, res->sr_slotid);
+ 
+-	if (!nfs4_has_session(clp))
+-		goto out;
+-	ret = nfs41_setup_sequence(clp->cl_session, args, res, cache_reply,
++	ret = nfs41_setup_sequence(session, args, res, cache_reply,
+ 				   task);
+-	if (ret && ret != -EAGAIN) {
+-		/* terminate rpc task */
+-		task->tk_status = ret;
+-		task->tk_action = NULL;
+-	}
+ out:
+ 	dprintk("<-- %s status=%d\n", __func__, ret);
+ 	return ret;
+ }
+ 
+ struct nfs41_call_sync_data {
+-	struct nfs_client *clp;
++	const struct nfs_server *seq_server;
+ 	struct nfs4_sequence_args *seq_args;
+ 	struct nfs4_sequence_res *seq_res;
+ 	int cache_reply;
+@@ -569,9 +608,9 @@ static void nfs41_call_sync_prepare(stru
+ {
+ 	struct nfs41_call_sync_data *data = calldata;
+ 
+-	dprintk("--> %s data->clp->cl_session %p\n", __func__,
+-		data->clp->cl_session);
+-	if (nfs4_setup_sequence(data->clp, data->seq_args,
++	dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server);
++
++	if (nfs4_setup_sequence(data->seq_server, NULL, data->seq_args,
+ 				data->seq_res, data->cache_reply, task))
+ 		return;
+ 	rpc_call_start(task);
+@@ -587,7 +626,7 @@ static void nfs41_call_sync_done(struct 
+ {
+ 	struct nfs41_call_sync_data *data = calldata;
+ 
+-	nfs41_sequence_done(data->clp, data->seq_res, task->tk_status);
++	nfs41_sequence_done(task, data->seq_res);
+ }
+ 
+ struct rpc_call_ops nfs41_call_sync_ops = {
+@@ -600,8 +639,7 @@ struct rpc_call_ops nfs41_call_priv_sync
+ 	.rpc_call_done = nfs41_call_sync_done,
+ };
+ 
+-static int nfs4_call_sync_sequence(struct nfs_client *clp,
+-				   struct rpc_clnt *clnt,
++static int nfs4_call_sync_sequence(struct nfs_server *server,
+ 				   struct rpc_message *msg,
+ 				   struct nfs4_sequence_args *args,
+ 				   struct nfs4_sequence_res *res,
+@@ -611,13 +649,13 @@ static int nfs4_call_sync_sequence(struc
+ 	int ret;
+ 	struct rpc_task *task;
+ 	struct nfs41_call_sync_data data = {
+-		.clp = clp,
++		.seq_server = server,
+ 		.seq_args = args,
+ 		.seq_res = res,
+ 		.cache_reply = cache_reply,
+ 	};
+ 	struct rpc_task_setup task_setup = {
+-		.rpc_client = clnt,
++		.rpc_client = server->client,
+ 		.rpc_message = msg,
+ 		.callback_ops = &nfs41_call_sync_ops,
+ 		.callback_data = &data
+@@ -642,10 +680,15 @@ int _nfs4_call_sync_session(struct nfs_s
+ 			    struct nfs4_sequence_res *res,
+ 			    int cache_reply)
+ {
+-	return nfs4_call_sync_sequence(server->nfs_client, server->client,
+-				       msg, args, res, cache_reply, 0);
++	return nfs4_call_sync_sequence(server, msg, args, res, cache_reply, 0);
+ }
+ 
++#else
++static int nfs4_sequence_done(struct rpc_task *task,
++			       struct nfs4_sequence_res *res)
++{
++	return 1;
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ int _nfs4_call_sync(struct nfs_server *server,
+@@ -659,18 +702,9 @@ int _nfs4_call_sync(struct nfs_server *s
+ }
+ 
+ #define nfs4_call_sync(server, msg, args, res, cache_reply) \
+-	(server)->nfs_client->cl_call_sync((server), (msg), &(args)->seq_args, \
++	(server)->nfs_client->cl_mvops->call_sync((server), (msg), &(args)->seq_args, \
+ 			&(res)->seq_res, (cache_reply))
+ 
+-static void nfs4_sequence_done(const struct nfs_server *server,
+-			       struct nfs4_sequence_res *res, int rpc_status)
+-{
+-#ifdef CONFIG_NFS_V4_1
+-	if (nfs4_has_session(server->nfs_client))
+-		nfs41_sequence_done(server->nfs_client, res, rpc_status);
+-#endif /* CONFIG_NFS_V4_1 */
+-}
+-
+ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
+ {
+ 	struct nfs_inode *nfsi = NFS_I(dir);
+@@ -745,19 +779,14 @@ static struct nfs4_opendata *nfs4_openda
+ 	p->o_arg.server = server;
+ 	p->o_arg.bitmask = server->attr_bitmask;
+ 	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
+-	if (flags & O_EXCL) {
+-		if (nfs4_has_persistent_session(server->nfs_client)) {
+-			/* GUARDED */
+-			p->o_arg.u.attrs = &p->attrs;
+-			memcpy(&p->attrs, attrs, sizeof(p->attrs));
+-		} else { /* EXCLUSIVE4_1 */
+-			u32 *s = (u32 *) p->o_arg.u.verifier.data;
+-			s[0] = jiffies;
+-			s[1] = current->pid;
+-		}
+-	} else if (flags & O_CREAT) {
++	if (flags & O_CREAT) {
++		u32 *s;
++
+ 		p->o_arg.u.attrs = &p->attrs;
+ 		memcpy(&p->attrs, attrs, sizeof(p->attrs));
++		s = (u32 *) p->o_arg.u.verifier.data;
++		s[0] = jiffies;
++		s[1] = current->pid;
+ 	}
+ 	p->c_arg.fh = &p->o_res.fh;
+ 	p->c_arg.stateid = &p->o_res.stateid;
+@@ -851,8 +880,10 @@ static void update_open_stateflags(struc
+ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
+ {
+ 	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+-		memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
+-	memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
++		memcpy(state->stateid.u.data, stateid->u.data,
++		       sizeof(state->stateid.u.data));
++	memcpy(state->open_stateid.u.data, stateid->u.data,
++	       sizeof(state->open_stateid.u.data));
+ 	switch (fmode) {
+ 		case FMODE_READ:
+ 			set_bit(NFS_O_RDONLY_STATE, &state->flags);
+@@ -880,7 +911,8 @@ static void __update_open_stateid(struct
+ 	 */
+ 	write_seqlock(&state->seqlock);
+ 	if (deleg_stateid != NULL) {
+-		memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
++		memcpy(state->stateid.u.data, deleg_stateid->u.data,
++		       sizeof(state->stateid.u.data));
+ 		set_bit(NFS_DELEGATED_STATE, &state->flags);
+ 	}
+ 	if (open_stateid != NULL)
+@@ -911,7 +943,8 @@ static int update_open_stateid(struct nf
+ 
+ 	if (delegation == NULL)
+ 		delegation = &deleg_cur->stateid;
+-	else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0)
++	else if (memcmp(deleg_cur->stateid.u.data, delegation->u.data,
++			NFS4_STATEID_SIZE) != 0)
+ 		goto no_delegation_unlock;
+ 
+ 	nfs_mark_delegation_referenced(deleg_cur);
+@@ -973,7 +1006,8 @@ static struct nfs4_state *nfs4_try_open_
+ 			break;
+ 		}
+ 		/* Save the delegation */
+-		memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
++		memcpy(stateid.u.data, delegation->stateid.u.data,
++		       sizeof(stateid.u.data));
+ 		rcu_read_unlock();
+ 		ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
+ 		if (ret != 0)
+@@ -1127,10 +1161,13 @@ static int nfs4_open_recover(struct nfs4
+ 	 * Check if we need to update the current stateid.
+ 	 */
+ 	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
+-	    memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
++	    memcmp(state->stateid.u.data, state->open_stateid.u.data,
++		   sizeof(state->stateid.u.data)) != 0) {
+ 		write_seqlock(&state->seqlock);
+ 		if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+-			memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
++			memcpy(state->stateid.u.data,
++			       state->open_stateid.u.data,
++			       sizeof(state->stateid.u.data));
+ 		write_sequnlock(&state->seqlock);
+ 	}
+ 	return 0;
+@@ -1199,8 +1236,8 @@ static int _nfs4_open_delegation_recall(
+ 	if (IS_ERR(opendata))
+ 		return PTR_ERR(opendata);
+ 	opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
+-	memcpy(opendata->o_arg.u.delegation.data, stateid->data,
+-			sizeof(opendata->o_arg.u.delegation.data));
++	memcpy(opendata->o_arg.u.delegation.u.data, stateid->u.data,
++			sizeof(opendata->o_arg.u.delegation.u.data));
+ 	ret = nfs4_open_recover(opendata, state);
+ 	nfs4_opendata_put(opendata);
+ 	return ret;
+@@ -1258,8 +1295,8 @@ static void nfs4_open_confirm_done(struc
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+ 	if (data->rpc_status == 0) {
+-		memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
+-				sizeof(data->o_res.stateid.data));
++		memcpy(data->o_res.stateid.u.data, data->c_res.stateid.u.data,
++				sizeof(data->o_res.stateid.u.data));
+ 		nfs_confirm_seqid(&data->owner->so_seqid, 0);
+ 		renew_lease(data->o_res.server, data->timestamp);
+ 		data->rpc_done = 1;
+@@ -1356,13 +1393,13 @@ static void nfs4_open_prepare(struct rpc
+ 	}
+ 	/* Update sequence id. */
+ 	data->o_arg.id = sp->so_owner_id.id;
+-	data->o_arg.clientid = sp->so_client->cl_clientid;
++	data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid;
+ 	if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
+ 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
+ 		nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
+ 	}
+ 	data->timestamp = jiffies;
+-	if (nfs4_setup_sequence(data->o_arg.server->nfs_client,
++	if (nfs4_setup_sequence(data->o_arg.server, NULL,
+ 				&data->o_arg.seq_args,
+ 				&data->o_res.seq_res, 1, task))
+ 		return;
+@@ -1385,8 +1422,8 @@ static void nfs4_open_done(struct rpc_ta
+ 
+ 	data->rpc_status = task->tk_status;
+ 
+-	nfs4_sequence_done(data->o_arg.server, &data->o_res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->o_res.seq_res))
++		return;
+ 
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+@@ -1539,9 +1576,8 @@ static int _nfs4_proc_open(struct nfs4_o
+ 	return 0;
+ }
+ 
+-static int nfs4_recover_expired_lease(struct nfs_server *server)
++int nfs4_recover_expired_lease(struct nfs_client *clp)
+ {
+-	struct nfs_client *clp = server->nfs_client;
+ 	unsigned int loop;
+ 	int ret;
+ 
+@@ -1557,6 +1593,7 @@ static int nfs4_recover_expired_lease(st
+ 	}
+ 	return ret;
+ }
++EXPORT_SYMBOL(nfs4_recover_expired_lease);
+ 
+ /*
+  * OPEN_EXPIRED:
+@@ -1646,7 +1683,7 @@ static int _nfs4_do_open(struct inode *d
+ 		dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
+ 		goto out_err;
+ 	}
+-	status = nfs4_recover_expired_lease(server);
++	status = nfs4_recover_expired_lease(server->nfs_client);
+ 	if (status != 0)
+ 		goto err_put_state_owner;
+ 	if (path->dentry->d_inode != NULL)
+@@ -1773,7 +1810,7 @@ static int _nfs4_do_setattr(struct inode
+ 	if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
+ 		/* Use that stateid */
+ 	} else if (state != NULL) {
+-		nfs4_copy_stateid(&arg.stateid, state, current->files);
++		nfs4_copy_stateid(&arg.stateid, state, current->files, current->tgid);
+ 	} else
+ 		memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
+ 
+@@ -1838,7 +1875,8 @@ static void nfs4_close_done(struct rpc_t
+ 	struct nfs4_state *state = calldata->state;
+ 	struct nfs_server *server = NFS_SERVER(calldata->inode);
+ 
+-	nfs4_sequence_done(server, &calldata->res.seq_res, task->tk_status);
++	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
++		return;
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+         /* hmm. we are done with the inode, and in the process of freeing
+@@ -1858,7 +1896,7 @@ static void nfs4_close_done(struct rpc_t
+ 			if (calldata->arg.fmode == 0)
+ 				break;
+ 		default:
+-			if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
++			if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
+ 				rpc_restart_call_prepare(task);
+ 	}
+ 	nfs_release_seqid(calldata->arg.seqid);
+@@ -1903,7 +1941,7 @@ static void nfs4_close_prepare(struct rp
+ 
+ 	nfs_fattr_init(calldata->res.fattr);
+ 	calldata->timestamp = jiffies;
+-	if (nfs4_setup_sequence((NFS_SERVER(calldata->inode))->nfs_client,
++	if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), NULL,
+ 				&calldata->arg.seq_args, &calldata->res.seq_res,
+ 				1, task))
+ 		return;
+@@ -2323,6 +2361,9 @@ nfs4_proc_setattr(struct dentry *dentry,
+ 	struct nfs4_state *state = NULL;
+ 	int status;
+ 
++	if (pnfs_ld_layoutret_on_setattr(inode))
++		pnfs_return_layout(inode, NULL, NULL, RETURN_FILE, true);
++
+ 	nfs_fattr_init(fattr);
+ 	
+ 	/* Search for an existing open(O_WRITE) file */
+@@ -2648,8 +2689,9 @@ static int nfs4_proc_unlink_done(struct 
+ {
+ 	struct nfs_removeres *res = task->tk_msg.rpc_resp;
+ 
+-	nfs4_sequence_done(res->server, &res->seq_res, task->tk_status);
+-	if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
++	if (!nfs4_sequence_done(task, &res->seq_res))
++		return 0;
++	if (nfs4_async_handle_error(task, res->server, NULL, NULL) == -EAGAIN)
+ 		return 0;
+ 	update_changeattr(dir, &res->cinfo);
+ 	nfs_post_op_update_inode(dir, res->dir_attr);
+@@ -3090,18 +3132,31 @@ static int nfs4_proc_pathconf(struct nfs
+ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
+ {
+ 	struct nfs_server *server = NFS_SERVER(data->inode);
++	struct nfs_client *client = server->nfs_client;
+ 
+ 	dprintk("--> %s\n", __func__);
+ 
+-	nfs4_sequence_done(server, &data->res.seq_res, task->tk_status);
++#ifdef CONFIG_NFS_V4_1
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
+ 
+-	if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
+-		nfs_restart_rpc(task, server->nfs_client);
++	if (nfs4_async_handle_error(task, server, data->args.context->state, client) == -EAGAIN) {
++		nfs_restart_rpc(task, client);
+ 		return -EAGAIN;
+ 	}
+ 
+ 	nfs_invalidate_atime(data->inode);
+-	if (task->tk_status > 0)
++	if (task->tk_status > 0 && client == server->nfs_client)
+ 		renew_lease(server, data->timestamp);
+ 	return 0;
+ }
+@@ -3112,20 +3167,56 @@ static void nfs4_proc_read_setup(struct 
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+ }
+ 
++static void pnfs4_update_write_done(struct nfs_inode *nfsi, struct nfs_write_data *data)
++{
++#ifdef CONFIG_NFS_V4_1
++	pnfs_update_last_write(nfsi, data->args.offset, data->res.count);
++	pnfs_need_layoutcommit(nfsi, data->args.context);
++#endif /* CONFIG_NFS_V4_1 */
++}
++
+ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+ {
+ 	struct inode *inode = data->inode;
+-	
+-	nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
+-			   task->tk_status);
++	struct nfs_server *server = NFS_SERVER(inode);
++	struct nfs_client *client = server->nfs_client;
+ 
+-	if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
+-		nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
++
++#ifdef CONFIG_NFS_V4_1
++	/* restore original count after retry? */
++	if (data->pdata.orig_count) {
++		dprintk("%s: restoring original count %u\n", __func__,
++			data->pdata.orig_count);
++		data->args.count = data->pdata.orig_count;
++	}
++
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (nfs4_async_handle_error(task, server, data->args.context->state, client) == -EAGAIN) {
++		nfs_restart_rpc(task, client);
+ 		return -EAGAIN;
+ 	}
++
++	/*
++	 * MDS write: renew lease
++	 * DS write: update lastbyte written, mark for layout commit
++	 */
+ 	if (task->tk_status >= 0) {
+-		renew_lease(NFS_SERVER(inode), data->timestamp);
+-		nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
++		if (client == server->nfs_client) {
++			renew_lease(server, data->timestamp);
++			nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
++		} else
++			pnfs4_update_write_done(NFS_I(inode), data);
+ 	}
+ 	return 0;
+ }
+@@ -3138,20 +3229,42 @@ static void nfs4_proc_write_setup(struct
+ 	data->res.server = server;
+ 	data->timestamp   = jiffies;
+ 
++#ifdef CONFIG_NFS_V4_1
++	/* writes to DS use pnfs vector */
++	if (data->fldata.ds_nfs_client) {
++		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_WRITE];
++		return;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
+ }
+ 
+ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+ {
+ 	struct inode *inode = data->inode;
+-	
+-	nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
+-			   task->tk_status);
+-	if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
++	struct nfs_server *server = NFS_SERVER(data->inode);
++	struct nfs_client *client = server->nfs_client;
++
++#ifdef CONFIG_NFS_V4_1
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS commit\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
++
++	if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL, NULL) == -EAGAIN) {
+ 		nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
+ 		return -EAGAIN;
+ 	}
+-	nfs_refresh_inode(inode, data->res.fattr);
++	if (client == server->nfs_client)
++		nfs_refresh_inode(inode, data->res.fattr);
+ 	return 0;
+ }
+ 
+@@ -3161,6 +3274,12 @@ static void nfs4_proc_commit_setup(struc
+ 	
+ 	data->args.bitmask = server->cache_consistency_bitmask;
+ 	data->res.server = server;
++#if defined(CONFIG_NFS_V4_1)
++	if (data->fldata.ds_nfs_client) {
++		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_COMMIT];
++		return;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
+ }
+ 
+@@ -3464,9 +3583,12 @@ static int nfs4_proc_set_acl(struct inod
+ }
+ 
+ static int
+-_nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs_client *clp, struct nfs4_state *state)
++nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state, struct nfs_client *clp)
+ {
+-	if (!clp || task->tk_status >= 0)
++	if (!clp)
++		clp = server->nfs_client;
++
++	if (task->tk_status >= 0)
+ 		return 0;
+ 	switch(task->tk_status) {
+ 		case -NFS4ERR_ADMIN_REVOKED:
+@@ -3491,8 +3613,9 @@ _nfs4_async_handle_error(struct rpc_task
+ 		case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ 		case -NFS4ERR_SEQ_FALSE_RETRY:
+ 		case -NFS4ERR_SEQ_MISORDERED:
+-			dprintk("%s ERROR %d, Reset session\n", __func__,
+-				task->tk_status);
++			dprintk("%s ERROR %d, Reset session. Exchangeid "
++				"flags 0x%x\n", __func__, task->tk_status,
++				clp->cl_exchange_flags);
+ 			nfs4_schedule_state_recovery(clp);
+ 			task->tk_status = 0;
+ 			return -EAGAIN;
+@@ -3512,6 +3635,8 @@ _nfs4_async_handle_error(struct rpc_task
+ 	task->tk_status = nfs4_map_errors(task->tk_status);
+ 	return 0;
+ do_state_recovery:
++	if (is_ds_only_client(clp))
++		return 0;
+ 	rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
+ 	nfs4_schedule_state_recovery(clp);
+ 	if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
+@@ -3520,12 +3645,6 @@ do_state_recovery:
+ 	return -EAGAIN;
+ }
+ 
+-static int
+-nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
+-{
+-	return _nfs4_async_handle_error(task, server, server->nfs_client, state);
+-}
+-
+ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
+ 		unsigned short port, struct rpc_cred *cred,
+ 		struct nfs4_setclientid_res *res)
+@@ -3641,8 +3760,8 @@ static void nfs4_delegreturn_done(struct
+ {
+ 	struct nfs4_delegreturndata *data = calldata;
+ 
+-	nfs4_sequence_done(data->res.server, &data->res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return;
+ 
+ 	switch (task->tk_status) {
+ 	case -NFS4ERR_STALE_STATEID:
+@@ -3651,8 +3770,8 @@ static void nfs4_delegreturn_done(struct
+ 		renew_lease(data->res.server, data->timestamp);
+ 		break;
+ 	default:
+-		if (nfs4_async_handle_error(task, data->res.server, NULL) ==
+-				-EAGAIN) {
++		if (nfs4_async_handle_error(task, data->res.server, NULL, NULL)
++				== -EAGAIN) {
+ 			nfs_restart_rpc(task, data->res.server->nfs_client);
+ 			return;
+ 		}
+@@ -3672,7 +3791,7 @@ static void nfs4_delegreturn_prepare(str
+ 
+ 	d_data = (struct nfs4_delegreturndata *)data;
+ 
+-	if (nfs4_setup_sequence(d_data->res.server->nfs_client,
++	if (nfs4_setup_sequence(d_data->res.server, NULL,
+ 				&d_data->args.seq_args,
+ 				&d_data->res.seq_res, 1, task))
+ 		return;
+@@ -3892,15 +4011,16 @@ static void nfs4_locku_done(struct rpc_t
+ {
+ 	struct nfs4_unlockdata *calldata = data;
+ 
+-	nfs4_sequence_done(calldata->server, &calldata->res.seq_res,
+-			   task->tk_status);
++	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
++		return;
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+ 	switch (task->tk_status) {
+ 		case 0:
+-			memcpy(calldata->lsp->ls_stateid.data,
+-					calldata->res.stateid.data,
+-					sizeof(calldata->lsp->ls_stateid.data));
++			memcpy(calldata->lsp->ls_stateid.u.data,
++					calldata->res.stateid.u.data,
++					sizeof(calldata->lsp->ls_stateid.u.
++					       data));
+ 			renew_lease(calldata->server, calldata->timestamp);
+ 			break;
+ 		case -NFS4ERR_BAD_STATEID:
+@@ -3909,7 +4029,7 @@ static void nfs4_locku_done(struct rpc_t
+ 		case -NFS4ERR_EXPIRED:
+ 			break;
+ 		default:
+-			if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
++			if (nfs4_async_handle_error(task, calldata->server, NULL, NULL) == -EAGAIN)
+ 				nfs_restart_rpc(task,
+ 						 calldata->server->nfs_client);
+ 	}
+@@ -3927,7 +4047,7 @@ static void nfs4_locku_prepare(struct rp
+ 		return;
+ 	}
+ 	calldata->timestamp = jiffies;
+-	if (nfs4_setup_sequence(calldata->server->nfs_client,
++	if (nfs4_setup_sequence(calldata->server, NULL,
+ 				&calldata->arg.seq_args,
+ 				&calldata->res.seq_res, 1, task))
+ 		return;
+@@ -4082,7 +4202,8 @@ static void nfs4_lock_prepare(struct rpc
+ 	} else
+ 		data->arg.new_lock_owner = 0;
+ 	data->timestamp = jiffies;
+-	if (nfs4_setup_sequence(data->server->nfs_client, &data->arg.seq_args,
++	if (nfs4_setup_sequence(data->server, NULL,
++				&data->arg.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+@@ -4101,8 +4222,8 @@ static void nfs4_lock_done(struct rpc_ta
+ 
+ 	dprintk("%s: begin!\n", __func__);
+ 
+-	nfs4_sequence_done(data->server, &data->res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return;
+ 
+ 	data->rpc_status = task->tk_status;
+ 	if (RPC_ASSASSINATED(task))
+@@ -4114,8 +4235,8 @@ static void nfs4_lock_done(struct rpc_ta
+ 			goto out;
+ 	}
+ 	if (data->rpc_status == 0) {
+-		memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
+-					sizeof(data->lsp->ls_stateid.data));
++		memcpy(data->lsp->ls_stateid.u.data, data->res.stateid.u.data,
++					sizeof(data->lsp->ls_stateid.u.data));
+ 		data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+ 		renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
+ 	}
+@@ -4424,6 +4545,34 @@ out:
+ 	return err;
+ }
+ 
++static void nfs4_release_lockowner_release(void *calldata)
++{
++	kfree(calldata);
++}
++
++const struct rpc_call_ops nfs4_release_lockowner_ops = {
++	.rpc_release = nfs4_release_lockowner_release,
++};
++
++void nfs4_release_lockowner(const struct nfs4_lock_state *lsp)
++{
++	struct nfs_server *server = lsp->ls_state->owner->so_server;
++	struct nfs_release_lockowner_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER],
++	};
++
++	if (server->nfs_client->cl_mvops->minor_version != 0)
++		return;
++	args = kmalloc(sizeof(*args), GFP_NOFS);
++	if (!args)
++		return;
++	args->lock_owner.clientid = server->nfs_client->cl_clientid;
++	args->lock_owner.id = lsp->ls_id.id;
++	msg.rpc_argp = args;
++	rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
++}
++
+ #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
+ 
+ int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
+@@ -4526,7 +4675,7 @@ int nfs4_proc_exchange_id(struct nfs_cli
+ 	nfs4_verifier verifier;
+ 	struct nfs41_exchange_id_args args = {
+ 		.client = clp,
+-		.flags = clp->cl_exchange_flags,
++		.flags = clp->cl_exchange_flags & ~EXCHGID4_FLAG_CONFIRMED_R,
+ 	};
+ 	struct nfs41_exchange_id_res res = {
+ 		.client = clp,
+@@ -4574,6 +4723,7 @@ int nfs4_proc_exchange_id(struct nfs_cli
+ 	dprintk("<-- %s status= %d\n", __func__, status);
+ 	return status;
+ }
++EXPORT_SYMBOL(nfs4_proc_exchange_id);
+ 
+ struct nfs4_get_lease_time_data {
+ 	struct nfs4_get_lease_time_args *args;
+@@ -4611,7 +4761,8 @@ static void nfs4_get_lease_time_done(str
+ 			(struct nfs4_get_lease_time_data *)calldata;
+ 
+ 	dprintk("--> %s\n", __func__);
+-	nfs41_sequence_done(data->clp, &data->res->lr_seq_res, task->tk_status);
++	if (!nfs41_sequence_done(task, &data->res->lr_seq_res))
++		return;
+ 	switch (task->tk_status) {
+ 	case -NFS4ERR_DELAY:
+ 	case -NFS4ERR_GRACE:
+@@ -4805,13 +4956,6 @@ struct nfs4_session *nfs4_alloc_session(
+ 	if (!session)
+ 		return NULL;
+ 
+-	/*
+-	 * The create session reply races with the server back
+-	 * channel probe. Mark the client NFS_CS_SESSION_INITING
+-	 * so that the client back channel can find the
+-	 * nfs_client struct
+-	 */
+-	clp->cl_cons_state = NFS_CS_SESSION_INITING;
+ 	init_completion(&session->complete);
+ 
+ 	tbl = &session->fc_slot_table;
+@@ -4824,6 +4968,8 @@ struct nfs4_session *nfs4_alloc_session(
+ 	spin_lock_init(&tbl->slot_tbl_lock);
+ 	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
+ 
++	session->session_state = 1<<NFS4_SESSION_INITING;
++
+ 	session->clp = clp;
+ 	return session;
+ }
+@@ -5040,6 +5186,10 @@ int nfs4_init_session(struct nfs_server 
+ 	if (!nfs4_has_session(clp))
+ 		return 0;
+ 
++	session = clp->cl_session;
++	if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
++		return 0;
++
+ 	rsize = server->rsize;
+ 	if (rsize == 0)
+ 		rsize = NFS_MAX_FILE_IO_SIZE;
+@@ -5047,11 +5197,10 @@ int nfs4_init_session(struct nfs_server 
+ 	if (wsize == 0)
+ 		wsize = NFS_MAX_FILE_IO_SIZE;
+ 
+-	session = clp->cl_session;
+ 	session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
+ 	session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
+ 
+-	ret = nfs4_recover_expired_lease(server);
++	ret = nfs4_recover_expired_lease(server->nfs_client);
+ 	if (!ret)
+ 		ret = nfs4_check_client_ready(clp);
+ 	return ret;
+@@ -5060,69 +5209,70 @@ int nfs4_init_session(struct nfs_server 
+ /*
+  * Renew the cl_session lease.
+  */
+-static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+-{
++struct nfs4_sequence_data {
++	struct nfs_client *clp;
+ 	struct nfs4_sequence_args args;
+ 	struct nfs4_sequence_res res;
+-
+-	struct rpc_message msg = {
+-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
+-		.rpc_argp = &args,
+-		.rpc_resp = &res,
+-		.rpc_cred = cred,
+-	};
+-
+-	args.sa_cache_this = 0;
+-
+-	return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args,
+-				       &res, args.sa_cache_this, 1);
+-}
++};
+ 
+ static void nfs41_sequence_release(void *data)
+ {
+-	struct nfs_client *clp = (struct nfs_client *)data;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 
+ 	if (atomic_read(&clp->cl_count) > 1)
+ 		nfs4_schedule_state_renewal(clp);
+ 	nfs_put_client(clp);
++	kfree(calldata);
++}
++
++static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client *clp)
++{
++	switch(task->tk_status) {
++	case -NFS4ERR_DELAY:
++	case -EKEYEXPIRED:
++		rpc_delay(task, NFS4_POLL_RETRY_MAX);
++		return -EAGAIN;
++	default:
++		nfs4_schedule_state_recovery(clp);
++	}
++	return 0;
+ }
+ 
+ static void nfs41_sequence_call_done(struct rpc_task *task, void *data)
+ {
+-	struct nfs_client *clp = (struct nfs_client *)data;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 
+-	nfs41_sequence_done(clp, task->tk_msg.rpc_resp, task->tk_status);
++	if (!nfs41_sequence_done(task, task->tk_msg.rpc_resp))
++		return;
+ 
+ 	if (task->tk_status < 0) {
+ 		dprintk("%s ERROR %d\n", __func__, task->tk_status);
+ 		if (atomic_read(&clp->cl_count) == 1)
+ 			goto out;
+ 
+-		if (_nfs4_async_handle_error(task, NULL, clp, NULL)
+-								== -EAGAIN) {
+-			nfs_restart_rpc(task, clp);
++		if (nfs41_sequence_handle_errors(task, clp) == -EAGAIN) {
++			rpc_restart_call_prepare(task);
+ 			return;
+ 		}
+ 	}
+ 	dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred);
+ out:
+-	kfree(task->tk_msg.rpc_argp);
+-	kfree(task->tk_msg.rpc_resp);
+-
+ 	dprintk("<-- %s\n", __func__);
+ }
+ 
+ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
+ {
+-	struct nfs_client *clp;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 	struct nfs4_sequence_args *args;
+ 	struct nfs4_sequence_res *res;
+ 
+-	clp = (struct nfs_client *)data;
+ 	args = task->tk_msg.rpc_argp;
+ 	res = task->tk_msg.rpc_resp;
+ 
+-	if (nfs4_setup_sequence(clp, args, res, 0, task))
++	if (nfs41_setup_sequence(clp->cl_session, args, res, 0, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
+@@ -5133,32 +5283,67 @@ static const struct rpc_call_ops nfs41_s
+ 	.rpc_release = nfs41_sequence_release,
+ };
+ 
+-static int nfs41_proc_async_sequence(struct nfs_client *clp,
+-				     struct rpc_cred *cred)
++static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+ {
+-	struct nfs4_sequence_args *args;
+-	struct nfs4_sequence_res *res;
++	struct nfs4_sequence_data *calldata;
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
+ 		.rpc_cred = cred,
+ 	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = clp->cl_rpcclient,
++		.rpc_message = &msg,
++		.callback_ops = &nfs41_sequence_ops,
++		.flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
++	};
+ 
+ 	if (!atomic_inc_not_zero(&clp->cl_count))
+-		return -EIO;
+-	args = kzalloc(sizeof(*args), GFP_NOFS);
+-	res = kzalloc(sizeof(*res), GFP_NOFS);
+-	if (!args || !res) {
+-		kfree(args);
+-		kfree(res);
++		return ERR_PTR(-EIO);
++	calldata = kmalloc(sizeof(*calldata), GFP_NOFS);
++	if (calldata == NULL) {
+ 		nfs_put_client(clp);
+-		return -ENOMEM;
++		return ERR_PTR(-ENOMEM);
+ 	}
+-	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+-	msg.rpc_argp = args;
+-	msg.rpc_resp = res;
++	calldata->res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	msg.rpc_argp = &calldata->args;
++	msg.rpc_resp = &calldata->res;
++	calldata->clp = clp;
++	task_setup_data.callback_data = calldata;
+ 
+-	return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
+-			      &nfs41_sequence_ops, (void *)clp);
++	return rpc_run_task(&task_setup_data);
++}
++
++static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred)
++{
++	struct rpc_task *task;
++	int ret = 0;
++
++	task = _nfs41_proc_sequence(clp, cred);
++	if (IS_ERR(task))
++		ret = PTR_ERR(task);
++	else
++		rpc_put_task(task);
++	dprintk("<-- %s status=%d\n", __func__, ret);
++	return ret;
++}
++
++static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
++{
++	struct rpc_task *task;
++	int ret;
++
++	task = _nfs41_proc_sequence(clp, cred);
++	if (IS_ERR(task)) {
++		ret = PTR_ERR(task);
++		goto out;
++	}
++	ret = rpc_wait_for_completion_task(task);
++	if (!ret)
++		ret = task->tk_status;
++	rpc_put_task(task);
++out:
++	dprintk("<-- %s status=%d\n", __func__, ret);
++	return ret;
+ }
+ 
+ struct nfs4_reclaim_complete_data {
+@@ -5172,13 +5357,31 @@ static void nfs4_reclaim_complete_prepar
+ 	struct nfs4_reclaim_complete_data *calldata = data;
+ 
+ 	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+-	if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args,
++	if (nfs41_setup_sequence(calldata->clp->cl_session,
++				&calldata->arg.seq_args,
+ 				&calldata->res.seq_res, 0, task))
+ 		return;
+ 
+ 	rpc_call_start(task);
+ }
+ 
++static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nfs_client *clp)
++{
++	switch(task->tk_status) {
++	case 0:
++	case -NFS4ERR_COMPLETE_ALREADY:
++	case -NFS4ERR_WRONG_CRED: /* What to do here? */
++		break;
++	case -NFS4ERR_DELAY:
++	case -EKEYEXPIRED:
++		rpc_delay(task, NFS4_POLL_RETRY_MAX);
++		return -EAGAIN;
++	default:
++		nfs4_schedule_state_recovery(clp);
++	}
++	return 0;
++}
++
+ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data)
+ {
+ 	struct nfs4_reclaim_complete_data *calldata = data;
+@@ -5186,32 +5389,13 @@ static void nfs4_reclaim_complete_done(s
+ 	struct nfs4_sequence_res *res = &calldata->res.seq_res;
+ 
+ 	dprintk("--> %s\n", __func__);
+-	nfs41_sequence_done(clp, res, task->tk_status);
+-	switch (task->tk_status) {
+-	case 0:
+-	case -NFS4ERR_COMPLETE_ALREADY:
+-		break;
+-	case -NFS4ERR_BADSESSION:
+-	case -NFS4ERR_DEADSESSION:
+-		/*
+-		 * Handle the session error, but do not retry the operation, as
+-		 * we have no way of telling whether the clientid had to be
+-		 * reset before we got our reply.  If reset, a new wave of
+-		 * reclaim operations will follow, containing their own reclaim
+-		 * complete.  We don't want our retry to get on the way of
+-		 * recovery by incorrectly indicating to the server that we're
+-		 * done reclaiming state since the process had to be restarted.
+-		 */
+-		_nfs4_async_handle_error(task, NULL, clp, NULL);
+-		break;
+-	default:
+-		if (_nfs4_async_handle_error(
+-				task, NULL, clp, NULL) == -EAGAIN) {
+-			rpc_restart_call_prepare(task);
+-			return;
+-		}
+-	}
++	if (!nfs41_sequence_done(task, res))
++		return;
+ 
++	if (nfs41_reclaim_complete_handle_errors(task, clp) == -EAGAIN) {
++		rpc_restart_call_prepare(task);
++		return;
++	}
+ 	dprintk("<-- %s\n", __func__);
+ }
+ 
+@@ -5268,6 +5452,404 @@ out:
+ 	dprintk("<-- %s status=%d\n", __func__, status);
+ 	return status;
+ }
++
++static void
++nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutget *lgp = calldata;
++	struct inode *ino = lgp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++	if (nfs4_setup_sequence(server, NULL, &lgp->args.seq_args,
++				&lgp->res.seq_res, 0, task))
++		return;
++	rpc_call_start(task);
++}
++
++static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutget *lgp = calldata;
++	struct inode *ino = lgp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++
++	if (!nfs4_sequence_done(task, &lgp->res.seq_res))
++		return;
++
++	if (RPC_ASSASSINATED(task))
++		return;
++
++	pnfs_get_layout_done(lgp, task->tk_status);
++
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client);
++
++	lgp->status = task->tk_status;
++	dprintk("<-- %s\n", __func__);
++}
++
++static void nfs4_layoutget_release(void *calldata)
++{
++	struct nfs4_layoutget *lgp = calldata;
++
++	dprintk("--> %s\n", __func__);
++	pnfs_layout_release(NFS_I(lgp->args.inode)->layout, NULL);
++	if (lgp->res.layout.buf != NULL)
++		free_page((unsigned long) lgp->res.layout.buf);
++	kfree(calldata);
++	dprintk("<-- %s\n", __func__);
++}
++
++static const struct rpc_call_ops nfs4_layoutget_call_ops = {
++	.rpc_call_prepare = nfs4_layoutget_prepare,
++	.rpc_call_done = nfs4_layoutget_done,
++	.rpc_release = nfs4_layoutget_release,
++};
++
++/* FIXME: We need to call nfs4_handle_exception
++ * and deal with retries.
++ * Currently we can't since we release lgp and its contents.
++ */
++static int _nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
++{
++	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
++	struct rpc_task *task;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
++		.rpc_argp = &lgp->args,
++		.rpc_resp = &lgp->res,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = server->client,
++		.rpc_message = &msg,
++		.callback_ops = &nfs4_layoutget_call_ops,
++		.callback_data = lgp,
++		.flags = RPC_TASK_ASYNC,
++	};
++	int status = 0;
++
++	dprintk("--> %s\n", __func__);
++
++	lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS);
++	if (lgp->res.layout.buf == NULL) {
++		nfs4_layoutget_release(lgp);
++		return -ENOMEM;
++	}
++
++	lgp->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	status = nfs4_wait_for_completion_rpc_task(task);
++	if (status != 0)
++		goto out;
++	status = lgp->status;
++	if (status != 0)
++		goto out;
++	status = pnfs_layout_process(lgp);
++out:
++	rpc_put_task(task);
++	dprintk("<-- %s status=%d\n", __func__, status);
++	return status;
++}
++
++int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
++{
++	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
++	struct nfs4_exception exception = { };
++	int err;
++	do {
++		err = nfs4_handle_exception(server, _nfs4_proc_layoutget(lgp),
++					    &exception);
++	} while (exception.retry);
++	return err;
++}
++
++static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *data)
++{
++	struct nfs4_layoutcommit_data *ldata =
++		(struct nfs4_layoutcommit_data *)data;
++	struct nfs_server *server = NFS_SERVER(ldata->args.inode);
++
++	if (nfs4_setup_sequence(server, NULL, &ldata->args.seq_args,
++				&ldata->res.seq_res, 1, task))
++		return;
++	rpc_call_start(task);
++}
++
++static void
++nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutcommit_data *data =
++		(struct nfs4_layoutcommit_data *)calldata;
++	struct nfs_server *server = NFS_SERVER(data->args.inode);
++
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return;
++
++	if (RPC_ASSASSINATED(task))
++		return;
++
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client);
++
++	data->status = task->tk_status;
++}
++
++static void nfs4_layoutcommit_release(void *lcdata)
++{
++	struct nfs4_layoutcommit_data *data =
++		(struct nfs4_layoutcommit_data *)lcdata;
++
++	put_rpccred(data->cred);
++	pnfs_cleanup_layoutcommit(lcdata);
++	pnfs_layoutcommit_free(lcdata);
++	/* Matched by get_layout in pnfs_layoutcommit_inode */
++	put_layout(data->args.inode);
++}
++
++static const struct rpc_call_ops nfs4_layoutcommit_ops = {
++	.rpc_call_prepare = nfs4_layoutcommit_prepare,
++	.rpc_call_done = nfs4_layoutcommit_done,
++	.rpc_release = nfs4_layoutcommit_release,
++};
++
++/* Execute a layoutcommit to the server */
++static int
++_nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, int issync)
++{
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTCOMMIT],
++		.rpc_argp = &data->args,
++		.rpc_resp = &data->res,
++		.rpc_cred = data->cred,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.task = &data->task,
++		.rpc_client = NFS_CLIENT(data->args.inode),
++		.rpc_message = &msg,
++		.callback_ops = &nfs4_layoutcommit_ops,
++		.callback_data = data,
++		.flags = RPC_TASK_ASYNC,
++	};
++	struct rpc_task *task;
++	int status = 0;
++
++	dprintk("NFS: %4d initiating layoutcommit call. %llu@%llu lbw: %llu "
++		"type: %d issync %d\n",
++		data->task.tk_pid,
++		data->args.range.length,
++		data->args.range.offset,
++		data->args.lastbytewritten,
++		data->args.layout_type, issync);
++
++	data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	if (!issync)
++		goto out;
++	status = nfs4_wait_for_completion_rpc_task(task);
++	if (status != 0)
++		goto out;
++	status = data->status;
++out:
++	dprintk("%s: status %d\n", __func__, status);
++	rpc_put_task(task);
++	return 0;
++}
++
++int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, int issync)
++{
++	struct nfs4_exception exception = { };
++	struct nfs_server *server = NFS_SERVER(data->args.inode);
++	int err;
++
++	do {
++		err = nfs4_handle_exception(server,
++					_nfs4_proc_layoutcommit(data, issync),
++					&exception);
++	} while (exception.retry);
++	return err;
++}
++
++static void
++nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutreturn *lrp = calldata;
++	struct inode *ino = lrp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++	if (nfs4_setup_sequence(server, NULL, &lrp->args.seq_args,
++				&lrp->res.seq_res, 0, task))
++		return;
++	rpc_call_start(task);
++}
++
++static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutreturn *lrp = calldata;
++	struct inode *ino = lrp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++
++	if (!nfs4_sequence_done(task, &lrp->res.seq_res))
++		return;
++
++	if (RPC_ASSASSINATED(task))
++		return;
++
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client);
++
++	dprintk("<-- %s\n", __func__);
++}
++
++static void nfs4_layoutreturn_release(void *calldata)
++{
++	struct nfs4_layoutreturn *lrp = calldata;
++	struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
++
++	dprintk("--> %s return_type %d lo %p\n", __func__,
++		lrp->args.return_type, lo);
++
++	if (lrp->args.return_type == RETURN_FILE) {
++		if (!lrp->res.lrs_present)
++			pnfs_set_layout_stateid(lo, &zero_stateid);
++		pnfs_layout_release(lo, &lrp->args.range);
++	}
++	kfree(calldata);
++	dprintk("<-- %s\n", __func__);
++}
++
++static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
++	.rpc_call_prepare = nfs4_layoutreturn_prepare,
++	.rpc_call_done = nfs4_layoutreturn_done,
++	.rpc_release = nfs4_layoutreturn_release,
++};
++
++int _nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync)
++{
++	struct inode *ino = lrp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++	struct rpc_task *task;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN],
++		.rpc_argp = &lrp->args,
++		.rpc_resp = &lrp->res,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = server->client,
++		.rpc_message = &msg,
++		.callback_ops = &nfs4_layoutreturn_call_ops,
++		.callback_data = lrp,
++		.flags = RPC_TASK_ASYNC,
++	};
++	int status = 0;
++
++	dprintk("--> %s\n", __func__);
++	lrp->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	if (!issync)
++		goto out;
++	status = nfs4_wait_for_completion_rpc_task(task);
++	if (status != 0)
++		goto out;
++	status = task->tk_status;
++out:
++	dprintk("<-- %s\n", __func__);
++	rpc_put_task(task);
++	return status;
++}
++
++int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync)
++{
++	struct nfs_server *server = NFS_SERVER(lrp->args.inode);
++	struct nfs4_exception exception = { };
++	int err;
++	do {
++		err = nfs4_handle_exception(server,
++				_nfs4_proc_layoutreturn(lrp, issync),
++				&exception);
++	} while (exception.retry);
++
++	return err;
++}
++
++/*
++ * Fetch the Data Server device list from the MDS via nfs4_call_sync().
++ */
++static int _nfs4_getdevicelist(struct nfs_server *server,
++				    const struct nfs_fh *fh,
++				    struct pnfs_devicelist *devlist)
++{
++	struct nfs4_getdevicelist_res gdl_res = {
++		.devlist = devlist,
++	};
++	struct nfs4_getdevicelist_args gdl_args = {
++		.layoutclass = server->pnfs_curr_ld->id,
++		.fh = fh,
++	};
++	struct rpc_message gdl_msg = {
++		.rpc_resp = &gdl_res,
++		.rpc_argp = &gdl_args,
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICELIST],
++	};
++	int ret;
++
++	dprintk("--> %s\n", __func__);
++	ret = nfs4_call_sync(server, &gdl_msg, &gdl_args, &gdl_res, 0);
++	dprintk("<-- %s status=%d\n", __func__, ret);
++	return ret;
++}
++
++/*
++ * Get the device list, retrying while nfs4_handle_exception() asks for it.
++ */
++int nfs4_proc_getdevicelist(struct nfs_server *server,
++			    const struct nfs_fh *fh,
++			    struct pnfs_devicelist *devlist)
++{
++	struct nfs4_exception exception = { };
++	int err;
++
++	do {
++		err = _nfs4_getdevicelist(server, fh, devlist);
++		err = nfs4_handle_exception(server, err, &exception);
++	} while (exception.retry);
++	dprintk("%s: err=%d, num_devs=%u\n", __func__,
++		err, devlist->num_devs);
++	return err;
++}
++
++/*
++ * Send one GETDEVICEINFO request for pdev via nfs4_call_sync().
++ * NOTE(review): unlike nfs4_proc_getdevicelist(), the status is returned
++ * without passing through nfs4_handle_exception() -- confirm intended.
++ */
++int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
++{
++	struct nfs4_getdeviceinfo_res gdi_res = { .pdev = pdev };
++	struct nfs4_getdeviceinfo_args gdi_args = { .pdev = pdev };
++	struct rpc_message gdi_msg = {
++		.rpc_resp = &gdi_res,
++		.rpc_argp = &gdi_args,
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO],
++	};
++	int ret;
++
++	dprintk("--> %s\n", __func__);
++	ret = nfs4_call_sync(server, &gdi_msg, &gdi_args, &gdi_res, 0);
++	dprintk("<-- %s status=%d\n", __func__, ret);
++	return ret;
++}
++
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
+@@ -5325,28 +5907,30 @@ struct nfs4_state_maintenance_ops nfs41_
+ };
+ #endif
+ 
+-/*
+- * Per minor version reboot and network partition recovery ops
+- */
+-
+-struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[] = {
+-	&nfs40_reboot_recovery_ops,
+-#if defined(CONFIG_NFS_V4_1)
+-	&nfs41_reboot_recovery_ops,
+-#endif
++static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
++	.minor_version = 0,
++	.call_sync = _nfs4_call_sync,
++	.validate_stateid = nfs4_validate_delegation_stateid,
++	.reboot_recovery_ops = &nfs40_reboot_recovery_ops,
++	.nograce_recovery_ops = &nfs40_nograce_recovery_ops,
++	.state_renewal_ops = &nfs40_state_renewal_ops,
+ };
+ 
+-struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[] = {
+-	&nfs40_nograce_recovery_ops,
+ #if defined(CONFIG_NFS_V4_1)
+-	&nfs41_nograce_recovery_ops,
+-#endif
++static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
++	.minor_version = 1,
++	.call_sync = _nfs4_call_sync_session,
++	.validate_stateid = nfs41_validate_delegation_stateid,
++	.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
++	.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
++	.state_renewal_ops = &nfs41_state_renewal_ops,
+ };
++#endif
+ 
+-struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[] = {
+-	&nfs40_state_renewal_ops,
++const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
++	[0] = &nfs_v4_0_minor_ops,
+ #if defined(CONFIG_NFS_V4_1)
+-	&nfs41_state_renewal_ops,
++	[1] = &nfs_v4_1_minor_ops,
+ #endif
+ };
+ 
+@@ -5364,6 +5948,7 @@ const struct nfs_rpc_ops nfs_v4_clientop
+ 	.dentry_ops	= &nfs4_dentry_operations,
+ 	.dir_inode_ops	= &nfs4_dir_inode_operations,
+ 	.file_inode_ops	= &nfs4_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs4_proc_get_root,
+ 	.getattr	= nfs4_proc_getattr,
+ 	.setattr	= nfs4_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs/nfs4renewd.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-08-31 20:42:05.526213255 -0400
+@@ -54,17 +54,17 @@
+ void
+ nfs4_renew_state(struct work_struct *work)
+ {
+-	struct nfs4_state_maintenance_ops *ops;
++	const struct nfs4_state_maintenance_ops *ops;
+ 	struct nfs_client *clp =
+ 		container_of(work, struct nfs_client, cl_renewd.work);
+ 	struct rpc_cred *cred;
+ 	long lease;
+ 	unsigned long last, now;
+ 
+-	ops = nfs4_state_renewal_ops[clp->cl_minorversion];
++	ops = clp->cl_mvops->state_renewal_ops;
+ 	dprintk("%s: start\n", __func__);
+ 	/* Are there any active superblocks? */
+-	if (list_empty(&clp->cl_superblocks))
++	if (list_empty(&clp->cl_superblocks) && !is_ds_only_client(clp))
+ 		goto out;
+ 	spin_lock(&clp->cl_lock);
+ 	lease = clp->cl_lease_time;
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-08-31 20:41:19.158078621 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-31 20:42:05.527232994 -0400
+@@ -48,11 +48,13 @@
+ #include <linux/random.h>
+ #include <linux/workqueue.h>
+ #include <linux/bitops.h>
++#include <linux/nfs4_pnfs.h>
+ 
+ #include "nfs4_fs.h"
+ #include "callback.h"
+ #include "delegation.h"
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ #define OPENOWNER_POOL_SIZE	8
+ 
+@@ -126,6 +128,11 @@ static int nfs41_setup_state_renewal(str
+ 	int status;
+ 	struct nfs_fsinfo fsinfo;
+ 
++	if (is_ds_only_client(clp)) {
++		nfs4_schedule_state_renewal(clp);
++		return 0;
++	}
++
+ 	status = nfs4_proc_get_lease_time(clp, &fsinfo);
+ 	if (status == 0) {
+ 		/* Update lease time and schedule renewal */
+@@ -145,7 +152,9 @@ static void nfs4_end_drain_session(struc
+ 	struct nfs4_session *ses = clp->cl_session;
+ 	int max_slots;
+ 
+-	if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) {
++	if (ses == NULL)
++		return;
++	if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
+ 		spin_lock(&ses->fc_slot_table.slot_tbl_lock);
+ 		max_slots = ses->fc_slot_table.max_slots;
+ 		while (max_slots--) {
+@@ -167,7 +176,7 @@ static int nfs4_begin_drain_session(stru
+ 	struct nfs4_slot_table *tbl = &ses->fc_slot_table;
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+-	set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state);
++	set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
+ 	if (tbl->highest_used_slotid != -1) {
+ 		INIT_COMPLETION(ses->complete);
+ 		spin_unlock(&tbl->slot_tbl_lock);
+@@ -371,7 +380,6 @@ nfs4_alloc_state_owner(void)
+ 		return NULL;
+ 	spin_lock_init(&sp->so_lock);
+ 	INIT_LIST_HEAD(&sp->so_states);
+-	INIT_LIST_HEAD(&sp->so_delegations);
+ 	rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue");
+ 	sp->so_seqid.sequence = &sp->so_sequence;
+ 	spin_lock_init(&sp->so_sequence.lock);
+@@ -384,7 +392,7 @@ static void
+ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
+ {
+ 	if (!RB_EMPTY_NODE(&sp->so_client_node)) {
+-		struct nfs_client *clp = sp->so_client;
++		struct nfs_client *clp = sp->so_server->nfs_client;
+ 
+ 		spin_lock(&clp->cl_lock);
+ 		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
+@@ -406,7 +414,6 @@ struct nfs4_state_owner *nfs4_get_state_
+ 	new = nfs4_alloc_state_owner();
+ 	if (new == NULL)
+ 		return NULL;
+-	new->so_client = clp;
+ 	new->so_server = server;
+ 	new->so_cred = cred;
+ 	spin_lock(&clp->cl_lock);
+@@ -423,7 +430,7 @@ struct nfs4_state_owner *nfs4_get_state_
+ 
+ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
+ {
+-	struct nfs_client *clp = sp->so_client;
++	struct nfs_client *clp = sp->so_server->nfs_client;
+ 	struct rpc_cred *cred = sp->so_cred;
+ 
+ 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
+@@ -583,8 +590,24 @@ static void __nfs4_close(struct path *pa
+ 	if (!call_close) {
+ 		nfs4_put_open_state(state);
+ 		nfs4_put_state_owner(owner);
+-	} else
++	} else {
++		u32 roc_iomode;
++		struct nfs_inode *nfsi = NFS_I(state->inode);
++
++		if (has_layout(nfsi) &&
++		    (roc_iomode = pnfs_layout_roc_iomode(nfsi)) != 0) {
++			struct pnfs_layout_range range = {
++				.iomode = roc_iomode,
++				.offset = 0,
++				.length = NFS4_MAX_UINT64,
++			};
++
++			pnfs_return_layout(state->inode, &range, NULL,
++					   RETURN_FILE, wait);
++		}
++
+ 		nfs4_do_close(path, state, gfp_mask, wait);
++	}
+ }
+ 
+ void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
+@@ -602,12 +625,21 @@ void nfs4_close_sync(struct path *path, 
+  * that is compatible with current->files
+  */
+ static struct nfs4_lock_state *
+-__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
++__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *pos;
+ 	list_for_each_entry(pos, &state->lock_states, ls_locks) {
+-		if (pos->ls_owner != fl_owner)
++		if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type)
+ 			continue;
++		switch (pos->ls_owner.lo_type) {
++		case NFS4_POSIX_LOCK_TYPE:
++			if (pos->ls_owner.lo_u.posix_owner != fl_owner)
++				continue;
++			break;
++		case NFS4_FLOCK_LOCK_TYPE:
++			if (pos->ls_owner.lo_u.flock_owner != fl_pid)
++				continue;
++		}
+ 		atomic_inc(&pos->ls_count);
+ 		return pos;
+ 	}
+@@ -619,10 +651,10 @@ __nfs4_find_lock_state(struct nfs4_state
+  * exists, return an uninitialized one.
+  *
+  */
+-static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
++static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *lsp;
+-	struct nfs_client *clp = state->owner->so_client;
++	struct nfs_client *clp = state->owner->so_server->nfs_client;
+ 
+ 	lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
+ 	if (lsp == NULL)
+@@ -633,7 +665,18 @@ static struct nfs4_lock_state *nfs4_allo
+ 	lsp->ls_seqid.sequence = &lsp->ls_sequence;
+ 	atomic_set(&lsp->ls_count, 1);
+ 	lsp->ls_state = state;
+-	lsp->ls_owner = fl_owner;
++	lsp->ls_owner.lo_type = type;
++	switch (lsp->ls_owner.lo_type) {
++	case NFS4_FLOCK_LOCK_TYPE:
++		lsp->ls_owner.lo_u.flock_owner = fl_pid;
++		break;
++	case NFS4_POSIX_LOCK_TYPE:
++		lsp->ls_owner.lo_u.posix_owner = fl_owner;
++		break;
++	default:
++		kfree(lsp);
++		return NULL;
++	}
+ 	spin_lock(&clp->cl_lock);
+ 	nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
+ 	spin_unlock(&clp->cl_lock);
+@@ -643,7 +686,7 @@ static struct nfs4_lock_state *nfs4_allo
+ 
+ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
+ {
+-	struct nfs_client *clp = lsp->ls_state->owner->so_client;
++	struct nfs_client *clp = lsp->ls_state->owner->so_server->nfs_client;
+ 
+ 	spin_lock(&clp->cl_lock);
+ 	nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
+@@ -657,13 +700,13 @@ static void nfs4_free_lock_state(struct 
+  * exists, return an uninitialized one.
+  *
+  */
+-static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
++static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *lsp, *new = NULL;
+ 	
+ 	for(;;) {
+ 		spin_lock(&state->state_lock);
+-		lsp = __nfs4_find_lock_state(state, owner);
++		lsp = __nfs4_find_lock_state(state, owner, pid, type);
+ 		if (lsp != NULL)
+ 			break;
+ 		if (new != NULL) {
+@@ -674,7 +717,7 @@ static struct nfs4_lock_state *nfs4_get_
+ 			break;
+ 		}
+ 		spin_unlock(&state->state_lock);
+-		new = nfs4_alloc_lock_state(state, owner);
++		new = nfs4_alloc_lock_state(state, owner, pid, type);
+ 		if (new == NULL)
+ 			return NULL;
+ 	}
+@@ -701,6 +744,8 @@ void nfs4_put_lock_state(struct nfs4_loc
+ 	if (list_empty(&state->lock_states))
+ 		clear_bit(LK_STATE_IN_USE, &state->flags);
+ 	spin_unlock(&state->state_lock);
++	if (lsp->ls_flags & NFS_LOCK_INITIALIZED)
++		nfs4_release_lockowner(lsp);
+ 	nfs4_free_lock_state(lsp);
+ }
+ 
+@@ -728,7 +773,12 @@ int nfs4_set_lock_state(struct nfs4_stat
+ 
+ 	if (fl->fl_ops != NULL)
+ 		return 0;
+-	lsp = nfs4_get_lock_state(state, fl->fl_owner);
++	if (fl->fl_flags & FL_POSIX)
++		lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
++	else if (fl->fl_flags & FL_FLOCK)
++		lsp = nfs4_get_lock_state(state, 0, fl->fl_pid, NFS4_FLOCK_LOCK_TYPE);
++	else
++		return -EINVAL;
+ 	if (lsp == NULL)
+ 		return -ENOMEM;
+ 	fl->fl_u.nfs4_fl.owner = lsp;
+@@ -740,7 +790,7 @@ int nfs4_set_lock_state(struct nfs4_stat
+  * Byte-range lock aware utility to initialize the stateid of read/write
+  * requests.
+  */
+-void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
++void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid)
+ {
+ 	struct nfs4_lock_state *lsp;
+ 	int seq;
+@@ -753,7 +803,7 @@ void nfs4_copy_stateid(nfs4_stateid *dst
+ 		return;
+ 
+ 	spin_lock(&state->state_lock);
+-	lsp = __nfs4_find_lock_state(state, fl_owner);
++	lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
+ 	if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
+ 		memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
+ 	spin_unlock(&state->state_lock);
+@@ -1031,8 +1081,8 @@ restart:
+ 				 * Open state on this file cannot be recovered
+ 				 * All we can do is revert to using the zero stateid.
+ 				 */
+-				memset(state->stateid.data, 0,
+-					sizeof(state->stateid.data));
++				memset(state->stateid.u.data, 0,
++					sizeof(state->stateid.u.data));
+ 				/* Mark the file as being 'closed' */
+ 				state->state = 0;
+ 				break;
+@@ -1041,11 +1091,11 @@ restart:
+ 			case -NFS4ERR_BAD_STATEID:
+ 			case -NFS4ERR_RECLAIM_BAD:
+ 			case -NFS4ERR_RECLAIM_CONFLICT:
+-				nfs4_state_mark_reclaim_nograce(sp->so_client, state);
++				nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ 				break;
+ 			case -NFS4ERR_EXPIRED:
+ 			case -NFS4ERR_NO_GRACE:
+-				nfs4_state_mark_reclaim_nograce(sp->so_client, state);
++				nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ 			case -NFS4ERR_STALE_CLIENTID:
+ 			case -NFS4ERR_BADSESSION:
+ 			case -NFS4ERR_BADSLOT:
+@@ -1120,8 +1170,7 @@ static void nfs4_state_end_reclaim_reboo
+ 	if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+ 		return;
+ 
+-	nfs4_reclaim_complete(clp,
+-		nfs4_reboot_recovery_ops[clp->cl_minorversion]);
++	nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
+ 
+ 	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+ 		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+@@ -1211,8 +1260,8 @@ restart:
+ static int nfs4_check_lease(struct nfs_client *clp)
+ {
+ 	struct rpc_cred *cred;
+-	struct nfs4_state_maintenance_ops *ops =
+-		nfs4_state_renewal_ops[clp->cl_minorversion];
++	const struct nfs4_state_maintenance_ops *ops =
++		clp->cl_mvops->state_renewal_ops;
+ 	int status = -NFS4ERR_EXPIRED;
+ 
+ 	/* Is the client already known to have an expired lease? */
+@@ -1235,8 +1284,8 @@ out:
+ static int nfs4_reclaim_lease(struct nfs_client *clp)
+ {
+ 	struct rpc_cred *cred;
+-	struct nfs4_state_recovery_ops *ops =
+-		nfs4_reboot_recovery_ops[clp->cl_minorversion];
++	const struct nfs4_state_recovery_ops *ops =
++		clp->cl_mvops->reboot_recovery_ops;
+ 	int status = -ENOENT;
+ 
+ 	cred = ops->get_clid_cred(clp);
+@@ -1421,6 +1470,7 @@ static void nfs4_state_manager(struct nf
+ 			}
+ 			clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
+ 			set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
++			pnfs_destroy_all_layouts(clp);
+ 		}
+ 
+ 		if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
+@@ -1444,7 +1494,7 @@ static void nfs4_state_manager(struct nf
+ 		/* First recover reboot state... */
+ 		if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
+ 			status = nfs4_do_reclaim(clp,
+-				nfs4_reboot_recovery_ops[clp->cl_minorversion]);
++				clp->cl_mvops->reboot_recovery_ops);
+ 			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
+ 				continue;
+@@ -1458,7 +1508,7 @@ static void nfs4_state_manager(struct nf
+ 		/* Now recover expired state... */
+ 		if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
+ 			status = nfs4_do_reclaim(clp,
+-				nfs4_nograce_recovery_ops[clp->cl_minorversion]);
++				clp->cl_mvops->nograce_recovery_ops);
+ 			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-31 20:41:19.160150207 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-31 20:42:05.530092192 -0400
+@@ -50,8 +50,10 @@
+ #include <linux/nfs4.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/nfs_idmap.h>
++#include <linux/nfs4_pnfs.h>
+ #include "nfs4_fs.h"
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_XDR
+ 
+@@ -89,7 +91,7 @@ static int nfs4_stat_to_errno(int);
+ #define encode_getfh_maxsz      (op_encode_hdr_maxsz)
+ #define decode_getfh_maxsz      (op_decode_hdr_maxsz + 1 + \
+ 				((3+NFS4_FHSIZE) >> 2))
+-#define nfs4_fattr_bitmap_maxsz 3
++#define nfs4_fattr_bitmap_maxsz 4
+ #define encode_getattr_maxsz    (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
+ #define nfs4_name_maxsz		(1 + ((3 + NFS4_MAXNAMLEN) >> 2))
+ #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
+@@ -111,7 +113,11 @@ static int nfs4_stat_to_errno(int);
+ #define encode_restorefh_maxsz  (op_encode_hdr_maxsz)
+ #define decode_restorefh_maxsz  (op_decode_hdr_maxsz)
+ #define encode_fsinfo_maxsz	(encode_getattr_maxsz)
+-#define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + 11)
++/* The 5 accounts for the PNFS attributes, and assumes that at most three
++ * layout types will be returned.
++ */
++#define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + \
++				 nfs4_fattr_bitmap_maxsz + 8 + 5)
+ #define encode_renew_maxsz	(op_encode_hdr_maxsz + 3)
+ #define decode_renew_maxsz	(op_decode_hdr_maxsz)
+ #define encode_setclientid_maxsz \
+@@ -202,14 +208,17 @@ static int nfs4_stat_to_errno(int);
+ #define encode_link_maxsz	(op_encode_hdr_maxsz + \
+ 				nfs4_name_maxsz)
+ #define decode_link_maxsz	(op_decode_hdr_maxsz + decode_change_info_maxsz)
++#define encode_lockowner_maxsz	(7)
+ #define encode_lock_maxsz	(op_encode_hdr_maxsz + \
+ 				 7 + \
+-				 1 + encode_stateid_maxsz + 8)
++				 1 + encode_stateid_maxsz + 1 + \
++				 encode_lockowner_maxsz)
+ #define decode_lock_denied_maxsz \
+ 				(8 + decode_lockowner_maxsz)
+ #define decode_lock_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_lock_denied_maxsz)
+-#define encode_lockt_maxsz	(op_encode_hdr_maxsz + 12)
++#define encode_lockt_maxsz	(op_encode_hdr_maxsz + 5 + \
++				encode_lockowner_maxsz)
+ #define decode_lockt_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_lock_denied_maxsz)
+ #define encode_locku_maxsz	(op_encode_hdr_maxsz + 3 + \
+@@ -217,6 +226,11 @@ static int nfs4_stat_to_errno(int);
+ 				 4)
+ #define decode_locku_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_stateid_maxsz)
++#define encode_release_lockowner_maxsz \
++				(op_encode_hdr_maxsz + \
++				 encode_lockowner_maxsz)
++#define decode_release_lockowner_maxsz \
++				(op_decode_hdr_maxsz)
+ #define encode_access_maxsz	(op_encode_hdr_maxsz + 1)
+ #define decode_access_maxsz	(op_decode_hdr_maxsz + 2)
+ #define encode_symlink_maxsz	(op_encode_hdr_maxsz + \
+@@ -302,6 +316,35 @@ static int nfs4_stat_to_errno(int);
+ 				XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
+ #define encode_reclaim_complete_maxsz	(op_encode_hdr_maxsz + 4)
+ #define decode_reclaim_complete_maxsz	(op_decode_hdr_maxsz + 4)
++#define encode_getdevicelist_maxsz (op_encode_hdr_maxsz + 4 + \
++				encode_verifier_maxsz)
++#define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 +  \
++				decode_verifier_maxsz +             \
++				XDR_QUADLEN(NFS4_PNFS_GETDEVLIST_MAXNUM *  \
++				NFS4_PNFS_DEVICEID4_SIZE))
++#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \
++				XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE))
++#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \
++				4 /*layout type */ + \
++				4 /* opaque devaddr4 length */ +\
++				4 /* notification bitmap length */ + \
++				4 /* notification bitmap */)
++#define encode_layoutget_maxsz	(op_encode_hdr_maxsz + 10 + \
++				encode_stateid_maxsz)
++#define decode_layoutget_maxsz	(op_decode_hdr_maxsz + 8 + \
++				decode_stateid_maxsz + \
++				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
++#define encode_layoutcommit_maxsz (18 +                           \
++				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + \
++				op_encode_hdr_maxsz +          \
++				encode_stateid_maxsz)
++#define decode_layoutcommit_maxsz (3 + op_decode_hdr_maxsz)
++#define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \
++				encode_stateid_maxsz + \
++				1 /* FIXME: opaque lrf_body always empty at
++				   * the moment */)
++#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
++				1 + decode_stateid_maxsz)
+ #else /* CONFIG_NFS_V4_1 */
+ #define encode_sequence_maxsz	0
+ #define decode_sequence_maxsz	0
+@@ -471,6 +514,12 @@ static int nfs4_stat_to_errno(int);
+ 				decode_sequence_maxsz + \
+ 				decode_putfh_maxsz + \
+ 				decode_locku_maxsz)
++#define NFS4_enc_release_lockowner_sz \
++				(compound_encode_hdr_maxsz + \
++				 encode_release_lockowner_maxsz) /* op hdr + owner */
++#define NFS4_dec_release_lockowner_sz \
++				(compound_decode_hdr_maxsz + \
++				 decode_release_lockowner_maxsz) /* op hdr only */
+ #define NFS4_enc_access_sz	(compound_encode_hdr_maxsz + \
+ 				encode_sequence_maxsz + \
+ 				encode_putfh_maxsz + \
+@@ -685,6 +734,60 @@ static int nfs4_stat_to_errno(int);
+ #define NFS4_dec_reclaim_complete_sz	(compound_decode_hdr_maxsz + \
+ 					 decode_sequence_maxsz + \
+ 					 decode_reclaim_complete_maxsz)
++#define NFS4_enc_getdevicelist_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz + \
++				encode_getdevicelist_maxsz)
++#define NFS4_dec_getdevicelist_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_getdevicelist_maxsz)
++#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz +    \
++				encode_sequence_maxsz +\
++				encode_getdeviceinfo_maxsz)
++#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz +    \
++				decode_sequence_maxsz + \
++				decode_getdeviceinfo_maxsz)
++#define NFS4_enc_layoutget_sz	(compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz +        \
++				encode_layoutget_maxsz)
++#define NFS4_dec_layoutget_sz	(compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz +        \
++				decode_layoutget_maxsz)
++#define NFS4_enc_layoutcommit_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz +\
++				encode_putfh_maxsz + \
++				encode_layoutcommit_maxsz + \
++				encode_getattr_maxsz)
++#define NFS4_dec_layoutcommit_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_layoutcommit_maxsz + \
++				decode_getattr_maxsz)
++#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz + \
++				encode_layoutreturn_maxsz)
++#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_layoutreturn_maxsz)
++#define NFS4_enc_dswrite_sz	(compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz +\
++				encode_putfh_maxsz + \
++				encode_write_maxsz)
++#define NFS4_dec_dswrite_sz	(compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_write_maxsz)
++#define NFS4_enc_dscommit_sz	(compound_encode_hdr_maxsz + \
++				encode_putfh_maxsz + \
++				encode_commit_maxsz)
++#define NFS4_dec_dscommit_sz	(compound_decode_hdr_maxsz + \
++				decode_putfh_maxsz + \
++				decode_commit_maxsz)
+ 
+ const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
+ 				      compound_encode_hdr_maxsz +
+@@ -915,7 +1018,7 @@ static void encode_close(struct xdr_stre
+ 	p = reserve_space(xdr, 8+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_CLOSE);
+ 	*p++ = cpu_to_be32(arg->seqid->sequence->counter);
+-	xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_close_maxsz;
+ }
+@@ -989,6 +1092,35 @@ static void encode_getattr_two(struct xd
+ 	hdr->replen += decode_getattr_maxsz;
+ }
+ 
++/*
++ * Encode GETATTR with the shortest bitmap (1 to 3 words) that still holds
++ * the highest non-zero mask word; bm0 is always sent.
++ */
++static void
++encode_getattr_three(struct xdr_stream *xdr,
++		     uint32_t bm0, uint32_t bm1, uint32_t bm2,
++		     struct compound_hdr *hdr)
++{
++	const uint32_t bitmap[3] = { bm0, bm1, bm2 };
++	uint32_t nwords = 1;
++	uint32_t i;
++	__be32 *p;
++
++	if (bm2)
++		nwords = 3;
++	else if (bm1)
++		nwords = 2;
++	p = reserve_space(xdr, 4);
++	*p = cpu_to_be32(OP_GETATTR);
++	/* One length word followed by nwords bitmap words. */
++	p = reserve_space(xdr, 4 + 4 * nwords);
++	*p++ = cpu_to_be32(nwords);
++	for (i = 0; i < nwords; i++)
++		*p++ = cpu_to_be32(bitmap[i]);
++	hdr->nops++;
++	hdr->replen += decode_getattr_maxsz;
++}
++
+ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+ {
+ 	encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0],
+@@ -997,8 +1129,11 @@ static void encode_getfattr(struct xdr_s
+ 
+ static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+ {
+-	encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0],
+-			   bitmask[1] & nfs4_fsinfo_bitmap[1], hdr);
++	encode_getattr_three(xdr,
++			     bitmask[0] & nfs4_fsinfo_bitmap[0],
++			     bitmask[1] & nfs4_fsinfo_bitmap[1],
++			     bitmask[2] & nfs4_fsinfo_bitmap[2],
++			     hdr);
+ }
+ 
+ static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+@@ -1042,6 +1177,17 @@ static inline uint64_t nfs4_lock_length(
+ 	return fl->fl_end - fl->fl_start + 1;
+ }
+ 
++/* Encode a lock owner: clientid, then a 16-byte opaque of "lock id:" + id. */
++static void encode_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner)
++{
++	__be32 *cursor = reserve_space(xdr, 28);
++
++	cursor = xdr_encode_hyper(cursor, lowner->clientid);
++	*cursor++ = cpu_to_be32(16);
++	cursor = xdr_encode_opaque_fixed(cursor, "lock id:", 8);
++	xdr_encode_hyper(cursor, lowner->id);
++}
++
+ /*
+  * opcode,type,reclaim,offset,length,new_lock_owner = 32
+  * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40
+@@ -1058,18 +1204,16 @@ static void encode_lock(struct xdr_strea
+ 	p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+ 	*p = cpu_to_be32(args->new_lock_owner);
+ 	if (args->new_lock_owner){
+-		p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32);
++		p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 		*p++ = cpu_to_be32(args->open_seqid->sequence->counter);
+-		p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
++		p = xdr_encode_opaque_fixed(p, args->open_stateid->u.data,
++					    NFS4_STATEID_SIZE);
+ 		*p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
+-		p = xdr_encode_hyper(p, args->lock_owner.clientid);
+-		*p++ = cpu_to_be32(16);
+-		p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+-		xdr_encode_hyper(p, args->lock_owner.id);
++		encode_lockowner(xdr, &args->lock_owner);
+ 	}
+ 	else {
+ 		p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
+-		p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE);
++		p = xdr_encode_opaque_fixed(p, args->lock_stateid->u.data, NFS4_STATEID_SIZE);
+ 		*p = cpu_to_be32(args->lock_seqid->sequence->counter);
+ 	}
+ 	hdr->nops++;
+@@ -1080,15 +1224,12 @@ static void encode_lockt(struct xdr_stre
+ {
+ 	__be32 *p;
+ 
+-	p = reserve_space(xdr, 52);
++	p = reserve_space(xdr, 24);
+ 	*p++ = cpu_to_be32(OP_LOCKT);
+ 	*p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
+ 	p = xdr_encode_hyper(p, args->fl->fl_start);
+ 	p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+-	p = xdr_encode_hyper(p, args->lock_owner.clientid);
+-	*p++ = cpu_to_be32(16);
+-	p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+-	xdr_encode_hyper(p, args->lock_owner.id);
++	encode_lockowner(xdr, &args->lock_owner);
+ 	hdr->nops++;
+ 	hdr->replen += decode_lockt_maxsz;
+ }
+@@ -1101,13 +1242,25 @@ static void encode_locku(struct xdr_stre
+ 	*p++ = cpu_to_be32(OP_LOCKU);
+ 	*p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
+ 	*p++ = cpu_to_be32(args->seqid->sequence->counter);
+-	p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, args->stateid->u.data,
++				    NFS4_STATEID_SIZE);
+ 	p = xdr_encode_hyper(p, args->fl->fl_start);
+ 	xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+ 	hdr->nops++;
+ 	hdr->replen += decode_locku_maxsz;
+ }
+ 
++/* Append a RELEASE_LOCKOWNER op (opcode + lock owner) to the compound. */
++static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr)
++{
++	__be32 *opcode = reserve_space(xdr, 4);
++
++	*opcode = cpu_to_be32(OP_RELEASE_LOCKOWNER);
++	encode_lockowner(xdr, lowner);
++	hdr->replen += decode_release_lockowner_maxsz;
++	hdr->nops++;
++}
++
+ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
+ {
+ 	int len = name->len;
+@@ -1172,7 +1325,7 @@ static inline void encode_createmode(str
+ 		break;
+ 	default:
+ 		clp = arg->server->nfs_client;
+-		if (clp->cl_minorversion > 0) {
++		if (clp->cl_mvops->minor_version > 0) {
+ 			if (nfs4_has_persistent_session(clp)) {
+ 				*p = cpu_to_be32(NFS4_CREATE_GUARDED);
+ 				encode_attrs(xdr, arg->u.attrs, arg->server);
+@@ -1251,7 +1404,7 @@ static inline void encode_claim_delegate
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
+-	xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, stateid->u.data, NFS4_STATEID_SIZE);
+ 	encode_string(xdr, name->len, name->name);
+ }
+ 
+@@ -1282,7 +1435,7 @@ static void encode_open_confirm(struct x
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 	*p++ = cpu_to_be32(OP_OPEN_CONFIRM);
+-	p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	*p = cpu_to_be32(arg->seqid->sequence->counter);
+ 	hdr->nops++;
+ 	hdr->replen += decode_open_confirm_maxsz;
+@@ -1294,7 +1447,7 @@ static void encode_open_downgrade(struct
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 	*p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
+-	p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	*p = cpu_to_be32(arg->seqid->sequence->counter);
+ 	encode_share_access(xdr, arg->fmode);
+ 	hdr->nops++;
+@@ -1324,17 +1477,17 @@ static void encode_putrootfh(struct xdr_
+ 	hdr->replen += decode_putrootfh_maxsz;
+ }
+ 
+-static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
++static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx)
+ {
+ 	nfs4_stateid stateid;
+ 	__be32 *p;
+ 
+ 	p = reserve_space(xdr, NFS4_STATEID_SIZE);
+ 	if (ctx->state != NULL) {
+-		nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
+-		xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
++		nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid);
++		xdr_encode_opaque_fixed(p, stateid.u.data, NFS4_STATEID_SIZE);
+ 	} else
+-		xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
++		xdr_encode_opaque_fixed(p, zero_stateid.u.data, NFS4_STATEID_SIZE);
+ }
+ 
+ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
+@@ -1344,7 +1497,7 @@ static void encode_read(struct xdr_strea
+ 	p = reserve_space(xdr, 4);
+ 	*p = cpu_to_be32(OP_READ);
+ 
+-	encode_stateid(xdr, args->context);
++	encode_stateid(xdr, args->context, args->lock_context);
+ 
+ 	p = reserve_space(xdr, 12);
+ 	p = xdr_encode_hyper(p, args->offset);
+@@ -1448,7 +1601,7 @@ encode_setacl(struct xdr_stream *xdr, st
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_SETATTR);
+-	xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, zero_stateid.u.data, NFS4_STATEID_SIZE);
+ 	p = reserve_space(xdr, 2*4);
+ 	*p++ = cpu_to_be32(1);
+ 	*p = cpu_to_be32(FATTR4_WORD0_ACL);
+@@ -1479,7 +1632,7 @@ static void encode_setattr(struct xdr_st
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_SETATTR);
+-	xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, arg->stateid.u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_setattr_maxsz;
+ 	encode_attrs(xdr, arg->iap, server);
+@@ -1523,7 +1676,7 @@ static void encode_write(struct xdr_stre
+ 	p = reserve_space(xdr, 4);
+ 	*p = cpu_to_be32(OP_WRITE);
+ 
+-	encode_stateid(xdr, args->context);
++	encode_stateid(xdr, args->context, args->lock_context);
+ 
+ 	p = reserve_space(xdr, 16);
+ 	p = xdr_encode_hyper(p, args->offset);
+@@ -1542,7 +1695,7 @@ static void encode_delegreturn(struct xd
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 
+ 	*p++ = cpu_to_be32(OP_DELEGRETURN);
+-	xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, stateid->u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_delegreturn_maxsz;
+ }
+@@ -1696,6 +1849,162 @@ static void encode_sequence(struct xdr_s
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
++#ifdef CONFIG_NFS_V4_1
++static void
++encode_getdevicelist(struct xdr_stream *xdr,
++		     const struct nfs4_getdevicelist_args *args,
++		     struct compound_hdr *hdr)
++{
++	__be32 *p;
++	nfs4_verifier dummy = {
++		.data = "dummmmmy",
++	};
++
++	p = reserve_space(xdr, 20);
++	*p++ = cpu_to_be32(OP_GETDEVICELIST);
++	*p++ = cpu_to_be32(args->layoutclass);
++	*p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM);
++	xdr_encode_hyper(p, 0ULL);                          /* cookie */
++	encode_nfs4_verifier(xdr, &dummy);
++	hdr->nops++;
++}
++
++static void
++encode_getdeviceinfo(struct xdr_stream *xdr,
++		     const struct nfs4_getdeviceinfo_args *args,
++		     struct compound_hdr *hdr)
++{
++	int has_bitmap = (args->pdev->dev_notify_types != 0);
++	int len = 16 + NFS4_PNFS_DEVICEID4_SIZE + (has_bitmap * 4);
++	__be32 *p;
++
++	p = reserve_space(xdr, len);
++	*p++ = cpu_to_be32(OP_GETDEVICEINFO);
++	p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
++				    NFS4_PNFS_DEVICEID4_SIZE);
++	*p++ = cpu_to_be32(args->pdev->layout_type);
++	*p++ = cpu_to_be32(args->pdev->pglen + len);	/* gdia_maxcount */
++	*p++ = cpu_to_be32(has_bitmap);			/* bitmap length [01] */
++	if (has_bitmap)
++		*p = cpu_to_be32(args->pdev->dev_notify_types);
++	hdr->nops++;
++}
++
++static void
++encode_layoutget(struct xdr_stream *xdr,
++		      const struct nfs4_layoutget_args *args,
++		      struct compound_hdr *hdr)
++{
++	nfs4_stateid stateid;
++	__be32 *p;
++
++	p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
++	*p++ = cpu_to_be32(OP_LAYOUTGET);
++	*p++ = cpu_to_be32(0);     /* Signal layout available */
++	*p++ = cpu_to_be32(args->type);
++	*p++ = cpu_to_be32(args->range.iomode);
++	p = xdr_encode_hyper(p, args->range.offset);
++	p = xdr_encode_hyper(p, args->range.length);
++	p = xdr_encode_hyper(p, args->minlength);
++	pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout);
++	p = xdr_encode_opaque_fixed(p, &stateid.u.data, NFS4_STATEID_SIZE);
++	*p = cpu_to_be32(args->maxcount);
++
++	dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
++		__func__,
++		args->type,
++		args->range.iomode,
++		(unsigned long)args->range.offset,
++		(unsigned long)args->range.length,
++		args->maxcount);
++	hdr->nops++;
++	hdr->replen += decode_layoutget_maxsz;
++}
++
++static int
++encode_layoutcommit(struct xdr_stream *xdr,
++		    const struct nfs4_layoutcommit_args *args,
++		    struct compound_hdr *hdr)
++{
++	struct layoutdriver_io_operations *ld_io_ops =
++			NFS_SERVER(args->inode)->pnfs_curr_ld->ld_io_ops;
++	__be32 *p;
++
++	dprintk("%s: %llu@%llu lbw: %llu type: %d\n", __func__,
++		args->range.length, args->range.offset, args->lastbytewritten,
++		args->layout_type);
++
++	p = reserve_space(xdr, 40 + NFS4_STATEID_SIZE);
++	*p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
++	p = xdr_encode_hyper(p, args->range.offset);
++	p = xdr_encode_hyper(p, args->range.length);
++	*p++ = cpu_to_be32(0);     /* reclaim */
++	p = xdr_encode_opaque_fixed(p, args->stateid.u.data, NFS4_STATEID_SIZE);
++	*p++ = cpu_to_be32(1);     /* newoffset = TRUE */
++	p = xdr_encode_hyper(p, args->lastbytewritten);
++	*p = cpu_to_be32(args->time_modify_changed != 0);
++	if (args->time_modify_changed) {
++		p = reserve_space(xdr, 12);
++		*p++ = cpu_to_be32(0);
++		*p++ = cpu_to_be32(args->time_modify.tv_sec);
++		*p = cpu_to_be32(args->time_modify.tv_nsec);
++	}
++
++	p = reserve_space(xdr, 4);
++	*p = cpu_to_be32(args->layout_type);
++
++	if (ld_io_ops->encode_layoutcommit) {
++		ld_io_ops->encode_layoutcommit(NFS_I(args->inode)->layout,
++					       xdr, args);
++	} else {
++		p = reserve_space(xdr, 4);
++		xdr_encode_opaque(p, NULL, 0);
++	}
++
++	hdr->nops++;
++	hdr->replen += decode_layoutcommit_maxsz;
++	return 0;
++}
++
++static void
++encode_layoutreturn(struct xdr_stream *xdr,
++		    const struct nfs4_layoutreturn_args *args,
++		    struct compound_hdr *hdr)
++{
++	nfs4_stateid stateid;
++	__be32 *p;
++
++	p = reserve_space(xdr, 20);
++	*p++ = cpu_to_be32(OP_LAYOUTRETURN);
++	*p++ = cpu_to_be32(args->reclaim);
++	*p++ = cpu_to_be32(args->layout_type);
++	*p++ = cpu_to_be32(args->range.iomode);
++	*p = cpu_to_be32(args->return_type);
++	if (args->return_type == RETURN_FILE) {
++		struct layoutdriver_io_operations *ld_io_ops =
++			NFS_SERVER(args->inode)->pnfs_curr_ld->ld_io_ops;
++
++		p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
++		p = xdr_encode_hyper(p, args->range.offset);
++		p = xdr_encode_hyper(p, args->range.length);
++		pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout);
++		p = xdr_encode_opaque_fixed(p, &stateid.u.data,
++					    NFS4_STATEID_SIZE);
++		dprintk("%s: call %pF\n", __func__,
++		ld_io_ops->encode_layoutreturn);
++		if (ld_io_ops->encode_layoutreturn) {
++			ld_io_ops->encode_layoutreturn(
++				NFS_I(args->inode)->layout, xdr, args);
++		} else {
++			p = reserve_space(xdr, 4);
++			*p = cpu_to_be32(0);
++		}
++	}
++	hdr->nops++;
++	hdr->replen += decode_layoutreturn_maxsz;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
+ /*
+  * END OF "GENERIC" ENCODE ROUTINES.
+  */
+@@ -1704,7 +2013,7 @@ static u32 nfs4_xdr_minorversion(const s
+ {
+ #if defined(CONFIG_NFS_V4_1)
+ 	if (args->sa_session)
+-		return args->sa_session->clp->cl_minorversion;
++		return args->sa_session->clp->cl_mvops->minor_version;
+ #endif /* CONFIG_NFS_V4_1 */
+ 	return 0;
+ }
+@@ -2048,6 +2357,20 @@ static int nfs4_xdr_enc_locku(struct rpc
+ 	return 0;
+ }
+ 
++static int nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, __be32 *p, struct nfs_release_lockowner_args *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = 0,
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_release_lockowner(&xdr, &args->lock_owner, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
+ /*
+  * Encode a READLINK request
+  */
+@@ -2330,7 +2653,7 @@ static int nfs4_xdr_enc_setclientid_conf
+ 	struct compound_hdr hdr = {
+ 		.nops	= 0,
+ 	};
+-	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
++	const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME, 0, 0 };
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ 	encode_compound_hdr(&xdr, req, &hdr);
+@@ -2395,7 +2718,7 @@ static int nfs4_xdr_enc_exchange_id(stru
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = args->client->cl_minorversion,
++		.minorversion = args->client->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2413,7 +2736,7 @@ static int nfs4_xdr_enc_create_session(s
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = args->client->cl_minorversion,
++		.minorversion = args->client->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2431,7 +2754,7 @@ static int nfs4_xdr_enc_destroy_session(
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = session->clp->cl_minorversion,
++		.minorversion = session->clp->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2469,7 +2792,7 @@ static int nfs4_xdr_enc_get_lease_time(s
+ 	struct compound_hdr hdr = {
+ 		.minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
+ 	};
+-	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
++	const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME, 0, 0 };
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ 	encode_compound_hdr(&xdr, req, &hdr);
+@@ -2499,6 +2822,159 @@ static int nfs4_xdr_enc_reclaim_complete
+ 	return 0;
+ }
+ 
++/*
++ * Encode GETDEVICELIST request
++ */
++static int
++nfs4_xdr_enc_getdevicelist(struct rpc_rqst *req, uint32_t *p,
++			   struct nfs4_getdevicelist_args *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_getdevicelist(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode GETDEVICEINFO request
++ */
++static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
++				      struct nfs4_getdeviceinfo_args *args)
++{
++	struct xdr_stream xdr;
++	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++	int replen;
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_getdeviceinfo(&xdr, args, &hdr);
++
++	/* set up reply kvec. Subtract notification bitmap max size (8)
++	 * so that notification bitmap is put in xdr_buf tail */
++	replen = (RPC_REPHDRSIZE + auth->au_rslack +
++		  NFS4_dec_getdeviceinfo_sz - 8) << 2;
++	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pdev->pages,
++			 args->pdev->pgbase, args->pdev->pglen);
++	dprintk("%s: inlined page args = (%u, %p, %u, %u)\n",
++		__func__, replen, args->pdev->pages,
++		args->pdev->pgbase, args->pdev->pglen);
++
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ *  Encode LAYOUTGET request
++ */
++static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
++				  struct nfs4_layoutget_args *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
++	encode_layoutget(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ *  Encode LAYOUTCOMMIT request
++ */
++static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, uint32_t *p,
++				     struct nfs4_layoutcommit_args *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_layoutcommit(&xdr, args, &hdr);
++	encode_getfattr(&xdr, args->bitmask, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode LAYOUTRETURN request
++ */
++static int nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, uint32_t *p,
++				     struct nfs4_layoutreturn_args *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
++	encode_layoutreturn(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a pNFS File Layout Data Server WRITE request
++ */
++static int nfs4_xdr_enc_dswrite(struct rpc_rqst *req, uint32_t *p,
++				struct nfs_writeargs *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_write(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a pNFS File Layout Data Server COMMIT request
++ */
++static int nfs4_xdr_enc_dscommit(struct rpc_rqst *req, uint32_t *p,
++				 struct nfs_writeargs *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_commit(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+@@ -2599,14 +3075,17 @@ static int decode_attr_bitmap(struct xdr
+ 		goto out_overflow;
+ 	bmlen = be32_to_cpup(p);
+ 
+-	bitmap[0] = bitmap[1] = 0;
++	bitmap[0] = bitmap[1] = bitmap[2] = 0;
+ 	p = xdr_inline_decode(xdr, (bmlen << 2));
+ 	if (unlikely(!p))
+ 		goto out_overflow;
+ 	if (bmlen > 0) {
+ 		bitmap[0] = be32_to_cpup(p++);
+-		if (bmlen > 1)
+-			bitmap[1] = be32_to_cpup(p);
++		if (bmlen > 1) {
++			bitmap[1] = be32_to_cpup(p++);
++			if (bmlen > 2)
++				bitmap[2] = be32_to_cpup(p);
++		}
+ 	}
+ 	return 0;
+ out_overflow:
+@@ -2635,8 +3114,9 @@ static int decode_attr_supported(struct 
+ 		decode_attr_bitmap(xdr, bitmask);
+ 		bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS;
+ 	} else
+-		bitmask[0] = bitmask[1] = 0;
+-	dprintk("%s: bitmask=%08x:%08x\n", __func__, bitmask[0], bitmask[1]);
++		bitmask[0] = bitmask[1] = bitmask[2] = 0;
++	dprintk("%s: bitmask=%08x:%08x:%08x\n", __func__,
++		bitmask[0], bitmask[1], bitmask[2]);
+ 	return 0;
+ }
+ 
+@@ -3565,7 +4045,7 @@ static int decode_opaque_fixed(struct xd
+ 
+ static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+ {
+-	return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
++	return decode_opaque_fixed(xdr, stateid->u.data, NFS4_STATEID_SIZE);
+ }
+ 
+ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
+@@ -3621,7 +4101,7 @@ out_overflow:
+ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3647,7 +4127,7 @@ xdr_error:
+ static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3679,7 +4159,7 @@ xdr_error:
+ static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3705,7 +4185,7 @@ static int decode_getfattr(struct xdr_st
+ {
+ 	__be32 *savep;
+ 	uint32_t attrlen,
+-		 bitmap[2] = {0},
++		 bitmap[3] = {0},
+ 		 type;
+ 	int status;
+ 	umode_t fmode = 0;
+@@ -3824,24 +4304,101 @@ xdr_error:
+ 	return status;
+ }
+ 
+-
+-static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
++#if defined(CONFIG_NFS_V4_1)
++/*
++ * Decode potentially multiple layout types. Currently we only support
++ * one layout driver per file system.
++ */
++static int decode_pnfs_list(struct xdr_stream *xdr, uint32_t *layoutclass)
+ {
+-	__be32 *savep;
+-	uint32_t attrlen, bitmap[2];
+-	int status;
++	uint32_t *p;
++	int num;
+ 
+-	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+-		goto xdr_error;
+-	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+-		goto xdr_error;
+-	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+-		goto xdr_error;
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	num = be32_to_cpup(p);
+ 
+-	fsinfo->rtmult = fsinfo->wtmult = 512;	/* ??? */
++	/* pNFS is not supported by the underlying file system */
++	if (num == 0) {
++		*layoutclass = 0;
++		return 0;
++	}
+ 
+-	if ((status = decode_attr_lease_time(xdr, bitmap, &fsinfo->lease_time)) != 0)
+-		goto xdr_error;
++	/* TODO: We will eventually support multiple layout drivers ? */
++	if (num > 1)
++		printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers "
++			"per filesystem not supported\n", __func__);
++	/* Set first layout type; a count whose byte size wraps is bogus */
++	p = (num < 0 || num > INT_MAX / 4) ? NULL :
++		xdr_inline_decode(xdr, num * 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	*layoutclass = be32_to_cpup(p);
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++/*
++ * The type of file system exported
++ */
++static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
++				uint32_t *layoutclass)
++{
++	int status = 0;
++
++	dprintk("%s: bitmap is %x\n", __func__, bitmap[1]);
++	if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
++		return -EIO;
++	if (likely(bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES)) {
++		status = decode_pnfs_list(xdr, layoutclass);
++		bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
++	}
++	return status;
++}
++
++/*
++ * The preferred block size for layout directed io
++ */
++static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
++				      uint32_t *res)
++{
++	__be32 *p;
++
++	dprintk("%s: bitmap is %x\n", __func__, bitmap[2]);
++	*res = 0;
++	if (bitmap[2] & FATTR4_WORD2_LAYOUT_BLKSIZE) {
++		p = xdr_inline_decode(xdr, 4);
++		if (unlikely(!p)) {
++			print_overflow_msg(__func__, xdr);
++			return -EIO;
++		}
++		*res = be32_to_cpup(p);
++		bitmap[2] &= ~FATTR4_WORD2_LAYOUT_BLKSIZE;
++	}
++	return 0;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
++static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
++{
++	__be32 *savep;
++	uint32_t attrlen, bitmap[3];
++	int status;
++
++	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
++		goto xdr_error;
++	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
++		goto xdr_error;
++	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
++		goto xdr_error;
++
++	fsinfo->rtmult = fsinfo->wtmult = 512;	/* ??? */
++
++	if ((status = decode_attr_lease_time(xdr, bitmap, &fsinfo->lease_time)) != 0)
++		goto xdr_error;
+ 	if ((status = decode_attr_maxfilesize(xdr, bitmap, &fsinfo->maxfilesize)) != 0)
+ 		goto xdr_error;
+ 	if ((status = decode_attr_maxread(xdr, bitmap, &fsinfo->rtmax)) != 0)
+@@ -3850,6 +4407,14 @@ static int decode_fsinfo(struct xdr_stre
+ 	if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0)
+ 		goto xdr_error;
+ 	fsinfo->wtpref = fsinfo->wtmax;
++#if defined(CONFIG_NFS_V4_1)
++	status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
++	if (status)
++		goto xdr_error;
++	status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize);
++	if (status)
++		goto xdr_error;
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ 	status = verify_attr_len(xdr, savep, attrlen);
+ xdr_error:
+@@ -3973,6 +4538,11 @@ static int decode_locku(struct xdr_strea
+ 	return status;
+ }
+ 
++static int decode_release_lockowner(struct xdr_stream *xdr)
++{
++	return decode_op_hdr(xdr, OP_RELEASE_LOCKOWNER);
++}
++
+ static int decode_lookup(struct xdr_stream *xdr)
+ {
+ 	return decode_op_hdr(xdr, OP_LOOKUP);
+@@ -4333,7 +4903,7 @@ static int decode_getacl(struct xdr_stre
+ {
+ 	__be32 *savep;
+ 	uint32_t attrlen,
+-		 bitmap[2] = {0};
++		 bitmap[3] = {0};
+ 	struct kvec *iov = req->rq_rcv_buf.head;
+ 	int status;
+ 
+@@ -4682,6 +5252,226 @@ out_overflow:
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
++#if defined(CONFIG_NFS_V4_1)
++/*
++ * TODO: Need to handle case when EOF != true;
++ */
++static int decode_getdevicelist(struct xdr_stream *xdr,
++				struct pnfs_devicelist *res)
++{
++	__be32 *p;
++	int status, i;
++	struct nfs_writeverf verftemp;
++
++	status = decode_op_hdr(xdr, OP_GETDEVICELIST);
++	if (status)
++		return status;
++
++	p = xdr_inline_decode(xdr, 8 + 8 + 4);
++	if (unlikely(!p))
++		goto out_overflow;
++
++	/* TODO: Skip cookie for now */
++	p += 2;
++
++	/* Read verifier */
++	p = xdr_decode_opaque_fixed(p, verftemp.verifier, 8);
++
++	res->num_devs = be32_to_cpup(p);
++
++	dprintk("%s: num_dev %d\n", __func__, res->num_devs);
++
++	if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM)
++		return -NFS4ERR_REP_TOO_BIG;
++
++	p = xdr_inline_decode(xdr,
++			      res->num_devs * NFS4_PNFS_DEVICEID4_SIZE + 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	for (i = 0; i < res->num_devs; i++)
++		p = xdr_decode_opaque_fixed(p, res->dev_id[i].data,
++					    NFS4_PNFS_DEVICEID4_SIZE);
++	res->eof = be32_to_cpup(p);
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_getdeviceinfo(struct xdr_stream *xdr,
++				struct pnfs_device *pdev)
++{
++	__be32 *p;
++	uint32_t len, type;
++	int status;
++
++	status = decode_op_hdr(xdr, OP_GETDEVICEINFO);
++	if (status) {
++		if (status == -ETOOSMALL) {
++			p = xdr_inline_decode(xdr, 4);
++			if (unlikely(!p))
++				goto out_overflow;
++			pdev->mincount = be32_to_cpup(p);
++			dprintk("%s: Min count too small. mincnt = %u\n",
++				__func__, pdev->mincount);
++		}
++		return status;
++	}
++
++	p = xdr_inline_decode(xdr, 8);
++	if (unlikely(!p))
++		goto out_overflow;
++	type = be32_to_cpup(p++);
++	if (type != pdev->layout_type) {
++		dprintk("%s: layout mismatch req: %u pdev: %u\n",
++			__func__, pdev->layout_type, type);
++		return -EINVAL;
++	}
++	/*
++	 * Get the length of the opaque device_addr4. xdr_read_pages places
++	 * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages)
++	 * and places the remaining xdr data in xdr_buf->tail
++	 */
++	pdev->mincount = be32_to_cpup(p);
++	xdr_read_pages(xdr, pdev->mincount); /* include space for the length */
++
++	/* At most one bitmap word */
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	len = be32_to_cpup(p);
++	if (len) {
++		p = xdr_inline_decode(xdr, 4);
++		if (unlikely(!p))
++			goto out_overflow;
++		pdev->dev_notify_types = be32_to_cpup(p);
++	} else
++		pdev->dev_notify_types = 0;
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
++			    struct nfs4_layoutget_res *res)
++{
++	__be32 *p;
++	int status;
++	u32 layout_count, dummy;
++
++	status = decode_op_hdr(xdr, OP_LAYOUTGET);
++	if (status)
++		return status;
++	p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE);
++	if (unlikely(!p))
++		goto out_overflow;
++	res->return_on_close = be32_to_cpup(p++);
++	p = xdr_decode_opaque_fixed(p, res->stateid.u.data, NFS4_STATEID_SIZE);
++	layout_count = be32_to_cpup(p);
++	if (!layout_count) {
++		dprintk("%s: server responded with empty layout array\n",
++			__func__);
++		return -EINVAL;
++	}
++
++	p = xdr_inline_decode(xdr, 24);
++	if (unlikely(!p))
++		goto out_overflow;
++	p = xdr_decode_hyper(p, &res->range.offset);
++	p = xdr_decode_hyper(p, &res->range.length);
++	res->range.iomode = be32_to_cpup(p++);
++	res->type = be32_to_cpup(p++);
++
++	status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
++	if (unlikely(status))
++		return status;
++
++	dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
++		__func__,
++		(unsigned long)res->range.offset,
++		(unsigned long)res->range.length,
++		res->range.iomode,
++		res->type,
++		res->layout.len);
++
++	/* presumably, nfs4_proc_layoutget allocated a single page */
++	if (res->layout.len > PAGE_SIZE)
++		return -ENOMEM;
++	memcpy(res->layout.buf, p, res->layout.len);
++
++	/* FIXME: the whole layout array should be passed up to the pnfs
++	 * client */
++	if (layout_count > 1) {
++		dprintk("%s: server responded with %d layouts, dropping tail\n",
++			__func__, layout_count);
++
++		while (--layout_count) {
++			p = xdr_inline_decode(xdr, 24);
++			if (unlikely(!p))
++				goto out_overflow;
++			status = decode_opaque_inline(xdr, &dummy, (char **)&p);
++			if (unlikely(status))
++				return status;
++		}
++	}
++
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_layoutreturn(struct xdr_stream *xdr,
++			       struct nfs4_layoutreturn_res *res)
++{
++	__be32 *p;
++	int status;
++
++	status = decode_op_hdr(xdr, OP_LAYOUTRETURN);
++	if (status)
++		return status;
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	res->lrs_present = be32_to_cpup(p);
++	if (res->lrs_present)
++		status = decode_stateid(xdr, &res->stateid);
++	return status;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_layoutcommit(struct xdr_stream *xdr,
++				    struct rpc_rqst *req,
++				    struct nfs4_layoutcommit_res *res)
++{
++	__be32 *p;
++	int status;
++
++	status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT);
++	if (status)
++		return status;
++
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	res->sizechanged = be32_to_cpup(p);
++
++	if (res->sizechanged) {
++		p = xdr_inline_decode(xdr, 8);
++		if (unlikely(!p))
++			goto out_overflow;
++		xdr_decode_hyper(p, &res->newsize);
++	}
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
+ /*
+  * END OF "GENERIC" DECODE ROUTINES.
+  */
+@@ -5259,6 +6049,19 @@ out:
+ 	return status;
+ }
+ 
++static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (!status)
++		status = decode_release_lockowner(&xdr);
++	return status;
++}
++
+ /*
+  * Decode READLINK response
+  */
+@@ -5696,6 +6499,186 @@ static int nfs4_xdr_dec_reclaim_complete
+ 		status = decode_reclaim_complete(&xdr, (void *)NULL);
+ 	return status;
+ }
++
++/*
++ * Decode GETDEVICELIST reply (hdr, SEQUENCE, PUTFH, list); 0 or first error
++ */
++static int nfs4_xdr_dec_getdevicelist(struct rpc_rqst *rqstp, uint32_t *p,
++				      struct nfs4_getdevicelist_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	dprintk("decoding getdevicelist!\n");
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status != 0)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status != 0)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status != 0)
++		goto out;
++	status = decode_getdevicelist(&xdr, res->devlist);
++out:
++	return status;
++}
++
++/*
++ * Decode GETDEVICEINFO response
++ */
++static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
++				      struct nfs4_getdeviceinfo_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status != 0)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status != 0)
++		goto out;
++	status = decode_getdeviceinfo(&xdr, res->pdev);
++out:
++	return status;
++}
++
++/*
++ * Decode LAYOUTGET response
++ */
++static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
++				  struct nfs4_layoutget_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutget(&xdr, rqstp, res);
++out:
++	return status;
++}
++
++/*
++ * Decode LAYOUTRETURN response
++ */
++static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp, uint32_t *p,
++				     struct nfs4_layoutreturn_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutreturn(&xdr, res);
++out:
++	return status;
++}
++
++/*
++ * Decode LAYOUTCOMMIT response
++ */
++static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, uint32_t *p,
++				     struct nfs4_layoutcommit_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutcommit(&xdr, rqstp, res);
++	if (status)
++		goto out;
++	decode_getfattr(&xdr, res->fattr, res->server,
++			!RPC_IS_ASYNC(rqstp->rq_task));
++out:
++	return status;
++}
++
++/*
++ * Decode pNFS File Layout Data Server WRITE response
++ */
++static int nfs4_xdr_dec_dswrite(struct rpc_rqst *rqstp, uint32_t *p,
++				struct nfs_writeres *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_write(&xdr, res);
++	if (!status)
++		return res->count;
++out:
++	return status;
++}
++
++/*
++ * Decode pNFS File Layout Data Server COMMIT response
++ */
++static int nfs4_xdr_dec_dscommit(struct rpc_rqst *rqstp, uint32_t *p,
++				 struct nfs_writeres *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_commit(&xdr, res);
++out:
++	return status;
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
+@@ -5866,6 +6849,7 @@ struct rpc_procinfo	nfs4_procedures[] = 
+   PROC(GETACL,		enc_getacl,	dec_getacl),
+   PROC(SETACL,		enc_setacl,	dec_setacl),
+   PROC(FS_LOCATIONS,	enc_fs_locations, dec_fs_locations),
++  PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
+ #if defined(CONFIG_NFS_V4_1)
+   PROC(EXCHANGE_ID,	enc_exchange_id,	dec_exchange_id),
+   PROC(CREATE_SESSION,	enc_create_session,	dec_create_session),
+@@ -5873,6 +6857,13 @@ struct rpc_procinfo	nfs4_procedures[] = 
+   PROC(SEQUENCE,	enc_sequence,	dec_sequence),
+   PROC(GET_LEASE_TIME,	enc_get_lease_time,	dec_get_lease_time),
+   PROC(RECLAIM_COMPLETE, enc_reclaim_complete,  dec_reclaim_complete),
++  PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist),
++  PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
++  PROC(LAYOUTGET,  enc_layoutget,     dec_layoutget),
++  PROC(LAYOUTCOMMIT, enc_layoutcommit,  dec_layoutcommit),
++  PROC(LAYOUTRETURN, enc_layoutreturn,  dec_layoutreturn),
++  PROC(PNFS_WRITE, enc_dswrite,  dec_dswrite),
++  PROC(PNFS_COMMIT, enc_dscommit,  dec_dscommit),
+ #endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild
+--- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-08-31 20:42:05.532213157 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-08-31 20:42:05.532213157 -0400
+@@ -0,0 +1,11 @@
++#
++# Makefile for the pNFS Objects Layout Driver kernel module
++#
++objlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o objio_osd.o
++obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o
++
++#
++# Panasas pNFS Layout Driver kernel module
++#
++panlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o panfs_shim.o
++obj-$(CONFIG_PNFS_PANLAYOUT) += panlayoutdriver.o
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-08-31 20:42:05.533243491 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-08-31 20:42:05.534105468 -0400
+@@ -0,0 +1,1087 @@
++/*
++ *  objio_osd.c
++ *
++ *  pNFS Objects layout implementation over open-osd initiator library
++ *
++ *  Copyright (C) 2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/module.h>
++#include <scsi/scsi_device.h>
++#include <scsi/osd_attributes.h>
++#include <scsi/osd_initiator.h>
++#include <scsi/osd_sec.h>
++#include <scsi/osd_sense.h>
++
++#include "objlayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++#define _LLU(x) ((unsigned long long)x)
++
++enum { BIO_MAX_PAGES_KMALLOC =
++		(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
++};
++
++/* A per mountpoint struct currently for device cache */
++struct objio_mount_type {
++	struct list_head dev_list;	/* cached devices (struct _dev_ent) */
++	spinlock_t dev_list_lock;	/* taken around dev_list accesses */
++};
++
++struct _dev_ent {
++	struct list_head list;		/* link on objio_mount_type.dev_list */
++	struct pnfs_deviceid d_id;	/* key compared by ___dev_list_find */
++	struct osd_dev *od;		/* put via osduld_put_device on removal */
++};
++
++static void _dev_list_remove_all(struct objio_mount_type *omt)
++{
++	spin_lock(&omt->dev_list_lock);
++
++	while (!list_empty(&omt->dev_list)) {
++		struct _dev_ent *de = list_entry(omt->dev_list.next,
++				 struct _dev_ent, list);
++
++		list_del_init(&de->list);
++		osduld_put_device(de->od);
++		kfree(de);
++	}
++
++	spin_unlock(&omt->dev_list_lock);
++}
++
++static struct osd_dev *___dev_list_find(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id)
++{
++	struct list_head *le;
++
++	list_for_each(le, &omt->dev_list) {
++		struct _dev_ent *de = list_entry(le, struct _dev_ent, list);
++
++		if (0 == memcmp(&de->d_id, d_id, sizeof(*d_id)))
++			return de->od;
++	}
++
++	return NULL;
++}
++
++static struct osd_dev *_dev_list_find(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id)
++{
++	struct osd_dev *od;
++
++	spin_lock(&omt->dev_list_lock);
++	od = ___dev_list_find(omt, d_id);
++	spin_unlock(&omt->dev_list_lock);
++	return od;
++}
++
++static int _dev_list_add(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id, struct osd_dev *od)
++{
++	struct _dev_ent *de = kzalloc(sizeof(*de), GFP_KERNEL);
++
++	if (!de)
++		return -ENOMEM;
++
++	spin_lock(&omt->dev_list_lock);
++
++	if (___dev_list_find(omt, d_id)) {
++		kfree(de);
++		goto out;
++	}
++
++	de->d_id = *d_id;
++	de->od = od;
++	list_add(&de->list, &omt->dev_list);
++
++out:
++	spin_unlock(&omt->dev_list_lock);
++	return 0;
++}
++
++struct objio_segment {
++	struct pnfs_osd_layout *layout;
++
++	unsigned mirrors_p1;
++	unsigned stripe_unit;
++	unsigned group_width;	/* Data stripe_units without integrity comps */
++	u64 group_depth;
++	unsigned group_count;
++
++	unsigned num_comps;
++	/* variable length */
++	struct osd_dev	*ods[1];
++};
++
++struct objio_state;
++typedef ssize_t (*objio_done_fn)(struct objio_state *ios);
++
++struct objio_state {
++	/* Generic layer */
++	struct objlayout_io_state ol_state;
++
++	struct objio_segment *objio_seg;
++
++	struct kref kref;
++	objio_done_fn done;
++	void *private;
++
++	unsigned long length;
++	unsigned numdevs; /* Actually used devs in this IO */
++	/* A per-device variable array of size numdevs */
++	struct _objio_per_comp {
++		struct bio *bio;
++		struct osd_request *or;
++		unsigned long length;
++		u64 offset;
++		unsigned dev;
++	} per_dev[];
++};
++
++/* Return the cached osd_dev for component @comp, or fetch its deviceinfo and
++   look it up with the osd_initiator library. Returns ERR_PTR() on failure */
++static struct osd_dev *_device_lookup(struct pnfs_layout_hdr *pnfslay,
++			       struct objio_segment *objio_seg, unsigned comp)
++{
++	struct pnfs_osd_layout *layout = objio_seg->layout;
++	struct pnfs_osd_deviceaddr *deviceaddr;
++	struct pnfs_deviceid *d_id;
++	struct osd_dev *od;
++	struct osd_dev_info odi;
++	struct objio_mount_type *omt = PNFS_NFS_SERVER(pnfslay)->pnfs_ld_data;
++	int err;
++
++	d_id = &layout->olo_comps[comp].oc_object_id.oid_device_id;
++	od = _dev_list_find(omt, d_id);
++	if (od)
++		return od;
++
++	err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr);
++	if (unlikely(err)) {
++		dprintk("%s: objlayout_get_deviceinfo=>%d\n", __func__, err);
++		return ERR_PTR(err);
++	}
++
++	odi.systemid_len = deviceaddr->oda_systemid.len;
++	if (odi.systemid_len > sizeof(odi.systemid)) {
++		err = -EINVAL;
++		goto out;
++	} else if (odi.systemid_len)
++		memcpy(odi.systemid, deviceaddr->oda_systemid.data,
++		       odi.systemid_len);
++	odi.osdname_len	 = deviceaddr->oda_osdname.len;
++	odi.osdname	 = (u8 *)deviceaddr->oda_osdname.data;
++
++	if (!odi.osdname_len && !odi.systemid_len) {
++		dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
++			__func__);
++		err = -ENODEV;
++		goto out;
++	}
++
++	od = osduld_info_lookup(&odi);
++	if (unlikely(IS_ERR(od))) {
++		err = PTR_ERR(od);
++		dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
++		goto out;
++	}
++	/* An uncached od is never put at unmount: drop it and fail instead */
++	err = _dev_list_add(omt, d_id, od);
++	if (unlikely(err))
++		osduld_put_device(od);
++out:
++	dprintk("%s: return=%d\n", __func__, err);
++	objlayout_put_deviceinfo(deviceaddr);
++	return err ? ERR_PTR(err) : od;
++}
++
++static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
++	struct objio_segment *objio_seg)
++{
++	struct pnfs_osd_layout *layout = objio_seg->layout;
++	unsigned i, num_comps = layout->olo_num_comps;
++	int err;
++
++	/* lookup all devices */
++	for (i = 0; i < num_comps; i++) {
++		struct osd_dev *od;
++
++		od = _device_lookup(pnfslay, objio_seg, i);
++		if (unlikely(IS_ERR(od))) {
++			err = PTR_ERR(od);
++			goto out;
++		}
++		objio_seg->ods[i] = od;
++	}
++	objio_seg->num_comps = num_comps;
++	err = 0;
++
++out:
++	dprintk("%s: return=%d\n", __func__, err);
++	return err;
++}
++
++static int _verify_data_map(struct pnfs_osd_layout *layout)
++{
++	struct pnfs_osd_data_map *data_map = &layout->olo_map;
++	u64 stripe_length;
++	u32 group_width;
++
++/* FIXME: Only raid0 for now. if not go through MDS */
++	if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) {
++		printk(KERN_ERR "Only RAID_0 for now\n");
++		return -ENOTSUPP;
++	}
++	if (0 != (data_map->odm_num_comps % (data_map->odm_mirror_cnt + 1))) {
++		printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n",
++			  data_map->odm_num_comps, data_map->odm_mirror_cnt);
++		return -EINVAL;
++	}
++	group_width = data_map->odm_group_width ? data_map->odm_group_width :
++		data_map->odm_num_comps / (data_map->odm_mirror_cnt + 1);
++	/* zero unit/width/depth would divide by zero in _calc_stripe_info */
++	if (!data_map->odm_stripe_unit || !group_width ||
++	    (data_map->odm_group_width && !data_map->odm_group_depth)) {
++		printk(KERN_ERR "Data Map wrong, zero unit/width/depth\n");
++		return -EINVAL;
++	}
++	stripe_length = (u64)data_map->odm_stripe_unit * group_width;
++	if (stripe_length >= (1ULL << 32)) {
++		printk(KERN_ERR "Total Stripe length(0x%llx)"
++			  " >= 32bit is not supported\n", _LLU(stripe_length));
++		return -ENOTSUPP;
++	}
++	if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) {
++		printk(KERN_ERR "Stripe Unit(0x%llx)"
++			  " must be Multples of PAGE_SIZE(0x%lx)\n",
++			  _LLU(data_map->odm_stripe_unit), PAGE_SIZE);
++		return -ENOTSUPP;
++	}
++
++	return 0;
++}
++
++int objio_alloc_lseg(void **outp,
++	struct pnfs_layout_hdr *pnfslay,
++	struct pnfs_layout_segment *lseg,
++	struct pnfs_osd_layout *layout)
++{
++	struct objio_segment *objio_seg;
++	int err;
++
++	err = _verify_data_map(layout);
++	if (unlikely(err))
++		return err;
++
++	objio_seg = kzalloc(sizeof(*objio_seg) +
++			(layout->olo_num_comps - 1) * sizeof(objio_seg->ods[0]),
++			GFP_KERNEL);
++	if (!objio_seg)
++		return -ENOMEM;
++
++	objio_seg->layout = layout;
++	err = objio_devices_lookup(pnfslay, objio_seg);
++	if (err)
++		goto free_seg;
++
++	objio_seg->mirrors_p1 = layout->olo_map.odm_mirror_cnt + 1;
++	objio_seg->stripe_unit = layout->olo_map.odm_stripe_unit;
++	if (layout->olo_map.odm_group_width) {
++		objio_seg->group_width = layout->olo_map.odm_group_width;
++		objio_seg->group_depth = layout->olo_map.odm_group_depth;
++		objio_seg->group_count = layout->olo_map.odm_num_comps /
++						objio_seg->mirrors_p1 /
++						objio_seg->group_width;
++	} else {
++		objio_seg->group_width = layout->olo_map.odm_num_comps /
++						objio_seg->mirrors_p1;
++		objio_seg->group_depth = -1;
++		objio_seg->group_count = 1;
++	}
++
++	*outp = objio_seg;
++	return 0;
++
++free_seg:
++	dprintk("%s: Error: return %d\n", __func__, err);
++	kfree(objio_seg);
++	*outp = NULL;
++	return err;
++}
++
++void objio_free_lseg(void *p)
++{
++	struct objio_segment *objio_seg = p;
++
++	kfree(objio_seg);
++}
++
++int objio_alloc_io_state(void *seg, struct objlayout_io_state **outp)
++{
++	struct objio_segment *objio_seg = seg;
++	struct objio_state *ios;
++	const unsigned first_size = sizeof(*ios) +
++				objio_seg->num_comps * sizeof(ios->per_dev[0]);
++	const unsigned sec_size = objio_seg->num_comps *
++						sizeof(ios->ol_state.ioerrs[0]);
++
++	dprintk("%s: num_comps=%d\n", __func__, objio_seg->num_comps);
++	ios = kzalloc(first_size + sec_size, GFP_KERNEL);
++	if (unlikely(!ios))
++		return -ENOMEM;
++
++	ios->objio_seg = objio_seg;
++	ios->ol_state.ioerrs = ((void *)ios) + first_size;
++	ios->ol_state.num_comps = objio_seg->num_comps;
++
++	*outp = &ios->ol_state;
++	return 0;
++}
++
++void objio_free_io_state(struct objlayout_io_state *ol_state)
++{
++	struct objio_state *ios = container_of(ol_state, struct objio_state,
++					       ol_state);
++
++	kfree(ios);
++}
++
++enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
++{
++	switch (oep) {
++	case OSD_ERR_PRI_NO_ERROR:
++		return (enum pnfs_osd_errno)0;
++
++	case OSD_ERR_PRI_CLEAR_PAGES:
++		BUG_ON(1);
++		return 0;
++
++	case OSD_ERR_PRI_RESOURCE:
++		return PNFS_OSD_ERR_RESOURCE;
++	case OSD_ERR_PRI_BAD_CRED:
++		return PNFS_OSD_ERR_BAD_CRED;
++	case OSD_ERR_PRI_NO_ACCESS:
++		return PNFS_OSD_ERR_NO_ACCESS;
++	case OSD_ERR_PRI_UNREACHABLE:
++		return PNFS_OSD_ERR_UNREACHABLE;
++	case OSD_ERR_PRI_NOT_FOUND:
++		return PNFS_OSD_ERR_NOT_FOUND;
++	case OSD_ERR_PRI_NO_SPACE:
++		return PNFS_OSD_ERR_NO_SPACE;
++	default:
++		WARN_ON(1);
++		/* fallthrough */
++	case OSD_ERR_PRI_EIO:
++		return PNFS_OSD_ERR_EIO;
++	}
++}
++
++static void _clear_bio(struct bio *bio)
++{
++	struct bio_vec *bv;
++	unsigned i;
++
++	__bio_for_each_segment(bv, bio, i, 0) {
++		unsigned this_count = bv->bv_len;
++
++		if (likely(PAGE_SIZE == this_count))
++			clear_highpage(bv->bv_page);
++		else
++			zero_user(bv->bv_page, bv->bv_offset, this_count);
++	}
++}
++
++static int _io_check(struct objio_state *ios, bool is_write)
++{
++	enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR;
++	int lin_ret = 0;
++	int i;
++
++	for (i = 0; i <  ios->numdevs; i++) {
++		struct osd_sense_info osi;
++		struct osd_request *or = ios->per_dev[i].or;
++		int ret;
++
++		if (!or)
++			continue;
++
++		ret = osd_req_decode_sense(or, &osi);
++		if (likely(!ret))
++			continue;
++
++		if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
++			/* start read offset past end of file */
++			BUG_ON(is_write);
++			_clear_bio(ios->per_dev[i].bio);
++			dprintk("%s: start read offset passed end of file "
++				"offset=0x%llx, length=0x%lx\n", __func__,
++				_LLU(ios->per_dev[i].offset),
++				ios->per_dev[i].length);
++
++			continue; /* we recovered */
++		}
++		objlayout_io_set_result(&ios->ol_state, ios->per_dev[i].dev,
++					osd_pri_2_pnfs_err(osi.osd_err_pri),
++					ios->per_dev[i].offset,
++					ios->per_dev[i].length,
++					is_write);
++
++		if (osi.osd_err_pri >= oep) {
++			oep = osi.osd_err_pri;
++			lin_ret = ret;
++		}
++	}
++
++	return lin_ret;
++}
++
++/*
++ * Common IO state helpers.
++ */
++static void _io_free(struct objio_state *ios)
++{
++	unsigned i;
++
++	for (i = 0; i < ios->numdevs; i++) {
++		struct _objio_per_comp *per_dev = &ios->per_dev[i];
++
++		if (per_dev->or) {
++			osd_end_request(per_dev->or);
++			per_dev->or = NULL;
++		}
++
++		if (per_dev->bio) {
++			bio_put(per_dev->bio);
++			per_dev->bio = NULL;
++		}
++	}
++}
++
++struct osd_dev * _io_od(struct objio_state *ios, unsigned dev)
++{
++	unsigned min_dev = ios->objio_seg->layout->olo_comps_index;
++	unsigned max_dev = min_dev + ios->ol_state.num_comps;
++
++	BUG_ON(dev < min_dev || max_dev <= dev);
++	return ios->objio_seg->ods[dev - min_dev];
++}
++
++struct _striping_info {
++	u64 obj_offset;		/* starting offset used for per_dev->offset */
++	u64 group_length;	/* T - H, as set by _calc_stripe_info */
++	u64 total_group_length;	/* T = U * group_depth */
++	u64 Major;		/* M = file_offset / (T * group_count) */
++	unsigned dev;		/* starting device index, times mirrors_p1 */
++	unsigned unit_off;	/* file_offset % stripe_unit */
++};
++
++static void _calc_stripe_info(struct objio_state *ios, u64 file_offset,
++			      struct _striping_info *si)
++{
++	u32	stripe_unit = ios->objio_seg->stripe_unit;
++	u32	group_width = ios->objio_seg->group_width;
++	u64	group_depth = ios->objio_seg->group_depth;
++	u32	U = stripe_unit * group_width;
++
++	u64	T = U * group_depth;
++	u64	S = T * ios->objio_seg->group_count;
++	u64	M = div64_u64(file_offset, S);
++
++	/*
++	G = (L - (M * S)) / T
++	H = (L - (M * S)) % T
++	*/
++	u64	LmodU = file_offset - M * S;
++	u32	G = div64_u64(LmodU, T);
++	u64	H = LmodU - G * T;
++
++	u32	N = div_u64(H, U);
++
++	div_u64_rem(file_offset, stripe_unit, &si->unit_off);
++	si->obj_offset = si->unit_off + (N * stripe_unit) +
++				  (M * group_depth * stripe_unit);
++
++	/* "H - (N * U)" is just "H % U" so it's bound to u32 */
++	si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
++	si->dev *= ios->objio_seg->mirrors_p1;
++
++	si->group_length = T - H;
++	si->total_group_length = T;
++	si->Major = M;
++}
++
++static int _add_stripe_unit(struct objio_state *ios,  unsigned *cur_pg,
++		unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len)
++{
++	unsigned pg = *cur_pg;
++	struct request_queue *q =
++			osd_request_queue(_io_od(ios, per_dev->dev));
++
++	per_dev->length += cur_len;
++
++	if (per_dev->bio == NULL) {
++		unsigned stripes = ios->ol_state.num_comps /
++						     ios->objio_seg->mirrors_p1;
++		unsigned pages_in_stripe = stripes *
++				      (ios->objio_seg->stripe_unit / PAGE_SIZE);
++		unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) /
++				    stripes;
++
++		per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
++		if (unlikely(!per_dev->bio)) {
++			dprintk("Faild to allocate BIO size=%u\n", bio_size);
++			return -ENOMEM;
++		}
++	}
++
++	while (cur_len > 0) {
++		unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
++		unsigned added_len;
++
++		BUG_ON(ios->ol_state.nr_pages <= pg);
++		cur_len -= pglen;
++
++		added_len = bio_add_pc_page(q, per_dev->bio,
++					ios->ol_state.pages[pg], pglen, pgbase);
++		if (unlikely(pglen != added_len))
++			return -ENOMEM;
++		pgbase = 0;
++		++pg;
++	}
++	BUG_ON(cur_len);
++
++	*cur_pg = pg;
++	return 0;
++}
++
++static int _prepare_one_group(struct objio_state *ios, u64 length,
++			      struct _striping_info *si, unsigned first_comp,
++			      unsigned *last_pg)
++{
++	unsigned stripe_unit = ios->objio_seg->stripe_unit;
++	unsigned mirrors_p1 = ios->objio_seg->mirrors_p1;
++	unsigned devs_in_group = ios->objio_seg->group_width * mirrors_p1;
++	unsigned dev = si->dev;
++	unsigned first_dev = dev - (dev % devs_in_group);
++	unsigned comp = first_comp + (dev - first_dev);
++	unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
++	unsigned cur_pg = *last_pg;
++	int ret = 0;
++
++	while (length) {
++		struct _objio_per_comp *per_dev = &ios->per_dev[comp];
++		unsigned cur_len, page_off = 0;
++
++		if (!per_dev->length) {
++			per_dev->dev = dev;
++			if (dev < si->dev) {
++				per_dev->offset = si->obj_offset + stripe_unit -
++								   si->unit_off;
++				cur_len = stripe_unit;
++			} else if (dev == si->dev) {
++				per_dev->offset = si->obj_offset;
++				cur_len = stripe_unit - si->unit_off;
++				page_off = si->unit_off & ~PAGE_MASK;
++				BUG_ON(page_off &&
++				      (page_off != ios->ol_state.pgbase));
++			} else { /* dev > si->dev */
++				per_dev->offset = si->obj_offset - si->unit_off;
++				cur_len = stripe_unit;
++			}
++
++			if (max_comp < comp)
++				max_comp = comp;
++
++			dev += mirrors_p1;
++			dev = (dev % devs_in_group) + first_dev;
++		} else {
++			cur_len = stripe_unit;
++		}
++		if (cur_len >= length)
++			cur_len = length;
++
++		ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
++				       cur_len);
++		if (unlikely(ret))
++			goto out;
++
++		comp += mirrors_p1;
++		comp = (comp % devs_in_group) + first_comp;
++
++		length -= cur_len;
++		ios->length += cur_len;
++	}
++out:
++	ios->numdevs = max_comp + mirrors_p1;
++	*last_pg = cur_pg;
++	return ret;
++}
++
++static int _io_rw_pagelist(struct objio_state *ios)
++{
++	u64 length = ios->ol_state.count;
++	struct _striping_info si;
++	unsigned devs_in_group = ios->objio_seg->group_width *
++				 ios->objio_seg->mirrors_p1;
++	unsigned first_comp = 0;
++	unsigned num_comps = ios->objio_seg->layout->olo_map.odm_num_comps;
++	unsigned last_pg = 0;
++	int ret = 0;
++
++	_calc_stripe_info(ios, ios->ol_state.offset, &si);
++	while (length) {
++		if (length < si.group_length)
++			si.group_length = length;
++
++		ret = _prepare_one_group(ios, si.group_length, &si, first_comp,
++					 &last_pg);
++		if (unlikely(ret))
++			goto out;
++
++		length -= si.group_length;
++
++		si.group_length = si.total_group_length;
++		si.unit_off = 0;
++		++si.Major;
++		si.obj_offset = si.Major * ios->objio_seg->stripe_unit *
++						ios->objio_seg->group_depth;
++
++		si.dev = (si.dev - (si.dev % devs_in_group)) + devs_in_group;
++		si.dev %= num_comps;
++
++		first_comp += devs_in_group;
++		first_comp %= num_comps;
++	}
++
++out:
++	if (!ios->length)
++		return ret;
++
++	return 0;
++}
++
++static ssize_t _sync_done(struct objio_state *ios)
++{
++	struct completion *waiting = ios->private;
++
++	complete(waiting);
++	return 0;
++}
++
++static void _last_io(struct kref *kref)
++{
++	struct objio_state *ios = container_of(kref, struct objio_state, kref);
++
++	ios->done(ios);
++}
++
++static void _done_io(struct osd_request *or, void *p)
++{
++	struct objio_state *ios = p;
++
++	kref_put(&ios->kref, _last_io);
++}
++
++static ssize_t _io_exec(struct objio_state *ios)
++{
++	DECLARE_COMPLETION_ONSTACK(wait);
++	ssize_t status = 0; /* sync status */
++	unsigned i;
++	objio_done_fn saved_done_fn = ios->done;
++	bool sync = ios->ol_state.sync;
++
++	if (sync) {
++		ios->done = _sync_done;
++		ios->private = &wait;
++	}
++
++	kref_init(&ios->kref);
++
++	for (i = 0; i < ios->numdevs; i++) {
++		struct osd_request *or = ios->per_dev[i].or;
++
++		if (!or)
++			continue;
++
++		kref_get(&ios->kref);
++		osd_execute_request_async(or, _done_io, ios);
++	}
++
++	kref_put(&ios->kref, _last_io);
++
++	if (sync) {
++		wait_for_completion(&wait);
++		status = saved_done_fn(ios);
++	}
++
++	return status;
++}
++
++/*
++ * read
++ */
++static ssize_t _read_done(struct objio_state *ios)
++{
++	ssize_t status;
++	int ret = _io_check(ios, false);
++
++	_io_free(ios);
++
++	if (likely(!ret))
++		status = ios->length;
++	else
++		status = ret;
++
++	objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync);
++	return status;
++}
++
++static int _read_mirrors(struct objio_state *ios, unsigned cur_comp)
++{
++	struct osd_request *or = NULL;
++	struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
++	unsigned dev = per_dev->dev;
++	struct pnfs_osd_object_cred *cred =
++			&ios->objio_seg->layout->olo_comps[dev];
++	struct osd_obj_id obj = {
++		.partition = cred->oc_object_id.oid_partition_id,
++		.id = cred->oc_object_id.oid_object_id,
++	};
++	int ret;
++
++	or = osd_start_request(_io_od(ios, dev), GFP_KERNEL);
++	if (unlikely(!or)) {
++		ret = -ENOMEM;
++		goto err;
++	}
++	per_dev->or = or;
++
++	osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length);
++
++	ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
++	if (ret) {
++		dprintk("%s: Faild to osd_finalize_request() => %d\n",
++			__func__, ret);
++		goto err;
++	}
++
++	dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
++		__func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
++		per_dev->length);
++
++err:
++	return ret;
++}
++
++static ssize_t _read_exec(struct objio_state *ios)
++{
++	unsigned i;
++	int ret;
++
++	for (i = 0; i < ios->numdevs; i += ios->objio_seg->mirrors_p1) {
++		if (!ios->per_dev[i].length)
++			continue;
++		ret = _read_mirrors(ios, i);
++		if (unlikely(ret))
++			goto err;
++	}
++
++	ios->done = _read_done;
++	return _io_exec(ios); /* In sync mode exec returns the io status */
++
++err:
++	_io_free(ios);
++	return ret;
++}
++
++ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state)
++{
++	struct objio_state *ios = container_of(ol_state, struct objio_state,
++					       ol_state);
++	int ret;
++
++	ret = _io_rw_pagelist(ios);
++	if (unlikely(ret))
++		return ret;
++
++	return _read_exec(ios);
++}
++
++/*
++ * write
++ */
++static ssize_t _write_done(struct objio_state *ios)
++{
++	ssize_t status;
++	int ret = _io_check(ios, true);
++
++	_io_free(ios);
++
++	if (likely(!ret)) {
++		/* FIXME: should be based on the OSD's persistence model
++		 * See OSD2r05 Section 4.13 Data persistence model */
++		ios->ol_state.committed = NFS_UNSTABLE; //NFS_FILE_SYNC;
++		status = ios->length;
++	} else {
++		status = ret;
++	}
++
++	objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync);
++	return status;
++}
++
++static int _write_mirrors(struct objio_state *ios, unsigned cur_comp)
++{
++	struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp];
++	unsigned dev = ios->per_dev[cur_comp].dev;
++	unsigned last_comp = cur_comp + ios->objio_seg->mirrors_p1;
++	int ret;
++
++	for (; cur_comp < last_comp; ++cur_comp, ++dev) {
++		struct osd_request *or = NULL;
++		struct pnfs_osd_object_cred *cred =
++					&ios->objio_seg->layout->olo_comps[dev];
++		struct osd_obj_id obj = {
++			.partition = cred->oc_object_id.oid_partition_id,
++			.id = cred->oc_object_id.oid_object_id,
++		};
++		struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
++		struct bio *bio;
++
++		or = osd_start_request(_io_od(ios, dev), GFP_KERNEL);
++		if (unlikely(!or)) {
++			ret = -ENOMEM;
++			goto err;
++		}
++		per_dev->or = or;
++
++		if (per_dev != master_dev) {
++			bio = bio_kmalloc(GFP_KERNEL,
++					  master_dev->bio->bi_max_vecs);
++			if (unlikely(!bio)) {
++				dprintk("Faild to allocate BIO size=%u\n",
++					master_dev->bio->bi_max_vecs);
++				ret = -ENOMEM;
++				goto err;
++			}
++
++			__bio_clone(bio, master_dev->bio);
++			bio->bi_bdev = NULL;
++			bio->bi_next = NULL;
++			per_dev->bio = bio;
++			per_dev->dev = dev;
++			per_dev->length = master_dev->length;
++			per_dev->offset =  master_dev->offset;
++		} else {
++			bio = master_dev->bio;
++			/* FIXME: bio_set_dir() */
++			bio->bi_rw |= (1 << BIO_RW);
++		}
++
++		osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length);
++
++		ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
++		if (ret) {
++			dprintk("%s: Faild to osd_finalize_request() => %d\n",
++				__func__, ret);
++			goto err;
++		}
++
++		dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
++			__func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
++			per_dev->length);
++	}
++
++err:
++	return ret;
++}
++
++static ssize_t _write_exec(struct objio_state *ios)
++{
++	unsigned i;
++	int ret;
++
++	for (i = 0; i < ios->numdevs; i += ios->objio_seg->mirrors_p1) {
++		if (!ios->per_dev[i].length)
++			continue;
++		ret = _write_mirrors(ios, i);
++		if (unlikely(ret))
++			goto err;
++	}
++
++	ios->done = _write_done;
++	return _io_exec(ios); /* In sync mode exec returns the io->status */
++
++err:
++	_io_free(ios);
++	return ret;
++}
++
++ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable)
++{
++	struct objio_state *ios = container_of(ol_state, struct objio_state,
++					       ol_state);
++	int ret;
++
++	/* TODO: ios->stable = stable; */
++	ret = _io_rw_pagelist(ios);
++	if (unlikely(ret))
++		return ret;
++
++	return _write_exec(ios);
++}
++
++/*
++ * Policy Operations
++ */
++
++/*
++ * Return the stripe size for the specified file
++ */
++ssize_t
++objlayout_get_stripesize(struct pnfs_layout_hdr *pnfslay)
++{
++	ssize_t sz, maxsz = -1;
++	struct pnfs_layout_segment *lseg;
++
++	list_for_each_entry(lseg, &pnfslay->segs, fi_list) {
++		int n;
++		struct objlayout_segment *objlseg = LSEG_LD_DATA(lseg);
++		struct pnfs_osd_layout *lo =
++			(struct pnfs_osd_layout *)objlseg->pnfs_osd_layout;
++		struct pnfs_osd_data_map *map = &lo->olo_map;
++
++		n = map->odm_group_width;
++		if (n == 0)
++			n = map->odm_num_comps / (map->odm_mirror_cnt + 1);
++
++		switch (map->odm_raid_algorithm) {
++		case PNFS_OSD_RAID_0:
++			break;
++
++		case PNFS_OSD_RAID_4:
++		case PNFS_OSD_RAID_5:
++			n -= 1;
++			break;
++
++		case PNFS_OSD_RAID_PQ:
++			n -= 2;
++			break;
++
++		default:
++			BUG_ON(1);
++		}
++		sz = map->odm_stripe_unit * n;
++		if (sz > maxsz)
++			maxsz = sz;
++	}
++	dprintk("%s: Return %Zx\n", __func__, maxsz);
++	return maxsz;
++}
++
++/*
++ * Get the max [rw]size
++ */
++static ssize_t
++objlayout_get_blocksize(void)
++{
++	ssize_t sz = BIO_MAX_PAGES_KMALLOC * PAGE_SIZE;
++
++	return sz;
++}
++
++static struct layoutdriver_policy_operations objlayout_policy_operations = {
++/*
++ * Don't gather across stripes, but rather gather (coalesce) up to
++ * the stripe size.
++ *
++ * FIXME: change interface to use merge_align, merge_count
++ */
++	.flags                 = PNFS_LAYOUTRET_ON_SETATTR,
++	.get_stripesize        = objlayout_get_stripesize,
++	.get_blocksize         = objlayout_get_blocksize,
++};
++
++static struct pnfs_layoutdriver_type objlayout_type = {
++	.id = LAYOUT_OSD2_OBJECTS,
++	.name = "LAYOUT_OSD2_OBJECTS",
++	.ld_io_ops = &objlayout_io_operations,
++	.ld_policy_ops = &objlayout_policy_operations,
++};
++
++void *objio_init_mt(void)
++{
++	struct objio_mount_type *omt = kzalloc(sizeof(*omt), GFP_KERNEL);
++
++	if (!omt)
++		return ERR_PTR(-ENOMEM);
++
++	INIT_LIST_HEAD(&omt->dev_list);
++	spin_lock_init(&omt->dev_list_lock);
++	return omt;
++}
++
++void objio_fini_mt(void *mountid)
++{
++	_dev_list_remove_all(mountid);
++	kfree(mountid);
++}
++
++MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
++MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
++MODULE_LICENSE("GPL");
++
++static int __init
++objlayout_init(void)
++{
++	pnfs_client_ops = pnfs_register_layoutdriver(&objlayout_type);
++	printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n",
++	       __func__);
++	return 0;
++}
++
++static void __exit
++objlayout_exit(void)
++{
++	pnfs_unregister_layoutdriver(&objlayout_type);
++	printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n",
++	       __func__);
++}
++
++module_init(objlayout_init);
++module_exit(objlayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-08-31 20:42:05.535059115 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-08-31 20:42:05.535059115 -0400
+@@ -0,0 +1,790 @@
++/*
++ *  objlayout.c
++ *
++ *  pNFS layout driver for Panasas OSDs
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <scsi/osd_initiator.h>
++#include "objlayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++struct pnfs_client_operations *pnfs_client_ops;
++
++/*
++ * Allocate an objlayout for the given inode; returns NULL if out of memory.
++ */
++static struct pnfs_layout_hdr *
++objlayout_alloc_layout(struct inode *inode)
++{
++	struct objlayout *objlay;
++
++	objlay = kzalloc(sizeof(struct objlayout), GFP_KERNEL);
++	if (objlay) {
++		spin_lock_init(&objlay->lock);
++		INIT_LIST_HEAD(&objlay->err_list);
++	}
++	dprintk("%s: Return %p\n", __func__, objlay);
++	return objlay ? &objlay->pnfs_layout : NULL;
++}
++
++/*
++ * Free an objlayout layout structure
++ */
++static void
++objlayout_free_layout(struct pnfs_layout_hdr *lo)
++{
++	struct objlayout *objlay = OBJLAYOUT(lo);
++
++	dprintk("%s: objlay %p\n", __func__, objlay);
++
++	WARN_ON(!list_empty(&objlay->err_list));
++	kfree(objlay);
++}
++
++/*
++ * Unmarshall the layout into a newly allocated segment and return it.
++ */
++static struct pnfs_layout_segment *
++objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
++		     struct nfs4_layoutget_res *lgr)
++{
++	int status;
++	void *layout = lgr->layout.buf;
++	struct pnfs_layout_segment *lseg;
++	struct objlayout_segment *objlseg;
++	struct pnfs_osd_layout *pnfs_osd_layout;
++
++	dprintk("%s: Begin pnfslay %p layout %p\n", __func__, pnfslay, layout);
++
++	BUG_ON(!layout);
++
++	status = -ENOMEM;
++	lseg = kzalloc(sizeof(*lseg) + sizeof(*objlseg) +
++		       pnfs_osd_layout_incore_sz(layout), GFP_KERNEL);
++	if (!lseg)
++		goto err;
++
++	objlseg = LSEG_LD_DATA(lseg);
++	pnfs_osd_layout = (struct pnfs_osd_layout *)objlseg->pnfs_osd_layout;
++	pnfs_osd_xdr_decode_layout(pnfs_osd_layout, layout);
++
++	status = objio_alloc_lseg(&objlseg->internal, pnfslay, lseg,
++				  pnfs_osd_layout);
++	if (status)
++		goto err;
++
++	dprintk("%s: Return %p\n", __func__, lseg);
++	return lseg;
++
++ err:
++	kfree(lseg);
++	return ERR_PTR(status);
++}
++
++/*
++ * Free a layout segment
++ */
++static void
++objlayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	struct objlayout_segment *objlseg;
++
++	dprintk("%s: freeing layout segment %p\n", __func__, lseg);
++
++	if (unlikely(!lseg))
++		return;
++
++	objlseg = LSEG_LD_DATA(lseg);
++	objio_free_lseg(objlseg->internal);
++	kfree(lseg);
++}
++
++/*
++ * I/O Operations
++ */
++static inline u64
++end_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	end = start + len;
++	return end >= start ? end : NFS4_MAX_UINT64;
++}
++
++/* last octet in a range */
++static inline u64
++last_byte_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	BUG_ON(!len);
++	end = start + len;
++	return end > start ? end - 1 : NFS4_MAX_UINT64;
++}
++
++static struct objlayout_io_state *
++objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type,
++			struct page **pages,
++			unsigned pgbase,
++			unsigned nr_pages,
++			loff_t offset,
++			size_t count,
++			struct pnfs_layout_segment *lseg,
++			void *rpcdata)
++{
++	struct objlayout_segment *objlseg = LSEG_LD_DATA(lseg);
++	struct objlayout_io_state *state;
++	u64 lseg_end_offset;
++	size_t size_nr_pages;
++
++	dprintk("%s: allocating io_state\n", __func__);
++	if (objio_alloc_io_state(objlseg->internal, &state))
++		return NULL;	/* callers report this as -ENOMEM */
++
++	BUG_ON(offset < lseg->range.offset);
++	lseg_end_offset = end_offset(lseg->range.offset, lseg->range.length);
++	BUG_ON(offset >= lseg_end_offset);
++	if (offset + count > lseg_end_offset) {	/* clip I/O to the segment */
++		count = lseg->range.length - (offset - lseg->range.offset);
++		dprintk("%s: truncated count %Zd\n", __func__, count);
++	}
++
++	if (pgbase >= PAGE_SIZE) {	/* == PAGE_SIZE already skips pages[0] */
++		unsigned n = pgbase >> PAGE_SHIFT;
++
++		pgbase &= ~PAGE_MASK;
++		pages += n;
++		nr_pages -= n;
++	}
++
++	size_nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
++	BUG_ON(nr_pages < size_nr_pages);
++	if (nr_pages > size_nr_pages)	/* drop pages beyond the clipped count */
++		nr_pages = size_nr_pages;
++
++	INIT_LIST_HEAD(&state->err_list);
++	state->lseg = lseg;
++	state->rpcdata = rpcdata;
++	state->pages = pages;
++	state->pgbase = pgbase;
++	state->nr_pages = nr_pages;
++	state->offset = offset;
++	state->count = count;
++	state->sync = 0;
++
++	return state;
++}
++
++static void
++objlayout_free_io_state(struct objlayout_io_state *state)
++{
++	dprintk("%s: freeing io_state\n", __func__);
++	if (unlikely(!state))
++		return;
++
++	objio_free_io_state(state);
++}
++
++/*
++ * I/O done common code: free the state, or queue it on err_list if it failed
++ */
++static void
++objlayout_iodone(struct objlayout_io_state *state)
++{
++	dprintk("%s: state %p status\n", __func__, state);
++
++	if (likely(state->status >= 0)) {
++		objlayout_free_io_state(state);
++	} else {
++		struct objlayout *objlay = OBJLAYOUT(state->lseg->layout);
++
++		spin_lock(&objlay->lock);
++		objlay->delta_space_valid = OBJ_DSU_INVALID;
++		list_add(&state->err_list, &objlay->err_list); /* (new, head) */
++		spin_unlock(&objlay->lock);
++	}
++}
++
++/*
++ * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
++ *
++ * The @index component IO failed (error returned from target). Register
++ * the error for later reporting at layout-return.
++ */
++void
++objlayout_io_set_result(struct objlayout_io_state *state, unsigned index,
++			int osd_error, u64 offset, u64 length, bool is_write)
++{
++	struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index];
++
++	BUG_ON(index >= state->num_comps);
++	if (osd_error) {
++		struct objlayout_segment *objlseg = LSEG_LD_DATA(state->lseg);
++		struct pnfs_osd_layout *layout =
++				(typeof(layout))objlseg->pnfs_osd_layout;
++
++		ioerr->oer_component = layout->olo_comps[index].oc_object_id;
++		ioerr->oer_comp_offset = offset;
++		ioerr->oer_comp_length = length;
++		ioerr->oer_iswrite = is_write;
++		ioerr->oer_errno = osd_error;
++
++		dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
++			"par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
++			__func__, index, ioerr->oer_errno,
++			ioerr->oer_iswrite,
++			_DEVID_LO(&ioerr->oer_component.oid_device_id),
++			_DEVID_HI(&ioerr->oer_component.oid_device_id),
++			ioerr->oer_component.oid_partition_id,
++			ioerr->oer_component.oid_object_id,
++			ioerr->oer_comp_offset,
++			ioerr->oer_comp_length);
++	} else {
++		/* User need not call if no error is reported */
++		ioerr->oer_errno = 0;
++	}
++}
++
++static void _rpc_commit_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++
++	pnfs_client_ops->nfs_commit_complete(wdata);
++}
++
++/*
++ * Commit: no I/O is issued here, only the completion callback is scheduled
++ */
++enum pnfs_try_status
++objlayout_commit(struct nfs_write_data *wdata, int how)
++{
++	int status = PNFS_ATTEMPTED;
++
++	INIT_WORK(&wdata->task.u.tk_work, _rpc_commit_complete);
++	schedule_work(&wdata->task.u.tk_work);
++	dprintk("%s: Return %d\n", __func__, status);
++	return status;
++}
++
++/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
++ * This is because the osd completion is called with ints-off from
++ * the block layer
++ */
++static void _rpc_read_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_read_data *rdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	rdata = container_of(task, struct nfs_read_data, task);
++
++	pnfs_client_ops->nfs_readlist_complete(rdata);
++}
++
++void
++objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
++{
++	int eof = state->eof;	/* sampled now: state may be freed below */
++	struct nfs_read_data *rdata;
++
++	state->status = status;
++	dprintk("%s: Begin status=%Zd eof=%d\n", __func__, status, eof);
++	rdata = state->rpcdata;
++	rdata->task.tk_status = status;
++	if (status >= 0) {	/* non-negative status is the byte count */
++		rdata->res.count = status;
++		rdata->res.eof = eof;
++	}
++	objlayout_iodone(state);
++	/* must not use state after this point */
++
++	if (sync)	/* complete inline, otherwise defer to a work item */
++		pnfs_client_ops->nfs_readlist_complete(rdata);
++	else {
++		INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
++		schedule_work(&rdata->task.u.tk_work);
++	}
++}
++
++/*
++ * Perform sync or async reads.
++ */
++enum pnfs_try_status
++objlayout_read_pagelist(struct nfs_read_data *rdata, unsigned nr_pages)
++{
++	loff_t offset = rdata->args.offset;
++	size_t count = rdata->args.count;
++	struct objlayout_io_state *state;
++	ssize_t status = 0;
++	loff_t eof;
++
++	dprintk("%s: Begin inode %p offset %llu count %d\n",
++		__func__, rdata->inode, offset, (int)count);
++
++	eof = i_size_read(rdata->inode);
++	if (unlikely(offset + count > eof)) {
++		if (offset >= eof) {
++			status = 0;
++			rdata->res.count = 0;
++			rdata->res.eof = 1;
++			goto out;
++		}
++		count = eof - offset;
++	}
++
++	state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout,
++					 rdata->args.pages, rdata->args.pgbase,
++					 nr_pages, offset, count,
++					 rdata->pdata.lseg, rdata);
++	if (unlikely(!state)) {
++		status = -ENOMEM;
++		goto out;
++	}
++
++	state->eof = state->offset + state->count >= eof;
++
++	status = objio_read_pagelist(state);
++ out:
++	dprintk("%s: Return status %Zd\n", __func__, status);
++	rdata->pdata.pnfs_error = status;
++	return PNFS_ATTEMPTED;
++}
++
++/* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
++ * This is because the osd completion is called with ints-off from
++ * the block layer
++ */
++static void _rpc_write_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++
++	pnfs_client_ops->nfs_writelist_complete(wdata);
++}
++
++void
++objlayout_write_done(struct objlayout_io_state *state, ssize_t status,
++		     bool sync)
++{
++	struct nfs_write_data *wdata;
++
++	dprintk("%s: Begin\n", __func__);
++	wdata = state->rpcdata;
++	state->status = status;
++	wdata->task.tk_status = status;
++	if (status >= 0) {
++		wdata->res.count = status;
++		wdata->verf.committed = state->committed;
++		dprintk("%s: Return status %d committed %d\n",
++			__func__, wdata->task.tk_status,
++			wdata->verf.committed);
++	} else
++		dprintk("%s: Return status %d\n",
++			__func__, wdata->task.tk_status);
++	objlayout_iodone(state);
++	/* must not use state after this point */
++
++	if (sync)
++		pnfs_client_ops->nfs_writelist_complete(wdata);
++	else {
++		INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
++		schedule_work(&wdata->task.u.tk_work);
++	}
++}
++
++/*
++ * Perform sync or async writes.
++ */
++enum pnfs_try_status
++objlayout_write_pagelist(struct nfs_write_data *wdata,
++			 unsigned nr_pages,
++			 int how)
++{
++	struct objlayout_io_state *state;
++	ssize_t status;
++
++	dprintk("%s: Begin inode %p offset %llu count %u\n",
++		__func__, wdata->inode, wdata->args.offset, wdata->args.count);
++
++	state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout,
++					 wdata->args.pages,
++					 wdata->args.pgbase,
++					 nr_pages,
++					 wdata->args.offset,
++					 wdata->args.count,
++					 wdata->pdata.lseg, wdata);
++	if (unlikely(!state)) {
++		status = -ENOMEM;
++		goto out;
++	}
++
++	state->sync = how & FLUSH_SYNC;
++
++	status = objio_write_pagelist(state, how & FLUSH_STABLE);
++ out:
++	dprintk("%s: Return status %Zd\n", __func__, status);
++	wdata->pdata.pnfs_error = status;
++	return PNFS_ATTEMPTED;
++}
++
++void
++objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay,
++			      struct xdr_stream *xdr,
++			      const struct nfs4_layoutcommit_args *args)
++{
++	struct objlayout *objlay = OBJLAYOUT(pnfslay);
++	struct pnfs_osd_layoutupdate lou;
++	__be32 *start;
++
++	dprintk("%s: Begin\n", __func__);
++
++	spin_lock(&objlay->lock);
++	lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
++	lou.dsu_delta = objlay->delta_space_used;
++	objlay->delta_space_used = 0;
++	objlay->delta_space_valid = OBJ_DSU_INIT;
++	lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
++	spin_unlock(&objlay->lock);
++
++	start = xdr_reserve_space(xdr, 4);
++
++	BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));
++
++	*start = cpu_to_be32((xdr->p - start - 1) * 4);
++
++	dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
++		lou.dsu_delta, lou.olu_ioerr_flag);
++}
++
++static int
++err_prio(u32 oer_errno)
++{
++	switch (oer_errno) {
++	case 0:
++		return 0;
++
++	case PNFS_OSD_ERR_RESOURCE:
++		return OSD_ERR_PRI_RESOURCE;
++	case PNFS_OSD_ERR_BAD_CRED:
++		return OSD_ERR_PRI_BAD_CRED;
++	case PNFS_OSD_ERR_NO_ACCESS:
++		return OSD_ERR_PRI_NO_ACCESS;
++	case PNFS_OSD_ERR_UNREACHABLE:
++		return OSD_ERR_PRI_UNREACHABLE;
++	case PNFS_OSD_ERR_NOT_FOUND:
++		return OSD_ERR_PRI_NOT_FOUND;
++	case PNFS_OSD_ERR_NO_SPACE:
++		return OSD_ERR_PRI_NO_SPACE;
++	default:
++		WARN_ON(1);
++		/* fallthrough */
++	case PNFS_OSD_ERR_EIO:
++		return OSD_ERR_PRI_EIO;
++	}
++}
++
++static void
++merge_ioerr(struct pnfs_osd_ioerr *dest_err,
++	    const struct pnfs_osd_ioerr *src_err)
++{
++	u64 dest_end, src_end;
++
++	if (!dest_err->oer_errno) {	/* first error: just copy it */
++		*dest_err = *src_err;
++		/* accumulated device must be blank */
++		memset(&dest_err->oer_component.oid_device_id, 0,
++			sizeof(dest_err->oer_component.oid_device_id));
++
++		return;
++	}
++
++	if (dest_err->oer_component.oid_partition_id !=
++				src_err->oer_component.oid_partition_id)
++		dest_err->oer_component.oid_partition_id = 0;
++
++	if (dest_err->oer_component.oid_object_id !=
++				src_err->oer_component.oid_object_id)
++		dest_err->oer_component.oid_object_id = 0;
++
++	dest_end = end_offset(dest_err->oer_comp_offset,
++			      dest_err->oer_comp_length);
++	src_end =  end_offset(src_err->oer_comp_offset,
++			      src_err->oer_comp_length);
++	if (dest_end < src_end)
++		dest_end = src_end;
++	/* lower the start only after both ends were taken from the old ranges */
++	if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
++		dest_err->oer_comp_offset = src_err->oer_comp_offset;
++
++	dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;
++
++	if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
++	    (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
++			dest_err->oer_errno = src_err->oer_errno;
++	} else if (src_err->oer_iswrite && !dest_err->oer_iswrite) {
++		dest_err->oer_iswrite = true;	/* a write error beats a read */
++		dest_err->oer_errno = src_err->oer_errno;
++	}
++}
++
++static void
++encode_accumulated_error(struct objlayout *objlay, struct xdr_stream *xdr)
++{
++	struct objlayout_io_state *state, *tmp;
++	struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
++
++	list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
++		unsigned i;
++
++		for (i = 0; i < state->num_comps; i++) {
++			struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
++
++			if (!ioerr->oer_errno)
++				continue;
++
++			printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d "
++				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
++				"offset=0x%llx length=0x%llx\n",
++				__func__, i, ioerr->oer_errno,
++				ioerr->oer_iswrite,
++				_DEVID_LO(&ioerr->oer_component.oid_device_id),
++				_DEVID_HI(&ioerr->oer_component.oid_device_id),
++				ioerr->oer_component.oid_partition_id,
++				ioerr->oer_component.oid_object_id,
++				ioerr->oer_comp_offset,
++				ioerr->oer_comp_length);
++
++			merge_ioerr(&accumulated_err, ioerr);
++		}
++		list_del(&state->err_list);
++		objlayout_free_io_state(state);
++	}
++
++	BUG_ON(pnfs_osd_xdr_encode_ioerr(xdr, &accumulated_err));
++}
++
++void
++objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
++			      struct xdr_stream *xdr,
++			      const struct nfs4_layoutreturn_args *args)
++{
++	struct objlayout *objlay = OBJLAYOUT(pnfslay);
++	struct objlayout_io_state *state, *tmp;
++	__be32 *start, *uninitialized_var(last_xdr);
++
++	dprintk("%s: Begin\n", __func__);
++	start = xdr_reserve_space(xdr, 4);
++	BUG_ON(!start);
++
++	spin_lock(&objlay->lock);
++
++	list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
++		unsigned i;
++		int res = 0;
++
++		for (i = 0; i < state->num_comps && !res; i++) {
++			struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
++
++			if (!ioerr->oer_errno)
++				continue;
++
++			dprintk("%s: err[%d]: errno=%d is_write=%d "
++				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
++				"offset=0x%llx length=0x%llx\n",
++				__func__, i, ioerr->oer_errno,
++				ioerr->oer_iswrite,
++				_DEVID_LO(&ioerr->oer_component.oid_device_id),
++				_DEVID_HI(&ioerr->oer_component.oid_device_id),
++				ioerr->oer_component.oid_partition_id,
++				ioerr->oer_component.oid_object_id,
++				ioerr->oer_comp_offset,
++				ioerr->oer_comp_length);
++
++			last_xdr = xdr->p;
++			res = pnfs_osd_xdr_encode_ioerr(xdr, &state->ioerrs[i]);
++		}
++		if (unlikely(res)) {
++			/* no space for even one error descriptor */
++			BUG_ON(last_xdr == start + 1);
++
++			/* we've encountered a situation with lots and lots of
++			 * errors and no space to encode them all. Use the last
++			 * available slot to report the union of all the
++			 * remaining errors.
++			 */
++			xdr_rewind_stream(xdr, last_xdr -
++					       pnfs_osd_ioerr_xdr_sz() / 4);
++			encode_accumulated_error(objlay, xdr);
++			goto loop_done;
++		}
++		list_del(&state->err_list);
++		objlayout_free_io_state(state);
++	}
++loop_done:
++	spin_unlock(&objlay->lock);
++
++	*start = cpu_to_be32((xdr->p - start - 1) * 4);
++	dprintk("%s: Return\n", __func__);
++}
++
++struct objlayout_deviceinfo {
++	struct page *page;
++	struct pnfs_osd_deviceaddr da; /* This must be last */
++};
++
++/* Initialize and call nfs_getdeviceinfo, then decode and return a
++ * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo()
++ * should be called.
++ */
++int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
++	struct pnfs_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr)
++{
++	struct objlayout_deviceinfo *odi;
++	struct pnfs_device pd;
++	struct super_block *sb;
++	struct page *page;
++	size_t sz;
++	u32 *p;
++	int err;
++
++	page = alloc_page(GFP_KERNEL);
++	if (!page)
++		return -ENOMEM;
++
++	pd.area = page_address(page);
++
++	memcpy(&pd.dev_id, d_id, sizeof(*d_id));
++	pd.layout_type = LAYOUT_OSD2_OBJECTS;
++	pd.dev_notify_types = 0;
++	pd.pages = &page;
++	pd.pgbase = 0;
++	pd.pglen = PAGE_SIZE;
++	pd.mincount = 0;
++
++	sb = PNFS_INODE(pnfslay)->i_sb;
++	err = pnfs_client_ops->nfs_getdeviceinfo(PNFS_NFS_SERVER(pnfslay), &pd);
++	dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
++	if (err)
++		goto err_out;
++
++	p = pd.area;
++	sz = pnfs_osd_xdr_deviceaddr_incore_sz(p);
++	odi = kzalloc(sz + (sizeof(*odi) - sizeof(odi->da)), GFP_KERNEL);
++	if (!odi) {
++		err = -ENOMEM;
++		goto err_out;
++	}
++	pnfs_osd_xdr_decode_deviceaddr(&odi->da, p);
++	odi->page = page;
++	*deviceaddr = &odi->da;
++	return 0;
++
++err_out:
++	__free_page(page);
++	return err;
++}
++
++void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
++{
++	struct objlayout_deviceinfo *odi = container_of(deviceaddr,
++						struct objlayout_deviceinfo,
++						da);
++
++	__free_page(odi->page);
++	kfree(odi);
++}
++
++/*
++ * Initialize a mountpoint by retrieving the list of
++ * available devices for it.
++ * Store the I/O engine's per-mount data in server->pnfs_ld_data so
++ * it can be used later on; return 0 or a negative errno.
++ */
++static int
++objlayout_initialize_mountpoint(struct nfs_server *server,
++				const struct nfs_fh *mntfh)
++{
++	void *data;
++
++	data = objio_init_mt();
++	if (IS_ERR(data)) {
++		printk(KERN_INFO "%s: objlayout lib not ready err=%ld\n",
++		       __func__, PTR_ERR(data));
++		return PTR_ERR(data);
++	}
++	server->pnfs_ld_data = data;
++
++	dprintk("%s: Return data=%p\n", __func__, data);
++	return 0;
++}
++
++/*
++ * Uninitialize a mountpoint
++ */
++static int
++objlayout_uninitialize_mountpoint(struct nfs_server *server)
++{
++	dprintk("%s: Begin %p\n", __func__, server->pnfs_ld_data);
++	objio_fini_mt(server->pnfs_ld_data);
++	return 0;
++}
++
++struct layoutdriver_io_operations objlayout_io_operations = {
++	.commit                  = objlayout_commit,
++	.read_pagelist           = objlayout_read_pagelist,
++	.write_pagelist          = objlayout_write_pagelist,
++	.alloc_layout            = objlayout_alloc_layout,
++	.free_layout             = objlayout_free_layout,
++	.alloc_lseg              = objlayout_alloc_lseg,
++	.free_lseg               = objlayout_free_lseg,
++	.encode_layoutcommit	 = objlayout_encode_layoutcommit,
++	.encode_layoutreturn     = objlayout_encode_layoutreturn,
++	.initialize_mountpoint   = objlayout_initialize_mountpoint,
++	.uninitialize_mountpoint = objlayout_uninitialize_mountpoint,
++};
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-08-31 20:42:05.535059115 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-08-31 20:42:05.535059115 -0400
+@@ -0,0 +1,171 @@
++/*
++ *  objlayout.h
++ *
++ *  Data types and function declarations for interfacing with the
++ *  pNFS standard object layout driver.
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#ifndef _OBJLAYOUT_H
++#define _OBJLAYOUT_H
++
++#include <linux/nfs_fs.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/pnfs_osd_xdr.h>
++
++/*
++ * in-core layout segment
++ */
++struct objlayout_segment {
++	void *internal;    /* for provider internal use */
++	u8 pnfs_osd_layout[];
++};
++
++/*
++ * per-inode layout
++ */
++struct objlayout {
++	struct pnfs_layout_hdr pnfs_layout;
++
++	 /* for layout_commit */
++	enum osd_delta_space_valid_enum {
++		OBJ_DSU_INIT = 0,
++		OBJ_DSU_VALID,
++		OBJ_DSU_INVALID,
++	} delta_space_valid;
++	s64 delta_space_used;  /* consumed by write ops */
++
++	 /* for layout_return */
++	spinlock_t lock;
++	struct list_head err_list;
++};
++
++static inline struct objlayout *
++OBJLAYOUT(struct pnfs_layout_hdr *lo)
++{
++	return container_of(lo, struct objlayout, pnfs_layout);
++}
++
++/*
++ * per-I/O operation state
++ * embedded in objects provider io_state data structure
++ */
++struct objlayout_io_state {
++	struct pnfs_layout_segment *lseg;
++
++	struct page **pages;
++	unsigned pgbase;
++	unsigned nr_pages;
++	unsigned long count;
++	loff_t offset;
++	bool sync;
++
++	void *rpcdata;
++	int status;             /* res */
++	int eof;                /* res */
++	int committed;          /* res */
++
++	/* Error reporting (layout_return) */
++	struct list_head err_list;
++	unsigned num_comps;
++	/* Pointer to array of error descriptors of size num_comps.
++	 * It should contain as many entries as devices in the osd_layout
++	 * that participate in the I/O. It is up to the io_engine to allocate
++	 * needed space and set num_comps.
++	 */
++	struct pnfs_osd_ioerr *ioerrs;
++};
++
++/*
++ * Raid engine I/O API
++ */
++extern void *objio_init_mt(void);
++extern void objio_fini_mt(void *mt);
++
++extern int objio_alloc_lseg(void **outp,
++	struct pnfs_layout_hdr *pnfslay,
++	struct pnfs_layout_segment *lseg,
++	struct pnfs_osd_layout *layout);
++extern void objio_free_lseg(void *p);
++
++extern int objio_alloc_io_state(void *seg, struct objlayout_io_state **outp);
++extern void objio_free_io_state(struct objlayout_io_state *state);
++
++extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state);
++extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state,
++				    bool stable);
++
++/*
++ * callback API
++ */
++extern void objlayout_io_set_result(struct objlayout_io_state *state,
++				    unsigned index, int osd_error,
++				    u64 offset, u64 length, bool is_write);
++
++static inline void
++objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used)
++{
++	struct objlayout *objlay = OBJLAYOUT(state->lseg->layout);
++
++	/* If one of the I/Os errored out and the delta_space_used was
++	 * invalid we render the complete report as invalid. Protocol mandate
++	 * the DSU be accurate or not reported.
++	 */
++	spin_lock(&objlay->lock);
++	if (objlay->delta_space_valid != OBJ_DSU_INVALID) {
++		objlay->delta_space_valid = OBJ_DSU_VALID;
++		objlay->delta_space_used += space_used;
++	}
++	spin_unlock(&objlay->lock);
++}
++
++extern void objlayout_read_done(struct objlayout_io_state *state,
++				ssize_t status, bool sync);
++extern void objlayout_write_done(struct objlayout_io_state *state,
++				 ssize_t status, bool sync);
++
++extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
++	struct pnfs_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr);
++extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr);
++
++/*
++ * exported generic objects function vectors
++ */
++extern struct layoutdriver_io_operations objlayout_io_operations;
++extern struct pnfs_client_operations *pnfs_client_ops;
++
++#endif /* _OBJLAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-08-31 20:42:05.536110535 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-08-31 20:42:05.536110535 -0400
+@@ -0,0 +1,734 @@
++/*
++ *  panfs_shim.c
++ *
++ *  Shim layer for interfacing with the Panasas DirectFlow module I/O stack
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ */
++
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <asm/byteorder.h>
++
++#include "objlayout.h"
++#include "panfs_shim.h"
++
++#include <linux/panfs_shim_api.h>
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++struct panfs_export_operations *panfs_export_ops;
++
++void *
++objio_init_mt(void)
++{
++	return panfs_export_ops == NULL ? ERR_PTR(-EAGAIN) : NULL;
++}
++
++void objio_fini_mt(void *mountid)
++{
++}
++
++static int
++panfs_shim_conv_raid01(struct pnfs_osd_layout *layout,
++		       struct pnfs_osd_data_map *lo_map,
++		       pan_agg_layout_hdr_t *hdr)
++{
++	if (lo_map->odm_mirror_cnt) {
++		hdr->type = PAN_AGG_RAID1;
++		hdr->hdr.raid1.num_comps = lo_map->odm_mirror_cnt + 1;
++	} else if (layout->olo_num_comps > 1) {
++		hdr->type = PAN_AGG_RAID0;
++		hdr->hdr.raid0.num_comps = layout->olo_num_comps;
++		hdr->hdr.raid0.stripe_unit = lo_map->odm_stripe_unit;
++	} else
++		hdr->type = PAN_AGG_SIMPLE;
++	return 0;
++}
++
++static int
++panfs_shim_conv_raid5(struct pnfs_osd_layout *layout,
++		      struct pnfs_osd_data_map *lo_map,
++		      pan_agg_layout_hdr_t *hdr)
++{
++	if (lo_map->odm_mirror_cnt)
++		goto err;
++
++	if (lo_map->odm_group_width || lo_map->odm_group_depth) {
++		if (!lo_map->odm_group_width || !lo_map->odm_group_depth)
++			goto err;
++
++		hdr->type = PAN_AGG_GRP_RAID5_LEFT;
++		hdr->hdr.grp_raid5_left.num_comps = lo_map->odm_num_comps;
++		if (hdr->hdr.grp_raid5_left.num_comps != lo_map->odm_num_comps)
++			goto err;
++		hdr->hdr.grp_raid5_left.stripe_unit = lo_map->odm_stripe_unit;
++		hdr->hdr.grp_raid5_left.rg_width = lo_map->odm_group_width;
++		hdr->hdr.grp_raid5_left.rg_depth = lo_map->odm_group_depth;
++		/* this is a guess, panasas server is not supposed to
++		   hand out layout otherwise */
++		hdr->hdr.grp_raid5_left.group_layout_policy =
++			PAN_AGG_GRP_RAID5_LEFT_POLICY_ROUND_ROBIN;
++	} else {
++		hdr->type = PAN_AGG_RAID5_LEFT;
++		hdr->hdr.raid5_left.num_comps = lo_map->odm_num_comps;
++		if (hdr->hdr.raid5_left.num_comps != lo_map->odm_num_comps)
++			goto err;
++		hdr->hdr.raid5_left.stripe_unit2 =
++		hdr->hdr.raid5_left.stripe_unit1 =
++		hdr->hdr.raid5_left.stripe_unit0 = lo_map->odm_stripe_unit;
++	}
++
++	return 0;
++err:
++	return -EINVAL;
++}
++
++/*
++ * Convert a pnfs_osd data map into Panasas aggregation layout header
++ */
++static int
++panfs_shim_conv_pnfs_osd_data_map(
++	struct pnfs_osd_layout *layout,
++	pan_agg_layout_hdr_t *hdr)
++{
++	int status = -EINVAL;
++	struct pnfs_osd_data_map *lo_map = &layout->olo_map;
++
++	if (!layout->olo_num_comps) {
++		dprintk("%s: !!layout.n_comps(%u)\n", __func__,
++			layout->olo_num_comps);
++		goto err;
++	}
++
++	switch (lo_map->odm_raid_algorithm) {
++	case PNFS_OSD_RAID_0:
++		if (layout->olo_num_comps != lo_map->odm_num_comps ||
++		    layout->olo_comps_index) {
++			dprintk("%s: !!PNFS_OSD_RAID_0 "
++				"layout.n_comps(%u) map.n_comps(%u) "
++				"comps_index(%u)\n", __func__,
++				layout->olo_num_comps,
++				lo_map->odm_num_comps,
++				layout->olo_comps_index);
++			goto err;
++		}
++		status = panfs_shim_conv_raid01(layout, lo_map, hdr);
++		break;
++
++	case PNFS_OSD_RAID_5:
++		if (!lo_map->odm_group_width) {
++			if (layout->olo_num_comps != lo_map->odm_num_comps ||
++			    layout->olo_comps_index) {
++				dprintk("%s: !!PNFS_OSD_RAID_5 !group_width "
++					"layout.n_comps(%u)!=map.n_comps(%u) "
++					"|| comps_index(%u)\n", __func__,
++					layout->olo_num_comps,
++					lo_map->odm_num_comps,
++					layout->olo_comps_index);
++				goto err;
++			}
++		} else if ((layout->olo_num_comps != lo_map->odm_num_comps &&
++			    layout->olo_num_comps > lo_map->odm_group_width) ||
++			   (layout->olo_comps_index % lo_map->odm_group_width)){
++				dprintk("%s: !!PNFS_OSD_RAID_5 group_width(%u) "
++					"layout.n_comps(%u) map.n_comps(%u) "
++					"comps_index(%u)\n", __func__,
++					lo_map->odm_group_width,
++					layout->olo_num_comps,
++					lo_map->odm_num_comps,
++					layout->olo_comps_index);
++				goto err;
++			}
++		status = panfs_shim_conv_raid5(layout, lo_map, hdr);
++		break;
++
++	case PNFS_OSD_RAID_4:
++	case PNFS_OSD_RAID_PQ:
++	default:
++		dprintk("%s: !!PNFS_OSD_RAID_(%d)\n", __func__,
++			lo_map->odm_raid_algorithm);
++		goto err;
++	}
++
++	return status;	/* propagate the raid01/raid5 conversion result */
++
++err:
++	return status;
++}
++
++/*
++ * Convert pnfs_osd layout into Panasas map and caps type.
++ * On success *outp is one kzalloc'ed pan_sm_map_cap_t followed by its
++ * component, security and credential data; free with objio_free_lseg().
++ * Returns 0 or a negative errno (-EINVAL, -ENOMEM or -ENOTSUPP).
++ */
++int
++objio_alloc_lseg(void **outp,
++	struct pnfs_layout_hdr *pnfslay,
++	struct pnfs_layout_segment *lseg,
++	struct pnfs_osd_layout *layout)
++{
++	int i, total_comps;
++	int status;
++	struct pnfs_osd_object_cred *lo_comp;
++	pan_size_t alloc_sz, local_sz;
++	pan_sm_map_cap_t *mcs = NULL;
++	u8 *buf;
++	pan_agg_comp_obj_t *pan_comp;
++	pan_sm_sec_t *pan_sec;
++
++	status = -EINVAL;
++	if (layout->olo_num_comps < layout->olo_map.odm_group_width) {
++		total_comps = layout->olo_comps_index + layout->olo_num_comps;
++	} else {
++		/* allocate full map, otherwise SAM gets confused */
++		total_comps = layout->olo_map.odm_num_comps;
++	}
++	alloc_sz = total_comps *
++		   (sizeof(pan_agg_comp_obj_t) + sizeof(pan_sm_sec_t));
++	for (i = 0; i < layout->olo_num_comps; i++) {
++		void *p = layout->olo_comps[i].oc_cap.cred;
++		if (panfs_export_ops->sm_sec_t_get_size_otw(
++			(pan_sm_sec_otw_t *)&p, &local_sz, NULL, NULL))
++			goto err;
++		alloc_sz += local_sz;
++	}
++
++	status = -ENOMEM;
++	mcs = kzalloc(sizeof(*mcs) + alloc_sz, GFP_KERNEL);
++	if (!mcs)
++		goto err;
++	buf = (u8 *)&mcs[1];
++
++	mcs->offset = lseg->range.offset;
++	mcs->length = lseg->range.length;
++	/* FIXME: mcs->expiration_time is left zeroed (kzalloc) for now */
++	mcs->full_map.map_hdr.avail_state = PAN_AGG_OBJ_STATE_NORMAL;
++	status = panfs_shim_conv_pnfs_osd_data_map(layout,
++						   &mcs->full_map.layout_hdr);
++	if (status)
++		goto err;
++
++	mcs->full_map.components.size = total_comps;
++	mcs->full_map.components.data = (pan_agg_comp_obj_t *)buf;
++	buf += total_comps * sizeof(pan_agg_comp_obj_t);
++
++	mcs->secs.size = total_comps;
++	mcs->secs.data = (pan_sm_sec_t *)buf;
++	buf += total_comps * sizeof(pan_sm_sec_t);
++
++	/* NOTE(review): assumes comps_index + num_comps <= total_comps */
++	lo_comp = layout->olo_comps;
++	pan_comp = mcs->full_map.components.data + layout->olo_comps_index;
++	pan_sec = mcs->secs.data + layout->olo_comps_index;
++	for (i = 0; i < layout->olo_num_comps; i++) {
++		void *p;
++		pan_stor_obj_id_t *obj_id = &mcs->full_map.map_hdr.obj_id;
++		struct pnfs_osd_objid *oc_obj_id = &lo_comp->oc_object_id;
++		u64 dev_id = __be64_to_cpup(
++			(__be64 *)oc_obj_id->oid_device_id.data + 1);
++
++		dprintk("%s: i=%d deviceid=%Lx:%Lx partition=%Lx object=%Lx\n",
++			__func__, i,
++			__be64_to_cpup((__be64 *)oc_obj_id->oid_device_id.data),
++			__be64_to_cpup((__be64 *)oc_obj_id->oid_device_id.data + 1),
++			oc_obj_id->oid_partition_id, oc_obj_id->oid_object_id);
++
++		if (i == 0) {
++			/* make up mgr_id to calm sam down */
++			pan_mgr_id_construct_artificial(PAN_MGR_SM, 0,
++							&obj_id->dev_id);
++			obj_id->grp_id = oc_obj_id->oid_partition_id;
++			obj_id->obj_id = oc_obj_id->oid_object_id;
++		}
++
++		if (obj_id->grp_id != lo_comp->oc_object_id.oid_partition_id) {
++			dprintk("%s: i=%d grp_id=0x%Lx oid_partition_id=0x%Lx\n",
++				__func__, i, (u64)obj_id->grp_id,
++				lo_comp->oc_object_id.oid_partition_id);
++			status = -EINVAL;
++			goto err;
++		}
++
++		if (obj_id->obj_id != lo_comp->oc_object_id.oid_object_id) {
++			dprintk("%s: i=%d obj_id=0x%Lx oid_object_id=0x%Lx\n",
++				__func__, i, obj_id->obj_id,
++				lo_comp->oc_object_id.oid_object_id);
++			status = -EINVAL;
++			goto err;
++		}
++
++		pan_comp->dev_id = dev_id;
++		if (!pan_stor_is_device_id_an_obsd_id(pan_comp->dev_id)) {
++			dprintk("%s: i=%d dev_id=0x%Lx not an obsd_id\n",
++				__func__, i, pan_comp->dev_id);
++			status = -EINVAL;
++			goto err;
++		}
++		if (lo_comp->oc_osd_version == PNFS_OSD_MISSING) {
++			dprintk("%s: degraded maps not supported yet\n",
++				__func__);
++			status = -ENOTSUPP;
++			goto err;
++		}
++		pan_comp->avail_state = PAN_AGG_COMP_STATE_NORMAL;
++		if (lo_comp->oc_cap_key_sec != PNFS_OSD_CAP_KEY_SEC_NONE) {
++			dprintk("%s: cap key security not supported yet\n",
++				__func__);
++			status = -ENOTSUPP;
++			goto err;
++		}
++
++		p = lo_comp->oc_cap.cred;
++		panfs_export_ops->sm_sec_t_unmarshall(
++			(pan_sm_sec_otw_t *)&p,
++			pan_sec,
++			buf,
++			alloc_sz,
++			NULL,
++			&local_sz);	/* NOTE(review): result, if any, unchecked */
++		buf += local_sz;
++		alloc_sz -= local_sz;
++
++		lo_comp++;
++		pan_comp++;
++		pan_sec++;
++	}
++
++	*outp = mcs;
++	dprintk("%s:Return mcs=%p\n", __func__, mcs);
++	return 0;
++
++err:
++	objio_free_lseg(mcs);
++	dprintk("%s:Error %d\n", __func__, status);
++	return status;
++}
++
++/*
++ * Free a Panasas map and caps type (the buffer made by objio_alloc_lseg)
++ */
++void
++objio_free_lseg(void *p)
++{
++	kfree(p);	/* p may be NULL (objio_alloc_lseg error path) */
++}
++
++/*
++ * I/O routines
++ */
++int
++objio_alloc_io_state(void *seg, struct objlayout_io_state **outp)
++{
++	struct panfs_shim_io_state *state;
++
++	dprintk("%s: allocating io_state\n", __func__);
++	/* zero-filled, so sg_list and ucreds start out NULL */
++	state = kzalloc(sizeof(*state), GFP_KERNEL);
++	if (state == NULL)
++		return -ENOMEM;
++	*outp = &state->ol_state;	/* hand out the embedded generic state */
++	return 0;
++}
++
++/*
++ * Free an I/O state.  NOTE(review): kunmap()s all nr_pages although
++ * panfs_shim_pages_to_sg() can stop or fail before mapping them - confirm.
++ */
++void
++objio_free_io_state(struct objlayout_io_state *ol_state)
++{
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	int i;
++
++	dprintk("%s: freeing io_state\n", __func__);
++	for (i = 0; i < state->ol_state.nr_pages; i++)
++		kunmap(state->ol_state.pages[i]);
++	if (state->ucreds)
++		panfs_export_ops->ucreds_put(state->ucreds);
++	kfree(state->sg_list);
++	kfree(state);
++}
++
++/*
++ * kmap() the pages and chain them into state->sg_list so that the list
++ * covers count bytes starting pgbase bytes into pages[0]; 0 or -ENOMEM.
++ */
++static int
++panfs_shim_pages_to_sg(struct panfs_shim_io_state *state,
++		       struct page **pages, unsigned int pgbase,
++		       unsigned nr_pages, size_t count)
++{
++	unsigned i, n;
++	pan_sg_entry_t *sg;
++
++	dprintk("%s pgbase %u nr_pages %u count %d "
++		"pg0 %p flags 0x%x index %llu\n",
++		__func__, pgbase, nr_pages, (int)count, pages[0],
++		(unsigned)pages[0]->flags, (unsigned long long)pages[0]->index);
++
++	sg = kcalloc(nr_pages, sizeof(*sg), GFP_KERNEL); /* overflow-checked */
++	if (sg == NULL)
++		return -ENOMEM;
++	dprintk("%s sg_list %p pages %p pgbase %u nr_pages %u\n",
++		__func__, sg, pages, pgbase, nr_pages);
++
++	for (i = 0; i < nr_pages; i++) {
++		sg[i].buffer = (char *)kmap(pages[i]) + pgbase;
++		n = PAGE_SIZE - pgbase;
++		pgbase = 0;	/* only the first page starts at an offset */
++		if (n > count)
++			n = count;
++		sg[i].chunk_size = n;
++		count -= n;
++		if (likely(count)) {
++			sg[i].next = &sg[i+1];
++		} else {
++			/* we're done */
++			sg[i].next = NULL;
++			break;
++		}
++	}
++	BUG_ON(count);	/* the pages must cover all count bytes */
++
++	state->sg_list = sg;
++	return 0;
++}
++
++/*
++ * Completion callback for reads (also run directly by objio_read_pagelist)
++ */
++static void
++panfs_shim_read_done(
++	void *arg1,	/* the struct panfs_shim_io_state of the read */
++	void *arg2,	/* not used */
++	pan_sam_read_res_t *res_p,
++	pan_status_t rc)
++{
++	struct panfs_shim_io_state *state = arg1;
++	ssize_t status;
++
++	dprintk("%s: Begin\n", __func__);
++	if (!res_p)
++		res_p = &state->u.read.res;	/* result kept in the io state */
++	if (rc == PAN_SUCCESS)
++		rc = res_p->result;	/* the result may still carry an error */
++	if (rc == PAN_SUCCESS) {
++		status = res_p->length;
++		WARN_ON(status < 0);
++	} else {
++		/* presumably convert_rc() yields a positive errno - verify */
++		status = -panfs_export_ops->convert_rc(rc);
++		dprintk("%s: pan_sam_read rc %d: status %Zd\n",
++			__func__, rc, status);
++	}
++	dprintk("%s: Return status %Zd rc %d\n", __func__, status, rc);
++	objlayout_read_done(&state->ol_state, status, true);
++}
++
++/* Issue a panfs read for ol_state; completion is panfs_shim_read_done() */
++ssize_t
++objio_read_pagelist(struct objlayout_io_state *ol_state)
++{
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	struct objlayout_segment *lseg = LSEG_LD_DATA(ol_state->lseg);
++	pan_sm_map_cap_t *mcs = (pan_sm_map_cap_t *)lseg->internal;
++	ssize_t status = 0;	/* only the setup failures below change it */
++	pan_status_t rc = PAN_SUCCESS;
++
++	dprintk("%s: Begin\n", __func__);
++	status = panfs_shim_pages_to_sg(state, ol_state->pages,
++					ol_state->pgbase, ol_state->nr_pages,
++					ol_state->count);
++	if (unlikely(status))
++		goto err;
++
++	state->obj_sec.min_security = 0;
++	state->obj_sec.map_ccaps = mcs;
++
++	rc = panfs_export_ops->ucreds_get(&state->ucreds);
++	if (unlikely(rc)) {
++		status = -EACCES;
++		goto err;
++	}
++
++	state->u.read.args.obj_id = mcs->full_map.map_hdr.obj_id;
++	state->u.read.args.offset = ol_state->offset;
++	rc = panfs_export_ops->sam_read(PAN_SAM_ACCESS_BYPASS_TIMESTAMP,
++					&state->u.read.args,
++					&state->obj_sec,
++					state->sg_list,
++					state->ucreds,
++					ol_state->sync ?
++						NULL : panfs_shim_read_done,
++					state, NULL,
++					&state->u.read.res);
++	if (rc != PAN_ERR_IN_PROGRESS)	/* not deferred: complete it here */
++		panfs_shim_read_done(state, NULL, &state->u.read.res, rc);
++ err:
++	dprintk("%s: Return %Zd\n", __func__, status);
++	return status;
++}
++
++/*
++ * Completion callback for writes (also run directly by objio_write_pagelist)
++ */
++static void
++panfs_shim_write_done(
++	void *arg1,	/* the struct panfs_shim_io_state of the write */
++	void *arg2,	/* not used */
++	pan_sam_write_res_t *res_p,
++	pan_status_t rc)
++{
++	struct panfs_shim_io_state *state = arg1;
++	ssize_t status;
++
++	dprintk("%s: Begin\n", __func__);
++	if (!res_p)
++		res_p = &state->u.write.res;	/* result kept in the io state */
++	if (rc == PAN_SUCCESS)
++		rc = res_p->result;	/* the result may still carry an error */
++	if (rc == PAN_SUCCESS) {
++/*		state->ol_state.committed = NFS_FILE_SYNC;*/
++		state->ol_state.committed = NFS_UNSTABLE;
++		status = res_p->length;
++		WARN_ON(status < 0);
++
++		objlayout_add_delta_space_used(&state->ol_state,
++					       res_p->delta_capacity_used);
++	} else {
++		/* presumably convert_rc() yields a positive errno - verify */
++		status = -panfs_export_ops->convert_rc(rc);
++		dprintk("%s: pan_sam_write rc %u: status %Zd\n",
++			__func__, rc, status);
++	}
++	dprintk("%s: Return status %Zd rc %d\n", __func__, status, rc);
++	objlayout_write_done(&state->ol_state, status, true);
++}
++
++/* Issue a panfs write for ol_state; completion is panfs_shim_write_done() */
++ssize_t
++objio_write_pagelist(struct objlayout_io_state *ol_state,
++		     bool stable /* unused, PanOSD writes are stable */)
++{
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	struct objlayout_segment *lseg = LSEG_LD_DATA(ol_state->lseg);
++	pan_sm_map_cap_t *mcs = (pan_sm_map_cap_t *)lseg->internal;
++	ssize_t status = 0;	/* only the setup failures below change it */
++	pan_status_t rc = PAN_SUCCESS;
++
++	dprintk("%s: Begin\n", __func__);
++	status = panfs_shim_pages_to_sg(state, ol_state->pages,
++					ol_state->pgbase, ol_state->nr_pages,
++					ol_state->count);
++	if (unlikely(status))
++		goto err;
++
++	state->obj_sec.min_security = 0;
++	state->obj_sec.map_ccaps = mcs;
++
++	rc = panfs_export_ops->ucreds_get(&state->ucreds);
++	if (unlikely(rc)) {
++		status = -EACCES;
++		goto err;
++	}
++
++	state->u.write.args.obj_id = mcs->full_map.map_hdr.obj_id;
++	state->u.write.args.offset = ol_state->offset;
++	rc = panfs_export_ops->sam_write(PAN_SAM_ACCESS_NONE,
++					 &state->u.write.args,
++					 &state->obj_sec,
++					 state->sg_list,
++					 state->ucreds,
++					 ol_state->sync ?
++						NULL : panfs_shim_write_done,
++					 state,
++					 NULL,
++					 &state->u.write.res);
++	if (rc != PAN_ERR_IN_PROGRESS)	/* not deferred: complete it here */
++		panfs_shim_write_done(state, NULL, &state->u.write.res, rc);
++ err:
++	dprintk("%s: Return %Zd\n", __func__, status);
++	return status;
++}
++
++int
++panfs_shim_register(struct panfs_export_operations *ops)
++{
++	/* only one set of panfs operations may be registered at a time */
++	if (panfs_export_ops != NULL) {
++		printk(KERN_INFO "%s: panfs already registered (panfs ops %p)\n",
++		       __func__, panfs_export_ops);
++		return -EINVAL;
++	}
++
++	/* log first, then publish the table used by the objio_* routines */
++	printk(KERN_INFO "%s: registering panfs ops %p\n", __func__, ops);
++	panfs_export_ops = ops;
++
++	return 0;
++}
++EXPORT_SYMBOL(panfs_shim_register);
++
++int
++panfs_shim_unregister(void)
++{
++	/* there is nothing to drop unless a table was registered before */
++	if (panfs_export_ops == NULL) {
++		printk(KERN_INFO "%s: panfs is not registered\n", __func__);
++		return -EINVAL;
++	}
++
++	printk(KERN_INFO "%s: unregistering panfs ops %p\n", __func__,
++	       panfs_export_ops);
++	panfs_export_ops = NULL;
++	return 0;
++}
++EXPORT_SYMBOL(panfs_shim_unregister);
++
++/*
++ * Policy Operations
++ */
++
++/*
++ * Return the largest stripe size over the file's layout segments (-1 if none)
++ */
++ssize_t
++panlayout_get_stripesize(struct pnfs_layout_hdr *pnfslay)
++{
++	ssize_t sz, maxsz = -1;
++	struct pnfs_layout_segment *lseg;
++
++	dprintk("%s: Begin\n", __func__);
++
++	list_for_each_entry(lseg, &pnfslay->segs, fi_list) {
++		int n;	/* number of stripe units that make up sz */
++		struct objlayout_segment *panlseg = LSEG_LD_DATA(lseg);
++		struct pnfs_osd_layout *lo =
++			(struct pnfs_osd_layout *)panlseg->pnfs_osd_layout;
++		struct pnfs_osd_data_map *map = &lo->olo_map;
++
++		n = map->odm_group_width;
++		if (n == 0)	/* no group width: components per mirror copy */
++			n = map->odm_num_comps / (map->odm_mirror_cnt + 1);
++
++		switch (map->odm_raid_algorithm) {
++		case PNFS_OSD_RAID_0:
++			break;
++
++		case PNFS_OSD_RAID_4:
++		case PNFS_OSD_RAID_5:
++			n -= 1;	/* presumably the parity unit - verify */
++			n *= 8;	/* FIXME: until we have 2-D coalescing */
++			break;
++
++		case PNFS_OSD_RAID_PQ:
++			n -= 2;	/* presumably the P and Q units - verify */
++			break;
++
++		default:
++			BUG_ON(1);	/* any other algorithm value is fatal */
++		}
++		sz = map->odm_stripe_unit * n;
++		if (sz > maxsz)
++			maxsz = sz;
++	}
++	dprintk("%s: Return %Zd\n", __func__, maxsz);
++	return maxsz;
++}
++
++#define PANLAYOUT_DEF_STRIPE_UNIT    (64*1024)
++#define PANLAYOUT_DEF_STRIPE_WIDTH   9
++#define PANLAYOUT_MAX_STRIPE_WIDTH   11
++#define PANLAYOUT_MAX_GATHER_STRIPES 8
++
++/*
++ * Get the max [rw]size, computed from the PANLAYOUT_* limits above
++ */
++static ssize_t
++panlayout_get_blocksize(void)
++{
++	ssize_t sz = PANLAYOUT_MAX_STRIPE_WIDTH - 1;
++
++	sz *= PANLAYOUT_DEF_STRIPE_UNIT * PANLAYOUT_MAX_GATHER_STRIPES;
++	dprintk("%s: Return %Zd\n", __func__, sz);
++	return sz;
++}
++
++static struct layoutdriver_policy_operations panlayout_policy_operations = {
++/*
++ * Don't gather across stripes, but rather gather (coalesce) up to
++ * the stripe size.
++ *
++ * FIXME: change interface to use merge_align, merge_count
++ */
++	.flags                 = PNFS_LAYOUTRET_ON_SETATTR,
++	.get_stripesize        = panlayout_get_stripesize, /* max over segments */
++	.get_blocksize         = panlayout_get_blocksize, /* fixed upper bound */
++};
++
++#define PNFS_LAYOUT_PANOSD (NFS4_PNFS_PRIVATE_LAYOUT | LAYOUT_OSD2_OBJECTS)
++
++static struct pnfs_layoutdriver_type panlayout_type = {
++	.id = PNFS_LAYOUT_PANOSD,	/* private-layout bit | OSD2 objects type */
++	.name = "PNFS_LAYOUT_PANOSD",
++	.ld_io_ops = &objlayout_io_operations,	/* defined outside this file part */
++	.ld_policy_ops = &panlayout_policy_operations,
++};
++
++MODULE_DESCRIPTION("pNFS Layout Driver for Panasas OSDs");
++MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
++MODULE_LICENSE("GPL");
++
++static int __init
++panlayout_init(void)
++{
++	pnfs_client_ops = pnfs_register_layoutdriver(&panlayout_type);
++	printk(KERN_INFO "%s: Registered Panasas OSD pNFS Layout Driver\n",
++	       __func__);
++	return 0;	/* NOTE(review): the registration result is never tested */
++}
++
++static void __exit
++panlayout_exit(void)
++{
++	pnfs_unregister_layoutdriver(&panlayout_type);	/* undoes panlayout_init */
++	printk(KERN_INFO "%s: Unregistered Panasas OSD pNFS Layout Driver\n",
++	       __func__);
++}
++
++module_init(panlayout_init);	/* module load entry point */
++module_exit(panlayout_exit);	/* module unload entry point */
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-08-31 20:42:05.537124598 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-08-31 20:42:05.537124598 -0400
+@@ -0,0 +1,482 @@
++/*
++ *  panfs_shim.h
++ *
++ *  Data types and external function declarations for interfacing with
++ *  panfs (Panasas DirectFlow) I/O stack
++ *
++ *  Copyright (C) 2007 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ */
++
++#ifndef _PANLAYOUT_PANFS_SHIM_H
++#define _PANLAYOUT_PANFS_SHIM_H
++
++typedef s8 pan_int8_t;	/* pan_* names for the kernel's fixed-width types */
++typedef u8 pan_uint8_t;
++typedef s16 pan_int16_t;
++typedef u16 pan_uint16_t;
++typedef s32 pan_int32_t;
++typedef u32 pan_uint32_t;
++typedef s64 pan_int64_t;
++typedef u64 pan_uint64_t;
++
++/*
++ * from pan_base_types.h
++ */
++typedef  pan_uint64_t pan_rpc_none_t;
++typedef pan_uint32_t  pan_rpc_arrdim_t;	/* "size" of the {size, data} arrays */
++typedef pan_uint32_t  pan_status_t;	/* holds PAN_SUCCESS / PAN_ERR_* codes */
++typedef pan_uint8_t   pan_otw_t;
++typedef pan_uint8_t   pan_pad_t;	/* explicit padding, see the pad[3] members */
++
++typedef pan_uint32_t  pan_timespec_sec_t;
++typedef pan_uint32_t  pan_timespec_nsec_t;
++
++typedef  struct pan_timespec_s  pan_timespec_t;
++struct pan_timespec_s {
++  pan_timespec_sec_t   ts_sec;
++  pan_timespec_nsec_t  ts_nsec;
++};
++
++/*
++ * from pan_std_types.h
++ */
++typedef pan_uint32_t pan_size_t;	/* always 32 bits wide */
++typedef  int  pan_bool_t;
++
++/*
++ * from pan_common_error.h (the two pan_status_t codes the shim tests for)
++ */
++#define PAN_SUCCESS                                         ((pan_status_t)0)
++#define PAN_ERR_IN_PROGRESS                                 ((pan_status_t)55)
++
++/*
++ * from pan_sg.h: singly linked scatter/gather list of memory chunks
++ */
++typedef struct pan_sg_entry_s pan_sg_entry_t;
++struct pan_sg_entry_s {
++  void                  *buffer;       /* pointer to memory */
++  pan_uint32_t           chunk_size;   /* size of each chunk (bytes) */
++  pan_sg_entry_t        *next;         /* next entry, NULL on the last one */
++};
++
++/*
++ * from pan_storage.h: object/device identifiers and storage operation codes
++ */
++typedef pan_uint64_t pan_stor_dev_id_t;
++typedef pan_uint32_t pan_stor_obj_grp_id_t;
++typedef pan_uint64_t pan_stor_obj_uniq_t;
++typedef pan_uint32_t pan_stor_action_t;
++typedef pan_uint8_t pan_stor_cap_key_t[20];
++
++typedef pan_uint8_t pan_stor_key_type_t;
++typedef pan_uint64_t pan_stor_len_t;
++typedef pan_int64_t pan_stor_delta_len_t;	/* signed, unlike pan_stor_len_t */
++typedef pan_uint64_t pan_stor_offset_t;
++typedef pan_uint16_t pan_stor_op_t;	/* one of the PAN_STOR_OP_* values */
++
++typedef pan_uint16_t pan_stor_sec_level_t;
++
++struct pan_stor_obj_id_s {
++  pan_stor_dev_id_t      dev_id;
++  pan_stor_obj_uniq_t    obj_id;
++  pan_stor_obj_grp_id_t  grp_id;	/* only 32 bits wide */
++};
++
++typedef struct pan_stor_obj_id_s pan_stor_obj_id_t;
++
++#define PAN_STOR_OP_NONE ((pan_stor_op_t) 0U)
++#define PAN_STOR_OP_READ ((pan_stor_op_t) 8U)
++#define PAN_STOR_OP_WRITE ((pan_stor_op_t) 9U)
++#define PAN_STOR_OP_APPEND ((pan_stor_op_t) 10U)
++#define PAN_STOR_OP_GETATTR ((pan_stor_op_t) 11U)
++#define PAN_STOR_OP_SETATTR ((pan_stor_op_t) 12U)
++#define PAN_STOR_OP_FLUSH ((pan_stor_op_t) 13U)
++#define PAN_STOR_OP_CLEAR ((pan_stor_op_t) 14U)
++
++/*
++ * from pan_aggregation_map.h: aggregation map types, states and headers
++ */
++typedef pan_uint8_t pan_agg_type_t;	/* one of the PAN_AGG_* map types */
++typedef pan_uint64_t pan_agg_map_version_t;
++typedef pan_uint8_t pan_agg_obj_state_t;	/* PAN_AGG_OBJ_STATE_* */
++typedef pan_uint8_t pan_agg_comp_state_t;	/* PAN_AGG_COMP_STATE_* */
++typedef pan_uint8_t pan_agg_comp_flag_t;	/* PAN_AGG_COMP_F_* bits */
++
++#define PAN_AGG_OBJ_STATE_INVALID ((pan_agg_obj_state_t) 0x00)
++#define PAN_AGG_OBJ_STATE_NORMAL ((pan_agg_obj_state_t) 0x01)
++#define PAN_AGG_OBJ_STATE_DEGRADED ((pan_agg_obj_state_t) 0x02)
++#define PAN_AGG_OBJ_STATE_RECONSTRUCT ((pan_agg_obj_state_t) 0x03)
++#define PAN_AGG_OBJ_STATE_COPYBACK ((pan_agg_obj_state_t) 0x04)
++#define PAN_AGG_OBJ_STATE_UNAVAILABLE ((pan_agg_obj_state_t) 0x05)
++#define PAN_AGG_OBJ_STATE_CREATING ((pan_agg_obj_state_t) 0x06)
++#define PAN_AGG_OBJ_STATE_DELETED ((pan_agg_obj_state_t) 0x07)
++#define PAN_AGG_COMP_STATE_INVALID ((pan_agg_comp_state_t) 0x00)
++#define PAN_AGG_COMP_STATE_NORMAL ((pan_agg_comp_state_t) 0x01)
++#define PAN_AGG_COMP_STATE_UNAVAILABLE ((pan_agg_comp_state_t) 0x02)
++#define PAN_AGG_COMP_STATE_COPYBACK ((pan_agg_comp_state_t) 0x03)
++#define PAN_AGG_COMP_F_NONE ((pan_agg_comp_flag_t) 0x00)
++#define PAN_AGG_COMP_F_ATTR_STORING ((pan_agg_comp_flag_t) 0x01)
++#define PAN_AGG_COMP_F_OBJ_CORRUPT_OBS ((pan_agg_comp_flag_t) 0x02)
++#define PAN_AGG_COMP_F_TEMP ((pan_agg_comp_flag_t) 0x04)
++
++struct pan_aggregation_map_s {
++  pan_agg_map_version_t  version;
++  pan_agg_obj_state_t    avail_state;
++  pan_stor_obj_id_t      obj_id;
++};
++
++typedef struct pan_aggregation_map_s pan_aggregation_map_t;
++
++struct pan_agg_comp_obj_s {
++  pan_stor_dev_id_t     dev_id;
++  pan_agg_comp_state_t  avail_state;	/* a PAN_AGG_COMP_STATE_* value */
++  pan_agg_comp_flag_t   comp_flags;
++};
++
++typedef struct pan_agg_comp_obj_s pan_agg_comp_obj_t;
++
++struct pan_agg_simple_header_s {
++  pan_uint8_t  unused;
++};
++
++typedef struct pan_agg_simple_header_s pan_agg_simple_header_t;
++
++struct pan_agg_raid1_header_s {
++  pan_uint16_t  num_comps;
++};
++
++typedef struct pan_agg_raid1_header_s pan_agg_raid1_header_t;
++
++struct pan_agg_raid0_header_s {
++  pan_uint16_t  num_comps;
++  pan_uint32_t  stripe_unit;
++};
++
++typedef struct pan_agg_raid0_header_s pan_agg_raid0_header_t;
++
++struct pan_agg_raid5_left_header_s {
++  pan_uint16_t  num_comps;	/* 16 bits: panfs_shim.c rejects wider counts */
++  pan_uint32_t  stripe_unit0;	/* panfs_shim.c sets all three units alike */
++  pan_uint32_t  stripe_unit1;
++  pan_uint32_t  stripe_unit2;
++};
++
++typedef struct pan_agg_raid5_left_header_s pan_agg_raid5_left_header_t;
++
++typedef struct pan_agg_grp_raid5_left_header_s pan_agg_grp_raid5_left_header_t;
++
++struct pan_agg_grp_raid5_left_header_s {
++  pan_uint16_t  num_comps;
++  pan_uint32_t  stripe_unit;
++  pan_uint16_t  rg_width;
++  pan_uint16_t  rg_depth;
++  pan_uint8_t   group_layout_policy;	/* PAN_AGG_GRP_RAID5_LEFT_POLICY_* */
++};
++
++#define PAN_AGG_GRP_RAID5_LEFT_POLICY_INVALID ((pan_uint8_t) 0x00)
++#define PAN_AGG_GRP_RAID5_LEFT_POLICY_ROUND_ROBIN ((pan_uint8_t) 0x01)
++
++#define PAN_AGG_NULL_MAP ((pan_agg_type_t) 0x00)
++#define PAN_AGG_SIMPLE ((pan_agg_type_t) 0x01)
++#define PAN_AGG_RAID1 ((pan_agg_type_t) 0x02)
++#define PAN_AGG_RAID0 ((pan_agg_type_t) 0x03)
++#define PAN_AGG_RAID5_LEFT ((pan_agg_type_t) 0x04)
++#define PAN_AGG_GRP_RAID5_LEFT ((pan_agg_type_t) 0x06)
++#define PAN_AGG_MINTYPE ((pan_agg_type_t) 0x01)
++#define PAN_AGG_MAXTYPE ((pan_agg_type_t) 0x06)
++
++struct pan_agg_layout_hdr_s {
++  pan_agg_type_t type;	/* PAN_AGG_* value; set together with one hdr member */
++  pan_pad_t pad[3];
++  union {
++    pan_uint64_t                        null;
++    pan_agg_simple_header_t             simple;
++    pan_agg_raid1_header_t              raid1;
++    pan_agg_raid0_header_t              raid0;
++    pan_agg_raid5_left_header_t         raid5_left;
++    pan_agg_grp_raid5_left_header_t     grp_raid5_left;
++  } hdr;
++};
++
++typedef struct pan_agg_layout_hdr_s pan_agg_layout_hdr_t;
++
++struct pan_agg_comp_obj_a_s {
++  pan_rpc_arrdim_t size;	/* number of entries in data[] */
++  pan_agg_comp_obj_t *data;
++};
++typedef struct pan_agg_comp_obj_a_s pan_agg_comp_obj_a;
++
++struct pan_agg_full_map_s {
++  pan_aggregation_map_t  map_hdr;	/* object id and availability state */
++  pan_agg_layout_hdr_t   layout_hdr;	/* map type plus its type-specific header */
++  pan_agg_comp_obj_a     components;	/* the component objects of the map */
++};
++
++typedef struct pan_agg_full_map_s pan_agg_full_map_t;
++
++/*
++ * from pan_obsd_rpc_types.h: key material held for an OBSD
++ */
++typedef pan_uint8_t pan_obsd_security_key_a[16];
++
++typedef pan_uint8_t pan_obsd_capability_key_a[20];
++
++typedef pan_uint8_t pan_obsd_key_holder_id_t;
++
++#define PAN_OBSD_KEY_HOLDER_BASIS_KEY ((pan_obsd_key_holder_id_t) 0x01)
++#define PAN_OBSD_KEY_HOLDER_CAP_KEY ((pan_obsd_key_holder_id_t) 0x02)
++
++struct pan_obsd_key_holder_s {
++  pan_obsd_key_holder_id_t select;	/* a PAN_OBSD_KEY_HOLDER_* value */
++  pan_pad_t pad[3];
++  union {
++    pan_obsd_security_key_a    basis_key;	/* 16 bytes */
++    pan_obsd_capability_key_a  cap_key;	/* 20 bytes */
++  } key;
++};
++
++typedef struct pan_obsd_key_holder_s pan_obsd_key_holder_t;
++
++/*
++ * from pan_sm_sec.h: per-component security data of a map
++ */
++typedef pan_uint8_t pan_sm_sec_type_t;
++typedef pan_uint8_t pan_sm_sec_otw_allo_mode_t;
++
++struct pan_obsd_capability_generic_otw_t_s {
++  pan_rpc_arrdim_t size;	/* number of bytes in data[] */
++  pan_uint8_t *data;
++};
++typedef struct pan_obsd_capability_generic_otw_t_s
++				pan_obsd_capability_generic_otw_t;
++
++struct pan_sm_sec_obsd_s {
++  pan_obsd_key_holder_t              key;
++  pan_obsd_capability_generic_otw_t  cap_otw;
++  pan_sm_sec_otw_allo_mode_t         allo_mode;
++};
++
++typedef struct pan_sm_sec_obsd_s pan_sm_sec_obsd_t;
++
++struct pan_sm_sec_s {
++  pan_sm_sec_type_t type;
++  pan_pad_t pad[3];
++  union {
++    pan_rpc_none_t     none;
++    pan_sm_sec_obsd_t  obsd;
++  } variant;
++};
++
++typedef struct pan_sm_sec_s pan_sm_sec_t;
++
++struct pan_sm_sec_a_s {
++  pan_rpc_arrdim_t size;	/* number of entries in data[] */
++  pan_sm_sec_t *data;
++};
++typedef struct pan_sm_sec_a_s pan_sm_sec_a;
++typedef pan_otw_t *pan_sm_sec_otw_t;	/* pointer into a byte buffer */
++
++/*
++ * from pan_sm_types.h: a map plus its capabilities (built by objio_alloc_lseg)
++ */
++typedef pan_uint64_t pan_sm_cap_handle_t;
++
++struct pan_sm_map_cap_s {
++  pan_agg_full_map_t   full_map;
++  pan_stor_offset_t    offset;	/* set from the layout segment's range */
++  pan_stor_len_t       length;	/* set from the layout segment's range */
++  pan_sm_sec_a         secs;
++  pan_sm_cap_handle_t  handle;
++  pan_timespec_t       expiration_time;
++  pan_stor_action_t    action_mask;
++  pan_uint32_t         flags;
++};
++
++typedef struct pan_sm_map_cap_s pan_sm_map_cap_t;
++
++/*
++ * from pan_sm_ops.h
++ */
++typedef pan_rpc_none_t pan_sm_cache_ptr_t;
++
++/*
++ * from pan_sam_api.h: argument, result and security types of the SAM calls
++ */
++typedef pan_uint32_t    pan_sam_access_flags_t;
++
++typedef struct pan_sam_dev_error_s  pan_sam_dev_error_t;
++struct pan_sam_dev_error_s {
++    pan_stor_dev_id_t       dev_id;
++    pan_stor_op_t           stor_op;
++    pan_status_t            error;
++};
++
++typedef struct pan_sam_ext_status_s pan_sam_ext_status_t;
++struct pan_sam_ext_status_s {
++    pan_uint32_t        available;
++    pan_uint32_t        size;
++    pan_sam_dev_error_t *errors;
++};
++
++enum pan_sam_rpc_sec_sel_e {
++    PAN_SAM_RPC_SEC_DEFAULT,
++    PAN_SAM_RPC_SEC_ATLEAST,
++    PAN_SAM_RPC_SEC_EXACTLY
++};
++typedef enum pan_sam_rpc_sec_sel_e pan_sam_rpc_sec_sel_t;
++
++typedef struct pan_sam_obj_sec_s pan_sam_obj_sec_t;
++struct pan_sam_obj_sec_s {
++    pan_stor_sec_level_t    min_security;   /* the shim always passes 0 */
++    pan_sm_map_cap_t        *map_ccaps;     /* the segment's map and caps */
++};
++
++typedef struct  pan_sam_rpc_sec_s   pan_sam_rpc_sec_t;
++struct pan_sam_rpc_sec_s {
++    pan_sam_rpc_sec_sel_t   selector;
++};
++
++typedef struct pan_sam_read_args_s pan_sam_read_args_t;
++struct pan_sam_read_args_s {
++    pan_stor_obj_id_t                obj_id;    /* filled in by the shim */
++    pan_sm_cache_ptr_t               obj_ent;
++    void                            *return_attr;
++    void                            *checksum;
++    pan_stor_offset_t                offset;    /* filled in by the shim */
++    pan_uint16_t                     sm_options;
++    void                            *callout;
++    void                            *callout_arg;
++};
++
++typedef struct pan_sam_read_res_s pan_sam_read_res_t;
++struct pan_sam_read_res_s {
++    pan_status_t             result;    /* checked when the call status is ok */
++    pan_sam_ext_status_t     ext_status;
++    pan_stor_len_t           length;    /* reported as the read's status */
++    void                    *attr;
++    void                    *checksum;
++};
++
++typedef void (*pan_sam_read_cb_t)(
++    void                *user_arg1,
++    void                *user_arg2,
++    pan_sam_read_res_t  *res_p,
++    pan_status_t        status);
++
++#define PAN_SAM_ACCESS_NONE                             0x0000
++#define PAN_SAM_ACCESS_BYPASS_TIMESTAMP                 0x0020
++
++typedef struct pan_sam_write_args_s pan_sam_write_args_t;
++struct pan_sam_write_args_s {
++    pan_stor_obj_id_t   obj_id;     /* filled in by the shim */
++    pan_sm_cache_ptr_t  obj_ent;
++    pan_stor_offset_t   offset;     /* filled in by the shim */
++    void                *attr;
++    void                *return_attr;
++};
++
++typedef struct pan_sam_write_res_s pan_sam_write_res_t;
++struct pan_sam_write_res_s {
++    pan_status_t            result;     /* checked when the call status is ok */
++    pan_sam_ext_status_t    ext_status;
++    pan_stor_len_t          length;     /* reported as the write's status */
++    pan_stor_delta_len_t    delta_capacity_used;
++    pan_bool_t              parity_dirty;
++    void                   *attr;
++};
++
++typedef void (*pan_sam_write_cb_t)(
++    void                *user_arg1,
++    void                *user_arg2,
++    pan_sam_write_res_t *res_p,
++    pan_status_t        status);
++
++/*
++ * from pan_mgr_types.h: a manager id is a type byte above a 56-bit unique id
++ */
++#define PAN_MGR_ID_TYPE_SHIFT 56
++#define PAN_MGR_ID_TYPE_MASK ((pan_mgr_id_t)18374686479671623680ULL) /* 0xFF << 56 */
++#define PAN_MGR_ID_UNIQ_MASK ((pan_mgr_id_t)72057594037927935ULL) /* 2^56 - 1 */
++
++typedef pan_uint16_t pan_mgr_type_t;
++typedef pan_uint64_t pan_mgr_id_t;
++
++#define PAN_MGR_SM ((pan_mgr_type_t) 2U)
++#define PAN_MGR_OBSD ((pan_mgr_type_t) 6U)
++
++/*
++ * from pan_mgr_types_c.h
++ */
++#define pan_mgr_id_construct_artificial(_mgr_type_, _mgr_uniq_, _mgr_id_p_) { \
++  pan_mgr_id_t  _id1, _id2; \
++\
++  _id1 = (_mgr_type_); \
++  _id1 <<= PAN_MGR_ID_TYPE_SHIFT; \
++  _id1 &= PAN_MGR_ID_TYPE_MASK; \
++  _id2 = (_mgr_uniq_); \
++  _id2 &= PAN_MGR_ID_UNIQ_MASK; \
++  _id1 |= _id2; \
++  *(_mgr_id_p_) = _id1; \
++}
++
++/*
++ * from pan_storage_c.h: true when the id's type byte equals PAN_MGR_OBSD
++ */
++#define pan_stor_is_device_id_an_obsd_id(_device_id_) \
++    ((((_device_id_) & PAN_MGR_ID_TYPE_MASK) >> PAN_MGR_ID_TYPE_SHIFT) \
++	== PAN_MGR_OBSD)
++
++/*
++ * pnfs_shim internal definitions: per-I/O state wrapped around ol_state
++ */
++
++struct panfs_shim_io_state {
++	struct objlayout_io_state ol_state;	/* container_of() anchor */
++
++	pan_sg_entry_t *sg_list;	/* built by panfs_shim_pages_to_sg() */
++	pan_sam_obj_sec_t obj_sec;
++	void *ucreds;	/* from ucreds_get(), put in objio_free_io_state() */
++	union {	/* an io state serves either a read or a write */
++		struct {
++			pan_sam_read_args_t args;
++			pan_sam_read_res_t res;
++		} read;
++		struct {
++			pan_sam_write_args_t args;
++			pan_sam_write_res_t res;
++		} write;
++	} u;
++};
++
++#endif /* _PANLAYOUT_PANFS_SHIM_H */
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-08-31 20:42:05.538121971 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-08-31 20:42:05.538121971 -0400
+@@ -0,0 +1,435 @@
++/*
++ *  pnfs_osd_xdr_cli.c
++ *
++ *  Object-Based pNFS Layout XDR layer
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/pnfs_osd_xdr.h>
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/*
++ * The following implementation is based on these Internet Drafts:
++ *
++ * draft-ietf-nfsv4-minorversion-21
++ * draft-ietf-nfsv4-pnfs-obj-12
++ */
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_objid(u32 *p, struct pnfs_osd_objid *objid)
++{
++	COPYMEM(objid->oid_device_id.data, sizeof(objid->oid_device_id.data));
++	READ64(objid->oid_partition_id);
++	READ64(objid->oid_object_id);
++	return p;
++}
++
++static inline u32 *
++pnfs_osd_xdr_decode_opaque_cred(u32 *p,
++				struct pnfs_osd_opaque_cred *opaque_cred)
++{	/* decodes <length, bytes>; ->cred is never assigned here, the caller in this file sets it first */
++	READ32(opaque_cred->cred_len);	/* READ32/COPYMEM are macros defined outside this file */
++	COPYMEM(opaque_cred->cred, opaque_cred->cred_len); /* NOTE(review): cred_len is not range-checked here */
++	return p;
++}
++
++/*
++ * struct pnfs_osd_object_cred {
++ * 	struct pnfs_osd_objid		oc_object_id;
++ * 	u32				oc_osd_version;
++ * 	u32				oc_cap_key_sec;
++ * 	struct pnfs_osd_opaque_cred	oc_cap_key;
++ * 	struct pnfs_osd_opaque_cred	oc_cap;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_object_cred(u32 *p, struct pnfs_osd_object_cred *comp,
++				u8 **credp)
++{
++	u8 *cred;
++
++	p = pnfs_osd_xdr_decode_objid(p, &comp->oc_object_id);
++	READ32(comp->oc_osd_version);
++	READ32(comp->oc_cap_key_sec);
++
++	cred = *credp;
++	comp->oc_cap_key.cred = cred;
++	p = pnfs_osd_xdr_decode_opaque_cred(p, &comp->oc_cap_key);
++	cred = (u8 *)((u32 *)cred + XDR_QUADLEN(comp->oc_cap_key.cred_len));
++	comp->oc_cap.cred = cred;
++	p = pnfs_osd_xdr_decode_opaque_cred(p, &comp->oc_cap);
++	cred = (u8 *)((u32 *)cred + XDR_QUADLEN(comp->oc_cap.cred_len));
++	*credp = cred;
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_data_map {
++ * 	u32	odm_num_comps;
++ * 	u64	odm_stripe_unit;
++ * 	u32	odm_group_width;
++ * 	u32	odm_group_depth;
++ * 	u32	odm_mirror_cnt;
++ * 	u32	odm_raid_algorithm;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_data_map(u32 *p, struct pnfs_osd_data_map *data_map)
++{
++	READ32(data_map->odm_num_comps);
++	READ64(data_map->odm_stripe_unit);
++	READ32(data_map->odm_group_width);
++	READ32(data_map->odm_group_depth);
++	READ32(data_map->odm_mirror_cnt);
++	READ32(data_map->odm_raid_algorithm);
++	dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u "
++		"odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n",
++		__func__,
++		data_map->odm_num_comps,
++		(unsigned long long)data_map->odm_stripe_unit,
++		data_map->odm_group_width,
++		data_map->odm_group_depth,
++		data_map->odm_mirror_cnt,
++		data_map->odm_raid_algorithm);
++	return p;
++}
++
++struct pnfs_osd_layout *
++pnfs_osd_xdr_decode_layout(struct pnfs_osd_layout *layout, u32 *p)
++{	/* fills *layout from the XDR words at p and returns layout */
++	int i;
++	u32 *start = p;	/* kept only for the size report in the last dprintk */
++	struct pnfs_osd_object_cred *comp;
++	u8 *cred;
++
++	p = pnfs_osd_xdr_decode_data_map(p, &layout->olo_map);
++	READ32(layout->olo_comps_index);
++	READ32(layout->olo_num_comps);
++	layout->olo_comps = (struct pnfs_osd_object_cred *)(layout + 1); /* comps array sits right after the header */
++	comp = layout->olo_comps;
++	cred = (u8 *)(comp + layout->olo_num_comps); /* credential bytes are packed after the comps array */
++	dprintk("%s: comps_index=%u num_comps=%u\n",
++		__func__, layout->olo_comps_index, layout->olo_num_comps);
++	for (i = 0; i < layout->olo_num_comps; i++) { /* NOTE(review): no check here that the memory behind layout fits olo_num_comps - presumably the caller sized it */
++		p = pnfs_osd_xdr_decode_object_cred(p, comp, &cred); /* advances both p and cred */
++		dprintk("%s: comp[%d]=dev(%llx:%llx) par=0x%llx obj=0x%llx "
++			"key_len=%u cap_len=%u\n",
++			__func__, i,
++			_DEVID_LO(&comp->oc_object_id.oid_device_id),
++			_DEVID_HI(&comp->oc_object_id.oid_device_id),
++			comp->oc_object_id.oid_partition_id,
++			comp->oc_object_id.oid_object_id,
++			comp->oc_cap_key.cred_len, comp->oc_cap.cred_len);
++		comp++;
++	}
++	dprintk("%s: xdr_size=%Zd end=%p in_core_size=%Zd\n", __func__,
++	       (char *)p - (char *)start, cred, (char *)cred - (char *)layout);
++	return layout;
++}
++
++/*
++ * Get Device Information Decoding
++ *
++ * Note: since Device Information is currently done synchronously, most
++ *       of the actual fields are left inside the rpc buffer and are only
++ *       pointed to by the pnfs_osd_deviceaddr members. So the read buffer
++ *       should not be freed while the returned information is in use.
++ */
++
++u32 *__xdr_read_calc_nfs4_string(
++	u32 *p, struct nfs4_string *str, u8 **freespace)
++{	/* str may be NULL: then nothing is stored, only p and *freespace advance */
++	u32 len;
++	char *data;
++	bool need_copy;
++
++	READ32(len);
++	data = (char *)p;	/* default: point straight into the XDR buffer */
++
++	if (data[len]) { /* not NUL-terminated in place, so take len + 1 bytes from *freespace; NOTE(review): data[len] is one byte past the string itself */
++		data = *freespace;
++		*freespace += len + 1;
++		need_copy = true;
++	} else {
++		need_copy = false;
++	}
++
++	if (str) {
++		str->len = len;
++		str->data = data;
++		if (need_copy) {
++			memcpy(data, p, len);
++			data[len] = 0;	/* terminate the private copy */
++		}
++	}
++
++	p += XDR_QUADLEN(len);	/* step over the string in whole u32 units */
++	return p;
++}
++
++u32 *__xdr_read_calc_u8_opaque(
++	u32 *p, struct nfs4_string *str)
++{
++	u32 len;
++
++	READ32(len);
++
++	if (str) {
++		str->len = len;
++		str->data = (char *)p;
++	}
++
++	p += XDR_QUADLEN(len);
++	return p;
++}
++
++/*
++ * struct pnfs_osd_targetid {
++ * 	u32			oti_type;
++ * 	struct nfs4_string	oti_scsi_device_id;
++ * };
++ */
++u32 *__xdr_read_calc_targetid(
++	u32 *p, struct pnfs_osd_targetid* targetid, u8 **freespace)
++{
++	u32 oti_type;
++
++	READ32(oti_type);
++	if (targetid)
++		targetid->oti_type = oti_type;
++
++	switch (oti_type) {
++	case OBJ_TARGET_SCSI_NAME:
++	case OBJ_TARGET_SCSI_DEVICE_ID:
++		p = __xdr_read_calc_u8_opaque(p,
++			targetid ? &targetid->oti_scsi_device_id : NULL);
++	}
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_net_addr {
++ * 	struct nfs4_string	r_netid;
++ * 	struct nfs4_string	r_addr;
++ * };
++ */
++u32 *__xdr_read_calc_net_addr(
++	u32 *p, struct pnfs_osd_net_addr* netaddr, u8 **freespace)
++{
++
++	p = __xdr_read_calc_nfs4_string(p,
++			netaddr ? &netaddr->r_netid : NULL,
++			freespace);
++
++	p = __xdr_read_calc_nfs4_string(p,
++			netaddr ? &netaddr->r_addr : NULL,
++			freespace);
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_targetaddr {
++ * 	u32				ota_available;
++ * 	struct pnfs_osd_net_addr	ota_netaddr;
++ * };
++ */
++u32 *__xdr_read_calc_targetaddr(
++	u32 *p, struct pnfs_osd_targetaddr *targetaddr, u8 **freespace)
++{
++	u32 ota_available;
++
++	READ32(ota_available);
++	if (targetaddr)
++		targetaddr->ota_available = ota_available;
++
++	if (ota_available) {
++		p = __xdr_read_calc_net_addr(p,
++				targetaddr ? &targetaddr->ota_netaddr : NULL,
++				freespace);
++	}
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_deviceaddr {
++ * 	struct pnfs_osd_targetid	oda_targetid;
++ * 	struct pnfs_osd_targetaddr	oda_targetaddr;
++ * 	u8				oda_lun[8];
++ * 	struct nfs4_string		oda_systemid;
++ * 	struct pnfs_osd_object_cred	oda_root_obj_cred;
++ * 	struct nfs4_string		oda_osdname;
++ * };
++ */
++u32 *__xdr_read_calc_deviceaddr(
++	u32 *p, struct pnfs_osd_deviceaddr *deviceaddr, u8 **freespace)
++{
++	p = __xdr_read_calc_targetid(p,
++			deviceaddr ? &deviceaddr->oda_targetid : NULL,
++			freespace);
++
++	p = __xdr_read_calc_targetaddr(p,
++			deviceaddr ? &deviceaddr->oda_targetaddr : NULL,
++			freespace);
++
++	if (deviceaddr)
++		COPYMEM(deviceaddr->oda_lun, sizeof(deviceaddr->oda_lun));
++	else
++		p += XDR_QUADLEN(sizeof(deviceaddr->oda_lun));
++
++	p = __xdr_read_calc_u8_opaque(p,
++			deviceaddr ? &deviceaddr->oda_systemid : NULL);
++
++	if (deviceaddr) {
++		p = pnfs_osd_xdr_decode_object_cred(p,
++				&deviceaddr->oda_root_obj_cred, freespace);
++	} else {
++		*freespace += pnfs_osd_object_cred_incore_sz(p);
++		p += pnfs_osd_object_cred_xdr_sz(p);
++	}
++
++	p = __xdr_read_calc_u8_opaque(p,
++			deviceaddr ? &deviceaddr->oda_osdname : NULL);
++
++	return p;
++}
++
++size_t pnfs_osd_xdr_deviceaddr_incore_sz(u32 *p)
++{
++	u8 *null_freespace = NULL;	/* starts at 0, so its final value is a byte count */
++	size_t sz;
++
++	__xdr_read_calc_deviceaddr(p, NULL, &null_freespace); /* NULL deviceaddr: walk and measure only */
++	sz = sizeof(struct pnfs_osd_deviceaddr) + (size_t)null_freespace;
++
++	return sz;	/* struct itself plus the extra space the walk asked for */
++}
++
++void pnfs_osd_xdr_decode_deviceaddr(
++	struct pnfs_osd_deviceaddr *deviceaddr, u32 *p)
++{
++	u8 *freespace = (u8 *)(deviceaddr + 1);	/* extra space begins right after the struct */
++
++	__xdr_read_calc_deviceaddr(p, deviceaddr, &freespace); /* same walk as the sizing pass, now storing results */
++}
++
++/*
++ * struct pnfs_osd_layoutupdate {
++ * 	u32	dsu_valid;
++ * 	s64	dsu_delta;
++ * 	u32	olu_ioerr_flag;
++ * };
++ */
++int
++pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
++				 struct pnfs_osd_layoutupdate *lou)
++{
++	/* dsu_valid + olu_ioerr_flag are 4 bytes each; dsu_delta adds 8 only when valid */
++	__be32 *p = xdr_reserve_space(xdr, lou->dsu_valid ? 16 : 8);
++
++	if (!p)
++		return -E2BIG;	/* no room left in the xdr stream */
++	*p++ = cpu_to_be32(lou->dsu_valid);
++	if (lou->dsu_valid)	/* the delta is encoded only when flagged valid */
++		p = xdr_encode_hyper(p, lou->dsu_delta);
++	*p++ = cpu_to_be32(lou->olu_ioerr_flag);
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ */
++static inline int pnfs_osd_xdr_encode_objid(struct xdr_stream *xdr,
++					    struct pnfs_osd_objid *object_id)
++{
++	__be32 *p;
++
++	p = xdr_reserve_space(xdr, 32);
++	if (!p)
++		return -E2BIG;
++
++	p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data,
++				    sizeof(object_id->oid_device_id.data));
++	p = xdr_encode_hyper(p, object_id->oid_partition_id);
++	p = xdr_encode_hyper(p, object_id->oid_object_id);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_ioerr {
++ * 	struct pnfs_osd_objid	oer_component;
++ * 	u64			oer_comp_offset;
++ * 	u64			oer_comp_length;
++ * 	u32			oer_iswrite;
++ * 	u32			oer_errno;
++ * };
++ */
++int pnfs_osd_xdr_encode_ioerr(struct xdr_stream *xdr,
++			      struct pnfs_osd_ioerr *ioerr)
++{
++	__be32 *p;
++	int ret;
++
++	ret = pnfs_osd_xdr_encode_objid(xdr, &ioerr->oer_component);
++	if (ret)
++		return ret;
++
++	p = xdr_reserve_space(xdr, 24);
++	if (!p)
++		return -E2BIG;
++
++	p = xdr_encode_hyper(p, ioerr->oer_comp_offset);
++	p = xdr_encode_hyper(p, ioerr->oer_comp_length);
++	*p++ = cpu_to_be32(ioerr->oer_iswrite);
++	*p   = cpu_to_be32(ioerr->oer_errno);
++
++	return 0;
++}
+diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/pagelist.c
+--- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-08-31 20:41:19.162150222 -0400
++++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-31 20:42:05.539131687 -0400
+@@ -20,6 +20,7 @@
+ #include <linux/nfs_mount.h>
+ 
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ static struct kmem_cache *nfs_page_cachep;
+ 
+@@ -56,7 +57,8 @@ nfs_page_free(struct nfs_page *p)
+ struct nfs_page *
+ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
+ 		   struct page *page,
+-		   unsigned int offset, unsigned int count)
++		   unsigned int offset, unsigned int count,
++		   struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_page		*req;
+ 
+@@ -79,7 +81,11 @@ nfs_create_request(struct nfs_open_conte
+ 	req->wb_pgbase	= offset;
+ 	req->wb_bytes   = count;
+ 	req->wb_context = get_nfs_open_context(ctx);
++	req->wb_lock_context = nfs_get_lock_context(ctx);
+ 	kref_init(&req->wb_kref);
++	req->wb_lseg    = lseg;
++	if (lseg)
++		get_lseg(lseg);
+ 	return req;
+ }
+ 
+@@ -141,18 +147,26 @@ void nfs_clear_request(struct nfs_page *
+ {
+ 	struct page *page = req->wb_page;
+ 	struct nfs_open_context *ctx = req->wb_context;
++	struct nfs_lock_context *l_ctx = req->wb_lock_context;
+ 
+ 	if (page != NULL) {
+ 		page_cache_release(page);
+ 		req->wb_page = NULL;
+ 	}
++	if (l_ctx != NULL) {
++		nfs_put_lock_context(l_ctx);
++		req->wb_lock_context = NULL;
++	}
+ 	if (ctx != NULL) {
+ 		put_nfs_open_context(ctx);
+ 		req->wb_context = NULL;
+ 	}
++	if (req->wb_lseg != NULL) {
++		put_lseg(req->wb_lseg);
++		req->wb_lseg = NULL;
++	}
+ }
+ 
+-
+ /**
+  * nfs_release_request - Release the count on an NFS read/write request
+  * @req: request to release
+@@ -231,11 +245,12 @@ void nfs_pageio_init(struct nfs_pageio_d
+  * Return 'true' if this is the case, else return 'false'.
+  */
+ static int nfs_can_coalesce_requests(struct nfs_page *prev,
+-				     struct nfs_page *req)
++				     struct nfs_page *req,
++				     struct nfs_pageio_descriptor *pgio)
+ {
+ 	if (req->wb_context->cred != prev->wb_context->cred)
+ 		return 0;
+-	if (req->wb_context->lockowner != prev->wb_context->lockowner)
++	if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
+ 		return 0;
+ 	if (req->wb_context->state != prev->wb_context->state)
+ 		return 0;
+@@ -245,6 +260,12 @@ static int nfs_can_coalesce_requests(str
+ 		return 0;
+ 	if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
+ 		return 0;
++	if (req->wb_lseg != prev->wb_lseg)
++		return 0;
++#ifdef CONFIG_NFS_V4_1
++	if (pgio->pg_test && !pgio->pg_test(pgio, prev, req))
++		return 0;
++#endif /* CONFIG_NFS_V4_1 */
+ 	return 1;
+ }
+ 
+@@ -277,7 +298,7 @@ static int nfs_pageio_do_add_request(str
+ 		if (newlen > desc->pg_bsize)
+ 			return 0;
+ 		prev = nfs_list_entry(desc->pg_list.prev);
+-		if (!nfs_can_coalesce_requests(prev, req))
++		if (!nfs_can_coalesce_requests(prev, req, desc))
+ 			return 0;
+ 	} else
+ 		desc->pg_base = req->wb_pgbase;
+@@ -366,6 +387,7 @@ void nfs_pageio_cond_complete(struct nfs
+  * @idx_start: lower bound of page->index to scan
+  * @npages: idx_start + npages sets the upper bound to scan.
+  * @tag: tag to scan for
++ * @use_pnfs: will be set TRUE if commit needs to be handled by layout driver
+  *
+  * Moves elements from one of the inode request lists.
+  * If the number of requests is set to 0, the entire address_space
+@@ -375,7 +397,7 @@ void nfs_pageio_cond_complete(struct nfs
+  */
+ int nfs_scan_list(struct nfs_inode *nfsi,
+ 		struct list_head *dst, pgoff_t idx_start,
+-		unsigned int npages, int tag)
++		  unsigned int npages, int tag, int *use_pnfs)
+ {
+ 	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
+ 	struct nfs_page *req;
+@@ -406,6 +428,8 @@ int nfs_scan_list(struct nfs_inode *nfsi
+ 				radix_tree_tag_clear(&nfsi->nfs_page_tree,
+ 						req->wb_index, tag);
+ 				nfs_list_add_request(req, dst);
++				if (req->wb_lseg)
++					*use_pnfs = 1;
+ 				res++;
+ 				if (res == INT_MAX)
+ 					goto out;
+diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.c
+--- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-08-31 20:42:05.541150301 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-08-31 20:42:05.541150301 -0400
+@@ -0,0 +1,2037 @@
++/*
++ *  linux/fs/nfs/pnfs.c
++ *
++ *  pNFS functions to call and manage layout drivers.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/kernel.h>
++#include <linux/errno.h>
++#include <linux/fs.h>
++#include <linux/module.h>
++#include <linux/smp_lock.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_mount.h>
++#include <linux/nfs_page.h>
++#include <linux/nfs4.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/rculist.h>
++
++#include "internal.h"
++#include "nfs4_fs.h"
++#include "pnfs.h"
++
++#define NFSDBG_FACILITY		NFSDBG_PNFS
++
++#define MIN_POOL_LC		(4)
++
++static int pnfs_initialized;
++
++static void pnfs_free_layout(struct pnfs_layout_hdr *lo,
++			     struct pnfs_layout_range *range);
++static inline void get_layout(struct pnfs_layout_hdr *lo);
++
++/* Locking:
++ *
++ * pnfs_spinlock:
++ * 	protects pnfs_modules_tbl.
++ */
++static spinlock_t pnfs_spinlock = __SPIN_LOCK_UNLOCKED(pnfs_spinlock);
++
++/*
++ * pnfs_modules_tbl holds all pnfs modules
++ */
++static struct list_head	pnfs_modules_tbl;
++static struct kmem_cache *pnfs_cachep;
++static mempool_t *pnfs_layoutcommit_mempool;
++
++static inline struct nfs4_layoutcommit_data *pnfs_layoutcommit_alloc(void)
++{
++	struct nfs4_layoutcommit_data *data;
++
++	data = mempool_alloc(pnfs_layoutcommit_mempool, GFP_NOFS);
++	if (data == NULL)
++		return NULL;	/* the pool could not satisfy the request */
++	return memset(data, 0, sizeof(*data));	/* hand back a zeroed object */
++}
++
++void pnfs_layoutcommit_free(struct nfs4_layoutcommit_data *p)
++{
++	mempool_free(p, pnfs_layoutcommit_mempool);
++}
++
++/*
++ * struct pnfs_module - One per pNFS device module.
++ */
++struct pnfs_module {
++	struct pnfs_layoutdriver_type *pnfs_ld_type;
++	struct list_head        pnfs_tblid;
++};
++
++int
++pnfs_initialize(void)
++{
++	INIT_LIST_HEAD(&pnfs_modules_tbl);
++
++	pnfs_cachep = kmem_cache_create("nfs4_layoutcommit_data",
++					sizeof(struct nfs4_layoutcommit_data),
++					0, SLAB_HWCACHE_ALIGN, NULL);
++	if (pnfs_cachep == NULL)
++		return -ENOMEM;
++
++	pnfs_layoutcommit_mempool = mempool_create(MIN_POOL_LC,
++						   mempool_alloc_slab,
++						   mempool_free_slab,
++						   pnfs_cachep);
++	if (pnfs_layoutcommit_mempool == NULL) {
++		kmem_cache_destroy(pnfs_cachep);
++		return -ENOMEM;
++	}
++
++	pnfs_initialized = 1;
++	return 0;
++}
++
++void pnfs_uninitialize(void)
++{	/* NOTE(review): pnfs_initialized is not cleared here */
++	mempool_destroy(pnfs_layoutcommit_mempool);	/* pool first: it was created on top of pnfs_cachep */
++	kmem_cache_destroy(pnfs_cachep);
++}
++
++/* search pnfs_modules_tbl for right pnfs module */
++static int
++find_pnfs(u32 id, struct pnfs_module **module) {
++	struct  pnfs_module *local = NULL;
++
++	dprintk("PNFS: %s: Searching for %u\n", __func__, id);
++	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid) { /* NOTE(review): the table is walked without taking pnfs_spinlock here */
++		if (local->pnfs_ld_type->id == id) {
++			*module = local;	/* *module is written only on a match */
++			return(1);
++		}
++	}
++	return 0;	/* no match: *module is left untouched */
++}
++
++/* Set cred to indicate we require a layoutcommit
++ * If we don't even have a layout, we don't need to commit it.
++ */
++void
++pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx)
++{
++	dprintk("%s: has_layout=%d ctx=%p\n", __func__, has_layout(nfsi), ctx);
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	if (has_layout(nfsi) &&
++	    !test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->state)) {
++		nfsi->layout->cred = get_rpccred(ctx->state->owner->so_cred);
++		__set_bit(NFS_INO_LAYOUTCOMMIT,
++			  &nfsi->layout->state);
++		nfsi->change_attr++;
++		spin_unlock(&nfsi->vfs_inode.i_lock);
++		dprintk("%s: Set layoutcommit\n", __func__);
++		return;
++	}
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++}
++
++/* Update last_write_offset for layoutcommit.
++ * TODO: We should only use committed extents, but the current nfs
++ * implementation does not calculate the written range in nfs_commit_done.
++ * We therefore update this field in writeback_done.
++ */
++void
++pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
++{
++	loff_t end_pos;
++
++	spin_lock(&nfsi->vfs_inode.i_lock);	/* the write window is updated under i_lock */
++	if (offset < nfsi->layout->write_begin_pos)
++		nfsi->layout->write_begin_pos = offset;
++	end_pos = offset + extent - 1; /* I'm being inclusive */
++	if (end_pos > nfsi->layout->write_end_pos)
++		nfsi->layout->write_end_pos = end_pos;
++	dprintk("%s: Wrote %lu@%llu bpos %llu, epos: %llu\n",
++		__func__,
++		(unsigned long) extent,
++		(unsigned long long) offset,	/* 64-bit casts: unsigned long would truncate loff_t on 32-bit */
++		(unsigned long long) nfsi->layout->write_begin_pos,
++		(unsigned long long) nfsi->layout->write_end_pos);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++}
++
++/* Uninitialize a mountpoint in a layout driver */
++void
++unmount_pnfs_layoutdriver(struct nfs_server *nfss)
++{
++	if (PNFS_EXISTS_LDIO_OP(nfss, uninitialize_mountpoint))
++		nfss->pnfs_curr_ld->ld_io_ops->uninitialize_mountpoint(nfss);
++}
++
++/*
++ * Set the server pnfs module to the registered layout driver matching id.
++ * Only one pNFS layout driver is supported.
++ */
++void
++set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
++		      u32 id)
++{
++	struct pnfs_module *mod = NULL;
++
++	if (server->pnfs_curr_ld)
++		return;
++
++	if (!find_pnfs(id, &mod)) {
++		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
++		find_pnfs(id, &mod);
++	}
++
++	if (!mod) {
++		dprintk("%s: No pNFS module found for %u. ", __func__, id);
++		goto out_err;
++	}
++
++	server->pnfs_curr_ld = mod->pnfs_ld_type;
++	if (mod->pnfs_ld_type->ld_io_ops->initialize_mountpoint(
++							server, mntfh)) {
++		printk(KERN_ERR "%s: Error initializing mount point "
++		       "for layout driver %u. ", __func__, id);
++		goto out_err;
++	}
++
++	dprintk("%s: pNFS module for %u set\n", __func__, id);
++	return;
++
++out_err:
++	dprintk("Using NFSv4 I/O\n");
++	server->pnfs_curr_ld = NULL;
++}
++
++/* Register a layout driver; returns the pNFS client ops, or NULL on any failure */
++struct pnfs_client_operations*
++pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
++{
++	struct pnfs_module *pnfs_mod;
++	struct layoutdriver_io_operations *io_ops = ld_type->ld_io_ops;
++
++	if (!pnfs_initialized) {
++		printk(KERN_ERR "%s Registration failure. "
++		       "pNFS not initialized.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops || !io_ops->alloc_layout || !io_ops->free_layout) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "alloc_layout and free_layout.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops->alloc_lseg || !io_ops->free_lseg) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "alloc_lseg and free_lseg.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops->read_pagelist || !io_ops->write_pagelist ||
++	    !io_ops->commit) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "read_pagelist, write_pagelist, and commit.\n",
++		       __func__);
++		return NULL;
++	}
++
++	pnfs_mod = kmalloc(sizeof(struct pnfs_module), GFP_KERNEL);
++	if (pnfs_mod == NULL)
++		return NULL;	/* nothing was registered, so do not report success */
++	dprintk("%s Registering id:%u name:%s\n",
++		__func__,
++		ld_type->id,
++		ld_type->name);
++	pnfs_mod->pnfs_ld_type = ld_type;
++	INIT_LIST_HEAD(&pnfs_mod->pnfs_tblid);
++
++	spin_lock(&pnfs_spinlock);	/* pnfs_spinlock protects pnfs_modules_tbl */
++	list_add(&pnfs_mod->pnfs_tblid, &pnfs_modules_tbl);
++	spin_unlock(&pnfs_spinlock);
++
++	return &pnfs_ops;
++}
++
++/* Allow I/O module to remove itself from the registered layout drivers */
++void
++pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
++{
++	struct pnfs_module *pnfs_mod;
++
++	if (find_pnfs(ld_type->id, &pnfs_mod)) {
++		dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
++		spin_lock(&pnfs_spinlock);
++		list_del(&pnfs_mod->pnfs_tblid);
++		spin_unlock(&pnfs_spinlock);
++		kfree(pnfs_mod);
++	}
++}
++
++/*
++ * pNFS client layout cache
++ */
++#if defined(CONFIG_SMP)
++#define BUG_ON_UNLOCKED_INO(ino) \
++	BUG_ON(!spin_is_locked(&ino->i_lock))
++#define BUG_ON_UNLOCKED_LO(lo) \
++	BUG_ON_UNLOCKED_INO(PNFS_INODE(lo))
++#else /* CONFIG_SMP */
++#define BUG_ON_UNLOCKED_INO(lo) do {} while (0)
++#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
++#endif /* CONFIG_SMP */
++
++static inline void
++get_layout(struct pnfs_layout_hdr *lo)
++{
++	BUG_ON_UNLOCKED_LO(lo);
++	lo->refcount++;
++}
++
++static inline void
++put_layout_locked(struct pnfs_layout_hdr *lo)
++{
++	BUG_ON_UNLOCKED_LO(lo);
++	BUG_ON(lo->refcount <= 0);
++
++	lo->refcount--;
++	if (!lo->refcount) {
++		struct layoutdriver_io_operations *io_ops = PNFS_LD_IO_OPS(lo);
++		struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
++
++		dprintk("%s: freeing layout cache %p\n", __func__, lo);
++		WARN_ON(!list_empty(&lo->layouts));
++		io_ops->free_layout(lo);
++		nfsi->layout = NULL;
++	}
++}
++
++void
++put_layout(struct inode *inode)
++{
++	spin_lock(&inode->i_lock);
++	put_layout_locked(NFS_I(inode)->layout);
++	spin_unlock(&inode->i_lock);
++
++}
++
++void
++pnfs_layout_release(struct pnfs_layout_hdr *lo,
++		    struct pnfs_layout_range *range)
++{
++	struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	if (range)
++		pnfs_free_layout(lo, range);
++	/*
++	 * Matched in _pnfs_update_layout for layoutget
++	 * and by get_layout in _pnfs_return_layout for layoutreturn
++	 */
++	put_layout_locked(lo);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	wake_up_all(&nfsi->lo_waitq);
++}
++
++void
++pnfs_destroy_layout(struct nfs_inode *nfsi)
++{
++	struct pnfs_layout_hdr *lo;
++	struct pnfs_layout_range range = {
++		.iomode = IOMODE_ANY,
++		.offset = 0,
++		.length = NFS4_MAX_UINT64,
++	};
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	lo = nfsi->layout;
++	if (lo) {
++		pnfs_free_layout(lo, &range);
++		WARN_ON(!list_empty(&nfsi->layout->segs));
++		WARN_ON(!list_empty(&nfsi->layout->layouts));
++
++		if (nfsi->layout->refcount != 1)
++			printk(KERN_WARNING "%s: layout refcount not=1 %d\n",
++				__func__, nfsi->layout->refcount);
++		WARN_ON(nfsi->layout->refcount != 1);
++
++		/* Matched by refcount set to 1 in alloc_init_layout */
++		put_layout_locked(lo);
++	}
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++}
++
++/*
++ * Called by the state manager to remove all layouts established under an
++ * expired lease.
++ */
++void
++pnfs_destroy_all_layouts(struct nfs_client *clp)
++{
++	struct pnfs_layout_hdr *lo;
++
++	while (!list_empty(&clp->cl_layouts)) {
++		lo = list_entry(clp->cl_layouts.next, struct pnfs_layout_hdr,
++				layouts);
++		dprintk("%s freeing layout for inode %lu\n", __func__,
++			lo->inode->i_ino);
++		pnfs_destroy_layout(NFS_I(lo->inode));
++	}
++}
++
++static inline void
++init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
++{
++	INIT_LIST_HEAD(&lseg->fi_list);
++	kref_init(&lseg->kref);
++	lseg->valid = true;
++	lseg->layout = lo;
++}
++
++static void
++destroy_lseg(struct kref *kref)
++{
++	struct pnfs_layout_segment *lseg =
++		container_of(kref, struct pnfs_layout_segment, kref);
++	struct pnfs_layout_hdr *lo = lseg->layout;
++
++	dprintk("--> %s\n", __func__);
++	PNFS_LD_IO_OPS(lo)->free_lseg(lseg); /* free the segment first: the put below may free lo */
++	put_layout_locked(lo); /* Matched by get_layout in pnfs_insert_layout */
++}
++
++static void
++put_lseg_locked(struct pnfs_layout_segment *lseg)
++{
++	bool do_wake_up;
++	struct nfs_inode *nfsi;
++
++	if (!lseg)
++		return;
++
++	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
++		atomic_read(&lseg->kref.refcount), lseg->valid);
++	do_wake_up = !lseg->valid;
++	nfsi = PNFS_NFS_INODE(lseg->layout);
++	kref_put(&lseg->kref, destroy_lseg);
++	if (do_wake_up)
++		wake_up(&nfsi->lo_waitq);
++}
++
++void
++put_lseg(struct pnfs_layout_segment *lseg)
++{
++	bool do_wake_up;
++	struct nfs_inode *nfsi;
++
++	if (!lseg)
++		return;	/* a NULL segment is accepted and ignored */
++
++	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
++		atomic_read(&lseg->kref.refcount), lseg->valid);
++	do_wake_up = !lseg->valid;	/* sampled now: destroy_lseg may free lseg during kref_put */
++	nfsi = PNFS_NFS_INODE(lseg->layout);	/* likewise looked up before the put */
++	spin_lock(&nfsi->vfs_inode.i_lock);	/* destroy_lseg calls put_layout_locked, which checks that i_lock is held */
++	kref_put(&lseg->kref, destroy_lseg);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	if (do_wake_up)
++		wake_up(&nfsi->lo_waitq);
++}
++EXPORT_SYMBOL(put_lseg);
++
++void get_lseg(struct pnfs_layout_segment *lseg)
++{
++	kref_get(&lseg->kref);
++}
++EXPORT_SYMBOL(get_lseg);
++
++static inline u64
++end_offset(u64 start, u64 len)
++{
++	u64 sum = start + len;	/* unsigned arithmetic: wraps on overflow */
++
++	/* exclusive end of the range, saturated instead of wrapped */
++	return (sum < start) ? NFS4_MAX_UINT64 : sum;
++}
++
++/* last octet in a range */
++static inline u64
++last_byte_offset(u64 start, u64 len)
++{
++	u64 past_end = start + len;	/* one past the last byte; may wrap */
++
++	BUG_ON(!len);			/* an empty range has no last byte */
++	/* on wrap-around, clamp to the largest offset */
++	return (past_end <= start) ? NFS4_MAX_UINT64 : past_end - 1;
++}
++
++/*
++ * is l2 fully contained in l1?
++ *   start1                             end1
++ *   [----------------------------------)
++ *           start2           end2
++ *           [----------------)
++ */
++static inline int
++lo_seg_contained(struct pnfs_layout_range *l1,
++		 struct pnfs_layout_range *l2)
++{
++	/* l2 must not begin before l1 ... */
++	if (l2->offset < l1->offset)
++		return 0;
++	/* ... and must not run past l1's (saturated) end */
++	return end_offset(l1->offset, l1->length) >=
++	       end_offset(l2->offset, l2->length);
++}
++
++/*
++ * are l1 and l2 intersecting?
++ *   start1                             end1
++ *   [----------------------------------)
++ *                              start2           end2
++ *                              [----------------)
++ */
++static inline int
++lo_seg_intersecting(struct pnfs_layout_range *l1,
++		    struct pnfs_layout_range *l2)
++{
++	u64 l1_end = end_offset(l1->offset, l1->length);
++	u64 l2_end = end_offset(l2->offset, l2->length);
++
++	/* an end of NFS4_MAX_UINT64 counts as reaching past any start */
++	if (l1_end != NFS4_MAX_UINT64 && l1_end <= l2->offset)
++		return 0;
++	return l2_end == NFS4_MAX_UINT64 || l2_end > l1->offset;
++}
++
++void
++pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
++			const nfs4_stateid *stateid)
++{
++	write_seqlock(&lo->seqlock);
++	memcpy(lo->stateid.u.data, stateid->u.data, sizeof(lo->stateid.u.data));
++	write_sequnlock(&lo->seqlock);
++}
++
++void
++pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo)
++{
++	int seq;
++
++	dprintk("--> %s\n", __func__);
++
++	do {
++		seq = read_seqbegin(&lo->seqlock);
++		memcpy(dst->u.data, lo->stateid.u.data,
++		       sizeof(lo->stateid.u.data));
++	} while (read_seqretry(&lo->seqlock, seq));
++
++	dprintk("<-- %s\n", __func__);
++}
++
++static void
++pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
++			      struct nfs4_state *state)
++{
++	int seq;
++
++	dprintk("--> %s\n", __func__);
++
++	write_seqlock(&lo->seqlock);
++	if (!memcmp(lo->stateid.u.data, &zero_stateid, NFS4_STATEID_SIZE))
++		do {
++			seq = read_seqbegin(&state->seqlock);
++			memcpy(lo->stateid.u.data, state->stateid.u.data,
++					sizeof(state->stateid.u.data));
++		} while (read_seqretry(&state->seqlock, seq));
++	write_sequnlock(&lo->seqlock);
++	dprintk("<-- %s\n", __func__);
++}
++
++/*
++ * Send a whole-file LAYOUTGET (offset 0, length NFS4_MAX_UINT64) for @ino;
++ * only range->iomode is taken from @range.  A still-zero layout stateid is
++ * first seeded from an open stateid.  Returns -ENOMEM (after releasing @lo)
++ * if the request cannot be allocated, else the nfs4_proc_layoutget() status.
++ */
++static int
++send_layoutget(struct inode *ino,
++	   struct nfs_open_context *ctx,
++	   struct pnfs_layout_range *range,
++	   struct pnfs_layout_segment **lsegpp,
++	   struct pnfs_layout_hdr *lo)
++{
++	int status;
++	struct nfs_server *server = NFS_SERVER(ino);
++	struct nfs4_layoutget *lgp;
++
++	dprintk("--> %s\n", __func__);
++
++	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
++	if (lgp == NULL) {
++		pnfs_layout_release(lo, NULL);	/* drop the layoutget reference */
++		return -ENOMEM;
++	}
++	lgp->args.minlength = NFS4_MAX_UINT64;
++	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
++	lgp->args.range.iomode = range->iomode;
++	lgp->args.range.offset = 0;
++	lgp->args.range.length = NFS4_MAX_UINT64;
++	lgp->args.type = server->pnfs_curr_ld->id;
++	lgp->args.inode = ino;
++	lgp->lsegpp = lsegpp;	/* pnfs_layout_process() stores the lseg here */
++
++	if (!memcmp(lo->stateid.u.data, &zero_stateid, NFS4_STATEID_SIZE)) {
++		struct nfs_open_context *oldctx = ctx;
++
++		if (!oldctx) {	/* no context supplied: find one by open mode */
++			ctx = nfs_find_open_context(ino, NULL,
++					(range->iomode == IOMODE_READ) ?
++					FMODE_READ: FMODE_WRITE);
++			BUG_ON(!ctx);
++		}
++		/* Set the layout stateid from the open stateid */
++		pnfs_layout_from_open_stateid(NFS_I(ino)->layout, ctx->state);
++		if (!oldctx)	/* drop the context looked up above */
++			put_nfs_open_context(ctx);
++	}
++
++	/* Retrieve layout information from server */
++	status = nfs4_proc_layoutget(lgp);
++
++	dprintk("<-- %s status %d\n", __func__, status);
++	return status;
++}
++
++/*
++ * iomode matching rules used when returning/freeing layout segments:
++ * range	lseg	match
++ * -----	-----	-----
++ * ANY		READ	true
++ * ANY		RW	true
++ * RW		READ	false
++ * RW		RW	true
++ * READ		READ	true
++ * READ		RW	false
++ */
++static inline int
++should_free_lseg(struct pnfs_layout_segment *lseg,
++		   struct pnfs_layout_range *range)
++{
++	return (range->iomode == IOMODE_ANY ||
++		lseg->range.iomode == range->iomode) &&
++	       lo_seg_intersecting(&lseg->range, range);	/* and overlap */
++}
++/* Return the first lseg in lo->segs accepted by should_free_lseg(), or NULL. */
++static struct pnfs_layout_segment *
++has_layout_to_return(struct pnfs_layout_hdr *lo,
++		     struct pnfs_layout_range *range)
++{
++	struct pnfs_layout_segment *out = NULL, *lseg;
++	dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
++		__func__, lo, range->offset, range->length, range->iomode);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry (lseg, &lo->segs, fi_list)
++		if (should_free_lseg(lseg, range)) {
++			out = lseg;	/* no reference is taken on it here */
++			break;
++		}
++
++	dprintk("%s:Return lseg=%p\n", __func__, out);
++	return out;
++}
++/* True when the lseg's kref count is exactly 1. */
++static inline bool
++_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
++{
++	return atomic_read(&lseg->kref.refcount) == 1;
++}
++
++/* Unlink and put matching lsegs whose refcount is 1; reset lo when empty. */
++static void
++pnfs_free_layout(struct pnfs_layout_hdr *lo,
++		 struct pnfs_layout_range *range)
++{
++	struct pnfs_layout_segment *lseg, *next;
++	dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
++		__func__, lo, range->offset, range->length, range->iomode);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry_safe (lseg, next, &lo->segs, fi_list) {
++		if (!should_free_lseg(lseg, range) ||
++		    !_pnfs_can_return_lseg(lseg))
++			continue;	/* not in range, or refcount != 1 */
++		dprintk("%s: freeing lseg %p iomode %d "
++			"offset %llu length %llu\n", __func__,
++			lseg, lseg->range.iomode, lseg->range.offset,
++			lseg->range.length);
++		list_del(&lseg->fi_list);
++		put_lseg_locked(lseg);
++	}
++	if (list_empty(&lo->segs)) {	/* no segments left in this layout */
++		struct nfs_client *clp;
++
++		clp = PNFS_NFS_SERVER(lo)->nfs_client;
++		spin_lock(&clp->cl_lock);
++		list_del_init(&lo->layouts);	/* unlink lo under cl_lock */
++		spin_unlock(&clp->cl_lock);
++		pnfs_set_layout_stateid(lo, &zero_stateid);
++	}
++
++	dprintk("%s:Return\n", __func__);
++}
++/* Mark lsegs matching @range invalid; true if any has a refcount other than 1. */
++static bool
++pnfs_return_layout_barrier(struct nfs_inode *nfsi,
++			   struct pnfs_layout_range *range)
++{
++	struct pnfs_layout_segment *lseg;
++	bool ret = false;
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	list_for_each_entry(lseg, &nfsi->layout->segs, fi_list) {
++		if (!should_free_lseg(lseg, range))
++			continue;
++		lseg->valid = false;	/* set even when the lseg is still in use */
++		if (!_pnfs_can_return_lseg(lseg)) {
++			dprintk("%s: wait on lseg %p refcount %d\n",
++				__func__, lseg,
++				atomic_read(&lseg->kref.refcount));
++			ret = true;	/* caller waits or reports -EAGAIN */
++		}
++	}
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	dprintk("%s:Return %d\n", __func__, ret);
++	return ret;
++}
++/* Allocate and send a LAYOUTRETURN for @range; only RETURN_FILE is accepted. */
++static int
++return_layout(struct inode *ino, struct pnfs_layout_range *range,
++	      enum pnfs_layoutreturn_type type, struct pnfs_layout_hdr *lo,
++	      bool wait)
++{
++	struct nfs4_layoutreturn *lrp;
++	struct nfs_server *server = NFS_SERVER(ino);
++	int status = -ENOMEM;	/* returned if the kzalloc below fails */
++
++	dprintk("--> %s\n", __func__);
++
++	BUG_ON(type != RETURN_FILE);
++
++	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
++	if (lrp == NULL) {
++		if (lo && (type == RETURN_FILE))
++			pnfs_layout_release(lo, NULL);	/* drop caller's ref */
++		goto out;
++	}
++	lrp->args.reclaim = 0;
++	lrp->args.layout_type = server->pnfs_curr_ld->id;
++	lrp->args.return_type = type;
++	lrp->args.range = *range;
++	lrp->args.inode = ino;
++
++	status = nfs4_proc_layoutreturn(lrp, wait);
++out:
++	dprintk("<-- %s status: %d\n", __func__, status);
++	return status;
++}
++/* Return @ino's layout for range->iomode (IOMODE_ANY if @range is NULL). */
++int
++_pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
++		    const nfs4_stateid *stateid, /* optional */
++		    enum pnfs_layoutreturn_type type,
++		    bool wait)
++{
++	struct pnfs_layout_hdr *lo = NULL;
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct pnfs_layout_range arg;
++	int status = 0;
++
++	dprintk("--> %s type %d\n", __func__, type);
++
++	/* Whole file is always returned; only the iomode comes from @range. */
++	arg.iomode = range ? range->iomode : IOMODE_ANY;
++	arg.offset = 0;
++	arg.length = NFS4_MAX_UINT64;
++
++	if (type == RETURN_FILE) {
++		spin_lock(&ino->i_lock);
++		lo = nfsi->layout;
++		if (lo && !has_layout_to_return(lo, &arg)) {
++			lo = NULL;
++		}
++		if (!lo) {
++			spin_unlock(&ino->i_lock);
++			dprintk("%s: no layout segments to return\n", __func__);
++			goto out;	/* nothing to do: status stays 0 */
++		}
++
++		/* Reference for layoutreturn matched in pnfs_layout_release */
++		get_layout(lo);
++
++		spin_unlock(&ino->i_lock);
++
++		if (pnfs_return_layout_barrier(nfsi, &arg)) {
++			if (stateid) { /* callback */
++				status = -EAGAIN;
++				goto out_put;
++			}
++			dprintk("%s: waiting\n", __func__);
++			wait_event(nfsi->lo_waitq,
++				   !pnfs_return_layout_barrier(nfsi, &arg));
++		}
++
++		if (layoutcommit_needed(nfsi)) {
++			if (stateid && !wait) { /* callback */
++				dprintk("%s: layoutcommit pending\n", __func__);
++				status = -EAGAIN;
++				goto out_put;
++			}
++			status = pnfs_layoutcommit_inode(ino, wait);
++			if (status) {
++				/* Return layout even if layoutcommit fails */
++				dprintk("%s: layoutcommit failed, status=%d. "
++					"Returning layout anyway\n",
++					__func__, status);
++			}
++		}
++
++		if (!stateid)	/* not a callback: send LAYOUTRETURN ourselves */
++			status = return_layout(ino, &arg, type, lo, wait);
++		else	/* callback: no LAYOUTRETURN is sent from here */
++			pnfs_layout_release(lo, &arg);
++	}
++out:
++	dprintk("<-- %s status: %d\n", __func__, status);
++	return status;
++out_put:
++	put_layout(ino);	/* undo the get_layout() above */
++	goto out;
++}
++
++/*
++ * cmp two layout ranges for sorting: by offset, then length, then iomode
++ */
++static inline s64
++cmp_layout(struct pnfs_layout_range *l1,
++	   struct pnfs_layout_range *l2)
++{
++	s64 d;
++
++	/* higher offset > lower offset */
++	d = l1->offset - l2->offset;	/* NOTE(review): wraps if u64 - confirm */
++	if (d)
++		return d;
++
++	/* longer length > shorter length */
++	d = l1->length - l2->length;	/* NOTE(review): NFS4_MAX_UINT64 case? */
++	if (d)
++		return d;
++
++	/* read > read/write */
++	return (int)(l1->iomode == IOMODE_READ) -
++	(int)(l2->iomode == IOMODE_READ);
++}
++/* Link @lseg into lo->segs at its cmp_layout() position; takes a ref on @lo. */
++static void
++pnfs_insert_layout(struct pnfs_layout_hdr *lo,
++		   struct pnfs_layout_segment *lseg)
++{
++	struct pnfs_layout_segment *lp;
++	int found = 0;
++
++	dprintk("%s:Begin\n", __func__);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	if (list_empty(&lo->segs)) {	/* first lseg: add lo to cl_layouts */
++		struct nfs_client *clp = PNFS_NFS_SERVER(lo)->nfs_client;
++
++		spin_lock(&clp->cl_lock);
++		BUG_ON(!list_empty(&lo->layouts));
++		list_add_tail(&lo->layouts, &clp->cl_layouts);
++		spin_unlock(&clp->cl_lock);
++	}
++	list_for_each_entry (lp, &lo->segs, fi_list) {
++		if (cmp_layout(&lp->range, &lseg->range) > 0)
++			continue;	/* lp compares greater: keep walking */
++		list_add_tail(&lseg->fi_list, &lp->fi_list);	/* before lp */
++		dprintk("%s: inserted lseg %p "
++			"iomode %d offset %llu length %llu before "
++			"lp %p iomode %d offset %llu length %llu\n",
++			__func__, lseg, lseg->range.iomode,
++			lseg->range.offset, lseg->range.length,
++			lp, lp->range.iomode, lp->range.offset,
++			lp->range.length);
++		found = 1;
++		break;
++	}
++	if (!found) {
++		list_add_tail(&lseg->fi_list, &lo->segs);
++		dprintk("%s: inserted lseg %p "
++			"iomode %d offset %llu length %llu at tail\n",
++			__func__, lseg, lseg->range.iomode,
++			lseg->range.offset, lseg->range.length);
++	}
++	get_layout(lo);	/* one lo reference is taken per insertion */
++
++	dprintk("%s:Return\n", __func__);
++}
++
++/*
++ * Each layoutdriver embeds pnfs_layout_hdr as the first field in its
++ * per-layout type layout cache structure and returns it ZEROed
++ * from layoutdriver_io_ops->alloc_layout.  Returns NULL if that fails.
++ */
++static struct pnfs_layout_hdr *
++alloc_init_layout(struct inode *ino)
++{
++	struct pnfs_layout_hdr *lo;
++	struct layoutdriver_io_operations *io_ops;
++
++	io_ops = NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops;
++	lo = io_ops->alloc_layout(ino);	/* driver-specific allocation */
++	if (!lo) {
++		printk(KERN_ERR
++			"%s: out of memory: io_ops->alloc_layout failed\n",
++			__func__);
++		return NULL;
++	}
++	lo->refcount = 1;	/* initial reference */
++	INIT_LIST_HEAD(&lo->layouts);
++	INIT_LIST_HEAD(&lo->segs);
++	seqlock_init(&lo->seqlock);
++	lo->inode = ino;
++	return lo;
++}
++
++/*
++ * Retrieve and possibly allocate the inode layout (NULL if allocation fails).
++ *
++ * ino->i_lock must be taken by the caller; it is dropped while allocating.
++ */
++static struct pnfs_layout_hdr *
++pnfs_alloc_layout(struct inode *ino)
++{
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct pnfs_layout_hdr *new = NULL;
++
++	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
++
++	BUG_ON_UNLOCKED_INO(ino);
++	if (likely(nfsi->layout))
++		return nfsi->layout;
++
++	spin_unlock(&ino->i_lock);
++	new = alloc_init_layout(ino);
++	spin_lock(&ino->i_lock);
++
++	if (likely(nfsi->layout == NULL)) {	/* Won the race? */
++		nfsi->layout = new;
++	} else if (new) {	/* lost the race: free our unused copy */
++		/* Reference the layout across i_lock release and grab */
++		get_layout(nfsi->layout);
++		spin_unlock(&ino->i_lock);
++		NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops->free_layout(new);
++		spin_lock(&ino->i_lock);
++		put_layout_locked(nfsi->layout);
++	}
++	return nfsi->layout;
++}
++
++/*
++ * iomode matching rules for lookup (a READ request also accepts an RW lseg):
++ * range	lseg	match
++ * -----	-----	-----
++ * ANY		READ	true
++ * ANY		RW	true
++ * RW		READ	false
++ * RW		RW	true
++ * READ		READ	true
++ * READ		RW	true
++ */
++static inline int
++has_matching_lseg(struct pnfs_layout_segment *lseg,
++		  struct pnfs_layout_range *range)
++{
++	struct pnfs_layout_range range1;
++
++	if ((range->iomode == IOMODE_RW && lseg->range.iomode != IOMODE_RW) ||
++	    !lo_seg_intersecting(&lseg->range, range))
++		return 0;	/* iomode rule failed or no overlap */
++
++	/* range1 covers only the first byte in the range */
++	range1 = *range;
++	range1.length = 1;
++	return lo_seg_contained(&lseg->range, &range1);
++}
++
++/*
++ * Find the first lseg on lo->segs that matches @range; NULL if none.
++ */
++static struct pnfs_layout_segment *
++pnfs_has_layout(struct pnfs_layout_hdr *lo,
++		struct pnfs_layout_range *range,
++		bool take_ref,	/* get_lseg() the segment before returning it */
++		bool only_valid)	/* skip segments whose ->valid is false */
++{
++	struct pnfs_layout_segment *lseg, *ret = NULL;
++
++	dprintk("%s:Begin\n", __func__);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry (lseg, &lo->segs, fi_list) {
++		if (has_matching_lseg(lseg, range) &&
++		    (lseg->valid || !only_valid)) {
++			ret = lseg;
++			if (take_ref)
++				get_lseg(ret);
++			break;
++		}
++		if (cmp_layout(range, &lseg->range) > 0)
++			break;	/* stop early, relying on cmp_layout() order */
++	}
++
++	dprintk("%s:Return lseg %p take_ref %d ref %d valid %d\n",
++		__func__, ret, take_ref,
++		ret ? atomic_read(&ret->kref.refcount) : 0,
++		ret ? ret->valid : 0);
++	return ret;
++}
++
++/* Update the file's layout for the given iomode (whole file: pos/count unused).
++ * Layout is retrieved from the server if needed.
++ * If lsegpp is given, the appropriate layout segment is referenced and
++ * returned to the caller.  nfsi->layout can still be NULL at the out: label.
++ */
++void
++_pnfs_update_layout(struct inode *ino,
++		   struct nfs_open_context *ctx,
++		   loff_t pos,
++		   u64 count,
++		   enum pnfs_iomode iomode,
++		   struct pnfs_layout_segment **lsegpp)
++{
++	struct pnfs_layout_range arg = {
++		.iomode = iomode,
++		.offset = 0,
++		.length = NFS4_MAX_UINT64,
++	};
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct pnfs_layout_hdr *lo;
++	struct pnfs_layout_segment *lseg = NULL;
++	bool take_ref = (lsegpp != NULL);
++
++	if (take_ref)
++		*lsegpp = NULL;
++	spin_lock(&ino->i_lock);
++	lo = pnfs_alloc_layout(ino);
++	if (lo == NULL) {
++		dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
++		goto out_unlock;
++	}
++
++	/* Check to see if the layout for the given range already exists */
++	lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
++	if (lseg && !lseg->valid) {
++		if (take_ref)
++			put_lseg_locked(lseg);
++		/* someone is cleaning the layout */
++		lseg = NULL;
++		goto out_unlock;
++	}
++
++	if (lseg) {
++		dprintk("%s: Using cached lseg %p for %llu@%llu iomode %d)\n",
++			__func__,
++			lseg,
++			arg.length,
++			arg.offset,
++			arg.iomode);
++
++		goto out_unlock;
++	}
++
++	/* if get layout already failed once goto out */
++	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state)) {
++		if (unlikely(nfsi->pnfs_layout_suspend &&
++		    get_seconds() >= nfsi->pnfs_layout_suspend)) {
++			dprintk("%s: layout_get resumed\n", __func__);
++			clear_bit(lo_fail_bit(iomode),
++				  &nfsi->layout->state);
++			nfsi->pnfs_layout_suspend = 0;
++		} else
++			goto out_unlock;
++	}
++
++	/* Reference the layout for layoutget matched in pnfs_layout_release */
++	get_layout(lo);
++	spin_unlock(&ino->i_lock);
++
++	send_layoutget(ino, ctx, &arg, lsegpp, lo);
++out:
++	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
++		nfsi->layout ? nfsi->layout->state : 0UL, lseg);
++	return;
++out_unlock:
++	if (lsegpp)
++		*lsegpp = lseg;	/* cached lseg, or NULL when none is usable */
++	spin_unlock(&ino->i_lock);
++	goto out;
++}
++/* LAYOUTGET completion: translate @rpc_status into lgp->status. */
++void
++pnfs_get_layout_done(struct nfs4_layoutget *lgp, int rpc_status)
++{
++	struct pnfs_layout_segment *lseg = NULL;	/* only ever printed */
++	struct nfs_inode *nfsi = NFS_I(lgp->args.inode);
++	time_t suspend = 0;
++
++	dprintk("-->%s\n", __func__);
++
++	lgp->status = rpc_status;
++	if (likely(!rpc_status)) {
++		if (unlikely(lgp->res.layout.len <= 0)) {	/* empty layout */
++			printk(KERN_ERR
++			       "%s: ERROR Returned layout size is ZERO\n", __func__);
++			lgp->status = -EIO;
++		}
++		goto out;
++	}
++
++	dprintk("%s: ERROR retrieving layout %d\n", __func__, rpc_status);
++	switch (rpc_status) {
++	case -NFS4ERR_BADLAYOUT:
++		lgp->status = -ENOENT;
++		/* FALLTHROUGH */
++	case -EACCES:	/* NFS4ERR_ACCESS */
++		/* transient error, don't mark with NFS_INO_LAYOUT_FAILED */
++		goto out;
++
++	case -NFS4ERR_LAYOUTTRYLATER:
++	case -NFS4ERR_RECALLCONFLICT:
++	case -NFS4ERR_OLD_STATEID:
++	case -EAGAIN:	/* NFS4ERR_LOCKED */
++		lgp->status = -NFS4ERR_DELAY;	/* for nfs4_handle_exception */
++		/* FALLTHROUGH */
++	case -NFS4ERR_GRACE:
++	case -NFS4ERR_DELAY:
++		goto out;
++
++	case -NFS4ERR_ADMIN_REVOKED:
++	case -NFS4ERR_DELEG_REVOKED:
++		/* The layout is expected to be returned at this point.
++		 * This should clear the layout stateid as well */
++		suspend = get_seconds() + 1;
++		break;
++
++	case -NFS4ERR_LAYOUTUNAVAILABLE:
++		lgp->status = -ENOTSUPP;
++		break;
++
++	case -NFS4ERR_REP_TOO_BIG:
++	case -NFS4ERR_REP_TOO_BIG_TO_CACHE:
++		lgp->status = -E2BIG;
++		break;
++
++	/* Leave the following errors untranslated */
++	case -NFS4ERR_DEADSESSION:
++	case -NFS4ERR_DQUOT:
++	case -EINVAL:		/* NFS4ERR_INVAL */
++	case -EIO:		/* NFS4ERR_IO */
++	case -NFS4ERR_FHEXPIRED:
++	case -NFS4ERR_MOVED:
++	case -NFS4ERR_NOSPC:
++	case -ESERVERFAULT:	/* NFS4ERR_SERVERFAULT */
++	case -ESTALE:		/* NFS4ERR_STALE */
++	case -ETOOSMALL:	/* NFS4ERR_TOOSMALL */
++		break;
++
++	/* The following errors are our fault and should never happen */
++	case -NFS4ERR_BADIOMODE:
++	case -NFS4ERR_BADXDR:
++	case -NFS4ERR_REQ_TOO_BIG:
++	case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
++	case -NFS4ERR_WRONG_TYPE:
++		lgp->status = -EINVAL;
++		/* FALLTHROUGH */
++	case -NFS4ERR_BAD_STATEID:
++	case -NFS4ERR_NOFILEHANDLE:
++	case -ENOTSUPP:	/* NFS4ERR_NOTSUPP */
++	case -NFS4ERR_OPENMODE:
++	case -NFS4ERR_OP_NOT_IN_SESSION:
++	case -NFS4ERR_TOO_MANY_OPS:
++		dprintk("%s: error %d: should never happen\n", __func__,
++			rpc_status);
++		break;
++
++	/* The following errors are the server's fault */
++	default:
++		dprintk("%s: illegal error %d\n", __func__, rpc_status);
++		lgp->status = -EIO;
++		break;
++	}
++
++	/* remember that get layout failed and suspend trying */
++	nfsi->pnfs_layout_suspend = suspend;	/* 0: no timed resume */
++	set_bit(lo_fail_bit(lgp->args.range.iomode),
++		&nfsi->layout->state);
++	dprintk("%s: layout_get suspended until %ld\n",
++		__func__, suspend);
++out:
++	dprintk("%s end (err:%d) state 0x%lx lseg %p\n",
++		__func__, lgp->status, nfsi->layout->state, lseg);
++	return;
++}
++/* Turn a LAYOUTGET result into an lseg, insert it and set the layout stateid. */
++int
++pnfs_layout_process(struct nfs4_layoutget *lgp)
++{
++	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
++	struct nfs4_layoutget_res *res = &lgp->res;
++	struct pnfs_layout_segment *lseg;
++	struct inode *ino = PNFS_INODE(lo);
++	int status = 0;
++
++	/* Inject layout blob into I/O device driver */
++	lseg = PNFS_LD_IO_OPS(lo)->alloc_lseg(lo, res);
++	if (!lseg || IS_ERR(lseg)) {	/* handles both NULL and ERR_PTR */
++		if (!lseg)
++			status = -ENOMEM;
++		else
++			status = PTR_ERR(lseg);
++		dprintk("%s: Could not allocate layout: error %d\n",
++		       __func__, status);
++		goto out;
++	}
++
++	spin_lock(&ino->i_lock);
++	init_lseg(lo, lseg);
++	lseg->range = res->range;
++	if (lgp->lsegpp) {	/* requester wants the lseg: take a ref for it */
++		get_lseg(lseg);
++		*lgp->lsegpp = lseg;
++	}
++	pnfs_insert_layout(lo, lseg);
++
++	if (res->return_on_close) {
++		lo->roc_iomode |= res->range.iomode;
++		if (!lo->roc_iomode)
++			lo->roc_iomode = IOMODE_ANY;
++	}
++
++	/* Done processing layoutget. Set the layout stateid */
++	pnfs_set_layout_stateid(lo, &res->stateid);
++	spin_unlock(&ino->i_lock);
++out:
++	return status;
++}
++/* Report the file offset and byte count covered by the pages on @pages. */
++void
++readahead_range(struct inode *inode, struct list_head *pages, loff_t *offset,
++		size_t *count)
++{
++	struct page *first, *last;
++	loff_t foff, i_size = i_size_read(inode);
++	pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
++	size_t range;
++
++	/* NOTE(review): assumes the list tail holds the lowest index - confirm */
++	first = list_entry((pages)->prev, struct page, lru);
++	last = list_entry((pages)->next, struct page, lru);
++
++	foff = (loff_t)first->index << PAGE_CACHE_SHIFT;
++
++	range = (last->index - first->index) * PAGE_CACHE_SIZE;
++	if (last->index == end_index)	/* last page holds the end of file */
++		range += ((i_size - 1) & ~PAGE_CACHE_MASK) + 1;
++	else
++		range += PAGE_CACHE_SIZE;
++	dprintk("%s foff %lu, range %Zu\n", __func__, (unsigned long)foff,
++		range);
++	*offset = foff;
++	*count = range;
++}
++/* Set pgio->pg_test to the layout driver's pg_test policy op, else NULL. */
++void
++pnfs_set_pg_test(struct inode *inode, struct nfs_pageio_descriptor *pgio)
++{
++	struct pnfs_layout_hdr *lo;
++	struct pnfs_layoutdriver_type *ld;
++
++	pgio->pg_test = NULL;
++
++	lo = NFS_I(inode)->layout;
++	ld = NFS_SERVER(inode)->pnfs_curr_ld;
++	if (!pnfs_enabled_sb(NFS_SERVER(inode)) || !lo)
++		return;	/* pNFS off or no layout: pg_test stays NULL */
++
++	if (ld->ld_policy_ops)
++		pgio->pg_test = ld->ld_policy_ops->pg_test;
++}
++/* Return the stripe size used as the gather boundary, or 0 for no boundary. */
++static u32
++pnfs_getboundary(struct inode *inode)
++{
++	u32 stripe_size = 0;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct layoutdriver_policy_operations *policy_ops;
++
++	if (!nfss->pnfs_curr_ld)
++		goto out;
++
++	policy_ops = nfss->pnfs_curr_ld->ld_policy_ops;
++	if (!policy_ops || !policy_ops->get_stripesize)
++		goto out;
++
++	/* The default is to not gather across stripes */
++	if (pnfs_ld_gather_across_stripes(nfss->pnfs_curr_ld))
++		goto out;
++
++	spin_lock(&inode->i_lock);	/* layout pointer is read under i_lock */
++	if (NFS_I(inode)->layout)
++		stripe_size = policy_ops->get_stripesize(NFS_I(inode)->layout);
++	spin_unlock(&inode->i_lock);
++out:
++	return stripe_size;
++}
++
++/*
++ * Set up @pgio for a read.  rsize is already set by caller to MDS rsize.
++ */
++void
++pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
++		  struct inode *inode,
++		  struct nfs_open_context *ctx,
++		  struct list_head *pages,
++		  size_t *rsize)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	size_t count = 0;
++	loff_t loff;
++
++	pgio->pg_iswrite = 0;
++	pgio->pg_boundary = 0;
++	pgio->pg_test = NULL;
++	pgio->pg_lseg = NULL;
++
++	if (!pnfs_enabled_sb(nfss))
++		return;
++
++	/* Calculate the total read-ahead count */
++	readahead_range(inode, pages, &loff, &count);
++
++	if (count > 0) {
++		_pnfs_update_layout(inode, ctx, loff, count, IOMODE_READ,
++				    &pgio->pg_lseg);
++		if (!pgio->pg_lseg)
++			return;	/* no lseg: *rsize keeps the caller's value */
++
++		*rsize = NFS_SERVER(inode)->ds_rsize;
++		pgio->pg_boundary = pnfs_getboundary(inode);
++		if (pgio->pg_boundary)
++			pnfs_set_pg_test(inode, pgio);
++	}
++}
++/* Set up @pgio for a write; *wsize becomes ds_wsize when pNFS is enabled. */
++void
++pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
++		       size_t *wsize)
++{
++	struct nfs_server *server = NFS_SERVER(inode);
++
++	pgio->pg_iswrite = 1;
++	if (!pnfs_enabled_sb(server)) {	/* pNFS off: *wsize is not touched */
++		pgio->pg_boundary = 0;
++		pgio->pg_test = NULL;
++		return;
++	}
++	pgio->pg_boundary = pnfs_getboundary(inode);
++	pnfs_set_pg_test(inode, pgio);
++	*wsize = server->ds_wsize;
++}
++
++/* Return I/O buffer size for a layout driver (0 if get_blocksize is missing).
++ * This value will determine what size reads and writes
++ * will be gathered into and sent to the data servers.
++ * blocksize must be a multiple of the page cache size.
++ */
++unsigned int
++pnfs_getiosize(struct nfs_server *server)
++{
++	if (!PNFS_EXISTS_LDPOLICY_OP(server, get_blocksize))
++		return 0;
++	return server->pnfs_curr_ld->ld_policy_ops->get_blocksize();
++}
++/* Set ds_rsize/ds_wsize from the driver I/O size, else from rsize/wsize. */
++void
++pnfs_set_ds_iosize(struct nfs_server *server)
++{
++	unsigned dssize = pnfs_getiosize(server);
++
++	/* Set buffer size for data servers */
++	if (dssize > 0) {
++		server->ds_rsize = server->ds_wsize =
++			nfs_block_size(dssize, NULL);
++	} else {	/* no driver size: reuse the server's rsize/wsize */
++		server->ds_wsize = server->wsize;
++		server->ds_rsize = server->rsize;
++	}
++}
++/* Shared I/O completion: put the lseg, run rpc_call_done, detect -EAGAIN. */
++static int
++pnfs_call_done(struct pnfs_call_data *pdata, struct rpc_task *task, void *data)
++{
++	put_lseg(pdata->lseg);
++	pdata->lseg = NULL;
++	pdata->call_ops->rpc_call_done(task, data);
++	if (pdata->pnfs_error == -EAGAIN || task->tk_status == -EAGAIN)
++		return -EAGAIN;	/* callers in this file retry the I/O */
++	if (pdata->pnfsflags & PNFS_NO_RPC) {
++		pdata->call_ops->rpc_release(data);
++	} else {
++		/*
++		 * just restore original rpc call ops
++		 * rpc_release will be called later by the rpc scheduling layer.
++		 */
++		task->tk_ops = pdata->call_ops;
++	}
++	return 0;
++}
++
++/* Work item queued by pnfs_writeback_done() when a pNFS write must be retried:
++ * returns the layout, then reissues the write via pnfs_initiate_write().
++ *
++ * pnfs_writeback_done() is the post-write completion function invoked by all
++ * layout drivers when write_pagelist is done.  NOTE: callers set
++ * data->pnfsflags PNFS_NO_RPC so NFS cleanup does only page cache cleanup.
++ */
++static void
++pnfs_write_retry(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++	struct pnfs_layout_range range;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++	range.iomode = IOMODE_RW;
++	range.offset = wdata->args.offset;	/* callee uses iomode only */
++	range.length = wdata->args.count;
++	_pnfs_return_layout(wdata->inode, &range, NULL, RETURN_FILE, true);
++	pnfs_initiate_write(wdata, NFS_CLIENT(wdata->inode),
++			    wdata->pdata.call_ops, wdata->pdata.how);
++}
++/* Write completion: note layoutcommit state, then finish or queue a retry. */
++static void
++pnfs_writeback_done(struct nfs_write_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	/* update last write offset and need layout commit
++	 * for non-files layout types (files layout calls
++	 * pnfs4_write_done for this)
++	 */
++	if ((pdata->pnfsflags & PNFS_NO_RPC) &&
++	    data->task.tk_status >= 0 && data->res.count > 0) {
++		struct nfs_inode *nfsi = NFS_I(data->inode);
++
++		pnfs_update_last_write(nfsi, data->args.offset, data->res.count);
++		pnfs_need_layoutcommit(nfsi, data->args.context);
++	}
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		INIT_WORK(&data->task.u.tk_work, pnfs_write_retry);
++		queue_work(nfsiod_workqueue, &data->task.u.tk_work);
++	}
++}
++/* Call put_lseg() on each request's wb_lseg on @head and clear the pointer. */
++static void _pnfs_clear_lseg_from_pages(struct list_head *head)
++{
++	struct nfs_page *req;
++
++	list_for_each_entry(req, head, wb_list) {
++		put_lseg(req->wb_lseg);
++		req->wb_lseg = NULL;
++	}
++}
++
++/*
++ * Call the layout driver's write_pagelist() and return its status.  On
++ * PNFS_NOT_ATTEMPTED the lseg references are dropped again so the caller
++ * can fall back to regular NFS processing.
++ *
++ * TODO: Is wdata->how and wdata->args.stable always the same value?
++ * TODO: It seems in NFS, the server may not do a stable write even
++ * though it was requested (and vice-versa?).  To check, it looks
++ * in data->res.verf->committed.  Do we need this ability
++ * for non-file layout drivers?
++ */
++enum pnfs_try_status
++pnfs_try_to_write_data(struct nfs_write_data *wdata,
++			const struct rpc_call_ops *call_ops, int how)
++{
++	struct inode *inode = wdata->inode;
++	enum pnfs_try_status trypnfs;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct pnfs_layout_segment *lseg = wdata->req->wb_lseg;
++
++	wdata->pdata.call_ops = call_ops;
++	wdata->pdata.pnfs_error = 0;
++	wdata->pdata.how = how;
++
++	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
++		inode->i_ino, wdata->args.count, wdata->args.offset, how);
++
++	get_lseg(lseg);	/* stored in pdata.lseg; put in pnfs_call_done() */
++
++	if (!pnfs_use_rpc(nfss))
++		wdata->pdata.pnfsflags |= PNFS_NO_RPC;
++	wdata->pdata.lseg = lseg;
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->write_pagelist(wdata,
++		nfs_page_array_len(wdata->args.pgbase, wdata->args.count),
++								how);
++
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {	/* undo the setup above */
++		wdata->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		wdata->pdata.lseg = NULL;
++		put_lseg(lseg);
++		_pnfs_clear_lseg_from_pages(&wdata->pages);
++	} else {
++		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
++	}
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++
++/* Retry work queued by pnfs_read_done() (the post-read completion function
++ * invoked by all layout drivers when read_pagelist is done) on -EAGAIN.
++ */
++static void
++pnfs_read_retry(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_read_data *rdata;
++	struct pnfs_layout_range range;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	rdata = container_of(task, struct nfs_read_data, task);
++	range.iomode = IOMODE_RW;	/* NOTE(review): RW on read path? */
++	range.offset = rdata->args.offset;	/* callee uses iomode only */
++	range.length = rdata->args.count;
++	_pnfs_return_layout(rdata->inode, &range, NULL, RETURN_FILE, true);
++	pnfs_initiate_read(rdata, NFS_CLIENT(rdata->inode),
++			   rdata->pdata.call_ops);
++}
++/* Read completion: finish via pnfs_call_done() or queue pnfs_read_retry(). */
++static void
++pnfs_read_done(struct nfs_read_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		INIT_WORK(&data->task.u.tk_work, pnfs_read_retry);
++		queue_work(nfsiod_workqueue, &data->task.u.tk_work);
++	}
++}
++
++/*
++ * Call the layout driver's read_pagelist() and return its status.  On
++ * PNFS_NOT_ATTEMPTED the lseg references are dropped again so the caller
++ * can fall back to regular NFS processing.
++ */
++enum pnfs_try_status
++pnfs_try_to_read_data(struct nfs_read_data *rdata,
++		       const struct rpc_call_ops *call_ops)
++{
++	struct inode *inode = rdata->inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct pnfs_layout_segment *lseg = rdata->req->wb_lseg;
++	enum pnfs_try_status trypnfs;
++
++	rdata->pdata.call_ops = call_ops;
++	rdata->pdata.pnfs_error = 0;
++
++	dprintk("%s: Reading ino:%lu %u@%llu\n",
++		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
++
++	get_lseg(lseg);	/* stored in pdata.lseg; put in pnfs_call_done() */
++
++	if (!pnfs_use_rpc(nfss))
++		rdata->pdata.pnfsflags |= PNFS_NO_RPC;
++	rdata->pdata.lseg = lseg;
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->read_pagelist(rdata,
++		nfs_page_array_len(rdata->args.pgbase, rdata->args.count));
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {	/* undo the setup above */
++		rdata->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		rdata->pdata.lseg = NULL;
++		put_lseg(lseg);
++		_pnfs_clear_lseg_from_pages(&rdata->pages);
++	} else {
++		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
++	}
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++
++/*
++ * This gives the layout driver an opportunity to read in page "around"
++ * the data to be written.  It returns 0 on success, otherwise an error code
++ * which will either be passed up to user, or ignored if some previous part
++ * of write succeeded.  *fsdata is set to the new pnfs_fsdata, NULL on error.
++ * Note the range [pos, pos+len-1] is entirely within the page.
++ */
++int _pnfs_write_begin(struct inode *inode, struct page *page,
++		      loff_t pos, unsigned len,
++		      struct pnfs_layout_segment *lseg,
++		      struct pnfs_fsdata **fsdata)
++{
++	struct pnfs_fsdata *data;
++	int status = 0;
++
++	dprintk("--> %s: pos=%llu len=%u\n",
++		__func__, (unsigned long long)pos, len);
++	data = kzalloc(sizeof(struct pnfs_fsdata), GFP_KERNEL);
++	if (!data) {
++		status = -ENOMEM;
++		goto out;	/* *fsdata is set to NULL at out: */
++	}
++	data->lseg = lseg; /* refcount passed into data to be managed there */
++	status = NFS_SERVER(inode)->pnfs_curr_ld->ld_io_ops->write_begin(
++						lseg, page, pos, len, data);
++	if (status) {	/* driver failed: discard the fsdata */
++		kfree(data);
++		data = NULL;
++	}
++out:
++	*fsdata = data;
++	dprintk("<-- %s: status=%d\n", __func__, status);
++	return status;
++}
++
++/* Driver write_end() wrapper.  Return 0 on success, negative on failure */
++/* CAREFUL - what happens if copied < len??? */
++int _pnfs_write_end(struct inode *inode, struct page *page,
++		    loff_t pos, unsigned len, unsigned copied,
++		    struct pnfs_layout_segment *lseg)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int status;
++
++	status = nfss->pnfs_curr_ld->ld_io_ops->write_end(inode, page,
++						pos, len, copied, lseg);
++	return status;
++}
++
++/* pNFS Commit callback function for all layout drivers; retries on -EAGAIN */
++static void
++pnfs_commit_done(struct nfs_write_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		struct pnfs_layout_range range = {
++			.iomode = IOMODE_RW,
++			.offset = data->args.offset,
++			.length = data->args.count,
++		};
++		dprintk("%s: retrying\n", __func__);	/* inline, no work item */
++		_pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE,
++				    true);
++		pnfs_initiate_commit(data, NFS_CLIENT(data->inode),
++				     pdata->call_ops, pdata->how, 1);
++	}
++}
++/* Hand a commit to the layout driver; undo pNFS state if it declines. */
++enum pnfs_try_status
++pnfs_try_to_commit(struct nfs_write_data *data,
++		    const struct rpc_call_ops *call_ops, int sync)
++{
++	struct inode *inode = data->inode;
++	struct nfs_server *nfss = NFS_SERVER(data->inode);
++	enum pnfs_try_status trypnfs;
++
++	dprintk("%s: Begin\n", __func__);
++
++	/* We need to account for possibility that
++	 * each nfs_page can point to a different lseg (or be NULL).
++	 * For the immediate case of whole-file-only layouts, we at
++	 * least know there can be only a single lseg.
++	 * We still have to account for the possibility of some being NULL.
++	 * This will be done by passing the buck to the layout driver.
++	 */
++	data->pdata.call_ops = call_ops;
++	data->pdata.pnfs_error = 0;
++	data->pdata.how = sync;
++	data->pdata.lseg = NULL;	/* no single lseg is pinned for a commit */
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->commit(data, sync);
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {
++		data->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		_pnfs_clear_lseg_from_pages(&data->pages);
++	} else
++		nfs_inc_stats(inode, NFSIOS_PNFS_COMMIT);
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++/* Invoke the layout driver's optional cleanup_layoutcommit() hook. */
++void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
++{
++	struct nfs_server *nfss = NFS_SERVER(data->args.inode);
++
++	/* TODO: Maybe we should avoid this by allowing the layout driver
++	* to directly xdr its layout on the wire.
++	*/
++	if (nfss->pnfs_curr_ld->ld_io_ops->cleanup_layoutcommit)
++		nfss->pnfs_curr_ld->ld_io_ops->cleanup_layoutcommit(
++					NFS_I(data->args.inode)->layout,
++					&data->args, data->status);
++}
++
++/*
++ * Fill in the LAYOUTCOMMIT args/results for @inode; returns the driver status.
++ */
++static int
++pnfs_layoutcommit_setup(struct inode *inode,
++			struct nfs4_layoutcommit_data *data,
++			loff_t write_begin_pos, loff_t write_end_pos)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int result = 0;	/* stays 0 when the driver has no setup hook */
++
++	dprintk("--> %s\n", __func__);
++
++	data->args.inode = inode;
++	data->args.fh = NFS_FH(inode);
++	data->args.layout_type = nfss->pnfs_curr_ld->id;
++	data->res.fattr = &data->fattr;
++	nfs_fattr_init(&data->fattr);
++
++	/* TODO: Need to determine the correct values */
++	data->args.time_modify_changed = 0;
++
++	/* Set values from inode so it can be reset
++	 */
++	data->args.range.iomode = IOMODE_RW;
++	data->args.range.offset = write_begin_pos;
++	data->args.range.length = write_end_pos - write_begin_pos + 1;
++	data->args.lastbytewritten =  min(write_end_pos,
++					  i_size_read(inode) - 1);
++	data->args.bitmask = nfss->attr_bitmask;
++	data->res.server = nfss;
++
++	/* Call layout driver to set the arguments */
++	if (nfss->pnfs_curr_ld->ld_io_ops->setup_layoutcommit)
++		result = nfss->pnfs_curr_ld->ld_io_ops->setup_layoutcommit(
++				NFS_I(inode)->layout, &data->args);
++
++	dprintk("<-- %s Status %d\n", __func__, result);
++	return result;
++}
++
++/* Issue a layoutcommit for an inode; @sync is handed on to
++ * nfs4_proc_layoutcommit().  Returns 0 when no commit was pending. */
++int
++pnfs_layoutcommit_inode(struct inode *inode, int sync)
++{
++	struct nfs4_layoutcommit_data *data;
++	struct nfs_inode *nfsi = NFS_I(inode);
++	loff_t write_begin_pos;
++	loff_t write_end_pos;
++
++	int status = 0;
++
++	dprintk("%s Begin (sync:%d)\n", __func__, sync);
++
++	BUG_ON(!has_layout(nfsi));
++
++	data = pnfs_layoutcommit_alloc();
++	if (!data)
++		return -ENOMEM;
++
++	spin_lock(&inode->i_lock);
++	if (!layoutcommit_needed(nfsi)) {
++		spin_unlock(&inode->i_lock);
++		goto out_free;	/* status is still 0 here */
++	}
++
++	/* Snapshot the pending range and cred into locals/data, then clear
++	 * them in the layout (under i_lock) so new lc info can be generated
++	 */
++	write_begin_pos = nfsi->layout->write_begin_pos;
++	write_end_pos = nfsi->layout->write_end_pos;
++	data->cred = nfsi->layout->cred;
++	nfsi->layout->write_begin_pos = 0;
++	nfsi->layout->write_end_pos = 0;
++	nfsi->layout->cred = NULL;	/* cred pointer now lives in data */
++	__clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->state);
++	pnfs_get_layout_stateid(&data->args.stateid, nfsi->layout);
++
++	/* Reference for layoutcommit matched in pnfs_layoutcommit_release */
++	get_layout(NFS_I(inode)->layout);
++
++	spin_unlock(&inode->i_lock);
++
++	/* Set up layout commit args */
++	status = pnfs_layoutcommit_setup(inode, data, write_begin_pos,
++					 write_end_pos);
++	if (status) {
++		/* The layout driver failed to setup the layoutcommit */
++		put_rpccred(data->cred);
++		put_layout(inode);	/* undo the get_layout() above */
++		goto out_free;
++	}
++	status = nfs4_proc_layoutcommit(data, sync);
++out:
++	dprintk("%s end (err:%d)\n", __func__, status);
++	return status;
++out_free:
++	pnfs_layoutcommit_free(data);
++	goto out;
++}
++
++void pnfs_free_fsdata(struct pnfs_fsdata *fsdata)
++{
++	/* lseg refcounting is handled directly in nfs_write_end, so only the
++	 * container is released here; kfree() ignores NULL, so no guard is
++	 * needed. */
++	kfree(fsdata);
++}
++
++/* Callback operations handed to layout drivers: the two device lookup
++ * procedures plus the read/write/commit completion handlers. */
++struct pnfs_client_operations pnfs_ops = {
++	.nfs_getdevicelist = nfs4_proc_getdevicelist,
++	.nfs_getdeviceinfo = nfs4_proc_getdeviceinfo,
++	.nfs_readlist_complete = pnfs_read_done,
++	.nfs_writelist_complete = pnfs_writeback_done,
++	.nfs_commit_complete = pnfs_commit_done,
++};
++
++EXPORT_SYMBOL(pnfs_unregister_layoutdriver);
++EXPORT_SYMBOL(pnfs_register_layoutdriver);
++
++
++/* Get-or-create the per-nfs_client device ID cache (one layout type each) */
++int
++nfs4_alloc_init_deviceid_cache(struct nfs_client *clp,
++			 void (*free_callback)(struct kref *))
++{
++	struct nfs4_deviceid_cache *c;
++
++	c = kzalloc(sizeof(struct nfs4_deviceid_cache), GFP_KERNEL);
++	if (!c)
++		return -ENOMEM;
++	spin_lock(&clp->cl_lock);	/* c is allocated before locking */
++	if (clp->cl_devid_cache != NULL) {
++		kref_get(&clp->cl_devid_cache->dc_kref); /* reuse existing */
++		spin_unlock(&clp->cl_lock);
++		dprintk("%s [kref [%d]]\n", __func__,
++			atomic_read(&clp->cl_devid_cache->dc_kref.refcount));
++		kfree(c);	/* unused; free_callback is ignored too */
++	} else {
++		int i;
++
++		spin_lock_init(&c->dc_lock);
++		for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE ; i++)
++			INIT_HLIST_HEAD(&c->dc_deviceids[i]);
++		kref_init(&c->dc_kref);
++		c->dc_free_callback = free_callback;
++		clp->cl_devid_cache = c;	/* published under cl_lock */
++		spin_unlock(&clp->cl_lock);
++		dprintk("%s [new]\n", __func__);
++	}
++	return 0;	/* both the reuse and the create path succeed */
++}
++EXPORT_SYMBOL(nfs4_alloc_init_deviceid_cache);
++
++void
++nfs4_init_deviceid_node(struct nfs4_deviceid *d)
++{
++	INIT_HLIST_NODE(&d->de_node);	/* not linked into any bucket yet */
++	kref_init(&d->de_kref);		/* start the entry's refcount */
++}
++EXPORT_SYMBOL(nfs4_init_deviceid_node);
++
++/* Called from layoutdriver_io_operations->alloc_lseg */
++void
++nfs4_set_layout_deviceid(struct pnfs_layout_segment *l, struct nfs4_deviceid *d)
++{
++	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
++	l->deviceid = d;	/* pointer only; no reference is taken here */
++}
++EXPORT_SYMBOL(nfs4_set_layout_deviceid);
++
++/* Called from layoutdriver_io_operations->free_lseg */
++void
++nfs4_put_unset_layout_deviceid(struct pnfs_layout_segment *l,
++			   struct nfs4_deviceid *d,
++			   void (*free_callback)(struct kref *))
++{
++	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
++	l->deviceid = NULL;	/* detach from the lseg before the put */
++	kref_put(&d->de_kref, free_callback);	/* drops one ref on d */
++}
++EXPORT_SYMBOL(nfs4_put_unset_layout_deviceid);
++
++/* Find and reference a deviceid; NULL if absent or already at refcount 0 */
++struct nfs4_deviceid *
++nfs4_find_get_deviceid(struct nfs4_deviceid_cache *c, struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++	long hash = nfs4_deviceid_hash(id);
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	rcu_read_lock();	/* lookup takes no dc_lock */
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (!memcmp(&d->de_id, id, NFS4_PNFS_DEVICEID4_SIZE)) {
++			if (!atomic_inc_not_zero(&d->de_kref.refcount)) {
++				goto fail;	/* match found but count was 0 */
++			} else {
++				rcu_read_unlock();
++				return d;	/* returned with one extra ref */
++			}
++		}
++	}
++fail:
++	rcu_read_unlock();
++	return NULL;
++}
++EXPORT_SYMBOL(nfs4_find_get_deviceid);
++
++/*
++ * Add and kref_get a deviceid. GETDEVICEINFOs for the same deviceid can race:
++ * if it is already cached, new goes to dc_free_callback and old is returned
++ */
++struct nfs4_deviceid *
++nfs4_add_get_deviceid(struct nfs4_deviceid_cache *c, struct nfs4_deviceid *new)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++	long hash = nfs4_deviceid_hash(&new->de_id);
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	spin_lock(&c->dc_lock);	/* bucket is modified under dc_lock */
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (!memcmp(&d->de_id, &new->de_id, NFS4_PNFS_DEVICEID4_SIZE)) {
++			kref_get(&d->de_kref);	/* ref for the caller */
++			spin_unlock(&c->dc_lock);
++			dprintk("%s [discard]\n", __func__);
++			c->dc_free_callback(&new->de_kref); /* no kref_put */
++			return d;
++		}
++	}
++	hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
++	kref_get(&new->de_kref);	/* extra ref on top of the initial one */
++	spin_unlock(&c->dc_lock);
++	dprintk("%s [new]\n", __func__);
++	return new;
++}
++EXPORT_SYMBOL(nfs4_add_get_deviceid);
++
++/*
++ * Unhash the first deviceid in a bucket that matches @id (any entry if @id
++ * is NULL) and drop one reference on it. Returns 1 if removed, else 0.
++ */
++static int
++nfs4_remove_deviceid(struct nfs4_deviceid_cache *c, long hash,
++		     struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	spin_lock(&c->dc_lock);
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (id && memcmp(id, &d->de_id, NFS4_PNFS_DEVICEID4_SIZE))
++			continue;	/* an id was given and differs */
++		hlist_del_rcu(&d->de_node);
++		spin_unlock(&c->dc_lock);
++		synchronize_rcu();	/* wait out RCU readers before the put */
++		dprintk("%s [%d]\n", __func__,
++			atomic_read(&d->de_kref.refcount));
++		kref_put(&d->de_kref, c->dc_free_callback);
++		return 1;
++	}
++	spin_unlock(&c->dc_lock);
++	return 0;
++}
++
++void
++nfs4_delete_device(struct nfs4_deviceid_cache *c, struct pnfs_deviceid *id)
++{
++	long hash = nfs4_deviceid_hash(id);
++
++	nfs4_remove_deviceid(c, hash, id);	/* found-or-not result ignored */
++}
++EXPORT_SYMBOL(nfs4_delete_device);
++
++static void
++nfs4_free_deviceid_cache(struct kref *kref)
++{
++	struct nfs4_deviceid_cache *cache =
++		container_of(kref, struct nfs4_deviceid_cache, dc_kref);
++	long i;
++
++	for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
++		while (nfs4_remove_deviceid(cache, i, NULL))
++			;	/* NULL id: keep removing until bucket i is empty */
++	kfree(cache);
++}
++
++void
++nfs4_put_deviceid_cache(struct nfs_client *clp)
++{
++	struct nfs4_deviceid_cache *tmp = clp->cl_devid_cache;
++
++	dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
++	dprintk("%s [%d]\n", __func__, atomic_read(&tmp->dc_kref.refcount));
++	/* The caller's reference keeps tmp valid.  cl_lock is taken only for
++	 * the final put, so the pointer is cleared exactly once, race-free. */
++	if (atomic_dec_and_lock(&tmp->dc_kref.refcount, &clp->cl_lock)) {
++		clp->cl_devid_cache = NULL;
++		spin_unlock(&clp->cl_lock);
++		nfs4_free_deviceid_cache(&tmp->dc_kref);
++	}
++}
++EXPORT_SYMBOL(nfs4_put_deviceid_cache);
+diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.h
+--- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-08-31 20:42:05.542222767 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-08-31 20:42:05.542222767 -0400
+@@ -0,0 +1,354 @@
++/*
++ *  fs/nfs/pnfs.h
++ *
++ *  pNFS client data structures.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef FS_NFS_PNFS_H
++#define FS_NFS_PNFS_H
++
++#include <linux/nfs4_pnfs.h>
++
++#ifdef CONFIG_NFS_V4_1
++
++#include <linux/nfs_page.h>
++#include <linux/nfs_iostat.h>
++#include "iostat.h"
++
++/* nfs4proc.c */
++extern int nfs4_proc_getdevicelist(struct nfs_server *server,
++				   const struct nfs_fh *fh,
++				   struct pnfs_devicelist *devlist);
++extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
++				   struct pnfs_device *dev);
++extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
++extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
++				   int issync);
++extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool wait);
++
++/* pnfs.c */
++extern const nfs4_stateid zero_stateid;
++
++void _pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp);
++
++int _pnfs_return_layout(struct inode *, struct pnfs_layout_range *,
++			const nfs4_stateid *stateid, /* optional */
++			enum pnfs_layoutreturn_type, bool wait);
++void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *mntfh, u32 id);
++void unmount_pnfs_layoutdriver(struct nfs_server *);
++enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
++					     const struct rpc_call_ops *, int);
++enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
++					    const struct rpc_call_ops *);
++int pnfs_initialize(void);
++void pnfs_uninitialize(void);
++void pnfs_layoutcommit_free(struct nfs4_layoutcommit_data *data);
++void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
++int pnfs_layoutcommit_inode(struct inode *inode, int sync);
++void pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent);
++void pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx);
++unsigned int pnfs_getiosize(struct nfs_server *server);
++void pnfs_set_ds_iosize(struct nfs_server *server);
++enum pnfs_try_status pnfs_try_to_commit(struct nfs_write_data *,
++					 const struct rpc_call_ops *, int);
++void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
++			   struct nfs_open_context *, struct list_head *,
++			   size_t *);
++void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
++			    size_t *);
++void pnfs_free_fsdata(struct pnfs_fsdata *fsdata);
++void pnfs_get_layout_done(struct nfs4_layoutget *, int rpc_status);
++int pnfs_layout_process(struct nfs4_layoutget *lgp);
++void pnfs_layout_release(struct pnfs_layout_hdr *, struct pnfs_layout_range *range);
++void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
++			     const nfs4_stateid *stateid);
++void pnfs_destroy_layout(struct nfs_inode *);
++void pnfs_destroy_all_layouts(struct nfs_client *);
++void put_layout(struct inode *inode);
++void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo);
++int _pnfs_write_begin(struct inode *inode, struct page *page,
++		      loff_t pos, unsigned len,
++		      struct pnfs_layout_segment *lseg,
++		      struct pnfs_fsdata **fsdata);
++int _pnfs_write_end(struct inode *inode, struct page *page,
++		    loff_t pos, unsigned len, unsigned copied,
++		    struct pnfs_layout_segment *lseg);
++
++#define PNFS_EXISTS_LDIO_OP(srv, opname) ((srv)->pnfs_curr_ld &&	\
++				     (srv)->pnfs_curr_ld->ld_io_ops &&	\
++				     (srv)->pnfs_curr_ld->ld_io_ops->opname)
++#define PNFS_EXISTS_LDPOLICY_OP(srv, opname) ((srv)->pnfs_curr_ld &&	\
++				     (srv)->pnfs_curr_ld->ld_policy_ops && \
++				     (srv)->pnfs_curr_ld->ld_policy_ops->opname)
++
++#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
++
++static inline int lo_fail_bit(u32 iomode)
++{
++	return iomode == IOMODE_RW ?	/* any other iomode maps to the RO bit */
++			 NFS_INO_RW_LAYOUT_FAILED : NFS_INO_RO_LAYOUT_FAILED;
++}
++
++/* Return non-zero if a layout driver is set (pnfs_curr_ld) on this server */
++static inline int pnfs_enabled_sb(struct nfs_server *nfss)
++{
++	return nfss->pnfs_curr_ld != NULL;
++}
++
++static inline int pnfs_grow_ok(struct pnfs_layout_segment *lseg,
++			       struct pnfs_fsdata *fsdata)
++{
++	return !fsdata  || ((struct pnfs_layout_segment *)fsdata == lseg) ||
++		!fsdata->bypass_eof;	/* read only when fsdata is not the lseg */
++}
++
++/* Layoutreturn on setattr? False without a driver or (optional) policy ops */
++static inline bool
++pnfs_ld_layoutret_on_setattr(struct inode *inode)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++
++	return pnfs_enabled_sb(nfss) && nfss->pnfs_curr_ld->ld_policy_ops &&
++	       (nfss->pnfs_curr_ld->ld_policy_ops->flags &
++		PNFS_LAYOUTRET_ON_SETATTR);
++}
++
++/* Return the roc_iomode value stored in the inode's layout header.
++ * NOTE(review): nfsi->layout is dereferenced unchecked - confirm callers. */
++static inline int
++pnfs_layout_roc_iomode(struct nfs_inode *nfsi)
++{
++	return nfsi->layout->roc_iomode;
++}
++
++static inline int pnfs_write_begin(struct file *filp, struct page *page,
++				   loff_t pos, unsigned len,
++				   struct pnfs_layout_segment *lseg,
++				   void **fsdata)
++{
++	struct inode *inode = filp->f_dentry->d_inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int status = 0;
++
++	*fsdata = lseg;	/* default: fsdata carries the bare lseg pointer */
++	if (lseg && PNFS_EXISTS_LDIO_OP(nfss, write_begin))
++		status = _pnfs_write_begin(inode, page, pos, len, lseg,
++					   (struct pnfs_fsdata **) fsdata);
++	return status;	/* 0 unless _pnfs_write_begin() was called */
++}
++
++static inline int pnfs_write_end(struct file *filp, struct page *page,
++				 loff_t pos, unsigned len, unsigned copied,
++				 struct pnfs_layout_segment *lseg)
++{
++	struct inode *inode = filp->f_dentry->d_inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++
++	if (PNFS_EXISTS_LDIO_OP(nfss, write_end))	/* op is optional */
++		return _pnfs_write_end(inode, page, pos, len, copied, lseg);
++	else
++		return 0;	/* no driver, or driver has no write_end op */
++}
++
++static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
++{
++	if (fsdata) {
++		struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode);
++
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_end_cleanup))
++			nfss->pnfs_curr_ld->ld_io_ops->write_end_cleanup(filp, fsdata);
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_begin))	/* else a bare lseg */
++			pnfs_free_fsdata(fsdata);
++	}
++}
++
++static inline int pnfs_return_layout(struct inode *ino,
++				     struct pnfs_layout_range *range,
++				     const nfs4_stateid *stateid, /* optional */
++				     enum pnfs_layoutreturn_type type,
++				     bool wait)
++{
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct nfs_server *nfss = NFS_SERVER(ino);
++
++	if (pnfs_enabled_sb(nfss) &&	/* RETURN_FILE also needs a layout */
++	    (type != RETURN_FILE || has_layout(nfsi)))
++		return _pnfs_return_layout(ino, range, stateid, type, wait);
++
++	return 0;	/* nothing to return */
++}
++
++static inline void pnfs_update_layout(struct inode *ino,
++	struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp)
++{
++	struct nfs_server *nfss = NFS_SERVER(ino);
++
++	if (pnfs_enabled_sb(nfss))
++		_pnfs_update_layout(ino, ctx, pos, count, access_type, lsegpp);
++	else {
++		if (lsegpp)	/* lsegpp may be NULL */
++			*lsegpp = NULL;	/* no layout driver: report no lseg */
++	}
++}
++
++static inline int pnfs_get_write_status(struct nfs_write_data *data)
++{
++	return data->pdata.pnfs_error;	/* error code stored in pdata */
++}
++
++static inline int pnfs_get_read_status(struct nfs_read_data *data)
++{
++	return data->pdata.pnfs_error;	/* error code stored in pdata */
++}
++
++static inline int pnfs_use_rpc(struct nfs_server *nfss)
++{
++	if (pnfs_enabled_sb(nfss))	/* with a driver, the driver decides */
++		return pnfs_ld_use_rpc_code(nfss->pnfs_curr_ld);
++
++	return 1;	/* no layout driver: always use RPC */
++}
++
++static inline struct pnfs_layout_segment *
++nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata)
++{
++	if (fsdata) {
++		struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode);
++
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_begin))	/* wrapped form */
++			return ((struct pnfs_fsdata *) fsdata)->lseg;
++	}
++	return fsdata;	/* NULL, or the lseg itself when there is no write_begin */
++}
++#else  /* CONFIG_NFS_V4_1 */
++
++static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
++{	/* no-op stub for builds without CONFIG_NFS_V4_1 */
++}
++
++static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
++{	/* no-op stub */
++}
++
++static inline void get_lseg(struct pnfs_layout_segment *lseg)
++{	/* no-op stub */
++}
++
++static inline void put_lseg(struct pnfs_layout_segment *lseg)
++{	/* no-op stub */
++}
++
++static inline void
++pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp)
++{
++	if (lsegpp)
++		*lsegpp = NULL;	/* never yields a layout segment */
++}
++
++static inline int pnfs_grow_ok(struct pnfs_layout_segment *lseg,
++			       struct pnfs_fsdata *fsdata)
++{
++	return 1;	/* growing the file is always allowed */
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_read_data(struct nfs_read_data *data,
++		      const struct rpc_call_ops *call_ops)
++{
++	return PNFS_NOT_ATTEMPTED;	/* pNFS I/O is never attempted */
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_write_data(struct nfs_write_data *data,
++		       const struct rpc_call_ops *call_ops, int how)
++{
++	return PNFS_NOT_ATTEMPTED;	/* pNFS I/O is never attempted */
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_commit(struct nfs_write_data *data,
++		   const struct rpc_call_ops *call_ops, int how)
++{
++	return PNFS_NOT_ATTEMPTED;	/* pNFS commit is never attempted */
++}
++
++static inline int pnfs_write_begin(struct file *filp, struct page *page,
++				   loff_t pos, unsigned len,
++				   struct pnfs_layout_segment *lseg,
++				   void **fsdata)
++{
++	*fsdata = NULL;	/* no per-write pNFS state */
++	return 0;
++}
++
++static inline int pnfs_write_end(struct file *filp, struct page *page,
++				 loff_t pos, unsigned len, unsigned copied,
++				 struct pnfs_layout_segment *lseg)
++{
++	return 0;
++}
++
++static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
++{	/* no-op stub */
++}
++
++static inline int pnfs_get_write_status(struct nfs_write_data *data)
++{
++	return 0;	/* no pNFS error to report */
++}
++
++static inline int pnfs_get_read_status(struct nfs_read_data *data)
++{
++	return 0;	/* no pNFS error to report */
++}
++
++static inline int pnfs_use_rpc(struct nfs_server *nfss)
++{
++	return 1;	/* always use RPC */
++}
++
++static inline int pnfs_layoutcommit_inode(struct inode *inode, int sync)
++{
++	return 0;	/* nothing to commit */
++}
++
++static inline bool
++pnfs_ld_layoutret_on_setattr(struct inode *inode)
++{
++	return false;
++}
++
++static inline int
++pnfs_layout_roc_iomode(struct nfs_inode *nfsi)
++{
++	return 0;
++}
++
++static inline int pnfs_return_layout(struct inode *ino,
++				     struct pnfs_layout_range *range,
++				     const nfs4_stateid *stateid, /* optional */
++				     enum pnfs_layoutreturn_type type,
++				     bool wait)
++{
++	return 0;	/* nothing to return */
++}
++
++static inline struct pnfs_layout_segment *
++nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata)
++{
++	return NULL;	/* fsdata never carries an lseg in this configuration */
++}
++
++#endif /* CONFIG_NFS_V4_1 */
++
++#endif /* FS_NFS_PNFS_H */
+diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.c
+--- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-08-31 20:41:19.163155499 -0400
++++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-31 20:42:05.543103394 -0400
+@@ -443,7 +443,7 @@ nfs_proc_symlink(struct inode *dir, stru
+ 	fattr = nfs_alloc_fattr();
+ 	status = -ENOMEM;
+ 	if (fh == NULL || fattr == NULL)
+-		goto out;
++		goto out_free;
+ 
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+@@ -455,7 +455,7 @@ nfs_proc_symlink(struct inode *dir, stru
+ 	 */
+ 	if (status == 0)
+ 		status = nfs_instantiate(dentry, fh, fattr);
+-
++out_free:
+ 	nfs_free_fattr(fattr);
+ 	nfs_free_fhandle(fh);
+ out:
+@@ -694,6 +694,7 @@ const struct nfs_rpc_ops nfs_v2_clientop
+ 	.dentry_ops	= &nfs_dentry_operations,
+ 	.dir_inode_ops	= &nfs_dir_inode_operations,
+ 	.file_inode_ops	= &nfs_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs_proc_get_root,
+ 	.getattr	= nfs_proc_getattr,
+ 	.setattr	= nfs_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.c
+--- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-08-31 20:41:19.164160482 -0400
++++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-31 20:42:05.544233042 -0400
+@@ -18,8 +18,12 @@
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/nfs_page.h>
++#include <linux/smp_lock.h>
++#include <linux/module.h>
+ 
+ #include <asm/system.h>
++#include <linux/module.h>
++#include "pnfs.h"
+ 
+ #include "nfs4_fs.h"
+ #include "internal.h"
+@@ -117,11 +121,14 @@ int nfs_readpage_async(struct nfs_open_c
+ 	LIST_HEAD(one_request);
+ 	struct nfs_page	*new;
+ 	unsigned int len;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	len = nfs_page_length(page);
+ 	if (len == 0)
+ 		return nfs_return_empty_page(page);
+-	new = nfs_create_request(ctx, inode, page, 0, len);
++	pnfs_update_layout(inode, ctx, 0, NFS4_MAX_UINT64, IOMODE_READ, &lseg);
++	new = nfs_create_request(ctx, inode, page, 0, len, lseg);
++	put_lseg(lseg);
+ 	if (IS_ERR(new)) {
+ 		unlock_page(page);
+ 		return PTR_ERR(new);
+@@ -155,24 +162,20 @@ static void nfs_readpage_release(struct 
+ 	nfs_release_request(req);
+ }
+ 
+-/*
+- * Set up the NFS read request struct
+- */
+-static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+-		const struct rpc_call_ops *call_ops,
+-		unsigned int count, unsigned int offset)
++int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++		      const struct rpc_call_ops *call_ops)
+ {
+-	struct inode *inode = req->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = req->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+ 		.task = &data->task,
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.rpc_message = &msg,
+ 		.callback_ops = call_ops,
+ 		.callback_data = data,
+@@ -180,9 +183,46 @@ static int nfs_read_rpcsetup(struct nfs_
+ 		.flags = RPC_TASK_ASYNC | swap_flags,
+ 	};
+ 
++	/* Set up the initial task struct. */
++	NFS_PROTO(inode)->read_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
++			data->task.tk_pid,
++			inode->i_sb->s_id,
++			(long long)NFS_FILEID(inode),
++			data->args.count,
++			(unsigned long long)data->args.offset);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	rpc_put_task(task);
++	return 0;
++}
++EXPORT_SYMBOL(nfs_initiate_read);
++
++int pnfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++		       const struct rpc_call_ops *call_ops)
++{
++	if (data->req->wb_lseg &&	/* only try pNFS when the req has an lseg */
++	    (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
++		return pnfs_get_read_status(data);
++
++	return nfs_initiate_read(data, clnt, call_ops); /* fall back to RPC */
++}
++
++/*
++ * Set up the NFS read request struct
++ */
++static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
++		const struct rpc_call_ops *call_ops,
++		unsigned int count, unsigned int offset)
++{
++	struct inode *inode = req->wb_context->path.dentry->d_inode;
++
+ 	data->req	  = req;
+ 	data->inode	  = inode;
+-	data->cred	  = msg.rpc_cred;
++	data->cred	  = req->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(inode);
+ 	data->args.offset = req_offset(req) + offset;
+@@ -190,27 +230,14 @@ static int nfs_read_rpcsetup(struct nfs_
+ 	data->args.pages  = data->pagevec;
+ 	data->args.count  = count;
+ 	data->args.context = get_nfs_open_context(req->wb_context);
++	data->args.lock_context = req->wb_lock_context;
+ 
+ 	data->res.fattr   = &data->fattr;
+ 	data->res.count   = count;
+ 	data->res.eof     = 0;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	/* Set up the initial task struct. */
+-	NFS_PROTO(inode)->read_setup(data, &msg);
+-
+-	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+-			data->task.tk_pid,
+-			inode->i_sb->s_id,
+-			(long long)NFS_FILEID(inode),
+-			count,
+-			(unsigned long long)data->args.offset);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task))
+-		return PTR_ERR(task);
+-	rpc_put_task(task);
+-	return 0;
++	return pnfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
+ }
+ 
+ static void
+@@ -354,7 +381,14 @@ static void nfs_readpage_retry(struct rp
+ {
+ 	struct nfs_readargs *argp = &data->args;
+ 	struct nfs_readres *resp = &data->res;
++	struct nfs_client *clp = NFS_SERVER(data->inode)->nfs_client;
+ 
++#ifdef CONFIG_NFS_V4_1
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		clp = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	if (resp->eof || resp->count == argp->count)
+ 		return;
+ 
+@@ -368,7 +402,10 @@ static void nfs_readpage_retry(struct rp
+ 	argp->offset += resp->count;
+ 	argp->pgbase += resp->count;
+ 	argp->count -= resp->count;
+-	nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client);
++#ifdef CONFIG_NFS_V4_1
++	data->pdata.pnfs_error = -EAGAIN;
++#endif /* CONFIG_NFS_V4_1 */
++	nfs_restart_rpc(task, clp);
+ }
+ 
+ /*
+@@ -409,13 +446,19 @@ static void nfs_readpage_release_partial
+ void nfs_read_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs_read_data *data = calldata;
++	struct nfs4_session *ds_session = NULL;
+ 
+-	if (nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client,
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		ds_session = data->fldata.ds_nfs_client->cl_session;
++	}
++	if (nfs4_setup_sequence(NFS_SERVER(data->inode), ds_session,
+ 				&data->args.seq_args, &data->res.seq_res,
+ 				0, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
++EXPORT_SYMBOL(nfs_read_prepare);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static const struct rpc_call_ops nfs_read_partial_ops = {
+@@ -568,7 +611,8 @@ readpage_async_filler(void *data, struct
+ 	if (len == 0)
+ 		return nfs_return_empty_page(page);
+ 
+-	new = nfs_create_request(desc->ctx, inode, page, 0, len);
++	new = nfs_create_request(desc->ctx, inode, page, 0, len,
++				 desc->pgio->pg_lseg);
+ 	if (IS_ERR(new))
+ 		goto out_error;
+ 
+@@ -624,6 +668,9 @@ int nfs_readpages(struct file *filp, str
+ 	if (ret == 0)
+ 		goto read_complete; /* all pages were read */
+ 
++#ifdef CONFIG_NFS_V4_1
++	pnfs_pageio_init_read(&pgio, inode, desc.ctx, pages, &rsize);
++#endif /* CONFIG_NFS_V4_1 */
+ 	if (rsize < PAGE_CACHE_SIZE)
+ 		nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
+ 	else
+@@ -632,6 +679,7 @@ int nfs_readpages(struct file *filp, str
+ 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
+ 
+ 	nfs_pageio_complete(&pgio);
++	put_lseg(pgio.pg_lseg);
+ 	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ 	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
+ read_complete:
+diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/super.c
+--- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-31 20:41:19.165170508 -0400
++++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-31 20:42:05.545114737 -0400
+@@ -64,6 +64,7 @@
+ #include "iostat.h"
+ #include "internal.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_VFS
+ 
+@@ -676,6 +677,28 @@ static int nfs_show_options(struct seq_f
+ 
+ 	return 0;
+ }
++#ifdef CONFIG_NFS_V4_1
++void show_sessions(struct seq_file *m, struct nfs_server *server)
++{
++	if (nfs4_has_session(server->nfs_client))
++		seq_printf(m, ",sessions");	/* nothing printed otherwise */
++}
++#else
++void show_sessions(struct seq_file *m, struct nfs_server *server) {}
++#endif
++
++#ifdef CONFIG_NFS_V4_1
++void show_pnfs(struct seq_file *m, struct nfs_server *server)
++{
++	seq_printf(m, ",pnfs=");	/* key is always emitted */
++	if (server->pnfs_curr_ld)
++		seq_printf(m, "%s", server->pnfs_curr_ld->name);
++	else
++		seq_printf(m, "not configured");	/* no layout driver set */
++}
++#else  /* CONFIG_NFS_V4_1 */
++void show_pnfs(struct seq_file *m, struct nfs_server *server) {}
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ /*
+  * Present statistical information for this VFS mountpoint
+@@ -714,6 +737,8 @@ static int nfs_show_stats(struct seq_fil
+ 		seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
+ 		seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
+ 		seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
++		show_sessions(m, nfss);
++		show_pnfs(m, nfss);
+ 	}
+ #endif
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unlink.c
+--- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-08-31 20:41:19.166151095 -0400
++++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-31 20:42:05.546131839 -0400
+@@ -110,7 +110,7 @@ void nfs_unlink_prepare(struct rpc_task 
+ 	struct nfs_unlinkdata *data = calldata;
+ 	struct nfs_server *server = NFS_SERVER(data->dir);
+ 
+-	if (nfs4_setup_sequence(server->nfs_client, &data->args.seq_args,
++	if (nfs4_setup_sequence(server, NULL, &data->args.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/write.c
+--- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-08-31 20:41:17.273213379 -0400
++++ linux-2.6.34.noarch/fs/nfs/write.c	2010-08-31 20:42:05.548212682 -0400
+@@ -20,6 +20,7 @@
+ #include <linux/nfs_mount.h>
+ #include <linux/nfs_page.h>
+ #include <linux/backing-dev.h>
++#include <linux/module.h>
+ 
+ #include <asm/uaccess.h>
+ 
+@@ -28,6 +29,7 @@
+ #include "iostat.h"
+ #include "nfs4_fs.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
+ 
+@@ -59,6 +61,7 @@ struct nfs_write_data *nfs_commitdata_al
+ 	}
+ 	return p;
+ }
++EXPORT_SYMBOL(nfs_commitdata_alloc);
+ 
+ void nfs_commit_free(struct nfs_write_data *p)
+ {
+@@ -66,6 +69,7 @@ void nfs_commit_free(struct nfs_write_da
+ 		kfree(p->pagevec);
+ 	mempool_free(p, nfs_commit_mempool);
+ }
++EXPORT_SYMBOL(nfs_commit_free);
+ 
+ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+ {
+@@ -418,6 +422,17 @@ static void nfs_inode_remove_request(str
+ 	nfs_clear_request(req);
+ 	nfs_release_request(req);
+ }
++static void
++nfs_mark_request_nopnfs(struct nfs_page *req)
++{
++	struct pnfs_layout_segment *lseg = req->wb_lseg;
++
++	if (req->wb_lseg == NULL)
++		return;	/* request carries no lseg; nothing to do */
++	req->wb_lseg = NULL;	/* detach the lseg before dropping it */
++	put_lseg(lseg);
++	dprintk(" retry through MDS\n");
++}
+ 
+ static void
+ nfs_mark_request_dirty(struct nfs_page *req)
+@@ -523,7 +538,7 @@ nfs_need_commit(struct nfs_inode *nfsi)
+  * The requests are *not* checked to ensure that they form a contiguous set.
+  */
+ static int
+-nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
++nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages, int *use_pnfs)
+ {
+ 	struct nfs_inode *nfsi = NFS_I(inode);
+ 	int ret;
+@@ -531,7 +546,8 @@ nfs_scan_commit(struct inode *inode, str
+ 	if (!nfs_need_commit(nfsi))
+ 		return 0;
+ 
+-	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
++	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT,
++			    use_pnfs);
+ 	if (ret > 0)
+ 		nfsi->ncommit -= ret;
+ 	if (nfs_need_commit(NFS_I(inode)))
+@@ -560,7 +576,8 @@ static inline int nfs_scan_commit(struct
+ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
+ 		struct page *page,
+ 		unsigned int offset,
+-		unsigned int bytes)
++		unsigned int bytes,
++		struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_page *req;
+ 	unsigned int rqend;
+@@ -585,8 +602,8 @@ static struct nfs_page *nfs_try_to_updat
+ 		 * Note: nfs_flush_incompatible() will already
+ 		 * have flushed out requests having wrong owners.
+ 		 */
+-		if (offset > rqend
+-		    || end < req->wb_offset)
++		if (offset > rqend || end < req->wb_offset ||
++		    req->wb_lseg != lseg)
+ 			goto out_flushme;
+ 
+ 		if (nfs_set_page_tag_locked(req))
+@@ -634,16 +651,17 @@ out_err:
+  * already called nfs_flush_incompatible() if necessary.
+  */
+ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
+-		struct page *page, unsigned int offset, unsigned int bytes)
++		struct page *page, unsigned int offset, unsigned int bytes,
++		struct pnfs_layout_segment *lseg)
+ {
+ 	struct inode *inode = page->mapping->host;
+ 	struct nfs_page	*req;
+ 	int error;
+ 
+-	req = nfs_try_to_update_request(inode, page, offset, bytes);
++	req = nfs_try_to_update_request(inode, page, offset, bytes, lseg);
+ 	if (req != NULL)
+ 		goto out;
+-	req = nfs_create_request(ctx, inode, page, offset, bytes);
++	req = nfs_create_request(ctx, inode, page, offset, bytes, lseg);
+ 	if (IS_ERR(req))
+ 		goto out;
+ 	error = nfs_inode_add_request(inode, req);
+@@ -656,23 +674,27 @@ out:
+ }
+ 
+ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
+-		unsigned int offset, unsigned int count)
++			       unsigned int offset, unsigned int count,
++			       struct pnfs_layout_segment *lseg,
++			       void *fsdata)
+ {
+ 	struct nfs_page	*req;
+ 
+-	req = nfs_setup_write_request(ctx, page, offset, count);
++	req = nfs_setup_write_request(ctx, page, offset, count, lseg);
+ 	if (IS_ERR(req))
+ 		return PTR_ERR(req);
+ 	nfs_mark_request_dirty(req);
+ 	/* Update file length */
+-	nfs_grow_file(page, offset, count);
++	if (pnfs_grow_ok(lseg, fsdata))
++		nfs_grow_file(page, offset, count);
+ 	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+ 	nfs_mark_request_dirty(req);
+ 	nfs_clear_page_tag_locked(req);
+ 	return 0;
+ }
+ 
+-int nfs_flush_incompatible(struct file *file, struct page *page)
++int nfs_flush_incompatible(struct file *file, struct page *page,
++			   struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_open_context *ctx = nfs_file_open_context(file);
+ 	struct nfs_page	*req;
+@@ -689,7 +711,10 @@ int nfs_flush_incompatible(struct file *
+ 		req = nfs_page_find_request(page);
+ 		if (req == NULL)
+ 			return 0;
+-		do_flush = req->wb_page != page || req->wb_context != ctx;
++		do_flush = req->wb_page != page || req->wb_context != ctx ||
++			req->wb_lock_context->lockowner != current->files ||
++			req->wb_lock_context->pid != current->tgid ||
++			req->wb_lseg != lseg;
+ 		nfs_release_request(req);
+ 		if (!do_flush)
+ 			return 0;
+@@ -716,7 +741,8 @@ static int nfs_write_pageuptodate(struct
+  * things with a page scheduled for an RPC call (e.g. invalidate it).
+  */
+ int nfs_updatepage(struct file *file, struct page *page,
+-		unsigned int offset, unsigned int count)
++		   unsigned int offset, unsigned int count,
++		   struct pnfs_layout_segment *lseg, void *fsdata)
+ {
+ 	struct nfs_open_context *ctx = nfs_file_open_context(file);
+ 	struct inode	*inode = page->mapping->host;
+@@ -741,7 +767,7 @@ int nfs_updatepage(struct file *file, st
+ 		offset = 0;
+ 	}
+ 
+-	status = nfs_writepage_setup(ctx, page, offset, count);
++	status = nfs_writepage_setup(ctx, page, offset, count, lseg, fsdata);
+ 	if (status < 0)
+ 		nfs_set_pageerror(page);
+ 
+@@ -771,25 +797,21 @@ static int flush_task_priority(int how)
+ 	return RPC_PRIORITY_NORMAL;
+ }
+ 
+-/*
+- * Set up the argument/result storage required for the RPC call.
+- */
+-static int nfs_write_rpcsetup(struct nfs_page *req,
+-		struct nfs_write_data *data,
+-		const struct rpc_call_ops *call_ops,
+-		unsigned int count, unsigned int offset,
+-		int how)
++int nfs_initiate_write(struct nfs_write_data *data,
++		       struct rpc_clnt *clnt,
++		       const struct rpc_call_ops *call_ops,
++		       int how)
+ {
+-	struct inode *inode = req->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int priority = flush_task_priority(how);
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = req->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.task = &data->task,
+ 		.rpc_message = &msg,
+ 		.callback_ops = call_ops,
+@@ -800,12 +822,62 @@ static int nfs_write_rpcsetup(struct nfs
+ 	};
+ 	int ret = 0;
+ 
++	/* Set up the initial task struct.  */
++	NFS_PROTO(inode)->write_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated write call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task)) {
++		ret = PTR_ERR(task);
++		goto out;
++	}
++	if (how & FLUSH_SYNC) {
++		ret = rpc_wait_for_completion_task(task);
++		if (ret == 0)
++			ret = task->tk_status;
++	}
++	rpc_put_task(task);
++out:
++	return ret;
++}
++EXPORT_SYMBOL(nfs_initiate_write);
++
++int pnfs_initiate_write(struct nfs_write_data *data,
++			struct rpc_clnt *clnt,
++			const struct rpc_call_ops *call_ops,
++			int how)
++{
++	if (data->req->wb_lseg &&
++	    (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
++		return pnfs_get_write_status(data);
++
++	return nfs_initiate_write(data, clnt, call_ops, how);
++}
++
++/*
++ * Set up the argument/result storage required for the RPC call.
++ */
++static int nfs_write_rpcsetup(struct nfs_page *req,
++		struct nfs_write_data *data,
++		const struct rpc_call_ops *call_ops,
++		unsigned int count, unsigned int offset,
++		int how)
++{
++	struct inode *inode = req->wb_context->path.dentry->d_inode;
++
+ 	/* Set up the RPC argument and reply structs
+ 	 * NB: take care not to mess about with data->commit et al. */
+ 
+ 	data->req = req;
+ 	data->inode = inode = req->wb_context->path.dentry->d_inode;
+-	data->cred = msg.rpc_cred;
++	data->cred = req->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(inode);
+ 	data->args.offset = req_offset(req) + offset;
+@@ -813,6 +885,7 @@ static int nfs_write_rpcsetup(struct nfs
+ 	data->args.pages  = data->pagevec;
+ 	data->args.count  = count;
+ 	data->args.context = get_nfs_open_context(req->wb_context);
++	data->args.lock_context = req->wb_lock_context;
+ 	data->args.stable  = NFS_UNSTABLE;
+ 	if (how & FLUSH_STABLE) {
+ 		data->args.stable = NFS_DATA_SYNC;
+@@ -825,30 +898,7 @@ static int nfs_write_rpcsetup(struct nfs
+ 	data->res.verf    = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	/* Set up the initial task struct.  */
+-	NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-	dprintk("NFS: %5u initiated write call "
+-		"(req %s/%lld, %u bytes @ offset %llu)\n",
+-		data->task.tk_pid,
+-		inode->i_sb->s_id,
+-		(long long)NFS_FILEID(inode),
+-		count,
+-		(unsigned long long)data->args.offset);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task)) {
+-		ret = PTR_ERR(task);
+-		goto out;
+-	}
+-	if (how & FLUSH_SYNC) {
+-		ret = rpc_wait_for_completion_task(task);
+-		if (ret == 0)
+-			ret = task->tk_status;
+-	}
+-	rpc_put_task(task);
+-out:
+-	return ret;
++	return pnfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
+ }
+ 
+ /* If a nfs_flush_* function fails, it should remove reqs from @head and
+@@ -859,6 +909,7 @@ static void nfs_redirty_request(struct n
+ {
+ 	struct page *page = req->wb_page;
+ 
++	nfs_mark_request_nopnfs(req);
+ 	nfs_mark_request_dirty(req);
+ 	nfs_clear_page_tag_locked(req);
+ 	nfs_end_page_writeback(page);
+@@ -971,6 +1022,10 @@ static void nfs_pageio_init_write(struct
+ {
+ 	size_t wsize = NFS_SERVER(inode)->wsize;
+ 
++#ifdef CONFIG_NFS_V4_1
++	pnfs_pageio_init_write(pgio, inode, &wsize);
++#endif /* CONFIG_NFS_V4_1 */
++
+ 	if (wsize < PAGE_CACHE_SIZE)
+ 		nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
+ 	else
+@@ -1036,13 +1091,27 @@ out:
+ void nfs_write_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs_write_data *data = calldata;
+-	struct nfs_client *clp = (NFS_SERVER(data->inode))->nfs_client;
++	struct nfs4_session *ds_session = NULL;
+ 
+-	if (nfs4_setup_sequence(clp, &data->args.seq_args,
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		ds_session = data->fldata.ds_nfs_client->cl_session;
++	} else if (data->args.count > NFS_SERVER(data->inode)->wsize) {
++		/* no DS client: clamp count to wsize, saving the original */
++		data->pdata.orig_count = data->args.count;
++		data->args.count = NFS_SERVER(data->inode)->wsize;
++		dprintk("%s: trimmed count %u to wsize %u\n", __func__,
++			data->pdata.orig_count, data->args.count);
++	} else
++		data->pdata.orig_count = 0;
++
++	if (nfs4_setup_sequence(NFS_SERVER(data->inode), ds_session,
++				&data->args.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
++EXPORT_SYMBOL(nfs_write_prepare);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static const struct rpc_call_ops nfs_write_partial_ops = {
+@@ -1126,10 +1195,11 @@ int nfs_writeback_done(struct rpc_task *
+ 	struct nfs_writeargs	*argp = &data->args;
+ 	struct nfs_writeres	*resp = &data->res;
+ 	struct nfs_server	*server = NFS_SERVER(data->inode);
++	struct nfs_client	*clp = server->nfs_client;
+ 	int status;
+ 
+-	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
+-		task->tk_pid, task->tk_status);
++	dprintk("NFS: %5u nfs_writeback_done (status %d count %u)\n",
++		task->tk_pid, task->tk_status, resp->count);
+ 
+ 	/*
+ 	 * ->write_done will attempt to use post-op attributes to detect
+@@ -1142,6 +1212,13 @@ int nfs_writeback_done(struct rpc_task *
+ 	if (status != 0)
+ 		return status;
+ 	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
++#ifdef CONFIG_NFS_V4_1
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		clp = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+ 	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
+@@ -1158,7 +1235,7 @@ int nfs_writeback_done(struct rpc_task *
+ 		if (time_before(complain, jiffies)) {
+ 			dprintk("NFS:       faulty NFS server %s:"
+ 				" (committed = %d) != (stable = %d)\n",
+-				server->nfs_client->cl_hostname,
++				clp->cl_hostname,
+ 				resp->verf->committed, argp->stable);
+ 			complain = jiffies + 300 * HZ;
+ 		}
+@@ -1168,6 +1245,9 @@ int nfs_writeback_done(struct rpc_task *
+ 	if (task->tk_status >= 0 && resp->count < argp->count) {
+ 		static unsigned long    complain;
+ 
++		dprintk("NFS:       short write:"
++			" (resp->count %u) < (argp->count = %u)\n",
++			resp->count, argp->count);
+ 		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+ 
+ 		/* Has the server at least made some progress? */
+@@ -1184,7 +1264,10 @@ int nfs_writeback_done(struct rpc_task *
+ 				 */
+ 				argp->stable = NFS_FILE_SYNC;
+ 			}
+-			nfs_restart_rpc(task, server->nfs_client);
++#ifdef CONFIG_NFS_V4_1
++			data->pdata.pnfs_error = -EAGAIN;
++#endif /* CONFIG_NFS_V4_1 */
++			nfs_restart_rpc(task, clp);
+ 			return -EAGAIN;
+ 		}
+ 		if (time_before(complain, jiffies)) {
+@@ -1228,40 +1311,73 @@ static void nfs_commitdata_release(void 
+ 	nfs_commit_free(wdata);
+ }
+ 
+-/*
+- * Set up the argument/result storage required for the RPC call.
+- */
+-static int nfs_commit_rpcsetup(struct list_head *head,
+-		struct nfs_write_data *data,
+-		int how)
++int nfs_initiate_commit(struct nfs_write_data *data,
++			struct rpc_clnt *clnt,
++			const struct rpc_call_ops *call_ops,
++			int how)
+ {
+-	struct nfs_page *first = nfs_list_entry(head->next);
+-	struct inode *inode = first->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int priority = flush_task_priority(how);
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = first->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+ 		.task = &data->task,
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.rpc_message = &msg,
+-		.callback_ops = &nfs_commit_ops,
++		.callback_ops = call_ops,
+ 		.callback_data = data,
+ 		.workqueue = nfsiod_workqueue,
+ 		.flags = RPC_TASK_ASYNC,
+ 		.priority = priority,
+ 	};
+ 
++	/* Set up the initial task struct.  */
++	NFS_PROTO(inode)->commit_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	rpc_put_task(task);
++	return 0;
++}
++EXPORT_SYMBOL(nfs_initiate_commit);
++
++/* Try pnfs_try_to_commit() if @pnfs; fall back to nfs_initiate_commit(). */
++int pnfs_initiate_commit(struct nfs_write_data *data,
++			 struct rpc_clnt *clnt,
++			 const struct rpc_call_ops *call_ops,
++			 int how, int pnfs)
++{
++	if (pnfs &&
++	    (pnfs_try_to_commit(data, call_ops, how) == PNFS_ATTEMPTED))
++		return pnfs_get_write_status(data);
++
++	return nfs_initiate_commit(data, clnt, call_ops, how);
++}
++
++/*
++ * Set up the argument/result storage required for the RPC call.
++ */
++static int nfs_commit_rpcsetup(struct list_head *head,
++		struct nfs_write_data *data,
++		int how, int pnfs)
++{
++	struct nfs_page *first = nfs_list_entry(head->next);
++	struct inode *inode = first->wb_context->path.dentry->d_inode;
++
+ 	/* Set up the RPC argument and reply structs
+ 	 * NB: take care not to mess about with data->commit et al. */
+ 
+ 	list_splice_init(head, &data->pages);
+ 
+ 	data->inode	  = inode;
+-	data->cred	  = msg.rpc_cred;
++	data->cred	  = first->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(data->inode);
+ 	/* Note: we always request a commit of the entire inode */
+@@ -1272,45 +1388,47 @@ static int nfs_commit_rpcsetup(struct li
+ 	data->res.fattr   = &data->fattr;
+ 	data->res.verf    = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
++	kref_init(&data->refcount);
++	data->parent      = NULL;
++	data->args.context = first->wb_context;  /* used by commit done */
+ 
+-	/* Set up the initial task struct.  */
+-	NFS_PROTO(inode)->commit_setup(data, &msg);
++	return pnfs_initiate_commit(data, NFS_CLIENT(inode), &nfs_commit_ops,
++				    how, pnfs);
++}
+ 
+-	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
++/* Handle memory error during commit */
++void nfs_mark_list_commit(struct list_head *head)
++{
++	struct nfs_page         *req;
+ 
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task))
+-		return PTR_ERR(task);
+-	rpc_put_task(task);
+-	return 0;
++	while (!list_empty(head)) {
++		req = nfs_list_entry(head->next);
++		nfs_list_remove_request(req);
++		nfs_mark_request_commit(req);
++		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
++		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
++				BDI_RECLAIMABLE);
++		nfs_clear_page_tag_locked(req);
++	}
+ }
++EXPORT_SYMBOL(nfs_mark_list_commit);
+ 
+ /*
+  * Commit dirty pages
+  */
+ static int
+-nfs_commit_list(struct inode *inode, struct list_head *head, int how)
++nfs_commit_list(struct inode *inode, struct list_head *head, int how, int pnfs)
+ {
+ 	struct nfs_write_data	*data;
+-	struct nfs_page         *req;
+ 
+ 	data = nfs_commitdata_alloc();
+-
+ 	if (!data)
+ 		goto out_bad;
+ 
+ 	/* Set up the argument struct */
+-	return nfs_commit_rpcsetup(head, data, how);
++	return nfs_commit_rpcsetup(head, data, how, pnfs);
+  out_bad:
+-	while (!list_empty(head)) {
+-		req = nfs_list_entry(head->next);
+-		nfs_list_remove_request(req);
+-		nfs_mark_request_commit(req);
+-		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+-		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
+-				BDI_RECLAIMABLE);
+-		nfs_clear_page_tag_locked(req);
+-	}
++	nfs_mark_list_commit(head);
+ 	nfs_commit_clear_lock(NFS_I(inode));
+ 	return -ENOMEM;
+ }
+@@ -1330,6 +1448,19 @@ static void nfs_commit_done(struct rpc_t
+ 		return;
+ }
+ 
++static inline void nfs_commit_cleanup(struct kref *kref)
++{
++	struct nfs_write_data *data;
++
++	data = container_of(kref, struct nfs_write_data, refcount);
++	/* Clear lock only when all cloned commits are finished */
++	if (data->parent)
++		kref_put(&data->parent->refcount, nfs_commit_cleanup);
++	else
++		nfs_commit_clear_lock(NFS_I(data->inode));
++	nfs_commitdata_release(data);
++}
++
+ static void nfs_commit_release(void *calldata)
+ {
+ 	struct nfs_write_data	*data = calldata;
+@@ -1347,6 +1478,11 @@ static void nfs_commit_release(void *cal
+ 			req->wb_bytes,
+ 			(long long)req_offset(req));
+ 		if (status < 0) {
++			if (req->wb_lseg) {
++				nfs_mark_request_nopnfs(req);
++				nfs_mark_request_dirty(req);
++				goto next;
++			}
+ 			nfs_context_set_write_error(req->wb_context, status);
+ 			nfs_inode_remove_request(req);
+ 			dprintk(", error = %d\n", status);
+@@ -1363,12 +1499,12 @@ static void nfs_commit_release(void *cal
+ 		}
+ 		/* We have a mismatch. Write the page again */
+ 		dprintk(" mismatch\n");
++		nfs_mark_request_nopnfs(req);
+ 		nfs_mark_request_dirty(req);
+ 	next:
+ 		nfs_clear_page_tag_locked(req);
+ 	}
+-	nfs_commit_clear_lock(NFS_I(data->inode));
+-	nfs_commitdata_release(calldata);
++	kref_put(&data->refcount, nfs_commit_cleanup);
+ }
+ 
+ static const struct rpc_call_ops nfs_commit_ops = {
+@@ -1384,21 +1520,22 @@ int nfs_commit_inode(struct inode *inode
+ 	LIST_HEAD(head);
+ 	int may_wait = how & FLUSH_SYNC;
+ 	int res = 0;
++	int use_pnfs = 0;
+ 
+ 	if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
+ 		goto out_mark_dirty;
+ 	spin_lock(&inode->i_lock);
+-	res = nfs_scan_commit(inode, &head, 0, 0);
++	res = nfs_scan_commit(inode, &head, 0, 0, &use_pnfs);
+ 	spin_unlock(&inode->i_lock);
+ 	if (res) {
+-		int error = nfs_commit_list(inode, &head, how);
++		int error = nfs_commit_list(inode, &head, how, use_pnfs);
+ 		if (error < 0)
+ 			return error;
+-		if (may_wait)
++		if (may_wait) {
+ 			wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
+ 					nfs_wait_bit_killable,
+ 					TASK_KILLABLE);
+-		else
++		} else
+ 			goto out_mark_dirty;
+ 	} else
+ 		nfs_commit_clear_lock(NFS_I(inode));
+@@ -1451,7 +1588,18 @@ static int nfs_commit_unstable_pages(str
+ 
+ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+ {
+-	return nfs_commit_unstable_pages(inode, wbc);
++	int ret;
++	ret = nfs_commit_unstable_pages(inode, wbc);
++	if (ret >= 0 && layoutcommit_needed(NFS_I(inode))) {
++		int err, sync = wbc->sync_mode;
++
++		if (wbc->nonblocking || wbc->for_background)
++			sync = 0;
++		err = pnfs_layoutcommit_inode(inode, sync);
++		if (err < 0)
++			ret = err;
++	}
++	return ret;
+ }
+ 
+ /*
+@@ -1459,6 +1607,7 @@ int nfs_write_inode(struct inode *inode,
+  */
+ int nfs_wb_all(struct inode *inode)
+ {
++	int ret;
+ 	struct writeback_control wbc = {
+ 		.sync_mode = WB_SYNC_ALL,
+ 		.nr_to_write = LONG_MAX,
+@@ -1466,7 +1615,8 @@ int nfs_wb_all(struct inode *inode)
+ 		.range_end = LLONG_MAX,
+ 	};
+ 
+-	return sync_inode(inode, &wbc);
++	ret = sync_inode(inode, &wbc);
++	return ret;
+ }
+ 
+ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
+diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/include/linux/exportfs.h
+--- linux-2.6.34.noarch/include/linux/exportfs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-08-31 20:42:05.577222704 -0400
+@@ -2,6 +2,7 @@
+ #define LINUX_EXPORTFS_H 1
+ 
+ #include <linux/types.h>
++#include <linux/exp_xdr.h>
+ 
+ struct dentry;
+ struct inode;
+@@ -175,4 +176,62 @@ extern struct dentry *generic_fh_to_pare
+ 	struct fid *fid, int fh_len, int fh_type,
+ 	struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen));
+ 
++#if defined(CONFIG_EXPORTFS_FILE_LAYOUT)
++struct pnfs_filelayout_device;
++struct pnfs_filelayout_layout;
++
++extern int filelayout_encode_devinfo(struct exp_xdr_stream *xdr,
++				     const struct pnfs_filelayout_device *fdev);
++extern enum nfsstat4 filelayout_encode_layout(struct exp_xdr_stream *xdr,
++				      const struct pnfs_filelayout_layout *flp);
++#endif /* defined(CONFIG_EXPORTFS_FILE_LAYOUT) */
++
++#if defined(CONFIG_EXPORTFS_FILE_LAYOUT)
++struct list_head;
++
++extern int blocklayout_encode_devinfo(struct exp_xdr_stream *xdr,
++				      const struct list_head *volumes);
++
++extern enum nfsstat4 blocklayout_encode_layout(struct exp_xdr_stream *xdr,
++					       const struct list_head *layouts);
++#endif /* CONFIG_EXPORTFS_FILE_LAYOUT; NOTE(review): guards block-layout decls - confirm symbol */
++
++#if defined(CONFIG_PNFSD)
++#include <linux/module.h>
++
++struct pnfsd_cb_operations;
++
++struct pnfsd_cb_ctl {
++	spinlock_t lock;
++	struct module *module;
++	const struct pnfsd_cb_operations *cb_op;
++};
++
++/* in expfs.c so that file systems can depend on it */
++extern struct pnfsd_cb_ctl pnfsd_cb_ctl;
++
++static inline int
++pnfsd_get_cb_op(struct pnfsd_cb_ctl *ctl)
++{
++	int ret = -ENOENT;
++
++	spin_lock(&pnfsd_cb_ctl.lock);
++	if (!pnfsd_cb_ctl.cb_op)
++		goto out;
++	if (!try_module_get(pnfsd_cb_ctl.module))
++		goto out;
++	ctl->cb_op = pnfsd_cb_ctl.cb_op;
++	ctl->module = pnfsd_cb_ctl.module;
++	ret = 0;
++out:
++	spin_unlock(&pnfsd_cb_ctl.lock);
++	return ret;
++}
++
++static inline void
++pnfsd_put_cb_op(struct pnfsd_cb_ctl *ctl)
++{
++	module_put(ctl->module);
++}
++#endif /* CONFIG_PNFSD */
+ #endif /* LINUX_EXPORTFS_H */
+diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/include/linux/exp_xdr.h
+--- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-08-31 20:42:05.576053304 -0400
++++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-08-31 20:42:05.576053304 -0400
+@@ -0,0 +1,141 @@
++#ifndef _LINUX_EXP_XDR_H
++#define _LINUX_EXP_XDR_H
++
++#include <asm/byteorder.h>
++#include <asm/unaligned.h>
++#include <linux/string.h>
++
++struct exp_xdr_stream {
++	__be32 *p;
++	__be32 *end;
++};
++
++/**
++ * exp_xdr_qwords - Calculate the number of quad-words holding nbytes
++ * @nbytes: number of bytes to encode
++ */
++static inline size_t
++exp_xdr_qwords(__u32 nbytes)
++{
++	return DIV_ROUND_UP(nbytes, 4);
++}
++
++/**
++ * exp_xdr_qbytes - Calculate the number of bytes holding qwords
++ * @qwords: number of quad-words to encode
++ */
++static inline size_t
++exp_xdr_qbytes(size_t qwords)
++{
++	return qwords << 2;
++}
++
++/**
++ * exp_xdr_reserve_space - Reserve buffer space for sending
++ * @xdr: pointer to exp_xdr_stream
++ * @nbytes: number of bytes to reserve
++ *
++ * Checks that we have enough buffer space to encode 'nbytes' more
++ * bytes of data. If so, update the xdr stream.
++ */
++static inline __be32 *
++exp_xdr_reserve_space(struct exp_xdr_stream *xdr, size_t nbytes)
++{
++	__be32 *p = xdr->p;
++	__be32 *q;
++
++	/* align nbytes on the next 32-bit boundary */
++	q = p + exp_xdr_qwords(nbytes);
++	if (unlikely(q > xdr->end || q < p))
++		return NULL;
++	xdr->p = q;
++	return p;
++}
++
++/**
++ * exp_xdr_reserve_qwords - Reserve buffer space for sending
++ * @xdr: pointer to exp_xdr_stream
++ * @qwords: number of quad words (u32's) to reserve
++ */
++static inline __be32 *
++exp_xdr_reserve_qwords(struct exp_xdr_stream *xdr, size_t qwords)
++{
++	return exp_xdr_reserve_space(xdr, exp_xdr_qbytes(qwords));
++}
++
++/**
++ * exp_xdr_encode_u32 - Encode an unsigned 32-bit value onto a xdr stream
++ * @p: pointer to encoding destination
++ * @val: value to encode
++ */
++static inline __be32 *
++exp_xdr_encode_u32(__be32 *p, __u32 val)
++{
++	*p = cpu_to_be32(val);
++	return p + 1;
++}
++
++/**
++ * exp_xdr_encode_u64 - Encode an unsigned 64-bit value onto a xdr stream
++ * @p: pointer to encoding destination
++ * @val: value to encode
++ */
++static inline __be32 *
++exp_xdr_encode_u64(__be32 *p, __u64 val)
++{
++	put_unaligned_be64(val, p);
++	return p + 2;
++}
++
++/**
++ * exp_xdr_encode_bytes - Encode an array of bytes onto a xdr stream
++ * @p: pointer to encoding destination
++ * @ptr: pointer to the array of bytes
++ * @nbytes: number of bytes to encode
++ */
++static inline __be32 *
++exp_xdr_encode_bytes(__be32 *p, const void *ptr, __u32 nbytes)
++{
++	if (likely(nbytes != 0)) {
++		unsigned int qwords = exp_xdr_qwords(nbytes);
++		unsigned int padding = exp_xdr_qbytes(qwords) - nbytes;
++
++		memcpy(p, ptr, nbytes);
++		if (padding != 0)
++			memset((char *)p + nbytes, 0, padding);
++		p += qwords;
++	}
++	return p;
++}
++
++/**
++ * exp_xdr_encode_opaque - Encode an opaque type onto a xdr stream
++ * @p: pointer to encoding destination
++ * @ptr: pointer to the opaque array
++ * @nbytes: number of bytes to encode
++ *
++ * Encodes the 32-bit opaque size in bytes followed by the opaque value.
++ */
++static inline __be32 *
++exp_xdr_encode_opaque(__be32 *p, const void *ptr, __u32 nbytes)
++{
++	p = exp_xdr_encode_u32(p, nbytes);
++	return exp_xdr_encode_bytes(p, ptr, nbytes);
++}
++
++/**
++ * exp_xdr_encode_opaque_len - Encode the opaque length onto a xdr stream
++ * @lenp: pointer to the opaque length destination
++ * @endp: pointer to the end of the opaque array
++ *
++ * Encodes the 32-bit opaque size in bytes given the start and end pointers
++ */
++static inline __be32 *
++exp_xdr_encode_opaque_len(__be32 *lenp, const void *endp)
++{
++	size_t nbytes = (char *)endp - (char *)(lenp + 1);
++
++	exp_xdr_encode_u32(lenp, nbytes);
++	return lenp + 1 + exp_xdr_qwords(nbytes);
++}
++#endif /* _LINUX_EXP_XDR_H */
+diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include/linux/fs.h
+--- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-08-31 20:41:19.120034834 -0400
++++ linux-2.6.34.noarch/include/linux/fs.h	2010-08-31 20:42:05.579212604 -0400
+@@ -387,6 +387,7 @@ struct inodes_stat_t {
+ #include <asm/byteorder.h>
+ 
+ struct export_operations;
++struct pnfs_export_operations;
+ struct hd_geometry;
+ struct iovec;
+ struct nameidata;
+@@ -1329,6 +1330,7 @@ struct super_block {
+ 	const struct dquot_operations	*dq_op;
+ 	const struct quotactl_ops	*s_qcop;
+ 	const struct export_operations *s_export_op;
++	const struct pnfs_export_operations *s_pnfs_op;
+ 	unsigned long		s_flags;
+ 	unsigned long		s_magic;
+ 	struct dentry		*s_root;
+diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/include/linux/nfs4.h
+--- linux-2.6.34.noarch/include/linux/nfs4.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-08-31 20:42:05.581035627 -0400
+@@ -17,7 +17,10 @@
+ 
+ #define NFS4_BITMAP_SIZE	2
+ #define NFS4_VERIFIER_SIZE	8
+-#define NFS4_STATEID_SIZE	16
++#define NFS4_CLIENTID_SIZE	8
++#define NFS4_STATEID_SEQID_SIZE 4
++#define NFS4_STATEID_OTHER_SIZE 12
++#define NFS4_STATEID_SIZE	(NFS4_STATEID_SEQID_SIZE + NFS4_STATEID_OTHER_SIZE)
+ #define NFS4_FHSIZE		128
+ #define NFS4_MAXPATHLEN		PATH_MAX
+ #define NFS4_MAXNAMLEN		NAME_MAX
+@@ -119,6 +122,13 @@
+ #define EXCHGID4_FLAG_MASK_A			0x40070003
+ #define EXCHGID4_FLAG_MASK_R			0x80070003
+ 
++static inline bool
++is_ds_only_session(u32 exchange_flags)
++{
++	u32 mask = EXCHGID4_FLAG_USE_PNFS_DS | EXCHGID4_FLAG_USE_PNFS_MDS;
++	return (exchange_flags & mask) == EXCHGID4_FLAG_USE_PNFS_DS;
++}
++
+ #define SEQ4_STATUS_CB_PATH_DOWN		0x00000001
+ #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING	0x00000002
+ #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED	0x00000004
+@@ -166,8 +176,25 @@ struct nfs4_acl {
+ 	struct nfs4_ace	aces[0];
+ };
+ 
++struct nfs4_fsid {
++	u64	major;
++	u64	minor;
++};
++
+ typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier;
+-typedef struct { char data[NFS4_STATEID_SIZE]; } nfs4_stateid;
++typedef struct { char data[NFS4_CLIENTID_SIZE]; } nfs4_clientid;
++
++struct nfs41_stateid {
++	__be32 seqid;
++	char other[NFS4_STATEID_OTHER_SIZE];
++} __attribute__ ((packed));
++
++typedef struct {
++	union {
++		char data[NFS4_STATEID_SIZE];
++		struct nfs41_stateid stateid;
++	} u;
++} nfs4_stateid;
+ 
+ enum nfs_opnum4 {
+ 	OP_ACCESS = 3,
+@@ -471,6 +498,8 @@ enum lock_type4 {
+ #define FATTR4_WORD1_TIME_MODIFY        (1UL << 21)
+ #define FATTR4_WORD1_TIME_MODIFY_SET    (1UL << 22)
+ #define FATTR4_WORD1_MOUNTED_ON_FILEID  (1UL << 23)
++#define FATTR4_WORD1_FS_LAYOUT_TYPES    (1UL << 30)
++#define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
+ 
+ #define NFSPROC4_NULL 0
+ #define NFSPROC4_COMPOUND 1
+@@ -523,6 +552,7 @@ enum {
+ 	NFSPROC4_CLNT_GETACL,
+ 	NFSPROC4_CLNT_SETACL,
+ 	NFSPROC4_CLNT_FS_LOCATIONS,
++	NFSPROC4_CLNT_RELEASE_LOCKOWNER,
+ 
+ 	/* nfs41 */
+ 	NFSPROC4_CLNT_EXCHANGE_ID,
+@@ -531,6 +561,13 @@ enum {
+ 	NFSPROC4_CLNT_SEQUENCE,
+ 	NFSPROC4_CLNT_GET_LEASE_TIME,
+ 	NFSPROC4_CLNT_RECLAIM_COMPLETE,
++	NFSPROC4_CLNT_LAYOUTGET,
++	NFSPROC4_CLNT_LAYOUTCOMMIT,
++	NFSPROC4_CLNT_LAYOUTRETURN,
++	NFSPROC4_CLNT_GETDEVICELIST,
++	NFSPROC4_CLNT_GETDEVICEINFO,
++	NFSPROC4_CLNT_PNFS_WRITE,
++	NFSPROC4_CLNT_PNFS_COMMIT,
+ };
+ 
+ /* nfs41 types */
+@@ -549,6 +586,43 @@ enum state_protect_how4 {
+ 	SP4_SSV		= 2
+ };
+ 
++enum pnfs_layouttype {
++	LAYOUT_NFSV4_1_FILES  = 1,
++	LAYOUT_OSD2_OBJECTS = 2,
++	LAYOUT_BLOCK_VOLUME = 3,
++};
++
++/* used for both layout return and recall */
++enum pnfs_layoutreturn_type {
++	RETURN_FILE = 1,
++	RETURN_FSID = 2,
++	RETURN_ALL  = 3
++};
++
++enum pnfs_iomode {
++	IOMODE_READ = 1,
++	IOMODE_RW = 2,
++	IOMODE_ANY = 3,
++};
++
++enum pnfs_notify_deviceid_type4 {
++	NOTIFY_DEVICEID4_CHANGE = 1 << 1,
++	NOTIFY_DEVICEID4_DELETE = 1 << 2,
++};
++
++#define NFL4_UFLG_MASK			0x0000003F
++#define NFL4_UFLG_DENSE			0x00000001
++#define NFL4_UFLG_COMMIT_THRU_MDS	0x00000002
++#define NFL4_UFLG_STRIPE_UNIT_SIZE_MASK	0xFFFFFFC0
++
++/* Encoded in the loh_body field of type layouthint4 */
++enum filelayout_hint_care4 {
++	NFLH4_CARE_DENSE		= NFL4_UFLG_DENSE,
++	NFLH4_CARE_COMMIT_THRU_MDS	= NFL4_UFLG_COMMIT_THRU_MDS,
++	NFLH4_CARE_STRIPE_UNIT_SIZE	= 0x00000040,
++	NFLH4_CARE_STRIPE_COUNT		= 0x00000080
++};
++
+ #endif
+ #endif
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfs4_pnfs.h
+--- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-08-31 20:42:05.583087731 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-08-31 20:42:05.583087731 -0400
+@@ -0,0 +1,329 @@
++/*
++ *  include/linux/nfs4_pnfs.h
++ *
++ *  Common data structures needed by the pnfs client and pnfs layout driver.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef LINUX_NFS4_PNFS_H
++#define LINUX_NFS4_PNFS_H
++
++#include <linux/nfs_page.h>
++
++enum pnfs_try_status {
++	PNFS_ATTEMPTED     = 0,
++	PNFS_NOT_ATTEMPTED = 1,
++};
++
++#define NFS4_PNFS_GETDEVLIST_MAXNUM 16
++
++/* Per-layout driver specific registration structure */
++struct pnfs_layoutdriver_type {
++	const u32 id;
++	const char *name;
++	struct layoutdriver_io_operations *ld_io_ops;
++	struct layoutdriver_policy_operations *ld_policy_ops;
++};
++
++struct pnfs_fsdata {
++	int bypass_eof;
++	struct pnfs_layout_segment *lseg;
++	void *private;
++};
++
++#if defined(CONFIG_NFS_V4_1)
++
++static inline struct nfs_inode *
++PNFS_NFS_INODE(struct pnfs_layout_hdr *lo)
++{
++	return NFS_I(lo->inode);
++}
++
++static inline struct inode *
++PNFS_INODE(struct pnfs_layout_hdr *lo)
++{
++	return lo->inode;
++}
++
++static inline struct nfs_server *
++PNFS_NFS_SERVER(struct pnfs_layout_hdr *lo)
++{
++	return NFS_SERVER(PNFS_INODE(lo));
++}
++
++static inline struct pnfs_layoutdriver_type *
++PNFS_LD(struct pnfs_layout_hdr *lo)
++{
++	return NFS_SERVER(PNFS_INODE(lo))->pnfs_curr_ld;
++}
++
++static inline struct layoutdriver_io_operations *
++PNFS_LD_IO_OPS(struct pnfs_layout_hdr *lo)
++{
++	return PNFS_LD(lo)->ld_io_ops;
++}
++
++static inline struct layoutdriver_policy_operations *
++PNFS_LD_POLICY_OPS(struct pnfs_layout_hdr *lo)
++{
++	return PNFS_LD(lo)->ld_policy_ops;
++}
++
++static inline bool
++has_layout(struct nfs_inode *nfsi)
++{
++	return nfsi->layout != NULL;
++}
++
++static inline bool
++layoutcommit_needed(struct nfs_inode *nfsi)
++{
++	return has_layout(nfsi) &&
++	       test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->state);
++}
++
++extern void put_lseg(struct pnfs_layout_segment *lseg);
++extern void get_lseg(struct pnfs_layout_segment *lseg);
++
++#else /* CONFIG_NFS_V4_1 */
++
++static inline bool
++has_layout(struct nfs_inode *nfsi)
++{
++	return false;
++}
++
++static inline bool
++layoutcommit_needed(struct nfs_inode *nfsi)
++{
++	return false;	/* stub for builds without CONFIG_NFS_V4_1 */
++}
++
++#endif /* CONFIG_NFS_V4_1 */
++
++struct pnfs_layout_segment {
++	struct list_head fi_list;
++	struct pnfs_layout_range range;
++	struct kref kref;
++	bool valid;
++	struct pnfs_layout_hdr *layout;
++	struct nfs4_deviceid *deviceid;
++	u8 ld_data[];			/* layout driver private data */
++};
++
++static inline void *
++LSEG_LD_DATA(struct pnfs_layout_segment *lseg)
++{
++	return lseg->ld_data;
++}
++
++/* Layout driver I/O operations.
++ * Either the pagecache or non-pagecache read/write operations must be implemented
++ */
++struct layoutdriver_io_operations {
++	/* Functions that use the pagecache.
++	 * If use_pagecache == 1, then these functions must be implemented.
++	 */
++	/* read and write pagelist should return just 0 (to indicate that
++	 * the layout code has taken control) or 1 (to indicate that the
++	 * layout code wishes to fall back to normal nfs.)  If 0 is returned,
++	 * information can be passed back through nfs_data->res and
++	 * nfs_data->task.tk_status, and the appropriate pnfs done function
++	 * MUST be called.
++	 */
++	enum pnfs_try_status
++	(*read_pagelist) (struct nfs_read_data *nfs_data, unsigned nr_pages);
++	enum pnfs_try_status
++	(*write_pagelist) (struct nfs_write_data *nfs_data, unsigned nr_pages, int how);
++	int (*write_begin) (struct pnfs_layout_segment *lseg, struct page *page,
++			    loff_t pos, unsigned count,
++			    struct pnfs_fsdata *fsdata);
++	int (*write_end)(struct inode *inode, struct page *page, loff_t pos,
++			 unsigned count, unsigned copied,
++			 struct pnfs_layout_segment *lseg);
++	void (*write_end_cleanup)(struct file *filp,
++				  struct pnfs_fsdata *fsdata);
++
++	/* Consistency ops */
++	/* 2 problems:
++	 * 1) the page list contains nfs_pages, NOT pages
++	 * 2) currently the NFS code doesn't create a page array (as it does with read/write)
++	 */
++	enum pnfs_try_status
++	(*commit) (struct nfs_write_data *nfs_data, int how);
++
++	/* Layout information. For each inode, alloc_layout is executed once to retrieve an
++	 * inode specific layout structure.  Each subsequent layoutget operation results in
++	 * a set_layout call to set the opaque layout in the layout driver.*/
++	struct pnfs_layout_hdr * (*alloc_layout) (struct inode *inode);
++	void (*free_layout) (struct pnfs_layout_hdr *);
++	struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
++	void (*free_lseg) (struct pnfs_layout_segment *lseg);
++
++	int (*setup_layoutcommit) (struct pnfs_layout_hdr *layoutid,
++				   struct nfs4_layoutcommit_args *args);
++	void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
++				     struct xdr_stream *xdr,
++				     const struct nfs4_layoutcommit_args *args);
++	void (*cleanup_layoutcommit) (struct pnfs_layout_hdr *layoutid,
++				      struct nfs4_layoutcommit_args *args,
++				      int status);
++	void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid,
++				struct xdr_stream *xdr,
++				const struct nfs4_layoutreturn_args *args);
++
++	/* Registration information for a new mounted file system
++	 */
++	int (*initialize_mountpoint) (struct nfs_server *,
++				      const struct nfs_fh * mntfh);
++	int (*uninitialize_mountpoint) (struct nfs_server *server);
++};
++
++enum layoutdriver_policy_flags {
++	/* Should the full nfs rpc cleanup code be used after io */
++	PNFS_USE_RPC_CODE		= 1 << 0,
++
++	/* Should the NFS req. gather algorithm cross stripe boundaries? */
++	PNFS_GATHER_ACROSS_STRIPES	= 1 << 1,
++
++	/* Should the pNFS client commit and return the layout upon a setattr */
++	PNFS_LAYOUTRET_ON_SETATTR	= 1 << 3, /* NOTE(review): bit 2 skipped; confirm */
++};
++
++struct layoutdriver_policy_operations {
++	unsigned flags;
++
++	/* The stripe size of the file system */
++	ssize_t (*get_stripesize) (struct pnfs_layout_hdr *layoutid);
++
++	/* test for nfs page cache coalescing */
++	int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
++
++	/* Retrieve the block size of the file system.
++	 * If gather_across_stripes == 1, then the file system will gather
++	 * requests into the block size.
++	 * TODO: Where will the layout driver get this info?  It is hard
++	 * coded in PVFS2.
++	 */
++	ssize_t (*get_blocksize) (void);
++};
++
++/* Should the full nfs rpc cleanup code be used after io */
++static inline int
++pnfs_ld_use_rpc_code(struct pnfs_layoutdriver_type *ld)
++{
++	return ld->ld_policy_ops->flags & PNFS_USE_RPC_CODE;
++}
++
++/* Should the NFS req. gather algorithm cross stripe boundaries? */
++static inline int
++pnfs_ld_gather_across_stripes(struct pnfs_layoutdriver_type *ld)
++{
++	return ld->ld_policy_ops->flags & PNFS_GATHER_ACROSS_STRIPES;
++}
++
++struct pnfs_device {
++	struct pnfs_deviceid dev_id;
++	unsigned int  layout_type;
++	unsigned int  mincount;
++	struct page **pages;
++	void          *area;
++	unsigned int  pgbase;
++	unsigned int  pglen;
++	unsigned int  dev_notify_types;
++};
++
++struct pnfs_devicelist {
++	unsigned int		eof;
++	unsigned int		num_devs;
++	struct pnfs_deviceid	dev_id[NFS4_PNFS_GETDEVLIST_MAXNUM];
++};
++
++/*
++ * Device ID RCU cache. A device ID is unique per client ID and layout type.
++ */
++#define NFS4_DEVICE_ID_HASH_BITS	5
++#define NFS4_DEVICE_ID_HASH_SIZE	(1 << NFS4_DEVICE_ID_HASH_BITS)
++#define NFS4_DEVICE_ID_HASH_MASK	(NFS4_DEVICE_ID_HASH_SIZE - 1)
++
++static inline u32
++nfs4_deviceid_hash(struct pnfs_deviceid *id)
++{
++	unsigned char *cptr = (unsigned char *)id->data;
++	unsigned int nbytes = NFS4_PNFS_DEVICEID4_SIZE;	/* bytes of id->data to hash */
++	u32 x = 0;
++
++	while (nbytes--) {
++		x *= 37;		/* fold: x = x * 37 + next byte */
++		x += *cptr++;
++	}
++	return x & NFS4_DEVICE_ID_HASH_MASK;	/* index in [0, NFS4_DEVICE_ID_HASH_SIZE) */
++}
++
++struct nfs4_deviceid_cache {
++	spinlock_t		dc_lock;
++	struct kref		dc_kref;
++	void			(*dc_free_callback)(struct kref *);
++	struct hlist_head	dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
++};
++
++/* Device ID cache node */
++struct nfs4_deviceid {
++	struct hlist_node	de_node;
++	struct pnfs_deviceid	de_id;
++	struct kref		de_kref;
++};
++
++extern int nfs4_alloc_init_deviceid_cache(struct nfs_client *,
++				void (*free_callback)(struct kref *));
++extern void nfs4_put_deviceid_cache(struct nfs_client *);
++extern void nfs4_init_deviceid_node(struct nfs4_deviceid *);
++extern struct nfs4_deviceid *nfs4_find_get_deviceid(
++				struct nfs4_deviceid_cache *,
++				struct pnfs_deviceid *);
++extern struct nfs4_deviceid *nfs4_add_get_deviceid(struct nfs4_deviceid_cache *,
++				struct nfs4_deviceid *);
++extern void nfs4_set_layout_deviceid(struct pnfs_layout_segment *,
++				struct nfs4_deviceid *);
++extern void nfs4_put_unset_layout_deviceid(struct pnfs_layout_segment *,
++				struct nfs4_deviceid *,
++				void (*free_callback)(struct kref *));
++extern void nfs4_delete_device(struct nfs4_deviceid_cache *,
++				struct pnfs_deviceid *);
++
++/* pNFS client callback functions.
++ * These operations allow the layout driver to access pNFS client
++ * specific information or call pNFS client->server operations.
++ * E.g., getdeviceinfo, I/O callbacks, etc
++ */
++struct pnfs_client_operations {
++	int (*nfs_getdevicelist) (struct nfs_server *,
++				  const struct nfs_fh *fh,
++				  struct pnfs_devicelist *devlist);
++	int (*nfs_getdeviceinfo) (struct nfs_server *,
++				  struct pnfs_device *dev);
++
++	/* Post read callback. */
++	void (*nfs_readlist_complete) (struct nfs_read_data *nfs_data);
++
++	/* Post write callback. */
++	void (*nfs_writelist_complete) (struct nfs_write_data *nfs_data);
++
++	/* Post commit callback. */
++	void (*nfs_commit_complete) (struct nfs_write_data *nfs_data);
++	void (*nfs_return_layout) (struct inode *);
++};
++
++extern struct pnfs_client_operations pnfs_ops;
++
++extern struct pnfs_client_operations *pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
++extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
++
++#define NFS4_PNFS_MAX_LAYOUTS 4
++#define NFS4_PNFS_PRIVATE_LAYOUT 0x80000000
++
++#endif /* LINUX_NFS4_PNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarch/include/linux/nfsd4_block.h
+--- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-08-31 20:42:05.596098115 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-08-31 20:42:05.596098115 -0400
+@@ -0,0 +1,101 @@
++#ifndef NFSD4_BLOCK
++#define NFSD4_BLOCK
++
++#include <linux/sunrpc/svc.h>
++#include <linux/sunrpc/svcauth.h>
++#include <linux/nfsd/nfsfh.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++#define PNFS_BLOCK_SUCCESS		1
++#define PNFS_BLOCK_FAILURE		0
++
++#define PNFS_BLOCK_CTL_START		1
++#define PNFS_BLOCK_CTL_STOP		2
++#define PNFS_BLOCK_CTL_VERS		3 /* Allows daemon to request current
++					   * version from kernel via an upcall.
++					   */
++
++#define PNFS_UPCALL_MSG_STOP	0
++#define PNFS_UPCALL_MSG_GETSIG	1
++#define PNFS_UPCALL_MSG_GETSLICE	2
++#define PNFS_UPCALL_MSG_DMCHK	3	// See if dev_t is a DM volume
++#define PNFS_UPCALL_MSG_DMGET	4
++#define PNFS_UPCALL_MSG_VERS	5
++
++#define PNFS_UPCALL_VERS		8
++
++typedef struct stripe_dev {
++	int	major,
++		minor,
++		offset;
++} stripe_dev_t;
++
++typedef struct bl_comm_res {
++	int				res_status;
++	union {
++		struct {
++			long long	start,
++					length;
++		} slice;
++		struct {
++			int		num_stripes,
++					stripe_size;
++			stripe_dev_t	devs[];
++		} stripe;
++		struct {
++			long long	sector;
++			int		offset,
++					len;
++			char		sig[];
++		} sig;
++		int			vers,
++					dm_vol;
++	} u;
++} bl_comm_res_t;
++
++typedef struct bl_comm_msg {
++	int		msg_type,
++			msg_status;
++	union {
++		dev_t	msg_dev;
++		int	msg_vers;
++	} u;
++	bl_comm_res_t	*msg_res;
++} bl_comm_msg_t;
++
++#ifdef __KERNEL__
++
++typedef struct bl_comm {
++	/* ---- protects access to this structure ---- */
++	struct mutex		lock;
++	/* ---- protects access to rpc pipe ---- */
++	struct mutex		pipe_lock;
++	struct dentry		*pipe_dentry;
++	wait_queue_head_t	pipe_wq;
++	bl_comm_msg_t		msg;
++} bl_comm_t;
++
++int pnfs_block_enabled(struct inode *, int);
++int bl_layout_type(struct super_block *sb);
++int bl_getdeviceiter(struct super_block *, u32 layout_type,
++		     struct nfsd4_pnfs_dev_iter_res *);
++int bl_getdeviceinfo(struct super_block *, struct exp_xdr_stream *,
++		     u32 layout_type,
++		     const struct nfsd4_pnfs_deviceid *);
++enum nfsstat4 bl_layoutget(struct inode *, struct exp_xdr_stream *,
++			   const struct nfsd4_pnfs_layoutget_arg *,
++			   struct nfsd4_pnfs_layoutget_res *);
++int bl_layoutcommit(struct inode *,
++		    const struct nfsd4_pnfs_layoutcommit_arg *,
++		    struct nfsd4_pnfs_layoutcommit_res *);
++int bl_layoutreturn(struct inode *,
++		    const struct nfsd4_pnfs_layoutreturn_arg *);
++int bl_layoutrecall(struct inode *inode, int type, u64 offset, u64 len);
++int bl_init_proc(void);
++int bl_upcall(bl_comm_t *, bl_comm_msg_t *, bl_comm_res_t **);
++
++extern bl_comm_t	*bl_comm_global;	// Ugly...
++#endif /* __KERNEL__ */
++
++#endif /* NFSD4_BLOCK */
++
+diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h
+--- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-08-31 20:42:05.597097942 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-08-31 20:42:05.597097942 -0400
+@@ -0,0 +1,345 @@
++/*
++ * include/linux/nfsd4_spnfs.h
++ *
++ * spNFS - simple pNFS implementation with userspace daemon
++ *
++ */
++
++/******************************************************************************
++
++(c) 2007 Network Appliance, Inc.  All Rights Reserved.
++
++Network Appliance provides this source code under the GPL v2 License.
++The GPL v2 license is available at
++http://opensource.org/licenses/gpl-license.php.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++******************************************************************************/
++
++#ifndef NFS_SPNFS_H
++#define NFS_SPNFS_H
++
++
++#ifdef __KERNEL__
++#include "exportfs.h"
++#include "sunrpc/svc.h"
++#include "nfsd/nfsfh.h"
++#else
++#include <sys/types.h>
++#endif /* __KERNEL__ */
++
++#define SPNFS_STATUS_INVALIDMSG		0x01
++#define SPNFS_STATUS_AGAIN		0x02
++#define SPNFS_STATUS_FAIL		0x04
++#define SPNFS_STATUS_SUCCESS		0x08
++
++#define SPNFS_TYPE_LAYOUTGET		0x01
++#define SPNFS_TYPE_LAYOUTCOMMIT		0x02
++#define SPNFS_TYPE_LAYOUTRETURN		0x03
++#define SPNFS_TYPE_GETDEVICEITER	0x04
++#define SPNFS_TYPE_GETDEVICEINFO	0x05
++#define SPNFS_TYPE_SETATTR		0x06
++#define SPNFS_TYPE_OPEN			0x07
++#define	SPNFS_TYPE_CLOSE		0x08
++#define SPNFS_TYPE_CREATE		0x09
++#define SPNFS_TYPE_REMOVE		0x0a
++#define SPNFS_TYPE_COMMIT		0x0b
++#define SPNFS_TYPE_READ			0x0c
++#define SPNFS_TYPE_WRITE		0x0d
++
++#define	SPNFS_MAX_DEVICES		1
++#define	SPNFS_MAX_DATA_SERVERS		16
++#define SPNFS_MAX_IO			512
++
++/* layout */
++struct spnfs_msg_layoutget_args {
++	unsigned long inode;
++	unsigned long generation;
++};
++
++struct spnfs_filelayout_list {
++	u_int32_t       fh_len;
++	unsigned char   fh_val[128]; /* DMXXX fix this const */
++};
++
++struct spnfs_msg_layoutget_res {
++	int status;
++	u_int64_t devid;
++	u_int64_t stripe_size;
++	u_int32_t stripe_type;
++	u_int32_t stripe_count;
++	struct spnfs_filelayout_list flist[SPNFS_MAX_DATA_SERVERS];
++};
++
++/* layoutcommit */
++struct spnfs_msg_layoutcommit_args {
++	unsigned long inode;
++	unsigned long generation;
++	u_int64_t file_size;
++};
++
++struct spnfs_msg_layoutcommit_res {
++	int status;
++};
++
++/* layoutreturn */
++/* No op for the daemon */
++/*
++struct spnfs_msg_layoutreturn_args {
++};
++
++struct spnfs_msg_layoutreturn_res {
++};
++*/
++
++/* getdeviceiter */
++struct spnfs_msg_getdeviceiter_args {
++	unsigned long inode;
++	u_int64_t cookie;
++	u_int64_t verf;
++};
++
++struct spnfs_msg_getdeviceiter_res {
++	int status;
++	u_int64_t devid;
++	u_int64_t cookie;
++	u_int64_t verf;
++	u_int32_t eof;
++};
++
++/* getdeviceinfo */
++struct spnfs_data_server {
++	u_int32_t dsid;
++	char netid[5];
++	char addr[29];
++};
++
++struct spnfs_device {
++	u_int64_t devid;
++	int dscount;
++	struct spnfs_data_server dslist[SPNFS_MAX_DATA_SERVERS];
++};
++
++struct spnfs_msg_getdeviceinfo_args {
++	u_int64_t devid;
++};
++
++struct spnfs_msg_getdeviceinfo_res {
++	int status;
++	struct spnfs_device devinfo;
++};
++
++/* setattr */
++struct spnfs_msg_setattr_args {
++	unsigned long inode;
++	unsigned long generation;
++	int file_size;
++};
++
++struct spnfs_msg_setattr_res {
++	int status;
++};
++
++/* open */
++struct spnfs_msg_open_args {
++	unsigned long inode;
++	unsigned long generation;
++	int create;
++	int createmode;
++	int truncate;
++};
++
++struct spnfs_msg_open_res {
++	int status;
++};
++
++/* close */
++/* No op for daemon */
++struct spnfs_msg_close_args {
++	int x;
++};
++
++struct spnfs_msg_close_res {
++	int y;
++};
++
++/* create */
++/*
++struct spnfs_msg_create_args {
++	int x;
++};
++
++struct spnfs_msg_create_res {
++	int y;
++};
++*/
++
++/* remove */
++struct spnfs_msg_remove_args {
++	unsigned long inode;
++	unsigned long generation;
++};
++
++struct spnfs_msg_remove_res {
++	int status;
++};
++
++/* commit */
++/*
++struct spnfs_msg_commit_args {
++	int x;
++};
++
++struct spnfs_msg_commit_res {
++	int y;
++};
++*/
++
++/* read */
++struct spnfs_msg_read_args {
++	unsigned long inode;
++	unsigned long generation;
++	loff_t offset;
++	unsigned long len;
++};
++
++struct spnfs_msg_read_res {
++	int status;
++	char data[SPNFS_MAX_IO];
++};
++
++/* write */
++struct spnfs_msg_write_args {
++	unsigned long inode;
++	unsigned long generation;
++	loff_t offset;
++	unsigned long len;
++	char data[SPNFS_MAX_IO];
++};
++
++struct spnfs_msg_write_res {
++	int status;
++};
++
++/* bundle args and responses */
++union spnfs_msg_args {
++	struct spnfs_msg_layoutget_args		layoutget_args;
++	struct spnfs_msg_layoutcommit_args	layoutcommit_args;
++/*
++	struct spnfs_msg_layoutreturn_args	layoutreturn_args;
++*/
++	struct spnfs_msg_getdeviceiter_args     getdeviceiter_args;
++	struct spnfs_msg_getdeviceinfo_args     getdeviceinfo_args;
++	struct spnfs_msg_setattr_args		setattr_args;
++	struct spnfs_msg_open_args		open_args;
++	struct spnfs_msg_close_args		close_args;
++/*
++	struct spnfs_msg_create_args		create_args;
++*/
++	struct spnfs_msg_remove_args		remove_args;
++/*
++	struct spnfs_msg_commit_args		commit_args;
++*/
++	struct spnfs_msg_read_args		read_args;
++	struct spnfs_msg_write_args		write_args;
++};
++
++union spnfs_msg_res {
++	struct spnfs_msg_layoutget_res		layoutget_res;
++	struct spnfs_msg_layoutcommit_res	layoutcommit_res;
++/*
++	struct spnfs_msg_layoutreturn_res	layoutreturn_res;
++*/
++	struct spnfs_msg_getdeviceiter_res      getdeviceiter_res;
++	struct spnfs_msg_getdeviceinfo_res      getdeviceinfo_res;
++	struct spnfs_msg_setattr_res		setattr_res;
++	struct spnfs_msg_open_res		open_res;
++	struct spnfs_msg_close_res		close_res;
++/*
++	struct spnfs_msg_create_res		create_res;
++*/
++	struct spnfs_msg_remove_res		remove_res;
++/*
++	struct spnfs_msg_commit_res		commit_res;
++*/
++	struct spnfs_msg_read_res		read_res;
++	struct spnfs_msg_write_res		write_res;
++};
++
++/* a spnfs message, args and response */
++struct spnfs_msg {
++	unsigned char		im_type;
++	unsigned char		im_status;
++	union spnfs_msg_args	im_args;
++	union spnfs_msg_res	im_res;
++};
++
++/* spnfs configuration info */
++struct spnfs_config {
++	unsigned char		dense_striping;
++	int			stripe_size;
++	int			num_ds;
++	char			ds_dir[SPNFS_MAX_DATA_SERVERS][80];  /* XXX */
++};
++
++#if defined(__KERNEL__) && defined(CONFIG_SPNFS)
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++/* pipe mgmt structure.  messages flow through here */
++struct spnfs {
++	struct dentry		*spnfs_dentry;    /* dentry for pipe */
++	wait_queue_head_t	spnfs_wq;
++	struct spnfs_msg	spnfs_im;         /* spnfs message */
++	struct mutex		spnfs_lock;       /* Serializes upcalls */
++	struct mutex		spnfs_plock;
++};
++
++struct nfsd4_open;
++
++int spnfs_layout_type(struct super_block *);
++enum nfsstat4 spnfs_layoutget(struct inode *, struct exp_xdr_stream *xdr,
++			      const struct nfsd4_pnfs_layoutget_arg *,
++			      struct nfsd4_pnfs_layoutget_res *);
++int spnfs_layoutcommit(void);
++int spnfs_layoutreturn(struct inode *,
++		       const struct nfsd4_pnfs_layoutreturn_arg *);
++int spnfs_getdeviceiter(struct super_block *,
++			u32 layout_type,
++			struct nfsd4_pnfs_dev_iter_res *);
++int spnfs_getdeviceinfo(struct super_block *, struct exp_xdr_stream *,
++			u32 layout_type,
++			const struct nfsd4_pnfs_deviceid *);
++int spnfs_setattr(void);
++int spnfs_open(struct inode *, struct nfsd4_open *);
++int spnfs_get_state(struct inode *, struct knfsd_fh *, struct pnfs_get_state *);
++int spnfs_remove(unsigned long, unsigned long);
++__be32 spnfs_read(struct inode *, loff_t, unsigned long *,
++		  int, struct svc_rqst *);
++__be32 spnfs_write(struct inode *, loff_t, size_t, int, struct svc_rqst *);
++int spnfs_getfh(int, struct nfs_fh *);
++int spnfs_test_layoutrecall(char *, u64, u64);
++int spnfs_layoutrecall(struct inode *, int, u64, u64);
++
++int nfsd_spnfs_new(void);
++void nfsd_spnfs_delete(void);
++int spnfs_upcall(struct spnfs *, struct spnfs_msg *, union spnfs_msg_res *);
++int spnfs_enabled(void);
++int spnfs_init_proc(void);
++
++extern struct spnfs_config *spnfs_config;
++
++#endif /* __KERNEL__ && CONFIG_SPNFS */
++
++#endif /* NFS_SPNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch/include/linux/nfsd/const.h
+--- linux-2.6.34.noarch/include/linux/nfsd/const.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-08-31 20:42:05.591097762 -0400
+@@ -29,6 +29,7 @@
+ #ifdef __KERNEL__
+ 
+ #include <linux/sunrpc/msg_prot.h>
++#include <linux/sunrpc/svc.h>
+ 
+ /*
+  * Largest number of bytes we need to allocate for an NFS
+diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch/include/linux/nfsd/debug.h
+--- linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-08-31 20:42:05.591097762 -0400
+@@ -32,6 +32,8 @@
+ #define NFSDDBG_REPCACHE	0x0080
+ #define NFSDDBG_XDR		0x0100
+ #define NFSDDBG_LOCKD		0x0200
++#define NFSDDBG_PNFS		0x0400
++#define NFSDDBG_FILELAYOUT	0x0800
+ #define NFSDDBG_ALL		0x7FFF
+ #define NFSDDBG_NOCHANGE	0xFFFF
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarch/include/linux/nfsd/export.h
+--- linux-2.6.34.noarch/include/linux/nfsd/export.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-08-31 20:42:05.592118086 -0400
+@@ -100,6 +100,7 @@ struct svc_export {
+ 	uid_t			ex_anon_uid;
+ 	gid_t			ex_anon_gid;
+ 	int			ex_fsid;
++	int			ex_pnfs;
+ 	unsigned char *		ex_uuid; /* 16 byte fsid */
+ 	struct nfsd4_fs_locations ex_fslocs;
+ 	int			ex_nflavors;
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-08-31 20:42:05.592118086 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-08-31 20:42:05.592118086 -0400
+@@ -0,0 +1,132 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef NFSD_NFS4LAYOUTXDR_H
++#define NFSD_NFS4LAYOUTXDR_H
++
++#include <linux/sunrpc/xdr.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++/* the nfsd4_pnfs_devlist dev_addr for the file layout type */
++struct pnfs_filelayout_devaddr {
++	struct xdr_netobj	r_netid;
++	struct xdr_netobj	r_addr;
++};
++
++/* list of multipath servers */
++struct pnfs_filelayout_multipath {
++	u32				fl_multipath_length;
++	struct pnfs_filelayout_devaddr 	*fl_multipath_list;
++};
++
++struct pnfs_filelayout_device {
++	u32					fl_stripeindices_length;
++	u32       		 		*fl_stripeindices_list;
++	u32					fl_device_length;
++	struct pnfs_filelayout_multipath 	*fl_device_list;
++};
++
++struct pnfs_filelayout_layout {
++	u32                             lg_layout_type; /* response */
++	u32                             lg_stripe_type; /* response */
++	u32                             lg_commit_through_mds; /* response */
++	u64                             lg_stripe_unit; /* response */
++	u64                             lg_pattern_offset; /* response */
++	u32                             lg_first_stripe_index;	/* response */
++	struct nfsd4_pnfs_deviceid	device_id;		/* response */
++	u32                             lg_fh_length;		/* response */
++	struct knfsd_fh                 *lg_fh_list;		/* response */
++};
++
++enum stripetype4 {
++	STRIPE_SPARSE = 1,
++	STRIPE_DENSE = 2
++};
++
++enum pnfs_block_extent_state4 {
++        PNFS_BLOCK_READWRITE_DATA       = 0,
++        PNFS_BLOCK_READ_DATA            = 1,
++        PNFS_BLOCK_INVALID_DATA         = 2,
++        PNFS_BLOCK_NONE_DATA            = 3
++};
++
++enum pnfs_block_volume_type4 {
++        PNFS_BLOCK_VOLUME_SIMPLE = 0,
++        PNFS_BLOCK_VOLUME_SLICE = 1,
++        PNFS_BLOCK_VOLUME_CONCAT = 2,
++        PNFS_BLOCK_VOLUME_STRIPE = 3,
++};
++typedef enum pnfs_block_volume_type4 pnfs_block_volume_type4;
++
++enum bl_cache_state {
++	BLOCK_LAYOUT_NEW	= 0,
++	BLOCK_LAYOUT_CACHE	= 1,
++	BLOCK_LAYOUT_UPDATE	= 2,
++};
++
++typedef struct pnfs_blocklayout_layout {
++        struct list_head                bll_list;
++        struct nfsd4_pnfs_deviceid      bll_vol_id;
++        u64                             bll_foff;	// file offset
++        u64                             bll_len;
++        u64                             bll_soff;	// storage offset
++	int				bll_recalled;
++        enum pnfs_block_extent_state4   bll_es;
++	enum bl_cache_state		bll_cache_state;
++} pnfs_blocklayout_layout_t;
++
++typedef struct pnfs_blocklayout_devinfo {
++        struct list_head                bld_list;
++        pnfs_block_volume_type4         bld_type;
++        struct nfsd4_pnfs_deviceid      bld_devid;
++        int                             bld_index_loc;
++        union {
++                struct {
++                        u64             bld_offset;
++                        u32             bld_sig_len,
++                                        *bld_sig;
++                } simple;
++                struct {
++                        u64             bld_start,
++                                        bld_len;
++                        u32             bld_index;      /* Index of Simple Volume */
++                } slice;
++                struct {
++                        u32             bld_stripes;
++                        u64             bld_chunk_size;
++                        u32             *bld_stripe_indexs;
++                } stripe;
++        } u;
++} pnfs_blocklayout_devinfo_t;
++
++#endif /* NFSD_NFS4LAYOUTXDR_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-08-31 20:42:05.593020723 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-08-31 20:42:05.593020723 -0400
+@@ -0,0 +1,54 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ ******************************************************************************/
++#include <linux/genhd.h>
++
++/*
++ * Length of comma separated pnfs data server IPv4 addresses. Enough room for
++ * 32 addresses.
++ */
++#define NFSD_DLM_DS_LIST_MAX   512
++/*
++ * Length of colon separated pnfs dlm device of the form
++ * disk_name:comma separated data server IPv4 address
++ */
++#define NFSD_PNFS_DLM_DEVICE_MAX (NFSD_DLM_DS_LIST_MAX + DISK_NAME_LEN + 1)
++
++#ifdef CONFIG_PNFSD
++
++/* For use by DLM cluster file systems exported by pNFSD */
++extern const struct pnfs_export_operations pnfs_dlm_export_ops;
++
++int nfsd4_set_pnfs_dlm_device(char *pnfs_dlm_device, int len);
++
++void nfsd4_pnfs_dlm_shutdown(void);
++
++ssize_t nfsd4_get_pnfs_dlm_device_list(char *buf, ssize_t buflen);
++
++#else /* CONFIG_PNFSD */
++
++static inline void nfsd4_pnfs_dlm_shutdown(void)
++{
++	return;
++}
++
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-08-31 20:42:05.594107962 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-08-31 20:42:05.594107962 -0400
+@@ -0,0 +1,271 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef _LINUX_NFSD_NFSD4_PNFS_H
++#define _LINUX_NFSD_NFSD4_PNFS_H
++
++#include <linux/exportfs.h>
++#include <linux/exp_xdr.h>
++#include <linux/nfs_xdr.h>
++
++struct nfsd4_pnfs_deviceid {
++	u64	sbid;			/* per-superblock unique ID */
++	u64	devid;			/* filesystem-wide unique device ID */
++};
++
++struct nfsd4_pnfs_dev_iter_res {
++	u64		gd_cookie;	/* request/response */
++	u64		gd_verf;	/* request/response */
++	u64		gd_devid;	/* response */
++	u32		gd_eof;		/* response */
++};
++
++/* Arguments for set_device_notify */
++struct pnfs_devnotify_arg {
++	struct nfsd4_pnfs_deviceid dn_devid;	/* request */
++	u32 dn_layout_type;			/* request */
++	u32 dn_notify_types;			/* request/response */
++};
++
++struct nfsd4_layout_seg {
++	u64	clientid;
++	u32	layout_type;
++	u32	iomode;
++	u64	offset;
++	u64	length;
++};
++
++/* Used by layout_get to encode layout (loc_body var in spec)
++ * Args:
++ * minlength - min number of accessible bytes given by layout
++ * fsid - Major part of struct pnfs_deviceid.  File system uses this
++ * to build the deviceid returned in the layout.
++ * fh - fs can modify the file handle for use on data servers
++ * seg - layout info requested and layout info returned
++ * xdr - xdr info
++ * return_on_close - true if layout to be returned on file close
++ */
++
++struct nfsd4_pnfs_layoutget_arg {
++	u64			lg_minlength;
++	u64			lg_sbid;
++	const struct knfsd_fh	*lg_fh;
++};
++
++struct nfsd4_pnfs_layoutget_res {
++	struct nfsd4_layout_seg	lg_seg;	/* request/response */
++	u32			lg_return_on_close;
++};
++
++struct nfsd4_pnfs_layoutcommit_arg {
++	struct nfsd4_layout_seg	lc_seg;		/* request */
++	u32			lc_reclaim;	/* request */
++	u32			lc_newoffset;	/* request */
++	u64			lc_last_wr;	/* request */
++	struct nfstime4		lc_mtime;	/* request */
++	u32			lc_up_len;	/* layout length */
++	void			*lc_up_layout;	/* decoded by callback */
++};
++
++struct nfsd4_pnfs_layoutcommit_res {
++	u32			lc_size_chg;	/* boolean for response */
++	u64			lc_newsize;	/* response */
++};
++
++#define PNFS_LAST_LAYOUT_NO_RECALLS ((void *)-1) /* used with lr_cookie below */
++
++struct nfsd4_pnfs_layoutreturn_arg {
++	u32			lr_return_type;	/* request */
++	struct nfsd4_layout_seg	lr_seg;		/* request */
++	u32			lr_reclaim;	/* request */
++	u32			lrf_body_len;	/* request */
++	void			*lrf_body;	/* request */
++	void			*lr_cookie;	/* fs private */
++};
++
++/* pNFS Metadata to Data server state communication */
++struct pnfs_get_state {
++	u32			dsid;    /* request */
++	u64			ino;      /* request */
++	nfs4_stateid		stid;     /* request;response */
++	nfs4_clientid		clid;     /* response */
++	u32			access;    /* response */
++	u32			stid_gen;    /* response */
++	u32			verifier[2]; /* response */
++};
++
++/*
++ * pNFS export operations vector.
++ *
++ * The filesystem must implement the following methods:
++ *   layout_type
++ *   get_device_info
++ *   layout_get
++ *
++ * All other methods are optional and can be set to NULL if not implemented.
++ */
++struct pnfs_export_operations {
++	/* Returns the supported pnfs_layouttype4. */
++	int (*layout_type) (struct super_block *);
++
++	/* Encode device info onto the xdr stream. */
++	int (*get_device_info) (struct super_block *,
++				struct exp_xdr_stream *,
++				u32 layout_type,
++				const struct nfsd4_pnfs_deviceid *);
++
++	/* Retrieve all available devices via an iterator.
++	 * arg->cookie == 0 indicates the beginning of the list,
++	 * otherwise arg->verf is used to verify that the list hasn't changed
++	 * while retrieved.
++	 *
++	 * On output, the filesystem sets the devid based on the current cookie
++	 * and sets res->cookie and res->verf corresponding to the next entry.
++	 * When the last entry in the list is retrieved, res->eof is set to 1.
++	 */
++	int (*get_device_iter) (struct super_block *,
++				u32 layout_type,
++				struct nfsd4_pnfs_dev_iter_res *);
++
++	int (*set_device_notify) (struct super_block *,
++				  struct pnfs_devnotify_arg *);
++
++	/* Retrieve and encode a layout for inode onto the xdr stream.
++	 * arg->minlength is the minimum number of accessible bytes required
++	 *   by the client.
++	 * The maximum number of bytes to encode the layout is given by
++	 *   the xdr stream end pointer.
++	 * arg->fsid contains the major part of struct pnfs_deviceid.
++	 *   The file system uses this to build the deviceid returned
++	 *   in the layout.
++	 * res->seg - layout segment requested and layout info returned.
++	 * res->fh can be modified the file handle for use on data servers
++	 * res->return_on_close - true if layout to be returned on file close
++	 *
++	 * return one of the following nfs errors:
++	 * NFS_OK			Success
++	 * NFS4ERR_ACCESS		Permission error
++	 * NFS4ERR_BADIOMODE		Server does not support requested iomode
++	 * NFS4ERR_BADLAYOUT		No layout matching loga_minlength rules
++	 * NFS4ERR_INVAL		Parameter other than layout is invalid
++	 * NFS4ERR_IO			I/O error
++	 * NFS4ERR_LAYOUTTRYLATER	Layout may be retrieved later
++	 * NFS4ERR_LAYOUTUNAVAILABLE	Layout unavailable for this file
++	 * NFS4ERR_LOCKED		Lock conflict
++	 * NFS4ERR_NOSPC		Out-of-space error occurred
++	 * NFS4ERR_RECALLCONFLICT	Layout currently unavailable due to
++	 *				a conflicting CB_LAYOUTRECALL
++	 * NFS4ERR_SERVERFAULT		Server went berserk
++	 * NFS4ERR_TOOSMALL		loga_maxcount too small to fit layout
++	 * NFS4ERR_WRONG_TYPE		Wrong file type (not a regular file)
++	 */
++	enum nfsstat4 (*layout_get) (struct inode *,
++				     struct exp_xdr_stream *xdr,
++				     const struct nfsd4_pnfs_layoutget_arg *,
++				     struct nfsd4_pnfs_layoutget_res *);
++
++	/* Commit changes to layout */
++	int (*layout_commit) (struct inode *,
++			      const struct nfsd4_pnfs_layoutcommit_arg *,
++			      struct nfsd4_pnfs_layoutcommit_res *);
++
++	/* Returns the layout */
++	int (*layout_return) (struct inode *,
++			      const struct nfsd4_pnfs_layoutreturn_arg *);
++
++	/* Can layout segments be merged for this layout type? */
++	int (*can_merge_layouts) (u32 layout_type);
++
++	/* pNFS Files layout specific operations */
++
++	/* Get the write verifier for DS (called on MDS only) */
++	void (*get_verifier) (struct super_block *, u32 *p);
++	/* Call fs on DS only */
++	int (*get_state) (struct inode *, struct knfsd_fh *,
++			  struct pnfs_get_state *);
++};
++
++struct nfsd4_pnfs_cb_layout {
++	u32			cbl_recall_type;	/* request */
++	struct nfsd4_layout_seg cbl_seg;		/* request */
++	u32			cbl_layoutchanged;	/* request */
++	nfs4_stateid		cbl_sid;		/* request */
++	struct nfs4_fsid	cbl_fsid;
++	void			*cbl_cookie;		/* fs private */
++};
++
++/* layoutrecall request (from exported filesystem) */
++struct nfs4_layoutrecall {
++	struct kref			clr_ref;
++	struct nfsd4_pnfs_cb_layout	cb;	/* request */
++	struct list_head		clr_perclnt; /* on cl_layoutrecalls */
++	struct nfs4_client	       *clr_client;
++	struct nfs4_file	       *clr_file;
++	struct timespec			clr_time;	/* last activity */
++	struct super_block 		*clr_sb; /* We might not have a file */
++	struct nfs4_layoutrecall	*parent; /* The initiating recall */
++
++	void				*clr_args;	/* nfsd internal */
++};
++
++struct nfsd4_pnfs_cb_dev_item {
++	u32			cbd_notify_type;	/* request */
++	u32			cbd_layout_type;	/* request */
++	struct nfsd4_pnfs_deviceid cbd_devid;		/* request */
++	u32			cbd_immediate;		/* request */
++};
++
++struct nfsd4_pnfs_cb_dev_list {
++	u32				cbd_len;  /* request */
++	struct nfsd4_pnfs_cb_dev_item  *cbd_list; /* request */
++};
++
++/*
++ * callbacks provided by the nfsd
++ */
++struct pnfsd_cb_operations {
++	/* Generic callbacks */
++	int (*cb_layout_recall) (struct super_block *, struct inode *,
++				 struct nfsd4_pnfs_cb_layout *);
++	int (*cb_device_notify) (struct super_block *,
++				 struct nfsd4_pnfs_cb_dev_list *);
++
++	/* pNFS Files layout specific callbacks */
++
++	/* Callback from fs on MDS only */
++	int (*cb_get_state) (struct super_block *, struct pnfs_get_state *);
++	/* Callback from fs on DS only */
++	int (*cb_change_state) (struct pnfs_get_state *);
++};
++
++#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noarch/include/linux/nfsd/syscall.h
+--- linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-08-31 20:42:05.594107962 -0400
+@@ -29,6 +29,7 @@
+ /*#define NFSCTL_GETFH		6	/ * get an fh by ino DISCARDED */
+ #define NFSCTL_GETFD		7	/* get an fh by path (used by mountd) */
+ #define	NFSCTL_GETFS		8	/* get an fh by path with max FH len */
++#define	NFSCTL_FD2FH		9	/* get a fh from a fd */
+ 
+ /* SVC */
+ struct nfsctl_svc {
+@@ -71,6 +72,11 @@ struct nfsctl_fsparm {
+ 	int			gd_maxlen;
+ };
+ 
++/* FD2FH */
++struct nfsctl_fd2fh {
++	int			fd;
++};
++
+ /*
+  * This is the argument union.
+  */
+@@ -82,6 +88,7 @@ struct nfsctl_arg {
+ 		struct nfsctl_export	u_export;
+ 		struct nfsctl_fdparm	u_getfd;
+ 		struct nfsctl_fsparm	u_getfs;
++		struct nfsctl_fd2fh	u_fd2fh;
+ 		/*
+ 		 * The following dummy member is needed to preserve binary compatibility
+ 		 * on platforms where alignof(void*)>alignof(int).  It's needed because
+@@ -95,6 +102,7 @@ struct nfsctl_arg {
+ #define ca_export	u.u_export
+ #define ca_getfd	u.u_getfd
+ #define	ca_getfs	u.u_getfs
++#define	ca_fd2fh	u.u_fd2fh
+ };
+ 
+ union nfsctl_res {
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/include/linux/nfs_fs.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-31 20:41:19.168160480 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-31 20:42:05.584098019 -0400
+@@ -72,13 +72,20 @@ struct nfs_access_entry {
+ 	int			mask;
+ };
+ 
++struct nfs_lock_context {
++	atomic_t count;
++	struct list_head list;
++	struct nfs_open_context *open_context;
++	fl_owner_t lockowner;
++	pid_t pid;
++};
++
+ struct nfs4_state;
+ struct nfs_open_context {
+-	atomic_t count;
++	struct nfs_lock_context lock_context;
+ 	struct path path;
+ 	struct rpc_cred *cred;
+ 	struct nfs4_state *state;
+-	fl_owner_t lockowner;
+ 	fmode_t mode;
+ 
+ 	unsigned long flags;
+@@ -97,6 +104,27 @@ struct nfs_delegation;
+ 
+ struct posix_acl;
+ 
++struct pnfs_layout_hdr {
++	int			refcount;
++	struct list_head	layouts;   /* other client layouts */
++	struct list_head	segs;      /* layout segments list */
++	int			roc_iomode;/* return on close iomode, 0=none */
++	seqlock_t		seqlock;   /* Protects the stateid */
++	nfs4_stateid		stateid;
++	unsigned long		state;
++#define NFS_INO_RO_LAYOUT_FAILED 0         /* ro layoutget failed stop trying */
++#define NFS_INO_RW_LAYOUT_FAILED 1         /* rw layoutget failed stop trying */
++#define NFS_INO_LAYOUTCOMMIT     2         /* LAYOUTCOMMIT needed */
++
++	struct rpc_cred		*cred;     /* layoutcommit credential */
++	/* DH: These vars keep track of the maximum write range
++	 * so the values can be used for layoutcommit.
++	 */
++	loff_t			write_begin_pos;
++	loff_t			write_end_pos;
++	struct inode		*inode;
++};
++
+ /*
+  * nfs fs inode data in memory
+  */
+@@ -181,6 +209,13 @@ struct nfs_inode {
+ 	struct nfs_delegation	*delegation;
+ 	fmode_t			 delegation_state;
+ 	struct rw_semaphore	rwsem;
++
++	/* pNFS layout information */
++#if defined(CONFIG_NFS_V4_1)
++	wait_queue_head_t lo_waitq;
++	struct pnfs_layout_hdr *layout;
++	time_t pnfs_layout_suspend;
++#endif /* CONFIG_NFS_V4_1 */
+ #endif /* CONFIG_NFS_V4*/
+ #ifdef CONFIG_NFS_FSCACHE
+ 	struct fscache_cookie	*fscache;
+@@ -353,6 +388,8 @@ extern void nfs_setattr_update_inode(str
+ extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
+ extern void put_nfs_open_context(struct nfs_open_context *ctx);
+ extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode);
++extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx);
++extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx);
+ extern u64 nfs_compat_user_ino64(u64 fileid);
+ extern void nfs_fattr_init(struct nfs_fattr *fattr);
+ 
+@@ -481,8 +518,12 @@ extern void nfs_unblock_sillyrename(stru
+ extern int  nfs_congestion_kb;
+ extern int  nfs_writepage(struct page *page, struct writeback_control *wbc);
+ extern int  nfs_writepages(struct address_space *, struct writeback_control *);
+-extern int  nfs_flush_incompatible(struct file *file, struct page *page);
+-extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
++struct pnfs_layout_segment;
++extern int  nfs_flush_incompatible(struct file *file, struct page *page,
++				   struct pnfs_layout_segment *lseg);
++extern int  nfs_updatepage(struct file *, struct page *,
++			   unsigned int offset, unsigned int count,
++			   struct pnfs_layout_segment *lseg, void *fsdata);
+ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
+ 
+ /*
+@@ -604,6 +645,8 @@ extern void * nfs_root_data(void);
+ #define NFSDBG_CLIENT		0x0200
+ #define NFSDBG_MOUNT		0x0400
+ #define NFSDBG_FSCACHE		0x0800
++#define NFSDBG_PNFS		0x1000
++#define NFSDBG_PNFS_LD		0x2000
+ #define NFSDBG_ALL		0xFFFF
+ 
+ #ifdef __KERNEL__
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/include/linux/nfs_fs_sb.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-08-31 20:41:19.168160480 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-31 20:42:05.586087719 -0400
+@@ -15,6 +15,7 @@ struct nlm_host;
+ struct nfs4_sequence_args;
+ struct nfs4_sequence_res;
+ struct nfs_server;
++struct nfs4_minor_version_ops;
+ 
+ /*
+  * The nfs_client identifies our client state to the server.
+@@ -70,11 +71,7 @@ struct nfs_client {
+ 	 */
+ 	char			cl_ipaddr[48];
+ 	unsigned char		cl_id_uniquifier;
+-	int		     (* cl_call_sync)(struct nfs_server *server,
+-					      struct rpc_message *msg,
+-					      struct nfs4_sequence_args *args,
+-					      struct nfs4_sequence_res *res,
+-					      int cache_reply);
++	const struct nfs4_minor_version_ops *cl_mvops;
+ #endif /* CONFIG_NFS_V4 */
+ 
+ #ifdef CONFIG_NFS_V4_1
+@@ -85,6 +82,8 @@ struct nfs_client {
+ 	/* The flags used for obtaining the clientid during EXCHANGE_ID */
+ 	u32			cl_exchange_flags;
+ 	struct nfs4_session	*cl_session; 	/* sharred session */
++	struct list_head	cl_layouts;
++	struct nfs4_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ #ifdef CONFIG_NFS_FSCACHE
+@@ -92,6 +91,16 @@ struct nfs_client {
+ #endif
+ };
+ 
++static inline bool
++is_ds_only_client(struct nfs_client *clp)
++{
++#ifdef CONFIG_NFS_V4_1
++	return is_ds_only_session(clp->cl_exchange_flags);
++#else
++	return false;
++#endif
++}
++
+ /*
+  * NFS client parameters stored in the superblock.
+  */
+@@ -136,7 +145,7 @@ struct nfs_server {
+ #endif
+ 
+ #ifdef CONFIG_NFS_V4
+-	u32			attr_bitmask[2];/* V4 bitmask representing the set
++	u32			attr_bitmask[3];/* V4 bitmask representing the set
+ 						   of attributes supported on this
+ 						   filesystem */
+ 	u32			cache_consistency_bitmask[2];
+@@ -148,6 +157,15 @@ struct nfs_server {
+ 						   that are supported on this
+ 						   filesystem */
+ #endif
++
++#ifdef CONFIG_NFS_V4_1
++	u32				pnfs_blksize; /* layout_blksize attr */
++	struct pnfs_layoutdriver_type  *pnfs_curr_ld; /* Active layout driver */
++	void			       *pnfs_ld_data; /* Per-mount data */
++	unsigned int			ds_rsize;  /* Data server read size */
++	unsigned int			ds_wsize;  /* Data server write size */
++#endif /* CONFIG_NFS_V4_1 */
++
+ 	void (*destroy)(struct nfs_server *);
+ 
+ 	atomic_t active; /* Keep trace of any activity to this server */
+diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch/include/linux/nfs_iostat.h
+--- linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-08-31 20:42:05.587097913 -0400
+@@ -113,6 +113,9 @@ enum nfs_stat_eventcounters {
+ 	NFSIOS_SHORTREAD,
+ 	NFSIOS_SHORTWRITE,
+ 	NFSIOS_DELAY,
++	NFSIOS_PNFS_READ,
++	NFSIOS_PNFS_WRITE,
++	NFSIOS_PNFS_COMMIT,
+ 	__NFSIOS_COUNTSMAX,
+ };
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/include/linux/nfs_page.h
+--- linux-2.6.34.noarch/include/linux/nfs_page.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-08-31 20:42:05.588097898 -0400
+@@ -39,6 +39,7 @@ struct nfs_page {
+ 	struct list_head	wb_list;	/* Defines state of page: */
+ 	struct page		*wb_page;	/* page to read in/write out */
+ 	struct nfs_open_context	*wb_context;	/* File state context info */
++	struct nfs_lock_context	*wb_lock_context;	/* lock context info */
+ 	atomic_t		wb_complete;	/* i/os we're waiting for */
+ 	pgoff_t			wb_index;	/* Offset >> PAGE_CACHE_SHIFT */
+ 	unsigned int		wb_offset,	/* Offset & ~PAGE_CACHE_MASK */
+@@ -47,6 +48,7 @@ struct nfs_page {
+ 	struct kref		wb_kref;	/* reference count */
+ 	unsigned long		wb_flags;
+ 	struct nfs_writeverf	wb_verf;	/* Commit cookie */
++	struct pnfs_layout_segment *wb_lseg;	/* Pnfs layout info */
+ };
+ 
+ struct nfs_pageio_descriptor {
+@@ -60,6 +62,12 @@ struct nfs_pageio_descriptor {
+ 	int			(*pg_doio)(struct inode *, struct list_head *, unsigned int, size_t, int);
+ 	int 			pg_ioflags;
+ 	int			pg_error;
++	struct pnfs_layout_segment *pg_lseg;
++#ifdef CONFIG_NFS_V4_1
++	int			pg_iswrite;
++	int			pg_boundary;
++	int			(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
++#endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+ #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
+@@ -68,13 +76,15 @@ extern	struct nfs_page *nfs_create_reque
+ 					    struct inode *inode,
+ 					    struct page *page,
+ 					    unsigned int offset,
+-					    unsigned int count);
++					    unsigned int count,
++					    struct pnfs_layout_segment *lseg);
+ extern	void nfs_clear_request(struct nfs_page *req);
+ extern	void nfs_release_request(struct nfs_page *req);
+ 
+ 
+ extern	int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
+-			  pgoff_t idx_start, unsigned int npages, int tag);
++			  pgoff_t idx_start, unsigned int npages, int tag,
++			  int *use_pnfs);
+ extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
+ 			     struct inode *inode,
+ 			     int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
+diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/include/linux/nfs_xdr.h
+--- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-08-31 20:41:19.169171911 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-31 20:42:05.590087729 -0400
+@@ -3,6 +3,8 @@
+ 
+ #include <linux/nfsacl.h>
+ #include <linux/nfs3.h>
++#include <linux/nfs4.h>
++#include <linux/sunrpc/sched.h>
+ 
+ /*
+  * To change the maximum rsize and wsize supported by the NFS client, adjust
+@@ -10,7 +12,7 @@
+  * support a megabyte or more.  The default is left at 4096 bytes, which is
+  * reasonable for NFS over UDP.
+  */
+-#define NFS_MAX_FILE_IO_SIZE	(1048576U)
++#define NFS_MAX_FILE_IO_SIZE	(4U * 1048576U)
+ #define NFS_DEF_FILE_IO_SIZE	(4096U)
+ #define NFS_MIN_FILE_IO_SIZE	(1024U)
+ 
+@@ -113,6 +115,10 @@ struct nfs_fsinfo {
+ 	__u32			dtpref;	/* pref. readdir transfer size */
+ 	__u64			maxfilesize;
+ 	__u32			lease_time; /* in seconds */
++#if defined(CONFIG_NFS_V4_1)
++	__u32			layouttype; /* supported pnfs layout driver */
++	__u32			blksize; /* preferred pnfs io block size */
++#endif
+ };
+ 
+ struct nfs_fsstat {
+@@ -185,6 +191,125 @@ struct nfs4_get_lease_time_res {
+ 	struct nfs4_sequence_res	lr_seq_res;
+ };
+ 
++#define PNFS_LAYOUT_MAXSIZE 4096
++#define NFS4_PNFS_DEVICEID4_SIZE 16
++
++struct pnfs_deviceid {
++	char data[NFS4_PNFS_DEVICEID4_SIZE];
++};
++
++struct nfs4_layoutdriver_data {
++	__u32 len;
++	void *buf;
++};
++
++struct pnfs_layout_range {
++	u32 iomode;
++	u64 offset;
++	u64 length;
++};
++
++struct nfs4_layoutget_args {
++	__u32 type;
++	struct pnfs_layout_range range;
++	__u64 minlength;
++	__u32 maxcount;
++	struct inode *inode;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_layoutget_res {
++	__u32 return_on_close;
++	struct pnfs_layout_range range;
++	__u32 type;
++	nfs4_stateid stateid;
++	struct nfs4_layoutdriver_data layout;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct nfs4_layoutget {
++	struct nfs4_layoutget_args args;
++	struct nfs4_layoutget_res res;
++	struct pnfs_layout_segment **lsegpp;
++	int status;
++};
++
++struct nfs4_layoutcommit_args {
++	nfs4_stateid stateid;
++	__u64 lastbytewritten;
++	__u32 time_modify_changed;
++	struct timespec time_modify;
++	const u32 *bitmask;
++	struct nfs_fh *fh;
++	struct inode *inode;
++
++	/* Values set by layout driver */
++	struct pnfs_layout_range range;
++	__u32 layout_type;
++	void *layoutdriver_data;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_layoutcommit_res {
++	__u32 sizechanged;
++	__u64 newsize;
++	struct nfs_fattr *fattr;
++	const struct nfs_server *server;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct nfs4_layoutcommit_data {
++	struct rpc_task task;
++	struct rpc_cred *cred;
++	struct nfs_fattr fattr;
++	struct nfs4_layoutcommit_args args;
++	struct nfs4_layoutcommit_res res;
++	int status;
++};
++
++struct nfs4_layoutreturn_args {
++	__u32   reclaim;
++	__u32   layout_type;
++	__u32   return_type;
++	struct pnfs_layout_range range;
++	struct inode *inode;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_layoutreturn_res {
++	struct nfs4_sequence_res seq_res;
++	u32 lrs_present;
++	nfs4_stateid stateid;
++};
++
++struct nfs4_layoutreturn {
++	struct nfs4_layoutreturn_args args;
++	struct nfs4_layoutreturn_res res;
++	struct rpc_cred *cred;
++	int rpc_status;
++};
++
++struct nfs4_getdevicelist_args {
++	const struct nfs_fh *fh;
++	u32 layoutclass;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_getdevicelist_res {
++	struct pnfs_devicelist *devlist;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct nfs4_getdeviceinfo_args {
++	struct pnfs_device *pdev;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_getdeviceinfo_res {
++	struct pnfs_device *pdev;
++	struct nfs4_sequence_res seq_res;
++};
++
+ /*
+  * Arguments to the open call.
+  */
+@@ -196,8 +321,10 @@ struct nfs_openargs {
+ 	__u64                   clientid;
+ 	__u64                   id;
+ 	union {
+-		struct iattr *  attrs;    /* UNCHECKED, GUARDED */
+-		nfs4_verifier   verifier; /* EXCLUSIVE */
++		struct {
++			struct iattr *  attrs;    /* UNCHECKED, GUARDED */
++			nfs4_verifier   verifier; /* EXCLUSIVE */
++		};
+ 		nfs4_stateid	delegation;		/* CLAIM_DELEGATE_CUR */
+ 		fmode_t		delegation_type;	/* CLAIM_PREVIOUS */
+ 	} u;
+@@ -313,6 +440,10 @@ struct nfs_lockt_res {
+ 	struct nfs4_sequence_res	seq_res;
+ };
+ 
++struct nfs_release_lockowner_args {
++	struct nfs_lowner	lock_owner;
++};
++
+ struct nfs4_delegreturnargs {
+ 	const struct nfs_fh *fhandle;
+ 	const nfs4_stateid *stateid;
+@@ -332,6 +463,7 @@ struct nfs4_delegreturnres {
+ struct nfs_readargs {
+ 	struct nfs_fh *		fh;
+ 	struct nfs_open_context *context;
++	struct nfs_lock_context *lock_context;
+ 	__u64			offset;
+ 	__u32			count;
+ 	unsigned int		pgbase;
+@@ -352,6 +484,7 @@ struct nfs_readres {
+ struct nfs_writeargs {
+ 	struct nfs_fh *		fh;
+ 	struct nfs_open_context *context;
++	struct nfs_lock_context *lock_context;
+ 	__u64			offset;
+ 	__u32			count;
+ 	enum nfs3_stable_how	stable;
+@@ -846,7 +979,7 @@ struct nfs4_server_caps_arg {
+ };
+ 
+ struct nfs4_server_caps_res {
+-	u32				attr_bitmask[2];
++	u32				attr_bitmask[3];
+ 	u32				acl_bitmask;
+ 	u32				has_links;
+ 	u32				has_symlinks;
+@@ -961,6 +1094,27 @@ struct nfs_page;
+ 
+ #define NFS_PAGEVEC_SIZE	(8U)
+ 
++#if defined(CONFIG_NFS_V4_1)
++/* pnfsflag values */
++#define PNFS_NO_RPC		0x0001   /* non rpc result callback switch */
++
++/* pnfs-specific data needed for read, write, and commit calls */
++struct pnfs_call_data {
++	struct pnfs_layout_segment *lseg;
++	const struct rpc_call_ops *call_ops;
++	u32			orig_count;	/* for retry via MDS */
++	int			pnfs_error;
++	u8			pnfsflags;
++	u8			how;		/* for FLUSH_STABLE */
++};
++
++/* files layout-type specific data for read, write, and commit */
++struct pnfs_fl_call_data {
++	struct nfs_client	*ds_nfs_client;
++	__u64			orig_offset;
++};
++#endif /* CONFIG_NFS_V4_1 */
++
+ struct nfs_read_data {
+ 	int			flags;
+ 	struct rpc_task		task;
+@@ -976,10 +1130,16 @@ struct nfs_read_data {
+ #ifdef CONFIG_NFS_V4
+ 	unsigned long		timestamp;	/* For lease renewal */
+ #endif
++#if defined(CONFIG_NFS_V4_1)
++	struct pnfs_call_data	pdata;
++	struct pnfs_fl_call_data fldata;
++#endif /* CONFIG_NFS_V4_1 */
+ 	struct page		*page_array[NFS_PAGEVEC_SIZE];
+ };
+ 
+ struct nfs_write_data {
++	struct kref		refcount;	/* For pnfs commit splitting */
++	struct nfs_write_data	*parent;	/* For pnfs commit splitting */
+ 	int			flags;
+ 	struct rpc_task		task;
+ 	struct inode		*inode;
+@@ -995,6 +1155,10 @@ struct nfs_write_data {
+ #ifdef CONFIG_NFS_V4
+ 	unsigned long		timestamp;	/* For lease renewal */
+ #endif
++#if defined(CONFIG_NFS_V4_1)
++	struct pnfs_call_data	pdata;
++	struct pnfs_fl_call_data fldata;
++#endif /* CONFIG_NFS_V4_1 */
+ 	struct page		*page_array[NFS_PAGEVEC_SIZE];
+ };
+ 
+@@ -1008,6 +1172,7 @@ struct nfs_rpc_ops {
+ 	const struct dentry_operations *dentry_ops;
+ 	const struct inode_operations *dir_inode_ops;
+ 	const struct inode_operations *file_inode_ops;
++	const struct file_operations *file_ops;
+ 
+ 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
+ 			    struct nfs_fsinfo *);
+@@ -1072,6 +1237,7 @@ struct nfs_rpc_ops {
+ extern const struct nfs_rpc_ops	nfs_v2_clientops;
+ extern const struct nfs_rpc_ops	nfs_v3_clientops;
+ extern const struct nfs_rpc_ops	nfs_v4_clientops;
++extern const struct nfs_rpc_ops	pnfs_v4_clientops;
+ extern struct rpc_version	nfs_version2;
+ extern struct rpc_version	nfs_version3;
+ extern struct rpc_version	nfs_version4;
+diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.noarch/include/linux/panfs_shim_api.h
+--- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-08-31 20:42:05.598087997 -0400
++++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-08-31 20:42:05.599087710 -0400
+@@ -0,0 +1,57 @@
++#ifndef _PANFS_SHIM_API_H
++#define _PANFS_SHIM_API_H
++
++/*
++ * imported panfs functions
++ */
++struct panfs_export_operations {
++	int (*convert_rc)(pan_status_t rc);
++
++	int (*sm_sec_t_get_size_otw)(
++		pan_sm_sec_otw_t *var,
++		pan_size_t *core_sizep,
++		pan_size_t *wire_size,
++		void *buf_end);
++
++	int (*sm_sec_t_unmarshall)(
++		pan_sm_sec_otw_t *in,
++		pan_sm_sec_t *out,
++		void *buf,
++		pan_size_t size,
++		pan_size_t *otw_consumed,
++		pan_size_t *in_core_consumed);
++
++	int (*ucreds_get)(void **ucreds_pp);
++
++	void (*ucreds_put)(void *ucreds);
++
++	int (*sam_read)(
++		pan_sam_access_flags_t  flags,
++		pan_sam_read_args_t    *args_p,
++		pan_sam_obj_sec_t      *obj_sec_p,
++		pan_sg_entry_t         *data_p,
++		void                   *ucreds,
++		pan_sam_read_cb_t       closure,
++		void                   *user_arg1,
++		void                   *user_arg2,
++		pan_sam_read_res_t     *res_p);
++
++	int (*sam_write)(
++		pan_sam_access_flags_t  flags,
++		pan_sam_write_args_t   *args_p,
++		pan_sam_obj_sec_t      *obj_sec_p,
++		pan_sg_entry_t         *data_p,
++		void                   *ucreds,
++		pan_sam_write_cb_t      closure,
++		void                   *user_arg1,
++		void                   *user_arg2,
++		pan_sam_write_res_t    *res_p);
++};
++
++extern int
++panfs_shim_register(struct panfs_export_operations *ops);
++
++extern int
++panfs_shim_unregister(void);
++
++#endif /* _PANFS_SHIM_API_H */
+diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h
+--- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-08-31 20:42:05.600025088 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-08-31 20:42:05.600025088 -0400
+@@ -0,0 +1,439 @@
++/*
++ *  pnfs_osd_xdr.h
++ *
++ *  pNFS-osd on-the-wire data structures
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#ifndef __PNFS_OSD_XDR_H__
++#define __PNFS_OSD_XDR_H__
++
++#include <linux/nfs_fs.h>
++#include <linux/nfs_page.h>
++#include <linux/exp_xdr.h>
++#include <scsi/osd_protocol.h>
++
++#define PNFS_OSD_OSDNAME_MAXSIZE 256
++
++/*
++ * START OF "GENERIC" DECODE ROUTINES.
++ *   These may look a little ugly since they are imported from a "generic"
++ * set of XDR encode/decode routines which are intended to be shared by
++ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
++ *
++ * If the pain of reading these is too great, it should be a straightforward
++ * task to translate them into Linux-specific versions which are more
++ * consistent with the style used in NFSv2/v3...
++ */
++#define READ32(x)         (x) = ntohl(*p++)
++#define READ64(x)         do {			\
++	(x) = (u64)ntohl(*p++) << 32;		\
++	(x) |= ntohl(*p++);			\
++} while (0)
++#define COPYMEM(x, nbytes) do {			\
++	memcpy((x), p, nbytes);			\
++	p += XDR_QUADLEN(nbytes);		\
++} while (0)
++
++/*
++ * draft-ietf-nfsv4-minorversion-22
++ * draft-ietf-nfsv4-pnfs-obj-12
++ */
++
++/* Layout Structure */
++
++enum pnfs_osd_raid_algorithm4 {
++	PNFS_OSD_RAID_0		= 1,
++	PNFS_OSD_RAID_4		= 2,
++	PNFS_OSD_RAID_5		= 3,
++	PNFS_OSD_RAID_PQ	= 4     /* Reed-Solomon P+Q */
++};
++
++/*   struct pnfs_osd_data_map4 {
++ *       uint32_t                    odm_num_comps;
++ *       length4                     odm_stripe_unit;
++ *       uint32_t                    odm_group_width;
++ *       uint32_t                    odm_group_depth;
++ *       uint32_t                    odm_mirror_cnt;
++ *       pnfs_osd_raid_algorithm4    odm_raid_algorithm;
++ *   };
++ */
++struct pnfs_osd_data_map {
++	u32	odm_num_comps;
++	u64	odm_stripe_unit;
++	u32	odm_group_width;
++	u32	odm_group_depth;
++	u32	odm_mirror_cnt;
++	u32	odm_raid_algorithm;
++};
++
++static inline int
++pnfs_osd_data_map_xdr_sz(void)
++{
++	return 1 + 2 + 1 + 1 + 1 + 1;
++}
++
++static inline size_t
++pnfs_osd_data_map_incore_sz(void)
++{
++	return sizeof(struct pnfs_osd_data_map);
++}
++
++/*   struct pnfs_osd_objid4 {
++ *       deviceid4       oid_device_id;
++ *       uint64_t        oid_partition_id;
++ *       uint64_t        oid_object_id;
++ *   };
++ */
++struct pnfs_osd_objid {
++	struct pnfs_deviceid	oid_device_id;
++	u64			oid_partition_id;
++	u64			oid_object_id;
++};
++
++/* For printout. I use "dev(%llx:%llx)", _DEVID_LO(), _DEVID_HI BE style */
++#define _DEVID_LO(oid_device_id) \
++	((unsigned long long)be64_to_cpup((__be64 *)(oid_device_id).data))
++
++#define _DEVID_HI(oid_device_id) \
++	((unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id).data) + 1))
++
++static inline int
++pnfs_osd_objid_xdr_sz(void)
++{
++	return (NFS4_PNFS_DEVICEID4_SIZE / 4) + 2 + 2;
++}
++
++static inline size_t
++pnfs_osd_objid_incore_sz(void)
++{
++	return sizeof(struct pnfs_osd_objid);
++}
++
++enum pnfs_osd_version {
++	PNFS_OSD_MISSING              = 0,
++	PNFS_OSD_VERSION_1            = 1,
++	PNFS_OSD_VERSION_2            = 2
++};
++
++struct pnfs_osd_opaque_cred {
++	u32 cred_len;
++	u8 *cred;
++};
++
++static inline int
++pnfs_osd_opaque_cred_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++	u32 n;
++
++	READ32(n);
++	p += XDR_QUADLEN(n);
++	return p - start;
++}
++
++static inline size_t
++pnfs_osd_opaque_cred_incore_sz(u32 *p)
++{
++	u32 n;
++
++	READ32(n);
++	return XDR_QUADLEN(n) * 4;
++}
++
++enum pnfs_osd_cap_key_sec {
++	PNFS_OSD_CAP_KEY_SEC_NONE     = 0,
++	PNFS_OSD_CAP_KEY_SEC_SSV      = 1,
++};
++
++/*   struct pnfs_osd_object_cred4 {
++ *       pnfs_osd_objid4         oc_object_id;
++ *       pnfs_osd_version4       oc_osd_version;
++ *       pnfs_osd_cap_key_sec4   oc_cap_key_sec;
++ *       opaque                  oc_capability_key<>;
++ *       opaque                  oc_capability<>;
++ *   };
++ */
++struct pnfs_osd_object_cred {
++	struct pnfs_osd_objid		oc_object_id;
++	u32				oc_osd_version;
++	u32				oc_cap_key_sec;
++	struct pnfs_osd_opaque_cred	oc_cap_key;
++	struct pnfs_osd_opaque_cred	oc_cap;
++};
++
++static inline int
++pnfs_osd_object_cred_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++
++	p += pnfs_osd_objid_xdr_sz() + 2;
++	p += pnfs_osd_opaque_cred_xdr_sz(p);
++	p += pnfs_osd_opaque_cred_xdr_sz(p);
++	return p - start;
++}
++
++static inline size_t
++pnfs_osd_object_cred_incore_sz(u32 *p)
++{
++	size_t sz = sizeof(struct pnfs_osd_object_cred);
++
++	p += pnfs_osd_objid_xdr_sz() + 2;
++	sz += pnfs_osd_opaque_cred_incore_sz(p);
++	p += pnfs_osd_opaque_cred_xdr_sz(p);
++	sz += pnfs_osd_opaque_cred_incore_sz(p);
++	return sz;
++}
++
++/*   struct pnfs_osd_layout4 {
++ *       pnfs_osd_data_map4      olo_map;
++ *       uint32_t                olo_comps_index;
++ *       pnfs_osd_object_cred4   olo_components<>;
++ *   };
++ */
++struct pnfs_osd_layout {
++	struct pnfs_osd_data_map	olo_map;
++	u32				olo_comps_index;
++	u32				olo_num_comps;
++	struct pnfs_osd_object_cred	*olo_comps;
++};
++
++static inline int
++pnfs_osd_layout_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++	u32 n;
++
++	p += pnfs_osd_data_map_xdr_sz() + 1;
++	READ32(n);
++	while ((int)(n--) > 0)
++		p += pnfs_osd_object_cred_xdr_sz(p);
++	return p - start;
++}
++
++static inline size_t
++pnfs_osd_layout_incore_sz(u32 *p)
++{
++	u32 n;
++	size_t sz;
++
++	p += pnfs_osd_data_map_xdr_sz() + 1;
++	READ32(n);
++	sz = sizeof(struct pnfs_osd_layout);
++	while ((int)(n--) > 0) {
++		sz += pnfs_osd_object_cred_incore_sz(p);
++		p += pnfs_osd_object_cred_xdr_sz(p);
++	}
++	return sz;
++}
++
++/* Device Address */
++
++enum pnfs_osd_targetid_type {
++	OBJ_TARGET_ANON = 1,
++	OBJ_TARGET_SCSI_NAME = 2,
++	OBJ_TARGET_SCSI_DEVICE_ID = 3,
++};
++
++/*   union pnfs_osd_targetid4 switch (pnfs_osd_targetid_type4 oti_type) {
++ *       case OBJ_TARGET_SCSI_NAME:
++ *           string              oti_scsi_name<>;
++ *
++ *       case OBJ_TARGET_SCSI_DEVICE_ID:
++ *           opaque              oti_scsi_device_id<>;
++ *
++ *       default:
++ *           void;
++ *   };
++ *
++ *   union pnfs_osd_targetaddr4 switch (bool ota_available) {
++ *       case TRUE:
++ *           netaddr4            ota_netaddr;
++ *       case FALSE:
++ *           void;
++ *   };
++ *
++ *   struct pnfs_osd_deviceaddr4 {
++ *       pnfs_osd_targetid4      oda_targetid;
++ *       pnfs_osd_targetaddr4    oda_targetaddr;
++ *       uint64_t                oda_lun;
++ *       opaque                  oda_systemid<>;
++ *       pnfs_osd_object_cred4   oda_root_obj_cred;
++ *       opaque                  oda_osdname<>;
++ *   };
++ */
++struct pnfs_osd_targetid {
++	u32				oti_type;
++	struct nfs4_string		oti_scsi_device_id;
++};
++
++enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 };
++
++/*   struct netaddr4 {
++ *       // see struct rpcb in RFC1833
++ *       string r_netid<>;    // network id
++ *       string r_addr<>;     // universal address
++ *   };
++ */
++struct pnfs_osd_net_addr {
++	struct nfs4_string	r_netid;
++	struct nfs4_string	r_addr;
++};
++
++struct pnfs_osd_targetaddr {
++	u32				ota_available;
++	struct pnfs_osd_net_addr	ota_netaddr;
++};
++
++enum {
++	NETWORK_ID_MAX = 16 / 4,
++	UNIVERSAL_ADDRESS_MAX = 64 / 4,
++	PNFS_OSD_TARGETADDR_MAX = 3 +  NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX,
++};
++
++struct pnfs_osd_deviceaddr {
++	struct pnfs_osd_targetid	oda_targetid;
++	struct pnfs_osd_targetaddr	oda_targetaddr;
++	u8				oda_lun[8];
++	struct nfs4_string		oda_systemid;
++	struct pnfs_osd_object_cred	oda_root_obj_cred;
++	struct nfs4_string		oda_osdname;
++};
++
++enum {
++	ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4,
++	PNFS_OSD_DEVICEADDR_MAX =
++		PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX +
++		2 /*oda_lun*/ +
++		1 + OSD_SYSTEMID_LEN +
++		1 + ODA_OSDNAME_MAX,
++};
++
++/* LAYOUTCOMMIT: layoutupdate */
++
++/*   union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) {
++ *       case TRUE:
++ *           int64_t     dsu_delta;
++ *       case FALSE:
++ *           void;
++ *   };
++ *
++ *   struct pnfs_osd_layoutupdate4 {
++ *       pnfs_osd_deltaspaceused4    olu_delta_space_used;
++ *       bool                        olu_ioerr_flag;
++ *   };
++ */
++struct pnfs_osd_layoutupdate {
++	u32	dsu_valid;
++	s64	dsu_delta;
++	u32	olu_ioerr_flag;
++};
++
++/* LAYOUTRETURN: I/O Error Report */
++
++enum pnfs_osd_errno {
++	PNFS_OSD_ERR_EIO		= 1,
++	PNFS_OSD_ERR_NOT_FOUND		= 2,
++	PNFS_OSD_ERR_NO_SPACE		= 3,
++	PNFS_OSD_ERR_BAD_CRED		= 4,
++	PNFS_OSD_ERR_NO_ACCESS		= 5,
++	PNFS_OSD_ERR_UNREACHABLE	= 6,
++	PNFS_OSD_ERR_RESOURCE		= 7
++};
++
++/*   struct pnfs_osd_ioerr4 {
++ *       pnfs_osd_objid4     oer_component;
++ *       length4             oer_comp_offset;
++ *       length4             oer_comp_length;
++ *       bool                oer_iswrite;
++ *       pnfs_osd_errno4     oer_errno;
++ *   };
++ */
++struct pnfs_osd_ioerr {
++	struct pnfs_osd_objid	oer_component;
++	u64			oer_comp_offset;
++	u64			oer_comp_length;
++	u32			oer_iswrite;
++	u32			oer_errno;
++};
++
++static inline unsigned
++pnfs_osd_ioerr_xdr_sz(void)
++{
++	return pnfs_osd_objid_xdr_sz() + 2 + 2 + 1 + 1;
++}
++
++/* OSD XDR API */
++
++/* Layout helpers */
++extern struct pnfs_osd_layout *pnfs_osd_xdr_decode_layout(
++	struct pnfs_osd_layout *layout, u32 *p);
++
++extern int pnfs_osd_xdr_encode_layout(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_layout *layout);
++
++/* Device Info helpers */
++
++/* First pass calculate total size for space needed */
++extern size_t pnfs_osd_xdr_deviceaddr_incore_sz(u32 *p);
++
++/* Note: some strings pointed to inside @deviceaddr might point
++ * to space inside @p. @p should stay valid while @deviceaddr
++ * is in use.
++ * It is assumed that @deviceaddr points to bigger memory of size
++ * calculated in first pass by pnfs_osd_xdr_deviceaddr_incore_sz()
++ */
++extern void pnfs_osd_xdr_decode_deviceaddr(
++	struct pnfs_osd_deviceaddr *deviceaddr, u32 *p);
++
++/* For Servers */
++extern int pnfs_osd_xdr_encode_deviceaddr(
++	struct exp_xdr_stream *xdr, struct pnfs_osd_deviceaddr *devaddr);
++
++/* layoutupdate (layout_commit) xdr helpers */
++extern int
++pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
++				 struct pnfs_osd_layoutupdate *lou);
++extern __be32 *
++pnfs_osd_xdr_decode_layoutupdate(struct pnfs_osd_layoutupdate *lou, __be32 *p);
++
++/* osd_ioerror encoding/decoding (layout_return) */
++extern int
++pnfs_osd_xdr_encode_ioerr(struct xdr_stream *xdr, struct pnfs_osd_ioerr *ioerr);
++extern __be32 *
++pnfs_osd_xdr_decode_ioerr(struct pnfs_osd_ioerr *ioerr, __be32 *p);
++
++#endif /* __PNFS_OSD_XDR_H__ */
+diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/include/linux/posix_acl.h
+--- linux-2.6.34.noarch/include/linux/posix_acl.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-08-31 20:42:05.601087875 -0400
+@@ -8,6 +8,7 @@
+ #ifndef __LINUX_POSIX_ACL_H
+ #define __LINUX_POSIX_ACL_H
+ 
++#include <linux/fs.h>
+ #include <linux/slab.h>
+ 
+ #define ACL_UNDEFINED_ID	(-1)
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-08-31 20:42:05.602100892 -0400
+@@ -14,6 +14,8 @@
+ /* size of an XDR encoding unit in bytes, i.e. 32bit */
+ #define XDR_UNIT	(4)
+ 
++#include <linux/types.h>
++
+ /* spec defines authentication flavor as an unsigned 32 bit integer */
+ typedef u32	rpc_authflavor_t;
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-08-31 20:42:05.603108001 -0400
+@@ -3,6 +3,7 @@
+ 
+ #ifdef __KERNEL__
+ 
++#include <linux/fs.h>
+ #include <linux/workqueue.h>
+ 
+ struct rpc_pipe_msg {
+@@ -11,6 +12,10 @@ struct rpc_pipe_msg {
+ 	size_t len;
+ 	size_t copied;
+ 	int errno;
++#define PIPEFS_AUTOFREE_RPCMSG       0x01 /* frees rpc_pipe_msg */
++#define PIPEFS_AUTOFREE_RPCMSG_DATA  0x02 /* frees rpc_pipe_msg->data */
++#define PIPEFS_AUTOFREE_UPCALL_MSG   PIPEFS_AUTOFREE_RPCMSG_DATA
++	u8 flags;
+ };
+ 
+ struct rpc_pipe_ops {
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-08-31 20:42:05.603108001 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-08-31 20:42:05.603108001 -0400
+@@ -0,0 +1,111 @@
++/*
++ *  Copyright (c) 2008 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  David M. Richter <richterd@citi.umich.edu>
++ *
++ *  Drawing on work done by Andy Adamson <andros@citi.umich.edu> and
++ *  Marius Eriksen <marius@monkey.org>.  Thanks for the help over the
++ *  years, guys.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *  With thanks to CITI's project sponsor and partner, IBM.
++ */
++
++#ifndef _SIMPLE_RPC_PIPEFS_H_
++#define _SIMPLE_RPC_PIPEFS_H_
++
++#include <linux/fs.h>
++#include <linux/list.h>
++#include <linux/mount.h>
++#include <linux/sched.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++
++
++#define payload_of(headerp)  ((void *)(headerp + 1))
++
++/*
++ * struct pipefs_hdr -- the generic message format for simple_rpc_pipefs.
++ * Messages may simply be the header itself, although having an optional
++ * data payload follow the header allows much more flexibility.
++ *
++ * Messages are created using pipefs_alloc_init_msg() and
++ * pipefs_alloc_init_msg_padded(), both of which accept a pointer to an
++ * (optional) data payload.
++ *
++ * Given a struct pipefs_hdr *msg that has a struct foo payload, the data
++ * can be accessed using: struct foo *foop = payload_of(msg)
++ */
++struct pipefs_hdr {
++	u32 msgid;
++	u8  type;
++	u8  flags;
++	u16 totallen; /* length of entire message, including hdr itself */
++	u32 status;
++};
++
++/*
++ * struct pipefs_list -- a type of list used for tracking callers who've made an
++ * upcall and are blocked waiting for a reply.
++ *
++ * See pipefs_queue_upcall_waitreply() and pipefs_assign_upcall_reply().
++ */
++struct pipefs_list {
++	struct list_head list;
++	spinlock_t list_lock;
++};
++
++
++/* See net/sunrpc/simple_rpc_pipefs.c for more info on using these functions. */
++extern struct dentry *pipefs_mkpipe(const char *name,
++				    const struct rpc_pipe_ops *ops,
++				    int wait_for_open);
++extern void pipefs_closepipe(struct dentry *pipe);
++extern void pipefs_init_list(struct pipefs_list *list);
++extern struct pipefs_hdr *pipefs_alloc_init_msg(u32 msgid, u8 type, u8 flags,
++						void *data, u16 datalen);
++extern struct pipefs_hdr *pipefs_alloc_init_msg_padded(u32 msgid, u8 type,
++						       u8 flags, void *data,
++						       u16 datalen, u16 padlen);
++extern struct pipefs_hdr *pipefs_queue_upcall_waitreply(struct dentry *pipe,
++							struct pipefs_hdr *msg,
++							struct pipefs_list
++							*uplist, u8 upflags,
++							u32 timeout);
++extern int pipefs_queue_upcall_noreply(struct dentry *pipe,
++				       struct pipefs_hdr *msg, u8 upflags);
++extern int pipefs_assign_upcall_reply(struct pipefs_hdr *reply,
++				      struct pipefs_list *uplist);
++extern struct pipefs_hdr *pipefs_readmsg(struct file *filp,
++					 const char __user *src, size_t len);
++extern ssize_t pipefs_generic_upcall(struct file *filp,
++				     struct rpc_pipe_msg *rpcmsg,
++				     char __user *dst, size_t buflen);
++extern void pipefs_generic_destroy_msg(struct rpc_pipe_msg *rpcmsg);
++
++#endif /* _SIMPLE_RPC_PIPEFS_H_ */
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-08-31 20:42:05.604049784 -0400
+@@ -166,4 +166,41 @@ static inline char *__svc_print_addr(con
+ 
+ 	return buf;
+ }
++
++/*
++ * Print a network address in a universal format (see rfc1833 and nfsv4.1)
++ */
++static inline int __svc_print_netaddr(struct sockaddr *addr,
++				      struct xdr_netobj *na)
++{
++	u16 port;
++	ssize_t len;
++
++	switch (addr->sa_family) {
++	case AF_INET: {
++		struct sockaddr_in *sin = (struct sockaddr_in *)addr;
++		port = ntohs(sin->sin_port);
++
++		len = snprintf(na->data, na->len, "%pI4.%u.%u",
++				&sin->sin_addr,
++				port >> 8, port & 0xff);
++		break;
++	}
++	case AF_INET6: {
++		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
++		port = ntohs(sin6->sin6_port);
++
++		len = snprintf(na->data, na->len, "%pI6.%u.%u",
++				&sin6->sin6_addr,
++				port >> 8, port & 0xff);
++		break;
++	}
++	default:
++		snprintf(na->data, na->len, "unknown address type: %d",
++			 addr->sa_family);
++		len = -EINVAL;
++		break;
++	}
++	return len;
++}
+ #endif /* SUNRPC_SVC_XPRT_H */
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xdr.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-08-31 20:41:19.173118431 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-31 20:42:05.605107904 -0400
+@@ -131,6 +131,13 @@ xdr_decode_hyper(__be32 *p, __u64 *valp)
+ 	return p + 2;
+ }
+ 
++static inline __be32 *
++xdr_decode_opaque_fixed(__be32 *p, void *ptr, unsigned int len)
++{
++	memcpy(ptr, p, len);
++	return p + XDR_QUADLEN(len);
++}
++
+ /*
+  * Adjust kvec to reflect end of xdr'ed data (RPC client XDR)
+  */
+@@ -197,6 +204,7 @@ struct xdr_stream {
+ 
+ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
++extern __be32 *xdr_rewind_stream(struct xdr_stream *xdr, __be32 *q);
+ extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
+ 		unsigned int base, unsigned int len);
+ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+diff -up linux-2.6.34.noarch/localversion-pnfs.orig linux-2.6.34.noarch/localversion-pnfs
+--- linux-2.6.34.noarch/localversion-pnfs.orig	2010-08-31 20:42:05.605107904 -0400
++++ linux-2.6.34.noarch/localversion-pnfs	2010-08-31 20:42:05.605107904 -0400
+@@ -0,0 +1 @@
++-pnfs
+diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/sunrpc/Makefile
+--- linux-2.6.34.noarch/net/sunrpc/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-08-31 20:42:05.606020148 -0400
+@@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprt
+ 	    svc.o svcsock.o svcauth.o svcauth_unix.o \
+ 	    addr.o rpcb_clnt.o timer.o xdr.o \
+ 	    sunrpc_syms.o cache.o rpc_pipe.o \
+-	    svc_xprt.o
++	    svc_xprt.o simple_rpc_pipefs.o
+ sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o
+ sunrpc-$(CONFIG_PROC_FS) += stats.o
+ sunrpc-$(CONFIG_SYSCTL) += sysctl.o
+diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c
+--- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-08-31 20:42:05.606020148 -0400
++++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-08-31 20:42:05.607108065 -0400
+@@ -0,0 +1,424 @@
++/*
++ *  net/sunrpc/simple_rpc_pipefs.c
++ *
++ *  Copyright (c) 2008 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  David M. Richter <richterd@citi.umich.edu>
++ *
++ *  Drawing on work done by Andy Adamson <andros@citi.umich.edu> and
++ *  Marius Eriksen <marius@monkey.org>.  Thanks for the help over the
++ *  years, guys.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *  With thanks to CITI's project sponsor and partner, IBM.
++ */
++
++#include <linux/completion.h>
++#include <linux/uaccess.h>
++#include <linux/module.h>
++#include <linux/sunrpc/simple_rpc_pipefs.h>
++
++
++/*
++ * Make an rpc_pipefs pipe named @name at the root of the mounted rpc_pipefs
++ * filesystem.
++ *
++ * If @wait_for_open is non-zero and an upcall is later queued but the userland
++ * end of the pipe has not yet been opened, the upcall will remain queued until
++ * the pipe is opened; otherwise, the upcall queueing will return with -EPIPE.
++ */
++struct dentry *pipefs_mkpipe(const char *name, const struct rpc_pipe_ops *ops,
++			     int wait_for_open)
++{
++	struct dentry *dir, *pipe;
++	struct vfsmount *mnt;
++
++	mnt = rpc_get_mount();
++	if (IS_ERR(mnt)) {
++		pipe = ERR_CAST(mnt);
++		goto out;
++	}
++	dir = mnt->mnt_root;
++	if (!dir) {
++		pipe = ERR_PTR(-ENOENT);
++		goto out;
++	}
++	pipe = rpc_mkpipe(dir, name, NULL, ops,
++			  wait_for_open ? RPC_PIPE_WAIT_FOR_OPEN : 0);
++out:
++	return pipe;
++}
++EXPORT_SYMBOL(pipefs_mkpipe);
++
++/*
++ * Shutdown a pipe made by pipefs_mkpipe().
++ * XXX: do we need to retain an extra reference on the mount?
++ */
++void pipefs_closepipe(struct dentry *pipe)
++{
++	rpc_unlink(pipe);
++	rpc_put_mount();
++}
++EXPORT_SYMBOL(pipefs_closepipe);
++
++/*
++ * Initialize a struct pipefs_list -- a way to keep track of callers
++ * who're blocked having made an upcall and are awaiting a reply.
++ *
++ * See pipefs_queue_upcall_waitreply() and pipefs_assign_upcall_reply() for how
++ * to use them.
++ */
++inline void pipefs_init_list(struct pipefs_list *list)
++{
++	INIT_LIST_HEAD(&list->list);
++	spin_lock_init(&list->list_lock);
++}
++EXPORT_SYMBOL(pipefs_init_list);
++
++/*
++ * Alloc/init a generic pipefs message header and copy @datalen bytes of
++ * @data into the message body, followed by @padlen zeroed bytes.
++ *
++ * Returns the new message, ERR_PTR(-E2BIG) if header + payload + padding
++ * exceeds PAGE_SIZE or cannot be represented in the u16 totallen field, or
++ * ERR_PTR(-ENOMEM) if the allocation fails.
++ *
++ * A note on naming: an upcall "message" in simple_rpc_pipefs is a
++ * struct pipefs_hdr (possibly with an attached message body).  When an
++ * upcall is made the struct pipefs_hdr is assigned to a struct rpc_pipe_msg,
++ * which is only the -vehicle- by which the "real" message is delivered and
++ * processed.
++ */
++struct pipefs_hdr *pipefs_alloc_init_msg_padded(u32 msgid, u8 type, u8 flags,
++					   void *data, u16 datalen, u16 padlen)
++{
++	size_t totallen;	/* wide enough that the sum below cannot wrap */
++	struct pipefs_hdr *msg = NULL;
++
++	totallen = sizeof(*msg) + datalen + padlen;
++	if (totallen > PAGE_SIZE || totallen != (u16)totallen) {
++		msg = ERR_PTR(-E2BIG);
++		goto out;
++	}
++
++	msg = kzalloc(totallen, GFP_KERNEL);
++	if (!msg) {
++		msg = ERR_PTR(-ENOMEM);
++		goto out;
++	}
++
++	msg->msgid = msgid;
++	msg->type = type;
++	msg->flags = flags;
++	msg->totallen = totallen;	/* fits: checked against u16 above */
++	memcpy(payload_of(msg), data, datalen);
++out:
++	return msg;
++}
++EXPORT_SYMBOL(pipefs_alloc_init_msg_padded);
++
++/*
++ * See the description of pipefs_alloc_init_msg_padded().
++ */
++struct pipefs_hdr *pipefs_alloc_init_msg(u32 msgid, u8 type, u8 flags,
++				    void *data, u16 datalen)
++{
++	return pipefs_alloc_init_msg_padded(msgid, type, flags, data,
++					    datalen, 0);
++}
++EXPORT_SYMBOL(pipefs_alloc_init_msg);
++
++
++static void pipefs_init_rpcmsg(struct rpc_pipe_msg *rpcmsg,
++			       struct pipefs_hdr *msg, u8 upflags)
++{
++	memset(rpcmsg, 0, sizeof(*rpcmsg));
++	rpcmsg->data = msg;
++	rpcmsg->len = msg->totallen;
++	rpcmsg->flags = upflags;
++}
++
++static struct rpc_pipe_msg *pipefs_alloc_init_rpcmsg(struct pipefs_hdr *msg,
++						     u8 upflags)
++{
++	struct rpc_pipe_msg *rpcmsg;
++
++	rpcmsg = kmalloc(sizeof(*rpcmsg), GFP_KERNEL);
++	if (!rpcmsg)
++		return ERR_PTR(-ENOMEM);
++
++	pipefs_init_rpcmsg(rpcmsg, msg, upflags);
++	return rpcmsg;
++}
++
++
++/* represents an upcall that'll block and wait for a reply */
++struct pipefs_upcall {
++	u32 msgid;
++	struct rpc_pipe_msg rpcmsg;
++	struct list_head list;
++	wait_queue_head_t waitq;
++	struct pipefs_hdr *reply;
++};
++
++
++static void pipefs_init_upcall_waitreply(struct pipefs_upcall *upcall,
++					 struct pipefs_hdr *msg, u8 upflags)
++{
++	upcall->reply = NULL;
++	upcall->msgid = msg->msgid;
++	INIT_LIST_HEAD(&upcall->list);
++	init_waitqueue_head(&upcall->waitq);
++	pipefs_init_rpcmsg(&upcall->rpcmsg, msg, upflags);
++}
++
++static int __pipefs_queue_upcall_waitreply(struct dentry *pipe,
++					   struct pipefs_upcall *upcall,
++					   struct pipefs_list *uplist,
++					   u32 timeout)
++{
++	int err = 0;
++	DECLARE_WAITQUEUE(wq, current);
++
++	add_wait_queue(&upcall->waitq, &wq);
++	spin_lock(&uplist->list_lock);
++	list_add(&upcall->list, &uplist->list);
++	spin_unlock(&uplist->list_lock);
++
++	/* go !RUNNING before queueing so an early reply's wake_up() sticks */
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	err = rpc_queue_upcall(pipe->d_inode, &upcall->rpcmsg);
++	if (err < 0)
++		goto out;
++
++	if (timeout) {
++		/* retval of 0 means timer expired */
++		err = schedule_timeout(timeout);
++		if (err == 0 && upcall->reply == NULL)
++			err = -ETIMEDOUT;
++	} else
++		schedule();
++
++out:
++	__set_current_state(TASK_RUNNING);
++	spin_lock(&uplist->list_lock);
++	list_del_init(&upcall->list);
++	spin_unlock(&uplist->list_lock);
++	remove_wait_queue(&upcall->waitq, &wq);
++	return err;
++}
++
++/*
++ * Queue a pipefs msg for an upcall to userspace, place the calling thread
++ * on @uplist, and block the thread to wait for a reply.  If @timeout is
++ * nonzero, the thread will be blocked for at most @timeout jiffies.
++ *
++ * (To convert time units into jiffies, consider the functions
++ *  msecs_to_jiffies(), usecs_to_jiffies(), timeval_to_jiffies(), and
++ *  timespec_to_jiffies().)
++ *
++ * Once a reply is received by your downcall handler, call
++ * pipefs_assign_upcall_reply() with @uplist to find the corresponding upcall,
++ * assign the reply, and wake the waiting thread.
++ *
++ * This function's return value pointer may be an error and should be checked
++ * with IS_ERR() before attempting to access the reply message.
++ *
++ * Callers are responsible for freeing @msg, unless pipefs_generic_destroy_msg()
++ * is used as the ->destroy_msg() callback and the PIPEFS_AUTOFREE_UPCALL_MSG
++ * flag is set in @upflags.  See also rpc_pipe_fs.h.
++ */
++struct pipefs_hdr *pipefs_queue_upcall_waitreply(struct dentry *pipe,
++					    struct pipefs_hdr *msg,
++					    struct pipefs_list *uplist,
++					    u8 upflags, u32 timeout)
++{
++	int err = 0;
++	struct pipefs_upcall upcall;
++
++	pipefs_init_upcall_waitreply(&upcall, msg, upflags);
++	err = __pipefs_queue_upcall_waitreply(pipe, &upcall, uplist, timeout);
++	if (err < 0) {
++		kfree(upcall.reply);
++		upcall.reply = ERR_PTR(err);
++	}
++
++	return upcall.reply;
++}
++EXPORT_SYMBOL(pipefs_queue_upcall_waitreply);
++
++/*
++ * Queue a pipefs msg for an upcall to userspace and immediately return (i.e.,
++ * no reply is expected).
++ *
++ * Callers are responsible for freeing @msg, unless pipefs_generic_destroy_msg()
++ * is used as the ->destroy_msg() callback and the PIPEFS_AUTOFREE_UPCALL_MSG
++ * flag is set in @upflags.  See also rpc_pipe_fs.h.
++ */
++int pipefs_queue_upcall_noreply(struct dentry *pipe, struct pipefs_hdr *msg,
++				u8 upflags)
++{
++	int err;
++	struct rpc_pipe_msg *rpcmsg;
++
++	upflags |= PIPEFS_AUTOFREE_RPCMSG;
++	rpcmsg = pipefs_alloc_init_rpcmsg(msg, upflags);
++	if (IS_ERR(rpcmsg))
++		return PTR_ERR(rpcmsg);
++
++	err = rpc_queue_upcall(pipe->d_inode, rpcmsg);
++	if (err < 0)
++		kfree(rpcmsg);	/* never queued: nothing else will free it */
++	return err;
++}
++EXPORT_SYMBOL(pipefs_queue_upcall_noreply);
++
++
++static struct pipefs_upcall *pipefs_find_upcall_msgid(u32 msgid,
++						 struct pipefs_list *uplist)
++{
++	struct pipefs_upcall *upcall;
++
++	/* caller holds uplist->list_lock, which keeps list entries alive */
++	list_for_each_entry(upcall, &uplist->list, list)
++		if (upcall->msgid == msgid)
++			return upcall;
++	return NULL;
++}
++
++/*
++ * In your rpc_pipe_ops->downcall() handler, once you've read in a downcall
++ * message and have determined that it is a reply to a waiting upcall,
++ * you can use this function to find the appropriate upcall, assign the result,
++ * and wake the upcall thread.
++ *
++ * The reply message must have the same msgid as the original upcall message's.
++ * Returns 0 once the reply has been handed over, or -ENOENT if no upcall on
++ * @uplist is waiting for that msgid.
++ *
++ * See also pipefs_queue_upcall_waitreply() and pipefs_readmsg().
++ */
++int pipefs_assign_upcall_reply(struct pipefs_hdr *reply,
++			       struct pipefs_list *uplist)
++{
++	struct pipefs_upcall *upcall;
++
++	/* assign and wake under the lock so the waiter cannot go away first */
++	spin_lock(&uplist->list_lock);
++	upcall = pipefs_find_upcall_msgid(reply->msgid, uplist);
++	if (upcall) {
++		upcall->reply = reply;
++		wake_up(&upcall->waitq);
++	}
++	spin_unlock(&uplist->list_lock);
++	if (upcall)
++		return 0;
++	printk(KERN_ERR "%s: ERROR: have reply but no matching upcall "
++		"for msgid %d\n", __func__, reply->msgid);
++	return -ENOENT;
++}
++EXPORT_SYMBOL(pipefs_assign_upcall_reply);
++
++/*
++ * Generic method to read-in and return a newly-allocated message which begins
++ * with a struct pipefs_hdr.
++ */
++struct pipefs_hdr *pipefs_readmsg(struct file *filp, const char __user *src,
++			     size_t len)
++{
++	int err = 0, hdrsize;
++	struct pipefs_hdr *msg = NULL;
++
++	hdrsize = sizeof(*msg);
++	if (len < hdrsize) {
++		printk(KERN_ERR "%s: ERROR: header is too short (%d vs %d)\n",
++		       __func__, (int) len, hdrsize);
++		err = -EINVAL;
++		goto out;
++	}
++
++	msg = kzalloc(len, GFP_KERNEL);
++	if (!msg) {
++		err = -ENOMEM;
++		goto out;
++	}
++	if (copy_from_user(msg, src, len))
++		err = -EFAULT;
++out:
++	if (err) {
++		kfree(msg);
++		msg = ERR_PTR(err);
++	}
++	return msg;
++}
++EXPORT_SYMBOL(pipefs_readmsg);
++
++/*
++ * Generic rpc_pipe_ops->upcall() handler implementation.
++ *
++ * Don't call this directly: to make an upcall, use
++ * pipefs_queue_upcall_waitreply() or pipefs_queue_upcall_noreply().
++ */
++ssize_t pipefs_generic_upcall(struct file *filp, struct rpc_pipe_msg *rpcmsg,
++			      char __user *dst, size_t buflen)
++{
++	char *data;
++	ssize_t len, left;
++
++	data = (char *)rpcmsg->data + rpcmsg->copied;
++	len = rpcmsg->len - rpcmsg->copied;
++	if (len > buflen)
++		len = buflen;
++	/* copy_to_user() returns the number of bytes it could NOT copy */
++	left = copy_to_user(dst, data, len);
++	if (len && left == len) {
++		rpcmsg->errno = -EFAULT;
++		return -EFAULT;
++	}
++
++	len -= left;
++	rpcmsg->copied += len;
++	rpcmsg->errno = 0;
++	return len;
++}
++EXPORT_SYMBOL(pipefs_generic_upcall);
++
++/*
++ * Generic rpc_pipe_ops->destroy_msg() handler implementation.
++ *
++ * Items are only freed if @rpcmsg->flags has been set appropriately.
++ * See pipefs_queue_upcall_noreply() and rpc_pipe_fs.h.
++ */
++void pipefs_generic_destroy_msg(struct rpc_pipe_msg *rpcmsg)
++{
++	if (rpcmsg->flags & PIPEFS_AUTOFREE_UPCALL_MSG)
++		kfree(rpcmsg->data);
++	if (rpcmsg->flags & PIPEFS_AUTOFREE_RPCMSG)
++		kfree(rpcmsg);
++}
++EXPORT_SYMBOL(pipefs_generic_destroy_msg);
+diff -up linux-2.6.34.noarch/net/sunrpc/xdr.c.orig linux-2.6.34.noarch/net/sunrpc/xdr.c
+--- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-08-31 20:41:19.188144022 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-31 20:42:05.607108065 -0400
+@@ -395,24 +395,29 @@ xdr_shrink_pagelen(struct xdr_buf *buf, 
+ {
+ 	struct kvec *tail;
+ 	size_t copy;
+-	char *p;
+ 	unsigned int pglen = buf->page_len;
++	unsigned int tailbuf_len;
+ 
+ 	tail = buf->tail;
+ 	BUG_ON (len > pglen);
+ 
++	tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len;
++
+ 	/* Shift the tail first */
+-	if (tail->iov_len != 0) {
+-		p = (char *)tail->iov_base + len;
++	if (tailbuf_len != 0) {
++		unsigned int free_space = tailbuf_len - tail->iov_len;
++
++		if (len < free_space)
++			free_space = len;
++		tail->iov_len += free_space;
++
++		copy = len;
+ 		if (tail->iov_len > len) {
+-			copy = tail->iov_len - len;
+-			memmove(p, tail->iov_base, copy);
++			char *p = (char *)tail->iov_base + len;
++			memmove(p, tail->iov_base, tail->iov_len - len);
+ 		} else
+-			buf->buflen -= len;
+-		/* Copy from the inlined pages into the tail */
+-		copy = len;
+-		if (copy > tail->iov_len)
+ 			copy = tail->iov_len;
++		/* Copy from the inlined pages into the tail */
+ 		_copy_from_pages((char *)tail->iov_base,
+ 				buf->pages, buf->page_base + pglen - len,
+ 				copy);
+@@ -496,6 +501,27 @@ __be32 * xdr_reserve_space(struct xdr_st
+ EXPORT_SYMBOL_GPL(xdr_reserve_space);
+ 
+ /**
++ * xdr_rewind_stream - rewind a stream back to some checkpoint
++ * @xdr: pointer to xdr_stream
++ * @q: some checkpoint at historical place of @xdr
++ *
++ * Restores an xdr stream to some historical point. @q must be
++ * a logical xdr point in the past that was sampled by @q = @xdr->p.
++ */
++__be32 *xdr_rewind_stream(struct xdr_stream *xdr, __be32 *q)
++{
++	size_t nbytes = (xdr->p - q) << 2;
++
++	BUG_ON(xdr->p < q);
++	BUG_ON(nbytes > xdr->iov->iov_len || nbytes > xdr->buf->len);
++	xdr->p = q;
++	xdr->iov->iov_len -= nbytes;
++	xdr->buf->len -= nbytes;
++	return q;
++}
++EXPORT_SYMBOL_GPL(xdr_rewind_stream);
++
++/**
+  * xdr_write_pages - Insert a list of pages into an XDR buffer for sending
+  * @xdr: pointer to xdr_stream
+  * @pages: list of pages

From cf0a5bb309bf8f67c9d4549718137d08d20da726 Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Sat, 4 Sep 2010 09:23:12 -0400
Subject: [PATCH 19/20] Removed localversion-nfs file

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 pnfs-all-2.6.35-2010-08-24-f13.patch | 393 +++++++++++++--------------
 1 file changed, 194 insertions(+), 199 deletions(-)

diff --git a/pnfs-all-2.6.35-2010-08-24-f13.patch b/pnfs-all-2.6.35-2010-08-24-f13.patch
index 17d1c844d..7d82d9fa4 100644
--- a/pnfs-all-2.6.35-2010-08-24-f13.patch
+++ b/pnfs-all-2.6.35-2010-08-24-f13.patch
@@ -1,6 +1,6 @@
 diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arch/um/os-Linux/mem.c
---- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-08-31 20:41:16.924243041 -0400
-+++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-08-31 20:42:05.486160576 -0400
+--- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-09-04 09:20:04.110038647 -0400
++++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-09-04 09:21:44.875202803 -0400
 @@ -13,6 +13,7 @@
  #include <sys/stat.h>
  #include <sys/mman.h>
@@ -11,7 +11,7 @@ diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arc
  #include "os.h"
 diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.c
 --- linux-2.6.34.noarch/block/genhd.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/block/genhd.c	2010-08-31 20:42:05.487160201 -0400
++++ linux-2.6.34.noarch/block/genhd.c	2010-09-04 09:21:44.875202803 -0400
 @@ -1009,6 +1009,7 @@ static void disk_release(struct device *
  struct class block_class = {
  	.name		= "block",
@@ -21,8 +21,8 @@ diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.
  static char *block_devnode(struct device *dev, mode_t *mode)
  {
 diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt
---- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-08-31 20:42:05.486160576 -0400
-+++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-08-31 20:42:05.486160576 -0400
+--- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-09-04 09:21:44.876222743 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-09-04 09:21:44.876222743 -0400
 @@ -0,0 +1,211 @@
 +(c) 2007 Network Appliance Inc.
 +
@@ -236,8 +236,8 @@ diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.
 +
 +
 diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/drivers/md/dm-ioctl.c
---- linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig	2010-08-31 20:41:17.063232968 -0400
-+++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-08-31 20:42:05.488160560 -0400
+--- linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig	2010-09-04 09:20:04.252180557 -0400
++++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-09-04 09:21:44.877242928 -0400
 @@ -657,6 +657,12 @@ static int dev_create(struct dm_ioctl *p
  	return r;
  }
@@ -292,7 +292,7 @@ diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/driv
  	int r;
 diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drivers/scsi/hosts.c
 --- linux-2.6.34.noarch/drivers/scsi/hosts.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-08-31 20:42:05.489160594 -0400
++++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-09-04 09:21:44.879035601 -0400
 @@ -49,7 +49,7 @@ static void scsi_host_cls_release(struct
  	put_device(&class_to_shost(dev)->shost_gendev);
  }
@@ -304,7 +304,7 @@ diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drive
  };
 diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/exofs.h
 --- linux-2.6.34.noarch/fs/exofs/exofs.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-08-31 20:42:05.492243039 -0400
++++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-09-04 09:21:44.879035601 -0400
 @@ -36,13 +36,9 @@
  #include <linux/fs.h>
  #include <linux/time.h>
@@ -360,8 +360,8 @@ diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/
 +
  #endif
 diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs/export.c
---- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-08-31 20:42:05.493222759 -0400
-+++ linux-2.6.34.noarch/fs/exofs/export.c	2010-08-31 20:42:05.493222759 -0400
+--- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-09-04 09:21:44.880171068 -0400
++++ linux-2.6.34.noarch/fs/exofs/export.c	2010-09-04 09:21:44.880171068 -0400
 @@ -0,0 +1,396 @@
 +/*
 + * export.c - Implementation of the pnfs_export_operations
@@ -761,7 +761,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs
 +}
 diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/inode.c
 --- linux-2.6.34.noarch/fs/exofs/inode.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-08-31 20:42:05.494222756 -0400
++++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-09-04 09:21:44.881160952 -0400
 @@ -833,7 +833,7 @@ void exofs_truncate(struct inode *inode)
  	if (unlikely(wait_obj_created(oi)))
  		goto fail;
@@ -781,7 +781,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/
   * Fill in an inode read from the OSD and set it up for use
 diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/Kbuild
 --- linux-2.6.34.noarch/fs/exofs/Kbuild.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-08-31 20:42:05.490222933 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-09-04 09:21:44.882160660 -0400
 @@ -13,4 +13,5 @@
  #
  
@@ -790,7 +790,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/K
  obj-$(CONFIG_EXOFS_FS) += exofs.o
 diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/Kconfig
 --- linux-2.6.34.noarch/fs/exofs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-08-31 20:42:05.491232880 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-09-04 09:21:44.883039027 -0400
 @@ -1,6 +1,7 @@
  config EXOFS_FS
  	tristate "exofs: OSD based file system support"
@@ -801,7 +801,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/
  	  as its backing storage.
 diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/super.c
 --- linux-2.6.34.noarch/fs/exofs/super.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exofs/super.c	2010-08-31 20:42:05.496073173 -0400
++++ linux-2.6.34.noarch/fs/exofs/super.c	2010-09-04 09:21:44.883039027 -0400
 @@ -621,6 +621,7 @@ static int exofs_fill_super(struct super
  	sb->s_fs_info = sbi;
  	sb->s_op = &exofs_sops;
@@ -812,7 +812,7 @@ diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/
  		EXOFS_ERR("ERROR: exofs_iget failed\n");
 diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exportfs/expfs.c
 --- linux-2.6.34.noarch/fs/exportfs/expfs.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-08-31 20:42:05.497212975 -0400
++++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-09-04 09:21:44.884180594 -0400
 @@ -16,6 +16,13 @@
  #include <linux/namei.h>
  #include <linux/sched.h>
@@ -829,7 +829,7 @@ diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exp
  
 diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/exportfs/Makefile
 --- linux-2.6.34.noarch/fs/exportfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-08-31 20:42:05.496073173 -0400
++++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-09-04 09:21:44.885160697 -0400
 @@ -3,4 +3,7 @@
  
  obj-$(CONFIG_EXPORTFS) += exportfs.o
@@ -840,8 +840,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/ex
 +exportfs-$(CONFIG_EXPORTFS_OSD_LAYOUT)	+= pnfs_osd_xdr_srv.o
 +exportfs-$(CONFIG_EXPORTFS_BLOCK_LAYOUT) += nfs4blocklayoutxdr.o
 diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c
---- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-08-31 20:42:05.497212975 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-08-31 20:42:05.498113655 -0400
+--- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-09-04 09:21:44.885160697 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-09-04 09:21:44.885160697 -0400
 @@ -0,0 +1,158 @@
 +/*
 + *  linux/fs/nfsd/nfs4blocklayoutxdr.c
@@ -1002,8 +1002,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.
 +}
 +EXPORT_SYMBOL_GPL(blocklayout_encode_layout);
 diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c
---- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-08-31 20:42:05.498113655 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-08-31 20:42:05.498113655 -0400
+--- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-09-04 09:21:44.886051895 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-09-04 09:21:44.886051895 -0400
 @@ -0,0 +1,218 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -1224,8 +1224,8 @@ diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.n
 +}
 +EXPORT_SYMBOL(filelayout_encode_layout);
 diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c
---- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-08-31 20:42:05.499125509 -0400
-+++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-08-31 20:42:05.499125509 -0400
+--- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-09-04 09:21:44.887054758 -0400
++++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-09-04 09:21:44.887054758 -0400
 @@ -0,0 +1,289 @@
 +/*
 + *  pnfs_osd_xdr_enc.c
@@ -1518,7 +1518,7 @@ diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.no
 +EXPORT_SYMBOL(pnfs_osd_xdr_decode_ioerr);
 diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gfs2/ops_fstype.c
 --- linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-08-31 20:42:05.500123860 -0400
++++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-09-04 09:21:44.888035389 -0400
 @@ -19,6 +19,7 @@
  #include <linux/gfs2_ondisk.h>
  #include <linux/slow-work.h>
@@ -1539,7 +1539,7 @@ diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gf
  	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
 diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
 --- linux-2.6.34.noarch/fs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/Kconfig	2010-08-31 20:42:05.490222933 -0400
++++ linux-2.6.34.noarch/fs/Kconfig	2010-09-04 09:21:44.889035490 -0400
 @@ -224,6 +224,31 @@ config LOCKD_V4
  config EXPORTFS
  	tristate
@@ -1573,8 +1573,8 @@ diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
  	tristate
  	select FS_POSIX_ACL
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-08-31 20:42:05.503222878 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-08-31 20:42:05.503222878 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-09-04 09:21:44.890035431 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-09-04 09:21:44.890035431 -0400
 @@ -0,0 +1,66 @@
 +#include <linux/module.h>
 +#include <linux/uaccess.h>
@@ -1643,8 +1643,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.or
 +	return;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-08-31 20:42:05.504232855 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-08-31 20:42:05.504232855 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-09-04 09:21:44.891045310 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-09-04 09:21:44.891045310 -0400
 @@ -0,0 +1,1160 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.c
@@ -2807,8 +2807,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.
 +module_init(nfs4blocklayout_init);
 +module_exit(nfs4blocklayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-08-31 20:42:05.506119071 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-08-31 20:42:05.506119071 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-09-04 09:21:44.892025716 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-09-04 09:21:44.892025716 -0400
 @@ -0,0 +1,335 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayoutdev.c
@@ -3146,8 +3146,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.
 +	goto out;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-08-31 20:42:05.506119071 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-08-31 20:42:05.506119071 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-09-04 09:21:44.893035500 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-09-04 09:21:44.893035500 -0400
 @@ -0,0 +1,120 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayoutdm.c
@@ -3270,8 +3270,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.3
 +	}
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h
---- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-08-31 20:42:05.505169618 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-08-31 20:42:05.505169618 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-09-04 09:21:44.894045279 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-09-04 09:21:44.894045279 -0400
 @@ -0,0 +1,302 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.h
@@ -3576,8 +3576,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.
 +
 +#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c
---- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-08-31 20:42:05.507113260 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-08-31 20:42:05.508119925 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-09-04 09:21:44.895035248 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-09-04 09:21:44.895035248 -0400
 @@ -0,0 +1,948 @@
 +/*
 + *  linux/fs/nfs/blocklayout/blocklayout.h
@@ -4528,8 +4528,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noar
 +	}
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile
---- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-08-31 20:42:05.502212803 -0400
-+++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-08-31 20:42:05.502212803 -0400
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-09-04 09:21:44.895035248 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-09-04 09:21:44.896025369 -0400
 @@ -0,0 +1,6 @@
 +#
 +# Makefile for the pNFS block layout driver kernel module
@@ -4539,7 +4539,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarc
 +			extents.o block-device-discovery-pipe.o
 diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/callback.h
 --- linux-2.6.34.noarch/fs/nfs/callback.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-08-31 20:42:05.508119925 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-09-04 09:21:44.896025369 -0400
 @@ -111,6 +111,13 @@ extern int nfs41_validate_delegation_sta
  
  #define RCA4_TYPE_MASK_RDATA_DLG	0
@@ -4596,7 +4596,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/c
  extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
 diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/nfs/callback_proc.c
 --- linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-08-31 20:42:05.509093330 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-09-04 09:21:44.897056128 -0400
 @@ -8,10 +8,15 @@
  #include <linux/nfs4.h>
  #include <linux/nfs_fs.h>
@@ -5079,7 +5079,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/
  	return status;
 diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/nfs/callback_xdr.c
 --- linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-08-31 20:42:05.510143651 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-09-04 09:21:44.898072186 -0400
 @@ -22,6 +22,8 @@
  #define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
  
@@ -5281,8 +5281,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/n
  		.process_op = (callback_process_op_t)nfs4_callback_sequence,
  		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
 diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/client.c
---- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-08-31 20:41:19.144140225 -0400
-+++ linux-2.6.34.noarch/fs/nfs/client.c	2010-08-31 20:42:05.511222861 -0400
+--- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-09-04 09:20:05.988202702 -0400
++++ linux-2.6.34.noarch/fs/nfs/client.c	2010-09-04 09:21:44.900025165 -0400
 @@ -39,6 +39,7 @@
  #include <net/ipv6.h>
  #include <linux/nfs_xdr.h>
@@ -5491,8 +5491,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/cli
  		goto error;
  
 diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/bl_com.c
---- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-08-31 20:42:05.550110844 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-08-31 20:42:05.550110844 -0400
+--- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-09-04 09:21:44.900025165 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-09-04 09:21:44.901035455 -0400
 @@ -0,0 +1,292 @@
 +#if defined(CONFIG_SPNFS_BLOCK)
 +
@@ -5787,8 +5787,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/b
 +}
 +#endif /* CONFIG_SPNFS_BLOCK */
 diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/bl_ops.c
---- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-08-31 20:42:05.551222888 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-08-31 20:42:05.551222888 -0400
+--- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-09-04 09:21:44.902035254 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-09-04 09:21:44.902035254 -0400
 @@ -0,0 +1,1672 @@
 +/*
 + *  bl_ops.c
@@ -7463,8 +7463,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/b
 +
 +#endif /* CONFIG_SPNFS_BLOCK */
 diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs/delegation.c
---- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-08-31 20:41:19.144140225 -0400
-+++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-08-31 20:42:05.512106042 -0400
+--- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-09-04 09:20:05.988202702 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-09-04 09:21:44.903025737 -0400
 @@ -104,7 +104,8 @@ again:
  			continue;
  		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
@@ -7541,7 +7541,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs
  	rcu_read_unlock();
 diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs/delegation.h
 --- linux-2.6.34.noarch/fs/nfs/delegation.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-08-31 20:42:05.513114811 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-09-04 09:21:44.904035627 -0400
 @@ -34,9 +34,7 @@ enum {
  int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
  void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
@@ -7554,8 +7554,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs
  
  struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
 diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
---- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-08-31 20:41:19.196140434 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-08-31 20:42:05.553222784 -0400
+--- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-09-04 09:20:06.039203080 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-09-04 09:21:44.905045348 -0400
 @@ -17,11 +17,19 @@
  #include <linux/module.h>
  #include <linux/exportfs.h>
@@ -7733,7 +7733,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/e
  	svcauth_unix_purge();
 diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/direct.c
 --- linux-2.6.34.noarch/fs/nfs/direct.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-08-31 20:42:05.514196343 -0400
++++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-09-04 09:21:44.906025356 -0400
 @@ -267,6 +267,38 @@ static const struct rpc_call_ops nfs_rea
  	.rpc_release = nfs_direct_read_release,
  };
@@ -7979,7 +7979,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/dir
  		user_addr += bytes;
 diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kconfig
 --- linux-2.6.34.noarch/fs/nfsd/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-08-31 20:42:05.549222922 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-09-04 09:21:44.907035472 -0400
 @@ -79,3 +79,52 @@ config NFSD_V4
  	  available from http://linux-nfs.org/.
  
@@ -8035,7 +8035,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kc
 +	  If unsure, say N.
 diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/Makefile
 --- linux-2.6.34.noarch/fs/nfsd/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-08-31 20:42:05.549222922 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-09-04 09:21:44.907035472 -0400
 @@ -11,3 +11,7 @@ nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs
  nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
  nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
@@ -8045,8 +8045,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/M
 +nfsd-$(CONFIG_SPNFS)	+= spnfs_com.o spnfs_ops.o
 +nfsd-$(CONFIG_SPNFS_BLOCK) += bl_com.o bl_ops.o
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-08-31 20:41:19.197150385 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-08-31 20:42:05.554114789 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-09-04 09:20:06.040212867 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-09-04 09:21:44.908055511 -0400
 @@ -40,7 +40,6 @@
  
  #define NFSPROC4_CB_NULL 0
@@ -8586,8 +8586,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/
 +}
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-08-31 20:42:05.556172071 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-08-31 20:42:05.556172071 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-09-04 09:21:44.910025108 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-09-04 09:21:44.910025108 -0400
 @@ -0,0 +1,1679 @@
 +/******************************************************************************
 + *
@@ -10269,8 +10269,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfs
 +	return status;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-08-31 20:42:05.557222774 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-08-31 20:42:05.557222774 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-09-04 09:21:44.911025728 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-09-04 09:21:44.911025728 -0400
 @@ -0,0 +1,461 @@
 +/******************************************************************************
 + *
@@ -10734,8 +10734,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/n
 +};
 +EXPORT_SYMBOL(pnfs_dlm_export_ops);
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-08-31 20:42:05.558141620 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-08-31 20:42:05.558141620 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-09-04 09:21:44.912035398 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-09-04 09:21:44.912035398 -0400
 @@ -0,0 +1,620 @@
 +/*
 +*  linux/fs/nfsd/nfs4pnfsds.c
@@ -11358,8 +11358,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nf
 +
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-08-31 20:41:19.198160463 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-08-31 20:42:05.559129617 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-09-04 09:20:06.041223204 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-09-04 09:21:44.913035888 -0400
 @@ -34,10 +34,14 @@
   */
  #include <linux/file.h>
@@ -11834,8 +11834,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd
  
  static const char *nfsd4_op_name(unsigned opnum)
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-08-31 20:41:19.200150153 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-08-31 20:42:05.561202607 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-09-04 09:20:06.043212709 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-09-04 09:21:44.916015197 -0400
 @@ -42,6 +42,8 @@
  #include "xdr4.h"
  #include "vfs.h"
@@ -12351,8 +12351,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs
  }
  
 diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
---- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-08-31 20:41:19.202150173 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-08-31 20:42:05.563232916 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-09-04 09:20:06.045212665 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-09-04 09:21:44.918025318 -0400
 @@ -47,9 +47,14 @@
  #include <linux/nfsd_idmap.h>
  #include <linux/nfs4_acl.h>
@@ -12971,8 +12971,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/
  	[OP_SEQUENCE]		= (nfsd4_enc)nfsd4_encode_sequence,
  	[OP_SET_SSV]		= (nfsd4_enc)nfsd4_encode_noop,
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
---- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-08-31 20:41:19.203150982 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-08-31 20:42:05.565212801 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-09-04 09:20:06.047233081 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-09-04 09:21:44.920025397 -0400
 @@ -13,10 +13,15 @@
  #include <linux/nfsd/syscall.h>
  #include <linux/lockd/lockd.h>
@@ -13149,8 +13149,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/n
  	remove_proc_entry("fs/nfs/exports", NULL);
  	remove_proc_entry("fs/nfs", NULL);
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
---- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-08-31 20:41:19.204160960 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-08-31 20:42:05.565212801 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-09-04 09:20:06.047233081 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-09-04 09:21:44.920025397 -0400
 @@ -285,11 +285,17 @@ extern time_t nfsd4_grace;
  #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
  	NFSD4_SUPPORTED_ATTRS_WORD0
@@ -13172,7 +13172,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfs
  {
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.c
 --- linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-08-31 20:42:05.566222921 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-09-04 09:21:44.921045937 -0400
 @@ -10,6 +10,7 @@
  #include <linux/exportfs.h>
  
@@ -13210,7 +13210,7 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nf
  		__u32 tfh[2];
 diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.h
 --- linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-08-31 20:42:05.567233002 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-09-04 09:21:44.922035547 -0400
 @@ -14,6 +14,7 @@ enum nfsd_fsid {
  	FSID_UUID8,
  	FSID_UUID16,
@@ -13263,8 +13263,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nf
 +
  #endif /* _LINUX_NFSD_FH_INT_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/nfssvc.c
---- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-08-31 20:41:17.274232911 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-08-31 20:42:05.568144414 -0400
+--- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-09-04 09:20:04.514160362 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-09-04 09:21:44.923045353 -0400
 @@ -115,7 +115,7 @@ struct svc_program		nfsd_program = {
  
  };
@@ -13275,8 +13275,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/n
  int nfsd_vers(int vers, enum vers_op change)
  {
 diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pnfsd.h
---- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-08-31 20:42:05.569090615 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-08-31 20:42:05.569090615 -0400
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-09-04 09:21:44.923045353 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-09-04 09:21:44.923045353 -0400
 @@ -0,0 +1,143 @@
 +/*
 + *  Copyright (c) 2005 The Regents of the University of Michigan.
@@ -13422,8 +13422,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pn
 +
 +#endif /* LINUX_NFSD_PNFSD_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c
---- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-08-31 20:42:05.569090615 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-08-31 20:42:05.569090615 -0400
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-09-04 09:21:44.924046083 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-09-04 09:21:44.924046083 -0400
 @@ -0,0 +1,225 @@
 +/*
 + * linux/fs/nfsd/pnfs_lexp.c
@@ -13651,8 +13651,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nf
 +	inode->i_sb->s_pnfs_op = &pnfsd_lexp_ops;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_com.c
---- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-08-31 20:42:05.570119170 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-08-31 20:42:05.570119170 -0400
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-09-04 09:21:44.925035828 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-09-04 09:21:44.925035828 -0400
 @@ -0,0 +1,535 @@
 +/*
 + * fs/nfsd/spnfs_com.c
@@ -14190,8 +14190,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfs
 +}
 +#endif /* CONFIG_PROC_FS */
 diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c
---- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-08-31 20:42:05.571097807 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-08-31 20:42:05.572091128 -0400
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-09-04 09:21:44.926030099 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-09-04 09:21:44.926030099 -0400
 @@ -0,0 +1,878 @@
 +/*
 + * fs/nfsd/spnfs_ops.c
@@ -15072,8 +15072,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfs
 +	return 0;
 +}
 diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
---- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-08-31 20:41:19.205016844 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-08-31 20:42:05.572091128 -0400
+--- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-09-04 09:20:06.048233523 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-09-04 09:21:44.927025219 -0400
 @@ -242,6 +242,12 @@ struct nfs4_client {
  	u32			cl_cb_seq_nr;
  	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
@@ -15190,8 +15190,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/st
 +
  #endif   /* NFSD4_STATE_H */
 diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.c
---- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-08-31 20:41:17.275233561 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-08-31 20:42:05.573121119 -0400
+--- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-09-04 09:20:04.515160297 -0400
++++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-09-04 09:21:44.929025356 -0400
 @@ -37,7 +37,12 @@
  #ifdef CONFIG_NFSD_V4
  #include <linux/nfs4_acl.h>
@@ -15318,8 +15318,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.
  out_nfserr:
  	err = nfserrno(host_err);
 diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
---- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-08-31 20:41:19.206170424 -0400
-+++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-08-31 20:42:05.575139084 -0400
+--- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-09-04 09:20:06.049232898 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-09-04 09:21:44.930035442 -0400
 @@ -37,6 +37,8 @@
  #ifndef _LINUX_NFSD_XDR4_H
  #define _LINUX_NFSD_XDR4_H
@@ -15396,8 +15396,8 @@ diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr
  	struct nfs4_replay *			replay;
  };
 diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.c
---- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-08-31 20:41:19.146161064 -0400
-+++ linux-2.6.34.noarch/fs/nfs/file.c	2010-08-31 20:42:05.515139585 -0400
+--- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-09-04 09:20:05.990223533 -0400
++++ linux-2.6.34.noarch/fs/nfs/file.c	2010-09-04 09:21:44.930035442 -0400
 @@ -36,6 +36,7 @@
  #include "internal.h"
  #include "iostat.h"
@@ -15515,8 +15515,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.
  	if (!ret)
  		return VM_FAULT_LOCKED;
 diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inode.c
---- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-08-31 20:41:19.149170418 -0400
-+++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-08-31 20:42:05.516222809 -0400
+--- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-09-04 09:20:05.993222927 -0400
++++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-09-04 09:21:44.932035441 -0400
 @@ -48,6 +48,7 @@
  #include "internal.h"
  #include "fscache.h"
@@ -15730,8 +15730,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inod
  	nfs_fs_proc_exit();
  	nfsiod_stop();
 diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/internal.h
---- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-08-31 20:41:19.149170418 -0400
-+++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-08-31 20:42:05.517099944 -0400
+--- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-09-04 09:20:05.993222927 -0400
++++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-09-04 09:21:44.933035332 -0400
 @@ -139,6 +139,16 @@ extern struct nfs_server *nfs_clone_serv
  					   struct nfs_fattr *);
  extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
@@ -15792,7 +15792,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/i
  		struct page *, struct page *);
 diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kconfig
 --- linux-2.6.34.noarch/fs/nfs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-08-31 20:42:05.500123860 -0400
++++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-09-04 09:21:44.933035332 -0400
 @@ -79,10 +79,48 @@ config NFS_V4_1
  	depends on NFS_V4 && EXPERIMENTAL
  	help
@@ -15845,7 +15845,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kcon
  	depends on NFS_FS=y && IP_PNP
 diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Makefile
 --- linux-2.6.34.noarch/fs/nfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-08-31 20:42:05.501268752 -0400
++++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-09-04 09:21:44.934046035 -0400
 @@ -15,5 +15,12 @@ nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4x
  			   delegation.o idmap.o \
  			   callback.o callback_xdr.o callback_proc.o \
@@ -15860,8 +15860,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Mak
 +obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
 +obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
 diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs3proc.c
---- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-08-31 20:41:19.152180625 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-08-31 20:42:05.518232887 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-09-04 09:20:05.996242985 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-09-04 09:21:44.935035426 -0400
 @@ -833,6 +833,7 @@ const struct nfs_rpc_ops nfs_v3_clientop
  	.dentry_ops	= &nfs_dentry_operations,
  	.dir_inode_ops	= &nfs3_dir_inode_operations,
@@ -15871,8 +15871,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/n
  	.getattr	= nfs3_proc_getattr,
  	.setattr	= nfs3_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-08-31 20:42:05.519163219 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-08-31 20:42:05.520222923 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-09-04 09:21:44.936035595 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-09-04 09:21:44.936035595 -0400
 @@ -0,0 +1,768 @@
 +/*
 + *  linux/fs/nfs/nfs4filelayout.c
@@ -16643,8 +16643,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs
 +module_init(nfs4filelayout_init);
 +module_exit(nfs4filelayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-08-31 20:42:05.521233147 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-08-31 20:42:05.521233147 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-09-04 09:21:44.937035580 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-09-04 09:21:44.937035580 -0400
 @@ -0,0 +1,635 @@
 +/*
 + *  linux/fs/nfs/nfs4filelayoutdev.c
@@ -17282,8 +17282,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch
 +}
 +
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h
---- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-08-31 20:42:05.520222923 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-08-31 20:42:05.520222923 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-09-04 09:21:44.938035519 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-09-04 09:21:44.938035519 -0400
 @@ -0,0 +1,96 @@
 +/*
 + *  pnfs_nfs4filelayout.h
@@ -17382,8 +17382,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs
 +
 +#endif /* FS_NFS_NFS4FILELAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nfs4_fs.h
---- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-08-31 20:41:19.154160465 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-08-31 20:42:05.519163219 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-09-04 09:20:05.998222938 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-09-04 09:21:44.939035693 -0400
 @@ -45,8 +45,28 @@ enum nfs4_client_state {
  	NFS4CLNT_RECLAIM_NOGRACE,
  	NFS4CLNT_DELEGRETURN,
@@ -17532,8 +17532,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nf
  
  /* nfs4xdr.c */
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs4proc.c
---- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-08-31 20:41:19.157140145 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-08-31 20:42:05.524099925 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-09-04 09:20:06.001202714 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-09-04 09:21:44.942015100 -0400
 @@ -49,12 +49,14 @@
  #include <linux/mount.h>
  #include <linux/module.h>
@@ -19198,7 +19198,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/n
  	.setattr	= nfs4_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs/nfs4renewd.c
 --- linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-08-31 20:42:05.526213255 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-09-04 09:21:44.944045456 -0400
 @@ -54,17 +54,17 @@
  void
  nfs4_renew_state(struct work_struct *work)
@@ -19221,8 +19221,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs
  	spin_lock(&clp->cl_lock);
  	lease = clp->cl_lease_time;
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/nfs4state.c
---- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-08-31 20:41:19.158078621 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-08-31 20:42:05.527232994 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-09-04 09:20:06.002213222 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-09-04 09:21:44.945035417 -0400
 @@ -48,11 +48,13 @@
  #include <linux/random.h>
  #include <linux/workqueue.h>
@@ -19545,8 +19545,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/
  			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
  			    test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
 diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs4xdr.c
---- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-08-31 20:41:19.160150207 -0400
-+++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-08-31 20:42:05.530092192 -0400
+--- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-09-04 09:20:06.004212730 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-09-04 09:21:44.948015074 -0400
 @@ -50,8 +50,10 @@
  #include <linux/nfs4.h>
  #include <linux/nfs_fs.h>
@@ -21056,8 +21056,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nf
  };
  
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild
---- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-08-31 20:42:05.532213157 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-08-31 20:42:05.532213157 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-09-04 09:21:44.950025182 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-09-04 09:21:44.950025182 -0400
 @@ -0,0 +1,11 @@
 +#
 +# Makefile for the pNFS Objects Layout Driver kernel module
@@ -21071,8 +21071,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs
 +panlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o panfs_shim.o
 +obj-$(CONFIG_PNFS_PANLAYOUT) += panlayoutdriver.o
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-08-31 20:42:05.533243491 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-08-31 20:42:05.534105468 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-09-04 09:21:44.951035482 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-09-04 09:21:44.951035482 -0400
 @@ -0,0 +1,1087 @@
 +/*
 + *  objio_osd.c
@@ -22162,8 +22162,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noar
 +module_init(objlayout_init);
 +module_exit(objlayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-08-31 20:42:05.535059115 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-08-31 20:42:05.535059115 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-09-04 09:21:44.952035857 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-09-04 09:21:44.952035857 -0400
 @@ -0,0 +1,790 @@
 +/*
 + *  objlayout.c
@@ -22956,8 +22956,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noar
 +	.uninitialize_mountpoint = objlayout_uninitialize_mountpoint,
 +};
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h
---- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-08-31 20:42:05.535059115 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-08-31 20:42:05.535059115 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-09-04 09:21:44.953025191 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-09-04 09:21:44.953025191 -0400
 @@ -0,0 +1,171 @@
 +/*
 + *  objlayout.h
@@ -23131,8 +23131,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noar
 +
 +#endif /* _OBJLAYOUT_H */
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-08-31 20:42:05.536110535 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-08-31 20:42:05.536110535 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-09-04 09:21:44.954045432 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-09-04 09:21:44.954045432 -0400
 @@ -0,0 +1,734 @@
 +/*
 + *  panfs_shim.c
@@ -23869,8 +23869,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noa
 +module_init(panlayout_init);
 +module_exit(panlayout_exit);
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h
---- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-08-31 20:42:05.537124598 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-08-31 20:42:05.537124598 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-09-04 09:21:44.955035904 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-09-04 09:21:44.955035904 -0400
 @@ -0,0 +1,482 @@
 +/*
 + *  panfs_shim.h
@@ -24355,8 +24355,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noa
 +
 +#endif /* _PANLAYOUT_PANFS_SHIM_H */
 diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
---- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-08-31 20:42:05.538121971 -0400
-+++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-08-31 20:42:05.538121971 -0400
+--- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-09-04 09:21:44.956036011 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-09-04 09:21:44.956036011 -0400
 @@ -0,0 +1,435 @@
 +/*
 + *  pnfs_osd_xdr.c
@@ -24794,8 +24794,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.
 +	return 0;
 +}
 diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/pagelist.c
---- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-08-31 20:41:19.162150222 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-08-31 20:42:05.539131687 -0400
+--- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-09-04 09:20:06.006202442 -0400
++++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-09-04 09:21:44.957035861 -0400
 @@ -20,6 +20,7 @@
  #include <linux/nfs_mount.h>
  
@@ -24918,8 +24918,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/p
  				if (res == INT_MAX)
  					goto out;
 diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.c
---- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-08-31 20:42:05.541150301 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-08-31 20:42:05.541150301 -0400
+--- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-09-04 09:21:44.959025145 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-09-04 09:21:44.959025145 -0400
 @@ -0,0 +1,2037 @@
 +/*
 + *  linux/fs/nfs/pnfs.c
@@ -26959,8 +26959,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.
 +}
 +EXPORT_SYMBOL(nfs4_put_deviceid_cache);
 diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.h
---- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-08-31 20:42:05.542222767 -0400
-+++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-08-31 20:42:05.542222767 -0400
+--- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-09-04 09:21:44.960025819 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-09-04 09:21:44.960025819 -0400
 @@ -0,0 +1,354 @@
 +/*
 + *  fs/nfs/pnfs.h
@@ -27317,8 +27317,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.
 +
 +#endif /* FS_NFS_PNFS_H */
 diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.c
---- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-08-31 20:41:19.163155499 -0400
-+++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-08-31 20:42:05.543103394 -0400
+--- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-09-04 09:20:06.007232858 -0400
++++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-09-04 09:21:44.961035556 -0400
 @@ -443,7 +443,7 @@ nfs_proc_symlink(struct inode *dir, stru
  	fattr = nfs_alloc_fattr();
  	status = -ENOMEM;
@@ -27346,8 +27346,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.
  	.getattr	= nfs_proc_getattr,
  	.setattr	= nfs_proc_setattr,
 diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.c
---- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-08-31 20:41:19.164160482 -0400
-+++ linux-2.6.34.noarch/fs/nfs/read.c	2010-08-31 20:42:05.544233042 -0400
+--- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-09-04 09:20:06.008232903 -0400
++++ linux-2.6.34.noarch/fs/nfs/read.c	2010-09-04 09:21:44.962035703 -0400
 @@ -18,8 +18,12 @@
  #include <linux/sunrpc/clnt.h>
  #include <linux/nfs_fs.h>
@@ -27562,8 +27562,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.
  	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
  read_complete:
 diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/super.c
---- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-08-31 20:41:19.165170508 -0400
-+++ linux-2.6.34.noarch/fs/nfs/super.c	2010-08-31 20:42:05.545114737 -0400
+--- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-09-04 09:20:06.009232934 -0400
++++ linux-2.6.34.noarch/fs/nfs/super.c	2010-09-04 09:21:44.963035469 -0400
 @@ -64,6 +64,7 @@
  #include "iostat.h"
  #include "internal.h"
@@ -27611,8 +27611,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/supe
  #endif
  
 diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unlink.c
---- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-08-31 20:41:19.166151095 -0400
-+++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-08-31 20:42:05.546131839 -0400
+--- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-09-04 09:20:06.010203248 -0400
++++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-09-04 09:21:44.964036069 -0400
 @@ -110,7 +110,7 @@ void nfs_unlink_prepare(struct rpc_task 
  	struct nfs_unlinkdata *data = calldata;
  	struct nfs_server *server = NFS_SERVER(data->dir);
@@ -27623,8 +27623,8 @@ diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unl
  		return;
  	rpc_call_start(task);
 diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/write.c
---- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-08-31 20:41:17.273213379 -0400
-+++ linux-2.6.34.noarch/fs/nfs/write.c	2010-08-31 20:42:05.548212682 -0400
+--- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-09-04 09:20:04.513160311 -0400
++++ linux-2.6.34.noarch/fs/nfs/write.c	2010-09-04 09:21:44.966025174 -0400
 @@ -20,6 +20,7 @@
  #include <linux/nfs_mount.h>
  #include <linux/nfs_page.h>
@@ -28313,7 +28313,7 @@ diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/writ
  int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/include/linux/exportfs.h
 --- linux-2.6.34.noarch/include/linux/exportfs.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-08-31 20:42:05.577222704 -0400
++++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-09-04 09:21:44.967035352 -0400
 @@ -2,6 +2,7 @@
  #define LINUX_EXPORTFS_H 1
  
@@ -28386,8 +28386,8 @@ diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/i
 +#endif /* CONFIG_PNFSD */
  #endif /* LINUX_EXPORTFS_H */
 diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/include/linux/exp_xdr.h
---- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-08-31 20:42:05.576053304 -0400
-+++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-08-31 20:42:05.576053304 -0400
+--- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-09-04 09:21:44.969025737 -0400
++++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-09-04 09:21:44.969025737 -0400
 @@ -0,0 +1,141 @@
 +#ifndef _LINUX_EXP_XDR_H
 +#define _LINUX_EXP_XDR_H
@@ -28531,8 +28531,8 @@ diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/in
 +}
 +#endif /* _LINUX_EXP_XDR_H */
 diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include/linux/fs.h
---- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-08-31 20:41:19.120034834 -0400
-+++ linux-2.6.34.noarch/include/linux/fs.h	2010-08-31 20:42:05.579212604 -0400
+--- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-09-04 09:20:05.965243003 -0400
++++ linux-2.6.34.noarch/include/linux/fs.h	2010-09-04 09:21:44.971015113 -0400
 @@ -387,6 +387,7 @@ struct inodes_stat_t {
  #include <asm/byteorder.h>
  
@@ -28551,7 +28551,7 @@ diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include
  	struct dentry		*s_root;
 diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/include/linux/nfs4.h
 --- linux-2.6.34.noarch/include/linux/nfs4.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-08-31 20:42:05.581035627 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-09-04 09:21:44.973025301 -0400
 @@ -17,7 +17,10 @@
  
  #define NFS4_BITMAP_SIZE	2
@@ -28681,8 +28681,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/inclu
  #endif
  
 diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfs4_pnfs.h
---- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-08-31 20:42:05.583087731 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-08-31 20:42:05.583087731 -0400
+--- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-09-04 09:21:44.974035325 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-09-04 09:21:44.974035325 -0400
 @@ -0,0 +1,329 @@
 +/*
 + *  include/linux/nfs4_pnfs.h
@@ -29014,8 +29014,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/
 +
 +#endif /* LINUX_NFS4_PNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarch/include/linux/nfsd4_block.h
---- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-08-31 20:42:05.596098115 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-08-31 20:42:05.596098115 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-09-04 09:21:44.976025566 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-09-04 09:21:44.976025566 -0400
 @@ -0,0 +1,101 @@
 +#ifndef NFSD4_BLOCK
 +#define NFSD4_BLOCK
@@ -29119,8 +29119,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarc
 +#endif /* NFSD4_BLOCK */
 +
 diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h
---- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-08-31 20:42:05.597097942 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-08-31 20:42:05.597097942 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-09-04 09:21:44.977035317 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-09-04 09:21:44.977035317 -0400
 @@ -0,0 +1,345 @@
 +/*
 + * include/linux/nfsd4_spnfs.h
@@ -29469,7 +29469,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarc
 +#endif /* NFS_SPNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch/include/linux/nfsd/const.h
 --- linux-2.6.34.noarch/include/linux/nfsd/const.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-08-31 20:42:05.591097762 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-09-04 09:21:44.978015841 -0400
 @@ -29,6 +29,7 @@
  #ifdef __KERNEL__
  
@@ -29480,7 +29480,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch
   * Largest number of bytes we need to allocate for an NFS
 diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch/include/linux/nfsd/debug.h
 --- linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-08-31 20:42:05.591097762 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-09-04 09:21:44.978015841 -0400
 @@ -32,6 +32,8 @@
  #define NFSDDBG_REPCACHE	0x0080
  #define NFSDDBG_XDR		0x0100
@@ -29492,7 +29492,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch
  
 diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarch/include/linux/nfsd/export.h
 --- linux-2.6.34.noarch/include/linux/nfsd/export.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-08-31 20:42:05.592118086 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-09-04 09:21:44.979055116 -0400
 @@ -100,6 +100,7 @@ struct svc_export {
  	uid_t			ex_anon_uid;
  	gid_t			ex_anon_gid;
@@ -29502,8 +29502,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarc
  	struct nfsd4_fs_locations ex_fslocs;
  	int			ex_nflavors;
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h
---- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-08-31 20:42:05.592118086 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-08-31 20:42:05.592118086 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-09-04 09:21:44.979055116 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-09-04 09:21:44.980035474 -0400
 @@ -0,0 +1,132 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -29638,8 +29638,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.3
 +
 +#endif /* NFSD_NFS4LAYOUTXDR_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h
---- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-08-31 20:42:05.593020723 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-08-31 20:42:05.593020723 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-09-04 09:21:44.980035474 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-09-04 09:21:44.980035474 -0400
 @@ -0,0 +1,54 @@
 +/******************************************************************************
 + *
@@ -29696,8 +29696,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.
 +
 +#endif /* CONFIG_PNFSD */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h
---- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-08-31 20:42:05.594107962 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-08-31 20:42:05.594107962 -0400
+--- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-09-04 09:21:44.981055721 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-09-04 09:21:44.981055721 -0400
 @@ -0,0 +1,271 @@
 +/*
 + *  Copyright (c) 2006 The Regents of the University of Michigan.
@@ -29972,7 +29972,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.n
 +#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
 diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noarch/include/linux/nfsd/syscall.h
 --- linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-08-31 20:42:05.594107962 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-09-04 09:21:44.982035422 -0400
 @@ -29,6 +29,7 @@
  /*#define NFSCTL_GETFH		6	/ * get an fh by ino DISCARDED */
  #define NFSCTL_GETFD		7	/* get an fh by path (used by mountd) */
@@ -30010,8 +30010,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noar
  
  union nfsctl_res {
 diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/include/linux/nfs_fs.h
---- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-08-31 20:41:19.168160480 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-08-31 20:42:05.584098019 -0400
+--- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-09-04 09:20:06.012232950 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-09-04 09:21:44.983045467 -0400
 @@ -72,13 +72,20 @@ struct nfs_access_entry {
  	int			mask;
  };
@@ -30111,8 +30111,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/inc
  
  #ifdef __KERNEL__
 diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/include/linux/nfs_fs_sb.h
---- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-08-31 20:41:19.168160480 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-08-31 20:42:05.586087719 -0400
+--- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-09-04 09:20:06.012232950 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-09-04 09:21:44.985025570 -0400
 @@ -15,6 +15,7 @@ struct nlm_host;
  struct nfs4_sequence_args;
  struct nfs4_sequence_res;
@@ -30187,7 +30187,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/
  	atomic_t active; /* Keep trace of any activity to this server */
 diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch/include/linux/nfs_iostat.h
 --- linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-08-31 20:42:05.587097913 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-09-04 09:21:44.986035288 -0400
 @@ -113,6 +113,9 @@ enum nfs_stat_eventcounters {
  	NFSIOS_SHORTREAD,
  	NFSIOS_SHORTWRITE,
@@ -30200,7 +30200,7 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch
  
 diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/include/linux/nfs_page.h
 --- linux-2.6.34.noarch/include/linux/nfs_page.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-08-31 20:42:05.588097898 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-09-04 09:21:44.987025532 -0400
 @@ -39,6 +39,7 @@ struct nfs_page {
  	struct list_head	wb_list;	/* Defines state of page: */
  	struct page		*wb_page;	/* page to read in/write out */
@@ -30249,8 +30249,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/i
  			     struct inode *inode,
  			     int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
 diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/include/linux/nfs_xdr.h
---- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-08-31 20:41:19.169171911 -0400
-+++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-08-31 20:42:05.590087729 -0400
+--- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-09-04 09:20:06.013233555 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-09-04 09:21:44.989035583 -0400
 @@ -3,6 +3,8 @@
  
  #include <linux/nfsacl.h>
@@ -30528,8 +30528,8 @@ diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/in
  extern struct rpc_version	nfs_version3;
  extern struct rpc_version	nfs_version4;
 diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.noarch/include/linux/panfs_shim_api.h
---- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-08-31 20:42:05.598087997 -0400
-+++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-08-31 20:42:05.599087710 -0400
+--- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-09-04 09:21:44.990025422 -0400
++++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-09-04 09:21:44.991025218 -0400
 @@ -0,0 +1,57 @@
 +#ifndef _PANFS_SHIM_API_H
 +#define _PANFS_SHIM_API_H
@@ -30589,8 +30589,8 @@ diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.no
 +
 +#endif /* _PANFS_SHIM_API_H */
 diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h
---- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-08-31 20:42:05.600025088 -0400
-+++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-08-31 20:42:05.600025088 -0400
+--- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-09-04 09:21:44.992035338 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-09-04 09:21:44.992035338 -0400
 @@ -0,0 +1,439 @@
 +/*
 + *  pnfs_osd_xdr.h
@@ -31033,7 +31033,7 @@ diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noar
 +#endif /* __PNFS_OSD_XDR_H__ */
 diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/include/linux/posix_acl.h
 --- linux-2.6.34.noarch/include/linux/posix_acl.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-08-31 20:42:05.601087875 -0400
++++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-09-04 09:21:44.993025468 -0400
 @@ -8,6 +8,7 @@
  #ifndef __LINUX_POSIX_ACL_H
  #define __LINUX_POSIX_ACL_H
@@ -31044,7 +31044,7 @@ diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/
  #define ACL_UNDEFINED_ID	(-1)
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-08-31 20:42:05.602100892 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-09-04 09:21:44.994025129 -0400
 @@ -14,6 +14,8 @@
  /* size of an XDR encoding unit in bytes, i.e. 32bit */
  #define XDR_UNIT	(4)
@@ -31056,7 +31056,7 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.n
  
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-08-31 20:42:05.603108001 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-09-04 09:21:44.995045529 -0400
 @@ -3,6 +3,7 @@
  
  #ifdef __KERNEL__
@@ -31077,8 +31077,8 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.3
  
  struct rpc_pipe_ops {
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h
---- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-08-31 20:42:05.603108001 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-08-31 20:42:05.603108001 -0400
+--- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-09-04 09:21:44.995045529 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-09-04 09:21:44.995045529 -0400
 @@ -0,0 +1,111 @@
 +/*
 + *  Copyright (c) 2008 The Regents of the University of Michigan.
@@ -31193,7 +31193,7 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux
 +#endif /* _SIMPLE_RPC_PIPEFS_H_ */
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h
 --- linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-08-31 20:42:05.604049784 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-09-04 09:21:44.996061803 -0400
 @@ -166,4 +166,41 @@ static inline char *__svc_print_addr(con
  
  	return buf;
@@ -31237,8 +31237,8 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.n
 +}
  #endif /* SUNRPC_SVC_XPRT_H */
 diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xdr.h
---- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-08-31 20:41:19.173118431 -0400
-+++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-08-31 20:42:05.605107904 -0400
+--- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-09-04 09:20:06.017243774 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-09-04 09:21:44.997045653 -0400
 @@ -131,6 +131,13 @@ xdr_decode_hyper(__be32 *p, __u64 *valp)
  	return p + 2;
  }
@@ -31261,14 +31261,9 @@ diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch
  extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
  		unsigned int base, unsigned int len);
  extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
-diff -up linux-2.6.34.noarch/localversion-pnfs.orig linux-2.6.34.noarch/localversion-pnfs
---- linux-2.6.34.noarch/localversion-pnfs.orig	2010-08-31 20:42:05.605107904 -0400
-+++ linux-2.6.34.noarch/localversion-pnfs	2010-08-31 20:42:05.605107904 -0400
-@@ -0,0 +1 @@
-+-pnfs
 diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/sunrpc/Makefile
 --- linux-2.6.34.noarch/net/sunrpc/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-08-31 20:42:05.606020148 -0400
++++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-09-04 09:21:44.998058968 -0400
 @@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprt
  	    svc.o svcsock.o svcauth.o svcauth_unix.o \
  	    addr.o rpcb_clnt.o timer.o xdr.o \
@@ -31279,8 +31274,8 @@ diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/su
  sunrpc-$(CONFIG_PROC_FS) += stats.o
  sunrpc-$(CONFIG_SYSCTL) += sysctl.o
 diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c
---- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-08-31 20:42:05.606020148 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-08-31 20:42:05.607108065 -0400
+--- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-09-04 09:21:44.999045582 -0400
++++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-09-04 09:21:44.999045582 -0400
 @@ -0,0 +1,424 @@
 +/*
 + *  net/sunrpc/simple_rpc_pipefs.c
@@ -31707,8 +31702,8 @@ diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.no
 +}
 +EXPORT_SYMBOL(pipefs_generic_destroy_msg);
 diff -up linux-2.6.34.noarch/net/sunrpc/xdr.c.orig linux-2.6.34.noarch/net/sunrpc/xdr.c
---- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-08-31 20:41:19.188144022 -0400
-+++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-08-31 20:42:05.607108065 -0400
+--- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-09-04 09:20:06.031222775 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-09-04 09:21:45.000045387 -0400
 @@ -395,24 +395,29 @@ xdr_shrink_pagelen(struct xdr_buf *buf, 
  {
  	struct kvec *tail;

From 9e6bd6253b21f27fbc50f0788b1dd4a32ecae976 Mon Sep 17 00:00:00 2001
From: Steve Dickson <steved@redhat.com>
Date: Thu, 30 Sep 2010 10:58:43 -0400
Subject: [PATCH 20/20] - Updated to the latest pNFS tag:
 pnfs-all-2.6.35-2010-09-14

Signed-off-by: Steve Dickson <steved@redhat.com>
---
 kernel.spec                          |     9 +-
 pnfs-all-2.6.35-2010-09-14-f13.patch | 31775 +++++++++++++++++++++++++
 2 files changed, 31781 insertions(+), 3 deletions(-)
 create mode 100644 pnfs-all-2.6.35-2010-09-14-f13.patch

diff --git a/kernel.spec b/kernel.spec
index 2eabb4dab..da80a8709 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -23,7 +23,7 @@ Summary: The Linux kernel
 #
 # (Uncomment the '#' and both spaces below to set the buildid.)
 #
-%define buildid .pnfs34.2010.08.24
+%define buildid .pnfs35.2010.09.14
 ###################################################################
 
 # The buildid can also be specified on the rpmbuild command line
@@ -818,7 +818,7 @@ Patch12581: xen-use-percpu-interrupts-for-ipis-and-virqs.patch
 
 Patch30000: nfs-35-fc.patch
 Patch30001: nfsd-35-fc.patch
-Patch30002: pnfs-all-2.6.35-2010-08-24-f13.patch
+Patch30002: pnfs-all-2.6.35-2010-09-14-f13.patch
 Patch30003: linux-2.6-pnfs-compile.patch
 Patch30004: linux-2.6.35-inline.patch
 
@@ -1551,7 +1551,7 @@ ApplyPatch xen-use-percpu-interrupts-for-ipis-and-virqs.patch
 
 ApplyPatch nfs-35-fc.patch  
 ApplyPatch nfsd-35-fc.patch  
-ApplyPatch pnfs-all-2.6.35-2010-08-24-f13.patch
+ApplyPatch pnfs-all-2.6.35-2010-09-14-f13.patch
 ApplyPatch linux-2.6-pnfs-compile.patch
 ApplyPatch linux-2.6.35-inline.patch
 # END OF PATCH APPLICATIONS
@@ -2175,6 +2175,9 @@ fi
 
 
 %changelog
+* Thu Sep 30 2010 Steve Dickson <steved@redhat.com>
+- Updated to the latest pNFS tag: pnfs-all-2.6.35-2010-09-14
+
 * Mon Sep 27 2010 Ben Skeggs <bskeggs@redhat.com> 2.6.34.7-58
 - nouveau: better handling of certain GPU errors
 
diff --git a/pnfs-all-2.6.35-2010-09-14-f13.patch b/pnfs-all-2.6.35-2010-09-14-f13.patch
new file mode 100644
index 000000000..2d6f9a09b
--- /dev/null
+++ b/pnfs-all-2.6.35-2010-09-14-f13.patch
@@ -0,0 +1,31775 @@
+diff -up linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig linux-2.6.34.noarch/arch/um/os-Linux/mem.c
+--- linux-2.6.34.noarch/arch/um/os-Linux/mem.c.orig	2010-09-30 10:14:57.591122000 -0400
++++ linux-2.6.34.noarch/arch/um/os-Linux/mem.c	2010-09-30 10:17:08.383984000 -0400
+@@ -13,6 +13,7 @@
+ #include <sys/stat.h>
+ #include <sys/mman.h>
+ #include <sys/param.h>
++#include <sys/stat.h>
+ #include "init.h"
+ #include "kern_constants.h"
+ #include "os.h"
+diff -up linux-2.6.34.noarch/block/genhd.c.orig linux-2.6.34.noarch/block/genhd.c
+--- linux-2.6.34.noarch/block/genhd.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/block/genhd.c	2010-09-30 10:17:08.410985000 -0400
+@@ -1009,6 +1009,7 @@ static void disk_release(struct device *
+ struct class block_class = {
+ 	.name		= "block",
+ };
++EXPORT_SYMBOL(block_class);
+ 
+ static char *block_devnode(struct device *dev, mode_t *mode)
+ {
+diff -up linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt
+--- linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt.orig	2010-09-30 10:17:08.376984000 -0400
++++ linux-2.6.34.noarch/Documentation/filesystems/spnfs.txt	2010-09-30 10:17:08.378989000 -0400
+@@ -0,0 +1,211 @@
++(c) 2007 Network Appliance Inc.
++
++spNFS
++-----
++
++An spNFS system consists of a Meta Data Server (MDS), a number of Client machines (C) and a number of Data Servers (DS).
++
++A file system is mounted by the clients from the MDS, and all file data
++is striped across the DSs.
++
++Identify the machines that will be filling each of these roles.
++
++The spnfs kernel will be installed on all machines: clients, the MDS and DSs.
++
++
++Building and installing the spNFS kernel
++----------------------------------------
++
++Get the spNFS kernel from:
++
++	git://linux-nfs.org/~bhalevy/linux-pnfs.git
++
++Use the pnfs-all-latest branch and add these options to your .config file
++
++	CONFIG_NETWORK_FILESYSTEMS=y
++	CONFIG_NFS_FS=m
++	CONFIG_NFS_V4=y
++	CONFIG_NFS_V4_1=y
++	CONFIG_PNFS=y
++	CONFIG_NFSD=m
++	CONFIG_PNFSD=y
++	# CONFIG_PNFSD_LOCAL_EXPORT is not set
++	CONFIG_SPNFS=y
++
++By default, spNFS uses whole-file layouts.  Layout segments can be enabled
++by adding:
++
++	CONFIG_SPNFS_LAYOUTSEGMENTS=y
++
++to your .config file.
++
++Building and installation of kernel+modules is as usual.
++This kernel should be installed and booted on the client, MDS and DSs.
++
++Note that CONFIG_PNFSD_LOCAL_EXPORT must be disabled for spnfs as it
++takes over the pnfs export interface.
++
++Building nfs-utils
++------------------
++
++Get the nfs-utils package containing spnfsd from:
++
++	git://linux-nfs.org/~bhalevy/pnfs-nfs-utils.git
++
++Follow the standard instructions for building nfs-utils.
++
++After building, the spnfsd daemon will be located in utils/spnfsd.  The spnfsd
++daemon will only be needed on the MDS.
++
++
++Installation
++------------
++
++The nfs-utils package contains a default spnfsd.conf file in
++utils/spnfsd/spnfsd.conf.  Copy this file to /etc/spnfsd.conf.
++
++By default, the DS-Mount-Directory is set to /spnfs (see spnfsd.conf).  Under
++this directory, mount points must be created for each DS to
++be used for pNFS data stripes.  These mount points are named by the ip address
++of the corresponding DS.  In the sample spnfsd.conf, there are two
++DSs defined (172.16.28.134 and 172.16.28.141).
++
++Following the sample spnfsd.conf,
++
++	mkdir /spnfs
++
++on the MDS (corresponding to DS-Mount-Directory).  Then
++
++	mkdir /spnfs/172.16.28.134
++	mkdir /spnfs/172.16.28.141
++
++to create the mount points for the DSs.
++
++On the DSs, choose a directory where data stripes will be created by the MDS.
++For the sample file, this directory is /pnfs, so on each DS execute:
++
++	mkdir /pnfs
++
++This directory is specified in the spnfsd.conf file by the DS*_ROOT option
++(where * is replaced by the DS number).  DS_ROOT is specified relative to
++the directory being exported by the DSs.  In our example, our DSs are exporting
++the root directory (/) and therefore our DS_ROOT is /pnfs.  On the DSs, we have
++the following entry in /etc/exports:
++
++	/ *(rw,fsid=0,insecure,no_root_squash,sync,no_subtree_check)
++
++N.B. If we had created a /exports directory and a /pnfs directory under
++/exports, and if we were exporting /exports, then DS_ROOT would still be /pnfs
++(not /exports/pnfs).
++
++It may be useful to add entries to /etc/fstab on the MDS to automatically
++mount the DS_ROOT file systems.  For this example, our MDS fstab would
++contain:
++
++	172.16.28.134:/pnfs /spnfs/172.16.28.134 nfs    defaults        1 2
++	172.16.28.141:/pnfs /spnfs/172.16.28.141 nfs    defaults        1 2
++
++The DS mounts must be performed manually or via fstab at this time (automatic
++mounting, directory creation, etc. are on the todo list).  To perform I/O
++through the MDS, the DS mounts MUST use NFSv3 at this time (this restriction
++will eventually be removed).
++
++
++On the MDS, choose a file system to use with spNFS and export it, e.g.:
++
++	/ *(rw,fsid=0,insecure,no_root_squash,sync,no_subtree_check,pnfs)
++
++Make sure nfsd and all supporting processes are running on the MDS and DSs.
++
++
++Running
++-------
++
++If rpc_pipefs is not already mounted (if you're running idmapd it probably is),
++you may want to add the following line to /etc/fstab:
++
++	rpc_pipefs    /var/lib/nfs/rpc_pipefs rpc_pipefs defaults     0 0
++
++to automatically mount rpc_pipefs.
++
++With spnfsd.conf configured for your environment and the mounts mounted as
++described above, spnfsd can now be started.
++
++On the MDS, execute spnfsd:
++
++	spnfsd
++
++The executable is located in the directory where it was built, and
++may also have been installed elsewhere depending on how you built nfs-utils.
++It will run in the foreground by default, and in fact will do so despite
++any options suggesting the contrary (it's still a debugging build).
++
++On the client, make sure the nfslayoutdriver module is loaded:
++
++	modprobe nfslayoutdriver
++
++Then mount the file system from the MDS:
++
++	mount -t nfs4 -o minorversion=1 mds:/ /mnt
++
++I/O through the MDS is now supported.  To use it, do not load the
++nfslayoutdriver on the client, and mount the MDS using NFSv4 or 4.1
++(NFSv2 and v3 are not yet supported).
++
++You may now use spNFS by performing file system activities in /mnt.
++If you create files in /mnt, you should see stripe files corresponding to
++new files being created on the DSs.  The current implementation names the
++stripe files based on the inode number of the file on the MDS.  For example,
++if you create a file foo in /mnt and do an 'ls -li /mnt/foo':
++
++	# ls -li foo
++	1233 -rw-r--r-- 1 root root 0 Nov 29 15:54 foo
++
++You should see stripe files on each DS under /pnfs (per the sample) named
++1233.  The file /pnfs/1233 on DS1 will contain the first <stripe size> bytes
++of data written to foo, DS2 will contain the next <stripe size> bytes, etc.
++Removing /mnt/foo will remove the corresponding stripe files on the DSs.
++Other file system operations should behave (mostly :-) as expected.
++
++
++Layout Segments
++---------------
++
++If the kernel is compiled to support layout segments, there will
++be two files created under /proc/fs/spnfs for controlling layout
++segment functionality.
++
++To enable layout segments, write a '1' to /proc/fs/spnfs/layoutseg, e.g.:
++
++	echo 1 > /proc/fs/spnfs/layoutseg
++
++Layout segments can be disabled (returning to whole-file layouts) by
++writing a '0' to /proc/fs/spnfs/layoutseg:
++
++	echo 0 > /proc/fs/spnfs/layoutseg
++
++When layout segments are enabled, the size of the layouts returned can
++be specified by writing a decimal number (ascii representation) to
++/proc/fs/spnfs/layoutsegsize:
++
++	echo 1024 > /proc/fs/spnfs/layoutsegsize
++
++The value '0' has a special meaning--it causes the server to return a
++layout that is exactly the size requested by the client:
++
++	echo 0 > /proc/fs/spnfs/layoutsegsize
++
++
++Troubleshooting
++---------------
++
++If you see data being written to the files on the MDS rather than
++the stripe files, make sure the nfslayoutdriver is loaded on the client
++(see above).
++
++If you get a "permission denied" error, make sure mountd is running on the MDS
++(it occasionally fails to start).
++
++Bugs, enhancements, compliments, complaints to: dmuntz@netapp.com
++
++
+diff -up linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig linux-2.6.34.noarch/drivers/md/dm-ioctl.c
+--- linux-2.6.34.noarch/drivers/md/dm-ioctl.c.orig	2010-09-30 10:15:01.214222000 -0400
++++ linux-2.6.34.noarch/drivers/md/dm-ioctl.c	2010-09-30 10:17:08.417985000 -0400
+@@ -657,6 +657,12 @@ static int dev_create(struct dm_ioctl *p
+ 	return r;
+ }
+ 
++int dm_dev_create(struct dm_ioctl *param)
++{
++	return dev_create(param, sizeof(*param));
++}
++EXPORT_SYMBOL(dm_dev_create);
++
+ /*
+  * Always use UUID for lookups if it's present, otherwise use name or dev.
+  */
+@@ -751,6 +757,12 @@ static int dev_remove(struct dm_ioctl *p
+ 	return 0;
+ }
+ 
++int dm_dev_remove(struct dm_ioctl *param)
++{
++	return dev_remove(param, sizeof(*param));
++}
++EXPORT_SYMBOL(dm_dev_remove);
++
+ /*
+  * Check a string doesn't overrun the chunk of
+  * memory we copied from userland.
+@@ -923,6 +935,12 @@ static int do_resume(struct dm_ioctl *pa
+ 	return r;
+ }
+ 
++int dm_do_resume(struct dm_ioctl *param)
++{
++	return do_resume(param);
++}
++EXPORT_SYMBOL(dm_do_resume);
++
+ /*
+  * Set or unset the suspension state of a device.
+  * If the device already is in the requested state we just return its status.
+@@ -1200,6 +1218,12 @@ out:
+ 	return r;
+ }
+ 
++int dm_table_load(struct dm_ioctl *param, size_t param_size)
++{
++	return table_load(param, param_size);
++}
++EXPORT_SYMBOL(dm_table_load);
++
+ static int table_clear(struct dm_ioctl *param, size_t param_size)
+ {
+ 	int r;
+diff -up linux-2.6.34.noarch/drivers/scsi/hosts.c.orig linux-2.6.34.noarch/drivers/scsi/hosts.c
+--- linux-2.6.34.noarch/drivers/scsi/hosts.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/drivers/scsi/hosts.c	2010-09-30 10:17:08.422988000 -0400
+@@ -49,7 +49,7 @@ static void scsi_host_cls_release(struct
+ 	put_device(&class_to_shost(dev)->shost_gendev);
+ }
+ 
+-static struct class shost_class = {
++struct class shost_class = {
+ 	.name		= "scsi_host",
+ 	.dev_release	= scsi_host_cls_release,
+ };
+diff -up linux-2.6.34.noarch/fs/exofs/exofs.h.orig linux-2.6.34.noarch/fs/exofs/exofs.h
+--- linux-2.6.34.noarch/fs/exofs/exofs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/exofs.h	2010-09-30 10:17:08.444986000 -0400
+@@ -36,13 +36,9 @@
+ #include <linux/fs.h>
+ #include <linux/time.h>
+ #include <linux/backing-dev.h>
++#include <linux/pnfs_osd_xdr.h>
+ #include "common.h"
+ 
+-/* FIXME: Remove once pnfs hits mainline
+- * #include <linux/exportfs/pnfs_osd_xdr.h>
+- */
+-#include "pnfs.h"
+-
+ #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
+ 
+ #ifdef CONFIG_EXOFS_DEBUG
+@@ -103,6 +99,7 @@ struct exofs_sb_info {
+ struct exofs_i_info {
+ 	struct inode   vfs_inode;          /* normal in-memory inode          */
+ 	wait_queue_head_t i_wq;            /* wait queue for inode            */
++	spinlock_t     i_layout_lock;      /* lock for layout/return/recall   */
+ 	unsigned long  i_flags;            /* various atomic flags            */
+ 	uint32_t       i_data[EXOFS_IDATA];/*short symlink names and device #s*/
+ 	uint32_t       i_dir_start_lookup; /* which page to start lookup      */
+@@ -166,6 +163,9 @@ static inline unsigned exofs_io_state_si
+  */
+ #define OBJ_2BCREATED	0	/* object will be created soon*/
+ #define OBJ_CREATED	1	/* object has been created on the osd*/
++/* Below are not used atomic but reuse the same i_flags */
++#define OBJ_LAYOUT_IS_GIVEN  2  /* inode has given layouts to clients*/
++#define OBJ_IN_LAYOUT_RECALL 3  /* inode is in the middle of a layout recall*/
+ 
+ static inline int obj_2bcreated(struct exofs_i_info *oi)
+ {
+@@ -304,4 +304,20 @@ extern const struct inode_operations exo
+ extern const struct inode_operations exofs_symlink_inode_operations;
+ extern const struct inode_operations exofs_fast_symlink_inode_operations;
+ 
++/* export.c */
++typedef int (exofs_recall_fn)(struct inode *inode);
++#ifdef CONFIG_PNFSD
++int exofs_inode_recall_layout(struct inode *inode, enum pnfs_iomode iomode,
++			      exofs_recall_fn todo);
++void exofs_init_export(struct super_block *sb);
++#else
++static inline int exofs_inode_recall_layout(struct inode *inode,
++				enum pnfs_iomode iomode, exofs_recall_fn todo)
++{
++	return todo(inode);
++}
++
++static inline void exofs_init_export(struct super_block *sb) {}
++#endif
++
+ #endif
+diff -up linux-2.6.34.noarch/fs/exofs/export.c.orig linux-2.6.34.noarch/fs/exofs/export.c
+--- linux-2.6.34.noarch/fs/exofs/export.c.orig	2010-09-30 10:17:08.447987000 -0400
++++ linux-2.6.34.noarch/fs/exofs/export.c	2010-09-30 10:17:08.449986000 -0400
+@@ -0,0 +1,396 @@
++/*
++ * export.c - Implementation of the pnfs_export_operations
++ *
++ * Copyright (C) 2009 Panasas Inc.
++ * All rights reserved.
++ *
++ * Boaz Harrosh <bharrosh@panasas.com>
++ *
++ * This file is part of exofs.
++ *
++ * exofs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation.  Since it is based on ext2, and the only
++ * valid version of GPL for the Linux kernel is version 2, the only valid
++ * version of GPL for exofs is version 2.
++ *
++ * exofs is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with exofs; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
++ */
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include "exofs.h"
++
++static int exofs_layout_type(struct super_block *sb)
++{
++	return LAYOUT_OSD2_OBJECTS;
++}
++
++static void set_dev_id(struct pnfs_deviceid *pnfs_devid, u64 sbid, u64 devid)
++{
++	struct nfsd4_pnfs_deviceid *dev_id =
++		(struct nfsd4_pnfs_deviceid *)pnfs_devid;
++
++	dev_id->sbid  = sbid;
++	dev_id->devid = devid;
++}
++
++static int cb_layout_recall(struct inode *inode, enum pnfs_iomode iomode,
++			    u64 offset, u64 length, void *cookie)
++{
++	struct nfsd4_pnfs_cb_layout cbl;
++	struct pnfsd_cb_ctl cb_ctl;
++	int status;
++
++	memset(&cb_ctl, 0, sizeof(cb_ctl));
++	status = pnfsd_get_cb_op(&cb_ctl);
++	if (unlikely(status)) {
++		EXOFS_ERR("%s: nfsd unloaded!! inode (0x%lx) status=%d\n",
++			  __func__, inode->i_ino, status);
++		goto err;
++	}
++
++	memset(&cbl, 0, sizeof(cbl));
++	cbl.cbl_recall_type = RETURN_FILE;
++	cbl.cbl_seg.layout_type = LAYOUT_OSD2_OBJECTS;
++	cbl.cbl_seg.iomode = iomode;
++	cbl.cbl_seg.offset = offset;
++	cbl.cbl_seg.length = length;
++	cbl.cbl_cookie = cookie;
++
++	status = cb_ctl.cb_op->cb_layout_recall(inode->i_sb, inode, &cbl);
++	pnfsd_put_cb_op(&cb_ctl);
++
++err:
++	return status;
++}
++
++static enum nfsstat4 exofs_layout_get(
++	struct inode *inode,
++	struct exp_xdr_stream *xdr,
++	const struct nfsd4_pnfs_layoutget_arg *args,
++	struct nfsd4_pnfs_layoutget_res *res)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
++	struct exofs_layout *el = &sbi->layout;
++	struct pnfs_osd_object_cred *creds = NULL;
++	struct pnfs_osd_layout layout;
++	__be32 *start;
++	bool in_recall;
++	int i, err;
++	enum nfsstat4 nfserr;
++
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;
++	res->lg_seg.iomode = IOMODE_RW;
++	res->lg_return_on_close = true; /* TODO: unused but will be soon */
++
++	/* skip opaque size, will be filled-in later */
++	start = exp_xdr_reserve_qwords(xdr, 1);
++	if (!start) {
++		nfserr = NFS4ERR_TOOSMALL;
++		goto out;
++	}
++
++	creds = kcalloc(el->s_numdevs, sizeof(*creds), GFP_KERNEL);
++	if (!creds) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto out;
++	}
++
++	/* Fill in a pnfs_osd_layout struct */
++	layout.olo_map = sbi->data_map;
++
++	for (i = 0; i < el->s_numdevs; i++) {
++		struct pnfs_osd_object_cred *cred = &creds[i];
++		osd_id id = exofs_oi_objno(oi);
++		unsigned dev = exofs_layout_od_id(el, id, i);
++
++		set_dev_id(&cred->oc_object_id.oid_device_id, args->lg_sbid,
++			   dev);
++		cred->oc_object_id.oid_partition_id = el->s_pid;
++		cred->oc_object_id.oid_object_id = id;
++		cred->oc_osd_version = osd_dev_is_ver1(el->s_ods[dev]) ?
++						PNFS_OSD_VERSION_1 :
++						PNFS_OSD_VERSION_2;
++		cred->oc_cap_key_sec = PNFS_OSD_CAP_KEY_SEC_NONE;
++
++		cred->oc_cap_key.cred_len	= 0;
++		cred->oc_cap_key.cred		= NULL;
++
++		cred->oc_cap.cred_len	= OSD_CAP_LEN;
++		cred->oc_cap.cred	= oi->i_cred;
++	}
++
++	layout.olo_comps_index = 0;
++	layout.olo_num_comps = el->s_numdevs;
++	layout.olo_comps = creds;
++
++	err = pnfs_osd_xdr_encode_layout(xdr, &layout);
++	if (err) {
++		nfserr = NFS4ERR_TOOSMALL; /* FIXME: Change osd_xdr error codes */
++		goto out;
++	}
++
++	exp_xdr_encode_opaque_len(start, xdr->p);
++
++	spin_lock(&oi->i_layout_lock);
++	in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	if (!in_recall) {
++		__set_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++		nfserr = NFS4_OK;
++	} else {
++		nfserr = NFS4ERR_RECALLCONFLICT;
++	}
++	spin_unlock(&oi->i_layout_lock);
++
++out:
++	kfree(creds);
++	EXOFS_DBGMSG("(0x%lx) nfserr=%u xdr_bytes=%zu\n",
++		     inode->i_ino, nfserr, exp_xdr_qbytes(xdr->p - start));
++	return nfserr;
++}
++
++/* Apply a client LAYOUTCOMMIT (mtime/size). inode mutex must NOT be held */
++static int exofs_layout_commit(
++	struct inode *inode,
++	const struct nfsd4_pnfs_layoutcommit_arg *args,
++	struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	struct timespec mtime;
++	loff_t i_size;
++	int in_recall;
++
++	/* In case of a recall we ignore the new size and mtime since they
++	 * are going to be changed again by truncate, and since we cannot take
++	 * the inode lock in that case.
++	 */
++	spin_lock(&oi->i_layout_lock);
++	in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++	if (in_recall) {
++		EXOFS_DBGMSG("(0x%lx) commit was called during recall\n",
++			     inode->i_ino);
++		return 0;
++	}
++
++	/* NOTE: I would love to call inode_setattr here
++	 *	 but i cannot since this will cause an eventual vmtruncate,
++	 *	 which will cause a layout_recall. So open code the i_size
++	 *	 and mtime/atime changes under i_mutex.
++	 */
++	mutex_lock_nested(&inode->i_mutex, I_MUTEX_NORMAL);
++
++	if (args->lc_mtime.seconds) {
++		mtime.tv_sec = args->lc_mtime.seconds;
++		mtime.tv_nsec = args->lc_mtime.nseconds;
++
++		/* layout commit may only move mtime forward: notifications
++		 * can be reordered and this one might arrive after a local
++		 * change, so never replace i_mtime with an older value.
++		 * TODO: if mtime > ctime then we know set_attr did an mtime
++		 * in the future. and we can let this update through
++		 */
++		if (timespec_compare(&mtime, &inode->i_mtime) < 0)
++			mtime = inode->i_mtime;
++	} else {
++		mtime = current_fs_time(inode->i_sb);
++	}
++
++	/* TODO: Will below work? since mark_inode_dirty has its own
++	 *       Time handling
++	 */
++	inode->i_atime = inode->i_mtime = mtime;
++
++	i_size = i_size_read(inode);
++	if (args->lc_newoffset) {
++		loff_t new_size = args->lc_last_wr + 1;
++
++		if (i_size < new_size) {
++			i_size_write(inode, i_size = new_size);
++			res->lc_size_chg = 1;
++			res->lc_newsize = new_size;
++		}
++	}
++	/* TODO: else { i_size = osd_get_object_length() } */
++
++/* TODO: exofs does not currently use the osd_xdr part of the layout_commit */
++
++	mark_inode_dirty_sync(inode);
++
++	mutex_unlock(&inode->i_mutex);
++	EXOFS_DBGMSG("(0x%lx) i_size=0x%llx lcp->off=0x%llx\n",
++		     inode->i_ino, i_size, args->lc_last_wr);
++	return 0;
++}
++
++static void exofs_handle_error(struct pnfs_osd_ioerr *ioerr)
++{
++	EXOFS_ERR("exofs_handle_error: errno=%d is_write=%d obj=0x%llx "
++		  "offset=0x%llx length=0x%llx\n",
++		  ioerr->oer_errno, ioerr->oer_iswrite,
++		  _LLU(ioerr->oer_component.oid_object_id),
++		  _LLU(ioerr->oer_comp_offset),
++		  _LLU(ioerr->oer_comp_length));
++}
++
++static int exofs_layout_return(
++	struct inode *inode,
++	const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	__be32 *p = args->lrf_body;
++	unsigned len = exp_xdr_qwords(args->lrf_body_len);
++
++	EXOFS_DBGMSG("(0x%lx) cookie %p xdr_len %d\n",
++		     inode->i_ino, args->lr_cookie, len);
++
++	while (len >= pnfs_osd_ioerr_xdr_sz()) {
++		struct pnfs_osd_ioerr ioerr;
++
++		p = pnfs_osd_xdr_decode_ioerr(&ioerr, p);
++		len -= pnfs_osd_ioerr_xdr_sz();
++		exofs_handle_error(&ioerr);
++	}
++
++	if (args->lr_cookie) {
++		struct exofs_i_info *oi = exofs_i(inode);
++		bool in_recall;
++
++		spin_lock(&oi->i_layout_lock);
++		in_recall = test_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++		__clear_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++		spin_unlock(&oi->i_layout_lock);
++
++		/* TODO: how to communicate cookie with the waiter */
++		if (in_recall)
++			wake_up(&oi->i_wq); /* wakeup any recalls */
++	}
++
++	return 0;
++}
++
++int exofs_get_device_info(struct super_block *sb, struct exp_xdr_stream *xdr,
++			  u32 layout_type,
++			  const struct nfsd4_pnfs_deviceid *devid)
++{
++	struct exofs_sb_info *sbi = sb->s_fs_info;
++	struct pnfs_osd_deviceaddr devaddr;
++	const struct osd_dev_info *odi;
++	u64 devno = devid->devid;
++	__be32 *start;
++	int err;
++
++	memset(&devaddr, 0, sizeof(devaddr));
++
++	if (unlikely(devno >= sbi->layout.s_numdevs))
++		return -ENODEV;
++
++	odi = osduld_device_info(sbi->layout.s_ods[devno]);
++
++	devaddr.oda_systemid.len = odi->systemid_len;
++	devaddr.oda_systemid.data = (void *)odi->systemid; /* !const cast */
++
++	devaddr.oda_osdname.len = odi->osdname_len ;
++	devaddr.oda_osdname.data = (void *)odi->osdname;/* !const cast */
++
++	/* skip opaque size, will be filled-in later */
++	start = exp_xdr_reserve_qwords(xdr, 1);
++	if (!start) {
++		err = -E2BIG;
++		goto err;
++	}
++
++	err = pnfs_osd_xdr_encode_deviceaddr(xdr, &devaddr);
++	if (err)
++		goto err;
++
++	exp_xdr_encode_opaque_len(start, xdr->p);
++
++	EXOFS_DBGMSG("xdr_bytes=%Zu devno=%lld osdname-%s\n",
++		     exp_xdr_qbytes(xdr->p - start), devno, odi->osdname);
++	return 0;
++
++err:
++	EXOFS_DBGMSG("Error: err=%d at_byte=%zu\n",
++		     err, exp_xdr_qbytes(xdr->p - start));
++	return err;
++}
++
++struct pnfs_export_operations exofs_pnfs_ops = {
++	.layout_type	= exofs_layout_type,
++	.layout_get	= exofs_layout_get,
++	.layout_commit	= exofs_layout_commit,
++	.layout_return	= exofs_layout_return,
++	.get_device_info = exofs_get_device_info,
++};
++
++static bool is_layout_returned(struct exofs_i_info *oi)
++{
++	bool layout_given;
++
++	spin_lock(&oi->i_layout_lock);
++	layout_given = test_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++
++	return !layout_given;
++}
++
++int exofs_inode_recall_layout(struct inode *inode, enum pnfs_iomode iomode,
++			      exofs_recall_fn todo)
++{
++	struct exofs_i_info *oi = exofs_i(inode);
++	int layout_given;
++	int error = 0;
++
++	spin_lock(&oi->i_layout_lock);
++	layout_given = test_bit(OBJ_LAYOUT_IS_GIVEN, &oi->i_flags);
++	__set_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++
++	if (!layout_given)
++		goto exec;
++
++	for (;;) {
++		EXOFS_DBGMSG("(0x%lx) has_layout issue a recall\n",
++			     inode->i_ino);
++		error = cb_layout_recall(inode, iomode, 0, NFS4_MAX_UINT64,
++					 &oi->i_wq);
++		switch (error) {
++		case 0:
++		case -EAGAIN:
++			break;
++		case -ENOENT:
++			goto exec;
++		default:
++			goto err;
++		}
++
++		error = wait_event_interruptible(oi->i_wq,
++						 is_layout_returned(oi));
++		if (error)
++			goto err;
++	}
++
++exec:
++	error = todo(inode);
++
++err:
++	spin_lock(&oi->i_layout_lock);
++	__clear_bit(OBJ_IN_LAYOUT_RECALL, &oi->i_flags);
++	spin_unlock(&oi->i_layout_lock);
++	EXOFS_DBGMSG("(0x%lx) return=>%d\n", inode->i_ino, error);
++	return error;
++}
++
++void exofs_init_export(struct super_block *sb)
++{
++	sb->s_pnfs_op = &exofs_pnfs_ops;
++}
+diff -up linux-2.6.34.noarch/fs/exofs/inode.c.orig linux-2.6.34.noarch/fs/exofs/inode.c
+--- linux-2.6.34.noarch/fs/exofs/inode.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/inode.c	2010-09-30 10:17:08.454986000 -0400
+@@ -833,7 +833,7 @@ void exofs_truncate(struct inode *inode)
+ 	if (unlikely(wait_obj_created(oi)))
+ 		goto fail;
+ 
+-	ret = _do_truncate(inode);
++	ret = exofs_inode_recall_layout(inode, IOMODE_ANY, _do_truncate);
+ 	if (ret)
+ 		goto fail;
+ 
+@@ -964,6 +964,7 @@ static void __oi_init(struct exofs_i_inf
+ {
+ 	init_waitqueue_head(&oi->i_wq);
+ 	oi->i_flags = 0;
++	spin_lock_init(&oi->i_layout_lock);
+ }
+ /*
+  * Fill in an inode read from the OSD and set it up for use
+diff -up linux-2.6.34.noarch/fs/exofs/Kbuild.orig linux-2.6.34.noarch/fs/exofs/Kbuild
+--- linux-2.6.34.noarch/fs/exofs/Kbuild.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kbuild	2010-09-30 10:17:08.434986000 -0400
+@@ -13,4 +13,5 @@
+ #
+ 
+ exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o
++exofs-$(CONFIG_PNFSD) +=  export.o
+ obj-$(CONFIG_EXOFS_FS) += exofs.o
+diff -up linux-2.6.34.noarch/fs/exofs/Kconfig.orig linux-2.6.34.noarch/fs/exofs/Kconfig
+--- linux-2.6.34.noarch/fs/exofs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/Kconfig	2010-09-30 10:17:08.438994000 -0400
+@@ -1,6 +1,7 @@
+ config EXOFS_FS
+ 	tristate "exofs: OSD based file system support"
+ 	depends on SCSI_OSD_ULD
++	select EXPORTFS_OSD_LAYOUT if PNFSD
+ 	help
+ 	  EXOFS is a file system that uses an OSD storage device,
+ 	  as its backing storage.
+diff -up linux-2.6.34.noarch/fs/exofs/super.c.orig linux-2.6.34.noarch/fs/exofs/super.c
+--- linux-2.6.34.noarch/fs/exofs/super.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exofs/super.c	2010-09-30 10:17:08.465986000 -0400
+@@ -621,6 +621,7 @@ static int exofs_fill_super(struct super
+ 	sb->s_fs_info = sbi;
+ 	sb->s_op = &exofs_sops;
+ 	sb->s_export_op = &exofs_export_ops;
++	exofs_init_export(sb);
+ 	root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF);
+ 	if (IS_ERR(root)) {
+ 		EXOFS_ERR("ERROR: exofs_iget failed\n");
+diff -up linux-2.6.34.noarch/fs/exportfs/expfs.c.orig linux-2.6.34.noarch/fs/exportfs/expfs.c
+--- linux-2.6.34.noarch/fs/exportfs/expfs.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/expfs.c	2010-09-30 10:17:08.489990000 -0400
+@@ -16,6 +16,13 @@
+ #include <linux/namei.h>
+ #include <linux/sched.h>
+ 
++#if defined(CONFIG_PNFSD)
++struct pnfsd_cb_ctl pnfsd_cb_ctl = {
++	.lock = __SPIN_LOCK_UNLOCKED(pnfsd_cb_ctl.lock)
++};
++EXPORT_SYMBOL(pnfsd_cb_ctl);
++#endif /* CONFIG_PNFSD */
++
+ #define dprintk(fmt, args...) do{}while(0)
+ 
+ 
+diff -up linux-2.6.34.noarch/fs/exportfs/Makefile.orig linux-2.6.34.noarch/fs/exportfs/Makefile
+--- linux-2.6.34.noarch/fs/exportfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/Makefile	2010-09-30 10:17:08.484990000 -0400
+@@ -3,4 +3,7 @@
+ 
+ obj-$(CONFIG_EXPORTFS) += exportfs.o
+ 
+-exportfs-objs := expfs.o
++exportfs-y				:= expfs.o
++exportfs-$(CONFIG_EXPORTFS_FILE_LAYOUT)	+= nfs4filelayoutxdr.o
++exportfs-$(CONFIG_EXPORTFS_OSD_LAYOUT)	+= pnfs_osd_xdr_srv.o
++exportfs-$(CONFIG_EXPORTFS_BLOCK_LAYOUT) += nfs4blocklayoutxdr.o
+diff -up linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c
+--- linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c.orig	2010-09-30 10:17:08.492991000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4blocklayoutxdr.c	2010-09-30 10:17:08.494987000 -0400
+@@ -0,0 +1,158 @@
++/*
++ *  linux/fs/exportfs/nfs4blocklayoutxdr.c
++ *
++ *
++ *  Created by Rick McNeal on 3/31/08.
++ *  Copyright 2008 __MyCompanyName__. All rights reserved.
++ *
++ */
++#include <linux/module.h>
++#include <linux/sunrpc/svc.h>
++#include <linux/nfs4.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++static int
++bl_encode_simple(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	__be32 *p = exp_xdr_reserve_space(xdr,
++					  12 + 4 + bld->u.simple.bld_sig_len);
++
++	if (!p)
++		return -ETOOSMALL;
++
++	p = exp_xdr_encode_u32(p, 1);
++	p = exp_xdr_encode_u64(p, bld->u.simple.bld_offset);
++	exp_xdr_encode_opaque(p, bld->u.simple.bld_sig,
++			      bld->u.simple.bld_sig_len);
++
++	return 0;
++}
++
++static int
++bl_encode_slice(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 2 + 2 + 1);
++
++	if (!p)
++		return -ETOOSMALL;
++
++	p = exp_xdr_encode_u64(p, bld->u.slice.bld_start);
++	p = exp_xdr_encode_u64(p, bld->u.slice.bld_len);
++	exp_xdr_encode_u32(p, bld->u.slice.bld_index);
++
++	return 0;
++}
++
++static int
++bl_encode_concat(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	return -ENOTSUPP;
++}
++
++static int
++bl_encode_stripe(struct exp_xdr_stream *xdr, pnfs_blocklayout_devinfo_t *bld)
++{
++	int i;
++	/* qwords: chunk size (2) + stripe count (1) + one index per stripe */
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 2 + 1 + bld->u.stripe.bld_stripes);
++	if (!p)
++		return -ETOOSMALL;
++	p = exp_xdr_encode_u64(p, bld->u.stripe.bld_chunk_size);
++	p = exp_xdr_encode_u32(p, bld->u.stripe.bld_stripes);
++	for (i = 0; i < bld->u.stripe.bld_stripes; i++)
++		p = exp_xdr_encode_u32(p, bld->u.stripe.bld_stripe_indexs[i]);
++	return 0;
++}
++
++int
++blocklayout_encode_devinfo(struct exp_xdr_stream *xdr,
++			   const struct list_head *volumes)
++{
++	u32				num_vols	= 0,
++					*layoutlen_p	= xdr->p;
++	pnfs_blocklayout_devinfo_t	*bld;
++	int				status		= 0;
++	__be32 *p;
++
++	p = exp_xdr_reserve_qwords(xdr, 2);
++	if (!p)
++		return -ETOOSMALL;
++	p += 2;
++
++	/*
++	 * All simple volumes with their signature are required to be listed
++	 * first.
++	 */
++	list_for_each_entry(bld, volumes, bld_list) {
++		num_vols++;
++		p = exp_xdr_reserve_qwords(xdr, 1);
++		if (!p)
++			return -ETOOSMALL;
++		p = exp_xdr_encode_u32(p, bld->bld_type);
++		switch (bld->bld_type) {
++			case PNFS_BLOCK_VOLUME_SIMPLE:
++				status = bl_encode_simple(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_SLICE:
++				status = bl_encode_slice(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_CONCAT:
++				status = bl_encode_concat(xdr, bld);
++				break;
++			case PNFS_BLOCK_VOLUME_STRIPE:
++				status = bl_encode_stripe(xdr, bld);
++				break;
++			default:
++				BUG();
++		}
++		if (status)
++			goto error;
++	}
++
++	/* ---- Fill in the overall length and number of volumes ---- */
++	p = exp_xdr_encode_u32(layoutlen_p, (xdr->p - layoutlen_p - 1) * 4);
++	exp_xdr_encode_u32(p, num_vols);
++
++error:
++	return status;
++}
++EXPORT_SYMBOL_GPL(blocklayout_encode_devinfo);
++
++enum nfsstat4
++blocklayout_encode_layout(struct exp_xdr_stream *xdr,
++			  const struct list_head *bl_head)
++{
++	struct pnfs_blocklayout_layout	*b;
++	u32				*layoutlen_p	= xdr->p,
++					extents		= 0;
++	__be32 *p;
++
++	/*
++	 * Save spot for opaque block layout length and number of extents,
++	 * fill-in later.
++	 */
++	p = exp_xdr_reserve_qwords(xdr, 2);
++	if (!p)
++		return NFS4ERR_TOOSMALL;
++	p += 2;
++
++	list_for_each_entry(b, bl_head, bll_list) {
++		extents++;
++		p = exp_xdr_reserve_qwords(xdr, 5 * 2 + 1);
++		if (!p)
++			return NFS4ERR_TOOSMALL;
++		p = exp_xdr_encode_u64(p, b->bll_vol_id.sbid);
++		p = exp_xdr_encode_u64(p, b->bll_vol_id.devid);
++		p = exp_xdr_encode_u64(p, b->bll_foff);
++		p = exp_xdr_encode_u64(p, b->bll_len);
++		p = exp_xdr_encode_u64(p, b->bll_soff);
++		p = exp_xdr_encode_u32(p, b->bll_es);
++	}
++
++	/* ---- Fill in the overall length and number of extents ---- */
++	p = exp_xdr_encode_u32(layoutlen_p, (p - layoutlen_p - 1) * 4);
++	exp_xdr_encode_u32(p, extents);
++
++	return NFS4_OK;
++}
++EXPORT_SYMBOL_GPL(blocklayout_encode_layout);
+diff -up linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c
+--- linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c.orig	2010-09-30 10:17:08.496992000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/nfs4filelayoutxdr.c	2010-09-30 10:17:08.498993000 -0400
+@@ -0,0 +1,218 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#include <linux/exp_xdr.h>
++#include <linux/module.h>
++#include <linux/nfs4.h>
++#include <linux/nfsd/nfsfh.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++/* We do our-own dprintk so filesystems are not dependent on sunrpc */
++#ifdef dprintk
++#undef dprintk
++#endif
++#define dprintk(fmt, args...)	do { } while (0)
++
++/* Calculate the XDR length of the GETDEVICEINFO4resok structure
++ * excluding the gdir_notification and the gdir_device_addr da_layout_type.
++ */
++static int fl_devinfo_xdr_words(const struct pnfs_filelayout_device *fdev)
++{
++	struct pnfs_filelayout_devaddr *fl_addr;
++	struct pnfs_filelayout_multipath *mp;
++	int i, j, nwords;
++
++	/* da_addr_body length, indice length, indices,
++	 * multipath_list4 length */
++	nwords = 1 + 1 + fdev->fl_stripeindices_length + 1;
++	for (i = 0; i < fdev->fl_device_length; i++) {
++		mp = &fdev->fl_device_list[i];
++		nwords++; /* multipath list length */
++		for (j = 0; j < mp->fl_multipath_length; j++) {
++			fl_addr = &mp->fl_multipath_list[j];
++			nwords += 1 + exp_xdr_qwords(fl_addr->r_netid.len);
++			nwords += 1 + exp_xdr_qwords(fl_addr->r_addr.len);
++		}
++	}
++	dprintk("<-- %s nwords %d\n", __func__, nwords);
++	return nwords;
++}
++
++/* Encodes the nfsv4_1_file_layout_ds_addr4 structure from draft 13
++ * on the response stream.
++ * Use linux error codes (not nfs) since these values are being
++ * returned to the file system.
++ */
++int
++filelayout_encode_devinfo(struct exp_xdr_stream *xdr,
++			  const struct pnfs_filelayout_device *fdev)
++{
++	unsigned int i, j, len = 0, opaque_words;
++	u32 *p_in;
++	u32 index_count = fdev->fl_stripeindices_length;
++	u32 dev_count = fdev->fl_device_length;
++	int error = 0;
++	__be32 *p;
++
++	opaque_words = fl_devinfo_xdr_words(fdev);
++	dprintk("%s: Begin indx_cnt: %u dev_cnt: %u total size %u\n",
++		__func__,
++		index_count,
++		dev_count,
++		opaque_words*4);
++
++	/* check space for opaque length */
++	p = p_in = exp_xdr_reserve_qwords(xdr, opaque_words);
++	if (!p) {
++		error =  -ETOOSMALL;
++		goto out;
++	}
++
++	/* Fill in length later */
++	p++;
++
++	/* encode device list indices */
++	p = exp_xdr_encode_u32(p, index_count);
++	for (i = 0; i < index_count; i++)
++		p = exp_xdr_encode_u32(p, fdev->fl_stripeindices_list[i]);
++
++	/* encode device list */
++	p = exp_xdr_encode_u32(p, dev_count);
++	for (i = 0; i < dev_count; i++) {
++		struct pnfs_filelayout_multipath *mp = &fdev->fl_device_list[i];
++
++		p = exp_xdr_encode_u32(p, mp->fl_multipath_length);
++		for (j = 0; j < mp->fl_multipath_length; j++) {
++			struct pnfs_filelayout_devaddr *da =
++						&mp->fl_multipath_list[j];
++
++			/* Encode device info */
++			p = exp_xdr_encode_opaque(p, da->r_netid.data,
++						     da->r_netid.len);
++			p = exp_xdr_encode_opaque(p, da->r_addr.data,
++						     da->r_addr.len);
++		}
++	}
++
++	/* backfill in length. Subtract 4 for da_addr_body size */
++	len = (char *)p - (char *)p_in;
++	exp_xdr_encode_u32(p_in, len - 4);
++
++	error = 0;
++out:
++	dprintk("%s: End err %d xdrlen %d\n",
++		__func__, error, len);
++	return error;
++}
++EXPORT_SYMBOL(filelayout_encode_devinfo);
++
++/* Encodes the loc_body structure from draft 13
++ * on the response stream.
++ * Returns an nfsstat4 status (NFS4_OK, NFS4ERR_LAYOUTUNAVAILABLE or
++ * NFS4ERR_TOOSMALL), not a linux error code.
++ */
++enum nfsstat4
++filelayout_encode_layout(struct exp_xdr_stream *xdr,
++			 const struct pnfs_filelayout_layout *flp)
++{
++	u32 len = 0, nfl_util, fhlen, i;
++	u32 *layoutlen_p;
++	enum nfsstat4 nfserr;
++	__be32 *p;
++
++	dprintk("%s: device_id %llx:%llx fsi %u, numfh %u\n",
++		__func__,
++		flp->device_id.pnfs_fsid,
++		flp->device_id.pnfs_devid,
++		flp->lg_first_stripe_index,
++		flp->lg_fh_length);
++
++	/* Ensure file system added at least one file handle */
++	if (flp->lg_fh_length <= 0) {
++		dprintk("%s: File Layout has no file handles!!\n", __func__);
++		nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		goto out;
++	}
++
++	/* Ensure room for len, devid, util, first_stripe_index,
++	 * pattern_offset, number of filehandles */
++	p = layoutlen_p = exp_xdr_reserve_qwords(xdr, 1+2+2+1+1+2+1);
++	if (!p) {
++		nfserr = NFS4ERR_TOOSMALL;
++		goto out;
++	}
++
++	/* save spot for opaque file layout length, fill-in later*/
++	p++;
++
++	/* encode device id */
++	p = exp_xdr_encode_u64(p, flp->device_id.sbid);
++	p = exp_xdr_encode_u64(p, flp->device_id.devid);
++
++	/* set and encode flags */
++	nfl_util = flp->lg_stripe_unit;
++	if (flp->lg_commit_through_mds)
++		nfl_util |= NFL4_UFLG_COMMIT_THRU_MDS;
++	if (flp->lg_stripe_type == STRIPE_DENSE)
++		nfl_util |= NFL4_UFLG_DENSE;
++	p = exp_xdr_encode_u32(p, nfl_util);
++
++	/* encode first stripe index */
++	p = exp_xdr_encode_u32(p, flp->lg_first_stripe_index);
++
++	/* encode striping pattern start */
++	p = exp_xdr_encode_u64(p, flp->lg_pattern_offset);
++
++	/* encode number of file handles */
++	p = exp_xdr_encode_u32(p, flp->lg_fh_length);
++
++	/* encode file handles */
++	for (i = 0; i < flp->lg_fh_length; i++) {
++		fhlen = flp->lg_fh_list[i].fh_size;
++		p = exp_xdr_reserve_space(xdr, 4 + fhlen);
++		if (!p) {
++			nfserr = NFS4ERR_TOOSMALL;
++			goto out;
++		}
++		p = exp_xdr_encode_opaque(p, &flp->lg_fh_list[i].fh_base, fhlen);
++	}
++
++	/* Set number of bytes encoded =  total_bytes_encoded - length var */
++	len = (char *)p - (char *)layoutlen_p;
++	exp_xdr_encode_u32(layoutlen_p, len - 4);
++
++	nfserr = NFS4_OK;
++out:
++	dprintk("%s: End err %u xdrlen %d\n",
++		__func__, nfserr, len);
++	return nfserr;
++}
++EXPORT_SYMBOL(filelayout_encode_layout);
+diff -up linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c
+--- linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c.orig	2010-09-30 10:17:08.501989000 -0400
++++ linux-2.6.34.noarch/fs/exportfs/pnfs_osd_xdr_srv.c	2010-09-30 10:17:08.503988000 -0400
+@@ -0,0 +1,289 @@
++/*
++ *  pnfs_osd_xdr_enc.c
++ *
++ *  Object-Based pNFS Layout XDR layer
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/pnfs_osd_xdr.h>
++
++/*
++ * struct pnfs_osd_data_map {
++ * 	u32	odm_num_comps;
++ * 	u64	odm_stripe_unit;
++ * 	u32	odm_group_width;
++ * 	u32	odm_group_depth;
++ * 	u32	odm_mirror_cnt;
++ * 	u32	odm_raid_algorithm;
++ * };
++ */
++static int pnfs_osd_xdr_encode_data_map(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_data_map *data_map)
++{
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 1+2+1+1+1+1);
++
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u32(p, data_map->odm_num_comps);
++	p = exp_xdr_encode_u64(p, data_map->odm_stripe_unit);
++	p = exp_xdr_encode_u32(p, data_map->odm_group_width);
++	p = exp_xdr_encode_u32(p, data_map->odm_group_depth);
++	p = exp_xdr_encode_u32(p, data_map->odm_mirror_cnt);
++	p = exp_xdr_encode_u32(p, data_map->odm_raid_algorithm);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline int pnfs_osd_xdr_encode_objid(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_objid *object_id)
++{
++	__be32 *p = exp_xdr_reserve_qwords(xdr, 2+2+2+2);
++	struct nfsd4_pnfs_deviceid *dev_id =
++		(struct nfsd4_pnfs_deviceid *)&object_id->oid_device_id;
++
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u64(p, dev_id->sbid);
++	p = exp_xdr_encode_u64(p, dev_id->devid);
++	p = exp_xdr_encode_u64(p, object_id->oid_partition_id);
++	p = exp_xdr_encode_u64(p, object_id->oid_object_id);
++
++	return 0;
++}
++
++/*
++ * enum pnfs_osd_cap_key_sec4 {
++ * 	PNFS_OSD_CAP_KEY_SEC_NONE = 0,
++ * 	PNFS_OSD_CAP_KEY_SEC_SSV  = 1
++ * };
++ *
++ * struct pnfs_osd_object_cred {
++ * 	struct pnfs_osd_objid		oc_object_id;
++ * 	u32				oc_osd_version;
++ * 	u32				oc_cap_key_sec;
++ * 	struct pnfs_osd_opaque_cred	oc_cap_key
++ * 	struct pnfs_osd_opaque_cred	oc_cap;
++ * };
++ */
++static int pnfs_osd_xdr_encode_object_cred(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_object_cred *olo_comp)
++{
++	__be32 *p;
++	int err;
++
++	err = pnfs_osd_xdr_encode_objid(xdr, &olo_comp->oc_object_id);
++	if (err)
++		return err;
++
++	p = exp_xdr_reserve_space(xdr, 3*4 + 4+olo_comp->oc_cap.cred_len);
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u32(p, olo_comp->oc_osd_version);
++
++	/* No sec for now */
++	p = exp_xdr_encode_u32(p, PNFS_OSD_CAP_KEY_SEC_NONE);
++	p = exp_xdr_encode_u32(p, 0); /* opaque oc_capability_key<> */
++
++	exp_xdr_encode_opaque(p, olo_comp->oc_cap.cred,
++			      olo_comp->oc_cap.cred_len);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_layout {
++ * 	struct pnfs_osd_data_map	olo_map;
++ * 	u32				olo_comps_index;
++ * 	u32				olo_num_comps;
++ * 	struct pnfs_osd_object_cred	*olo_comps;
++ * };
++ */
++int pnfs_osd_xdr_encode_layout(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_layout *pol)
++{
++	__be32 *p;
++	u32 i;
++	int err;
++
++	err = pnfs_osd_xdr_encode_data_map(xdr, &pol->olo_map);
++	if (err)
++		return err;
++
++	p = exp_xdr_reserve_qwords(xdr, 2);
++	if (!p)
++		return -E2BIG;
++
++	p = exp_xdr_encode_u32(p, pol->olo_comps_index);
++	p = exp_xdr_encode_u32(p, pol->olo_num_comps);
++
++	for (i = 0; i < pol->olo_num_comps; i++) {
++		err = pnfs_osd_xdr_encode_object_cred(xdr, &pol->olo_comps[i]);
++		if (err)
++			return err;
++	}
++
++	return 0;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_encode_layout);
++
++static int _encode_string(struct exp_xdr_stream *xdr,
++			  const struct nfs4_string *str)
++{
++	__be32 *p = exp_xdr_reserve_space(xdr, 4 + str->len);
++
++	if (!p)
++		return -E2BIG;
++	exp_xdr_encode_opaque(p, str->data, str->len);
++	return 0;
++}
++
++/* struct pnfs_osd_deviceaddr {
++ * 	struct pnfs_osd_targetid	oda_targetid;
++ * 	struct pnfs_osd_targetaddr	oda_targetaddr;
++ * 	u8				oda_lun[8];
++ * 	struct nfs4_string		oda_systemid;
++ * 	struct pnfs_osd_object_cred	oda_root_obj_cred;
++ * 	struct nfs4_string		oda_osdname;
++ * };
++ */
++int pnfs_osd_xdr_encode_deviceaddr(
++	struct exp_xdr_stream *xdr, struct pnfs_osd_deviceaddr *devaddr)
++{
++	__be32 *p;
++	int err;
++
++	p = exp_xdr_reserve_space(xdr, 4 + 4 + sizeof(devaddr->oda_lun));
++	if (!p)
++		return -E2BIG;
++
++	/* Empty oda_targetid */
++	p = exp_xdr_encode_u32(p, OBJ_TARGET_ANON);
++
++	/* Empty oda_targetaddr for now */
++	p = exp_xdr_encode_u32(p, 0);
++
++	/* oda_lun */
++	exp_xdr_encode_bytes(p, devaddr->oda_lun, sizeof(devaddr->oda_lun));
++
++	err = _encode_string(xdr, &devaddr->oda_systemid);
++	if (err)
++		return err;
++
++	err = pnfs_osd_xdr_encode_object_cred(xdr,
++					      &devaddr->oda_root_obj_cred);
++	if (err)
++		return err;
++
++	err = _encode_string(xdr, &devaddr->oda_osdname);
++	if (err)
++		return err;
++
++	return 0;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_encode_deviceaddr);
++
++/*
++ * struct pnfs_osd_layoutupdate {
++ * 	u32	dsu_valid;
++ * 	s64	dsu_delta;
++ * 	u32	olu_ioerr_flag;
++ * };
++ */
++__be32 *
++pnfs_osd_xdr_decode_layoutupdate(struct pnfs_osd_layoutupdate *lou, __be32 *p)
++{
++	lou->dsu_valid = be32_to_cpu(*p++);
++	if (lou->dsu_valid)
++		p = xdr_decode_hyper(p, &lou->dsu_delta);
++	lou->olu_ioerr_flag = be32_to_cpu(*p++);
++	return p;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_decode_layoutupdate);
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline __be32 *
++pnfs_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid)
++{
++	/* FIXME: p = xdr_decode_fixed(...) */
++	memcpy(objid->oid_device_id.data, p, sizeof(objid->oid_device_id.data));
++	p += XDR_QUADLEN(sizeof(objid->oid_device_id.data));
++
++	p = xdr_decode_hyper(p, &objid->oid_partition_id);
++	p = xdr_decode_hyper(p, &objid->oid_object_id);
++	return p;
++}
++
++/*
++ * struct pnfs_osd_ioerr {
++ * 	struct pnfs_osd_objid	oer_component;
++ * 	u64			oer_comp_offset;
++ * 	u64			oer_comp_length;
++ * 	u32			oer_iswrite;
++ * 	u32			oer_errno;
++ * };
++ */
++__be32 *
++pnfs_osd_xdr_decode_ioerr(struct pnfs_osd_ioerr *ioerr, __be32 *p)
++{
++	p = pnfs_osd_xdr_decode_objid(p, &ioerr->oer_component);
++	p = xdr_decode_hyper(p, &ioerr->oer_comp_offset);
++	p = xdr_decode_hyper(p, &ioerr->oer_comp_length);
++	ioerr->oer_iswrite = be32_to_cpu(*p++);
++	ioerr->oer_errno = be32_to_cpu(*p++);
++	return p;
++}
++EXPORT_SYMBOL(pnfs_osd_xdr_decode_ioerr);
+diff -up linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig linux-2.6.34.noarch/fs/gfs2/ops_fstype.c
+--- linux-2.6.34.noarch/fs/gfs2/ops_fstype.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/gfs2/ops_fstype.c	2010-09-30 10:17:08.509988000 -0400
+@@ -19,6 +19,7 @@
+ #include <linux/gfs2_ondisk.h>
+ #include <linux/slow-work.h>
+ #include <linux/quotaops.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
+ 
+ #include "gfs2.h"
+ #include "incore.h"
+@@ -1146,6 +1147,9 @@ static int fill_super(struct super_block
+ 	sb->s_magic = GFS2_MAGIC;
+ 	sb->s_op = &gfs2_super_ops;
+ 	sb->s_export_op = &gfs2_export_ops;
++#if defined(CONFIG_PNFSD)
++	sb->s_pnfs_op = &pnfs_dlm_export_ops;
++#endif /* CONFIG_PNFSD */
+ 	sb->s_xattr = gfs2_xattr_handlers;
+ 	sb->s_qcop = &gfs2_quotactl_ops;
+ 	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+diff -up linux-2.6.34.noarch/fs/Kconfig.orig linux-2.6.34.noarch/fs/Kconfig
+--- linux-2.6.34.noarch/fs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/Kconfig	2010-09-30 10:17:08.428989000 -0400
+@@ -224,6 +224,31 @@ config LOCKD_V4
+ config EXPORTFS
+ 	tristate
+ 
++config EXPORTFS_FILE_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 files layout type.
++	  Must be automatically selected by supporting filesystems.
++
++config EXPORTFS_OSD_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 objects layout type.
++	  Must be automatically selected by supporting osd
++	  filesystems.
++
++	  If unsure, say N.
++
++config EXPORTFS_BLOCK_LAYOUT
++	bool
++	depends on PNFSD && EXPORTFS
++	help
++	  Exportfs support for the NFSv4.1 blocks layout type.
++	  Must be automatically selected by supporting filesystems.
++
++
+ config NFS_ACL_SUPPORT
+ 	tristate
+ 	select FS_POSIX_ACL
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c.orig	2010-09-30 10:17:08.528988000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/block-device-discovery-pipe.c	2010-09-30 10:17:08.529994000 -0400
+@@ -0,0 +1,66 @@
++#include <linux/module.h>
++#include <linux/uaccess.h>
++#include <linux/proc_fs.h>
++#include <linux/string.h>
++#include <linux/slab.h>
++#include <linux/ctype.h>
++#include <linux/sched.h>
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY NFSDBG_PNFS_LD
++
++struct pipefs_list bl_device_list;
++struct dentry *bl_device_pipe;
++
++ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, size_t len)
++{
++	int err;
++	struct pipefs_hdr *msg;
++
++	dprintk("Entering %s...\n", __func__);
++	/* Fetch the message written to the pipe; an ERR_PTR is passed back */
++	msg = pipefs_readmsg(filp, src, len);
++	if (IS_ERR(msg)) {
++		dprintk("ERROR: unable to read pipefs message.\n");
++		return PTR_ERR(msg);
++	}
++
++	/* now assign the result, which wakes the blocked thread */
++	err = pipefs_assign_upcall_reply(msg, &bl_device_list);
++	if (err) {
++		dprintk("ERROR: failed to assign upcall with id %u\n",
++			msg->msgid);
++		kfree(msg);	/* msg is freed here only when assignment failed */
++	}
++	return len;	/* NOTE: len is returned even if the assignment failed */
++}
++
++static const struct rpc_pipe_ops bl_pipe_ops = {
++	.upcall         = pipefs_generic_upcall,
++	.downcall       = bl_pipe_downcall,
++	.destroy_msg    = pipefs_generic_destroy_msg,
++};
++
++int bl_pipe_init(void)
++{
++	dprintk("%s: block_device pipefs registering...\n", __func__);
++	bl_device_pipe = pipefs_mkpipe("bl_device_pipe", &bl_pipe_ops, 1);
++	/* Log exactly one outcome; an ERR_PTR is non-NULL, so test it first */
++	if (IS_ERR(bl_device_pipe))
++		dprintk("ERROR, unable to make block_device pipe\n");
++	else if (!bl_device_pipe)
++		dprintk("bl_device_pipe is NULL!\n");
++	else
++		dprintk("bl_device_pipe created!\n");
++	pipefs_init_list(&bl_device_list);
++	return 0;	/* always 0, as before: failure is only logged */
++}
++
++void bl_pipe_exit(void)
++{
++	dprintk("%s: block_device pipefs unregistering...\n", __func__);
++	/* Nothing to close when bl_pipe_init() ended up without a pipe */
++	if (!bl_device_pipe || IS_ERR(bl_device_pipe))
++		return;
++	pipefs_closepipe(bl_device_pipe);
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c.orig	2010-09-30 10:17:08.533988000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.c	2010-09-30 10:17:08.535989000 -0400
+@@ -0,0 +1,1160 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayout.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++
++#include <linux/buffer_head.h> /* various write calls */
++#include <linux/bio.h> /* struct bio */
++#include <linux/vmalloc.h>
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
++MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
++
++/* Callback operations to the pNFS client */
++static struct pnfs_client_operations *pnfs_block_callback_ops;
++
++static void print_page(struct page *page)
++{
++	dprintk("PRINTPAGE page %p\n", page);
++	dprintk("        PagePrivate %d\n", PagePrivate(page));
++	dprintk("        PageUptodate %d\n", PageUptodate(page));
++	dprintk("        PageError %d\n", PageError(page));
++	dprintk("        PageDirty %d\n", PageDirty(page));
++	dprintk("        PageReferenced %d\n", PageReferenced(page));
++	dprintk("        PageLocked %d\n", PageLocked(page));
++	dprintk("        PageWriteback %d\n", PageWriteback(page));
++	dprintk("        PageMappedToDisk %d\n", PageMappedToDisk(page));
++	dprintk("\n");
++}
++
++/* Decide whether the page data at isect, which lies inside extent be,
++ * still needs to be initialized.  An extent with no data always does,
++ * an invalid-data extent does until the sector has been marked
++ * initialized, and every other kind of extent never does.
++ * Returns nonzero for a hole and 0 otherwise.
++ */
++static int is_hole(struct pnfs_block_extent *be, sector_t isect)
++{
++	if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++		return !is_sector_initialized(be->be_inval, isect);
++	return be->be_state == PNFS_BLOCK_NONE_DATA;
++}
++
++/* Decide whether the page data at isect, which lies inside extent be,
++ * can be written to disk.  A read-write extent always can, an
++ * invalid-data extent can once the sector has been marked initialized,
++ * and every other kind of extent never can.
++ * Returns nonzero when writable and 0 otherwise.
++ */
++static int is_writable(struct pnfs_block_extent *be, sector_t isect)
++{
++	if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++		return is_sector_initialized(be->be_inval, isect);
++	return be->be_state == PNFS_BLOCK_READWRITE_DATA;
++}
++
++/* Tell the two kinds of caller apart by the request's wb_complete
++ * count: nonzero means we were called by _multi, zero by _one.
++ * Returns 1 for _multi and 0 for _one.
++ */
++static int
++dont_like_caller(struct nfs_page *req)
++{
++	int from_multi = atomic_read(&req->wb_complete) ? 1 : 0;
++
++	return from_multi;
++}
++
++static enum pnfs_try_status
++bl_commit(struct nfs_write_data *nfs_data,
++	  int sync)
++{
++	dprintk("%s enter\n", __func__);
++	return PNFS_NOT_ATTEMPTED;
++}
++
++/* The data we are handed might be spread across several bios.  We need
++ * to track when the last one is finished.
++ */
++struct parallel_io {
++	struct kref refcnt;
++	struct rpc_call_ops call_ops;
++	void (*pnfs_callback) (void *data);
++	void *data;
++};
++
++static inline struct parallel_io *alloc_parallel(void *data)
++{
++	struct parallel_io *par = kmalloc(sizeof(*par), GFP_KERNEL);
++
++	/* Only data and refcnt are set up here; NULL means out of memory */
++	if (!par)
++		return NULL;
++	par->data = data;
++	kref_init(&par->refcnt);
++	return par;
++}
++
++static inline void get_parallel(struct parallel_io *p)
++{
++	kref_get(&p->refcnt);
++}
++
++static void destroy_parallel(struct kref *kref)
++{
++	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
++
++	dprintk("%s enter\n", __func__);
++	p->pnfs_callback(p->data);
++	kfree(p);
++}
++
++static inline void put_parallel(struct parallel_io *p)
++{
++	kref_put(&p->refcnt, destroy_parallel);
++}
++
++static struct bio *
++bl_submit_bio(int rw, struct bio *bio)
++{
++	if (!bio)
++		return NULL;	/* nothing pending to submit */
++	get_parallel(bio->bi_private);
++	dprintk("%s submitting %s bio %u@%llu\n", __func__,
++		rw == READ ? "read" : "write",
++		bio->bi_size, (u64)bio->bi_sector);
++	submit_bio(rw, bio);
++	return NULL;	/* lets callers reset their bio pointer in one step */
++}
++
++static inline void
++bl_done_with_rpage(struct page *page, const int ok)
++{
++	/* Page is unlocked via rpc_release.  Should really be done here. */
++	if (!ok) {	/* failed read: flag the page and bail out */
++		ClearPageUptodate(page);
++		SetPageError(page);
++		SetPagePnfsErr(page);
++		return;
++	}
++	ClearPagePnfsErr(page);
++	SetPageUptodate(page);
++}
++
++/* This is basically copied from mpage_end_io_read */
++static void bl_end_io_read(struct bio *bio, int err)
++{
++	void *data = bio->bi_private;
++	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
++	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
++
++	do {
++		struct page *page = bvec->bv_page;
++
++		if (--bvec >= bio->bi_io_vec)
++			prefetchw(&bvec->bv_page->flags);
++		bl_done_with_rpage(page, uptodate);
++	} while (bvec >= bio->bi_io_vec);
++	bio_put(bio);
++	put_parallel(data);
++}
++
++static void bl_read_cleanup(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_read_data *rdata;
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	rdata = container_of(task, struct nfs_read_data, task);
++	pnfs_block_callback_ops->nfs_readlist_complete(rdata);
++}
++
++static void
++bl_end_par_io_read(void *data)
++{
++	struct nfs_read_data *rdata = data;
++
++	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
++	schedule_work(&rdata->task.u.tk_work);
++}
++
++/* We don't want normal .rpc_call_done callback used, so we replace it
++ * with this stub.
++ */
++static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata)
++{
++	return;
++}
++
++static enum pnfs_try_status
++bl_read_pagelist(struct nfs_read_data *rdata,
++		 unsigned nr_pages)
++{
++	int i, hole;
++	struct bio *bio = NULL;
++	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
++	sector_t isect, extent_length = 0;
++	struct parallel_io *par;
++	loff_t f_offset = rdata->args.offset;
++	size_t count = rdata->args.count;
++	struct page **pages = rdata->args.pages;
++	int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
++
++	dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__,
++	       nr_pages, f_offset, count);
++
++	if (dont_like_caller(rdata->req)) {
++		dprintk("%s dont_like_caller failed\n", __func__);
++		goto use_mds;
++	}
++	if ((nr_pages == 1) && PagePnfsErr(rdata->req->wb_page)) {
++		/* We want to fall back to mds in case of read_page
++		 * after error on read_pages.
++		 */
++		dprintk("%s PG_pnfserr set\n", __func__);
++		goto use_mds;
++	}
++	par = alloc_parallel(rdata);
++	if (!par)
++		goto use_mds;
++	par->call_ops = *rdata->pdata.call_ops;
++	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
++	par->pnfs_callback = bl_end_par_io_read;
++	/* At this point, we can no longer jump to use_mds */
++
++	isect = (sector_t) (f_offset >> 9);
++	/* Code assumes extents are page-aligned */
++	for (i = pg_index; i < nr_pages; i++) {
++		if (!extent_length) {
++			/* We've used up the previous extent */
++			put_extent(be);
++			put_extent(cow_read);
++			bio = bl_submit_bio(READ, bio);
++			/* Get the next one */
++			be = find_get_extent(BLK_LSEG2EXT(rdata->pdata.lseg),
++					     isect, &cow_read);
++			if (!be) {
++				/* Error out this page */
++				bl_done_with_rpage(pages[i], 0);
++				break;
++			}
++			extent_length = be->be_length -
++				(isect - be->be_f_offset);
++			if (cow_read) {
++				sector_t cow_length = cow_read->be_length -
++					(isect - cow_read->be_f_offset);
++				extent_length = min(extent_length, cow_length);
++			}
++		}
++		hole = is_hole(be, isect);
++		if (hole && !cow_read) {
++			bio = bl_submit_bio(READ, bio);
++			/* Fill hole w/ zeroes w/o accessing device */
++			dprintk("%s Zeroing page for hole\n", __func__);
++			zero_user(pages[i], 0,
++				  min_t(int, PAGE_CACHE_SIZE, count));
++			print_page(pages[i]);
++			bl_done_with_rpage(pages[i], 1);
++		} else {
++			struct pnfs_block_extent *be_read;
++
++			be_read = (hole && cow_read) ? cow_read : be;
++			for (;;) {
++				if (!bio) {
++					bio = bio_alloc(GFP_NOIO, nr_pages - i);
++					if (!bio) {
++						/* Error out this page */
++						bl_done_with_rpage(pages[i], 0);
++						break;
++					}
++					bio->bi_sector = isect -
++						be_read->be_f_offset +
++						be_read->be_v_offset;
++					bio->bi_bdev = be_read->be_mdev;
++					bio->bi_end_io = bl_end_io_read;
++					bio->bi_private = par;
++				}
++				if (bio_add_page(bio, pages[i], PAGE_SIZE, 0))
++					break;
++				bio = bl_submit_bio(READ, bio);
++			}
++		}
++		isect += PAGE_CACHE_SIZE >> 9;
++		extent_length -= PAGE_CACHE_SIZE >> 9;
++	}
++	if ((isect << 9) >= rdata->inode->i_size) {
++		rdata->res.eof = 1;
++		rdata->res.count = rdata->inode->i_size - f_offset;
++	} else {
++		rdata->res.count = (isect << 9) - f_offset;
++	}
++	put_extent(be);
++	put_extent(cow_read);
++	bl_submit_bio(READ, bio);
++	put_parallel(par);
++	return PNFS_ATTEMPTED;
++
++ use_mds:
++	dprintk("Giving up and using normal NFS\n");
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static void mark_extents_written(struct pnfs_block_layout *bl,
++				 __u64 offset, __u32 count)
++{
++	sector_t isect, end;
++	struct pnfs_block_extent *be;
++
++	dprintk("%s(%llu, %u)\n", __func__, offset, count);
++	if (count == 0)
++		return;
++	isect = (offset & (long)(PAGE_CACHE_MASK)) >> 9;
++	end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK);
++	end >>= 9;
++	while (isect < end) {
++		sector_t len;
++		be = find_get_extent(bl, isect, NULL);
++		BUG_ON(!be); /* FIXME */
++		len = min(end, be->be_f_offset + be->be_length) - isect;
++		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++			mark_for_commit(be, isect, len); /* What if fails? */
++		isect += len;
++		put_extent(be);
++	}
++}
++
++/* STUB - this needs thought */
++static inline void
++bl_done_with_wpage(struct page *page, const int ok)
++{
++	if (!ok) {
++		SetPageError(page);
++		SetPagePnfsErr(page);
++		/* This is an inline copy of nfs_zap_mapping */
++		/* This is oh so fishy, and needs deep thought */
++		if (page->mapping->nrpages != 0) {
++			struct inode *inode = page->mapping->host;
++			spin_lock(&inode->i_lock);
++			NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
++			spin_unlock(&inode->i_lock);
++		}
++	}
++	/* end_page_writeback called in rpc_release.  Should be done here. */
++}
++
++/* Write-side twin of bl_end_io_read; err is unused, BIO_UPTODATE decides */
++static void bl_end_io_write(struct bio *bio, int err)
++{
++	void *data = bio->bi_private;	/* saved before bio_put() below */
++	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
++	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
++	/* Walk the bio's pages from last to first, flagging each one */
++	do {
++		struct page *page = bvec->bv_page;
++
++		if (--bvec >= bio->bi_io_vec)
++			prefetchw(&bvec->bv_page->flags);
++		bl_done_with_wpage(page, uptodate);
++	} while (bvec >= bio->bi_io_vec);
++	bio_put(bio);
++	put_parallel(data);	/* drops the ref taken in bl_submit_bio */
++}
++
++/* Function scheduled for call during bl_end_par_io_write,
++ * it marks sectors as written and extends the commitlist.
++ */
++static void bl_write_cleanup(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++	if (!wdata->task.tk_status) {
++		/* Marks for LAYOUTCOMMIT */
++		/* BUG - this should be called after each bio, not after
++		 * all finish, unless have some way of storing success/failure
++		 */
++		mark_extents_written(BLK_LSEG2EXT(wdata->pdata.lseg),
++				     wdata->args.offset, wdata->args.count);
++	}
++	pnfs_block_callback_ops->nfs_writelist_complete(wdata);
++}
++
++/* Called when last of bios associated with a bl_write_pagelist call finishes */
++static void
++bl_end_par_io_write(void *data)
++{
++	struct nfs_write_data *wdata = data;
++
++	/* STUB - ignoring error handling */
++	wdata->task.tk_status = 0;
++	wdata->verf.committed = NFS_FILE_SYNC;
++	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
++	schedule_work(&wdata->task.u.tk_work);
++}
++
++static enum pnfs_try_status
++bl_write_pagelist(struct nfs_write_data *wdata,
++		  unsigned nr_pages,
++		  int sync)
++{
++	int i;
++	struct bio *bio = NULL;
++	struct pnfs_block_extent *be = NULL;
++	sector_t isect, extent_length = 0;
++	struct parallel_io *par;
++	loff_t offset = wdata->args.offset;
++	size_t count = wdata->args.count;
++	struct page **pages = wdata->args.pages;
++	int pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
++
++	dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
++	if (!wdata->req->wb_lseg) {
++		dprintk("%s no lseg, falling back to MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	if (dont_like_caller(wdata->req)) {
++		dprintk("%s dont_like_caller failed\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	/* At this point, wdata->pages is a (sequential) list of nfs_pages.
++	 * We want to write each, and if there is an error remove it from
++	 * list and call
++	 * nfs_retry_request(req) to have it redone using nfs.
++	 * QUEST? Do as block or per req?  Think have to do per block
++	 * as part of end_bio
++	 */
++	par = alloc_parallel(wdata);
++	if (!par)
++		return PNFS_NOT_ATTEMPTED;
++	par->call_ops = *wdata->pdata.call_ops;
++	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
++	par->pnfs_callback = bl_end_par_io_write;
++	/* At this point, have to be more careful with error handling */
++
++	isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> 9);
++	for (i = pg_index; i < nr_pages; i++) {
++		if (!extent_length) {
++			/* We've used up the previous extent */
++			put_extent(be);
++			bio = bl_submit_bio(WRITE, bio);
++			/* Get the next one */
++			be = find_get_extent(BLK_LSEG2EXT(wdata->pdata.lseg),
++					     isect, NULL);
++			if (!be || !is_writable(be, isect)) {
++				/* FIXME */
++				bl_done_with_wpage(pages[i], 0);
++				break;
++			}
++			extent_length = be->be_length -
++				(isect - be->be_f_offset);
++		}
++		for (;;) {
++			if (!bio) {
++				bio = bio_alloc(GFP_NOIO, nr_pages - i);
++				if (!bio) {
++					/* Error out this page */
++					/* FIXME */
++					bl_done_with_wpage(pages[i], 0);
++					break;
++				}
++				bio->bi_sector = isect - be->be_f_offset +
++					be->be_v_offset;
++				bio->bi_bdev = be->be_mdev;
++				bio->bi_end_io = bl_end_io_write;
++				bio->bi_private = par;
++			}
++			if (bio_add_page(bio, pages[i], PAGE_SIZE, 0))
++				break;
++			bio = bl_submit_bio(WRITE, bio);
++		}
++		isect += PAGE_CACHE_SIZE >> 9;
++		extent_length -= PAGE_CACHE_SIZE >> 9;
++	}
++	wdata->res.count = (isect << 9) - (offset & (long)PAGE_CACHE_MASK);
++	put_extent(be);
++	bl_submit_bio(WRITE, bio);
++	put_parallel(par);
++	return PNFS_ATTEMPTED;
++}
++
++/* FIXME - range ignored */
++static void
++release_extents(struct pnfs_block_layout *bl,
++		struct pnfs_layout_range *range)
++{
++	int i;
++	struct pnfs_block_extent *be;
++
++	spin_lock(&bl->bl_ext_lock);
++	for (i = 0; i < EXTENT_LISTS; i++) {
++		while (!list_empty(&bl->bl_extents[i])) {
++			be = list_first_entry(&bl->bl_extents[i],
++					      struct pnfs_block_extent,
++					      be_node);
++			list_del(&be->be_node);
++			put_extent(be);
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++}
++
++static void
++release_inval_marks(struct pnfs_inval_markings *marks)
++{
++	struct pnfs_inval_tracking *pos, *temp;
++
++	list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
++		list_del(&pos->it_link);
++		kfree(pos);
++	}
++	return;
++}
++
++/* Note we are relying on caller locking to prevent nasty races. */
++static void
++bl_free_layout(struct pnfs_layout_hdr *lo)
++{
++	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
++
++	dprintk("%s enter\n", __func__);
++	release_extents(bl, NULL);
++	release_inval_marks(&bl->bl_inval);
++	kfree(bl);
++}
++
++static struct pnfs_layout_hdr *
++bl_alloc_layout(struct inode *inode)
++{
++	struct pnfs_block_layout	*bl;
++
++	dprintk("%s enter\n", __func__);
++	bl = kzalloc(sizeof(*bl), GFP_KERNEL);
++	if (!bl)
++		return NULL;
++	spin_lock_init(&bl->bl_ext_lock);
++	INIT_LIST_HEAD(&bl->bl_extents[0]);
++	INIT_LIST_HEAD(&bl->bl_extents[1]);
++	INIT_LIST_HEAD(&bl->bl_commit);
++	bl->bl_count = 0;
++	bl->bl_blocksize = NFS_SERVER(inode)->pnfs_blksize >> 9;
++	INIT_INVAL_MARKS(&bl->bl_inval, bl->bl_blocksize);
++	return &bl->bl_layout;
++}
++
++static void
++bl_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	dprintk("%s enter\n", __func__);
++	kfree(lseg);
++}
++
++/* Because the generic infrastructure does not correctly merge layouts,
++ * we pretty much ignore lseg, and store all data layout wide, so we
++ * can correctly merge.  Eventually we should push some correct merge
++ * behavior up to the generic code, as the current behavior tends to
++ * cause lots of unnecessary overlapping LAYOUTGET requests.
++ * Returns NULL if out of memory, ERR_PTR if processing lgr failed.
++ */
++static struct pnfs_layout_segment *
++bl_alloc_lseg(struct pnfs_layout_hdr *lo,
++	      struct nfs4_layoutget_res *lgr)
++{
++	struct pnfs_layout_segment *seg;
++	int err;
++
++	dprintk("%s enter\n", __func__);
++	seg = kzalloc(sizeof(*seg), GFP_KERNEL);
++	if (!seg)
++		return NULL;
++	err = nfs4_blk_process_layoutget(lo, lgr);
++	if (!err)
++		return seg;
++	/* We don't want to call the full-blown bl_free_lseg,
++	 * since on error extents were not touched.
++	 *
++	 * STUB - we really want to distinguish between 2 error
++	 * conditions here.  This lseg failed, but lo data structures
++	 * are OK, or we hosed the lo data structures.  The calling
++	 * code probably needs to distinguish this too.
++	 */
++	kfree(seg);
++	return ERR_PTR(err);
++}
++
++static int
++bl_setup_layoutcommit(struct pnfs_layout_hdr *lo,
++		      struct nfs4_layoutcommit_args *arg)
++{
++	struct nfs_server *nfss = PNFS_NFS_SERVER(lo);
++	struct bl_layoutupdate_data *layoutupdate_data;
++
++	dprintk("%s enter\n", __func__);
++	/* Need to ensure commit is block-size aligned */
++	if (nfss->pnfs_blksize) {
++		u64 mask = nfss->pnfs_blksize - 1;
++		u64 offset = arg->range.offset & mask;
++
++		arg->range.offset -= offset;
++		arg->range.length += offset + mask;
++		arg->range.length &= ~mask;
++	}
++
++	layoutupdate_data = kmalloc(sizeof(struct bl_layoutupdate_data),
++					 GFP_KERNEL);
++	if (unlikely(!layoutupdate_data))
++		return -ENOMEM;
++	INIT_LIST_HEAD(&layoutupdate_data->ranges);
++	arg->layoutdriver_data = layoutupdate_data;
++
++	return 0;
++}
++
++static void
++bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr,
++		       const struct nfs4_layoutcommit_args *arg)
++{
++	dprintk("%s enter\n", __func__);
++	encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg);
++}
++
++static void
++bl_cleanup_layoutcommit(struct pnfs_layout_hdr *lo,
++			struct nfs4_layoutcommit_args *arg, int status)
++{
++	dprintk("%s enter\n", __func__);
++	clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), arg, status);
++	kfree(arg->layoutdriver_data);
++}
++
++static void free_blk_mountid(struct block_mount_id *mid)
++{
++	struct pnfs_block_dev *dev;
++
++	if (!mid)	/* a NULL mount id is accepted and ignored */
++		return;
++	/* Empty the device list under its lock, then free the id itself */
++	spin_lock(&mid->bm_lock);
++	while (!list_empty(&mid->bm_devlist)) {
++		dev = list_first_entry(&mid->bm_devlist,
++				       struct pnfs_block_dev, bm_node);
++		list_del(&dev->bm_node);
++		free_block_dev(dev);
++	}
++	spin_unlock(&mid->bm_lock);
++	kfree(mid);
++}
++
++/* Fetch and decode GETDEVICEINFO for d_id; returns NULL on any failure.
++ * Mostly copied from filelayout's get_device_info; should be generic pnfs.
++ */
++static struct pnfs_block_dev *
++nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
++			struct pnfs_deviceid *d_id,
++			struct list_head *sdlist)
++{
++	struct pnfs_device *dev;
++	struct pnfs_block_dev *rv = NULL;
++	u32 max_resp_sz;
++	int max_pages;
++	struct page **pages = NULL;
++	int i, rc;
++
++	/* Use the session max response size as the basis for setting
++	 * GETDEVICEINFO's maxcount
++	 */
++	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
++	max_pages = max_resp_sz >> PAGE_SHIFT;
++	dprintk("%s max_resp_sz %u max_pages %d\n",
++		__func__, max_resp_sz, max_pages);
++	/* Zeroed so that out_free sees dev->area == NULL on early failure */
++	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
++	if (!dev) {
++		dprintk("%s kmalloc failed\n", __func__);
++		return NULL;
++	}
++
++	pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
++	if (pages == NULL) {
++		kfree(dev);
++		return NULL;
++	}
++	for (i = 0; i < max_pages; i++) {
++		pages[i] = alloc_page(GFP_KERNEL);
++		if (!pages[i])
++			goto out_free;
++	}
++
++	/* set dev->area */
++	dev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
++	if (!dev->area)
++		goto out_free;
++
++	memcpy(&dev->dev_id, d_id, sizeof(*d_id));
++	dev->layout_type = LAYOUT_BLOCK_VOLUME;
++	dev->dev_notify_types = 0;
++	dev->pages = pages;
++	dev->pgbase = 0;
++	dev->pglen = PAGE_SIZE * max_pages;
++	dev->mincount = 0;
++
++	dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data);
++	rc = pnfs_block_callback_ops->nfs_getdeviceinfo(server, dev);
++	dprintk("%s getdevice info returns %d\n", __func__, rc);
++	if (rc)
++		goto out_free;
++
++	rv = nfs4_blk_decode_device(server, dev, sdlist);
++ out_free:
++	if (dev->area != NULL)
++		vunmap(dev->area);
++	/* pages[] is zero-filled and populated in order: stop at first gap */
++	for (i = 0; i < max_pages && pages[i]; i++)
++		__free_page(pages[i]);
++	kfree(pages);
++	kfree(dev);
++	return rv;
++}
++
++
++/*
++ * Retrieve the list of available devices for the mountpoint.
++ * Returns 0 on success or a negative errno (mount id freed on failure).
++ */
++static int
++bl_initialize_mountpoint(struct nfs_server *server, const struct nfs_fh *fh)
++{
++	struct block_mount_id *b_mt_id = NULL;
++	struct pnfs_devicelist *dlist = NULL;
++	struct pnfs_block_dev *bdev;
++	LIST_HEAD(block_disklist);
++	int status = -ENOMEM, i;	/* covers both allocations below */
++
++	dprintk("%s enter\n", __func__);
++
++	if (server->pnfs_blksize == 0) {
++		dprintk("%s Server did not return blksize\n", __func__);
++		return -EINVAL;
++	}
++	b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_KERNEL);
++	if (!b_mt_id)
++		goto out_error;
++	/* Initialize nfs4 block layout mount id */
++	spin_lock_init(&b_mt_id->bm_lock);
++	INIT_LIST_HEAD(&b_mt_id->bm_devlist);
++
++	dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_KERNEL);
++	if (!dlist)
++		goto out_error;
++	dlist->eof = 0;
++	while (!dlist->eof) {
++		status = pnfs_block_callback_ops->nfs_getdevicelist(
++							server, fh, dlist);
++		if (status)
++			goto out_error;
++		dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n",
++			__func__, dlist->num_devs, dlist->eof);
++		/* For each device returned in dlist, call GETDEVICEINFO, and
++		 * decode the opaque topology encoding to create a flat
++		 * volume topology, matching VOLUME_SIMPLE disk signatures
++		 * to disks in the visible block disk list.
++		 * Construct an LVM meta device from the flat volume topology.
++		 */
++		for (i = 0; i < dlist->num_devs; i++) {
++			bdev = nfs4_blk_get_deviceinfo(server, fh,
++						     &dlist->dev_id[i],
++						     &block_disklist);
++			if (!bdev) {
++				/* status is 0 here; don't report success */
++				status = -EIO;
++				goto out_error;
++			}
++			spin_lock(&b_mt_id->bm_lock);
++			list_add(&bdev->bm_node, &b_mt_id->bm_devlist);
++			spin_unlock(&b_mt_id->bm_lock);
++		}
++	}
++	dprintk("%s SUCCESS\n", __func__);
++	server->pnfs_ld_data = b_mt_id;
++
++ out_return:
++	kfree(dlist);
++	return status;
++
++ out_error:
++	free_blk_mountid(b_mt_id);
++	goto out_return;
++}
++
++static int
++bl_uninitialize_mountpoint(struct nfs_server *server)
++{
++	struct block_mount_id *b_mt_id = server->pnfs_ld_data;
++
++	dprintk("%s enter\n", __func__);
++	free_blk_mountid(b_mt_id);
++	dprintk("%s RETURNS\n", __func__);
++	return 0;
++}
++
++/* STUB - mark intersection of layout and page as bad, so is not
++ * used again.
++ */
++static void mark_bad_read(void)
++{
++	return;
++}
++
++/* Copied from buffer.c: record the read result on bh, then unlock it. */
++static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
++{
++	/* A failed read happens, due to failed READA attempts. */
++	if (!uptodate)
++		clear_buffer_uptodate(bh);
++	else
++		set_buffer_uptodate(bh);
++
++	unlock_buffer(bh);
++}
++
++/* Copied from buffer.c */
++static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
++{
++	__end_buffer_read_notouch(bh, uptodate);
++}
++
++/*
++ * map_block:  map a requested I/0 block (isect) into an offset in the LVM
++ * meta block_device
++ */
++static void
++map_block(sector_t isect, struct pnfs_block_extent *be, struct buffer_head *bh)
++{
++	dprintk("%s enter be=%p\n", __func__, be);
++
++	set_buffer_mapped(bh);
++	bh->b_bdev = be->be_mdev;
++	bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >>
++		(be->be_mdev->bd_inode->i_blkbits - 9);
++
++	dprintk("%s isect %ld, bh->b_blocknr %ld, using bsize %Zd\n",
++				__func__, (long)isect,
++				(long)bh->b_blocknr,
++				bh->b_size);
++	return;
++}
++
++/* Given an unmapped page, zero it (or read in page for COW),
++ * and set appropriate flags/markings, but it is safe to not initialize
++ * the range given in [from, to).  Loosely based on nobh_write_begin.
++ * Returns 0 on success and a nonzero error code otherwise.
++ */
++static int
++init_page_for_write(struct pnfs_block_layout *bl, struct page *page,
++		    unsigned from, unsigned to, sector_t **pages_to_mark)
++{
++	struct buffer_head *bh;
++	int inval, ret = -EIO;
++	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
++	sector_t isect;
++
++	dprintk("%s enter, %p\n", __func__, page);
++	bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0);
++	if (!bh) {
++		ret = -ENOMEM;
++		goto cleanup;
++	}
++
++	isect = (sector_t)page->index << (PAGE_CACHE_SHIFT - 9);
++	be = find_get_extent(bl, isect, &cow_read);
++	if (!be)
++		goto cleanup;
++	inval = is_hole(be, isect);
++	dprintk("%s inval=%i, from=%u, to=%u\n", __func__, inval, from, to);
++	if (inval) {
++		if (be->be_state == PNFS_BLOCK_NONE_DATA) {
++			dprintk("%s PANIC - got NONE_DATA extent %p\n",
++				__func__, be);
++			goto cleanup;
++		}
++		map_block(isect, be, bh);
++		unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
++	}
++	if (PageUptodate(page)) {
++		/* Do nothing */
++	} else if (inval && !cow_read) {
++		zero_user_segments(page, 0, from, to, PAGE_CACHE_SIZE);
++	} else if (0 < from || PAGE_CACHE_SIZE > to) {
++		struct pnfs_block_extent *read_extent;
++
++		read_extent = (inval && cow_read) ? cow_read : be;
++		map_block(isect, read_extent, bh);
++		lock_buffer(bh);
++		bh->b_end_io = end_buffer_read_nobh;
++		submit_bh(READ, bh);
++		dprintk("%s: Waiting for buffer read\n", __func__);
++		/* XXX Don't really want to hold layout lock here */
++		wait_on_buffer(bh);
++		if (!buffer_uptodate(bh))
++			goto cleanup;
++	}
++	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
++		/* There is a BUG here if is a short copy after write_begin,
++		 * but I think this is a generic fs bug.  The problem is that
++		 * we have marked the page as initialized, but it is possible
++		 * that the section not copied may never get copied.
++		 */
++		ret = mark_initialized_sectors(be->be_inval, isect,
++					       PAGE_CACHE_SECTORS,
++					       pages_to_mark);
++		/* Want to preallocate mem so above can't fail */
++		if (ret)
++			goto cleanup;
++	}
++	SetPageMappedToDisk(page);
++	ret = 0;
++
++cleanup:
++	/* bh is NULL when alloc_page_buffers() failed above */
++	if (bh)
++		free_buffer_head(bh);
++	put_extent(be);
++	put_extent(cow_read);
++	if (ret) {
++		/* Should mark layout bad so plain nfs4 is used from now on */
++		mark_bad_read();
++	}
++	return ret;
++}
++
++/* Prepare page for a block-layout write; on failure drop lseg.  Returns 0 */
++static int
++bl_write_begin(struct pnfs_layout_segment *lseg, struct page *page, loff_t pos,
++	       unsigned count, struct pnfs_fsdata *fsdata)
++{
++	unsigned from, to;
++	int ret;
++	sector_t *pages_to_mark = NULL;
++	struct pnfs_block_layout *bl = BLK_LSEG2EXT(lseg);
++
++	dprintk("%s enter, %u@%lld\n", __func__, count, pos);
++	print_page(page);
++	/* The following code assumes blocksize >= PAGE_CACHE_SIZE */
++	if (bl->bl_blocksize < (PAGE_CACHE_SIZE >> 9)) {
++		dprintk("%s Can't handle blocksize %llu\n", __func__,
++			(u64)bl->bl_blocksize);
++		put_lseg(fsdata->lseg);
++		fsdata->lseg = NULL;
++		return 0;
++	}
++	if (PageMappedToDisk(page)) {
++		/* Basically, this is a flag that says we have
++		 * successfully called write_begin already on this page.
++		 * NOTE - there are cache consistency issues here.
++		 * For example, what if the layout is recalled, then regained?
++		 * If the file is closed and reopened, will the page flags
++		 * be reset?  If not, we'll have to use layout info instead of
++		 * the page flag.
++		 */
++		return 0;
++	}
++	from = pos & (PAGE_CACHE_SIZE - 1);
++	to = from + count;
++	ret = init_page_for_write(bl, page, from, to, &pages_to_mark);
++	if (ret) {
++		dprintk("%s init page failed with %i\n", __func__, ret);
++		/* Revert back to plain NFS and just continue on with
++		 * write.  This assumes there is no request attached, which
++		 * should be true if we get here.
++		 */
++		BUG_ON(PagePrivate(page));
++		put_lseg(fsdata->lseg);
++		fsdata->lseg = NULL;
++		kfree(pages_to_mark);
++		ret = 0;
++	} else {
++		fsdata->private = pages_to_mark;
++	}
++	return ret;
++}
++
++/* CAREFUL - what happens if copied < count??? */
++static int
++bl_write_end(struct inode *inode, struct page *page, loff_t pos,
++	     unsigned count, unsigned copied, struct pnfs_layout_segment *lseg)
++{
++	dprintk("%s enter, %u@%lld, lseg=%p\n", __func__, count, pos, lseg);
++	print_page(page);
++	if (lseg)
++		SetPageUptodate(page);
++	return 0;
++}
++
++/* Return any memory allocated to fsdata->private, and take advantage
++ * of no page locks to mark pages noted in write_begin as needing
++ * initialization.  Does nothing without fsdata, an lseg and a list.
++ */
++static void
++bl_write_end_cleanup(struct file *filp, struct pnfs_fsdata *fsdata)
++{
++	struct page *page;
++	pgoff_t index;
++	sector_t *pos;
++	struct address_space *mapping = filp->f_mapping;
++	struct pnfs_fsdata *fake_data;
++	struct pnfs_layout_segment *lseg;
++
++	if (!fsdata)
++		return;
++	lseg = fsdata->lseg;
++	if (!lseg)
++		return;
++	pos = fsdata->private;
++	if (!pos)
++		return;
++	dprintk("%s enter with pos=%llu\n", __func__, (u64)(*pos));
++	for (; *pos != ~0; pos++) {
++		index = *pos >> (PAGE_CACHE_SHIFT - 9);
++		/* XXX How do we properly deal with failures here??? */
++		page = grab_cache_page_write_begin(mapping, index, 0);
++		if (!page) {
++			printk(KERN_ERR "%s BUG BUG BUG NoMem\n", __func__);
++			continue;
++		}
++		dprintk("%s: Examining block page\n", __func__);
++		print_page(page);
++		if (PageMappedToDisk(page))
++			goto release_page;
++		/* XXX How do we properly deal with failures here??? */
++		dprintk("%s Marking block page\n", __func__);
++		init_page_for_write(BLK_LSEG2EXT(fsdata->lseg), page,
++				    PAGE_CACHE_SIZE, PAGE_CACHE_SIZE, NULL);
++		print_page(page);
++		fake_data = kzalloc(sizeof(*fake_data), GFP_KERNEL);
++		if (!fake_data) {
++			printk(KERN_ERR "%s BUG BUG BUG NoMem\n", __func__);
++			goto release_page;
++		}
++		get_lseg(lseg);
++		fake_data->lseg = lseg;
++		fake_data->bypass_eof = 1;
++		/* Note fake_data is freed by nfs_write_end (page too, TODO confirm) */
++		mapping->a_ops->write_end(filp, mapping,
++					  (loff_t)index << PAGE_CACHE_SHIFT,
++					  PAGE_CACHE_SIZE, PAGE_CACHE_SIZE,
++					  page, fake_data);
++		continue;
++release_page:
++		/* Drop the lock and the reference taken by the grab above */
++		unlock_page(page);
++		page_cache_release(page);
++	}
++	kfree(fsdata->private);
++	fsdata->private = NULL;
++}
++
++static ssize_t
++bl_get_stripesize(struct pnfs_layout_hdr *lo)
++{
++	dprintk("%s enter\n", __func__);
++	return 0;
++}
++
++/* Coalescing policy hook, reached from nfs_can_coalesce_requests via
++ * nfs_pageio_do_add_request.  Returns nonzero when @req may be merged
++ * with @prev: writes must share a layout segment, reads always may.
++ */
++static int
++bl_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
++	   struct nfs_page *req)
++{
++	dprintk("%s enter\n", __func__);
++	/* Reads are never restricted */
++	if (!pgio->pg_iswrite)
++		return 1;
++	return prev->wb_lseg == req->wb_lseg;
++}
++
++static struct layoutdriver_io_operations blocklayout_io_operations = {
++	.commit				= bl_commit,
++	.read_pagelist			= bl_read_pagelist,
++	.write_pagelist			= bl_write_pagelist,
++	.write_begin			= bl_write_begin,
++	.write_end			= bl_write_end,
++	.write_end_cleanup		= bl_write_end_cleanup,
++	.alloc_layout			= bl_alloc_layout,
++	.free_layout			= bl_free_layout,
++	.alloc_lseg			= bl_alloc_lseg,
++	.free_lseg			= bl_free_lseg,
++	.setup_layoutcommit		= bl_setup_layoutcommit,
++	.encode_layoutcommit		= bl_encode_layoutcommit,
++	.cleanup_layoutcommit		= bl_cleanup_layoutcommit,
++	.initialize_mountpoint		= bl_initialize_mountpoint,
++	.uninitialize_mountpoint	= bl_uninitialize_mountpoint,
++};
++
++static struct layoutdriver_policy_operations blocklayout_policy_operations = {
++	.get_stripesize			= bl_get_stripesize,
++	.pg_test			= bl_pg_test,
++};
++
++static struct pnfs_layoutdriver_type blocklayout_type = {
++	.id = LAYOUT_BLOCK_VOLUME,
++	.name = "LAYOUT_BLOCK_VOLUME",
++	.ld_io_ops = &blocklayout_io_operations,
++	.ld_policy_ops = &blocklayout_policy_operations,
++};
++
++static int __init nfs4blocklayout_init(void)
++{
++	dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
++	/* NOTE(review): neither call's result is checked before returning 0 */
++	pnfs_block_callback_ops = pnfs_register_layoutdriver(&blocklayout_type);
++	bl_pipe_init();
++	return 0;
++}
++
++static void __exit nfs4blocklayout_exit(void)
++{
++	dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
++	       __func__);
++
++	pnfs_unregister_layoutdriver(&blocklayout_type);
++	bl_pipe_exit();
++}
++
++module_init(nfs4blocklayout_init);
++module_exit(nfs4blocklayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c.orig	2010-09-30 10:17:08.542991000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdev.c	2010-09-30 10:17:08.544989000 -0400
+@@ -0,0 +1,335 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayoutdev.c
++ *
++ *  Device operations for the pnfs nfs4 block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#include <linux/module.h>
++#include <linux/buffer_head.h> /* __bread */
++
++#include <linux/genhd.h>
++#include <linux/blkdev.h>
++#include <linux/hash.h>
++
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++uint32_t *blk_overflow(uint32_t *p, uint32_t *end, size_t nbytes)
++{
++	/* Return p if nbytes fit in [p, end), else NULL (no pointer overflow) */
++	if (unlikely(p > end || XDR_QUADLEN(nbytes) > (size_t)(end - p)))
++		return NULL;
++	return p;
++}
++EXPORT_SYMBOL(blk_overflow);
++
++/* Open the block device identified by @dev read-only; NULL on failure. */
++struct block_device *nfs4_blkdev_get(dev_t dev)
++{
++	struct block_device *bd;
++
++	dprintk("%s enter\n", __func__);
++	bd = open_by_devnum(dev, FMODE_READ);
++	if (!IS_ERR(bd))
++		return bd;
++
++	/* Callers only see NULL; the error code goes to the debug log */
++	dprintk("%s failed to open device : %ld\n",
++			__func__, PTR_ERR(bd));
++	return NULL;
++}
++
++/* Release the block device and return blkdev_put()'s result.
++ * NOTE(review): bd_release() is called here; confirm a matching claim.
++ */
++int nfs4_blkdev_put(struct block_device *bdev)
++{
++	dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev),
++			MINOR(bdev->bd_dev));
++	bd_release(bdev);
++	return blkdev_put(bdev, FMODE_READ);
++}
++
++/* Decodes pnfs_block_deviceaddr4 (draft-8) which is XDR encoded
++ * in dev->dev_addr_buf, by passing it to the userspace daemon over
++ * bl_device_pipe and opening the major:minor found in the reply.
++ * Returns a newly allocated pnfs_block_dev, or NULL on any failure.
++ */
++struct pnfs_block_dev *
++nfs4_blk_decode_device(struct nfs_server *server,
++		       struct pnfs_device *dev,
++		       struct list_head *sdlist)
++{
++	struct pnfs_block_dev *rv = NULL;
++	struct block_device *bd = NULL;
++	struct pipefs_hdr *msg = NULL, *reply = NULL;
++	uint32_t major, minor;
++
++	dprintk("%s enter\n", __func__);
++	if (IS_ERR(bl_device_pipe))
++		return NULL;
++	dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
++	dprintk("%s: deviceid: %.*s, mincount: %d\n", __func__,
++		(int)NFS4_PNFS_DEVICEID4_SIZE, dev->dev_id.data, dev->mincount);
++	msg = pipefs_alloc_init_msg(0, BL_DEVICE_MOUNT, 0, dev->area,
++				    dev->mincount);
++	if (IS_ERR(msg)) {
++		dprintk("ERROR: couldn't make pipefs message.\n");
++		goto out_err;
++	}
++	msg->msgid = hash_ptr(&msg, sizeof(msg->msgid) * 8);
++	msg->status = BL_DEVICE_REQUEST_INIT;
++
++	dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
++	reply = pipefs_queue_upcall_waitreply(bl_device_pipe, msg,
++					      &bl_device_list, 0, 0);
++	if (IS_ERR(reply)) {
++		dprintk("ERROR: upcall_waitreply failed\n");
++		goto out_err;
++	}
++	if (reply->status != BL_DEVICE_REQUEST_PROC) {
++		/* no device was opened yet, so report the reply status */
++		dprintk("%s failed to open device: %ld\n",
++			__func__, (long)reply->status);
++		goto out_err;
++	}
++	memcpy(&major, (uint32_t *)(payload_of(reply)), sizeof(uint32_t));
++	memcpy(&minor, (uint32_t *)(payload_of(reply) + sizeof(uint32_t)),
++		sizeof(uint32_t));
++	bd = nfs4_blkdev_get(MKDEV(major, minor));
++	if (!bd || IS_ERR(bd)) {	/* failure comes back as NULL */
++		dprintk("%s failed to open device : %ld\n",
++			__func__, PTR_ERR(bd));
++		goto out_err;
++	}
++
++	rv = kzalloc(sizeof(*rv), GFP_KERNEL);
++	if (!rv) {
++		blkdev_put(bd, FMODE_READ);	/* don't leak the open device */
++		goto out_err;
++	}
++	rv->bm_mdev = bd;
++	memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct pnfs_deviceid));
++	dprintk("%s Created device %s with bd_block_size %u\n",
++		__func__, bd->bd_disk->disk_name, bd->bd_block_size);
++	kfree(reply);
++	kfree(msg);
++	return rv;
++
++out_err:
++	kfree(rv);
++	if (!IS_ERR(reply))
++		kfree(reply);
++	if (!IS_ERR(msg))
++		kfree(msg);
++	return NULL;
++}
++
++/* Look up @id in the mount's device list (under bm_lock) and return the
++ * block_device recorded for it, or NULL when no entry matches.
++ */
++static struct block_device *translate_devid(struct pnfs_layout_hdr *lo,
++					    struct pnfs_deviceid *id)
++{
++	struct block_mount_id *mount_id = BLK_ID(lo);
++	struct block_device *found = NULL;
++	struct pnfs_block_dev *cur;
++
++	dprintk("%s enter, lo=%p, id=%p\n", __func__, lo, id);
++	spin_lock(&mount_id->bm_lock);
++	list_for_each_entry(cur, &mount_id->bm_devlist, bm_node) {
++		if (memcmp(id->data, cur->bm_mdevid.data,
++			   NFS4_PNFS_DEVICEID4_SIZE) != 0)
++			continue;
++		found = cur->bm_mdev;
++		break;
++	}
++	spin_unlock(&mount_id->bm_lock);
++	dprintk("%s returning %p\n", __func__, found);
++	return found;
++}
++
++/* Tracks info needed to ensure extents in layout obey constraints of spec */
++struct layout_verification {
++	u32 mode;	/* R or RW */
++	u64 start;	/* Expected start of next non-COW extent */
++	u64 inval;	/* Start of INVAL coverage */
++	u64 cowread;	/* End of COW read coverage */
++};
++
++/* Verify the extent meets the layout requirements of the pnfs-block draft,
++ * section 2.3.1.  Returns 0 and advances the tracking state in @lv when
++ * the extent is acceptable, -EIO otherwise.
++ */
++static int verify_extent(struct pnfs_block_extent *be,
++			 struct layout_verification *lv)
++{
++	if (lv->mode == IOMODE_READ) {
++		/* READWRITE/INVALID are rejected; extents must be contiguous */
++		if (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
++		    be->be_state == PNFS_BLOCK_INVALID_DATA ||
++		    be->be_f_offset != lv->start)
++			return -EIO;
++		lv->start += be->be_length;
++		return 0;
++	}
++	/* lv->mode == IOMODE_RW */
++	switch (be->be_state) {
++	case PNFS_BLOCK_READWRITE_DATA:
++		if (be->be_f_offset != lv->start || lv->cowread > lv->start)
++			return -EIO;
++		lv->start += be->be_length;
++		lv->inval = lv->start;
++		return 0;
++	case PNFS_BLOCK_INVALID_DATA:
++		if (be->be_f_offset != lv->start)
++			return -EIO;
++		lv->start += be->be_length;
++		return 0;
++	case PNFS_BLOCK_READ_DATA:
++		if (be->be_f_offset > lv->start ||
++		    be->be_f_offset < lv->inval ||
++		    be->be_f_offset < lv->cowread)
++			return -EIO;
++		/* It looks like you might want to min this with lv->start,
++		 * but you really don't.
++		 */
++		lv->inval = lv->inval + be->be_length;
++		lv->cowread = be->be_f_offset + be->be_length;
++		return 0;
++	default:
++		/* anything else (e.g. PNFS_BLOCK_NONE_DATA) is rejected */
++		return -EIO;
++	}
++}
++
++/* XDR decode pnfs_block_layout4 structure; returns 0 or a negative errno */
++int
++nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
++			   struct nfs4_layoutget_res *lgr)
++{
++	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
++	uint32_t *p = (uint32_t *)lgr->layout.buf;
++	uint32_t *end = (uint32_t *)((char *)lgr->layout.buf + lgr->layout.len);
++	int status = -EIO;
++	uint32_t count, i;
++	struct pnfs_block_extent *be = NULL, *save;
++	uint64_t tmp; /* Used by READSECTOR */
++	struct layout_verification lv = {
++		.mode = lgr->range.iomode,
++		.start = lgr->range.offset >> 9,
++		.inval = lgr->range.offset >> 9,
++		.cowread = lgr->range.offset >> 9,
++	};
++	LIST_HEAD(extents);
++
++	BLK_READBUF(p, end, 4);
++	READ32(count);
++	dprintk("%s enter, number of extents %i\n", __func__, count);
++	/* count is untrusted: bound it so the size product cannot wrap */
++	if (count > (end - p) / XDR_QUADLEN(28 + NFS4_PNFS_DEVICEID4_SIZE))
++		goto out_err;
++	BLK_READBUF(p, end, (28 + NFS4_PNFS_DEVICEID4_SIZE) * count);
++	/* Decode individual extents, putting them in temporary
++	 * staging area until whole layout is decoded to make error
++	 * recovery easier.
++	 */
++	for (i = 0; i < count; i++) {
++		be = alloc_extent();
++		if (!be) {
++			status = -ENOMEM;
++			goto out_err;
++		}
++		READ_DEVID(&be->be_devid);
++		be->be_mdev = translate_devid(lo, &be->be_devid);
++		if (!be->be_mdev)
++			goto out_err;
++		/* The next three values are read in as bytes,
++		 * but stored as 512-byte sector lengths
++		 */
++		READ_SECTOR(be->be_f_offset);
++		READ_SECTOR(be->be_length);
++		READ_SECTOR(be->be_v_offset);
++		READ32(be->be_state);
++		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
++			be->be_inval = &bl->bl_inval;
++		if (verify_extent(be, &lv)) {
++			dprintk("%s verify failed\n", __func__);
++			goto out_err;
++		}
++		list_add_tail(&be->be_node, &extents);
++	}
++	if (p != end) {
++		dprintk("%s Undecoded cruft at end of opaque\n", __func__);
++		be = NULL;
++		goto out_err;
++	}
++	if (lgr->range.offset + lgr->range.length != lv.start << 9) {
++		dprintk("%s Final length mismatch\n", __func__);
++		be = NULL;
++		goto out_err;
++	}
++	if (lv.start < lv.cowread) {
++		dprintk("%s Final uncovered COW extent\n", __func__);
++		be = NULL;
++		goto out_err;
++	}
++	/* Extents decoded properly, now try to merge them in to
++	 * existing layout extents.
++	 */
++	spin_lock(&bl->bl_ext_lock);
++	list_for_each_entry_safe(be, save, &extents, be_node) {
++		list_del(&be->be_node);
++		status = add_and_merge_extent(bl, be);
++		if (status) {
++			spin_unlock(&bl->bl_ext_lock);
++			/* This is a fairly catastrophic error, as the
++			 * entire layout extent lists are now corrupted.
++			 * We should have some way to distinguish this.
++			 */
++			be = NULL;
++			goto out_err;
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++	status = 0;
++ out:
++	dprintk("%s returns %i\n", __func__, status);
++	return status;
++
++ out_err:
++	put_extent(be);
++	while (!list_empty(&extents)) {
++		be = list_first_entry(&extents, struct pnfs_block_extent,
++				      be_node);
++		list_del(&be->be_node);
++		put_extent(be);
++	}
++	goto out;
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c.orig	2010-09-30 10:17:08.546994000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayoutdm.c	2010-09-30 10:17:08.548993000 -0400
+@@ -0,0 +1,120 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayoutdm.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2007 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Fred Isaman <iisaman@umich.edu>
++ *  Andy Adamson <andros@citi.umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++
++#include <linux/genhd.h> /* gendisk - used in a dprintk*/
++#include <linux/sched.h>
++#include <linux/hash.h>
++
++#include "blocklayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/* Defines used for calculating memory usage in nfs4_blk_flatten() */
++#define ARGSIZE   24    /* Max bytes needed for linear target arg string */
++#define SPECSIZE (sizeof8(struct dm_target_spec) + ARGSIZE)
++#define SPECS_PER_PAGE (PAGE_SIZE / SPECSIZE)
++#define SPEC_HEADER_ADJUST (SPECS_PER_PAGE - \
++			    (PAGE_SIZE - sizeof8(struct dm_ioctl)) / SPECSIZE)
++#define roundup8(x) (((x)+7) & ~7)
++#define sizeof8(x) roundup8(sizeof(x))
++
++/* Send a BL_DEVICE_UMOUNT upcall for @dev over bl_device_pipe.
++ * Returns 0 when the reply status is BL_DEVICE_REQUEST_PROC, 1 otherwise.
++ */
++static int dev_remove(dev_t dev)
++{
++	/* Payload: major then minor, 32 bits each */
++	uint32_t devnum[2] = { MAJOR(dev), MINOR(dev) };
++	struct pipefs_hdr *msg = NULL, *reply = NULL;
++	int ret = 1;
++
++	dprintk("Entering %s\n", __func__);
++	if (IS_ERR(bl_device_pipe))
++		return ret;
++
++	msg = pipefs_alloc_init_msg(0, BL_DEVICE_UMOUNT, 0, (void *)devnum,
++				    sizeof(devnum));
++	if (IS_ERR(msg)) {
++		dprintk("ERROR: couldn't make pipefs message.\n");
++		goto out;
++	}
++	msg->msgid = hash_ptr(&msg, sizeof(msg->msgid) * 8);
++	msg->status = BL_DEVICE_REQUEST_INIT;
++
++	reply = pipefs_queue_upcall_waitreply(bl_device_pipe, msg,
++					      &bl_device_list, 0, 0);
++	if (IS_ERR(reply)) {
++		dprintk("ERROR: upcall_waitreply failed\n");
++		goto out;
++	}
++
++	if (reply->status == BL_DEVICE_REQUEST_PROC)
++		ret = 0; /*TODO: what to return*/
++out:
++	if (!IS_ERR(reply))
++		kfree(reply);
++	if (!IS_ERR(msg))
++		kfree(msg);
++	return ret;
++}
++
++/* Release meta device: put the block device, then request its removal
++ * via dev_remove(), whose result is returned. */
++static int nfs4_blk_metadev_release(struct pnfs_block_dev *bdev)
++{
++	/* Read the number first: the device may be gone after the put */
++	dev_t dev = bdev->bm_mdev->bd_dev;
++	int rv;
++
++	dprintk("%s Releasing\n", __func__);
++	/* XXX Check return? */
++	rv = nfs4_blkdev_put(bdev->bm_mdev);
++	dprintk("%s nfs4_blkdev_put returns %d\n", __func__, rv);
++	rv = dev_remove(dev);
++	dprintk("%s Returns %d\n", __func__, rv);
++	return rv;
++}
++
++/* Free @bdev (NULL is a no-op), releasing its meta device first if set */
++void free_block_dev(struct pnfs_block_dev *bdev)
++{
++	if (!bdev)
++		return;
++	if (bdev->bm_mdev) {
++		dprintk("%s Removing DM device: %d:%d\n", __func__,
++			MAJOR(bdev->bm_mdev->bd_dev),
++			MINOR(bdev->bm_mdev->bd_dev));
++		/* XXX Check status ?? */
++		nfs4_blk_metadev_release(bdev);
++	}
++	kfree(bdev);
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h.orig	2010-09-30 10:17:08.538988000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/blocklayout.h	2010-09-30 10:17:08.539994000 -0400
+@@ -0,0 +1,302 @@
++/*
++ *  linux/fs/nfs/blocklayout/blocklayout.h
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++#ifndef FS_NFS_NFS4BLOCKLAYOUT_H
++#define FS_NFS_NFS4BLOCKLAYOUT_H
++
++#include <linux/nfs_fs.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/dm-ioctl.h> /* Needed for struct dm_ioctl*/
++
++#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> 9)
++
++#define PG_pnfserr PG_owner_priv_1
++#define PagePnfsErr(page)	test_bit(PG_pnfserr, &(page)->flags)
++#define SetPagePnfsErr(page)	set_bit(PG_pnfserr, &(page)->flags)
++#define ClearPagePnfsErr(page)	clear_bit(PG_pnfserr, &(page)->flags)
++
++extern int dm_dev_create(struct dm_ioctl *param); /* from dm-ioctl.c */
++extern int dm_dev_remove(struct dm_ioctl *param); /* from dm-ioctl.c */
++extern int dm_do_resume(struct dm_ioctl *param);
++extern int dm_table_load(struct dm_ioctl *param, size_t param_size);
++
++struct block_mount_id {
++	spinlock_t			bm_lock;    /* protects list */
++	struct list_head		bm_devlist; /* holds pnfs_block_dev */
++};
++
++struct pnfs_block_dev {
++	struct list_head		bm_node;
++	struct pnfs_deviceid		bm_mdevid;    /* associated devid */
++	struct block_device		*bm_mdev;     /* meta device itself */
++};
++
++/* holds visible disks that can be matched against VOLUME_SIMPLE signatures */
++struct visible_block_device {
++	struct list_head	vi_node;
++	struct block_device	*vi_bdev;
++	int			vi_mapped;
++	int			vi_put_done;
++};
++
++enum blk_vol_type {
++	PNFS_BLOCK_VOLUME_SIMPLE   = 0,	/* maps to a single LU */
++	PNFS_BLOCK_VOLUME_SLICE    = 1,	/* slice of another volume */
++	PNFS_BLOCK_VOLUME_CONCAT   = 2,	/* concatenation of multiple volumes */
++	PNFS_BLOCK_VOLUME_STRIPE   = 3	/* striped across multiple volumes */
++};
++
++/* All disk offset/lengths are stored in 512-byte sectors */
++struct pnfs_blk_volume {
++	uint32_t		bv_type;
++	sector_t 		bv_size;
++	struct pnfs_blk_volume 	**bv_vols;
++	int 			bv_vol_n;
++	union {
++		dev_t			bv_dev;
++		sector_t		bv_stripe_unit;
++		sector_t 		bv_offset;
++	};
++};
++
++/* Since components need not be aligned, cannot use sector_t */
++struct pnfs_blk_sig_comp {
++	int64_t 	bs_offset;  /* In bytes */
++	uint32_t   	bs_length;  /* In bytes */
++	char 		*bs_string;
++};
++
++/* Maximum number of signatures components in a simple volume */
++# define PNFS_BLOCK_MAX_SIG_COMP 16
++
++struct pnfs_blk_sig {
++	int 				si_num_comps;
++	struct pnfs_blk_sig_comp	si_comps[PNFS_BLOCK_MAX_SIG_COMP];
++};
++
++enum exstate4 {
++	PNFS_BLOCK_READWRITE_DATA	= 0,
++	PNFS_BLOCK_READ_DATA		= 1,
++	PNFS_BLOCK_INVALID_DATA		= 2, /* mapped, but data is invalid */
++	PNFS_BLOCK_NONE_DATA		= 3  /* unmapped, it's a hole */
++};
++
++#define MY_MAX_TAGS (15) /* tag bitnums used must be less than this */
++
++struct my_tree_t {
++	sector_t		mtt_step_size;	/* Internal sector alignment */
++	struct list_head	mtt_stub; /* Should be a radix tree */
++};
++
++struct pnfs_inval_markings {
++	spinlock_t	im_lock;
++	struct my_tree_t im_tree;	/* Sectors that need LAYOUTCOMMIT */
++	sector_t	im_block_size;	/* Server blocksize in sectors */
++};
++
++struct pnfs_inval_tracking {
++	struct list_head it_link;	/* entry in my_tree_t.mtt_stub */
++	int		 it_sector;	/* NOTE(review): int, but compared to u64 */
++	int		 it_tags;	/* tag bits, masked by INTERNAL_MASK */
++};
++
++/* sector_t fields are all in 512-byte sectors */
++struct pnfs_block_extent {
++	struct kref	be_refcnt;
++	struct list_head be_node;	/* link into lseg list */
++	struct pnfs_deviceid be_devid;  /* STUB - remevable??? */
++	struct block_device *be_mdev;
++	sector_t	be_f_offset;	/* the starting offset in the file */
++	sector_t	be_length;	/* the size of the extent */
++	sector_t	be_v_offset;	/* the starting offset in the volume */
++	enum exstate4	be_state;	/* the state of this extent */
++	struct pnfs_inval_markings *be_inval; /* tracks INVAL->RW transition */
++};
++
++/* Shortened extent used by LAYOUTCOMMIT */
++struct pnfs_block_short_extent {
++	struct list_head bse_node;
++	struct pnfs_deviceid bse_devid;	/* STUB - removable??? */
++	struct block_device *bse_mdev;
++	sector_t	bse_f_offset;	/* the starting offset in the file */
++	sector_t	bse_length;	/* the size of the extent */
++};
++
++static inline void
++INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
++{
++	spin_lock_init(&marks->im_lock);
++	INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
++	marks->im_block_size = blocksize;
++	marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
++					   blocksize);
++}
++
++enum extentclass4 {
++	RW_EXTENT	= 0, /* READWRTE and INVAL */
++	RO_EXTENT	= 1, /* READ and NONE */
++	EXTENT_LISTS	= 2,
++};
++
++static inline int choose_list(enum exstate4 state)
++{
++	/* READ and NONE extents go on the RO list, everything else on RW */
++	if (state != PNFS_BLOCK_READ_DATA && state != PNFS_BLOCK_NONE_DATA)
++		return RW_EXTENT;
++	return RO_EXTENT;
++}
++
++struct pnfs_block_layout {
++	struct pnfs_layout_hdr bl_layout;
++	struct pnfs_inval_markings bl_inval; /* tracks INVAL->RW transition */
++	spinlock_t		bl_ext_lock;   /* Protects list manipulation */
++	struct list_head	bl_extents[EXTENT_LISTS]; /* R and RW extents */
++	struct list_head	bl_commit;	/* Needs layout commit */
++	unsigned int		bl_count;	/* entries in bl_commit */
++	sector_t		bl_blocksize;  /* Server blocksize in sectors */
++};
++
++/* this struct is communicated between:
++ * bl_setup_layoutcommit && bl_encode_layoutcommit && bl_cleanup_layoutcommit
++ */
++struct bl_layoutupdate_data {
++	struct list_head ranges;
++};
++
++#define BLK_ID(lo) ((struct block_mount_id *)(PNFS_NFS_SERVER(lo)->pnfs_ld_data))
++
++static inline struct pnfs_block_layout *
++BLK_LO2EXT(struct pnfs_layout_hdr *lo)
++{
++	return container_of(lo, struct pnfs_block_layout, bl_layout);
++}
++
++static inline struct pnfs_block_layout *
++BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
++{
++	return BLK_LO2EXT(lseg->layout);
++}
++
++uint32_t *blk_overflow(uint32_t *p, uint32_t *end, size_t nbytes);
++
++#define BLK_READBUF(p, e, nbytes)  do { \
++	p = blk_overflow(p, e, nbytes); \
++	if (!p) { \
++		printk(KERN_WARNING \
++			"%s: reply buffer overflowed in line %d.\n", \
++			__func__, __LINE__); \
++		goto out_err; \
++	} \
++} while (0)
++
++#define READ32(x)         (x) = ntohl(*p++)
++#define READ64(x)         do {                  \
++	(x) = (uint64_t)ntohl(*p++) << 32;           \
++	(x) |= ntohl(*p++);                     \
++} while (0)
++#define COPYMEM(x, nbytes) do {                 \
++	memcpy((x), p, nbytes);                 \
++	p += XDR_QUADLEN(nbytes);               \
++} while (0)
++#define READ_DEVID(x)	COPYMEM((x)->data, NFS4_PNFS_DEVICEID4_SIZE)
++#define READ_SECTOR(x)     do { \
++	READ64(tmp); \
++	if (tmp & 0x1ff) { \
++		printk(KERN_WARNING \
++		       "%s Value not 512-byte aligned at line %d\n", \
++		       __func__, __LINE__);			     \
++		goto out_err; \
++	} \
++	(x) = tmp >> 9; \
++} while (0)
++
++#define WRITE32(n)               do { \
++	*p++ = htonl(n); \
++	} while (0)
++#define WRITE64(n)               do {                           \
++	*p++ = htonl((uint32_t)((n) >> 32));			\
++	*p++ = htonl((uint32_t)(n));				\
++} while (0)
++#define WRITEMEM(ptr, nbytes)     do {                          \
++	p = xdr_encode_opaque_fixed(p, ptr, nbytes);	\
++} while (0)
++#define WRITE_DEVID(x)  WRITEMEM((x)->data, NFS4_PNFS_DEVICEID4_SIZE)
++
++/* blocklayoutdev.c */
++struct block_device *nfs4_blkdev_get(dev_t dev);
++int nfs4_blkdev_put(struct block_device *bdev);
++struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server,
++					      struct pnfs_device *dev,
++					      struct list_head *sdlist);
++int nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
++			       struct nfs4_layoutget_res *lgr);
++int nfs4_blk_create_block_disk_list(struct list_head *);
++void nfs4_blk_destroy_disk_list(struct list_head *);
++/* blocklayoutdm.c */
++int nfs4_blk_flatten(struct pnfs_blk_volume *, int, struct pnfs_block_dev *);
++void free_block_dev(struct pnfs_block_dev *bdev);
++/* extents.c */
++struct pnfs_block_extent *
++find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
++		struct pnfs_block_extent **cow_read);
++int mark_initialized_sectors(struct pnfs_inval_markings *marks,
++			     sector_t offset, sector_t length,
++			     sector_t **pages);
++void put_extent(struct pnfs_block_extent *be);
++struct pnfs_block_extent *alloc_extent(void);
++struct pnfs_block_extent *get_extent(struct pnfs_block_extent *be);
++int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect);
++int encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++				   struct xdr_stream *xdr,
++				   const struct nfs4_layoutcommit_args *arg);
++void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++				   const struct nfs4_layoutcommit_args *arg,
++				   int status);
++int add_and_merge_extent(struct pnfs_block_layout *bl,
++			 struct pnfs_block_extent *new);
++int mark_for_commit(struct pnfs_block_extent *be,
++		    sector_t offset, sector_t length);
++
++#include <linux/sunrpc/simple_rpc_pipefs.h>
++
++extern struct pipefs_list bl_device_list;
++extern struct dentry *bl_device_pipe;
++
++int bl_pipe_init(void);
++void bl_pipe_exit(void);
++
++#define BL_DEVICE_UMOUNT               0x0 /* Umount--delete devices */
++#define BL_DEVICE_MOUNT                0x1 /* Mount--create devices*/
++#define BL_DEVICE_REQUEST_INIT         0x0 /* Start request */
++#define BL_DEVICE_REQUEST_PROC         0x1 /* User level process succeeds */
++#define BL_DEVICE_REQUEST_ERR          0x2 /* User level process fails */
++
++#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c.orig	2010-09-30 10:17:08.565989000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/extents.c	2010-09-30 10:17:08.567989000 -0400
+@@ -0,0 +1,948 @@
++/*
++ *  linux/fs/nfs/blocklayout/extents.c
++ *
++ *  Module for the NFSv4.1 pNFS block layout driver.
++ *
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@citi.umich.edu>
++ *  Fred Isaman <iisaman@umich.edu>
++ *
++ * permission is granted to use, copy, create derivative works and
++ * redistribute this software and such derivative works for any purpose,
++ * so long as the name of the university of michigan is not used in
++ * any advertising or publicity pertaining to the use or distribution
++ * of this software without specific, written prior authorization.  if
++ * the above copyright notice or any other identification of the
++ * university of michigan is included in any copy of any portion of
++ * this software, then the disclaimer below must also be included.
++ *
++ * this software is provided as is, without representation from the
++ * university of michigan as to its fitness for any purpose, and without
++ * warranty by the university of michigan of any kind, either express
++ * or implied, including without limitation the implied warranties of
++ * merchantability and fitness for a particular purpose.  the regents
++ * of the university of michigan shall not be liable for any damages,
++ * including special, indirect, incidental, or consequential damages,
++ * with respect to any claim arising out or in connection with the use
++ * of the software, even if it has been or is hereafter advised of the
++ * possibility of such damages.
++ */
++
++#include "blocklayout.h"
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/* Bit numbers */
++#define EXTENT_INITIALIZED 0
++#define EXTENT_WRITTEN     1
++#define EXTENT_IN_COMMIT   2
++#define INTERNAL_EXISTS    MY_MAX_TAGS
++#define INTERNAL_MASK      ((1 << INTERNAL_EXISTS) - 1)
++
++/* Returns largest t<=s s.t. t%base==0 */
++static inline sector_t normalize(sector_t s, int base)
++{
++	sector_t tmp = s; /* Since do_div modifies its argument */
++	return s - do_div(tmp, base);
++}
++
++static inline sector_t normalize_up(sector_t s, int base)
++{
++	return normalize(s + base - 1, base);
++}
++
++/* Complete stub using a list while we determine the API wanted */
++
++/* Returns tags, or negative */
++static int32_t _find_entry(struct my_tree_t *tree, u64 s)
++{
++	struct pnfs_inval_tracking *pos;
++
++	dprintk("%s(%llu) enter\n", __func__, s);
++	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
++		if (pos->it_sector > s)
++			continue;
++		else if (pos->it_sector == s)
++			return pos->it_tags & INTERNAL_MASK;
++		else
++			break;
++	}
++	return -ENOENT;
++}
++
++static inline
++int _has_tag(struct my_tree_t *tree, u64 s, int32_t tag)
++{
++	int32_t tags;
++
++	dprintk("%s(%llu, %i) enter\n", __func__, s, tag);
++	s = normalize(s, tree->mtt_step_size);
++	tags = _find_entry(tree, s);
++	if ((tags < 0) || !(tags & (1 << tag)))
++		return 0;
++	else
++		return 1;
++}
++
++/* Creates entry with tag, or if entry already exists, unions tag to it.
++ * If storage is not NULL, newly created entry will use it.
++ * Returns number of entries added, or negative on error.
++ */
++static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag,
++		      struct pnfs_inval_tracking *storage)
++{
++	int found = 0;
++	struct pnfs_inval_tracking *pos;
++
++	dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage);
++	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
++		if (pos->it_sector > s)
++			continue;
++		else if (pos->it_sector == s) {
++			found = 1;
++			break;
++		} else
++			break;
++	}
++	if (found) {
++		pos->it_tags |= (1 << tag);
++		return 0;
++	} else {
++		struct pnfs_inval_tracking *new;
++		if (storage)
++			new = storage;
++		else {
++			new = kmalloc(sizeof(*new), GFP_KERNEL);
++			if (!new)
++				return -ENOMEM;
++		}
++		new->it_sector = s;
++		new->it_tags = (1 << tag);
++		list_add(&new->it_link, &pos->it_link);
++		return 1;
++	}
++}
++
++/* XXXX Really want option to not create */
++/* Over range, unions tag with existing entries, else creates entry with tag */
++static int _set_range(struct my_tree_t *tree, int32_t tag, u64 s, u64 length)
++{
++	u64 i;
++
++	dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length);
++	for (i = normalize(s, tree->mtt_step_size); i < s + length;
++	     i += tree->mtt_step_size)
++		if (_add_entry(tree, i, tag, NULL))
++			return -ENOMEM;
++	return 0;
++}
++
++/* Ensure that future operations on given range of tree will not malloc */
++static int _preload_range(struct my_tree_t *tree, u64 offset, u64 length)
++{
++	u64 start, end, s;
++	int count, i, used = 0, status = -ENOMEM;
++	struct pnfs_inval_tracking **storage;
++
++	dprintk("%s(%llu, %llu) enter\n", __func__, offset, length);
++	start = normalize(offset, tree->mtt_step_size);
++	end = normalize_up(offset + length, tree->mtt_step_size);
++	count = (int)(end - start) / (int)tree->mtt_step_size;
++
++	/* Pre-malloc what memory we might need */
++	storage = kmalloc(sizeof(*storage) * count, GFP_KERNEL);
++	if (!storage)
++		return -ENOMEM;
++	for (i = 0; i < count; i++) {
++		storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking),
++				     GFP_KERNEL);
++		if (!storage[i])
++			goto out_cleanup;
++	}
++
++	/* Now need lock - HOW??? */
++
++	for (s = start; s < end; s += tree->mtt_step_size)
++		used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);
++
++	/* Unlock - HOW??? */
++	status = 0;
++
++ out_cleanup:
++	for (i = used; i < count; i++) {
++		if (!storage[i])
++			break;
++		kfree(storage[i]);
++	}
++	kfree(storage);
++	return status;
++}
++
++static void set_needs_init(sector_t *array, sector_t offset)
++{
++	sector_t *p = array;
++
++	dprintk("%s enter\n", __func__);
++	if (!p)
++		return;
++	while (*p < offset)
++		p++;
++	if (*p == offset)
++		return;
++	else if (*p == ~0) {
++		*p++ = offset;
++		*p = ~0;
++		return;
++	} else {
++		sector_t *save = p;
++		dprintk("%s Adding %llu\n", __func__, (u64)offset);
++		while (*p != ~0)
++			p++;
++		p++;
++		memmove(save + 1, save, (char *)p - (char *)save);
++		*save = offset;
++		return;
++	}
++}
++
++/* We are relying on page lock to serialize this */
++int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect)
++{
++	int rv;
++
++	spin_lock(&marks->im_lock);
++	rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
++	spin_unlock(&marks->im_lock);
++	return rv;
++}
++
++/* Returns 1 if every step-sized entry in [start, end) exists and carries tag,
++ * else 0.  Assumes start, end already sector aligned.
++ */
++static int
++_range_has_tag(struct my_tree_t *tree, u64 start, u64 end, int32_t tag)
++{
++	struct pnfs_inval_tracking *pos;
++	u64 step = tree->mtt_step_size;
++	/* Highest entry still required; wraps if end < step, which then
++	 * matches nothing and yields 0.
++	 */
++	u64 expect = end - step;
++
++	dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag);
++	/* _add_entry keeps the list in ascending sector order: walk it down */
++	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
++		if (pos->it_sector >= end)
++			continue;
++		if (pos->it_sector != expect || !(pos->it_tags & (1 << tag)))
++			return 0;
++		/* Done once the next lower entry would fall below start.
++		 * Compared without subtracting, so an entry at sector 0
++		 * cannot wrap expect around and be missed.
++		 */
++		if (expect < start + step)
++			return 1;
++		expect -= step;
++	}
++	return 0;
++}
++
++static int is_range_written(struct pnfs_inval_markings *marks,
++			    sector_t start, sector_t end)
++{
++	int rv;
++
++	spin_lock(&marks->im_lock);
++	rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN);
++	spin_unlock(&marks->im_lock);
++	return rv;
++}
++
++/* Marks sectors in [offset, offset+length) as having been initialized.
++ * All lengths are step-aligned, where step is min(pagesize, blocksize).
++ * Notes where partial block is initialized, and helps prepare it for
++ * complete initialization later.
++ */
++/* Currently assumes offset is page-aligned */
++int mark_initialized_sectors(struct pnfs_inval_markings *marks,
++			     sector_t offset, sector_t length,
++			     sector_t **pages)
++{
++	sector_t s, start, end;
++	sector_t *array = NULL; /* Pages to mark */
++
++	dprintk("%s(offset=%llu,len=%llu) enter\n",
++		__func__, (u64)offset, (u64)length);
++	s = max((sector_t) 3,
++		2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
++	dprintk("%s set max=%llu\n", __func__, (u64)s);
++	if (pages) {
++		array = kmalloc(s * sizeof(sector_t), GFP_KERNEL);
++		if (!array)
++			goto outerr;
++		array[0] = ~0;
++	}
++
++	start = normalize(offset, marks->im_block_size);
++	end = normalize_up(offset + length, marks->im_block_size);
++	if (_preload_range(&marks->im_tree, start, end - start))
++		goto outerr;
++
++	spin_lock(&marks->im_lock);
++
++	for (s = normalize_up(start, PAGE_CACHE_SECTORS);
++	     s < offset; s += PAGE_CACHE_SECTORS) {
++		dprintk("%s pre-area pages\n", __func__);
++		/* Portion of used block is not initialized */
++		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
++			set_needs_init(array, s);
++	}
++	if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
++		goto out_unlock;
++	for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS);
++	     s < end; s += PAGE_CACHE_SECTORS) {
++		dprintk("%s post-area pages\n", __func__);
++		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
++			set_needs_init(array, s);
++	}
++
++	spin_unlock(&marks->im_lock);
++
++	if (pages) {
++		if (array[0] == ~0) {
++			kfree(array);
++			*pages = NULL;
++		} else
++			*pages = array;
++	}
++	return 0;
++
++ out_unlock:
++	spin_unlock(&marks->im_lock);
++ outerr:
++	if (pages) {
++		kfree(array);
++		*pages = NULL;
++	}
++	return -ENOMEM;
++}
++
++/* Marks sectors in [offset, offset+length) as having been written to disk.
++ * All lengths should be block aligned.
++ */
++int mark_written_sectors(struct pnfs_inval_markings *marks,
++			 sector_t offset, sector_t length)
++{
++	int status;
++
++	dprintk("%s(offset=%llu,len=%llu) enter\n", __func__,
++		(u64)offset, (u64)length);
++	spin_lock(&marks->im_lock);
++	status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length);
++	spin_unlock(&marks->im_lock);
++	return status;
++}
++
++static void print_short_extent(struct pnfs_block_short_extent *be)
++{
++	dprintk("PRINT SHORT EXTENT extent %p\n", be);
++	if (be) {
++		dprintk("        be_f_offset %llu\n", (u64)be->bse_f_offset);
++		dprintk("        be_length   %llu\n", (u64)be->bse_length);
++	}
++}
++
++void print_clist(struct list_head *list, unsigned int count)
++{
++	struct pnfs_block_short_extent *be;
++	unsigned int i = 0;
++
++	dprintk("****************\n");
++	dprintk("Extent list looks like:\n");
++	list_for_each_entry(be, list, bse_node) {
++		i++;
++		print_short_extent(be);
++	}
++	if (i != count)
++		dprintk("\n\nExpected %u entries\n\n\n", count);
++	dprintk("****************\n");
++}
++
++/* Note: In theory, we should do more checking that devid's match between
++ * old and new, but if they don't, the lists are too corrupt to salvage anyway.
++ */
++/* Note this is very similar to add_and_merge_extent */
++static void add_to_commitlist(struct pnfs_block_layout *bl,
++			      struct pnfs_block_short_extent *new)
++{
++	struct list_head *clist = &bl->bl_commit;
++	struct pnfs_block_short_extent *old, *save;
++	sector_t end = new->bse_f_offset + new->bse_length;
++
++	dprintk("%s enter\n", __func__);
++	print_short_extent(new);
++	print_clist(clist, bl->bl_count);
++	bl->bl_count++;
++	/* Scan for proper place to insert, extending new to the left
++	 * as much as possible.
++	 */
++	list_for_each_entry_safe(old, save, clist, bse_node) {
++		if (new->bse_f_offset < old->bse_f_offset)
++			break;
++		if (end <= old->bse_f_offset + old->bse_length) {
++			/* Range is already in list */
++			bl->bl_count--;
++			kfree(new);
++			return;
++		} else if (new->bse_f_offset <=
++				old->bse_f_offset + old->bse_length) {
++			/* new overlaps or abuts existing be */
++			if (new->bse_mdev == old->bse_mdev) {
++				/* extend new to fully replace old */
++				new->bse_length += new->bse_f_offset -
++						old->bse_f_offset;
++				new->bse_f_offset = old->bse_f_offset;
++				list_del(&old->bse_node);
++				bl->bl_count--;
++				kfree(old);
++			}
++		}
++	}
++	/* Note that if we never hit the above break, old will not point to a
++	 * valid extent.  However, in that case &old->bse_node==list.
++	 */
++	list_add_tail(&new->bse_node, &old->bse_node);
++	/* Scan forward for overlaps.  If we find any, extend new and
++	 * remove the overlapped extent.
++	 */
++	old = list_prepare_entry(new, clist, bse_node);
++	list_for_each_entry_safe_continue(old, save, clist, bse_node) {
++		if (end < old->bse_f_offset)
++			break;
++		/* new overlaps or abuts old */
++		if (new->bse_mdev == old->bse_mdev) {
++			if (end < old->bse_f_offset + old->bse_length) {
++				/* extend new to fully cover old */
++				end = old->bse_f_offset + old->bse_length;
++				new->bse_length = end - new->bse_f_offset;
++			}
++			list_del(&old->bse_node);
++			bl->bl_count--;
++			kfree(old);
++		}
++	}
++	dprintk("%s: after merging\n", __func__);
++	print_clist(clist, bl->bl_count);
++}
++
++/* Note the range described by offset, length is guaranteed to be contained
++ * within be.
++ */
++int mark_for_commit(struct pnfs_block_extent *be,
++		    sector_t offset, sector_t length)
++{
++	sector_t new_end, end = offset + length;
++	struct pnfs_block_short_extent *new;
++	struct pnfs_block_layout *bl = container_of(be->be_inval,
++						    struct pnfs_block_layout,
++						    bl_inval);
++
++	new = kmalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return -ENOMEM;
++
++	mark_written_sectors(be->be_inval, offset, length);
++	/* We want to add the range to commit list, but it must be
++	 * block-normalized, and verified that the normalized range has
++	 * been entirely written to disk.
++	 */
++	new->bse_f_offset = offset;
++	offset = normalize(offset, bl->bl_blocksize);
++	if (offset < new->bse_f_offset) {
++		if (is_range_written(be->be_inval, offset, new->bse_f_offset))
++			new->bse_f_offset = offset;
++		else
++			new->bse_f_offset = offset + bl->bl_blocksize;
++	}
++	new_end = normalize_up(end, bl->bl_blocksize);
++	if (end < new_end) {
++		if (is_range_written(be->be_inval, end, new_end))
++			end = new_end;
++		else
++			end = new_end - bl->bl_blocksize;
++	}
++	if (end <= new->bse_f_offset) {
++		kfree(new);
++		return 0;
++	}
++	new->bse_length = end - new->bse_f_offset;
++	new->bse_devid = be->be_devid;
++	new->bse_mdev = be->be_mdev;
++
++	spin_lock(&bl->bl_ext_lock);
++	/* new will be freed, either by add_to_commitlist if it decides not
++	 * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
++	 */
++	add_to_commitlist(bl, new);
++	spin_unlock(&bl->bl_ext_lock);
++	return 0;
++}
++
++static void print_bl_extent(struct pnfs_block_extent *be)
++{
++	dprintk("PRINT EXTENT extent %p\n", be);
++	if (be) {
++		dprintk("        be_f_offset %llu\n", (u64)be->be_f_offset);
++		dprintk("        be_length   %llu\n", (u64)be->be_length);
++		dprintk("        be_v_offset %llu\n", (u64)be->be_v_offset);
++		dprintk("        be_state    %d\n", be->be_state);
++	}
++}
++
++static void
++destroy_extent(struct kref *kref)
++{
++	struct pnfs_block_extent *be;
++
++	be = container_of(kref, struct pnfs_block_extent, be_refcnt);
++	dprintk("%s be=%p\n", __func__, be);
++	kfree(be);
++}
++
++void
++put_extent(struct pnfs_block_extent *be)
++{
++	if (be) {
++		dprintk("%s enter %p (%i)\n", __func__, be,
++			atomic_read(&be->be_refcnt.refcount));
++		kref_put(&be->be_refcnt, destroy_extent);
++	}
++}
++
++struct pnfs_block_extent *alloc_extent(void)
++{
++	struct pnfs_block_extent *be;
++
++	be = kmalloc(sizeof(struct pnfs_block_extent), GFP_KERNEL);
++	if (!be)
++		return NULL;
++	INIT_LIST_HEAD(&be->be_node);
++	kref_init(&be->be_refcnt);
++	be->be_inval = NULL;
++	return be;
++}
++
++struct pnfs_block_extent *
++get_extent(struct pnfs_block_extent *be)
++{
++	if (be)
++		kref_get(&be->be_refcnt);
++	return be;
++}
++
++void print_elist(struct list_head *list)
++{
++	struct pnfs_block_extent *be;
++	dprintk("****************\n");
++	dprintk("Extent list looks like:\n");
++	list_for_each_entry(be, list, be_node) {
++		print_bl_extent(be);
++	}
++	dprintk("****************\n");
++}
++
++static inline int
++extents_consistent(struct pnfs_block_extent *old, struct pnfs_block_extent *new)
++{
++	/* Note this assumes new->be_f_offset >= old->be_f_offset */
++	return (new->be_state == old->be_state) &&
++		((new->be_state == PNFS_BLOCK_NONE_DATA) ||
++		 ((new->be_v_offset - old->be_v_offset ==
++		   new->be_f_offset - old->be_f_offset) &&
++		  new->be_mdev == old->be_mdev));
++}
++
++/* Adds new to appropriate list in bl, modifying new and removing existing
++ * extents as appropriate to deal with overlaps.
++ *
++ * See find_get_extent for list constraints.
++ *
++ * Refcount on new is already set.  If end up not using it, or error out,
++ * need to put the reference.
++ *
++ * Lock is held by caller.
++ */
++int
++add_and_merge_extent(struct pnfs_block_layout *bl,
++		     struct pnfs_block_extent *new)
++{
++	struct pnfs_block_extent *be, *tmp;
++	sector_t end = new->be_f_offset + new->be_length;
++	struct list_head *list;
++
++	dprintk("%s enter with be=%p\n", __func__, new);
++	print_bl_extent(new);
++	list = &bl->bl_extents[choose_list(new->be_state)];
++	print_elist(list);
++
++	/* Scan for proper place to insert, extending new to the left
++	 * as much as possible.
++	 */
++	list_for_each_entry_safe_reverse(be, tmp, list, be_node) {
++		if (new->be_f_offset >= be->be_f_offset + be->be_length)
++			break;
++		if (new->be_f_offset >= be->be_f_offset) {
++			if (end <= be->be_f_offset + be->be_length) {
++				/* new is a subset of existing be*/
++				if (extents_consistent(be, new)) {
++					dprintk("%s: new is subset, ignoring\n",
++						__func__);
++					put_extent(new);
++					return 0;
++				} else {
++					goto out_err;
++				}
++			} else {
++				/* |<--   be   -->|
++				 *          |<--   new   -->| */
++				if (extents_consistent(be, new)) {
++					/* extend new to fully replace be */
++					new->be_length += new->be_f_offset -
++						be->be_f_offset;
++					new->be_f_offset = be->be_f_offset;
++					new->be_v_offset = be->be_v_offset;
++					dprintk("%s: removing %p\n", __func__, be);
++					list_del(&be->be_node);
++					put_extent(be);
++				} else {
++					goto out_err;
++				}
++			}
++		} else if (end >= be->be_f_offset + be->be_length) {
++			/* new extent overlap existing be */
++			if (extents_consistent(be, new)) {
++				/* extend new to fully replace be */
++				dprintk("%s: removing %p\n", __func__, be);
++				list_del(&be->be_node);
++				put_extent(be);
++			} else {
++				goto out_err;
++			}
++		} else if (end > be->be_f_offset) {
++			/*           |<--   be   -->|
++			 *|<--   new   -->| */
++			if (extents_consistent(new, be)) {
++				/* extend new to fully replace be */
++				new->be_length += be->be_f_offset + be->be_length -
++					new->be_f_offset - new->be_length;
++				dprintk("%s: removing %p\n", __func__, be);
++				list_del(&be->be_node);
++				put_extent(be);
++			} else {
++				goto out_err;
++			}
++		}
++	}
++	/* Note that if we never hit the above break, be will not point to a
++	 * valid extent.  However, in that case &be->be_node==list.
++	 */
++	list_add(&new->be_node, &be->be_node);
++	dprintk("%s: inserting new\n", __func__);
++	print_elist(list);
++	/* STUB - The per-list consistency checks have all been done,
++	 * should now check cross-list consistency.
++	 */
++	return 0;
++
++ out_err:
++	put_extent(new);
++	return -EIO;
++}
++
++/* Returns extent, or NULL.  If a second READ extent exists, it is returned
++ * in cow_read, if given.
++ *
++ * The extents are kept in two separate ordered lists, one for READ and NONE,
++ * one for READWRITE and INVALID.  Within each list, we assume:
++ * 1. Extents are ordered by file offset.
++ * 2. For any given isect, there is at most one extent that matches.
++ */
++struct pnfs_block_extent *
++find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
++	    struct pnfs_block_extent **cow_read)
++{
++	struct pnfs_block_extent *be, *cow, *ret;
++	int i;
++
++	dprintk("%s enter with isect %llu\n", __func__, (u64)isect);
++	cow = ret = NULL;
++	spin_lock(&bl->bl_ext_lock);
++	for (i = 0; i < EXTENT_LISTS; i++) {
++		if (ret &&
++		    (!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA))
++			break;
++		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
++			if (isect >= be->be_f_offset + be->be_length)
++				break;
++			if (isect >= be->be_f_offset) {
++				/* We have found an extent */
++				dprintk("%s Get %p (%i)\n", __func__, be,
++					atomic_read(&be->be_refcnt.refcount));
++				kref_get(&be->be_refcnt);
++				if (!ret)
++					ret = be;
++				else if (be->be_state != PNFS_BLOCK_READ_DATA)
++					put_extent(be);
++				else
++					cow = be;
++				break;
++			}
++		}
++	}
++	spin_unlock(&bl->bl_ext_lock);
++	if (cow_read)
++		*cow_read = cow;
++	print_bl_extent(ret);
++	return ret;
++}
++
++/* Similar to find_get_extent, but called with lock held, and ignores cow */
++static struct pnfs_block_extent *
++find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect)
++{
++	struct pnfs_block_extent *be, *ret = NULL;
++	int i;
++
++	dprintk("%s enter with isect %llu\n", __func__, (u64)isect);
++	for (i = 0; i < EXTENT_LISTS; i++) {
++		if (ret)
++			break;
++		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
++			if (isect >= be->be_f_offset + be->be_length)
++				break;
++			if (isect >= be->be_f_offset) {
++				/* We have found an extent */
++				dprintk("%s Get %p (%i)\n", __func__, be,
++					atomic_read(&be->be_refcnt.refcount));
++				kref_get(&be->be_refcnt);
++				ret = be;
++				break;
++			}
++		}
++	}
++	print_bl_extent(ret);
++	return ret;
++}
++
++int
++encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++			       struct xdr_stream *xdr,
++			       const struct nfs4_layoutcommit_args *arg)
++{
++	sector_t start, end;
++	struct pnfs_block_short_extent *lce, *save;
++	unsigned int count = 0;
++	struct bl_layoutupdate_data *bld = arg->layoutdriver_data;
++	struct list_head *ranges = &bld->ranges;
++	__be32 *p, *xdr_start;
++
++	dprintk("%s enter\n", __func__);
++	start = arg->range.offset >> 9;
++	end = start + (arg->range.length >> 9);
++	dprintk("%s set start=%llu, end=%llu\n",
++		__func__, (u64)start, (u64)end);
++
++	/* BUG - creation of bl_commit is buggy - need to wait for
++	 * entire block to be marked WRITTEN before it can be added.
++	 */
++	spin_lock(&bl->bl_ext_lock);
++	/* Want to adjust for possible truncate */
++	/* We now want to adjust argument range */
++
++	/* XDR encode the ranges found */
++	xdr_start = xdr_reserve_space(xdr, 8);
++	if (!xdr_start)
++		goto out;
++	list_for_each_entry_safe(lce, save, &bl->bl_commit, bse_node) {
++		p = xdr_reserve_space(xdr, 7 * 4 + sizeof(lce->bse_devid.data));
++		if (!p)
++			break;
++		WRITE_DEVID(&lce->bse_devid);
++		WRITE64(lce->bse_f_offset << 9);
++		WRITE64(lce->bse_length << 9);
++		WRITE64(0LL);
++		WRITE32(PNFS_BLOCK_READWRITE_DATA);
++		list_del(&lce->bse_node);
++		list_add_tail(&lce->bse_node, ranges);
++		bl->bl_count--;
++		count++;
++	}
++	xdr_start[0] = cpu_to_be32((xdr->p - xdr_start - 1) * 4);
++	xdr_start[1] = cpu_to_be32(count);
++out:
++	spin_unlock(&bl->bl_ext_lock);
++	dprintk("%s found %i ranges\n", __func__, count);
++	return 0;
++}
++
++/* Helper function to set_to_rw that initializes a new extent */
++static void
++_prep_new_extent(struct pnfs_block_extent *new,
++		 struct pnfs_block_extent *orig,
++		 sector_t offset, sector_t length, int state)
++{
++	kref_init(&new->be_refcnt);
++	/* don't need to INIT_LIST_HEAD(&new->be_node) */
++	memcpy(&new->be_devid, &orig->be_devid, sizeof(struct pnfs_deviceid));
++	new->be_mdev = orig->be_mdev;
++	new->be_f_offset = offset;
++	new->be_length = length;
++	new->be_v_offset = orig->be_v_offset - orig->be_f_offset + offset;
++	new->be_state = state;
++	new->be_inval = orig->be_inval;
++}
++
++/* Tries to merge be with extent in front of it in list.
++ * Frees storage if not used.
++ */
++static struct pnfs_block_extent *
++_front_merge(struct pnfs_block_extent *be, struct list_head *head,
++	     struct pnfs_block_extent *storage)
++{
++	struct pnfs_block_extent *prev;
++
++	if (!storage)
++		goto no_merge;
++	if (&be->be_node == head || be->be_node.prev == head)
++		goto no_merge;
++	prev = list_entry(be->be_node.prev, struct pnfs_block_extent, be_node);
++	if ((prev->be_f_offset + prev->be_length != be->be_f_offset) ||
++	    !extents_consistent(prev, be))
++		goto no_merge;
++	_prep_new_extent(storage, prev, prev->be_f_offset,
++			 prev->be_length + be->be_length, prev->be_state);
++	list_replace(&prev->be_node, &storage->be_node);
++	put_extent(prev);
++	list_del(&be->be_node);
++	put_extent(be);
++	return storage;
++
++ no_merge:
++	kfree(storage);
++	return be;
++}
++
++/* Converts to READWRITE the part of the INVALID extent found at offset that
++ * lies within [offset, offset+length), splitting it into up to three pieces.
++ * Returns the end of the extent found at offset, converted or not (the caller
++ * resumes there), or offset+length if no extent is found or kmalloc fails. */
++static u64 set_to_rw(struct pnfs_block_layout *bl, u64 offset, u64 length)
++{
++	u64 rv = offset + length;
++	struct pnfs_block_extent *be, *e1, *e2, *e3, *new, *old;
++	struct pnfs_block_extent *children[3], *merge1 = NULL, *merge2 = NULL;
++	int i = 0, j;
++
++	dprintk("%s(%llu, %llu)\n", __func__, offset, length);
++	/* Storage for up to three new extents.  BUG - failure is ignored */
++	e1 = kmalloc(sizeof(*e1), GFP_KERNEL);
++	e2 = kmalloc(sizeof(*e2), GFP_KERNEL);
++	e3 = kmalloc(sizeof(*e3), GFP_KERNEL);
++	if (!e1 || !e2 || !e3)
++		goto out_nosplit;
++
++	spin_lock(&bl->bl_ext_lock);
++	be = find_get_extent_locked(bl, offset);
++	if (be)
++		rv = be->be_f_offset + be->be_length;
++	if (!be || be->be_state != PNFS_BLOCK_INVALID_DATA) {
++		spin_unlock(&bl->bl_ext_lock);
++		put_extent(be); /* drop the lookup reference; NULL is ignored */
++		goto out_nosplit;
++	}
++	/* Add e* to children, bumping e*'s krefs */
++	if (be->be_f_offset != offset) {
++		_prep_new_extent(e1, be, be->be_f_offset,
++				 offset - be->be_f_offset,
++				 PNFS_BLOCK_INVALID_DATA);
++		children[i++] = e1;
++		print_bl_extent(e1);
++	} else
++		merge1 = e1;
++	_prep_new_extent(e2, be, offset,
++			 min(length, be->be_f_offset + be->be_length - offset),
++			 PNFS_BLOCK_READWRITE_DATA);
++	children[i++] = e2;
++	print_bl_extent(e2);
++	if (offset + length < be->be_f_offset + be->be_length) {
++		_prep_new_extent(e3, be, e2->be_f_offset + e2->be_length,
++				 be->be_f_offset + be->be_length -
++				 offset - length,
++				 PNFS_BLOCK_INVALID_DATA);
++		children[i++] = e3;
++		print_bl_extent(e3);
++	} else
++		merge2 = e3;
++
++	/* Remove be from list, and insert the e*.  We don't get refs on e*,
++	 * since this list is the base reference set when init'ed. */
++	if (i < 3)
++		children[i] = NULL;
++	new = children[0];
++	list_replace(&be->be_node, &new->be_node);
++	put_extent(be);
++	new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge1);
++	for (j = 1; j < i; j++) {
++		old = new;
++		new = children[j];
++		list_add(&new->be_node, &old->be_node);
++	}
++	if (merge2) {
++		/* This is a HACK, should just create a _back_merge function */
++		new = list_entry(new->be_node.next,
++				 struct pnfs_block_extent, be_node);
++		new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge2);
++	}
++	spin_unlock(&bl->bl_ext_lock);
++
++	/* The base reference was dropped above; dropping the lookup reference
++	 * here schedules be for destruction. */
++	put_extent(be);
++	dprintk("%s returns %llu after split\n", __func__, rv);
++	return rv;
++
++ out_nosplit:
++	kfree(e1);
++	kfree(e2);
++	kfree(e3);
++	dprintk("%s returns %llu without splitting\n", __func__, rv);
++	return rv;
++}
++
++void
++clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
++			      const struct nfs4_layoutcommit_args *arg,
++			      int status)
++{
++	struct bl_layoutupdate_data *bld = arg->layoutdriver_data;
++	struct pnfs_block_short_extent *lce, *save;
++
++	dprintk("%s status %d\n", __func__, status);
++	list_for_each_entry_safe_reverse(lce, save, &bld->ranges, bse_node) {
++		if (likely(!status)) {
++			u64 offset = lce->bse_f_offset;
++			u64 end = offset + lce->bse_length;
++
++			do {
++				offset = set_to_rw(bl, offset, end - offset);
++			} while (offset < end);
++
++			kfree(lce);
++		} else {
++			spin_lock(&bl->bl_ext_lock);
++			add_to_commitlist(bl, lce);
++			spin_unlock(&bl->bl_ext_lock);
++		}
++	}
++}
+diff -up linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile
+--- linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile.orig	2010-09-30 10:17:08.524988000 -0400
++++ linux-2.6.34.noarch/fs/nfs/blocklayout/Makefile	2010-09-30 10:17:08.525996000 -0400
+@@ -0,0 +1,6 @@
++#
++# Makefile for the pNFS block layout driver kernel module
++#
++obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o
++blocklayoutdriver-objs := blocklayout.o blocklayoutdev.o blocklayoutdm.o \
++			extents.o block-device-discovery-pipe.o
+diff -up linux-2.6.34.noarch/fs/nfs/callback.h.orig linux-2.6.34.noarch/fs/nfs/callback.h
+--- linux-2.6.34.noarch/fs/nfs/callback.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback.h	2010-09-30 10:17:08.585990000 -0400
+@@ -111,6 +111,13 @@ extern int nfs41_validate_delegation_sta
+ 
+ #define RCA4_TYPE_MASK_RDATA_DLG	0
+ #define RCA4_TYPE_MASK_WDATA_DLG	1
++#define RCA4_TYPE_MASK_DIR_DLG         2
++#define RCA4_TYPE_MASK_FILE_LAYOUT     3
++#define RCA4_TYPE_MASK_BLK_LAYOUT      4
++#define RCA4_TYPE_MASK_OBJ_LAYOUT_MIN  8
++#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX  9
++#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12
++#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15
+ 
+ struct cb_recallanyargs {
+ 	struct sockaddr	*craa_addr;
+@@ -127,6 +134,39 @@ struct cb_recallslotargs {
+ extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args,
+ 					  void *dummy);
+ 
++struct cb_layoutrecallargs {
++	struct sockaddr		*cbl_addr;
++	struct nfs_fh		cbl_fh;
++	struct pnfs_layout_range cbl_seg;
++	struct nfs_fsid		cbl_fsid;
++	uint32_t		cbl_recall_type;
++	uint32_t		cbl_layout_type;
++	uint32_t		cbl_layoutchanged;
++	nfs4_stateid		cbl_stateid;
++};
++
++extern unsigned nfs4_callback_layoutrecall(
++	struct cb_layoutrecallargs *args,
++	void *dummy);
++
++struct cb_devicenotifyitem {
++	uint32_t		cbd_notify_type;
++	uint32_t		cbd_layout_type;
++	struct pnfs_deviceid	cbd_dev_id;
++	uint32_t		cbd_immediate;
++};
++
++/* XXX: Should be dynamic up to max compound size */
++#define NFS4_DEV_NOTIFY_MAXENTRIES 10
++struct cb_devicenotifyargs {
++	struct sockaddr			*addr;
++	int				 ndevs;
++	struct cb_devicenotifyitem	 devs[NFS4_DEV_NOTIFY_MAXENTRIES];
++};
++
++extern unsigned nfs4_callback_devicenotify(
++	struct cb_devicenotifyargs *args,
++	void *dummy);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
+diff -up linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig linux-2.6.34.noarch/fs/nfs/callback_proc.c
+--- linux-2.6.34.noarch/fs/nfs/callback_proc.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_proc.c	2010-09-30 10:17:08.591990000 -0400
+@@ -8,10 +8,15 @@
+ #include <linux/nfs4.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/slab.h>
++#include <linux/kthread.h>
++#include <linux/module.h>
++#include <linux/writeback.h>
++#include <linux/nfs4_pnfs.h>
+ #include "nfs4_fs.h"
+ #include "callback.h"
+ #include "delegation.h"
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ #ifdef NFS_DEBUG
+ #define NFSDBG_FACILITY NFSDBG_CALLBACK
+@@ -62,16 +67,6 @@ out:
+ 	return res->status;
+ }
+ 
+-static int (*nfs_validate_delegation_stateid(struct nfs_client *clp))(struct nfs_delegation *, const nfs4_stateid *)
+-{
+-#if defined(CONFIG_NFS_V4_1)
+-	if (clp->cl_minorversion > 0)
+-		return nfs41_validate_delegation_stateid;
+-#endif
+-	return nfs4_validate_delegation_stateid;
+-}
+-
+-
+ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
+ {
+ 	struct nfs_client *clp;
+@@ -92,8 +87,7 @@ __be32 nfs4_callback_recall(struct cb_re
+ 		inode = nfs_delegation_find_inode(clp, &args->fh);
+ 		if (inode != NULL) {
+ 			/* Set up a helper thread to actually return the delegation */
+-			switch (nfs_async_inode_return_delegation(inode, &args->stateid,
+-								  nfs_validate_delegation_stateid(clp))) {
++			switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
+ 				case 0:
+ 					res = 0;
+ 					break;
+@@ -116,24 +110,364 @@ out:
+ 
+ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
+ {
+-	if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data,
+-					 sizeof(delegation->stateid.data)) != 0)
++	if (delegation == NULL || memcmp(delegation->stateid.u.data,
++					 stateid->u.data,
++					 sizeof(delegation->stateid.u.data)))
+ 		return 0;
+ 	return 1;
+ }
+ 
+ #if defined(CONFIG_NFS_V4_1)
+ 
++static bool
++pnfs_is_next_layout_stateid(const struct pnfs_layout_hdr *lo,
++			    const nfs4_stateid stateid)
++{
++	int seqlock;
++	bool res;
++	u32 oldseqid, newseqid;
++
++	do {
++		seqlock = read_seqbegin(&lo->seqlock);
++		oldseqid = be32_to_cpu(lo->stateid.u.stateid.seqid);
++		newseqid = be32_to_cpu(stateid.u.stateid.seqid);
++		res = !memcmp(lo->stateid.u.stateid.other,
++			      stateid.u.stateid.other,
++			      NFS4_STATEID_OTHER_SIZE);
++		if (res) { /* comparing layout stateids */
++			if (oldseqid == ~0)
++				res = (newseqid == 1);
++			else
++				res = (newseqid == oldseqid + 1);
++		} else { /* open stateid */
++			res = !memcmp(lo->stateid.u.data,
++				      &zero_stateid,
++				      NFS4_STATEID_SIZE);
++			if (res)
++				res = (newseqid == 1);
++		}
++	} while (read_seqretry(&lo->seqlock, seqlock));
++
++	return res;
++}
++
++/*
++ * Retrieve an inode based on layout recall parameters
++ *
++ * Note: caller must iput(inode) to dereference the inode.
++ */
++static struct inode *
++nfs_layoutrecall_find_inode(struct nfs_client *clp,
++			    const struct cb_layoutrecallargs *args)
++{
++	struct nfs_inode *nfsi;
++	struct pnfs_layout_hdr *lo;
++	struct nfs_server *server;
++	struct inode *ino = NULL;
++
++	dprintk("%s: Begin recall_type=%d clp %p\n",
++		__func__, args->cbl_recall_type, clp);
++
++	spin_lock(&clp->cl_lock);
++	list_for_each_entry(lo, &clp->cl_layouts, layouts) {
++		nfsi = PNFS_NFS_INODE(lo);
++		if (!nfsi)
++			continue;
++
++		dprintk("%s: Searching inode=%lu\n",
++			__func__, nfsi->vfs_inode.i_ino);
++
++		if (args->cbl_recall_type == RETURN_FILE) {
++		    if (nfs_compare_fh(&args->cbl_fh, &nfsi->fh))
++			continue;
++		} else if (args->cbl_recall_type == RETURN_FSID) {
++			server = NFS_SERVER(&nfsi->vfs_inode);
++			if (server->fsid.major != args->cbl_fsid.major ||
++			    server->fsid.minor != args->cbl_fsid.minor)
++				continue;
++		}
++
++		/* Make sure client didn't clean up layout without
++		 * telling the server */
++		if (!has_layout(nfsi))
++			continue;
++
++		ino = igrab(&nfsi->vfs_inode);
++		dprintk("%s: Found inode=%p\n", __func__, ino);
++		break;
++	}
++	spin_unlock(&clp->cl_lock);
++	return ino;
++}
++
++struct recall_layout_threadargs {
++	struct inode *inode;
++	struct nfs_client *clp;
++	struct completion started;
++	struct cb_layoutrecallargs *rl;
++	int result;
++};
++
++static int pnfs_recall_layout(void *data)
++{
++	struct inode *inode, *ino;
++	struct nfs_client *clp;
++	struct cb_layoutrecallargs rl;
++	struct nfs4_layoutreturn *lrp;
++	struct recall_layout_threadargs *args =
++		(struct recall_layout_threadargs *)data;
++	int status = 0;
++
++	daemonize("nfsv4-layoutreturn");
++
++	dprintk("%s: recall_type=%d fsid 0x%llx-0x%llx start\n",
++		__func__, args->rl->cbl_recall_type,
++		args->rl->cbl_fsid.major, args->rl->cbl_fsid.minor);
++
++	clp = args->clp;
++	inode = args->inode;
++	rl = *args->rl;
++
++	/* support whole file layouts only */
++	rl.cbl_seg.offset = 0;
++	rl.cbl_seg.length = NFS4_MAX_UINT64;
++
++	if (rl.cbl_recall_type == RETURN_FILE) {
++		if (pnfs_is_next_layout_stateid(NFS_I(inode)->layout,
++						rl.cbl_stateid))
++			status = pnfs_return_layout(inode, &rl.cbl_seg,
++						    &rl.cbl_stateid, RETURN_FILE,
++						    false);
++		else
++			status = cpu_to_be32(NFS4ERR_DELAY);
++		if (status)
++			dprintk("%s RETURN_FILE error: %d\n", __func__, status);
++		else
++			status =  cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
++		args->result = status;
++		complete(&args->started);
++		goto out;
++	}
++
++	status = cpu_to_be32(NFS4_OK);
++	args->result = status;
++	complete(&args->started);
++	args = NULL;
++
++	/* IMPROVEME: This loop is inefficient, running in O(|s_inodes|^2) */
++	while ((ino = nfs_layoutrecall_find_inode(clp, &rl)) != NULL) {
++		/* FIXME: need to check status on pnfs_return_layout */
++		pnfs_return_layout(ino, &rl.cbl_seg, NULL, RETURN_FILE, false);
++		iput(ino);
++	}
++
++	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
++	if (!lrp) {
++		dprintk("%s: allocation failed. Cannot send last LAYOUTRETURN\n",
++			__func__);
++		goto out;
++	}
++
++	/* send final layoutreturn */
++	lrp->args.reclaim = 0;
++	lrp->args.layout_type = rl.cbl_layout_type;
++	lrp->args.return_type = rl.cbl_recall_type;
++	lrp->args.range = rl.cbl_seg;
++	lrp->args.inode = inode;
++	nfs4_proc_layoutreturn(lrp, true);
++
++out:
++	clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
++	nfs_put_client(clp);
++	module_put_and_exit(0);
++	dprintk("%s: exit status %d\n", __func__, 0);
++	return 0;
++}
++
++/*
++ * Asynchronous layout recall!
++ */
++static int pnfs_async_return_layout(struct nfs_client *clp, struct inode *inode,
++				    struct cb_layoutrecallargs *rl)
++{
++	struct recall_layout_threadargs data = {
++		.clp = clp,
++		.inode = inode,
++		.rl = rl,
++	};
++	struct task_struct *t;
++	int status = -EAGAIN;
++
++	dprintk("%s: -->\n", __func__);
++
++	/* FIXME: do not allow two concurrent layout recalls */
++	if (test_and_set_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state))
++		return status;
++
++	init_completion(&data.started);
++	__module_get(THIS_MODULE);
++	if (!atomic_inc_not_zero(&clp->cl_count))
++		goto out_put_no_client;
++
++	t = kthread_run(pnfs_recall_layout, &data, "%s", "pnfs_recall_layout");
++	if (IS_ERR(t)) {
++		printk(KERN_INFO "NFS: Layout recall callback thread failed "
++			"for client (clientid %08x/%08x)\n",
++			(unsigned)(clp->cl_clientid >> 32),
++			(unsigned)(clp->cl_clientid));
++		status = PTR_ERR(t);
++		goto out_module_put;
++	}
++	wait_for_completion(&data.started);
++	return data.result;
++out_module_put:
++	nfs_put_client(clp);
++out_put_no_client:
++	clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
++	module_put(THIS_MODULE);
++	return status;
++}
++
++/* Recall all layouts of @clp; returns 0 if no inode with a layout is found */
++static int pnfs_recall_all_layouts(struct nfs_client *clp)
++{
++	struct cb_layoutrecallargs rl = {
++		.cbl_recall_type = RETURN_ALL,
++		.cbl_seg.iomode = IOMODE_ANY,
++		.cbl_seg.offset = 0,
++		.cbl_seg.length = NFS4_MAX_UINT64,
++	};	/* NOTE(review): cbl_layout_type is now 0, not stack garbage */
++	struct inode *inode;
++	int status;
++
++	/* we need the inode to get the nfs_server struct */
++	inode = nfs_layoutrecall_find_inode(clp, &rl);
++	if (!inode)
++		return 0;
++	status = pnfs_async_return_layout(clp, inode, &rl);
++	iput(inode);
++	return status;
++}
++
++__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
++				  void *dummy)
++{
++	struct nfs_client *clp;
++	struct inode *inode = NULL;
++	__be32 res;
++	int status;
++	unsigned int num_client = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
++	clp  = nfs_find_client(args->cbl_addr, 4);
++	if (clp == NULL) {
++		dprintk("%s: no client for addr %u.%u.%u.%u\n",
++			__func__, NIPQUAD(args->cbl_addr));
++		goto out;
++	}
++
++	res = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
++	do {
++		struct nfs_client *prev = clp;
++		num_client++;
++		/* the callback must come from the MDS personality */
++		if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
++			goto loop;
++		if (args->cbl_recall_type == RETURN_FILE) {
++			inode = nfs_layoutrecall_find_inode(clp, args);
++			if (inode != NULL) {
++				status = pnfs_async_return_layout(clp, inode,
++								  args);
++				if (status)
++					res = cpu_to_be32(NFS4ERR_DELAY);
++				iput(inode);
++			}
++		} else { /* _ALL or _FSID */
++			/* we need the inode to get the nfs_server struct */
++			inode = nfs_layoutrecall_find_inode(clp, args);
++			if (!inode)
++				goto loop;
++			status = pnfs_async_return_layout(clp, inode, args);
++			if (status)
++				res = cpu_to_be32(NFS4ERR_DELAY);
++			iput(inode);
++		}
++loop:
++		clp = nfs_find_client_next(prev);
++		nfs_put_client(prev);
++	} while (clp != NULL);
++
++out:
++	dprintk("%s: exit with status = %d numclient %u\n",
++		__func__, ntohl(res), num_client);
++	return res;
++}
++
++/* Remove the deviceid(s) from the nfs_client deviceid cache */
++static __be32 pnfs_devicenotify_client(struct nfs_client *clp,
++				       struct cb_devicenotifyargs *args)
++{
++	uint32_t type;
++	int i;
++
++	dprintk("%s: --> clp %p\n", __func__, clp);
++
++	for (i = 0; i < args->ndevs; i++) {
++		struct cb_devicenotifyitem *dev = &args->devs[i];
++		type = dev->cbd_notify_type;
++		if (type == NOTIFY_DEVICEID4_DELETE && clp->cl_devid_cache)
++			nfs4_delete_device(clp->cl_devid_cache,
++					   &dev->cbd_dev_id);
++		else if (type == NOTIFY_DEVICEID4_CHANGE)
++			printk(KERN_ERR "%s: NOTIFY_DEVICEID4_CHANGE "
++					"not supported\n", __func__);
++	}
++	return 0;
++}
++
++__be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
++				  void *dummy)
++{
++	struct nfs_client *clp;
++	__be32 res = 0;
++	unsigned int num_client = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	res = __constant_htonl(NFS4ERR_INVAL);
++	clp = nfs_find_client(args->addr, 4);
++	if (clp == NULL) {
++		dprintk("%s: no client for addr %u.%u.%u.%u\n",
++			__func__, NIPQUAD(args->addr));
++		goto out;
++	}
++
++	do {
++		struct nfs_client *prev = clp;
++		num_client++;
++		res = pnfs_devicenotify_client(clp, args);
++		clp = nfs_find_client_next(prev);
++		nfs_put_client(prev);
++	} while (clp != NULL);
++
++out:
++	dprintk("%s: exit with status = %d numclient %u\n",
++		__func__, ntohl(res), num_client);
++	return res;
++}
++
+ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
+ {
+ 	if (delegation == NULL)
+ 		return 0;
+ 
+-	/* seqid is 4-bytes long */
+-	if (((u32 *) &stateid->data)[0] != 0)
++	if (stateid->u.stateid.seqid != 0)
+ 		return 0;
+-	if (memcmp(&delegation->stateid.data[4], &stateid->data[4],
+-		   sizeof(stateid->data)-4))
++	if (memcmp(&delegation->stateid.u.stateid.other,
++		   &stateid->u.stateid.other,
++		   NFS4_STATEID_OTHER_SIZE))
+ 		return 0;
+ 
+ 	return 1;
+@@ -335,13 +669,37 @@ out:
+ 	return status;
+ }
+ 
++static inline bool
++validate_bitmap_values(const unsigned long *mask)
++{
++	int i;
++
++	if (*mask == 0)
++		return true;
++	if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, mask) ||
++	    test_bit(RCA4_TYPE_MASK_WDATA_DLG, mask) ||
++	    test_bit(RCA4_TYPE_MASK_DIR_DLG, mask) ||
++	    test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, mask) ||
++	    test_bit(RCA4_TYPE_MASK_BLK_LAYOUT, mask))
++		return true;
++	for (i = RCA4_TYPE_MASK_OBJ_LAYOUT_MIN;
++	     i <= RCA4_TYPE_MASK_OBJ_LAYOUT_MAX; i++)
++		if (test_bit(i, mask))
++			return true;
++	for (i = RCA4_TYPE_MASK_OTHER_LAYOUT_MIN;
++	     i <= RCA4_TYPE_MASK_OTHER_LAYOUT_MAX; i++)
++		if (test_bit(i, mask))
++			return true;
++	return false;
++}
++
+ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
+ {
+ 	struct nfs_client *clp;
+ 	__be32 status;
+ 	fmode_t flags = 0;
+ 
+-	status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
++	status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
+ 	clp = nfs_find_client(args->craa_addr, 4);
+ 	if (clp == NULL)
+ 		goto out;
+@@ -349,16 +707,25 @@ __be32 nfs4_callback_recallany(struct cb
+ 	dprintk("NFS: RECALL_ANY callback request from %s\n",
+ 		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+ 
++	status = cpu_to_be32(NFS4ERR_INVAL);
++	if (!validate_bitmap_values((const unsigned long *)
++				    &args->craa_type_mask))
++		return status;
++
++	status = cpu_to_be32(NFS4_OK);
+ 	if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
+ 		     &args->craa_type_mask))
+ 		flags = FMODE_READ;
+ 	if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
+ 		     &args->craa_type_mask))
+ 		flags |= FMODE_WRITE;
++	if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
++		     &args->craa_type_mask))
++		if (pnfs_recall_all_layouts(clp) == -EAGAIN)
++			status = cpu_to_be32(NFS4ERR_DELAY);
+ 
+ 	if (flags)
+ 		nfs_expire_all_delegation_types(clp, flags);
+-	status = htonl(NFS4_OK);
+ out:
+ 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
+ 	return status;
+diff -up linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig linux-2.6.34.noarch/fs/nfs/callback_xdr.c
+--- linux-2.6.34.noarch/fs/nfs/callback_xdr.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/callback_xdr.c	2010-09-30 10:17:08.597991000 -0400
+@@ -22,6 +22,8 @@
+ #define CB_OP_RECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+ 
+ #if defined(CONFIG_NFS_V4_1)
++#define CB_OP_LAYOUTRECALL_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
++#define CB_OP_DEVICENOTIFY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+ #define CB_OP_SEQUENCE_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ + \
+ 					4 + 1 + 3)
+ #define CB_OP_RECALLANY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+@@ -136,7 +138,7 @@ static __be32 decode_stateid(struct xdr_
+ 	p = read_buf(xdr, 16);
+ 	if (unlikely(p == NULL))
+ 		return htonl(NFS4ERR_RESOURCE);
+-	memcpy(stateid->data, p, 16);
++	memcpy(stateid->u.data, p, 16);
+ 	return 0;
+ }
+ 
+@@ -220,6 +222,148 @@ out:
+ 
+ #if defined(CONFIG_NFS_V4_1)
+ 
++static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
++				       struct xdr_stream *xdr,
++				       struct cb_layoutrecallargs *args)
++{
++	__be32 *p;
++	__be32 status = 0;
++
++	args->cbl_addr = svc_addr(rqstp);
++	p = read_buf(xdr, 4 * sizeof(uint32_t));
++	if (unlikely(p == NULL)) {
++		status = htonl(NFS4ERR_BADXDR);
++		goto out;
++	}
++
++	args->cbl_layout_type = ntohl(*p++);
++	args->cbl_seg.iomode = ntohl(*p++);
++	args->cbl_layoutchanged = ntohl(*p++);
++	args->cbl_recall_type = ntohl(*p++);
++
++	if (likely(args->cbl_recall_type == RETURN_FILE)) {
++		status = decode_fh(xdr, &args->cbl_fh);
++		if (unlikely(status != 0))
++			goto out;
++
++		p = read_buf(xdr, 2 * sizeof(uint64_t));
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_BADXDR);
++			goto out;
++		}
++		p = xdr_decode_hyper(p, &args->cbl_seg.offset);
++		p = xdr_decode_hyper(p, &args->cbl_seg.length);
++		status = decode_stateid(xdr, &args->cbl_stateid);
++		if (unlikely(status != 0))
++			goto out;
++	} else if (args->cbl_recall_type == RETURN_FSID) {
++		p = read_buf(xdr, 2 * sizeof(uint64_t));
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_BADXDR);
++			goto out;
++		}
++		p = xdr_decode_hyper(p, &args->cbl_fsid.major);
++		p = xdr_decode_hyper(p, &args->cbl_fsid.minor);
++	}
++	dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d "
++		"fsid %llx-%llx fhsize %d\n", __func__,
++		args->cbl_layout_type, args->cbl_seg.iomode,
++		args->cbl_layoutchanged, args->cbl_recall_type,
++		args->cbl_fsid.major, args->cbl_fsid.minor,
++		args->cbl_fh.size);
++out:
++	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
++	return status;
++}
++
++/* Decode CB_NOTIFY_DEVICEID args; keeps at most NFS4_DEV_NOTIFY_MAXENTRIES */
++static __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
++				       struct xdr_stream *xdr,
++				       struct cb_devicenotifyargs *args)
++{
++	__be32 *p;
++	__be32 status = 0;
++	u32 tmp;
++	int n, i;
++
++	args->ndevs = 0;
++	args->addr = svc_addr(rqstp);
++
++	/* Num of device notifications */
++	p = read_buf(xdr, sizeof(uint32_t));
++	if (unlikely(p == NULL)) {
++		status = htonl(NFS4ERR_RESOURCE);
++		goto out;
++	}
++	n = ntohl(*p++);
++	if (n <= 0)
++		goto out;
++
++	/* XXX: need to possibly return error in this case */
++	if (n > NFS4_DEV_NOTIFY_MAXENTRIES) {
++		dprintk("%s: Processing (%d) notifications out of (%d)\n",
++			__func__, NFS4_DEV_NOTIFY_MAXENTRIES, n);
++		n = NFS4_DEV_NOTIFY_MAXENTRIES;
++	}
++
++	/* Decode each dev notification */
++	for (i = 0; i < n; i++) {
++		struct cb_devicenotifyitem *dev = &args->devs[i];
++
++		p = read_buf(xdr, (4 * sizeof(uint32_t))
++			     + NFS4_PNFS_DEVICEID4_SIZE);
++		if (unlikely(p == NULL)) {
++			status = htonl(NFS4ERR_RESOURCE);
++			goto out;
++		}
++
++		tmp = ntohl(*p++);	/* bitmap size */
++		if (tmp != 1) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++		dev->cbd_notify_type = ntohl(*p++);
++		if (dev->cbd_notify_type != NOTIFY_DEVICEID4_CHANGE &&
++		    dev->cbd_notify_type != NOTIFY_DEVICEID4_DELETE) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++
++		tmp = ntohl(*p++);	/* opaque size */
++		if (((dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) &&
++		     (tmp != NFS4_PNFS_DEVICEID4_SIZE + 8)) ||
++		    ((dev->cbd_notify_type == NOTIFY_DEVICEID4_DELETE) &&
++		     (tmp != NFS4_PNFS_DEVICEID4_SIZE + 4))) {
++			status = htonl(NFS4ERR_INVAL);
++			goto out;
++		}
++		dev->cbd_layout_type = ntohl(*p++);
++		memcpy(dev->cbd_dev_id.data, p, NFS4_PNFS_DEVICEID4_SIZE);
++		p += XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE);
++		/* only CHANGE items carry a trailing "immediate" word */
++		if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) {
++			p = read_buf(xdr, sizeof(uint32_t));
++			if (unlikely(p == NULL)) {
++				status = htonl(NFS4ERR_DELAY);
++				goto out;
++			}
++			dev->cbd_immediate = ntohl(*p++);
++		} else {
++			dev->cbd_immediate = 0;
++		}
++
++		args->ndevs++;
++
++		dprintk("%s: type %d layout 0x%x immediate %d\n",
++			__func__, dev->cbd_notify_type, dev->cbd_layout_type,
++			dev->cbd_immediate);
++	}
++out:
++	dprintk("%s: status %d ndevs %d\n",
++		__func__, ntohl(status), args->ndevs);
++	return status;
++}
++
+ static __be32 decode_sessionid(struct xdr_stream *xdr,
+ 				 struct nfs4_sessionid *sid)
+ {
+@@ -574,11 +718,11 @@ preprocess_nfs41_op(int nop, unsigned in
+ 	case OP_CB_SEQUENCE:
+ 	case OP_CB_RECALL_ANY:
+ 	case OP_CB_RECALL_SLOT:
++	case OP_CB_LAYOUTRECALL:
++	case OP_CB_NOTIFY_DEVICEID:
+ 		*op = &callback_ops[op_nr];
+ 		break;
+ 
+-	case OP_CB_LAYOUTRECALL:
+-	case OP_CB_NOTIFY_DEVICEID:
+ 	case OP_CB_NOTIFY:
+ 	case OP_CB_PUSH_DELEG:
+ 	case OP_CB_RECALLABLE_OBJ_AVAIL:
+@@ -739,6 +883,18 @@ static struct callback_op callback_ops[]
+ 		.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
+ 	},
+ #if defined(CONFIG_NFS_V4_1)
++	[OP_CB_LAYOUTRECALL] = {
++		.process_op = (callback_process_op_t)nfs4_callback_layoutrecall,
++		.decode_args =
++			(callback_decode_arg_t)decode_layoutrecall_args,
++		.res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
++	},
++	[OP_CB_NOTIFY_DEVICEID] = {
++		.process_op = (callback_process_op_t)nfs4_callback_devicenotify,
++		.decode_args =
++			(callback_decode_arg_t)decode_devicenotify_args,
++		.res_maxsize = CB_OP_DEVICENOTIFY_RES_MAXSZ,
++	},
+ 	[OP_CB_SEQUENCE] = {
+ 		.process_op = (callback_process_op_t)nfs4_callback_sequence,
+ 		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
+diff -up linux-2.6.34.noarch/fs/nfs/client.c.orig linux-2.6.34.noarch/fs/nfs/client.c
+--- linux-2.6.34.noarch/fs/nfs/client.c.orig	2010-09-30 10:15:17.723710000 -0400
++++ linux-2.6.34.noarch/fs/nfs/client.c	2010-09-30 10:17:08.603991000 -0400
+@@ -39,6 +39,7 @@
+ #include <net/ipv6.h>
+ #include <linux/nfs_xdr.h>
+ #include <linux/sunrpc/bc_xprt.h>
++#include <linux/nfs4_pnfs.h>
+ 
+ #include <asm/system.h>
+ 
+@@ -48,6 +49,7 @@
+ #include "iostat.h"
+ #include "internal.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_CLIENT
+ 
+@@ -150,11 +152,14 @@ static struct nfs_client *nfs_alloc_clie
+ 	clp->cl_boot_time = CURRENT_TIME;
+ 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
+ 	clp->cl_minorversion = cl_init->minorversion;
++	clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
+ #endif
+ 	cred = rpc_lookup_machine_cred();
+ 	if (!IS_ERR(cred))
+ 		clp->cl_machine_cred = cred;
+-
++#if defined(CONFIG_NFS_V4_1)
++	INIT_LIST_HEAD(&clp->cl_layouts);
++#endif
+ 	nfs_fscache_get_client_cookie(clp);
+ 
+ 	return clp;
+@@ -178,7 +183,7 @@ static void nfs4_clear_client_minor_vers
+ 		clp->cl_session = NULL;
+ 	}
+ 
+-	clp->cl_call_sync = _nfs4_call_sync;
++	clp->cl_mvops = nfs_v4_minor_ops[0];
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
+@@ -188,7 +193,7 @@ static void nfs4_clear_client_minor_vers
+ static void nfs4_destroy_callback(struct nfs_client *clp)
+ {
+ 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+-		nfs_callback_down(clp->cl_minorversion);
++		nfs_callback_down(clp->cl_mvops->minor_version);
+ }
+ 
+ static void nfs4_shutdown_client(struct nfs_client *clp)
+@@ -251,6 +256,7 @@ void nfs_put_client(struct nfs_client *c
+ 		nfs_free_client(clp);
+ 	}
+ }
++EXPORT_SYMBOL(nfs_put_client);
+ 
+ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ /*
+@@ -343,7 +349,7 @@ static int nfs_sockaddr_match_ipaddr(con
+  * Test if two socket addresses represent the same actual socket,
+  * by comparing (only) relevant fields, including the port number.
+  */
+-static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
++int nfs_sockaddr_cmp(const struct sockaddr *sa1,
+ 			    const struct sockaddr *sa2)
+ {
+ 	if (sa1->sa_family != sa2->sa_family)
+@@ -357,6 +363,7 @@ static int nfs_sockaddr_cmp(const struct
+ 	}
+ 	return 0;
+ }
++EXPORT_SYMBOL(nfs_sockaddr_cmp);
+ 
+ /*
+  * Find a client by IP address and protocol version
+@@ -548,6 +555,7 @@ int nfs4_check_client_ready(struct nfs_c
+ 		return -EPROTONOSUPPORT;
+ 	return 0;
+ }
++EXPORT_SYMBOL(nfs4_check_client_ready);
+ 
+ /*
+  * Initialise the timeout values for a connection
+@@ -865,9 +873,34 @@ error:
+ }
+ 
+ /*
++ * Initialize the pNFS layout driver and setup pNFS related parameters
++ */
++static void nfs4_init_pnfs(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fsinfo *fsinfo)
++{
++#if defined(CONFIG_NFS_V4_1)
++	struct nfs_client *clp = server->nfs_client;
++
++	if (nfs4_has_session(clp) &&
++	    (clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS)) {
++		server->pnfs_blksize = fsinfo->blksize;
++		set_pnfs_layoutdriver(server, mntfh, fsinfo->layouttype);
++		pnfs_set_ds_iosize(server);
++	}
++#endif /* CONFIG_NFS_V4_1 */
++}
++
++static void nfs4_uninit_pnfs(struct nfs_server *server)
++{
++#if defined(CONFIG_NFS_V4_1)
++	if (server->nfs_client && nfs4_has_session(server->nfs_client))
++		unmount_pnfs_layoutdriver(server);
++#endif /* CONFIG_NFS_V4_1 */
++}
++
++/*
+  * Load up the server record from information gained in an fsinfo record
+  */
+-static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo)
++static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fsinfo *fsinfo)
+ {
+ 	unsigned long max_rpc_payload;
+ 
+@@ -897,6 +930,8 @@ static void nfs_server_set_fsinfo(struct
+ 	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
+ 		server->wsize = NFS_MAX_FILE_IO_SIZE;
+ 	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
++	nfs4_init_pnfs(server, mntfh, fsinfo);
++
+ 	server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
+ 
+ 	server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
+@@ -938,7 +973,7 @@ static int nfs_probe_fsinfo(struct nfs_s
+ 	if (error < 0)
+ 		goto out_error;
+ 
+-	nfs_server_set_fsinfo(server, &fsinfo);
++	nfs_server_set_fsinfo(server, mntfh, &fsinfo);
+ 
+ 	/* Get some general file system info */
+ 	if (server->namelen == 0) {
+@@ -1016,6 +1051,7 @@ void nfs_free_server(struct nfs_server *
+ {
+ 	dprintk("--> nfs_free_server()\n");
+ 
++	nfs4_uninit_pnfs(server);
+ 	spin_lock(&nfs_client_lock);
+ 	list_del(&server->client_link);
+ 	list_del(&server->master_link);
+@@ -1126,7 +1162,7 @@ static int nfs4_init_callback(struct nfs
+ 				return error;
+ 		}
+ 
+-		error = nfs_callback_up(clp->cl_minorversion,
++		error = nfs_callback_up(clp->cl_mvops->minor_version,
+ 					clp->cl_rpcclient->cl_xprt);
+ 		if (error < 0) {
+ 			dprintk("%s: failed to start callback. Error = %d\n",
+@@ -1143,10 +1179,8 @@ static int nfs4_init_callback(struct nfs
+  */
+ static int nfs4_init_client_minor_version(struct nfs_client *clp)
+ {
+-	clp->cl_call_sync = _nfs4_call_sync;
+-
+ #if defined(CONFIG_NFS_V4_1)
+-	if (clp->cl_minorversion) {
++	if (clp->cl_mvops->minor_version) {
+ 		struct nfs4_session *session = NULL;
+ 		/*
+ 		 * Create the session and mark it expired.
+@@ -1158,7 +1192,13 @@ static int nfs4_init_client_minor_versio
+ 			return -ENOMEM;
+ 
+ 		clp->cl_session = session;
+-		clp->cl_call_sync = _nfs4_call_sync_session;
++		/*
++		 * The create session reply races with the server back
++		 * channel probe. Mark the client NFS_CS_SESSION_INITING
++		 * so that the client back channel can find the
++		 * nfs_client struct
++		 */
++		clp->cl_cons_state = NFS_CS_SESSION_INITING;
+ 	}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+@@ -1216,7 +1256,7 @@ error:
+ /*
+  * Set up an NFS4 client
+  */
+-static int nfs4_set_client(struct nfs_server *server,
++int nfs4_set_client(struct nfs_server *server,
+ 		const char *hostname,
+ 		const struct sockaddr *addr,
+ 		const size_t addrlen,
+@@ -1259,6 +1299,7 @@ error:
+ 	dprintk("<-- nfs4_set_client() = xerror %d\n", error);
+ 	return error;
+ }
++EXPORT_SYMBOL(nfs4_set_client);
+ 
+ 
+ /*
+@@ -1448,7 +1489,7 @@ struct nfs_server *nfs4_create_referral_
+ 				data->authflavor,
+ 				parent_server->client->cl_xprt->prot,
+ 				parent_server->client->cl_timeout,
+-				parent_client->cl_minorversion);
++				parent_client->cl_mvops->minor_version);
+ 	if (error < 0)
+ 		goto error;
+ 
+diff -up linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig linux-2.6.34.noarch/fs/nfsd/bl_com.c
+--- linux-2.6.34.noarch/fs/nfsd/bl_com.c.orig	2010-09-30 10:17:08.822996000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_com.c	2010-09-30 10:17:08.824003000 -0400
+@@ -0,0 +1,292 @@
++#if defined(CONFIG_SPNFS_BLOCK)
++
++#include <linux/module.h>
++#include <linux/mutex.h>
++#include <linux/init.h>
++#include <linux/types.h>
++#include <linux/slab.h>
++#include <linux/socket.h>
++#include <linux/in.h>
++#include <linux/sched.h>
++#include <linux/exportfs.h>
++#include <linux/namei.h>
++#include <linux/mount.h>
++#include <linux/path.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/workqueue.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++#include <linux/proc_fs.h>
++#include <linux/nfs_fs.h>
++
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd4_block.h>
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++static ssize_t bl_pipe_upcall(struct file *, struct rpc_pipe_msg *,
++    char __user *, size_t);
++static ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t);
++static void bl_pipe_destroy_msg(struct rpc_pipe_msg *);
++
++static struct rpc_pipe_ops bl_upcall_ops = {
++	.upcall		= bl_pipe_upcall,
++	.downcall	= bl_pipe_downcall,
++	.destroy_msg	= bl_pipe_destroy_msg,
++};
++
++bl_comm_t	*bl_comm_global;
++
++int
++nfsd_bl_start(void)
++{
++	bl_comm_t	*bl_comm = NULL;
++	struct path path;
++	struct nameidata nd;
++	int rc;
++
++	dprintk("%s: starting pipe\n", __func__);
++	if (bl_comm_global)
++		return -EEXIST;
++
++	path.mnt = rpc_get_mount();
++	if (IS_ERR(path.mnt))
++		return PTR_ERR(path.mnt);
++
++	/* FIXME: do not abuse rpc_pipefs/nfs */
++	rc = vfs_path_lookup(path.mnt->mnt_root, path.mnt, "/nfs", 0, &nd);
++	if (rc)
++		goto err;
++
++	bl_comm = kzalloc(sizeof (*bl_comm), GFP_KERNEL);
++	if (!bl_comm) {
++		rc = -ENOMEM;
++		goto err;
++	}
++
++	/* FIXME: rename to "spnfs_block" */
++	bl_comm->pipe_dentry = rpc_mkpipe(nd.path.dentry, "pnfs_block", bl_comm,
++					 &bl_upcall_ops, 0);
++	if (IS_ERR(bl_comm->pipe_dentry)) {
++		rc = -EPIPE;
++		goto err;
++	}
++	mutex_init(&bl_comm->lock);
++	mutex_init(&bl_comm->pipe_lock);
++	init_waitqueue_head(&bl_comm->pipe_wq);
++
++	bl_comm_global = bl_comm;
++	return 0;
++err:
++	rpc_put_mount();
++	kfree(bl_comm);
++	return rc;
++}
++
++void
++nfsd_bl_stop(void)
++{
++	bl_comm_t	*c = bl_comm_global;
++
++	dprintk("%s: stopping pipe\n", __func__);
++	if (!c)
++		return;
++	rpc_unlink(c->pipe_dentry);
++	rpc_put_mount();
++	bl_comm_global = NULL;
++	kfree(c);
++}
++
++/* Copy the unread tail of @msg to user buffer @dst, at most @buflen bytes */
++static ssize_t
++bl_pipe_upcall(struct file *file, struct rpc_pipe_msg *msg, char __user *dst,
++    size_t buflen)
++{
++	char	*data	= (char *)msg->data + msg->copied;
++	ssize_t	mlen	= msg->len - msg->copied,
++		left;
++
++	if (mlen > buflen)
++		mlen = buflen;
++	/* copy_to_user() returns the bytes NOT copied, never a negative errno */
++	left = copy_to_user(dst, data, mlen);
++	if (mlen && left == mlen) {
++		msg->errno = -EFAULT;
++		return -EFAULT;
++	}
++	mlen		-= left;	/* count only what reached user space */
++	msg->copied	+= mlen;
++	msg->errno	= 0;
++	return mlen;
++}
++
++static ssize_t
++bl_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
++{
++	struct rpc_inode	*rpci	= RPC_I(filp->f_dentry->d_inode);
++	bl_comm_t		*bc	= (bl_comm_t *)rpci->private;
++	bl_comm_msg_t		*im	= &bc->msg;
++	int			ret;
++	bl_comm_res_t		*res;
++	
++
++	if (mlen == 0) {
++		im->msg_status = PNFS_BLOCK_FAILURE;
++		im->msg_res = NULL;
++		wake_up(&bc->pipe_wq);
++		return -EFAULT;
++	}
++	
++	if ((res = kmalloc(mlen, GFP_KERNEL)) == NULL)
++		return -ENOMEM;
++	
++	if (copy_from_user(res, src, mlen)) {
++		kfree(res);
++		return -EFAULT;
++	}
++	
++	mutex_lock(&bc->pipe_lock);
++	
++	ret		= mlen;
++	im->msg_status	= res->res_status;
++	im->msg_res	= res;
++	
++	wake_up(&bc->pipe_wq);
++	mutex_unlock(&bc->pipe_lock);
++	return ret;
++}
++
++static void
++bl_pipe_destroy_msg(struct rpc_pipe_msg *msg)
++{
++	bl_comm_msg_t	*im = msg->data;
++	bl_comm_t	*bc = container_of(im, struct bl_comm, msg);
++	
++	if (msg->errno >= 0)
++		return;
++
++	mutex_lock(&bc->pipe_lock);
++	im->msg_status = PNFS_BLOCK_FAILURE;
++	wake_up(&bc->pipe_wq);
++	mutex_unlock(&bc->pipe_lock);
++}
++
++/* Upcall @upmsg to the daemon and wait; returns 0 with *res set on success */
++int
++bl_upcall(bl_comm_t *bc, bl_comm_msg_t *upmsg, bl_comm_res_t **res)
++{
++	struct rpc_pipe_msg	msg;
++	DECLARE_WAITQUEUE(wq, current);
++	int			rval	= 1;
++	bl_comm_msg_t		*m;
++
++	if (bc == NULL) {
++		dprintk("%s: No pNFS block daemon available\n", __func__);
++		return 1;
++	}
++
++	mutex_lock(&bc->lock);
++	mutex_lock(&bc->pipe_lock);
++	m = &bc->msg;	/* formed only after bc is known to be non-NULL */
++	memcpy(m, upmsg, sizeof (*m));
++	memset(&msg, 0, sizeof (msg));
++	msg.data = m;
++	msg.len = sizeof (*m);
++
++	add_wait_queue(&bc->pipe_wq, &wq);
++	rval = rpc_queue_upcall(bc->pipe_dentry->d_inode, &msg);
++	if (rval < 0) {
++		remove_wait_queue(&bc->pipe_wq, &wq);
++		goto out;
++	}
++	/* Sleep until bl_pipe_downcall() or bl_pipe_destroy_msg() wakes us */
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	mutex_unlock(&bc->pipe_lock);
++	schedule();
++	__set_current_state(TASK_RUNNING);
++	remove_wait_queue(&bc->pipe_wq, &wq);
++	mutex_lock(&bc->pipe_lock);
++
++	if (m->msg_status == PNFS_BLOCK_SUCCESS) {
++		*res = m->msg_res;
++		rval = 0;
++	} else
++		rval = 1;
++
++out:
++	mutex_unlock(&bc->pipe_lock);
++	mutex_unlock(&bc->lock);
++	return rval;
++}
++
++/* Write handler of the proc ctl file: first int of @buf is the command */
++static ssize_t ctl_write(struct file *file, const char __user *buf, size_t len,
++    loff_t *offset)
++{
++	int		cmd, rc;
++	bl_comm_t	*bc	= bl_comm_global;
++	bl_comm_msg_t	msg;
++	bl_comm_res_t	*res	= NULL;
++
++	if (copy_from_user((int *)&cmd, (int *)buf, sizeof (int)))
++		return -EFAULT;
++
++	/* bl_upcall() passes all of msg to user space; leak no stack bytes */
++	memset(&msg, 0, sizeof (msg));
++	switch (cmd) {
++	case PNFS_BLOCK_CTL_STOP:
++		msg.msg_type = PNFS_UPCALL_MSG_STOP;
++		(void) bl_upcall(bc, &msg, &res);
++		kfree(res);	/* still NULL if the upcall failed */
++		nfsd_bl_stop();
++		break;
++	case PNFS_BLOCK_CTL_START:
++		rc = nfsd_bl_start();
++		if (rc != 0)
++			return rc;
++		break;
++	case PNFS_BLOCK_CTL_VERS:
++		msg.msg_type = PNFS_UPCALL_MSG_VERS;
++		msg.u.msg_vers = PNFS_UPCALL_VERS;
++		if (bl_upcall(bc, &msg, &res)) {
++			dprintk("%s: Failed to contact pNFS block daemon\n",
++			    __func__);
++			return 0;
++		}
++		kfree(res);
++		break;
++	default:
++		dprintk("%s: unknown ctl command %d\n", __func__, cmd);
++		break;
++	}
++	return len;
++}
++
++static struct file_operations ctl_ops = {
++	.write	= ctl_write,
++};
++
++/*
++ * bl_init_proc -- set up proc interfaces
++ *
++ * Creating a pnfs_block directory isn't really required at this point
++ * since we've only got a single node in that directory. If the need for
++ * more nodes doesn't present itself shortly this code should revert
++ * to a single top level node. McNeal 11-Aug-2008.
++ */
++int
++bl_init_proc(void)
++{
++	struct proc_dir_entry *e;
++
++	e = proc_mkdir("fs/pnfs_block", NULL);
++	if (!e)
++		return -ENOMEM;
++
++	e = proc_create("fs/pnfs_block/ctl", 0, NULL, &ctl_ops);
++	if (!e) {
++		remove_proc_entry("fs/pnfs_block", NULL);	/* undo mkdir */
++		return -ENOMEM;
++	}
++	return 0;
++}
++#endif /* CONFIG_SPNFS_BLOCK */
+diff -up linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig linux-2.6.34.noarch/fs/nfsd/bl_ops.c
+--- linux-2.6.34.noarch/fs/nfsd/bl_ops.c.orig	2010-09-30 10:17:08.827998000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/bl_ops.c	2010-09-30 10:17:08.829998000 -0400
+@@ -0,0 +1,1672 @@
++/*
++ *  bl_ops.c
++ *  spNFS
++ *
++ *  Created by Rick McNeal on 4/1/08.
++ *  Copyright 2008 __MyCompanyName__. All rights reserved.
++ *
++ */
++
++/*
++ * Block layout operations.
++ *
++ * These functions, with the exception of pnfs_block_enabled, are assigned to
++ * the super block s_export_op structure.
++ */
++#if defined(CONFIG_SPNFS_BLOCK)
++
++#include <linux/module.h>
++#include <linux/genhd.h>
++#include <linux/fs.h>
++#include <linux/exportfs.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd/export.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/nfsd/debug.h>
++#include <linux/spinlock_types.h>
++#include <linux/dm-ioctl.h>
++#include <asm/uaccess.h>
++#include <linux/falloc.h>
++#include <linux/nfsd4_block.h>
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY	NFSDDBG_PNFS
++
++#define MIN(a, b) ((a) < (b) ? (a) : (b))
++
++#define BL_LAYOUT_HASH_BITS	4
++#define BL_LAYOUT_HASH_SIZE	(1 << BL_LAYOUT_HASH_BITS)
++#define BL_LAYOUT_HASH_MASK	(BL_LAYOUT_HASH_SIZE - 1)
++#define BL_LIST_REQ	(sizeof (struct dm_ioctl) + 256)
++
++#define bl_layout_hashval(id) \
++	((id) & BL_LAYOUT_HASH_MASK)
++
++#define BLL_F_END(p) ((p)->bll_foff + (p)->bll_len)
++#define BLL_S_END(p) ((p)->bll_soff + (p)->bll_len)
++#define _2SECTS(v) ((v) >> 9)
++
++#ifndef READ32
++#define READ32(x)	(x) = ntohl(*p++)
++#define READ64(x)	do {			\
++(x) = (u64)ntohl(*p++) << 32;	\
++(x) |= ntohl(*p++);		\
++} while (0)
++#endif
++
++
++typedef enum {True, False} boolean_t;	/* NB: True == 0, False == 1 */
++/* ---- block layoutget and commit structure ---- */
++typedef struct bl_layout_rec {
++	struct list_head	blr_hash,
++				blr_layouts;
++	dev_t			blr_rdev;
++	struct inode		*blr_inode;
++	int			blr_recalled;	// debug
++	u64			blr_orig_size,
++				blr_commit_size,
++				blr_ext_size;
++	spinlock_t		blr_lock;	// Protects blr_layouts
++} bl_layout_rec_t;
++
++static struct list_head layout_hash;
++static struct list_head layout_hashtbl[BL_LAYOUT_HASH_SIZE];
++static spinlock_t layout_hashtbl_lock;
++
++/* ---- prototypes ---- */
++static boolean_t device_slice(dev_t devid);
++static boolean_t device_dm(dev_t devid);
++static boolean_t layout_inode_add(struct inode *i, bl_layout_rec_t **);
++static bl_layout_rec_t *layout_inode_find(struct inode *i);
++static void layout_inode_del(struct inode *i);
++static char *map_state2name(enum pnfs_block_extent_state4 s);
++static pnfs_blocklayout_devinfo_t *bld_alloc(struct list_head *volume, int type);
++static void bld_free(pnfs_blocklayout_devinfo_t *bld);
++static pnfs_blocklayout_devinfo_t *bld_simple(struct list_head *volumes,
++    dev_t devid, int local_index);
++static pnfs_blocklayout_devinfo_t *bld_slice(struct list_head *volumes,
++    dev_t devid, int my_loc, int idx);
++static int layout_cache_fill_from(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg);
++struct list_head *layout_cache_iter(bl_layout_rec_t *r,
++    struct list_head *bl_possible, struct nfsd4_layout_seg *seg);
++static void layout_cache_merge(bl_layout_rec_t *r, struct list_head *h);
++static int layout_cache_update(bl_layout_rec_t *r, struct list_head *h);
++static void layout_cache_del(bl_layout_rec_t *r, const struct nfsd4_layout_seg *seg);
++static void print_bll(pnfs_blocklayout_layout_t *b, char *);
++static inline boolean_t layout_cache_fill_from_list(bl_layout_rec_t *r,
++    struct list_head *h, struct nfsd4_layout_seg *seg);
++static inline void bll_collapse(bl_layout_rec_t *r,
++    pnfs_blocklayout_layout_t *c);
++static pnfs_blocklayout_layout_t *bll_alloc(u64 offset, u64 len,
++    enum bl_cache_state state, struct list_head *h);
++static pnfs_blocklayout_layout_t *bll_alloc_dup(pnfs_blocklayout_layout_t *b,
++    enum bl_cache_state c, struct list_head *h);
++static inline boolean_t layout_conflict(pnfs_blocklayout_layout_t *b, u32 iomode,
++    enum pnfs_block_extent_state4 *s);
++static void extents_setup(struct fiemap_extent_info *fei);
++static void extents_count(struct fiemap_extent_info *fei, struct inode *i,
++    u64 foff, u64 len);
++static boolean_t extents_get(struct fiemap_extent_info *fei, struct inode *i,
++    u64 foff, u64 len);
++static boolean_t extents_process(struct fiemap_extent_info *fei,
++    struct list_head *bl_candidates, struct nfsd4_layout_seg *, dev_t dev,
++    pnfs_blocklayout_layout_t *b);
++static void extents_cleanup(struct fiemap_extent_info *fei);
++
++void
++nfsd_bl_init(void)
++{
++	int	i;
++	dprintk("%s loaded\n", __func__);
++	spin_lock_init(&layout_hashtbl_lock);
++	INIT_LIST_HEAD(&layout_hash);
++	for (i = 0; i < BL_LAYOUT_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&layout_hashtbl[i]);
++	if (bl_init_proc())	/* void function: the failure can only be logged */
++		dprintk("%s: bl_init_proc failed\n", __func__);
++}
++
++/*
++ * pnfs_block_enabled -- check to see if this file system should be exported
++ * as block pnfs. Returns 1 once the daemon version handshake worked, else 0.
++ */
++int
++pnfs_block_enabled(struct inode *inode, int ex_flags)
++{
++	bl_comm_msg_t	msg;
++	bl_comm_res_t	*res	= NULL;
++	static int bl_comm_once	= 0;	/* set after the first good handshake */
++	
++	dprintk("--> %s\n", __func__);
++	/*
++	 * FIXME: Figure out method to determine if this file system should
++	 * be exported. The following areas need to be checked.
++	 * (1) Validate that this file system was exported as a pNFS
++	 *     block-layout
++	 * (2) Has there been successful communication with the
++	 *     volume daemon?
++	 */
++	/* Check #1 */
++#ifdef notyet
++	if (!(ex_flags & NFSEXP_PNFS_BLOCK)) {
++		dprintk("%s: pnfs_block not set in export\n", __func__);
++		return 0;
++	}
++#endif
++	
++	/* Check #2: version exchange with the daemon, skipped once it worked */
++	if (!bl_comm_once) {
++		msg.msg_type = PNFS_UPCALL_MSG_VERS;
++		msg.u.msg_vers = PNFS_UPCALL_VERS;
++		if (bl_upcall(bl_comm_global, &msg, &res)) {
++			dprintk("%s: Failed to contact pNFS block daemon\n",
++				__func__);
++			return 0;
++		}
++		if (msg.u.msg_vers != res->u.vers) {
++			dprintk("%s: vers mismatch, kernel != daemon\n",
++				__func__);
++			kfree(res);
++			return 0;
++		}
++	}
++	bl_comm_once = 1;
++	/* res is still NULL when the handshake was skipped; kfree(NULL) is fine */
++	kfree(res);
++	
++	dprintk("<-- %s okay\n", __func__);
++	return 1;
++}
++
++int
++bl_layout_type(struct super_block *sb)
++{
++	return LAYOUT_BLOCK_VOLUME;	/* constant answer; sb is not consulted */
++}
++
++int
++bl_getdeviceiter(struct super_block *sb,
++		 u32 layout_type,
++		 struct nfsd4_pnfs_dev_iter_res *res)
++{
++	res->gd_eof = 1;	/* at most one device is ever reported */
++	if (res->gd_cookie)	/* non-zero: the single entry was handed out before */
++		return -ENOENT;
++	res->gd_devid	= sb->s_dev;
++	res->gd_verf	= 1;
++	res->gd_cookie	= 1;	/* makes the next call for this iteration stop */
++	return 0;
++}
++
++static int
++bl_getdeviceinfo_slice(struct super_block *sb, struct exp_xdr_stream *xdr,
++		       const struct nfsd4_pnfs_deviceid *devid)
++{
++	pnfs_blocklayout_devinfo_t	*bld_slice_p,
++					*bld_simple_p,
++					*bld;
++	int				status		= -EIO,
++					location	= 0;
++	struct list_head		volumes;
++	/* Builds one SIMPLE and one SLICE volume for devid and encodes them. */
++	dprintk("--> %s\n", __func__);
++	INIT_LIST_HEAD(&volumes);
++
++	bld_simple_p = bld_simple(&volumes, devid->devid,
++				  location++);
++	if (!bld_simple_p)
++		goto out;
++	bld_slice_p = bld_slice(&volumes, devid->devid, location++,
++	    bld_simple_p->bld_index_loc);
++
++	if (!bld_slice_p)
++		goto out;
++	/* -EIO if a volume could not be built, else the encoder's result */
++	status = blocklayout_encode_devinfo(xdr, &volumes);
++
++out:
++	while (!list_empty(&volumes)) {
++		bld = list_entry(volumes.next, pnfs_blocklayout_devinfo_t,
++		    bld_list);
++		if (bld->bld_type == PNFS_BLOCK_VOLUME_SIMPLE)
++			kfree(bld->u.simple.bld_sig);
++		bld_free(bld);	/* NOTE(review): must unlink bld or the loop never ends */
++	}
++	
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++/*
++ * bl_getdeviceinfo_dm -- encode a SIMPLE+SLICE volume pair per stripe member
++ * followed by the STRIPE volume. Returns the encoder status, else -EIO.
++ */
++static int
++bl_getdeviceinfo_dm(struct super_block *sb, struct exp_xdr_stream *xdr,
++		    const struct nfsd4_pnfs_deviceid *devid)
++{
++	pnfs_blocklayout_devinfo_t	*bld,
++					*top		= NULL;
++	int				status		= -EIO,	// default to error
++					i,
++					location	= 0;
++	struct list_head		volumes;
++	bl_comm_msg_t			msg;
++	bl_comm_res_t			*res		= NULL;	// kfree()d at out
++
++	dprintk("--> %s\n", __func__);
++	INIT_LIST_HEAD(&volumes);
++	msg.msg_type = PNFS_UPCALL_MSG_DMGET;
++	msg.u.msg_dev = devid->devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("%s: upcall for DMGET failed\n", __func__);
++		goto out;
++	}
++	/*
++	 * Don't use bld_alloc() here. If used this will be the first volume
++	 * type added to the list whereas the protocol requires it to be the
++	 * last. Until it is queued below, 'top' is freed by hand at out.
++	 */
++	top = kzalloc(sizeof (*top), GFP_KERNEL);
++	if (!top)
++		goto out;
++	top->bld_type			= PNFS_BLOCK_VOLUME_STRIPE;
++	top->u.stripe.bld_stripes	= res->u.stripe.num_stripes;
++	top->u.stripe.bld_chunk_size	= res->u.stripe.stripe_size * 512LL;
++	dprintk("%s: stripes %d, chunk_size %Lu\n", __func__,
++	    top->u.stripe.bld_stripes, top->u.stripe.bld_chunk_size / 512LL);
++	/* kcalloc() fails cleanly on a count whose byte size would overflow. */
++	top->u.stripe.bld_stripe_indexs = kcalloc(top->u.stripe.bld_stripes,
++						  sizeof (int), GFP_KERNEL);
++	if (!top->u.stripe.bld_stripe_indexs)
++		goto out;
++	for (i = 0; i < top->u.stripe.bld_stripes; i++) {
++		dev_t			dev;
++		pnfs_blocklayout_devinfo_t	*bldp;
++
++		dev = MKDEV(res->u.stripe.devs[i].major,
++			    res->u.stripe.devs[i].minor);
++		if (dev == 0)
++			goto out;
++		bldp = bld_simple(&volumes, dev, location++);
++		if (!bldp) {
++			dprintk("%s: bld_simple failed\n", __func__);
++			goto out;
++		}
++		bldp = bld_slice(&volumes, dev, location++, bldp->bld_index_loc);
++		if (!bldp) {
++			dprintk("%s: bld_slice failed\n", __func__);
++			goto out;
++		}
++		top->u.stripe.bld_stripe_indexs[i] = bldp->bld_index_loc;
++	}
++	list_add_tail(&top->bld_list, &volumes);
++	top = NULL;		// queued: the loop below releases it
++	status = blocklayout_encode_devinfo(xdr, &volumes);
++out:
++	if (top) {		// failed before the stripe volume was queued
++		kfree(top->u.stripe.bld_stripe_indexs);
++		kfree(top);
++	}
++	while (!list_empty(&volumes)) {
++		bld = list_entry(volumes.next, pnfs_blocklayout_devinfo_t,
++		    bld_list);
++		switch (bld->bld_type) {
++			case PNFS_BLOCK_VOLUME_SIMPLE:
++				kfree(bld->u.simple.bld_sig);
++				break;
++			case PNFS_BLOCK_VOLUME_STRIPE:
++				kfree(bld->u.stripe.bld_stripe_indexs);
++				break;
++			default:	// SLICE and CONCAT own no extra memory
++				break;
++		}
++		bld_free(bld);
++	}
++	kfree(res);
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++/*
++ * bl_getdeviceinfo -- determine device tree for requested devid
++ */
++int
++bl_getdeviceinfo(struct super_block *sb, struct exp_xdr_stream *xdr,
++		 u32 layout_type,
++		 const struct nfsd4_pnfs_deviceid *devid)
++{
++	if (device_slice(devid->devid) == True)		/* slice is tried first */
++		return bl_getdeviceinfo_slice(sb, xdr, devid);
++	else if (device_dm(devid->devid) == True)
++		return bl_getdeviceinfo_dm(sb, xdr, devid);
++	return -EINVAL;		/* neither helper recognised the device */
++}
++
++/*
++ * bl_layoutget -- sector-align the range in lg_seg, build its extents, merge
++ * them into the inode's cache and encode them into xdr. Returns NFS4_OK or
++ * an nfsstat4 error; errors past the inode lookup zero lg_seg offset/length.
++ */
++enum nfsstat4
++bl_layoutget(struct inode *i, struct exp_xdr_stream *xdr,
++	     const struct nfsd4_pnfs_layoutget_arg *arg,
++	     struct nfsd4_pnfs_layoutget_res *res)
++{
++	pnfs_blocklayout_layout_t	*b, *nb;
++	bl_layout_rec_t			*r;
++	struct list_head		bl_possible,
++					*bl_candidates	= NULL;
++	boolean_t			del_on_error	= False;
++	int				adj;
++	enum nfsstat4			nfserr		= NFS4_OK;
++
++	dprintk("--> %s (inode=[0x%x:%lu], offset=%Lu, len=%Lu, iomode=%d)\n",
++	    __func__, i->i_sb->s_dev, i->i_ino, _2SECTS(res->lg_seg.offset),
++	    _2SECTS(res->lg_seg.length), res->lg_seg.iomode);
++	if (res->lg_seg.length == 0) {
++		printk("%s: request length of 0, error condition\n", __func__);
++		return NFS4ERR_BADLAYOUT;
++	}
++	/*
++	 * Adjust the length as required per spec.
++	 * - First case is where the length is set to (u64)-1. Cheap means to
++	 *   define the end of the file.
++	 * - Second case is where the I/O mode is read-only, but the request is
++	 *   past the end of the file so the request needs to be trimmed.
++	 */
++	if ((res->lg_seg.length == NFS4_MAX_UINT64) ||
++	    (((res->lg_seg.offset + res->lg_seg.length) > i->i_size) &&
++	     (res->lg_seg.iomode == IOMODE_READ))) {
++		if (res->lg_seg.offset >= i->i_size)	/* length would be 0 or wrap */
++			return NFS4ERR_BADLAYOUT;
++		res->lg_seg.length = i->i_size - res->lg_seg.offset;
++	}
++	adj = res->lg_seg.offset & 511;		/* round out to 512-byte units */
++	res->lg_seg.offset -= adj;
++	res->lg_seg.length = (res->lg_seg.length + adj + 511) & ~511;
++	/* Without a ->fallocate op the range cannot be backed: fail, don't oops. */
++	if (res->lg_seg.iomode != IOMODE_READ)
++		if (!i->i_op->fallocate ||
++		    i->i_op->fallocate(i, FALLOC_FL_KEEP_SIZE,
++				       res->lg_seg.offset, res->lg_seg.length))
++			return NFS4ERR_IO;
++
++	INIT_LIST_HEAD(&bl_possible);
++	if ((r = layout_inode_find(i)) == NULL) {
++		if (layout_inode_add(i, &r) == False) {
++			printk("%s: layout_inode_add failed\n", __func__);
++			return NFS4ERR_IO;
++		}
++		del_on_error = True;
++	}
++	BUG_ON(!r);
++	/* NOTE(review): layout_cache_iter() uses GFP_KERNEL under this spinlock. */
++	spin_lock(&r->blr_lock);
++	if (layout_cache_fill_from(r, &bl_possible, &res->lg_seg)) {
++		/*
++		 * This will send LAYOUTTRYAGAIN error to the client.
++		 */
++		dprintk("%s: layout_cache_fill_from() failed\n", __func__);
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	res->lg_return_on_close	= 1;
++	res->lg_seg.length	= 0;
++	bl_candidates = layout_cache_iter(r, &bl_possible, &res->lg_seg);
++	if (!bl_candidates) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	layout_cache_merge(r, bl_candidates);
++	if (layout_cache_update(r, bl_candidates)) {
++		/* ---- Failed to allocate memory. ---- */
++		dprintk("%s: layout_cache_update() failed\n", __func__);
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	nfserr = blocklayout_encode_layout(xdr, bl_candidates);
++	if (nfserr)
++		dprintk("%s: layoutget xdr routine failed\n", __func__);
++layoutget_cleanup:
++	/* Free both work lists; the heads go away, so no list_del() is needed. */
++	list_for_each_entry_safe(b, nb, &bl_possible, bll_list)
++		kfree(b);
++	if (bl_candidates) {
++		list_for_each_entry_safe(b, nb, bl_candidates, bll_list)
++			kfree(b);
++		kfree(bl_candidates);
++	}
++	spin_unlock(&r->blr_lock);
++	if (unlikely(nfserr)) {
++		if (del_on_error == True)
++			layout_inode_del(i);
++		res->lg_seg.length = 0;
++		res->lg_seg.offset = 0;
++	}
++	dprintk("<-- %s (rval %u)\n", __func__, nfserr);
++	return nfserr;
++}
++
++/*
++ * bl_layoutcommit -- commit changes, especially size, to file system
++ *
++ * Currently this routine isn't called and everything is handled within
++ * nfsd4_layoutcommit(). By not calling this routine the server doesn't
++ * handle a partial return, a set of extents, of the layout. The extents
++ * are decoded here, but nothing is done with them. If this routine is to
++ * be called the interface must change to pass the 'dentry' pointer such
++ * that notify_change() can be called. Returns 0, -EINVAL or -ENOMEM.
++ */
++int
++bl_layoutcommit(struct inode *i,
++		const struct nfsd4_pnfs_layoutcommit_arg *args,
++		struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	bl_layout_rec_t			*r;
++	int				status	= 0;
++	u64				lw_plus;
++
++	dprintk("--> %s (ino [0x%x:%lu])\n", __func__, i->i_sb->s_dev, i->i_ino);
++	r = layout_inode_find(i);
++	if (r) {
++		lw_plus = args->lc_last_wr + 1;
++		if (args->lc_newoffset) {
++			dprintk("  lc_last_wr %Lu\n", lw_plus);
++			if (r->blr_orig_size < lw_plus) {
++				r->blr_orig_size	= lw_plus;
++				res->lc_size_chg	= 1;
++				res->lc_newsize		= lw_plus;
++			}
++		}
++		if (args->lc_up_len) {
++			int	extents = -1, n;	/* n, so 'i' stays the inode */
++			struct pnfs_blocklayout_layout *b = NULL;
++			__be32 *p = args->lc_up_layout;
++			/*
++			 * Client is returning a set of extents which could be
++			 * used to update the file system (section 2.3.2 in
++			 * draft-ietf-nfsv4-pnfs-block-08). Bound the untrusted
++			 * count by lc_up_len (assumed bytes -- TODO confirm).
++			 */
++			if (args->lc_up_len >= 4)
++				READ32(extents);
++			dprintk("  Client returning %d extents: data size %d\n",
++			    extents, args->lc_up_len);
++			if (extents < 0 ||
++			    extents > (args->lc_up_len - 4) / (5 * 8 + 4))
++				status = -EINVAL;
++			else
++				b = kmalloc(sizeof (*b) * extents, GFP_KERNEL);
++			if (b) {
++				for (n = 0; n < extents; n++) {
++					READ64(b[n].bll_vol_id.sbid);
++					READ64(b[n].bll_vol_id.devid);
++					READ64(b[n].bll_foff);
++					READ64(b[n].bll_len);
++					READ64(b[n].bll_soff);
++					READ32(b[n].bll_es);
++					dprintk("  %d: foff %Lu, len %Lu, soff %Lu "
++					    "state %s\n",
++					    n, _2SECTS(b[n].bll_foff),
++					    _2SECTS(b[n].bll_len),
++					    _2SECTS(b[n].bll_soff),
++					    map_state2name(b[n].bll_es));
++				}
++				kfree(b);
++			} else if (!status)
++				status = -ENOMEM;
++		}
++	} else
++		dprintk("%s: Unexpected commit to inode %p\n", __func__, i);
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++int
++bl_layoutreturn(struct inode *i,
++		const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	int				status	= 0;	/* never changed: always 0 */
++	bl_layout_rec_t			*r;
++
++	dprintk("--> %s (ino [0x%x:%lu])\n", __func__, i->i_sb->s_dev, i->i_ino);
++	/* Drop the returned range from the cache if the inode has a record. */
++	r = layout_inode_find(i);
++	if (r) {
++		spin_lock(&r->blr_lock);
++		layout_cache_del(r, &args->lr_seg);
++		spin_unlock(&r->blr_lock);
++		dprintk("    ext_size %Lu, i_size %Lu, orig_size %Lu\n",
++		    r->blr_ext_size, i->i_size, r->blr_orig_size);
++	}
++	/* NOTE(review): also runs for partial returns and when r is NULL -- ok? */
++	layout_inode_del(i);
++	dprintk("<-- %s (rval %d)\n", __func__, status);
++	return status;
++}
++
++/*
++ * bl_layoutrecall -- recall the full layout when [offset, offset + len) hits
++ * a cached extent (RETURN_FILE only). Returns 0, -EINVAL for unknown type.
++ */
++int
++bl_layoutrecall(struct inode *inode, int type, u64 offset, u64 len)
++{
++	struct super_block		*sb;
++	struct nfsd4_pnfs_cb_layout	lr;
++	bl_layout_rec_t			*r;
++	pnfs_blocklayout_layout_t	*b;
++	u64				adj;
++
++	dprintk("--> %s\n", __func__);
++	BUG_ON(!len);
++	switch (type) {
++		case RETURN_FILE:
++			sb = inode->i_sb;
++			dprintk("  recalling layout [0x%x:%lu], %Lu:%Lu\n",
++			    inode->i_sb->s_dev, inode->i_ino,
++				_2SECTS(offset), _2SECTS(len));
++			break;
++		case RETURN_FSID:
++			dprintk("%s: recalling layout for fsid x (unimplemented)\n",
++				__func__);
++			return 0;
++		case RETURN_ALL:
++			/*
++			 * XXX figure out how to get a sb since there's no
++			 * inode ptr
++			 */
++			dprintk("%s: recalling all layouts (unimplemented)\n",
++				__func__);
++			return 0;
++		default:
++			return -EINVAL;
++	}
++restart:
++	r = layout_inode_find(inode);
++	if (r && len && !r->blr_recalled) {
++		spin_lock(&r->blr_lock);
++		list_for_each_entry(b, &r->blr_layouts, bll_list) {
++			if (!r->blr_recalled && !b->bll_recalled &&
++			    (offset >= b->bll_foff) && (offset < BLL_F_END(b))) {
++				b->bll_recalled		= 1;
++				lr.cbl_recall_type	= type;
++				lr.cbl_seg.layout_type	= LAYOUT_BLOCK_VOLUME;
++				lr.cbl_seg.clientid	= 0;
++				lr.cbl_seg.offset	= 0;
++				lr.cbl_seg.length	= NFS4_MAX_UINT64;
++				r->blr_recalled		= 1;
++				dprintk("  FULL LAYOUTRECALL\n");
++				lr.cbl_seg.iomode = IOMODE_ANY;
++				/*
++				 * Currently there are only two cases where the
++				 * layout is being returned.
++				 *    (1) Someone is issuing a NFS_WRITE operation
++				 *        to this layout.
++				 *    (2) The file has been truncated which means
++				 *        the layout is immediately made invalid.
++				 * In both cases the client must write any
++				 * uncommitted modifications to the server via
++				 * NFS_WRITE.
++				 */
++				lr.cbl_layoutchanged = 1;
++				/*
++				 * Need to drop the lock because we'll get a
++				 * layoutreturn which will block waiting for
++				 * the lock. The request will come in on the
++				 * same thread which will cause a deadlock.
++				 * That return may free 'b' (and 'r'), so use
++				 * 'b' before unlocking and neither afterwards.
++				 */
++				adj = MIN(b->bll_len - (offset - b->bll_foff),
++				    len);
++				spin_unlock(&r->blr_lock);
++				nfsd_layout_recall_cb(sb, inode, &lr);
++				offset += adj;
++				len -= adj;
++				if (!len)
++					goto out;
++				/*
++				 * Since layoutreturn will have been called we
++				 * can't assume blr_layouts is still valid,
++				 * so restart.
++				 */
++				goto restart;
++			}
++		}
++		spin_unlock(&r->blr_lock);
++	}
++out:
++	dprintk("<-- %s\n", __func__);
++	return 0;
++}
++
++/*
++ * []------------------------------------------------------------------[]
++ * | Support functions from here on down.				|
++ * []------------------------------------------------------------------[]
++ */
++
++/*
++ * bld_simple -- given a dev_t build a simple volume structure
++ *
++ * Simple volume contains the device signature and offset to that data in
++ * the storage volume. Returns the new volume, or NULL on failure.
++ */
++static pnfs_blocklayout_devinfo_t *
++bld_simple(struct list_head *volumes, dev_t devid, int local_index)
++{
++	pnfs_blocklayout_devinfo_t	*bld	= NULL;
++	bl_comm_msg_t			msg;
++	bl_comm_res_t			*res	= NULL;
++
++	msg.msg_type = PNFS_UPCALL_MSG_GETSIG;
++	msg.u.msg_dev = devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("%s: Failed to get signature information\n", __func__);
++		goto error;
++	}
++
++	bld = bld_alloc(volumes, PNFS_BLOCK_VOLUME_SIMPLE);
++	if (!bld)
++		goto error;	/* the upcall reply still has to be freed */
++
++	bld->bld_index_loc = local_index;	/* callers read it back for the slice */
++	bld->u.simple.bld_offset = (res->u.sig.sector * 512LL) + res->u.sig.offset;
++	bld->u.simple.bld_sig_len = res->u.sig.len;
++	bld->u.simple.bld_sig = kmalloc(res->u.sig.len, GFP_KERNEL);
++	if (!bld->u.simple.bld_sig)
++		goto error;
++
++	memcpy(bld->u.simple.bld_sig, res->u.sig.sig, res->u.sig.len);
++	kfree(res);
++	return bld;
++
++error:
++	if (bld)
++		bld_free(bld);
++	kfree(res);
++	dprintk("%s: error in bld_simple\n", __func__);
++	return NULL;
++}
++
++/*
++ * bld_slice -- given a dev_t build a slice volume structure
++ *
++ * A slice volume contains the length of the slice/partition and its offset
++ * from the beginning of the storage volume. There's also a reference to
++ * the "simple" volume which contains this slice. Returns NULL on failure.
++ */
++static pnfs_blocklayout_devinfo_t *
++bld_slice(struct list_head *volumes, dev_t devid, int my_loc, int simple_loc)
++{
++	pnfs_blocklayout_devinfo_t	*bld;
++	bl_comm_msg_t			msg;
++	bl_comm_res_t			*res;
++	
++	dprintk("--> %s\n", __func__);
++	bld = bld_alloc(volumes, PNFS_BLOCK_VOLUME_SLICE);
++	if (!bld)
++		return NULL;
++	/* Fetch the slice geometry for devid through the GETSLICE upcall. */
++	msg.msg_type	= PNFS_UPCALL_MSG_GETSLICE;
++	msg.u.msg_dev	= devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("Upcall to get slice info failed\n");
++		bld_free(bld);
++		return NULL;
++	}
++	/* The reply is scaled by 512 (presumably sectors to bytes -- confirm). */
++	bld->bld_devid.devid = devid;
++	bld->bld_index_loc	= my_loc;
++	bld->u.slice.bld_start	= res->u.slice.start * 512LL;
++	bld->u.slice.bld_len	= res->u.slice.length * 512LL;
++	bld->u.slice.bld_index	= simple_loc;
++
++	dprintk("%s: start %Lu, len %Lu\n", __func__,
++		bld->u.slice.bld_start / 512LL, bld->u.slice.bld_len / 512LL);
++
++	kfree(res);
++	dprintk("<-- %s (rval %p)\n", __func__, bld);
++	return bld;
++}
++
++static int
++layout_cache_fill_from(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*n;
++	
++	dprintk("--> %s\n", __func__);
++	/* Returns 0, -EIO if the cached-list pass fails, -ENOMEM on allocation. */
++	if (!list_empty(&r->blr_layouts))
++		if (layout_cache_fill_from_list(r, h, seg) == False)
++			return -EIO;
++	
++	/*
++	 * This deals with two conditions.
++	 *    (1) When blr_layouts is empty we need to create the first entry
++	 *    (2) When the range requested falls past the end of any current
++	 *        layout the residual must be taken care of.
++	 */	
++	if (seg->length) {
++		n = bll_alloc(seg->offset, seg->length, BLOCK_LAYOUT_NEW, h);
++		if (!n)
++			return -ENOMEM;
++		dprintk("  remaining at %Lu, len %Lu\n", _2SECTS(n->bll_foff),
++			_2SECTS(n->bll_len));
++	}
++	
++	dprintk("<-- %s\n", __func__);
++	return 0;
++}
++
++/*
++ * layout_cache_iter -- turn the ranges on bl_possible into pNFS extents on a
++ * new list, growing seg->length as it goes. Returns the list, NULL on error.
++ */
++struct list_head *
++layout_cache_iter(bl_layout_rec_t *r, struct list_head *bl_possible,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*b, *n;
++	struct list_head		*bl_candidates;
++	struct fiemap_extent_info	fei;
++	struct inode			*i	= r->blr_inode;
++	dev_t				dev	= r->blr_rdev;
++
++	dprintk("--> %s\n", __func__);
++	bl_candidates = kmalloc(sizeof (*bl_candidates), GFP_KERNEL);
++	if (!bl_candidates)
++		goto drain;
++	INIT_LIST_HEAD(bl_candidates);
++	extents_setup(&fei);
++
++	list_for_each_entry(b, bl_possible, bll_list) {
++		if (b->bll_cache_state == BLOCK_LAYOUT_NEW) {
++			extents_count(&fei, i, b->bll_foff, b->bll_len);
++			if (fei.fi_extents_mapped) {
++				/*
++				 * Common case here. Got a range which has
++				 * extents. Now get those extents and process
++				 * them into pNFS extents.
++				 */
++				if (extents_get(&fei, i, b->bll_foff,
++				    b->bll_len) == False)
++					goto cleanup;
++				if (extents_process(&fei, bl_candidates,
++				    seg, dev, b) == False)
++					goto cleanup;
++				extents_cleanup(&fei);
++			} else if (seg->iomode == IOMODE_READ) {
++				/*
++				 * Found a hole in a file while reading. No
++				 * problem, just create a pNFS extent for the
++				 * range and let the client know there's no
++				 * backing store.
++				 */
++				n = bll_alloc(b->bll_foff, b->bll_len,
++				    BLOCK_LAYOUT_NEW, bl_candidates);
++				if (!n)
++					goto cleanup;
++				n->bll_es = PNFS_BLOCK_NONE_DATA;
++				n->bll_vol_id.sbid = 0;
++				n->bll_vol_id.devid = dev;
++				seg->length += b->bll_len;
++			} else {
++				/*
++				 * There's a problem here. Since the iomode
++				 * is read/write fallocate should have allocated
++				 * any necessary storage for the given range.
++				 */
++				dprintk("    Extent count for RW is 0\n");
++				goto cleanup;
++			}
++		} else {
++			n = bll_alloc_dup(b, b->bll_cache_state, bl_candidates);
++			if (!n)
++				goto cleanup;
++			seg->length += n->bll_len;
++		}
++		if (r->blr_ext_size < (b->bll_foff + b->bll_len))
++			r->blr_ext_size = b->bll_foff + b->bll_len;
++	}
++	/* An empty result has no first extent to take the offset from. */
++	if (!list_empty(bl_candidates)) {
++		b = list_first_entry(bl_candidates,
++		    struct pnfs_blocklayout_layout, bll_list);
++		seg->offset = b->bll_foff;
++	}
++	dprintk("<-- %s okay\n", __func__);
++	goto drain;
++
++cleanup:
++	extents_cleanup(&fei);
++	list_for_each_entry_safe(b, n, bl_candidates, bll_list)
++		kfree(b);
++	kfree(bl_candidates);
++	bl_candidates = NULL;
++	dprintk("<-- %s, error occurred\n", __func__);
++drain:
++	/* bl_possible is consumed on success and on failure alike. */
++	list_for_each_entry_safe(b, n, bl_possible, bll_list) {
++		list_del(&b->bll_list);
++		kfree(b);
++	}
++	return bl_candidates;
++}
++
++/*
++ * layout_cache_merge -- collapse layouts which make up a contiguous range.
++ */
++static void
++layout_cache_merge(bl_layout_rec_t *r, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*p;
++	
++	dprintk("--> %s\n", __func__);
++restart:
++	p = NULL;
++	list_for_each_entry(b, h, bll_list) {
++		if (p && (BLL_S_END(p) == b->bll_soff) &&
++		    (p->bll_es == b->bll_es) &&
++		    (b->bll_es != PNFS_BLOCK_NONE_DATA)) {
++			/*
++			 * We've got a candidate: same state, adjacent storage.
++			 */
++#ifdef too_verbose
++			dprintk("  merge %Lu(f):%Lu(l):%Lu(s) into %Lu(f):%Lu(l):%Lu(s)\n",
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				_2SECTS(b->bll_soff),
++				_2SECTS(p->bll_foff), _2SECTS(p->bll_len),
++				_2SECTS(p->bll_soff));
++#endif
++			
++			if (p->bll_cache_state == BLOCK_LAYOUT_CACHE)
++				p->bll_cache_state = BLOCK_LAYOUT_UPDATE;
++			p->bll_len += b->bll_len;
++			list_del(&b->bll_list);
++			kfree(b);
++			goto restart;	/* 'b' is gone: rescan from the head */
++		} else if (p && (BLL_F_END(p) == b->bll_foff) &&
++			   (p->bll_es == b->bll_es) &&
++			   (b->bll_es == PNFS_BLOCK_NONE_DATA)) {
++			p->bll_len += b->bll_len;
++			list_del(&b->bll_list);
++			kfree(b);
++			goto restart;
++		} else
++			p = b;
++	}
++	dprintk("<-- %s\n", __func__);
++}
++
++/*
++ * layout_cache_update -- fold the candidate list 'h' into r's layout cache.
++ * Returns 0, or -ENOMEM when a cache entry cannot be allocated.
++ */
++static int
++layout_cache_update(bl_layout_rec_t *r, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*c,
++					*n;
++	int				status = 0;
++
++	dprintk("--> %s\n", __func__);
++	if (list_empty(&r->blr_layouts)) {
++		/* ---- Just add entries and return ---- */
++		dprintk("  cache empty for inode 0x%x:%ld\n", r->blr_rdev,
++			r->blr_inode->i_ino);
++		list_for_each_entry(b, h, bll_list) {
++			c = bll_alloc_dup(b, BLOCK_LAYOUT_CACHE,
++					  &r->blr_layouts);
++			if (!c) {
++				status = -ENOMEM;
++				break;
++			}
++			dprintk("    adding %Lu(f):%Lu(l):%Lu(s):%d\n",
++				_2SECTS(c->bll_foff), _2SECTS(c->bll_len),
++				_2SECTS(c->bll_soff), c->bll_es);
++		}
++		return status;
++	}
++
++	list_for_each_entry(b, h, bll_list) {
++		BUG_ON(!b->bll_vol_id.devid);
++		if (b->bll_cache_state == BLOCK_LAYOUT_UPDATE) {
++			boolean_t found = False;
++			list_for_each_entry(c, &r->blr_layouts, bll_list) {
++				if ((b->bll_soff >= c->bll_soff) &&
++				    (b->bll_soff < BLL_S_END(c)) &&
++				    (b->bll_es != PNFS_BLOCK_NONE_DATA)) {
++					u64	u;
++					if ((b->bll_foff < c->bll_foff) ||
++					    (b->bll_foff > BLL_F_END(c)))
++						BUG();
++					u = BLL_S_END(b) - BLL_S_END(c);
++					/*
++					 * The updated cache entry has to be
++					 * different than the current.
++					 * Otherwise the cache state for 'b'
++					 * should be BLOCK_LAYOUT_CACHE.
++					 */
++					BUG_ON(BLL_S_END(b) < BLL_S_END(c));
++					dprintk("  "
++						"updating %Lu(f):%Lu(l):%Lu(s) to len %Lu\n",
++						_2SECTS(c->bll_foff),
++						_2SECTS(c->bll_len),
++						_2SECTS(c->bll_soff),
++						_2SECTS(c->bll_len + u));
++					c->bll_len += u;
++					bll_collapse(r, c);
++					found = True;
++					break;
++				}
++			}
++
++			if (found == False) {
++				dprintk("  ERROR Expected to find"
++				    " %Lu(f):%Lu(l):%Lu(s), but didn't\n",
++				    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				    _2SECTS(b->bll_soff));
++				list_for_each_entry(c, &r->blr_layouts, bll_list)
++					print_bll(c, "Cached");
++				BUG();
++			}
++		} else if (b->bll_cache_state == BLOCK_LAYOUT_NEW) {
++			c = list_first_entry(&r->blr_layouts,
++			    struct pnfs_blocklayout_layout, bll_list);
++			if (b->bll_foff < c->bll_foff) {
++				/*
++				 * Special case where new entry is before
++				 * first cached entry.
++				 */
++				c = bll_alloc_dup(b, BLOCK_LAYOUT_CACHE, NULL);
++				if (!c)
++					return -ENOMEM;
++				list_add(&c->bll_list, &r->blr_layouts);
++				dprintk("  new entry at head of list at %Lu, "
++					"len %Lu\n",
++					_2SECTS(c->bll_foff), _2SECTS(c->bll_len));
++			} else {
++				list_for_each_entry(c, &r->blr_layouts,
++				    bll_list) {
++					n = list_entry(c->bll_list.next,
++					    struct pnfs_blocklayout_layout,
++					    bll_list);
++					/*
++					 * This is ugly, but can't think of
++					 * another way to examine this case.
++					 * Consider the following. Need to
++					 * add an entry which starts at 40
++					 * and the cache has the following
++					 * entries:
++					 * Start    Length
++					 * 10       5
++					 * 30       5
++					 * 50       5
++					 * So, need to look and see if the new
++					 * entry starts after the current
++					 * cache, but before the next one.
++					 * There's a catch in that the next
++					 * entry might not be valid as it's
++					 * really just a pointer to the list
++					 * head.
++					 */
++					if ((b->bll_foff >= BLL_F_END(c)) &&
++					    ((c->bll_list.next == &r->blr_layouts) ||
++					     (b->bll_foff < n->bll_foff))) {
++						n = bll_alloc_dup(b,
++								  BLOCK_LAYOUT_CACHE, NULL);
++						if (!n)
++							return -ENOMEM;
++						dprintk("  adding new %Lu:%Lu"
++							" after %Lu:%Lu\n",
++							_2SECTS(n->bll_foff),
++							_2SECTS(n->bll_len),
++							_2SECTS(c->bll_foff),
++							_2SECTS(c->bll_len));
++						list_add(&n->bll_list,
++							 &c->bll_list);
++						break;
++					}
++				}
++			}
++		}
++	}
++	dprintk("<-- %s\n", __func__);
++	return status;
++}
++
++/*
++ * layout_cache_del -- remove range seg_in from r's cache (trim/split lines).
++ */
++static void
++layout_cache_del(bl_layout_rec_t *r, const struct nfsd4_layout_seg *seg_in)
++{
++	struct pnfs_blocklayout_layout	*b, *n;
++	u64				len;
++	struct nfsd4_layout_seg		seg = *seg_in;
++
++	dprintk("--> %s\n", __func__);
++	if (seg.length == NFS4_MAX_UINT64) {
++		r->blr_recalled = 0;
++		dprintk("  Fast return of all layouts\n");
++		while (!list_empty(&r->blr_layouts)) {
++			b = list_entry(r->blr_layouts.next,
++				       struct pnfs_blocklayout_layout, bll_list);
++			dprintk("    foff %Lu, len %Lu, soff %Lu\n",
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				_2SECTS(b->bll_soff));
++			list_del(&b->bll_list);
++			kfree(b);
++		}
++		goto complete;
++	}
++restart:
++	list_for_each_entry(b, &r->blr_layouts, bll_list) {
++		if (seg.offset == b->bll_foff) {
++			/*
++			 * This handles the following three cases:
++			 * (1) return layout matches entire cache layout
++			 * (2) return layout matches beginning portion of cache
++			 * (3) return layout matches entire cache layout and
++			 *     into next entry. Varies from #1 in end case.
++			 */
++			dprintk("  match on offsets, %Lu:%Lu\n",
++				_2SECTS(seg.offset), _2SECTS(seg.length));
++			len = MIN(seg.length, b->bll_len);
++			b->bll_foff	+= len;
++			b->bll_soff	+= len;
++			b->bll_len	-= len;
++			seg.length	-= len;
++			seg.offset	+= len;
++			if (!b->bll_len) {
++				list_del(&b->bll_list);
++				kfree(b);
++				dprintk("    removing cache line\n");
++				if (!seg.length) {
++					dprintk("    also finished\n");
++					goto complete;
++				}
++				/*
++				 * Since 'b' was freed we can't continue at the
++				 * next entry which is referenced as
++				 * b->bll_list.next by the list_for_each_entry
++				 * macro. Need to restart the loop.
++				 * TODO: Think about creating a dummy 'b' which
++				 *       would keep list_for_each_entry() happy.
++				 */
++				goto restart;
++			}
++			if (!seg.length) {
++				dprintk("    finished, but cache line not"
++					"empty\n");
++				goto complete;
++			}
++		} else if ((seg.offset >= b->bll_foff) &&
++		    (seg.offset < BLL_F_END(b))) {
++			/* layout being returned is within this cache line. */
++			dprintk("  layout %Lu:%Lu within cache line %Lu:%Lu\n",
++				_2SECTS(seg.offset), _2SECTS(seg.length),
++				_2SECTS(b->bll_foff), _2SECTS(b->bll_len));
++			BUG_ON(!seg.length);
++			if ((seg.offset + seg.length) >= BLL_F_END(b)) {
++				/*
++				 * Layout returned starts in the middle of
++				 * cache entry and just need to trim back
++				 * cache to shorter length.
++				 */
++				dprintk("    trim back cache line\n");
++				len = seg.offset - b->bll_foff;
++				seg.offset += b->bll_len - len;
++				seg.length -= b->bll_len - len;
++				b->bll_len = len;
++				if (!seg.length)
++					goto complete;
++			} else {
++				/*
++				 * Need to split current cache layout because
++				 * chunk is being removed from the middle.
++				 */
++				dprintk("    split cache line\n");
++				len = seg.offset + seg.length;
++				n = bll_alloc(len,
++					      (b->bll_foff + b->bll_len) - len,
++					      BLOCK_LAYOUT_CACHE, NULL);
++				if (!n)		/* no memory: keep the line whole */
++					goto complete;
++				n->bll_soff = b->bll_soff + (len - b->bll_foff);
++				list_add(&n->bll_list, &b->bll_list);
++				b->bll_len = seg.offset - b->bll_foff;
++				goto complete;
++			}
++		}
++	}
++complete:
++	if (list_empty(&r->blr_layouts))
++		r->blr_recalled = 0;
++	dprintk("<-- %s\n", __func__);
++}
++
++/*
++ * layout_cache_fill_from_list -- walk r->blr_layouts and append to 'h' a NEW
++ * entry for each part of 'seg' that starts before a cached entry and a CACHE
++ * entry for each part that starts inside one, advancing 'seg' as it goes.
++ * Returns False on allocation failure or iomode conflict, True otherwise.
++ */
++static inline boolean_t
++layout_cache_fill_from_list(bl_layout_rec_t *r, struct list_head *h,
++    struct nfsd4_layout_seg *seg)
++{
++	pnfs_blocklayout_layout_t	*b,
++					*n;
++	enum pnfs_block_extent_state4	s;
++	
++	list_for_each_entry(b, &r->blr_layouts, bll_list) {
++		if (seg->offset < b->bll_foff) {
++			n = bll_alloc(seg->offset,
++			    MIN(seg->length, b->bll_foff - seg->offset),
++			    BLOCK_LAYOUT_NEW, NULL);
++			if (!n)
++				return False;
++			
++			list_add(&n->bll_list, h->prev);
++			dprintk("  new: %Lu:%Lu, added before %Lu:%Lu\n",
++			    _2SECTS(n->bll_foff), _2SECTS(n->bll_len),
++			    _2SECTS(b->bll_foff), _2SECTS(b->bll_len));
++			seg->offset += n->bll_len;
++			seg->length -= n->bll_len;
++			if (!seg->length)
++				break;
++		}
++		
++		if ((seg->offset >= b->bll_foff) &&
++		    (seg->offset < BLL_F_END(b))) {
++			if (layout_conflict(b, seg->iomode, &s) == False) {
++				dprintk("  CONFLICT FOUND: "
++				    "%Lu(f):%Lu(l):%Lu(s) state %d, iomode %d\n",
++				    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++				    _2SECTS(b->bll_soff), b->bll_es,
++				    seg->iomode);
++				return False;
++			}
++			n = bll_alloc(seg->offset,
++			    MIN(seg->length, BLL_F_END(b) - seg->offset),
++			    BLOCK_LAYOUT_CACHE, h);
++			if (!n)	/* must precede the dprintk, which reads n */
++				return False;
++			
++			dprintk("  CACHE hit: Found %Lu(f):%Lu(l): "
++			    "in %Lu(f):%Lu(l):%Lu(s):%d\n",
++			    _2SECTS(n->bll_foff), _2SECTS(n->bll_len),
++			    _2SECTS(b->bll_foff), _2SECTS(b->bll_len),
++			    _2SECTS(b->bll_soff), b->bll_es);
++			n->bll_soff = b->bll_soff + seg->offset - b->bll_foff;
++			n->bll_vol_id.sbid = 0;
++			n->bll_vol_id.devid = b->bll_vol_id.devid;
++			n->bll_es = s;
++			seg->offset += n->bll_len;
++			seg->length -= n->bll_len;
++			if (!seg->length)
++				break;
++		}
++	}
++	return True;
++}
++
++static u64
++bll_alloc_holey(struct list_head *bl_candidates, u64 offset, u64 length,
++    dev_t dev)
++{
++	pnfs_blocklayout_layout_t	*n;
++	
++	n = bll_alloc(offset, length, BLOCK_LAYOUT_NEW, bl_candidates);
++	if (!n)
++		return 0;
++	n->bll_es = PNFS_BLOCK_NONE_DATA;
++	n->bll_vol_id.sbid = 0;
++	n->bll_vol_id.devid = dev;
++	
++	return n->bll_len;
++}
++
++static void
++extents_setup(struct fiemap_extent_info *fei)
++{
++	fei->fi_extents_start	= NULL;
++}
++
++/*
++ * extents_count -- Determine the number of extents for a given range.
++ *
++ * No need to call set_fs() here because the function
++ * doesn't use copy_to_user() if it's only counting
++ * the number of extents needed.
++ */
++static void
++extents_count(struct fiemap_extent_info *fei, struct inode *i, u64 foff, u64 len)
++{
++	dprintk("    Need fiemap of %Ld:%Ld\n", _2SECTS(foff), _2SECTS(len));
++	fei->fi_flags		= FIEMAP_FLAG_SYNC;
++	fei->fi_extents_max	= 0;
++	fei->fi_extents_start	= NULL;
++	fei->fi_extents_mapped	= 0;
++	i->i_op->fiemap(i, fei, foff, len + (1 << i->i_sb->s_blocksize_bits) - 1);
++}
++
++/*
++ * extents_get -- Get list of extents for range
++ *
++ * extents_count() must have been called before this routine such that
++ * fi_extents_mapped is known.
++ * Returns True with fei->fi_extents_start holding the extents, else False.
++ */
++static boolean_t
++extents_get(struct fiemap_extent_info *fei, struct inode *i, u64 foff, u64 len)
++{
++	int			rval;
++	struct fiemap_extent	*fe;
++	mm_segment_t		old_fs = get_fs();
++	
++	/*
++	 * Now allocate the correct amount of zeroed space
++	 * needed. It's possible for the file to have changed
++	 * between calls which would require more space for
++	 * the extents. A zero count is rejected up front: a
++	 * zero-sized buffer must never be handed back as if
++	 * it held extents.
++	 */
++	if (!fei->fi_extents_mapped)
++		return False;
++	fe = kcalloc(fei->fi_extents_mapped, sizeof (*fe), GFP_KERNEL);
++	if (!fe)
++		return False;
++	
++	fei->fi_extents_max	= fei->fi_extents_mapped;
++	fei->fi_extents_mapped	= 0;
++	fei->fi_extents_start	= fe;
++	
++	set_fs(KERNEL_DS);
++	rval = i->i_op->fiemap(i, fei, foff, len +
++	    (1 << i->i_sb->s_blocksize_bits) - 1);
++	set_fs(old_fs);
++	
++	if (rval || !fei->fi_extents_mapped) {
++		dprintk("    No extents. Wanted %d, got %d\n",
++			fei->fi_extents_max, fei->fi_extents_mapped);
++		kfree(fe);
++		fei->fi_extents_start = NULL;
++		return False;
++	} else
++		return True;
++}
++
++/*
++ * extents_process -- runs through the extent returned from the file system and
++ *	 creates block layout entries.
++ */
++static boolean_t
++extents_process(struct fiemap_extent_info *fei, struct list_head *bl_candidates,
++    struct nfsd4_layout_seg *seg, dev_t dev, pnfs_blocklayout_layout_t *b)
++{
++	struct fiemap_extent		*fep,
++					*fep_last	= NULL;
++	int				i;
++	pnfs_blocklayout_layout_t	*n;
++	u64				last_end,
++					rval;
++	
++	dprintk("--> %s\n", __func__);
++	for (fep = fei->fi_extents_start, i = 0; i < fei->fi_extents_mapped;
++	    i++, fep++) {
++		
++		BUG_ON(!fep->fe_physical);
++		/*
++		 * Deal with corner cases of hole-y files.
++		 */
++		if (fep_last && ((fep_last->fe_logical + fep_last->fe_length) !=
++				 fep->fe_logical)) {
++			
++			/*
++			 * If the last extent doesn't end logically
++			 * at the beginning of the current we've got
++			 * hole and need to create a pNFS extent.
++			 */
++			dprintk("    Got a hole at %Ld:%Ld \n", 
++			    _2SECTS(fep_last->fe_logical),
++			    _2SECTS(fep_last->fe_length));
++			last_end = fep_last->fe_logical + fep_last->fe_length;
++			rval = bll_alloc_holey(bl_candidates, last_end,
++			    fep->fe_logical - last_end, dev);
++			if (!rval)
++				return False;
++			seg->length += rval;
++		}
++		
++		n = bll_alloc(fep->fe_logical, fep->fe_length,
++		    BLOCK_LAYOUT_NEW, bl_candidates);
++		if (unlikely(n == NULL)) {
++			dprintk("%s: bll_alloc failed\n", __func__);
++			return False;
++		}
++		
++		n->bll_soff = fep->fe_physical;
++		n->bll_es = seg->iomode == IOMODE_READ ?
++		    PNFS_BLOCK_READ_DATA : PNFS_BLOCK_READWRITE_DATA;
++		n->bll_vol_id.sbid = 0;
++		n->bll_vol_id.devid = dev;
++		seg->length += fep->fe_length;
++		print_bll(n, "New extent");
++		fep_last = fep;
++	}
++	dprintk("<-- %s (i=%d)\n", __func__, i);
++	
++	return True;
++}
++
++static void
++extents_cleanup(struct fiemap_extent_info *fei)
++{
++	if (fei->fi_extents_start) {
++		kfree(fei->fi_extents_start);
++		fei->fi_extents_start = NULL;
++	}
++}
++
++/*
++ * device_slice -- True if devid can be opened and its disk has minors > 1
++ */
++static boolean_t
++device_slice(dev_t devid)
++{
++	struct block_device	*bd	= open_by_devnum(devid, FMODE_READ);
++	boolean_t		rval	= False;
++	
++	if (!IS_ERR(bd)) {	/* open failure is an ERR_PTR, never NULL */
++		if (bd->bd_disk->minors > 1)
++			rval = True;
++		blkdev_put(bd, FMODE_READ);
++	}
++	return rval;
++}
++
++/*
++ * device_dm -- check to see if device is a Device Mapper volume.
++ *
++ * Returns True for DM or False if not (or if the upcall fails)
++ */
++static boolean_t
++device_dm(dev_t devid)
++{
++	boolean_t		rval = False;
++	bl_comm_msg_t		msg;
++	bl_comm_res_t		*res = NULL;
++	
++	msg.msg_type	= PNFS_UPCALL_MSG_DMCHK;
++	msg.u.msg_dev	= devid;
++	if (bl_upcall(bl_comm_global, &msg, &res)) {
++		dprintk("Failed upcall to check on DM status\n");
++	} else if (res->u.dm_vol) {
++		rval = True;
++		dprintk("Device is DM volume\n");
++	} else
++		dprintk("Device is not DM volume\n");
++	kfree(res);	/* res starts NULL, so this is safe if never set */
++	
++	return rval;
++}
++
++/*
++ * layout_inode_add -- allocate a layout record for inode 'i', append it to
++ * the layout_hash list under layout_hashtbl_lock and return it through 'p'.
++ * Returns False if fiemap/fallocate are missing or the allocation fails.
++ */
++static boolean_t
++layout_inode_add(struct inode *i, bl_layout_rec_t **p)
++{
++	bl_layout_rec_t		*r	= NULL;
++
++	if (!i->i_op->fiemap || !i->i_op->fallocate) {
++		printk("pNFS: file system doesn't support required fiemap or "
++		    "fallocate methods\n");
++		return False;
++	}
++	
++	r = kmalloc(sizeof (*r), GFP_KERNEL);
++	if (!r)
++		return False;
++
++	r->blr_rdev	= i->i_sb->s_dev;
++	r->blr_inode	= i;
++	r->blr_orig_size = i->i_size;
++	r->blr_ext_size	= 0;
++	r->blr_recalled	= 0;
++	INIT_LIST_HEAD(&r->blr_layouts);
++	spin_lock_init(&r->blr_lock);
++	spin_lock(&layout_hashtbl_lock);
++	list_add_tail(&r->blr_hash, &layout_hash);
++	spin_unlock(&layout_hashtbl_lock);
++	*p = r;
++	return True;
++}
++
++static bl_layout_rec_t *
++__layout_inode_find(struct inode *i)
++{
++	bl_layout_rec_t	*r;
++	
++	if (!list_empty(&layout_hash)) {
++		list_for_each_entry(r, &layout_hash, blr_hash) {
++			if ((r->blr_inode->i_ino == i->i_ino) &&
++			    (r->blr_rdev == i->i_sb->s_dev)) {
++				return r;
++			}
++		}
++	}
++	return NULL;
++}
++
++static bl_layout_rec_t *
++layout_inode_find(struct inode *i)
++{
++	bl_layout_rec_t	*r;
++
++	spin_lock(&layout_hashtbl_lock);
++	r = __layout_inode_find(i);
++	spin_unlock(&layout_hashtbl_lock);
++	
++	return r;
++}
++
++static void
++layout_inode_del(struct inode *i)
++{
++	bl_layout_rec_t	*r;
++	
++	spin_lock(&layout_hashtbl_lock);
++	r = __layout_inode_find(i);
++	if (r) {
++		spin_lock(&r->blr_lock);
++		if (list_empty(&r->blr_layouts)) {
++			list_del(&r->blr_hash);
++			spin_unlock(&r->blr_lock);
++			kfree(r);
++		} else {
++			spin_unlock(&r->blr_lock);
++		}
++	} else {
++		dprintk("%s: failed to find inode [0x%x:%lu] in table for delete\n",
++			__func__, i->i_sb->s_dev, i->i_ino);
++	}
++	spin_unlock(&layout_hashtbl_lock);
++}
++
++/*
++ * map_state2name -- converts state in ascii string.
++ *
++ * Used for debug messages only.
++ */
++static char *
++map_state2name(enum pnfs_block_extent_state4 s)
++{
++	switch (s) {
++	case PNFS_BLOCK_READWRITE_DATA:	return "     RW";
++	case PNFS_BLOCK_READ_DATA:	return "     RO";
++	case PNFS_BLOCK_INVALID_DATA:	return "INVALID";
++	case PNFS_BLOCK_NONE_DATA:	return "   NONE";
++	default:
++		BUG();
++	}
++}
++
++static pnfs_blocklayout_devinfo_t *
++bld_alloc(struct list_head *volumes, int type)
++{
++	pnfs_blocklayout_devinfo_t *bld;
++	
++	bld = kmalloc(sizeof (*bld), GFP_KERNEL);
++	if (!bld)
++		return NULL;
++
++	memset(bld, 0, sizeof (*bld));
++	bld->bld_type = type;
++	list_add_tail(&bld->bld_list, volumes);
++
++	return bld;
++}
++
++static void
++bld_free(pnfs_blocklayout_devinfo_t *bld)
++{
++	list_del(&bld->bld_list);
++	kfree(bld);
++}
++
++static void
++print_bll(pnfs_blocklayout_layout_t *b, char *text)
++{
++	dprintk("    BLL: %s\n", text);
++	dprintk("    foff %Lu, soff %Lu, len %Lu, state %s\n",
++	    _2SECTS(b->bll_foff), _2SECTS(b->bll_soff), _2SECTS(b->bll_len),
++	    map_state2name(b->bll_es));
++}
++
++static inline void
++bll_collapse(bl_layout_rec_t *r, pnfs_blocklayout_layout_t *c)
++{
++	pnfs_blocklayout_layout_t	*n;
++	int				dbg_count	= 0;
++	u64				endpoint;
++	
++	BUG_ON(c->bll_es == PNFS_BLOCK_NONE_DATA);
++	while (c->bll_list.next != &r->blr_layouts) {
++		n = list_entry(c->bll_list.next,
++			       struct pnfs_blocklayout_layout, bll_list);
++		endpoint = BLL_S_END(c);
++		if ((n->bll_soff >= c->bll_soff) &&
++		    (n->bll_soff < endpoint)) {
++			if (endpoint < BLL_S_END(n)) {
++				/*
++				 * The following is possible.
++				 *
++				 * 
++				 * Existing: +---+                 +---+
++				 *      New: +-----------------------+
++				 * The client request merge entries together
++				 * but didn't require picking up all of the
++				 * last entry. So, we still need to delete
++				 * the last entry and add the remaining space
++				 * to the new entry.
++				 */
++				c->bll_len += BLL_S_END(n) - endpoint;
++			}
++			dbg_count++;
++			list_del(&n->bll_list);
++			kfree(n);
++		} else {
++			break;
++		}
++	}
++	/* ---- Debug only, remove before integration ---- */
++	if (dbg_count)
++		dprintk("  Collapsed %d cache entries between %Lu(s) and %Lu(s)\n",
++			dbg_count, _2SECTS(c->bll_soff), _2SECTS(BLL_S_END(c)));
++}
++
++static pnfs_blocklayout_layout_t *
++bll_alloc(u64 offset, u64 len, enum bl_cache_state state, struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*n	= NULL;
++	
++	n = kmalloc(sizeof (*n), GFP_KERNEL);
++	if (n) {
++		memset(n, 0, sizeof (*n));
++		n->bll_foff		= offset;
++		n->bll_len		= len;
++		n->bll_cache_state	= state;
++		if (h)
++			list_add_tail(&n->bll_list, h);
++	}
++	return n;
++}
++
++static pnfs_blocklayout_layout_t *
++bll_alloc_dup(pnfs_blocklayout_layout_t *b, enum bl_cache_state c,
++	      struct list_head *h)
++{
++	pnfs_blocklayout_layout_t	*n	= NULL;
++	
++	n = bll_alloc(b->bll_foff, b->bll_len, c, h);
++	if (n) {
++		n->bll_es			= b->bll_es;
++		n->bll_soff			= b->bll_soff;
++		n->bll_vol_id.devid		= b->bll_vol_id.devid;
++	}
++	return n;
++}
++
++static inline boolean_t
++layout_conflict(pnfs_blocklayout_layout_t *b, u32 iomode,
++		enum pnfs_block_extent_state4 *s)
++{
++	/* ---- Normal case ---- */
++	*s = b->bll_es;
++	
++	switch (b->bll_es) {
++	case PNFS_BLOCK_READWRITE_DATA:
++		if (iomode == IOMODE_READ)
++			*s = PNFS_BLOCK_READ_DATA;
++		/* ---- Any use is permitted. ---- */
++		break;
++	case PNFS_BLOCK_READ_DATA:
++		/* ---- Committed as read only data. ---- */
++		if (iomode == IOMODE_RW)
++			return False;
++		break;
++	case PNFS_BLOCK_INVALID_DATA:
++		/* ---- Blocks have been allocated, but not initialized ---- */
++		if (iomode == IOMODE_READ)
++			*s = PNFS_BLOCK_NONE_DATA;
++		break;
++	case PNFS_BLOCK_NONE_DATA:
++		/* ---- Hole-y file. No backing store avail. ---- */
++		if (iomode != IOMODE_READ)
++			return False;
++		break;
++	default:
++		BUG();
++	}
++	return True;
++}
++
++#endif /* CONFIG_SPNFS_BLOCK */
+diff -up linux-2.6.34.noarch/fs/nfs/delegation.c.orig linux-2.6.34.noarch/fs/nfs/delegation.c
+--- linux-2.6.34.noarch/fs/nfs/delegation.c.orig	2010-09-30 10:15:17.729711000 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.c	2010-09-30 10:17:08.609991000 -0400
+@@ -104,7 +104,8 @@ again:
+ 			continue;
+ 		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+ 			continue;
+-		if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
++		if (memcmp(state->stateid.u.data, stateid->u.data,
++			   sizeof(state->stateid.u.data)) != 0)
+ 			continue;
+ 		get_nfs_open_context(ctx);
+ 		spin_unlock(&inode->i_lock);
+@@ -133,8 +134,8 @@ void nfs_inode_reclaim_delegation(struct
+ 	if (delegation != NULL) {
+ 		spin_lock(&delegation->lock);
+ 		if (delegation->inode != NULL) {
+-			memcpy(delegation->stateid.data, res->delegation.data,
+-			       sizeof(delegation->stateid.data));
++			memcpy(delegation->stateid.u.data, res->delegation.u.data,
++			       sizeof(delegation->stateid.u.data));
+ 			delegation->type = res->delegation_type;
+ 			delegation->maxsize = res->maxsize;
+ 			oldcred = delegation->cred;
+@@ -187,8 +188,9 @@ static struct nfs_delegation *nfs_detach
+ 	if (delegation == NULL)
+ 		goto nomatch;
+ 	spin_lock(&delegation->lock);
+-	if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
+-				sizeof(delegation->stateid.data)) != 0)
++	if (stateid != NULL && memcmp(delegation->stateid.u.data,
++				      stateid->u.data,
++				      sizeof(delegation->stateid.u.data)) != 0)
+ 		goto nomatch_unlock;
+ 	list_del_rcu(&delegation->super_list);
+ 	delegation->inode = NULL;
+@@ -216,8 +218,8 @@ int nfs_inode_set_delegation(struct inod
+ 	delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
+ 	if (delegation == NULL)
+ 		return -ENOMEM;
+-	memcpy(delegation->stateid.data, res->delegation.data,
+-			sizeof(delegation->stateid.data));
++	memcpy(delegation->stateid.u.data, res->delegation.u.data,
++			sizeof(delegation->stateid.u.data));
+ 	delegation->type = res->delegation_type;
+ 	delegation->maxsize = res->maxsize;
+ 	delegation->change_attr = nfsi->change_attr;
+@@ -471,9 +473,7 @@ void nfs_expire_unreferenced_delegations
+ /*
+  * Asynchronous delegation recall!
+  */
+-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
+-				      int (*validate_stateid)(struct nfs_delegation *delegation,
+-							      const nfs4_stateid *stateid))
++int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
+ {
+ 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ 	struct nfs_delegation *delegation;
+@@ -481,7 +481,7 @@ int nfs_async_inode_return_delegation(st
+ 	rcu_read_lock();
+ 	delegation = rcu_dereference(NFS_I(inode)->delegation);
+ 
+-	if (!validate_stateid(delegation, stateid)) {
++	if (!clp->cl_mvops->validate_stateid(delegation, stateid)) {
+ 		rcu_read_unlock();
+ 		return -ENOENT;
+ 	}
+@@ -562,7 +562,8 @@ int nfs4_copy_delegation_stateid(nfs4_st
+ 	rcu_read_lock();
+ 	delegation = rcu_dereference(nfsi->delegation);
+ 	if (delegation != NULL) {
+-		memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
++		memcpy(dst->u.data, delegation->stateid.u.data,
++		       sizeof(dst->u.data));
+ 		ret = 1;
+ 	}
+ 	rcu_read_unlock();
+diff -up linux-2.6.34.noarch/fs/nfs/delegation.h.orig linux-2.6.34.noarch/fs/nfs/delegation.h
+--- linux-2.6.34.noarch/fs/nfs/delegation.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/delegation.h	2010-09-30 10:17:08.615000000 -0400
+@@ -34,9 +34,7 @@ enum {
+ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+ int nfs_inode_return_delegation(struct inode *inode);
+-int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid,
+-				      int (*validate_stateid)(struct nfs_delegation *delegation,
+-							      const nfs4_stateid *stateid));
++int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
+ void nfs_inode_return_delegation_noreclaim(struct inode *inode);
+ 
+ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
+diff -up linux-2.6.34.noarch/fs/nfsd/export.c.orig linux-2.6.34.noarch/fs/nfsd/export.c
+--- linux-2.6.34.noarch/fs/nfsd/export.c.orig	2010-09-30 10:15:18.314726000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/export.c	2010-09-30 10:17:08.834999000 -0400
+@@ -17,11 +17,19 @@
+ #include <linux/module.h>
+ #include <linux/exportfs.h>
+ 
++#include <linux/nfsd/nfsd4_pnfs.h>
++#if defined(CONFIG_SPNFS)
++#include <linux/nfsd4_spnfs.h>
++#if defined(CONFIG_SPNFS_BLOCK)
++#include <linux/nfsd4_block.h>
++#endif
++#endif
+ #include <linux/nfsd/syscall.h>
+ #include <net/ipv6.h>
+ 
+ #include "nfsd.h"
+ #include "nfsfh.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY	NFSDDBG_EXPORT
+ 
+@@ -352,6 +360,40 @@ static int svc_export_upcall(struct cach
+ 	return sunrpc_cache_pipe_upcall(cd, h, svc_export_request);
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static struct pnfsd_cb_operations pnfsd_cb_op = {
++	.cb_layout_recall = nfsd_layout_recall_cb,
++	.cb_device_notify = nfsd_device_notify_cb,
++
++	.cb_get_state = nfs4_pnfs_cb_get_state,
++	.cb_change_state = nfs4_pnfs_cb_change_state,
++};
++
++#if defined(CONFIG_SPNFS)
++static struct pnfs_export_operations spnfs_export_ops = {
++	.layout_type = spnfs_layout_type,
++	.get_device_info = spnfs_getdeviceinfo,
++	.get_device_iter = spnfs_getdeviceiter,
++	.layout_get = spnfs_layoutget,
++	.layout_return = spnfs_layoutreturn,
++};
++
++static struct pnfs_export_operations spnfs_ds_export_ops = {
++	.get_state = spnfs_get_state,
++};
++
++#if defined(CONFIG_SPNFS_BLOCK)
++static struct pnfs_export_operations bl_export_ops = {
++	.layout_type = bl_layout_type,
++	.get_device_info = bl_getdeviceinfo,
++	.get_device_iter = bl_getdeviceiter,
++	.layout_get = bl_layoutget,
++	.layout_return = bl_layoutreturn,
++};
++#endif /* CONFIG_SPNFS_BLOCK */
++#endif /* CONFIG_SPNFS */
++#endif /* CONFIG_PNFSD */
++
+ static struct svc_export *svc_export_update(struct svc_export *new,
+ 					    struct svc_export *old);
+ static struct svc_export *svc_export_lookup(struct svc_export *);
+@@ -395,6 +437,47 @@ static int check_export(struct inode *in
+ 		return -EINVAL;
+ 	}
+ 
++#if !defined(CONFIG_SPNFS)
++	if (inode->i_sb->s_pnfs_op &&
++	    (!inode->i_sb->s_pnfs_op->layout_type ||
++	     !inode->i_sb->s_pnfs_op->get_device_info ||
++	     !inode->i_sb->s_pnfs_op->layout_get)) {
++		dprintk("exp_export: export of invalid fs pnfs export ops.\n");
++		return -EINVAL;
++	}
++#endif /* CONFIG_SPNFS */
++
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++	if (!inode->i_sb->s_pnfs_op)
++		pnfsd_lexp_init(inode);
++	return 0;
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
++#if defined(CONFIG_SPNFS)
++#if defined(CONFIG_SPNFS_BLOCK)
++	if (pnfs_block_enabled(inode, *flags)) {
++		dprintk("set pnfs block export structure... \n");
++		inode->i_sb->s_pnfs_op = &bl_export_ops;
++	} else
++#endif /* CONFIG_SPNFS_BLOCK */
++	/*
++	 * spnfs_enabled() indicates we're an MDS.
++	 * XXX Better to check an export time option as well.
++	 */
++	if (spnfs_enabled()) {
++		dprintk("set spnfs export structure...\n");
++		inode->i_sb->s_pnfs_op = &spnfs_export_ops;
++	} else {
++		dprintk("%s spnfs not in use\n", __func__);
++
++		/*
++		 * get_state is needed if we're a DS using spnfs.
++		 * XXX Better to check an export time option instead.
++		 */
++		inode->i_sb->s_pnfs_op = &spnfs_ds_export_ops;
++	}
++#endif /* CONFIG_SPNFS */
++
+ 	return 0;
+ 
+ }
+@@ -586,6 +669,8 @@ static int svc_export_parse(struct cache
+ 					if (exp.ex_uuid == NULL)
+ 						err = -ENOMEM;
+ 				}
++			} else if (strcmp(buf, "pnfs") == 0) {
++				exp.ex_pnfs = 1;
+ 			} else if (strcmp(buf, "secinfo") == 0)
+ 				err = secinfo_parse(&mesg, buf, &exp);
+ 			else
+@@ -660,6 +745,8 @@ static int svc_export_show(struct seq_fi
+ 				seq_printf(m, "%02x", exp->ex_uuid[i]);
+ 			}
+ 		}
++		if (exp->ex_pnfs)
++			seq_puts(m, ",pnfs");
+ 		show_secinfo(m, exp);
+ 	}
+ 	seq_puts(m, ")\n");
+@@ -687,6 +774,7 @@ static void svc_export_init(struct cache
+ 	new->ex_fslocs.locations = NULL;
+ 	new->ex_fslocs.locations_count = 0;
+ 	new->ex_fslocs.migrated = 0;
++	new->ex_pnfs = 0;
+ }
+ 
+ static void export_update(struct cache_head *cnew, struct cache_head *citem)
+@@ -699,6 +787,7 @@ static void export_update(struct cache_h
+ 	new->ex_anon_uid = item->ex_anon_uid;
+ 	new->ex_anon_gid = item->ex_anon_gid;
+ 	new->ex_fsid = item->ex_fsid;
++	new->ex_pnfs = item->ex_pnfs;
+ 	new->ex_uuid = item->ex_uuid;
+ 	item->ex_uuid = NULL;
+ 	new->ex_pathname = item->ex_pathname;
+@@ -1635,8 +1724,17 @@ nfsd_export_init(void)
+ 	if (rv)
+ 		return rv;
+ 	rv = cache_register(&svc_expkey_cache);
+-	if (rv)
++	if (rv) {
+ 		cache_unregister(&svc_export_cache);
++		goto out;
++	}
++#if defined(CONFIG_PNFSD)
++	spin_lock(&pnfsd_cb_ctl.lock);
++	pnfsd_cb_ctl.module = THIS_MODULE;
++	pnfsd_cb_ctl.cb_op = &pnfsd_cb_op;
++	spin_unlock(&pnfsd_cb_ctl.lock);
++#endif /* CONFIG_PNFSD */
++out:
+ 	return rv;
+ 
+ }
+@@ -1664,6 +1762,12 @@ nfsd_export_shutdown(void)
+ 
+ 	exp_writelock();
+ 
++#if defined(CONFIG_PNFSD)
++	spin_lock(&pnfsd_cb_ctl.lock);
++	pnfsd_cb_ctl.module = NULL;
++	pnfsd_cb_ctl.cb_op = NULL;
++	spin_unlock(&pnfsd_cb_ctl.lock);
++#endif /* CONFIG_PNFSD */
+ 	cache_unregister(&svc_expkey_cache);
+ 	cache_unregister(&svc_export_cache);
+ 	svcauth_unix_purge();
+diff -up linux-2.6.34.noarch/fs/nfs/direct.c.orig linux-2.6.34.noarch/fs/nfs/direct.c
+--- linux-2.6.34.noarch/fs/nfs/direct.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/direct.c	2010-09-30 10:17:08.620991000 -0400
+@@ -267,6 +267,38 @@ static const struct rpc_call_ops nfs_rea
+ 	.rpc_release = nfs_direct_read_release,
+ };
+ 
++static long nfs_direct_read_execute(struct nfs_read_data *data,
++				    struct rpc_task_setup *task_setup_data,
++				    struct rpc_message *msg)
++{
++	struct inode *inode = data->inode;
++	struct rpc_task *task;
++
++	nfs_fattr_init(&data->fattr);
++	msg->rpc_argp = &data->args;
++	msg->rpc_resp = &data->res;
++
++	task_setup_data->task = &data->task;
++	task_setup_data->callback_data = data;
++	NFS_PROTO(inode)->read_setup(data, msg);
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++
++	rpc_put_task(task);
++
++	dprintk("NFS: %5u initiated direct read call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++
++	return 0;
++}
++
+ /*
+  * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
+  * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
+@@ -283,7 +315,6 @@ static ssize_t nfs_direct_read_schedule_
+ 	unsigned long user_addr = (unsigned long)iov->iov_base;
+ 	size_t count = iov->iov_len;
+ 	size_t rsize = NFS_SERVER(inode)->rsize;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = ctx->cred,
+ 	};
+@@ -343,26 +374,9 @@ static ssize_t nfs_direct_read_schedule_
+ 		data->res.fattr = &data->fattr;
+ 		data->res.eof = 0;
+ 		data->res.count = bytes;
+-		nfs_fattr_init(&data->fattr);
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+ 
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		NFS_PROTO(inode)->read_setup(data, &msg);
+-
+-		task = rpc_run_task(&task_setup_data);
+-		if (IS_ERR(task))
+-			break;
+-		rpc_put_task(task);
+-
+-		dprintk("NFS: %5u initiated direct read call "
+-			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				bytes,
+-				(unsigned long long)data->args.offset);
++		if (nfs_direct_read_execute(data, &task_setup_data, &msg))
++			break;
+ 
+ 		started += bytes;
+ 		user_addr += bytes;
+@@ -448,12 +462,15 @@ static void nfs_direct_free_writedata(st
+ }
+ 
+ #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
++static long nfs_direct_write_execute(struct nfs_write_data *data,
++				     struct rpc_task_setup *task_setup_data,
++				     struct rpc_message *msg);
++
+ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+ {
+ 	struct inode *inode = dreq->inode;
+ 	struct list_head *p;
+ 	struct nfs_write_data *data;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = dreq->ctx->cred,
+ 	};
+@@ -487,25 +504,7 @@ static void nfs_direct_write_reschedule(
+ 		 * Reuse data->task; data->args should not have changed
+ 		 * since the original request was sent.
+ 		 */
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+-		NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-		/*
+-		 * We're called via an RPC callback, so BKL is already held.
+-		 */
+-		task = rpc_run_task(&task_setup_data);
+-		if (!IS_ERR(task))
+-			rpc_put_task(task);
+-
+-		dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				data->args.count,
+-				(unsigned long long)data->args.offset);
++		nfs_direct_write_execute(data, &task_setup_data, &msg);
+ 	}
+ 
+ 	if (put_dreq(dreq))
+@@ -548,10 +547,31 @@ static const struct rpc_call_ops nfs_com
+ 	.rpc_release = nfs_direct_commit_release,
+ };
+ 
++static long nfs_direct_commit_execute(struct nfs_direct_req *dreq,
++				      struct nfs_write_data *data,
++				      struct rpc_task_setup *task_setup_data,
++				      struct rpc_message *msg)
++{
++	struct rpc_task *task;
++
++	NFS_PROTO(data->inode)->commit_setup(data, msg);
++
++	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
++	dreq->commit_data = NULL;
++
++	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++
++	rpc_put_task(task);
++	return 0;
++}
++
+ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
+ {
+ 	struct nfs_write_data *data = dreq->commit_data;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+@@ -579,16 +599,7 @@ static void nfs_direct_commit_schedule(s
+ 	data->res.verf = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	NFS_PROTO(data->inode)->commit_setup(data, &msg);
+-
+-	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
+-	dreq->commit_data = NULL;
+-
+-	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (!IS_ERR(task))
+-		rpc_put_task(task);
++	nfs_direct_commit_execute(dreq, data, &task_setup_data, &msg);
+ }
+ 
+ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+@@ -690,6 +701,36 @@ static const struct rpc_call_ops nfs_wri
+ 	.rpc_release = nfs_direct_write_release,
+ };
+ 
++static long nfs_direct_write_execute(struct nfs_write_data *data,
++				     struct rpc_task_setup *task_setup_data,
++				     struct rpc_message *msg)
++{
++	struct inode *inode = data->inode;
++	struct rpc_task *task;
++
++	task_setup_data->task = &data->task;
++	task_setup_data->callback_data = data;
++	msg->rpc_argp = &data->args;
++	msg->rpc_resp = &data->res;
++	NFS_PROTO(inode)->write_setup(data, msg);
++
++	task = rpc_run_task(task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++
++	rpc_put_task(task);
++
++	dprintk("NFS: %5u initiated direct write call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++
++	return 0;
++}
++
+ /*
+  * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+  * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
+@@ -705,7 +746,6 @@ static ssize_t nfs_direct_write_schedule
+ 	struct inode *inode = ctx->path.dentry->d_inode;
+ 	unsigned long user_addr = (unsigned long)iov->iov_base;
+ 	size_t count = iov->iov_len;
+-	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_cred = ctx->cred,
+ 	};
+@@ -771,24 +811,8 @@ static ssize_t nfs_direct_write_schedule
+ 		data->res.verf = &data->verf;
+ 		nfs_fattr_init(&data->fattr);
+ 
+-		task_setup_data.task = &data->task;
+-		task_setup_data.callback_data = data;
+-		msg.rpc_argp = &data->args;
+-		msg.rpc_resp = &data->res;
+-		NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-		task = rpc_run_task(&task_setup_data);
+-		if (IS_ERR(task))
+-			break;
+-		rpc_put_task(task);
+-
+-		dprintk("NFS: %5u initiated direct write call "
+-			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
+-				data->task.tk_pid,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				bytes,
+-				(unsigned long long)data->args.offset);
++		if (nfs_direct_write_execute(data, &task_setup_data, &msg))
++			break;
+ 
+ 		started += bytes;
+ 		user_addr += bytes;
+diff -up linux-2.6.34.noarch/fs/nfsd/Kconfig.orig linux-2.6.34.noarch/fs/nfsd/Kconfig
+--- linux-2.6.34.noarch/fs/nfsd/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Kconfig	2010-09-30 10:17:08.815000000 -0400
+@@ -79,3 +79,52 @@ config NFSD_V4
+ 	  available from http://linux-nfs.org/.
+ 
+ 	  If unsure, say N.
++
++config PNFSD
++	bool "NFSv4.1 server support for Parallel NFS (pNFS) (DEVELOPER ONLY)"
++	depends on NFSD_V4 && EXPERIMENTAL
++	select EXPORTFS_FILE_LAYOUT
++	help
++	  This option enables support for the parallel NFS features of the
++	  minor version 1 of the NFSv4 protocol (draft-ietf-nfsv4-minorversion1)
++	  in the kernel's NFS server.
++
++	  Unless you're an NFS developer, say N.
++
++config PNFSD_LOCAL_EXPORT
++	bool "Enable pNFS support for exporting local filesystems for debugging purposes"
++	depends on PNFSD
++	help
++	  Say Y here if you want your pNFS server to export local file systems
++	  over the files layout type.  With this option the MDS (metadata
++	  server) functions also as a single DS (data server).  This is mostly
++	  useful for development and debugging purposes.
++
++	  If unsure, say N.
++
++config SPNFS
++	bool "Provide spNFS server support (EXPERIMENTAL)"
++	depends on PNFSD
++	select RPCSEC_GSS_KRB5
++	help
++	  Say Y here if you want spNFS server support.
++
++	  If unsure, say N.
++
++config SPNFS_LAYOUTSEGMENTS
++	bool "Allow spNFS to return partial file layouts (EXPERIMENTAL)"
++	depends on SPNFS
++	select RPCSEC_GSS_KRB5
++	help
++	  Say Y here if you want spNFS to be able to return layout segments.
++
++	  If unsure, say N.
++
++config SPNFS_BLOCK
++	bool "Provide Block Layout server support (EXPERIMENTAL)"
++	depends on SPNFS
++	select EXPORTFS_BLOCK_LAYOUT
++	help
++	  Say Y here if you want spNFS block layout support.
++
++	  If unsure, say N.
+diff -up linux-2.6.34.noarch/fs/nfsd/Makefile.orig linux-2.6.34.noarch/fs/nfsd/Makefile
+--- linux-2.6.34.noarch/fs/nfsd/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/Makefile	2010-09-30 10:17:08.820000000 -0400
+@@ -11,3 +11,7 @@ nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs
+ nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
+ nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
+ 			   nfs4acl.o nfs4callback.o nfs4recover.o
++nfsd-$(CONFIG_PNFSD)	+= nfs4pnfsd.o nfs4pnfsdlm.o nfs4pnfsds.o
++nfsd-$(CONFIG_PNFSD_LOCAL_EXPORT) += pnfsd_lexp.o
++nfsd-$(CONFIG_SPNFS)	+= spnfs_com.o spnfs_ops.o
++nfsd-$(CONFIG_SPNFS_BLOCK) += bl_com.o bl_ops.o
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4callback.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4callback.c.orig	2010-09-30 10:15:18.320728000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4callback.c	2010-09-30 10:17:08.841998000 -0400
+@@ -40,7 +40,6 @@
+ 
+ #define NFSPROC4_CB_NULL 0
+ #define NFSPROC4_CB_COMPOUND 1
+-#define NFS4_STATEID_SIZE 16
+ 
+ /* Index of predefined Linux callback client operations */
+ 
+@@ -48,11 +47,17 @@ enum {
+ 	NFSPROC4_CLNT_CB_NULL = 0,
+ 	NFSPROC4_CLNT_CB_RECALL,
+ 	NFSPROC4_CLNT_CB_SEQUENCE,
++#if defined(CONFIG_PNFSD)
++	NFSPROC4_CLNT_CB_LAYOUT,
++	NFSPROC4_CLNT_CB_DEVICE,
++#endif
+ };
+ 
+ enum nfs_cb_opnum4 {
+ 	OP_CB_RECALL            = 4,
++	OP_CB_LAYOUT            = 5,
+ 	OP_CB_SEQUENCE          = 11,
++	OP_CB_DEVICE            = 14,
+ };
+ 
+ #define NFS4_MAXTAGLEN		20
+@@ -78,6 +83,19 @@ enum nfs_cb_opnum4 {
+ #define NFS4_dec_cb_recall_sz		(cb_compound_dec_hdr_sz  +      \
+ 					cb_sequence_dec_sz +            \
+ 					op_dec_sz)
++#define NFS4_enc_cb_layout_sz		(cb_compound_enc_hdr_sz +       \
++					cb_sequence_enc_sz +            \
++					1 + 3 +                         \
++					enc_nfs4_fh_sz + 4)
++#define NFS4_dec_cb_layout_sz		(cb_compound_dec_hdr_sz  +      \
++					cb_sequence_dec_sz +            \
++					op_dec_sz)
++#define NFS4_enc_cb_device_sz		(cb_compound_enc_hdr_sz +       \
++					cb_sequence_enc_sz +            \
++					1 + 6)
++#define NFS4_dec_cb_device_sz		(cb_compound_dec_hdr_sz  +      \
++					cb_sequence_dec_sz +            \
++					op_dec_sz)
+ 
+ /*
+ * Generic encode routines from fs/nfs/nfs4xdr.c
+@@ -94,6 +112,10 @@ xdr_writemem(__be32 *p, const void *ptr,
+ }
+ 
+ #define WRITE32(n)               *p++ = htonl(n)
++#define WRITE64(n)               do {				\
++	*p++ = htonl((u32)((n) >> 32));				\
++	*p++ = htonl((u32)(n));					\
++} while (0)
+ #define WRITEMEM(ptr,nbytes)     do {                           \
+ 	p = xdr_writemem(p, ptr, nbytes);                       \
+ } while (0)
+@@ -204,6 +226,16 @@ nfs_cb_stat_to_errno(int stat)
+  */
+ 
+ static void
++encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
++{
++	__be32 *p;
++
++	RESERVE_SPACE(sizeof(stateid_t));
++	WRITE32(sid->si_generation);
++	WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
++}
++
++static void
+ encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
+ {
+ 	__be32 * p;
+@@ -228,10 +260,10 @@ encode_cb_recall(struct xdr_stream *xdr,
+ 	__be32 *p;
+ 	int len = dp->dl_fh.fh_size;
+ 
+-	RESERVE_SPACE(12+sizeof(dp->dl_stateid) + len);
++	RESERVE_SPACE(4);
+ 	WRITE32(OP_CB_RECALL);
+-	WRITE32(dp->dl_stateid.si_generation);
+-	WRITEMEM(&dp->dl_stateid.si_opaque, sizeof(stateid_opaque_t));
++	encode_stateid(xdr, &dp->dl_stateid);
++	RESERVE_SPACE(8 + (XDR_QUADLEN(len) << 2));
+ 	WRITE32(0); /* truncate optimization not implemented */
+ 	WRITE32(len);
+ 	WRITEMEM(&dp->dl_fh.fh_base, len);
+@@ -259,6 +291,111 @@ encode_cb_sequence(struct xdr_stream *xd
+ 	hdr->nops++;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++#include "pnfsd.h"
++
++static void
++encode_cb_layout(struct xdr_stream *xdr, struct nfs4_layoutrecall *clr,
++		 struct nfs4_cb_compound_hdr *hdr)
++{
++	u32 *p;
++
++	BUG_ON(hdr->minorversion == 0);
++
++	RESERVE_SPACE(20);
++	WRITE32(OP_CB_LAYOUT);
++	WRITE32(clr->cb.cbl_seg.layout_type);
++	WRITE32(clr->cb.cbl_seg.iomode);
++	WRITE32(clr->cb.cbl_layoutchanged);
++	WRITE32(clr->cb.cbl_recall_type);
++	if (unlikely(clr->cb.cbl_recall_type == RETURN_FSID)) {
++		struct nfs4_fsid fsid = clr->cb.cbl_fsid;
++
++		RESERVE_SPACE(16);
++		WRITE64(fsid.major);
++		WRITE64(fsid.minor);
++		dprintk("%s: type %x iomode %d changed %d recall_type %d "
++			"fsid 0x%llx-0x%llx\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type, fsid.major, fsid.minor);
++	} else if (clr->cb.cbl_recall_type == RETURN_FILE) {
++		int len = clr->clr_file->fi_fhlen;
++		stateid_t *cbl_sid = (stateid_t *)&clr->cb.cbl_sid;
++
++		RESERVE_SPACE(20 + len);
++		WRITE32(len);
++		WRITEMEM(clr->clr_file->fi_fhval, len);
++		WRITE64(clr->cb.cbl_seg.offset);
++		WRITE64(clr->cb.cbl_seg.length);
++		encode_stateid(xdr, cbl_sid);
++		dprintk("%s: type %x iomode %d changed %d recall_type %d "
++			"offset %lld length %lld stateid " STATEID_FMT "\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type,
++			clr->cb.cbl_seg.offset, clr->cb.cbl_seg.length,
++			STATEID_VAL(cbl_sid));
++	} else {
++		dprintk("%s: type %x iomode %d changed %d recall_type %d\n",
++			__func__, clr->cb.cbl_seg.layout_type,
++			clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
++			clr->cb.cbl_recall_type);
++	}
++	hdr->nops++;
++}
++
++static void
++encode_cb_device(struct xdr_stream *xdr, struct nfs4_notify_device *nd,
++		 struct nfs4_cb_compound_hdr *hdr)
++{
++	u32 *p;
++	int i;
++	int len					= nd->nd_list->cbd_len;
++	struct nfsd4_pnfs_cb_dev_item *cbd	= nd->nd_list->cbd_list;
++
++	dprintk("NFSD %s: --> num %d\n", __func__, len);
++
++	BUG_ON(hdr->minorversion == 0);
++
++	RESERVE_SPACE(8);
++	WRITE32(OP_CB_DEVICE);
++
++	/* notify4 cnda_changes<>; */
++	WRITE32(len);
++	for (i = 0; i < len; i++) {
++		dprintk("%s: nt %d lt %d devid x%llx-x%llx im %d i %d\n",
++			__func__, cbd[i].cbd_notify_type,
++			cbd[i].cbd_layout_type,
++			cbd[i].cbd_devid.sbid,
++			cbd[i].cbd_devid.devid,
++			cbd[i].cbd_immediate, i);
++
++		BUG_ON(cbd[i].cbd_notify_type != NOTIFY_DEVICEID4_CHANGE &&
++		       cbd[i].cbd_notify_type != NOTIFY_DEVICEID4_DELETE);
++		RESERVE_SPACE(32);
++		/* bitmap4         notify_mask; */
++		WRITE32(1);
++		WRITE32(cbd[i].cbd_notify_type);
++		/* opaque     notify_vals<>; */
++		if (cbd[i].cbd_notify_type == NOTIFY_DEVICEID4_CHANGE)
++			WRITE32(24);
++		else
++			WRITE32(20);
++		WRITE32(cbd[i].cbd_layout_type);
++		WRITE64(cbd[i].cbd_devid.sbid);
++		WRITE64(cbd[i].cbd_devid.devid);
++
++		if (cbd[i].cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) {
++			RESERVE_SPACE(4);
++			WRITE32(cbd[i].cbd_immediate);
++		}
++	}
++	hdr->nops++;
++}
++#endif /* CONFIG_PNFSD */
++
+ static int
+ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
+ {
+@@ -288,6 +425,45 @@ nfs4_xdr_enc_cb_recall(struct rpc_rqst *
+ 	return 0;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static int
++nfs4_xdr_enc_cb_layout(struct rpc_rqst *req, u32 *p,
++		       struct nfs4_rpc_args *rpc_args)
++{
++	struct xdr_stream xdr;
++	struct nfs4_layoutrecall *args = rpc_args->args_op;
++	struct nfs4_cb_compound_hdr hdr = {
++		.ident = 0,
++		.minorversion = rpc_args->args_seq.cbs_minorversion,
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_cb_compound_hdr(&xdr, &hdr);
++	encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
++	encode_cb_layout(&xdr, args, &hdr);
++	encode_cb_nops(&hdr);
++	return 0;
++}
++
++static int
++nfs4_xdr_enc_cb_device(struct rpc_rqst *req, u32 *p,
++		       struct nfs4_rpc_args *rpc_args)
++{
++	struct xdr_stream xdr;
++	struct nfs4_notify_device *args = rpc_args->args_op;
++	struct nfs4_cb_compound_hdr hdr = {
++		.ident = 0,
++		.minorversion = rpc_args->args_seq.cbs_minorversion,
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_cb_compound_hdr(&xdr, &hdr);
++	encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr);
++	encode_cb_device(&xdr, args, &hdr);
++	encode_cb_nops(&hdr);
++	return 0;
++}
++#endif /* CONFIG_PNFSD */
+ 
+ static int
+ decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){
+@@ -403,6 +579,48 @@ out:
+ 	return status;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static int
++nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp, u32 *p,
++		       struct nfsd4_cb_sequence *seq)
++{
++	struct xdr_stream xdr;
++	struct nfs4_cb_compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_cb_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_cb_sequence(&xdr, seq, rqstp);
++	if (status)
++		goto out;
++	status = decode_cb_op_hdr(&xdr, OP_CB_LAYOUT);
++out:
++	return status;
++}
++
++static int
++nfs4_xdr_dec_cb_device(struct rpc_rqst *rqstp, u32 *p,
++		       struct nfsd4_cb_sequence *seq)
++{
++	struct xdr_stream xdr;
++	struct nfs4_cb_compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_cb_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_cb_sequence(&xdr, seq, rqstp);
++	if (status)
++		goto out;
++	status = decode_cb_op_hdr(&xdr, OP_CB_DEVICE);
++out:
++	return status;
++}
++#endif /* CONFIG_PNFSD */
++
+ /*
+  * RPC procedure tables
+  */
+@@ -420,6 +638,10 @@ out:
+ static struct rpc_procinfo     nfs4_cb_procedures[] = {
+     PROC(CB_NULL,      NULL,     enc_cb_null,     dec_cb_null),
+     PROC(CB_RECALL,    COMPOUND,   enc_cb_recall,      dec_cb_recall),
++#if defined(CONFIG_PNFSD)
++    PROC(CB_LAYOUT,    COMPOUND,   enc_cb_layout,      dec_cb_layout),
++    PROC(CB_DEVICE,    COMPOUND,   enc_cb_device,      dec_cb_device),
++#endif
+ };
+ 
+ static struct rpc_version       nfs_cb_version4 = {
+@@ -606,10 +828,9 @@ out:
+  * TODO: cb_sequence should support referring call lists, cachethis, multiple
+  * slots, and mark callback channel down on communication errors.
+  */
+-static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
++static void nfsd4_cb_prepare_sequence(struct rpc_task *task,
++				      struct nfs4_client *clp)
+ {
+-	struct nfs4_delegation *dp = calldata;
+-	struct nfs4_client *clp = dp->dl_client;
+ 	struct nfs4_rpc_args *args = task->tk_msg.rpc_argp;
+ 	u32 minorversion = clp->cl_cb_conn.cb_minorversion;
+ 	int status = 0;
+@@ -629,11 +850,15 @@ static void nfsd4_cb_prepare(struct rpc_
+ 	rpc_call_start(task);
+ }
+ 
+-static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
++static void nfsd4_cb_recall_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs4_delegation *dp = calldata;
+-	struct nfs4_client *clp = dp->dl_client;
++	nfsd4_cb_prepare_sequence(task, dp->dl_client);
++}
+ 
++static void nfsd4_cb_done_sequence(struct rpc_task *task,
++				   struct nfs4_client *clp)
++{
+ 	dprintk("%s: minorversion=%d\n", __func__,
+ 		clp->cl_cb_conn.cb_minorversion);
+ 
+@@ -657,7 +882,7 @@ static void nfsd4_cb_recall_done(struct 
+ 	struct nfs4_client *clp = dp->dl_client;
+ 	struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
+ 
+-	nfsd4_cb_done(task, calldata);
++	nfsd4_cb_done_sequence(task, clp);
+ 
+ 	if (current_rpc_client == NULL) {
+ 		/* We're shutting down; give up. */
+@@ -688,7 +913,7 @@ static void nfsd4_cb_recall_done(struct 
+ 	if (dp->dl_retries--) {
+ 		rpc_delay(task, 2*HZ);
+ 		task->tk_status = 0;
+-		rpc_restart_call(task);
++		rpc_restart_call_prepare(task);
+ 		return;
+ 	} else {
+ 		atomic_set(&clp->cl_cb_set, 0);
+@@ -704,7 +929,7 @@ static void nfsd4_cb_recall_release(void
+ }
+ 
+ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
+-	.rpc_call_prepare = nfsd4_cb_prepare,
++	.rpc_call_prepare = nfsd4_cb_recall_prepare,
+ 	.rpc_call_done = nfsd4_cb_recall_done,
+ 	.rpc_release = nfsd4_cb_recall_release,
+ };
+@@ -781,3 +1006,173 @@ void nfsd4_cb_recall(struct nfs4_delegat
+ {
+ 	queue_work(callback_wq, &dp->dl_recall.cb_work);
+ }
++
++#if defined(CONFIG_PNFSD)
++static void nfsd4_cb_layout_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	nfsd4_cb_prepare_sequence(task, clr->clr_client);
++}
++
++static void nfsd4_cb_layout_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	struct nfs4_client *clp = clr->clr_client;
++
++	nfsd4_cb_done_sequence(task, clp);
++
++	if (!task->tk_status)
++		return;
++
++	printk("%s: clp %p cb_client %p fp %p failed with status %d\n",
++	       __func__,
++	       clp,
++	       clp->cl_cb_client,
++	       clr->clr_file,
++	       task->tk_status);
++
++	switch (task->tk_status) {
++	case -EIO:
++		/* Network partition? */
++		atomic_set(&clp->cl_cb_set, 0);
++		warn_no_callback_path(clp, task->tk_status);
++		/* FIXME:
++		 * The pnfs standard states that we need to only expire
++		 * the client after at least "lease time", e.g. lease-time * 2,
++		 * when failing to communicate a recall
++		 */
++		break;
++	case -NFS4ERR_DELAY:
++		/* Poll the client until it's done with the layout */
++		rpc_delay(task, HZ/100); /* 10 milliseconds */
++		task->tk_status = 0;
++		rpc_restart_call_prepare(task);
++		break;
++	case -NFS4ERR_NOMATCHING_LAYOUT:
++		task->tk_status = 0;
++		nomatching_layout(clr);
++	}
++}
++
++static void nfsd4_cb_layout_release(void *calldata)
++{
++	struct nfs4_layoutrecall *clr = calldata;
++	kfree(clr->clr_args);
++	clr->clr_args = NULL;
++	put_layoutrecall(clr);
++}
++
++static const struct rpc_call_ops nfsd4_cb_layout_ops = {
++	.rpc_call_prepare = nfsd4_cb_layout_prepare,
++	.rpc_call_done = nfsd4_cb_layout_done,
++	.rpc_release = nfsd4_cb_layout_release,
++};
++
++/*
++ * Called with state lock.  Once rpc_call_async() has been invoked, the
++ * args allocation and clr's reference belong to nfsd4_cb_layout_release().
++ */
++int
++nfsd4_cb_layout(struct nfs4_layoutrecall *clr)
++{
++	struct nfs4_client *clp = clr->clr_client;
++	struct rpc_clnt *clnt = clp->cl_cb_client;
++	struct nfs4_rpc_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_LAYOUT],
++		.rpc_cred = callback_cred
++	};
++	int status;
++
++	args = kzalloc(sizeof(*args), GFP_KERNEL);
++	if (!args) {
++		/* The RPC layer never saw clr, so drop its reference here. */
++		put_layoutrecall(clr);
++		status = -ENOMEM;
++		goto out;
++	}
++	clr->clr_args = args;
++	args->args_op = clr;
++	msg.rpc_argp = args;
++	/* On failure ->rpc_release has already freed args and put clr. */
++	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
++				&nfsd4_cb_layout_ops, clr);
++out:
++	dprintk("NFSD: nfsd4_cb_layout: status %d\n", status);
++	return status;
++}
++
++static void nfsd4_cb_device_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	nfsd4_cb_prepare_sequence(task, cbnd->nd_client);
++}
++
++static void nfsd4_cb_device_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	struct nfs4_client *clp = cbnd->nd_client;
++
++	nfsd4_cb_done_sequence(task, clp);
++
++	dprintk("%s: clp %p cb_client %p: status %d\n",
++	       __func__,
++	       clp,
++	       clp->cl_cb_client,
++	       task->tk_status);
++
++	if (task->tk_status == -EIO) {
++		/* Network partition? */
++		atomic_set(&clp->cl_cb_set, 0);
++		warn_no_callback_path(clp, task->tk_status);
++	}
++}
++
++static void nfsd4_cb_device_release(void *calldata)
++{
++	struct nfs4_notify_device *cbnd = calldata;
++	kfree(cbnd->nd_args);
++	cbnd->nd_args = NULL;
++	kfree(cbnd);
++}
++
++static const struct rpc_call_ops nfsd4_cb_device_ops = {
++	.rpc_call_prepare = nfsd4_cb_device_prepare,
++	.rpc_call_done = nfsd4_cb_device_done,
++	.rpc_release = nfsd4_cb_device_release,
++};
++
++/*
++ * Called with state lock.  Once rpc_call_async() has been invoked,
++ * nfsd4_cb_device_release() owns cbnd and frees cbnd->nd_args with it.
++ */
++int
++nfsd4_cb_notify_device(struct nfs4_notify_device *cbnd)
++{
++	struct nfs4_client *clp = cbnd->nd_client;
++	struct rpc_clnt *clnt = clp->cl_cb_client;
++	struct nfs4_rpc_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_DEVICE],
++		.rpc_cred = callback_cred
++	};
++	int status = -EIO;
++
++	dprintk("%s: clp %p\n", __func__, clp);
++
++	args = kzalloc(sizeof(*args), GFP_KERNEL);
++	if (!args) {
++		status = -ENOMEM;
++		goto out;
++	}
++	cbnd->nd_args = args;	/* lets the release callback free args */
++	args->args_op = cbnd;
++	msg.rpc_argp = args;
++	/* On failure ->rpc_release has already freed args and cbnd. */
++	status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
++				&nfsd4_cb_device_ops, cbnd);
++out:
++	dprintk("%s: status %d\n", __func__, status);
++	return status;
++}
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c.orig	2010-09-30 10:17:08.845997000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsd.c	2010-09-30 10:17:08.863998000 -0400
+@@ -0,0 +1,1679 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *****************************************************************************/
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY                NFSDDBG_PROC
++
++/* Globals */
++static u32 current_layoutid = 1;
++
++/*
++ * Currently used for manipulating the layout state.
++ */
++static DEFINE_SPINLOCK(layout_lock);
++
++#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_SMP)
++#  define BUG_ON_UNLOCKED_LAYOUT() BUG_ON(!spin_is_locked(&layout_lock))
++#else
++#  define BUG_ON_UNLOCKED_LAYOUT()
++#endif
++
++/*
++ * Layout state - NFSv4.1 pNFS
++ */
++static struct kmem_cache *pnfs_layout_slab;
++static struct kmem_cache *pnfs_layoutrecall_slab;
++
++/* hash table for nfsd4_pnfs_deviceid.sbid */
++#define SBID_HASH_BITS	8
++#define SBID_HASH_SIZE	(1 << SBID_HASH_BITS)
++#define SBID_HASH_MASK	(SBID_HASH_SIZE - 1)
++
++struct sbid_tracker {
++	u64 id;
++	struct super_block *sb;
++	struct list_head hash;
++};
++
++static u64 current_sbid;
++static struct list_head sbid_hashtbl[SBID_HASH_SIZE];
++
++static inline unsigned long
++sbid_hashval(struct super_block *sb)
++{
++	return hash_ptr(sb, SBID_HASH_BITS);
++}
++
++static inline struct sbid_tracker *
++alloc_sbid(void)
++{
++	return kmalloc(sizeof(struct sbid_tracker), GFP_KERNEL);
++}
++
++static void
++destroy_sbid(struct sbid_tracker *sbid)
++{
++	spin_lock(&layout_lock);
++	list_del(&sbid->hash);
++	spin_unlock(&layout_lock);
++	kfree(sbid);
++}
++
++void
++nfsd4_free_pnfs_slabs(void)
++{
++	int i;
++	struct sbid_tracker *sbid;
++
++	nfsd4_free_slab(&pnfs_layout_slab);
++	nfsd4_free_slab(&pnfs_layoutrecall_slab);
++
++	for (i = 0; i < SBID_HASH_SIZE; i++) {
++		while (!list_empty(&sbid_hashtbl[i])) {
++			sbid = list_first_entry(&sbid_hashtbl[i],
++						struct sbid_tracker,
++						hash);
++			destroy_sbid(sbid);
++		}
++	}
++}
++
++int
++nfsd4_init_pnfs_slabs(void)
++{
++	int i;
++
++	/* Init first: nfsd4_free_pnfs_slabs() always walks these lists. */
++	for (i = 0; i < SBID_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&sbid_hashtbl[i]);
++
++	pnfs_layout_slab = kmem_cache_create("pnfs_layouts",
++			sizeof(struct nfs4_layout), 0, 0, NULL);
++	if (pnfs_layout_slab == NULL)
++		return -ENOMEM;
++	pnfs_layoutrecall_slab = kmem_cache_create("pnfs_layoutrecalls",
++			sizeof(struct nfs4_layoutrecall), 0, 0, NULL);
++	if (pnfs_layoutrecall_slab == NULL)
++		return -ENOMEM;
++
++	return 0;
++}
++
++/* XXX: Need to implement the notify types and track which
++ * clients have which devices. */
++void pnfs_set_device_notify(clientid_t *clid, unsigned int types)
++{
++	struct nfs4_client *clp;
++	dprintk("%s: -->\n", __func__);
++
++	nfs4_lock_state();
++	/* Indicate that client has a device so we can only notify
++	 * the correct clients */
++	clp = find_confirmed_client(clid);
++	if (clp) {
++		atomic_inc(&clp->cl_deviceref);
++		dprintk("%s: Incr device count (clnt %p) to %d\n",
++			__func__, clp, atomic_read(&clp->cl_deviceref));
++	}
++	nfs4_unlock_state();
++}
++
++/* Clear notifications for this client
++ * XXX: Do we need to loop through and clean up all
++ *      krefs when nfsd cleans up the client? */
++void pnfs_clear_device_notify(struct nfs4_client *clp)
++{
++	atomic_dec(&clp->cl_deviceref);
++	dprintk("%s: Decr device count (clnt %p) to %d\n",
++		__func__, clp, atomic_read(&clp->cl_deviceref));
++}
++
++static struct nfs4_layout_state *
++alloc_init_layout_state(struct nfs4_client *clp, struct nfs4_file *fp,
++			stateid_t *stateid)
++{
++	struct nfs4_layout_state *new;
++
++	/* FIXME: use a kmem_cache */
++	new = kzalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return new;
++	get_nfs4_file(fp);
++	INIT_LIST_HEAD(&new->ls_perfile);
++	INIT_LIST_HEAD(&new->ls_layouts);
++	kref_init(&new->ls_ref);
++	new->ls_client = clp;
++	new->ls_file = fp;
++	new->ls_stateid.si_boot = stateid->si_boot;
++	new->ls_stateid.si_stateownerid = 0; /* identifies layout stateid */
++	new->ls_stateid.si_generation = 1;
++	spin_lock(&layout_lock);
++	new->ls_stateid.si_fileid = current_layoutid++;
++	list_add(&new->ls_perfile, &fp->fi_layout_states);
++	spin_unlock(&layout_lock);
++	return new;
++}
++
++static inline void
++get_layout_state(struct nfs4_layout_state *ls)
++{
++	kref_get(&ls->ls_ref);
++}
++
++static void
++destroy_layout_state_common(struct nfs4_layout_state *ls)
++{
++	struct nfs4_file *fp = ls->ls_file;
++
++	dprintk("pNFS %s: ls %p fp %p clp %p\n", __func__, ls, fp,
++		ls->ls_client);
++	BUG_ON(!list_empty(&ls->ls_layouts));
++	kfree(ls);
++	put_nfs4_file(fp);
++}
++
++static void
++destroy_layout_state(struct kref *kref)
++{
++	struct nfs4_layout_state *ls =
++			container_of(kref, struct nfs4_layout_state, ls_ref);
++
++	spin_lock(&layout_lock);
++	list_del(&ls->ls_perfile);
++	spin_unlock(&layout_lock);
++	destroy_layout_state_common(ls);
++}
++
++static void
++destroy_layout_state_locked(struct kref *kref)
++{
++	struct nfs4_layout_state *ls =
++			container_of(kref, struct nfs4_layout_state, ls_ref);
++
++	list_del(&ls->ls_perfile);
++	destroy_layout_state_common(ls);
++}
++
++static inline void
++put_layout_state(struct nfs4_layout_state *ls)
++{
++	dprintk("pNFS %s: ls %p ls_ref %d\n", __func__, ls,
++		atomic_read(&ls->ls_ref.refcount));
++	kref_put(&ls->ls_ref, destroy_layout_state);
++}
++
++static inline void
++put_layout_state_locked(struct nfs4_layout_state *ls)
++{
++	dprintk("pNFS %s: ls %p ls_ref %d\n", __func__, ls,
++		atomic_read(&ls->ls_ref.refcount));
++	kref_put(&ls->ls_ref, destroy_layout_state_locked);
++}
++
++/*
++ * Search the fp->fi_layout_states list for a layout state with the clientid.
++ * If not found, then this is a 'first open/delegation/lock stateid' from
++ * the client for this file.
++ * Called under the layout_lock.
++ */
++static struct nfs4_layout_state *
++find_get_layout_state(struct nfs4_client *clp, struct nfs4_file *fp)
++{
++	struct nfs4_layout_state *ls;
++
++	BUG_ON_UNLOCKED_LAYOUT();
++	list_for_each_entry(ls, &fp->fi_layout_states, ls_perfile) {
++		if (ls->ls_client == clp) {
++			dprintk("pNFS %s: before GET ls %p ls_ref %d\n",
++				__func__, ls,
++				atomic_read(&ls->ls_ref.refcount));
++			get_layout_state(ls);
++			return ls;
++		}
++	}
++	return NULL;
++}
++
++static __be32
++verify_stateid(struct nfs4_file *fp, stateid_t *stateid)
++{
++	struct nfs4_stateid *local = NULL;
++	struct nfs4_delegation *temp = NULL;
++
++	/* check if open or lock stateid */
++	local = find_stateid(stateid, RD_STATE);
++	if (local)
++		return 0;
++	temp = find_delegation_stateid(fp->fi_inode, stateid);
++	if (temp)
++		return 0;
++	return nfserr_bad_stateid;
++}
++
++/*
++ * nfs4_process_layout_stateid()
++ *
++ * We have looked up the nfs4_file corresponding to the current_fh, and
++ * confirmed the clientid. Pull the few tests from nfs4_preprocess_stateid_op()
++ * that make sense with a layout stateid.
++ *
++ * Called with the state_lock held
++ * Returns zero and stateid is updated, or error.
++ *
++ * Note: the struct nfs4_layout_state pointer is only set by layoutget.
++ */
++static __be32
++nfs4_process_layout_stateid(struct nfs4_client *clp, struct nfs4_file *fp,
++			    stateid_t *stateid, struct nfs4_layout_state **lsp)
++{
++	struct nfs4_layout_state *ls = NULL;
++	__be32 status = 0;
++
++	dprintk("--> %s clp %p fp %p \n", __func__, clp, fp);
++
++	dprintk("%s: operation stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(stateid));
++
++	status = nfs4_check_stateid(stateid);
++	if (status)
++		goto out;
++
++	/* Is this the first use of this layout ? */
++	spin_lock(&layout_lock);
++	ls = find_get_layout_state(clp, fp);
++	spin_unlock(&layout_lock);
++	if (!ls) {
++		/* Only alloc layout state on layoutget (which sets lsp). */
++		if (!lsp) {
++			dprintk("%s ERROR: Not layoutget & no layout stateid\n",
++				__func__);
++			status = nfserr_bad_stateid;
++			goto out;
++		}
++		dprintk("%s Initial stateid for layout: file %p client %p\n",
++			__func__, fp, clp);
++
++		/* verify input stateid */
++		status = verify_stateid(fp, stateid);
++		if (status) {
++			dprintk("%s ERROR: invalid open/deleg/lock stateid\n",
++				__func__);
++			goto out;
++		}
++		ls = alloc_init_layout_state(clp, fp, stateid);
++		if (!ls) {
++			dprintk("%s pNFS ERROR: no memory for layout state\n",
++				__func__);
++			status = nfserr_resource;
++			goto out;
++		}
++	} else {
++		dprintk("%s Not initial stateid. Layout state %p file %p\n",
++			__func__, ls, fp);
++
++		/* BAD STATEID */
++		status = nfserr_bad_stateid;
++		if (memcmp(&ls->ls_stateid.si_opaque, &stateid->si_opaque,
++			sizeof(stateid_opaque_t)) != 0) {
++
++			/* if a LAYOUTGET operation and stateid is a valid
++			 * open/deleg/lock stateid, accept it as a parallel
++			 * initial layout stateid
++			 */
++			if (lsp && ((verify_stateid(fp, stateid)) == 0)) {
++				dprintk("%s parallel initial layout state\n",
++					__func__);
++				goto update;
++			}
++
++			dprintk("%s ERROR bad opaque in stateid 1\n", __func__);
++			goto out_put;
++		}
++
++		/* stateid is a valid layout stateid for this file. */
++		if (stateid->si_generation > ls->ls_stateid.si_generation) {
++			dprintk("%s bad stateid 1\n", __func__);
++			goto out_put;
++		}
++update:
++		update_stateid(&ls->ls_stateid);
++		dprintk("%s Updated ls_stateid to %d on layoutstate %p\n",
++			__func__, ls->ls_stateid.si_generation, ls);
++	}
++	status = 0;
++	/* Set the stateid to be encoded */
++	memcpy(stateid, &ls->ls_stateid, sizeof(stateid_t));
++
++	/* Return the layout state if requested */
++	if (lsp) {
++		get_layout_state(ls);
++		*lsp = ls;
++	}
++	dprintk("%s: layout stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(&ls->ls_stateid));
++out_put:
++	dprintk("%s PUT LO STATE:\n", __func__);
++	put_layout_state(ls);
++out:
++	dprintk("<-- %s status %d\n", __func__, htonl(status));
++
++	return status;
++}
++
++static inline struct nfs4_layout *
++alloc_layout(void)
++{
++	return kmem_cache_alloc(pnfs_layout_slab, GFP_KERNEL);
++}
++
++static inline void
++free_layout(struct nfs4_layout *lp)
++{
++	kmem_cache_free(pnfs_layout_slab, lp);
++}
++
++static void
++init_layout(struct nfs4_layout_state *ls,
++	    struct nfs4_layout *lp,
++	    struct nfs4_file *fp,
++	    struct nfs4_client *clp,
++	    struct svc_fh *current_fh,
++	    struct nfsd4_layout_seg *seg)
++{
++	dprintk("pNFS %s: ls %p lp %p clp %p fp %p ino %p\n", __func__,
++		ls, lp, clp, fp, fp->fi_inode);
++
++	get_nfs4_file(fp);
++	lp->lo_client = clp;
++	lp->lo_file = fp;
++	get_layout_state(ls);
++	lp->lo_state = ls;
++	memcpy(&lp->lo_seg, seg, sizeof(lp->lo_seg));
++	spin_lock(&layout_lock);
++	list_add_tail(&lp->lo_perstate, &ls->ls_layouts);
++	list_add_tail(&lp->lo_perclnt, &clp->cl_layouts);
++	list_add_tail(&lp->lo_perfile, &fp->fi_layouts);
++	spin_unlock(&layout_lock);
++	dprintk("pNFS %s end\n", __func__);
++}
++
++static void
++dequeue_layout(struct nfs4_layout *lp)
++{
++	BUG_ON_UNLOCKED_LAYOUT();
++	list_del(&lp->lo_perclnt);
++	list_del(&lp->lo_perfile);
++	list_del(&lp->lo_perstate);
++}
++
++static void
++destroy_layout(struct nfs4_layout *lp)
++{
++	struct nfs4_client *clp;
++	struct nfs4_file *fp;
++	struct nfs4_layout_state *ls;
++
++	BUG_ON_UNLOCKED_LAYOUT();
++	clp = lp->lo_client;
++	fp = lp->lo_file;
++	ls = lp->lo_state;
++	dprintk("pNFS %s: lp %p clp %p fp %p ino %p ls_layouts empty %d\n",
++		__func__, lp, clp, fp, fp->fi_inode,
++		list_empty(&ls->ls_layouts));
++
++	kmem_cache_free(pnfs_layout_slab, lp);
++	/* release references taken by init_layout */
++	put_layout_state_locked(ls);
++	put_nfs4_file(fp);
++}
++
++void fs_layout_return(struct super_block *sb, struct inode *ino,
++		      struct nfsd4_pnfs_layoutreturn *lrp, int flags,
++		      void *recall_cookie)
++{
++	int ret;
++
++	if (unlikely(!sb->s_pnfs_op->layout_return))
++		return;
++
++	lrp->lr_flags = flags;
++	lrp->args.lr_cookie = recall_cookie;
++
++	if (!ino) /* FSID or ALL */
++		ino = sb->s_root->d_inode;
++
++	ret = sb->s_pnfs_op->layout_return(ino, &lrp->args);
++	dprintk("%s: inode %lu iomode=%d offset=0x%llx length=0x%llx "
++		"cookie = %p flags 0x%x status=%d\n",
++		__func__, ino->i_ino, lrp->args.lr_seg.iomode,
++		lrp->args.lr_seg.offset, lrp->args.lr_seg.length,
++		recall_cookie, flags, ret);
++}
++
++static u64	/* register sb in the sbid hash; returns its id, 0 if alloc_sbid() failed */
++alloc_init_sbid(struct super_block *sb)
++{
++	struct sbid_tracker *sbid;
++	struct sbid_tracker *new = alloc_sbid();	/* done before taking the lock */
++	unsigned long hash_idx = sbid_hashval(sb);
++	u64 id = 0;
++
++	if (likely(new)) {
++		spin_lock(&layout_lock);
++		id = ++current_sbid;	/* consumed even if sb turns out to be registered */
++		new->id = (id << SBID_HASH_BITS) | (hash_idx & SBID_HASH_MASK);	/* low bits = bucket */
++		id = new->id;
++		BUG_ON(id == 0);
++		new->sb = sb;
++
++		list_for_each_entry (sbid, &sbid_hashtbl[hash_idx], hash)
++			if (sbid->sb == sb) {	/* sb already has an entry: reuse its id */
++				kfree(new);
++				id = sbid->id;
++				spin_unlock(&layout_lock);
++				return id;
++			}
++		list_add(&new->hash, &sbid_hashtbl[hash_idx]);
++		spin_unlock(&layout_lock);
++	}
++	return id;
++}
++
++struct super_block *	/* look up the super_block registered under id, or NULL */
++find_sbid_id(u64 id)
++{
++	struct sbid_tracker *sbid;
++	struct super_block *sb = NULL;
++	unsigned long hash_idx = id & SBID_HASH_MASK;	/* bucket is encoded in the id */
++	int pos = 0;	/* 1-based position of the current entry in its bucket */
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (sbid, &sbid_hashtbl[hash_idx], hash) {
++		pos++;
++		if (sbid->id != id)
++			continue;
++		if (pos > 1)	/* move the hit to the front of its bucket */
++			list_move(&sbid->hash, &sbid_hashtbl[hash_idx]);
++		sb = sbid->sb;
++		break;
++	}
++	spin_unlock(&layout_lock);
++	return sb;
++}
++
++u64	/* return the id registered for sb, creating one if needed; 0 on failure */
++find_create_sbid(struct super_block *sb)
++{
++	struct sbid_tracker *sbid;
++	unsigned long hash_idx = sbid_hashval(sb);
++	int pos = 0;	/* 1-based position of the current entry in its bucket */
++	u64 id = 0;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (sbid, &sbid_hashtbl[hash_idx], hash) {
++		pos++;
++		if (sbid->sb != sb)
++			continue;
++		if (pos > 1)	/* move the hit to the front of its bucket */
++			list_move(&sbid->hash, &sbid_hashtbl[hash_idx]);
++		id = sbid->id;
++		break;
++	}
++	spin_unlock(&layout_lock);
++
++	if (!id)	/* not found: alloc_init_sbid() re-checks under the lock */
++		id = alloc_init_sbid(sb);
++
++	return id;
++}
++
++/*
++ * Create a layoutrecall structure for client clp (NULL for a parent recall),
++ * copying *cbl and taking a reference on lrfile when one is given.
++ */
++static struct nfs4_layoutrecall *
++alloc_init_layoutrecall(struct nfsd4_pnfs_cb_layout *cbl,
++			struct nfs4_client *clp,
++			struct nfs4_file *lrfile)
++{
++	struct nfs4_layoutrecall *clr;
++
++	dprintk("NFSD %s\n", __func__);
++	clr = kmem_cache_alloc(pnfs_layoutrecall_slab, GFP_KERNEL);
++	if (clr == NULL)
++		return clr;	/* allocation failed: caller sees NULL */
++
++	dprintk("NFSD %s -->\n", __func__);
++
++	memset(clr, 0, sizeof(*clr));
++	if (lrfile)
++		get_nfs4_file(lrfile);	/* dropped again in destroy_layoutrecall() */
++	clr->clr_client = clp;
++	clr->clr_file = lrfile;
++	clr->cb = *cbl;
++
++	kref_init(&clr->clr_ref);
++	INIT_LIST_HEAD(&clr->clr_perclnt);
++
++	dprintk("NFSD %s return %p\n", __func__, clr);
++	return clr;
++}
++
++static void	/* take an additional reference on clr */
++get_layoutrecall(struct nfs4_layoutrecall *clr)
++{
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));	/* count before the get */
++	kref_get(&clr->clr_ref);
++}
++
++static void	/* kref release callback: runs when the last reference is dropped */
++destroy_layoutrecall(struct kref *kref)
++{
++	struct nfs4_layoutrecall *clr =
++			container_of(kref, struct nfs4_layoutrecall, clr_ref);
++	dprintk("pNFS %s: clr %p fp %p clp %p\n", __func__, clr,
++		clr->clr_file, clr->clr_client);
++	BUG_ON(!list_empty(&clr->clr_perclnt));	/* must be off every list by now */
++	if (clr->clr_file)
++		put_nfs4_file(clr->clr_file);	/* ref taken in alloc_init_layoutrecall() */
++	kmem_cache_free(pnfs_layoutrecall_slab, clr);
++}
++
++int	/* drop a reference on clr; returns kref_put()'s result (nonzero if freed) */
++put_layoutrecall(struct nfs4_layoutrecall *clr)
++{
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));	/* count before the put */
++	return kref_put(&clr->clr_ref, destroy_layoutrecall);
++}
++
++void *	/* retire clr; returns the cookie to hand back to the fs, or NULL */
++layoutrecall_done(struct nfs4_layoutrecall *clr)
++{
++	void *recall_cookie = clr->cb.cbl_cookie;
++	struct nfs4_layoutrecall *parent = clr->parent;	/* read before clr may be freed */
++
++	dprintk("pNFS %s: clr %p clr_ref %d\n", __func__, clr,
++		atomic_read(&clr->clr_ref.refcount));
++	BUG_ON_UNLOCKED_LAYOUT();	/* callers in this file hold layout_lock */
++	list_del_init(&clr->clr_perclnt);
++	put_layoutrecall(clr);
++
++	if (parent && !put_layoutrecall(parent))
++		recall_cookie = NULL;	/* parent still referenced: withhold the cookie */
++
++	return recall_cookie;
++}
++
++/*
++ * Free every pnfs_ds_dev_entry queued on stp->st_pnfs_ds_id.
++ */
++void
++release_pnfs_ds_dev_list(struct nfs4_stateid *stp)
++{
++	struct pnfs_ds_dev_entry *ddp;
++
++	while (!list_empty(&stp->st_pnfs_ds_id)) {	/* pop from the head until empty */
++		ddp = list_entry(stp->st_pnfs_ds_id.next,
++				 struct pnfs_ds_dev_entry, dd_dev_entry);
++		list_del(&ddp->dd_dev_entry);
++		kfree(ddp);
++	}
++}
++
++static int	/* record data server dsid on stp; returns 0 or -ENOMEM */
++nfs4_add_pnfs_ds_dev(struct nfs4_stateid *stp, u32 dsid)
++{
++	struct pnfs_ds_dev_entry *ddp;
++
++	ddp = kmalloc(sizeof(*ddp), GFP_KERNEL);
++	if (!ddp)
++		return -ENOMEM;
++
++	INIT_LIST_HEAD(&ddp->dd_dev_entry);
++	list_add(&ddp->dd_dev_entry, &stp->st_pnfs_ds_id);	/* freed by release_pnfs_ds_dev_list() */
++	ddp->dd_dsid = dsid;
++	return 0;
++}
++
++/*
++ * are two octet ranges overlapping?
++ * start1            last1
++ *   |-----------------|
++ *                start2            last2
++ *                  |----------------|
++ */
++static inline int
++lo_seg_overlapping(struct nfsd4_layout_seg *l1, struct nfsd4_layout_seg *l2)
++{
++	u64 start1 = l1->offset;
++	u64 last1 = last_byte_offset(start1, l1->length);	/* inclusive last byte */
++	u64 start2 = l2->offset;
++	u64 last2 = last_byte_offset(start2, l2->length);	/* inclusive last byte */
++	int ret;
++
++	/* if last1 == start2 there's a single byte overlap */
++	ret = (last2 >= start1) && (last1 >= start2);
++	dprintk("%s: l1 %llu:%lld l2 %llu:%lld ret=%d\n", __func__,
++		l1->offset, l1->length, l2->offset, l2->length, ret);
++	return ret;
++}
++
++static inline int	/* nonzero when fsid's major number equals major */
++same_fsid_major(struct nfs4_fsid *fsid, u64 major)
++{
++	return fsid->major == major;	/* the minor number is not compared */
++}
++
++static inline int	/* nonzero when fsid's major matches current_fh's export fsid */
++same_fsid(struct nfs4_fsid *fsid, struct svc_fh *current_fh)
++{
++	return same_fsid_major(fsid, current_fh->fh_export->ex_fsid);
++}
++
++/*
++ * Does clp have an outstanding layout recall that conflicts with the
++ * layoutget described by seg?  Returns 1 if so, 0 otherwise.
++ */
++static int
++is_layout_recalled(struct nfs4_client *clp,
++		   struct svc_fh *current_fh,
++		   struct nfsd4_layout_seg *seg)
++{
++	struct nfs4_layoutrecall *recall;
++	int conflict = 0;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (recall, &clp->cl_layoutrecalls, clr_perclnt) {
++		struct nfsd4_pnfs_cb_layout *cb = &recall->cb;
++
++		if (cb->cbl_seg.layout_type != seg->layout_type)
++			continue;
++		if (cb->cbl_recall_type == RETURN_ALL) {
++			conflict = 1;
++		} else if (cb->cbl_recall_type == RETURN_FSID) {
++			conflict = same_fsid(&cb->cbl_fsid, current_fh);
++		} else {
++			BUG_ON(cb->cbl_recall_type != RETURN_FILE);
++			conflict = cb->cbl_seg.clientid == seg->clientid &&
++				   lo_seg_overlapping(&cb->cbl_seg, seg);
++		}
++		if (conflict)
++			break;
++	}
++	spin_unlock(&layout_lock);
++	return conflict;
++}
++
++/*
++ * Can the two octet ranges be merged, i.e. do they overlap or touch?
++ */
++static inline int
++lo_seg_mergeable(struct nfsd4_layout_seg *l1, struct nfsd4_layout_seg *l2)
++{
++	u64 first_end = end_offset(l1->offset, l1->length);
++	u64 second_end = end_offset(l2->offset, l2->length);
++
++	/* an end equal to the other range's start means they are adjacent */
++	if (second_end < l1->offset)
++		return 0;
++	return first_end >= l2->offset;
++}
++
++static void	/* grow lo in place so that it also covers lg */
++extend_layout(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *lg)
++{
++	u64 lo_start = lo->offset;
++	u64 lo_end = end_offset(lo_start, lo->length);
++	u64 lg_start = lg->offset;
++	u64 lg_end = end_offset(lg_start, lg->length);
++
++	/* lo already covers lg? */
++	if (lo_start <= lg_start && lg_end <= lo_end)
++		return;
++
++	/* extend start offset */
++	if (lo_start > lg_start)
++		lo_start = lg_start;
++
++	/* extend end offset */
++	if (lo_end < lg_end)
++		lo_end = lg_end;
++
++	lo->offset = lo_start;
++	lo->length = (lo_end == NFS4_MAX_UINT64) ?	/* max end keeps a max length */
++		      lo_end : lo_end - lo_start;
++}
++
++static struct nfs4_layout *	/* returns the layout seg was merged into, or NULL */
++merge_layout(struct nfs4_file *fp,
++	     struct nfs4_client *clp,
++	     struct nfsd4_layout_seg *seg)
++{
++	struct nfs4_layout *lp, *merged = NULL;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry (lp, &fp->fi_layouts, lo_perfile)
++		if (lp->lo_seg.layout_type == seg->layout_type &&
++		    lp->lo_seg.clientid == seg->clientid &&
++		    lp->lo_seg.iomode == seg->iomode &&
++		    lo_seg_mergeable(&lp->lo_seg, seg)) {
++			extend_layout(&lp->lo_seg, seg);
++			merged = lp;	/* the cursor is never NULL, so track the hit */
++			break;
++		}
++	spin_unlock(&layout_lock);
++	return merged;
++}
++
++__be32	/* LAYOUTGET: ask the fs for a layout and record it for the client */
++nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *lgp,
++		     struct exp_xdr_stream *xdr)
++{
++	u32 status;
++	__be32 nfserr;
++	struct inode *ino = lgp->lg_fhp->fh_dentry->d_inode;
++	struct super_block *sb = ino->i_sb;
++	int can_merge;
++	struct nfs4_file *fp;
++	struct nfs4_client *clp;
++	struct nfs4_layout *lp = NULL;
++	struct nfs4_layout_state *ls = NULL;
++	struct nfsd4_pnfs_layoutget_arg args = {
++		.lg_minlength = lgp->lg_minlength,
++		.lg_fh = &lgp->lg_fhp->fh_handle,
++	};
++	struct nfsd4_pnfs_layoutget_res res = {
++		.lg_seg = lgp->lg_seg,
++	};
++
++	dprintk("NFSD: %s Begin\n", __func__);
++
++	args.lg_sbid = find_create_sbid(sb);
++	if (!args.lg_sbid) {
++		nfserr = nfserr_layouttrylater;
++		goto out;	/* state lock not taken yet */
++	}
++
++	can_merge = sb->s_pnfs_op->can_merge_layouts != NULL &&
++		    sb->s_pnfs_op->can_merge_layouts(lgp->lg_seg.layout_type);
++
++	nfs4_lock_state();
++	fp = find_alloc_file(ino, lgp->lg_fhp);
++	clp = find_confirmed_client((clientid_t *)&lgp->lg_seg.clientid);
++	dprintk("pNFS %s: fp %p clp %p \n", __func__, fp, clp);
++	if (!fp || !clp) {
++		nfserr = nfserr_inval;
++		goto out_unlock;
++	}
++
++	/* Check decoded layout stateid */
++	nfserr = nfs4_process_layout_stateid(clp, fp, &lgp->lg_sid, &ls);
++	if (nfserr)
++		goto out_unlock;
++
++	if (is_layout_recalled(clp, lgp->lg_fhp, &lgp->lg_seg)) {
++		nfserr = nfserr_recallconflict;
++		goto out_unlock;	/* must drop ls, fp and the state lock */
++	}
++
++	/* pre-alloc layout in case we can't merge after we call
++	 * the file system
++	 */
++	lp = alloc_layout();
++	if (!lp) {
++		nfserr = nfserr_layouttrylater;
++		goto out_unlock;
++	}
++
++	dprintk("pNFS %s: pre-export type 0x%x maxcount %Zd "
++		"iomode %u offset %llu length %llu\n",
++		__func__, lgp->lg_seg.layout_type,
++		exp_xdr_qbytes(xdr->end - xdr->p),
++		lgp->lg_seg.iomode, lgp->lg_seg.offset, lgp->lg_seg.length);
++
++	/* FIXME: need to eliminate the use of the state lock */
++	nfs4_unlock_state();
++	status = sb->s_pnfs_op->layout_get(ino, xdr, &args, &res);
++	nfs4_lock_state();
++
++	dprintk("pNFS %s: post-export status %u "
++		"iomode %u offset %llu length %llu\n",
++		__func__, status, res.lg_seg.iomode,
++		res.lg_seg.offset, res.lg_seg.length);
++
++	/*
++	 * The allowable error codes for the layout_get pNFS export
++	 * operations vector function (from the file system) can be
++	 * expanded as needed to include other errors defined for
++	 * the RFC 5661 LAYOUTGET operation.
++	 */
++	switch (status) {
++	case 0:
++		nfserr = NFS4_OK;
++		break;
++	case NFS4ERR_ACCESS:
++	case NFS4ERR_BADIOMODE:
++		/* No support for LAYOUTIOMODE4_RW layouts */
++	case NFS4ERR_BADLAYOUT:
++		/* No layout matching loga_minlength rules */
++	case NFS4ERR_INVAL:
++	case NFS4ERR_IO:
++	case NFS4ERR_LAYOUTTRYLATER:
++	case NFS4ERR_LAYOUTUNAVAILABLE:
++	case NFS4ERR_LOCKED:
++	case NFS4ERR_NOSPC:
++	case NFS4ERR_RECALLCONFLICT:
++	case NFS4ERR_SERVERFAULT:
++	case NFS4ERR_TOOSMALL:
++		/* Requested layout too big for loga_maxcount */
++	case NFS4ERR_WRONG_TYPE:
++		/* Not a regular file */
++		nfserr = cpu_to_be32(status);
++		goto out_freelayout;
++	default:
++		BUG();	/* the fs returned a status outside the list above */
++		nfserr = nfserr_serverfault;
++	}
++
++	lgp->lg_seg = res.lg_seg;
++	lgp->lg_roc = res.lg_return_on_close;
++
++	/* SUCCESS!
++	 * Can the new layout be merged into an existing one?
++	 * If so, free unused layout struct
++	 */
++	if (can_merge && merge_layout(fp, clp, &res.lg_seg))
++		goto out_freelayout;
++
++	/* Can't merge, so let's initialize this new layout */
++	init_layout(ls, lp, fp, clp, lgp->lg_fhp, &res.lg_seg);
++out_unlock:
++	if (ls)
++		put_layout_state(ls);
++	if (fp)
++		put_nfs4_file(fp);
++	nfs4_unlock_state();
++out:
++	dprintk("pNFS %s: lp %p exit nfserr %u\n", __func__, lp,
++		be32_to_cpu(nfserr));
++	return nfserr;
++out_freelayout:
++	free_layout(lp);	/* pre-allocated layout was not needed */
++	goto out_unlock;
++}
++
++static void	/* shrink lo by the returned range lr; length 0 means fully returned */
++trim_layout(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *lr)
++{
++	u64 lo_start = lo->offset;
++	u64 lo_end = end_offset(lo_start, lo->length);
++	u64 lr_start = lr->offset;
++	u64 lr_end = end_offset(lr_start, lr->length);
++
++	dprintk("%s:Begin lo %llu:%lld lr %llu:%lld\n", __func__,
++		lo->offset, lo->length, lr->offset, lr->length);
++
++	/* lr fully covers lo? */
++	if (lr_start <= lo_start && lo_end <= lr_end) {
++		lo->length = 0;
++		goto out;
++	}
++
++	/*
++	 * split not supported yet. retain layout segment.
++	 * remains must be returned by the client
++	 * on the final layout return.
++	 */
++	if (lo_start < lr_start && lr_end < lo_end) {
++		dprintk("%s: split not supported\n", __func__);
++		goto out;
++	}
++
++	if (lo_start < lr_start)	/* tail returned: lo now ends where lr begins */
++		lo_end = lr_start;
++	else /* lr_end < lo_end: head returned, lo now begins where lr ends */
++		lo_start = lr_end;
++
++	lo->offset = lo_start;	/* ends are exclusive, hence no +/-1 above */
++	lo->length = (lo_end == NFS4_MAX_UINT64) ? lo_end : lo_end - lo_start;
++out:
++	dprintk("%s:End lo %llu:%lld\n", __func__, lo->offset, lo->length);
++}
++
++static int	/* apply a RETURN_FILE layoutreturn to fp; returns #layouts matched */
++pnfs_return_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp,
++			 struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	int layouts_found = 0;
++	struct nfs4_layout *lp, *nextlp;
++
++	dprintk("%s: clp %p fp %p\n", __func__, clp, fp);
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (lp, nextlp, &fp->fi_layouts, lo_perfile) {
++		dprintk("%s: lp %p client %p,%p lo_type %x,%x iomode %d,%d\n",
++			__func__, lp,
++			lp->lo_client, clp,
++			lp->lo_seg.layout_type, lrp->args.lr_seg.layout_type,
++			lp->lo_seg.iomode, lrp->args.lr_seg.iomode);
++		if (lp->lo_client != clp ||
++		    lp->lo_seg.layout_type != lrp->args.lr_seg.layout_type ||
++		    (lp->lo_seg.iomode != lrp->args.lr_seg.iomode &&
++		     lrp->args.lr_seg.iomode != IOMODE_ANY) ||
++		     !lo_seg_overlapping(&lp->lo_seg, &lrp->args.lr_seg))
++			continue;	/* other client/type/iomode, or no overlap */
++		layouts_found++;
++		trim_layout(&lp->lo_seg, &lrp->args.lr_seg);
++		if (!lp->lo_seg.length) {	/* trimmed to nothing: drop the layout */
++			lrp->lrs_present = 0;
++			dequeue_layout(lp);
++			destroy_layout(lp);
++		}
++	}
++	spin_unlock(&layout_lock);
++
++	return layouts_found;
++}
++
++static int	/* apply a RETURN_FSID/RETURN_ALL layoutreturn; returns #layouts freed */
++pnfs_return_client_layouts(struct nfs4_client *clp,
++			   struct nfsd4_pnfs_layoutreturn *lrp, u64 ex_fsid)
++{
++	int layouts_found = 0;
++	struct nfs4_layout *lp, *nextlp;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (lp, nextlp, &clp->cl_layouts, lo_perclnt) {
++		if (lrp->args.lr_seg.layout_type != lp->lo_seg.layout_type ||
++		   (lrp->args.lr_seg.iomode != lp->lo_seg.iomode &&
++		    lrp->args.lr_seg.iomode != IOMODE_ANY))
++			continue;	/* different layout type or iomode */
++
++		if (lrp->args.lr_return_type == RETURN_FSID &&
++		    !same_fsid_major(&lp->lo_file->fi_fsid, ex_fsid))
++			continue;	/* FSID return: layout is on another fs */
++
++		layouts_found++;
++		dequeue_layout(lp);	/* whole layout goes; no range trimming here */
++		destroy_layout(lp);
++	}
++	spin_unlock(&layout_lock);
++
++	return layouts_found;
++}
++
++static int	/* nonzero when the layoutreturn lrp exactly answers the recall clr */
++recall_return_perfect_match(struct nfs4_layoutrecall *clr,
++			    struct nfsd4_pnfs_layoutreturn *lrp,
++			    struct nfs4_file *fp,
++			    struct svc_fh *current_fh)
++{
++	if (clr->cb.cbl_seg.iomode != lrp->args.lr_seg.iomode ||
++	    clr->cb.cbl_recall_type != lrp->args.lr_return_type)
++		return 0;	/* iomode and recall/return type must be identical */
++
++	return (clr->cb.cbl_recall_type == RETURN_FILE &&	/* same file, same range */
++		clr->clr_file == fp &&
++		clr->cb.cbl_seg.offset == lrp->args.lr_seg.offset &&
++		clr->cb.cbl_seg.length == lrp->args.lr_seg.length) ||
++
++		(clr->cb.cbl_recall_type == RETURN_FSID &&	/* same fsid major */
++		 same_fsid(&clr->cb.cbl_fsid, current_fh)) ||
++
++		clr->cb.cbl_recall_type == RETURN_ALL;
++}
++
++static int	/* nonzero when the layoutreturn lrp touches what clr recalled */
++recall_return_partial_match(struct nfs4_layoutrecall *clr,
++			    struct nfsd4_pnfs_layoutreturn *lrp,
++			    struct nfs4_file *fp,
++			    struct svc_fh *current_fh)
++{
++	/* iomode matching? */
++	if (clr->cb.cbl_seg.iomode != lrp->args.lr_seg.iomode &&
++	    clr->cb.cbl_seg.iomode != IOMODE_ANY &&
++	    lrp->args.lr_seg.iomode != IOMODE_ANY)
++		return 0;
++
++	if (clr->cb.cbl_recall_type == RETURN_ALL ||
++	    lrp->args.lr_return_type == RETURN_ALL)
++		return 1;	/* an ALL on either side matches anything */
++
++	/* fsid matches? */
++	if (clr->cb.cbl_recall_type == RETURN_FSID ||
++	    lrp->args.lr_return_type == RETURN_FSID)
++		return same_fsid(&clr->cb.cbl_fsid, current_fh);
++
++	/* file matches, range overlapping? */
++	return clr->clr_file == fp &&
++	       lo_seg_overlapping(&clr->cb.cbl_seg, &lrp->args.lr_seg);
++}
++
++int nfs4_pnfs_return_layout(struct super_block *sb, struct svc_fh *current_fh,
++			    struct nfsd4_pnfs_layoutreturn *lrp)
++{	/* LAYOUTRETURN: drop matching layouts and recalls, then notify the fs */
++	int status = 0;
++	int layouts_found = 0;
++	struct inode *ino = current_fh->fh_dentry->d_inode;
++	struct nfs4_file *fp = NULL;
++	struct nfs4_client *clp;
++	struct nfs4_layoutrecall *clr, *nextclr;
++	u64 ex_fsid = current_fh->fh_export->ex_fsid;
++	void *recall_cookie = NULL;
++
++	dprintk("NFSD: %s\n", __func__);
++
++	nfs4_lock_state();
++	clp = find_confirmed_client((clientid_t *)&lrp->args.lr_seg.clientid);
++	if (!clp)
++		goto out;	/* unknown client: status stays 0, fs is still called */
++
++	if (lrp->args.lr_return_type == RETURN_FILE) {
++		fp = find_file(ino);
++		if (!fp) {
++			printk(KERN_ERR "%s: RETURN_FILE: no nfs4_file for "
++				"ino %p:%lu\n",
++				__func__, ino, ino ? ino->i_ino : 0L);
++			goto out;
++		}
++
++		/* Check the stateid */
++		dprintk("%s PROCESS LO_STATEID inode %p\n", __func__, ino);
++		status = nfs4_process_layout_stateid(clp, fp, &lrp->lr_sid,
++						     NULL);
++		if (status)
++			goto out_put_file;
++
++		/* update layouts */
++		layouts_found = pnfs_return_file_layouts(clp, fp, lrp);
++		/* optimize for the all-empty case */
++		if (list_empty(&fp->fi_layouts))
++			recall_cookie = PNFS_LAST_LAYOUT_NO_RECALLS;
++	} else {
++		layouts_found = pnfs_return_client_layouts(clp, lrp, ex_fsid);
++	}
++
++	dprintk("pNFS %s: clp %p fp %p layout_type 0x%x iomode %d "
++		"return_type %d fsid 0x%llx offset %llu length %llu: "
++		"layouts_found %d\n",
++		__func__, clp, fp, lrp->args.lr_seg.layout_type,
++		lrp->args.lr_seg.iomode, lrp->args.lr_return_type,
++		ex_fsid,
++		lrp->args.lr_seg.offset, lrp->args.lr_seg.length, layouts_found);
++
++	/* update layoutrecalls
++	 * note: for RETURN_{FSID,ALL}, fp may be NULL
++	 */
++	spin_lock(&layout_lock);
++	list_for_each_entry_safe (clr, nextclr, &clp->cl_layoutrecalls,
++				  clr_perclnt) {
++		if (clr->cb.cbl_seg.layout_type != lrp->args.lr_seg.layout_type)
++			continue;
++
++		if (recall_return_perfect_match(clr, lrp, fp, current_fh))
++			recall_cookie = layoutrecall_done(clr);	/* recall fully answered */
++		else if (layouts_found &&
++			 recall_return_partial_match(clr, lrp, fp, current_fh))
++			clr->clr_time = CURRENT_TIME;	/* progress made: refresh timestamp */
++	}
++	spin_unlock(&layout_lock);
++
++out_put_file:
++	if (fp)
++		put_nfs4_file(fp);
++out:
++	nfs4_unlock_state();
++
++	/* call exported filesystem layout_return (ignore return-code) */
++	fs_layout_return(sb, ino, lrp, 0, recall_cookie);
++
++	dprintk("pNFS %s: exit status %d \n", __func__, status);
++	return status;
++}
++
++/*
++ * PNFS Metadata server export operations callback for get_state
++ *
++ * called by the cluster fs when it receives a get_state() from a data
++ * server.  Looks arg->stid up as an open/lock stateid, else as a delegation
++ * stateid on inode arg->ino.  Returns 0 if found (filling arg->access and
++ * arg->clid for open/lock state), -EINVAL if not, -ENOMEM if out of memory.
++ */
++int
++nfs4_pnfs_cb_get_state(struct super_block *sb, struct pnfs_get_state *arg)
++{
++	struct nfs4_stateid *stp;
++	int flags = LOCK_STATE | OPEN_STATE; /* search both hash tables */
++	int status = -EINVAL;
++	struct inode *ino;
++	struct nfs4_delegation *dl;
++	stateid_t *stid = (stateid_t *)&arg->stid;
++
++	dprintk("NFSD: %s sid=" STATEID_FMT " ino %llu\n", __func__,
++		STATEID_VAL(stid), arg->ino);
++
++	nfs4_lock_state();
++	stp = find_stateid(stid, flags);
++	if (!stp) {
++		ino = iget_locked(sb, arg->ino);
++		if (!ino)
++			goto out;
++
++		if (ino->i_state & I_NEW) {	/* inode was not already in the cache */
++			iget_failed(ino);
++			goto out;
++		}
++
++		dl = find_delegation_stateid(ino, stid);
++		if (dl)
++			status = 0;	/* arg->access and arg->clid are not filled here */
++
++		iput(ino);
++	} else {
++		/* XXX ANDROS: marc removed nfs4_check_fh - how come? */
++
++		/* arg->dsid is the Data server id, set by the cluster fs */
++		status = nfs4_add_pnfs_ds_dev(stp, arg->dsid);
++		if (status)
++			goto out;
++
++		arg->access = stp->st_access_bmap;
++		*(clientid_t *)&arg->clid =
++			stp->st_stateowner->so_client->cl_clientid;
++	}
++out:
++	nfs4_unlock_state();
++	return status;
++}
++
++static int	/* does clp hold a layout on lrfile? if so bump and copy out its stateid */
++cl_has_file_layout(struct nfs4_client *clp, struct nfs4_file *lrfile,
++		   stateid_t *lsid)
++{
++	int found = 0;
++	struct nfs4_layout *lp;
++	struct nfs4_layout_state *ls;
++
++	spin_lock(&layout_lock);
++	list_for_each_entry(lp, &clp->cl_layouts, lo_perclnt) {
++		if (lp->lo_file != lrfile)
++			continue;
++
++		ls = find_get_layout_state(clp, lrfile);	/* called with layout_lock held */
++		if (!ls) {
++			/* This shouldn't happen as the file should have a
++			 * layout stateid if it has a layout.
++			 */
++			printk(KERN_ERR "%s: file %p has no layout stateid\n",
++				__func__, lrfile);
++			WARN_ON(1);
++			break;
++		}
++		update_stateid(&ls->ls_stateid);
++		memcpy(lsid, &ls->ls_stateid, sizeof(stateid_t));
++		put_layout_state_locked(ls);	/* balances find_get_layout_state() */
++		found = 1;
++		break;	/* the first layout on lrfile is enough */
++	}
++	spin_unlock(&layout_lock);
++
++	return found;
++}
++
++static int	/* does clp hold any layout on a file whose fsid major matches? */
++cl_has_fsid_layout(struct nfs4_client *clp, struct nfs4_fsid *fsid)
++{
++	int found = 0;
++	struct nfs4_layout *lp;
++
++	/* note: minor version unused */
++	spin_lock(&layout_lock);
++	list_for_each_entry(lp, &clp->cl_layouts, lo_perclnt)
++		if (lp->lo_file->fi_fsid.major == fsid->major) {
++			found = 1;
++			break;
++		}
++	spin_unlock(&layout_lock);
++	return found;
++}
++
++static int	/* does clp hold any layout at all? */
++cl_has_any_layout(struct nfs4_client *clp)
++{
++	return !list_empty(&clp->cl_layouts);	/* NOTE(review): read without layout_lock */
++}
++
++static int	/* does clp hold a layout that the recall described by cbl covers? */
++cl_has_layout(struct nfs4_client *clp, struct nfsd4_pnfs_cb_layout *cbl,
++	      struct nfs4_file *lrfile, stateid_t *lsid)
++{
++	if (cbl->cbl_recall_type == RETURN_FILE)
++		return cl_has_file_layout(clp, lrfile, lsid);
++
++	if (cbl->cbl_recall_type == RETURN_FSID)
++		return cl_has_fsid_layout(clp, &cbl->cbl_fsid);
++
++	/* every other recall type: any layout at all counts; lsid is untouched */
++	return cl_has_any_layout(clp);
++}
++
++/*
++ * Simulate the layoutreturn for recall clr.  Called without the layout_lock.
++ */
++void
++nomatching_layout(struct nfs4_layoutrecall *clr)
++{
++	struct nfsd4_pnfs_layoutreturn lr = {
++		.args.lr_return_type = clr->cb.cbl_recall_type,
++		.args.lr_seg = clr->cb.cbl_seg,
++	};
++	struct inode *inode;
++	void *recall_cookie;
++
++	if (clr->clr_file) {
++		inode = igrab(clr->clr_file->fi_inode);
++		if (WARN_ON(!inode))
++			return;	/* NOTE(review): clr stays on the client's recall list */
++	} else {
++		inode = NULL;
++	}
++
++	dprintk("%s: clp %p fp %p: simulating layout_return\n", __func__,
++		clr->clr_client, clr->clr_file);
++
++	if (clr->cb.cbl_recall_type == RETURN_FILE)
++		pnfs_return_file_layouts(clr->clr_client, clr->clr_file, &lr);
++	else
++		pnfs_return_client_layouts(clr->clr_client, &lr,
++					   clr->cb.cbl_fsid.major);
++
++	spin_lock(&layout_lock);
++	recall_cookie = layoutrecall_done(clr);	/* unlinks clr and drops one ref */
++	spin_unlock(&layout_lock);
++
++	fs_layout_return(clr->clr_sb, inode, &lr, LR_FLAG_INTERN,
++			 recall_cookie);	/* NOTE(review): clr read after the put; caller must hold a ref */
++	iput(inode);
++}
++
++void pnfs_expire_client(struct nfs4_client *clp)
++{	/* retire every pending layout recall, then every layout, held by clp */
++	for (;;) {
++		struct nfs4_layoutrecall *lrp = NULL;
++
++		spin_lock(&layout_lock);
++		if (!list_empty(&clp->cl_layoutrecalls)) {
++			lrp = list_entry(clp->cl_layoutrecalls.next,
++					 struct nfs4_layoutrecall, clr_perclnt);
++			get_layoutrecall(lrp);	/* keep lrp alive across nomatching_layout() */
++		}
++		spin_unlock(&layout_lock);
++		if (!lrp)
++			break;
++
++		dprintk("%s: lrp %p, fp %p\n", __func__, lrp, lrp->clr_file);
++		BUG_ON(lrp->clr_client != clp);
++		nomatching_layout(lrp);
++		put_layoutrecall(lrp);
++	}
++
++	for (;;) {
++		struct nfs4_layout *lp = NULL;
++		struct inode *inode = NULL;
++		struct nfsd4_pnfs_layoutreturn lr;
++		bool empty = false;
++
++		spin_lock(&layout_lock);
++		if (!list_empty(&clp->cl_layouts)) {
++			lp = list_entry(clp->cl_layouts.next,
++					struct nfs4_layout, lo_perclnt);
++			inode = igrab(lp->lo_file->fi_inode);
++			memset(&lr, 0, sizeof(lr));
++			lr.args.lr_return_type = RETURN_FILE;
++			lr.args.lr_seg = lp->lo_seg;
++			BUG_ON(lp->lo_client != clp);
++			dequeue_layout(lp);	/* must precede the emptiness test below */
++			empty = list_empty(&lp->lo_file->fi_layouts);
++			destroy_layout(lp); /* do not access lp after this */
++		}
++		spin_unlock(&layout_lock);
++		if (!lp)
++			break;
++
++		if (WARN_ON(!inode))
++			break;
++
++		dprintk("%s: inode %lu lp %p clp %p\n", __func__, inode->i_ino,
++			lp, clp);	/* lp is only printed, never dereferenced */
++
++		fs_layout_return(inode->i_sb, inode, &lr, LR_FLAG_EXPIRE,
++				 empty ? PNFS_LAST_LAYOUT_NO_RECALLS : NULL);
++		iput(inode);
++	}
++}
++
++struct create_recall_list_arg {	/* argument block for lo_recall_per_client() */
++	struct nfsd4_pnfs_cb_layout *cbl;	/* recall request to copy per client */
++	struct nfs4_file *lrfile;	/* file being recalled, may be NULL */
++	struct list_head *todolist;	/* new recalls are added here */
++	unsigned todo_count;	/* number of entries added to todolist */
++};
++
++/*
++ * look for matching layout for the given client
++ * and add a pending layout recall to the todo list
++ * if found any.
++ * returns:
++ *   0 (whether or not a layout was found), or -ENOMEM.
++ */
++static int
++lo_recall_per_client(struct nfs4_client *clp, void *p)
++{
++	stateid_t lsid;
++	struct nfs4_layoutrecall *pending;
++	struct create_recall_list_arg *arg = p;
++
++	memset(&lsid, 0, sizeof(lsid));	/* stays zero unless cl_has_layout() fills it */
++	if (!cl_has_layout(clp, arg->cbl, arg->lrfile, &lsid))
++		return 0;
++
++	/* Matching put done by layoutreturn */
++	pending = alloc_init_layoutrecall(arg->cbl, clp, arg->lrfile);
++	/* out of memory, drain todo queue */
++	if (!pending)
++		return -ENOMEM;
++
++	*(stateid_t *)&pending->cb.cbl_sid = lsid;
++	list_add(&pending->clr_perclnt, arg->todolist);
++	arg->todo_count++;
++	return 0;
++}
++
++/* Create a layoutrecall structure for each client based on the
++ * original structure; *todo_len gets the number queued on todolist. */
++int
++create_layout_recall_list(struct list_head *todolist, unsigned *todo_len,
++			  struct nfsd4_pnfs_cb_layout *cbl,
++			  struct nfs4_file *lrfile)
++{
++	struct nfs4_client *clp;
++	struct create_recall_list_arg arg = {
++		.cbl = cbl,
++		.lrfile = lrfile,
++		.todolist = todolist,
++	};
++	int status = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	/* If client given by fs, just do single client */
++	if (cbl->cbl_seg.clientid) {
++		clp = find_confirmed_client(
++				(clientid_t *)&cbl->cbl_seg.clientid);
++		if (!clp) {
++			status = -ENOENT;
++			dprintk("%s: clientid %llx not found\n", __func__,
++				(unsigned long long)cbl->cbl_seg.clientid);
++			goto out;
++		}
++
++		status = lo_recall_per_client(clp, &arg);
++	} else {
++		/* Check all clients for layout matches */
++		status = filter_confirmed_clients(lo_recall_per_client, &arg);
++	}
++
++out:
++	*todo_len = arg.todo_count;	/* written on the error paths too */
++	dprintk("%s: <-- list len %u status %d\n", __func__, *todo_len, status);
++	return status;
++}
++
++/*
++ * Recall layouts asynchronously: issue a layout recall callback for every
++ * entry on todolist.  Called with state lock.  Returns 0, or -ENOMEM.
++ */
++static int
++spawn_layout_recall(struct super_block *sb, struct list_head *todolist,
++		    unsigned todo_len)
++{
++	struct nfs4_layoutrecall *pending;
++	struct nfs4_layoutrecall *parent = NULL;
++	int status = 0;
++
++	dprintk("%s: -->\n", __func__);
++
++	if (todo_len > 1) {
++		pending = list_entry(todolist->next, struct nfs4_layoutrecall,
++				     clr_perclnt);
++
++		parent = alloc_init_layoutrecall(&pending->cb, NULL,
++						 pending->clr_file);
++		if (unlikely(!parent)) {
++			/* We want forward progress. If parent cannot be
++			 * allocated take the first one as parent but don't
++			 * execute it.  Caller must check for -EAGAIN, if so
++			 * When the partial recalls return,
++			 * nfsd_layout_recall_cb should be called again.
++			 */
++			list_del_init(&pending->clr_perclnt);
++			if (todo_len > 2) {
++				parent = pending;
++			} else {
++				parent = NULL;
++				put_layoutrecall(pending);
++			}
++			--todo_len;
++			status = -ENOMEM;
++		}
++	}
++
++	while (!list_empty(todolist)) {
++		pending = list_entry(todolist->next, struct nfs4_layoutrecall,
++				     clr_perclnt);
++		list_del_init(&pending->clr_perclnt);
++		dprintk("%s: clp %p cb_client %p fp %p\n", __func__,
++			pending->clr_client,
++			pending->clr_client->cl_cb_client,
++			pending->clr_file);
++		if (unlikely(!pending->clr_client->cl_cb_client)) {
++			printk(KERN_INFO
++				"%s: clientid %08x/%08x has no callback path\n",
++				__func__,
++				pending->clr_client->cl_clientid.cl_boot,
++				pending->clr_client->cl_clientid.cl_id);
++			put_layoutrecall(pending);
++			/* Skipped entries count too; a skipped last entry
++			 * gives up the parent ref it would have inherited. */
++			if (--todo_len == 0 && parent)
++				put_layoutrecall(parent);
++			continue;
++		}
++
++		pending->clr_time = CURRENT_TIME;
++		pending->clr_sb = sb;
++		/* The parent starts with one reference, which the last
++		 * entry inherits; every earlier entry takes its own. */
++		if (parent && todo_len != 1)
++			get_layoutrecall(parent);
++		pending->parent = parent;
++		get_layoutrecall(pending);
++		/* Add to list so corresponding layoutreturn can find req */
++		list_add(&pending->clr_perclnt,
++			 &pending->clr_client->cl_layoutrecalls);
++
++		nfsd4_cb_layout(pending);
++		--todo_len;
++	}
++
++	return status;
++}
++
++/*
++ * Recall layouts on behalf of the filesystem: build the per-client recall
++ * list for cbl and issue it.  Returns 0, -ENOENT, or -EAGAIN (see the end).
++ */
++int nfsd_layout_recall_cb(struct super_block *sb, struct inode *inode,
++			  struct nfsd4_pnfs_cb_layout *cbl)
++{
++	int status;
++	struct nfs4_file *lrfile = NULL;
++	struct list_head todolist;
++	unsigned todo_len = 0;
++
++	dprintk("NFSD nfsd_layout_recall_cb: inode %p cbl %p\n", inode, cbl);
++	BUG_ON(!cbl);
++	BUG_ON(cbl->cbl_recall_type != RETURN_FILE &&
++	       cbl->cbl_recall_type != RETURN_FSID &&
++	       cbl->cbl_recall_type != RETURN_ALL);
++	BUG_ON(cbl->cbl_recall_type == RETURN_FILE && !inode);
++	BUG_ON(cbl->cbl_seg.iomode != IOMODE_READ &&
++	       cbl->cbl_seg.iomode != IOMODE_RW &&
++	       cbl->cbl_seg.iomode != IOMODE_ANY);
++
++	if (nfsd_serv == NULL) {
++		dprintk("NFSD nfsd_layout_recall_cb: nfsd_serv == NULL\n");
++		return -ENOENT;
++	}
++
++	nfs4_lock_state();
++	status = -ENOENT;
++	if (inode) {
++		lrfile = find_file(inode);
++		if (!lrfile) {
++			dprintk("NFSD nfsd_layout_recall_cb: "
++				"nfs4_file not found\n");
++			goto err;
++		}
++		if (cbl->cbl_recall_type == RETURN_FSID)
++			cbl->cbl_fsid = lrfile->fi_fsid;	/* fsid is taken from the file */
++	}
++
++	INIT_LIST_HEAD(&todolist);
++
++	/* If no cookie provided by FS, return a default one */
++	if (!cbl->cbl_cookie)
++		cbl->cbl_cookie = PNFS_LAST_LAYOUT_NO_RECALLS;
++
++	status = create_layout_recall_list(&todolist, &todo_len, cbl, lrfile);
++	if (list_empty(&todolist)) {
++		status = -ENOENT;	/* no client holds a matching layout */
++	} else {
++		/* process todolist even if create_layout_recall_list
++		 * returned an error */
++		int status2 = spawn_layout_recall(sb, &todolist, todo_len);
++		if (status2)
++			status = status2;
++	}
++
++err:
++	nfs4_unlock_state();
++	if (lrfile)
++		put_nfs4_file(lrfile);
++	return (todo_len && status) ? -EAGAIN : status;	/* partial: caller should retry */
++}
++
++struct create_device_notify_list_arg {	/* for create_device_notify_per_cl() */
++	struct list_head *todolist;	/* new notifications are added here */
++	struct nfsd4_pnfs_cb_dev_list *ndl;	/* device list shared by every entry */
++};
++
++static int	/* queue a device notification for clp; returns 0 or -ENOMEM */
++create_device_notify_per_cl(struct nfs4_client *clp, void *p)
++{
++	struct nfs4_notify_device *cbnd;
++	struct create_device_notify_list_arg *arg = p;
++
++	if (atomic_read(&clp->cl_deviceref) <= 0)
++		return 0;	/* cl_deviceref not positive: skip this client */
++
++	cbnd = kmalloc(sizeof(*cbnd), GFP_KERNEL);
++	if (!cbnd)
++		return -ENOMEM;
++
++	cbnd->nd_list = arg->ndl;
++	cbnd->nd_client = clp;
++	list_add(&cbnd->nd_perclnt, arg->todolist);
++	return 0;
++}
++
++/* Create a list of clients to send device notifications. */
++int
++create_device_notify_list(struct list_head *todolist,
++			  struct nfsd4_pnfs_cb_dev_list *ndl)
++{
++	int status;
++	struct create_device_notify_list_arg arg = {
++		.todolist = todolist,
++		.ndl = ndl,
++	};
++
++	nfs4_lock_state();	/* confirmed clients are walked under the state lock */
++	status = filter_confirmed_clients(create_device_notify_per_cl, &arg);
++	nfs4_unlock_state();
++
++	return status;	/* entries already queued stay on todolist on error */
++}
++
++/*
++ * For each client that has a device, send a device notification.
++ * XXX: Need to track which clients have which devices.
++ */
++int nfsd_device_notify_cb(struct super_block *sb,
++			  struct nfsd4_pnfs_cb_dev_list *ndl)
++{
++	struct nfs4_notify_device *cbnd;
++	unsigned int notify_num = 0;
++	int status2, status = 0;
++	struct list_head todolist;
++
++	BUG_ON(!ndl || ndl->cbd_len == 0 || !ndl->cbd_list);
++
++	dprintk("NFSD %s: cbl %p len %u\n", __func__, ndl, ndl->cbd_len);
++
++	if (nfsd_serv == NULL)
++		return -ENOENT;
++
++	INIT_LIST_HEAD(&todolist);
++
++	status = create_device_notify_list(&todolist, ndl);
++
++	while (!list_empty(&todolist)) {	/* runs even if list creation failed part-way */
++		cbnd = list_entry(todolist.next, struct nfs4_notify_device,
++				  nd_perclnt);
++		list_del_init(&cbnd->nd_perclnt);
++		status2 = nfsd4_cb_notify_device(cbnd);
++		pnfs_clear_device_notify(cbnd->nd_client);	/* NOTE(review): cbnd read after hand-off; confirm it is still live */
++		if (status2) {
++			kfree(cbnd);	/* freed here only when the send failed */
++			status = status2;
++		}
++		notify_num++;
++	}
++
++	dprintk("NFSD %s: status %d clients %u\n",
++		__func__, status, notify_num);
++	return status;
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c.orig	2010-09-30 10:17:08.866999000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsdlm.c	2010-09-30 10:17:08.868998000 -0400
+@@ -0,0 +1,461 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ ******************************************************************************/
++
++#include <linux/nfs4.h>
++#include <linux/nfsd/const.h>
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/sunrpc/clnt.h>
++
++#include "nfsfh.h"
++#include "nfsd.h"
++
++#define NFSDDBG_FACILITY                NFSDDBG_PROC
++
++/* Just use a linked list: we do not expect more than 32 dlm_device_entries.
++ * The first implementation will just use one device per cluster file system.
++ */
++
++static LIST_HEAD(dlm_device_list);
++static DEFINE_SPINLOCK(dlm_device_list_lock);
++
++struct dlm_device_entry {
++	struct list_head	dlm_dev_list;	/* link in dlm_device_list */
++	char			disk_name[DISK_NAME_LEN];	/* lookup key */
++	int			num_ds;		/* number of addresses in ds_list */
++	char			ds_list[NFSD_DLM_DS_LIST_MAX];	/* comma-separated */
++};
++
++/* Return the entry named exactly @disk_name (no prefix matching), or NULL. */
++static struct dlm_device_entry *_nfsd4_find_pnfs_dlm_device(char *disk_name)
++{
++	struct dlm_device_entry *dlm_pdev, *found = NULL;
++
++	dprintk("--> %s  disk name %s\n", __func__, disk_name);
++	spin_lock(&dlm_device_list_lock);
++	list_for_each_entry(dlm_pdev, &dlm_device_list, dlm_dev_list) {
++		dprintk("%s Look for dlm_pdev %s\n", __func__,
++			dlm_pdev->disk_name);
++		if (!strncmp(dlm_pdev->disk_name, disk_name, DISK_NAME_LEN)) {
++			found = dlm_pdev;	/* "sda" no longer matches "sda1" */
++			break;
++		}
++	}
++	spin_unlock(&dlm_device_list_lock);
++	return found;
++}
++
++static struct dlm_device_entry *
++nfsd4_find_pnfs_dlm_device(struct super_block *sb) {
++	char dname[BDEVNAME_SIZE];
++
++	bdevname(sb->s_bdev, dname);	/* key: name of sb's block device */
++	return _nfsd4_find_pnfs_dlm_device(dname);
++}
++
++/* Write each entry as "disk_name:ds_list\n" to @buf; -EINVAL on overflow. */
++ssize_t
++nfsd4_get_pnfs_dlm_device_list(char *buf, ssize_t buflen)
++{
++	struct dlm_device_entry *dlm_pdev;
++	ssize_t size = 0;
++	char *pos = buf;
++	int ret = -EINVAL;
++
++	spin_lock(&dlm_device_list_lock);
++	list_for_each_entry(dlm_pdev, &dlm_device_list, dlm_dev_list) {
++		int advanced = snprintf(pos, buflen - size, "%s:%s\n",
++					dlm_pdev->disk_name, dlm_pdev->ds_list);
++		if (advanced >= buflen - size)
++			goto out;
++		size += advanced;
++		pos += advanced;
++	}
++	ret = size;
++
++out:
++	spin_unlock(&dlm_device_list_lock);
++	return ret;
++}
++
++/* Count the comma-separated addresses in @ds_list; false if one is invalid. */
++bool nfsd4_validate_pnfs_dlm_device(char *ds_list, int *num_ds)
++{
++	struct sockaddr_storage addr;
++	char *start = ds_list;
++	int ip_len;
++
++	*num_ds = 0;
++	while (*start) {
++		ip_len = strcspn(start, ",");
++		if (!rpc_pton(start, ip_len, (struct sockaddr *)&addr, sizeof(addr)))
++			return false;
++		(*num_ds)++;
++		start += ip_len + (start[ip_len] == ',');	/* never pass the NUL */
++	}
++	return true;
++}
++
++/*
++ * Parse a pnfs_dlm_device string and add or replace its device list entry.
++ * Returns 0 on success, -ENOMEM or -EINVAL on failure.
++ *
++ * pnfs_dlm_device string format:
++ *     block-device-path:<ds1 ipv4 address>,<ds2 ipv4 address>
++ *
++ * Examples
++ *     '/dev/sda:192.168.1.96,192.168.1.97' creates a data server list with
++ *     two data servers for the dlm cluster file system mounted on /dev/sda.
++ *
++ *     '/dev/sda:192.168.1.96,192.168.1.100'
++ *     replaces the data server list for /dev/sda
++ *
++ *     Only the deviceid == 1 is supported. Can add device id to
++ *     pnfs_dlm_device string when needed.
++ *
++ *     Only the round robin each data server once stripe index is supported.
++ */
++int
++nfsd4_set_pnfs_dlm_device(char *pnfs_dlm_device, int len)
++{
++	struct dlm_device_entry *new, *found;
++	char *bufp = pnfs_dlm_device;
++	char *endp = bufp + strlen(bufp);
++	int err = -ENOMEM;
++
++	dprintk("--> %s len %d\n", __func__, len);
++
++	new = kzalloc(sizeof(*new), GFP_KERNEL);
++	if (!new)
++		return err;
++
++	err = -EINVAL;
++	/* disk_name */
++	/* FIXME: need to check for valid disk_name. search superblocks?
++	 * check for slash dev slash ?
++	 */
++	len = strcspn(bufp, ":");
++	if (len >= DISK_NAME_LEN)	/* keep room for the terminating NUL */
++		goto out_free;
++	memcpy(new->disk_name, bufp, len);
++
++	bufp += len + 1;
++	if (bufp >= endp)
++		goto out_free;
++
++	/* data server list */
++	/* FIXME: need to check for comma separated valid ip format */
++	len = strcspn(bufp, ":");
++	if (len >= NFSD_DLM_DS_LIST_MAX)	/* keep room for the terminating NUL */
++		goto out_free;
++	memcpy(new->ds_list, bufp, len);
++
++	/*  validate the ips */
++	if (!nfsd4_validate_pnfs_dlm_device(new->ds_list, &(new->num_ds)))
++		goto out_free;
++
++	dprintk("%s disk_name %s num_ds %d ds_list %s\n", __func__,
++		new->disk_name, new->num_ds, new->ds_list);
++
++	found = _nfsd4_find_pnfs_dlm_device(new->disk_name);
++	if (found) {
++		/* FIXME: should compare found->ds_list with new->ds_list
++		 * and if it is different, kick off a CB_NOTIFY change
++		 * deviceid.
++		 */
++		dprintk("%s pnfs_dlm_device %s:%s already in cache "
++			" replace ds_list with new ds_list %s\n", __func__,
++			found->disk_name, found->ds_list, new->ds_list);
++		memset(found->ds_list, 0, sizeof(found->ds_list)); /* whole array */
++		memcpy(found->ds_list, new->ds_list, strlen(new->ds_list));
++		found->num_ds = new->num_ds;
++		kfree(new);
++	} else {
++		dprintk("%s Adding pnfs_dlm_device %s:%s\n", __func__,
++				new->disk_name, new->ds_list);
++		spin_lock(&dlm_device_list_lock);
++		list_add(&new->dlm_dev_list, &dlm_device_list);
++		spin_unlock(&dlm_device_list_lock);
++	}
++	dprintk("<-- %s Success\n", __func__);
++	return 0;
++
++out_free:
++	kfree(new);
++	dprintk("<-- %s returns %d\n", __func__, err);
++	return err;
++}
++
++/* Unlink and free every entry on dlm_device_list. */
++void nfsd4_pnfs_dlm_shutdown(void)
++{
++	struct dlm_device_entry *pdev, *tmp;
++
++	dprintk("--> %s\n", __func__);
++
++	spin_lock(&dlm_device_list_lock);
++	list_for_each_entry_safe(pdev, tmp, &dlm_device_list, dlm_dev_list) {
++		list_del(&pdev->dlm_dev_list);
++		kfree(pdev);
++	}
++	spin_unlock(&dlm_device_list_lock);
++}
++
++/* Device iterator: yields deviceid 1 once; -ENOENT once gd_cookie is set. */
++static int nfsd4_pnfs_dlm_getdeviter(struct super_block *sb, u32 layout_type,
++				     struct nfsd4_pnfs_dev_iter_res *res)
++{
++	if (layout_type != LAYOUT_NFSV4_1_FILES) {
++		printk(KERN_ERR "%s: ERROR: layout type isn't 'file' "
++			"(type: %x)\n", __func__, layout_type);
++		return -ENOTSUPP;
++	}
++
++	res->gd_eof = 1;
++	if (res->gd_cookie != 0)
++		return -ENOENT;
++
++	res->gd_devid = 1;
++	res->gd_verf = 1;
++	res->gd_cookie = 1;
++	return 0;
++}
++
++/* Encode the DS address list for deviceid 1 into @xdr; returns 0 or -errno. */
++static int nfsd4_pnfs_dlm_getdevinfo(struct super_block *sb,
++				     struct exp_xdr_stream *xdr,
++				     u32 layout_type,
++				     const struct nfsd4_pnfs_deviceid *devid)
++{
++	int err, len, i = 0;
++	struct pnfs_filelayout_device fdev;
++	struct pnfs_filelayout_devaddr *daddr;
++	struct dlm_device_entry *dlm_pdev;
++	char   *bufp;
++
++	err = -ENOTSUPP;
++	if (layout_type != LAYOUT_NFSV4_1_FILES) {
++		dprintk("%s: ERROR: layout type isn't 'file' "
++			"(type: %x)\n", __func__, layout_type);
++		return err;
++	}
++
++	/* We only hand out a deviceid of 1 in LAYOUTGET, so a GETDEVICEINFO
++	 * with a gdia_device_id != 1 is invalid. */
++	err = -EINVAL;
++	if (devid->devid != 1) {
++		dprintk("%s: WARNING: didn't receive a deviceid of "
++			"1 (got: 0x%llx)\n", __func__, devid->devid);
++		return err;
++	}
++
++	/* If the DS list has not been established, return -EINVAL */
++	dlm_pdev = nfsd4_find_pnfs_dlm_device(sb);
++	if (!dlm_pdev) {
++		dprintk("%s: DEBUG: disk %s Not Found\n", __func__,
++			sb->s_bdev->bd_disk->disk_name);
++		return err;
++	}
++
++	dprintk("%s: Found disk %s with DS list |%s|\n",
++		__func__, dlm_pdev->disk_name, dlm_pdev->ds_list);
++
++	memset(&fdev, '\0', sizeof(fdev));
++	fdev.fl_device_length = dlm_pdev->num_ds;
++
++	err = -ENOMEM;
++	len = sizeof(*fdev.fl_device_list) * fdev.fl_device_length;
++	fdev.fl_device_list = kzalloc(len, GFP_KERNEL);
++	if (!fdev.fl_device_list) {
++		printk(KERN_ERR "%s: ERROR: unable to kmalloc a device list "
++			"buffer for %d DSes.\n", __func__, dlm_pdev->num_ds);
++		fdev.fl_device_length = 0;
++		goto out;
++	}
++
++	/* Set a simple stripe index list */
++	fdev.fl_stripeindices_length = fdev.fl_device_length;
++	fdev.fl_stripeindices_list = kzalloc(sizeof(u32) *
++				     fdev.fl_stripeindices_length, GFP_KERNEL);
++	if (!fdev.fl_stripeindices_list) {
++		printk(KERN_ERR "%s: ERROR: unable to kmalloc a stripeindices "
++			"list buffer for %d DSes.\n", __func__,
++			dlm_pdev->num_ds);
++		goto out;
++	}
++	for (i = 0; i < fdev.fl_stripeindices_length; i++)
++		fdev.fl_stripeindices_list[i] = i;
++
++	/* Transfer the data server list with a single multipath entry */
++	bufp = dlm_pdev->ds_list;
++	for (i = 0; i < fdev.fl_device_length; i++) {
++		daddr = kmalloc(sizeof(*daddr), GFP_KERNEL);
++		if (!daddr) {
++			printk(KERN_ERR "%s: ERROR: unable to kmalloc a device "
++				"addr buffer.\n", __func__);
++			goto out;
++		}
++		daddr->r_netid.data = "tcp";
++		daddr->r_netid.len = 3;
++
++		len = strcspn(bufp, ",");
++		daddr->r_addr.data = kmalloc(len + 4, GFP_KERNEL);
++		if (!daddr->r_addr.data) {
++			kfree(daddr);	/* not on fl_device_list yet */
++			goto out;
++		}
++		memcpy(daddr->r_addr.data, bufp, len);
++		/* append the port number.  interpreted as two more bytes
++		 * beyond the quad: ".8.1" -> 0x08.0x01 -> 0x0801 = port 2049. */
++		memcpy(daddr->r_addr.data + len, ".8.1", 4);
++		daddr->r_addr.len = len + 4;
++		fdev.fl_device_list[i].fl_multipath_length = 1;
++		fdev.fl_device_list[i].fl_multipath_list = daddr;
++		dprintk("%s: encoding DS |%s|\n", __func__, bufp);
++		bufp += len + 1;
++	}
++
++	/* have nfsd encode the device info */
++	err = filelayout_encode_devinfo(xdr, &fdev);
++out:
++	for (i = 0; i < fdev.fl_device_length; i++) {
++		daddr = fdev.fl_device_list[i].fl_multipath_list;
++		if (daddr)	/* r_addr.data is a separate allocation */
++			kfree(daddr->r_addr.data);
++		kfree(daddr);
++	}
++	kfree(fdev.fl_device_list);
++	kfree(fdev.fl_stripeindices_list);
++	dprintk("<-- %s returns %d\n", __func__, err);
++	return err;
++}
++
++/* Round NFSSVC_MAXBLKSIZE down to a multiple of @blocksize (at least one). */
++static int get_stripe_unit(int blocksize)
++{
++	return blocksize >= NFSSVC_MAXBLKSIZE ? blocksize :
++		NFSSVC_MAXBLKSIZE - (NFSSVC_MAXBLKSIZE % blocksize);
++}
++
++/*
++ * Look up inode block device in pnfs_dlm_device list.
++ * Hash on the inode->i_ino and number of data servers.
++ * Returns the first stripe index, or -1 if no layout can be handed out.
++ */
++static int dlm_ino_hash(struct inode *ino)
++{
++	struct dlm_device_entry *de;
++
++	/* If can't find the inode block device in the pnfs_dlm_device list
++	 * (or it has no data servers) then don't hand out a layout
++	 */
++	de = nfsd4_find_pnfs_dlm_device(ino->i_sb);
++	if (!de || de->num_ds <= 0)
++		return -1;
++	/* modulo, not a mask: num_ds need not be a power of two */
++	return ino->i_ino % de->num_ds;
++}
++
++static enum nfsstat4 nfsd4_pnfs_dlm_layoutget(struct inode *inode,
++			   struct exp_xdr_stream *xdr,
++			   const struct nfsd4_pnfs_layoutget_arg *args,
++			   struct nfsd4_pnfs_layoutget_res *res)
++{
++	struct pnfs_filelayout_layout *layout = NULL;
++	struct knfsd_fh *fhp = NULL;
++	int index;	/* first stripe index, from dlm_ino_hash() */
++	enum nfsstat4 rc = NFS4_OK;
++
++	dprintk("%s: LAYOUT_GET\n", __func__);
++
++	/* DLM exported file systems only support layouts for READ */
++	if (res->lg_seg.iomode == IOMODE_RW)
++		return NFS4ERR_BADIOMODE;
++
++	index = dlm_ino_hash(inode);
++	dprintk("%s first stripe index %d i_ino %lu\n", __func__, index,
++		inode->i_ino);
++	if (index < 0)
++		return NFS4ERR_LAYOUTUNAVAILABLE;
++
++	res->lg_seg.layout_type = LAYOUT_NFSV4_1_FILES;
++	/* Always give out whole file layouts */
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;
++	/* Always give out READ ONLY layouts */
++	res->lg_seg.iomode = IOMODE_READ;
++
++	layout = kzalloc(sizeof(*layout), GFP_KERNEL);
++	if (layout == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;
++		goto error;
++	}
++
++	/* Set file layout response args */
++	layout->lg_layout_type = LAYOUT_NFSV4_1_FILES;
++	layout->lg_stripe_type = STRIPE_SPARSE;
++	layout->lg_commit_through_mds = false;
++	layout->lg_stripe_unit = get_stripe_unit(inode->i_sb->s_blocksize);
++	layout->lg_fh_length = 1;	/* one filehandle: the copy made in fhp below */
++	layout->device_id.sbid = args->lg_sbid;
++	layout->device_id.devid = 1;                                /*FSFTEMP*/
++	layout->lg_first_stripe_index = index;                      /*FSFTEMP*/
++	layout->lg_pattern_offset = 0;
++
++	fhp = kmalloc(sizeof(*fhp), GFP_KERNEL);
++	if (fhp == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;
++		goto error;
++	}
++
++	memcpy(fhp, args->lg_fh, sizeof(*fhp));
++	pnfs_fh_mark_ds(fhp);
++	layout->lg_fh_list = fhp;
++
++	/* Call nfsd to encode layout */
++	rc = filelayout_encode_layout(xdr, layout);
++exit:
++	kfree(layout);	/* either pointer may still be NULL; kfree(NULL) is a no-op */
++	kfree(fhp);
++	return rc;
++
++error:	/* failure: report a zero-length segment, then share the cleanup */
++	res->lg_seg.length = 0;
++	goto exit;
++}
++
++/* The DLM export always reports the LAYOUT_NFSV4_1_FILES layout type. */
++static int nfsd4_pnfs_dlm_layouttype(struct super_block *sb)
++{
++	return LAYOUT_NFSV4_1_FILES;
++}
++
++/* For use by DLM cluster file systems exported by pNFSD */
++const struct pnfs_export_operations pnfs_dlm_export_ops = {
++	.layout_type = nfsd4_pnfs_dlm_layouttype,	/* always file layout */
++	.get_device_info = nfsd4_pnfs_dlm_getdevinfo,	/* DS list, deviceid 1 */
++	.get_device_iter = nfsd4_pnfs_dlm_getdeviter,	/* yields deviceid 1 only */
++	.layout_get = nfsd4_pnfs_dlm_layoutget,		/* whole-file READ layouts */
++};
++EXPORT_SYMBOL(pnfs_dlm_export_ops);
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c.orig	2010-09-30 10:17:08.871998000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4pnfsds.c	2010-09-30 10:17:08.873003000 -0400
+@@ -0,0 +1,620 @@
++/*
++*  linux/fs/nfsd/nfs4pnfsds.c
++*
++*  Copyright (c) 2005 The Regents of the University of Michigan.
++*  All rights reserved.
++*
++*  Andy Adamson <andros@umich.edu>
++*
++*  Redistribution and use in source and binary forms, with or without
++*  modification, are permitted provided that the following conditions
++*  are met:
++*
++*  1. Redistributions of source code must retain the above copyright
++*     notice, this list of conditions and the following disclaimer.
++*  2. Redistributions in binary form must reproduce the above copyright
++*     notice, this list of conditions and the following disclaimer in the
++*     documentation and/or other materials provided with the distribution.
++*  3. Neither the name of the University nor the names of its
++*     contributors may be used to endorse or promote products derived
++*     from this software without specific prior written permission.
++*
++*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++*
++*/
++#if defined(CONFIG_PNFSD)
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++#include <linux/param.h>
++#include <linux/sunrpc/svc.h>
++#include <linux/sunrpc/debug.h>
++#include <linux/nfs4.h>
++#include <linux/exportfs.h>
++#include <linux/sched.h>
++
++#include "nfsd.h"
++#include "pnfsd.h"
++#include "state.h"
++
++/*
++ *******************
++ *   	 PNFS
++ *******************
++ */
++/*
++ * Hash tables for pNFS Data Server state
++ *
++ * mds_id_tbl:	list of struct pnfs_mds_id one per Metadata server (MDS) using
++ *		this data server (DS).
++ *
++ * mds_clid_hashtbl[]: uses clientid_hashval(), hash of all clientids obtained
++ *			from any MDS.
++ *
++ * ds_stid_hashtbl[]: uses stateid_hashval(), hash of all stateids obtained
++ *			from any MDS.
++ *
++ */
++/* Hash table for clientid state: CLIENT_HASH_SIZE buckets keyed by cl_id */
++#define CLIENT_HASH_BITS                 4
++#define CLIENT_HASH_SIZE                (1 << CLIENT_HASH_BITS)
++#define CLIENT_HASH_MASK                (CLIENT_HASH_SIZE - 1)
++
++#define clientid_hashval(id) \
++	((id) & CLIENT_HASH_MASK)
++
++/* hash table for pnfs_ds_stateid: keyed by stateowner id plus file id */
++#define STATEID_HASH_BITS              10
++#define STATEID_HASH_SIZE              (1 << STATEID_HASH_BITS)
++#define STATEID_HASH_MASK              (STATEID_HASH_SIZE - 1)
++
++#define stateid_hashval(owner_id, file_id)  \
++	(((owner_id) + (file_id)) & STATEID_HASH_MASK)
++
++static struct list_head mds_id_tbl;	/* pnfs_mds_id entries, via di_hash */
++static struct list_head mds_clid_hashtbl[CLIENT_HASH_SIZE];	/* via dc_hash */
++static struct list_head ds_stid_hashtbl[STATEID_HASH_SIZE];	/* via ds_hash */
++
++static inline void put_ds_clientid(struct pnfs_ds_clientid *dcp);
++static inline void put_ds_mdsid(struct pnfs_mds_id *mdp);
++
++/* Mutex for data server state.  Needs to be separate from
++ * mds state mutex since a node can be both mds and ds */
++static DEFINE_MUTEX(ds_mutex);
++static struct thread_info *ds_mutex_owner;	/* checked by ds_unlock_state() */
++
++/* Take ds_mutex and record the calling thread as its owner. */
++static void ds_lock_state(void)
++{
++	mutex_lock(&ds_mutex);
++	ds_mutex_owner = current_thread_info();
++}
++
++/* Release ds_mutex; BUG if the caller is not the recorded owner. */
++static void ds_unlock_state(void)
++{
++	BUG_ON(ds_mutex_owner != current_thread_info());
++	ds_mutex_owner = NULL;
++	mutex_unlock(&ds_mutex);
++}
++
++/* Return nonzero when both clientids have the same cl_id and cl_boot. */
++static int cmp_clid(const clientid_t *cl1, const clientid_t *cl2)
++{
++	return (cl1->cl_id == cl2->cl_id) &&
++	       (cl1->cl_boot == cl2->cl_boot);
++}
++
++/* Initialise the DS hash tables and the MDS id list as empty lists. */
++void nfs4_pnfs_state_init(void)
++{
++	int i;
++
++	INIT_LIST_HEAD(&mds_id_tbl);
++
++	for (i = 0; i < STATEID_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&ds_stid_hashtbl[i]);
++
++	for (i = 0; i < CLIENT_HASH_SIZE; i++)
++		INIT_LIST_HEAD(&mds_clid_hashtbl[i]);
++}
++
++/* Return the pnfs_mds_id on mds_id_tbl whose di_mdsid is @mdsid, or NULL. */
++static struct pnfs_mds_id *find_pnfs_mds_id(u32 mdsid)
++{
++	struct pnfs_mds_id *mdp;
++
++	dprintk("pNFSD: %s\n", __func__);
++	list_for_each_entry(mdp, &mds_id_tbl, di_hash)
++		if (mdp->di_mdsid == mdsid)
++			return mdp;
++
++	return NULL;
++}
++
++/* Look up @clid in mds_clid_hashtbl; returns the entry or NULL. */
++static struct pnfs_ds_clientid *find_pnfs_ds_clientid(const clientid_t *clid)
++{
++	unsigned int hashval = clientid_hashval(clid->cl_id);
++	struct pnfs_ds_clientid *dcp;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	list_for_each_entry(dcp, &mds_clid_hashtbl[hashval], dc_hash) {
++		if (cmp_clid(&dcp->dc_mdsclid, clid))
++			return dcp;
++	}
++
++	return NULL;
++}
++
++/* Find the DS stateid with @stid's owner id, file id and boot; else NULL. */
++static struct pnfs_ds_stateid *find_pnfs_ds_stateid(stateid_t *stid)
++{
++	struct pnfs_ds_stateid *dsp;
++	u32 owner_id = stid->si_stateownerid;
++	u32 file_id = stid->si_fileid;
++	unsigned int hashval;
++
++	dprintk("pNFSD: %s\n", __func__);
++	hashval = stateid_hashval(owner_id, file_id);
++	list_for_each_entry(dsp, &ds_stid_hashtbl[hashval], ds_hash) {
++		stateid_t *sid = &dsp->ds_stid;
++
++		if (sid->si_stateownerid != owner_id ||
++		    sid->si_fileid != file_id ||
++		    sid->si_boot != stid->si_boot)
++			continue;
++		dprintk("NFSD: %s <-- %p ds_flags %lx " STATEID_FMT "\n",
++			__func__, dsp, dsp->ds_flags, STATEID_VAL(sid));
++		return dsp;
++	}
++	return NULL;
++}
++
++/* kref release callback: unlink the pnfs_mds_id from its lists and free it. */
++static void release_ds_mdsid(struct kref *kref)
++{
++	struct pnfs_mds_id *mdp;
++
++	mdp = container_of(kref, struct pnfs_mds_id, di_ref);
++	dprintk("pNFSD: %s\n", __func__);
++	list_del(&mdp->di_mdsclid);
++	list_del(&mdp->di_hash);
++	kfree(mdp);
++}
++
++static void
++release_ds_clientid(struct kref *kref)	/* kref release callback for dc_ref */
++{
++	struct pnfs_ds_clientid *dcp =
++		container_of(kref, struct pnfs_ds_clientid, dc_ref);
++	struct pnfs_mds_id *mdp;
++	dprintk("pNFSD: %s\n", __func__);
++
++	mdp = find_pnfs_mds_id(dcp->dc_mdsid);
++	if (mdp)
++		put_ds_mdsid(mdp);	/* may free mdp and unlink its list heads */
++
++	list_del(&dcp->dc_hash);
++	list_del(&dcp->dc_stateid);
++	list_del(&dcp->dc_permdsid);
++	kfree(dcp);
++}
++
++static void
++release_ds_stateid(struct kref *kref)	/* kref release callback for ds_ref */
++{
++	struct pnfs_ds_stateid *dsp =
++		container_of(kref, struct pnfs_ds_stateid, ds_ref);
++	struct pnfs_ds_clientid *dcp;
++	dprintk("pNFS %s: dsp %p\n", __func__, dsp);
++
++	/* NOTE(review): ds_mdsclid is only assigned in update_ds_stateid() */
++	dcp = find_pnfs_ds_clientid(&dsp->ds_mdsclid);
++	if (dcp)
++		put_ds_clientid(dcp);	/* may in turn free the clientid */
++	list_del(&dsp->ds_hash);
++	list_del(&dsp->ds_perclid);
++	kfree(dsp);
++}
++
++/* Drop a reference on @dcp; release_ds_clientid() runs on the last put. */
++static inline void put_ds_clientid(struct pnfs_ds_clientid *dcp)
++{
++	dprintk("pNFS %s: dcp %p ref %d\n", __func__, dcp,
++		atomic_read(&dcp->dc_ref.refcount));
++	kref_put(&dcp->dc_ref, release_ds_clientid);
++}
++
++/* Take an additional reference on @dcp. */
++static inline void get_ds_clientid(struct pnfs_ds_clientid *dcp)
++{
++	dprintk("pNFS %s: dcp %p ref %d\n", __func__, dcp,
++		atomic_read(&dcp->dc_ref.refcount));
++	kref_get(&dcp->dc_ref);
++}
++
++/* Drop a reference on @mdp; release_ds_mdsid() runs on the last put. */
++static inline void put_ds_mdsid(struct pnfs_mds_id *mdp)
++{
++	dprintk("pNFS %s: mdp %p ref %d\n", __func__, mdp,
++		atomic_read(&mdp->di_ref.refcount));
++	kref_put(&mdp->di_ref, release_ds_mdsid);
++}
++
++/* Take an additional reference on @mdp. */
++static inline void get_ds_mdsid(struct pnfs_mds_id *mdp)
++{
++	dprintk("pNFS %s: mdp %p ref %d\n", __func__, mdp,
++		atomic_read(&mdp->di_ref.refcount));
++	kref_get(&mdp->di_ref);
++}
++
++/* Drop a reference on @dsp; release_ds_stateid() runs on the last put. */
++static inline void put_ds_stateid(struct pnfs_ds_stateid *dsp)
++{
++	dprintk("pNFS %s: dsp %p ref %d\n", __func__, dsp,
++		atomic_read(&dsp->ds_ref.refcount));
++	kref_put(&dsp->ds_ref, release_ds_stateid);
++}
++
++/* Take an additional reference on @dsp. */
++static inline void get_ds_stateid(struct pnfs_ds_stateid *dsp)
++{
++	dprintk("pNFS %s: dsp %p ref %d\n", __func__, dsp,
++		atomic_read(&dsp->ds_ref.refcount));
++	kref_get(&dsp->ds_ref);
++}
++
++void
++nfs4_pnfs_state_shutdown(void)	/* drop every hashed DS stateid */
++{
++	struct pnfs_ds_stateid *dsp;
++	int i;
++
++	dprintk("pNFSD %s: -->\n", __func__);
++
++	ds_lock_state();
++	for (i = 0; i < STATEID_HASH_SIZE; i++) {
++		while (!list_empty(&ds_stid_hashtbl[i])) {
++			dsp = list_entry(ds_stid_hashtbl[i].next,
++					 struct pnfs_ds_stateid, ds_hash);
++			/* NOTE(review): only the final put unhashes dsp */
++			put_ds_stateid(dsp);
++		}
++	}
++	ds_unlock_state();
++}
++
++/* Allocate a pnfs_mds_id for gsp->dsid on mds_id_tbl; NULL if out of memory. */
++static struct pnfs_mds_id *alloc_init_mds_id(struct pnfs_get_state *gsp)
++{
++	struct pnfs_mds_id *mdp;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	mdp = kmalloc(sizeof(*mdp), GFP_KERNEL);
++	if (!mdp)
++		return NULL;
++	kref_init(&mdp->di_ref);
++	mdp->di_mdsid = gsp->dsid;
++	mdp->di_mdsboot = 0;
++	INIT_LIST_HEAD(&mdp->di_mdsclid);
++	INIT_LIST_HEAD(&mdp->di_hash);
++	list_add(&mdp->di_hash, &mds_id_tbl);
++	return mdp;
++}
++
++/* Allocate and hash a DS clientid for gsp->clid under its MDS id; NULL on OOM. */
++static struct pnfs_ds_clientid *
++alloc_init_ds_clientid(struct pnfs_get_state *gsp)
++{
++	struct pnfs_mds_id *mdp;
++	struct pnfs_ds_clientid *dcp;
++	clientid_t *clid = (clientid_t *)&gsp->clid;
++	unsigned int hashval = clientid_hashval(clid->cl_id);
++
++	dprintk("pNFSD: %s\n", __func__);
++	/* reuse the MDS id if it is already known, else create it */
++	mdp = find_pnfs_mds_id(gsp->dsid);
++	if (mdp)
++		get_ds_mdsid(mdp);
++	else
++		mdp = alloc_init_mds_id(gsp);
++	if (!mdp)
++		return NULL;
++
++	dcp = kmalloc(sizeof(*dcp), GFP_KERNEL);
++	if (!dcp) {
++		put_ds_mdsid(mdp);	/* don't leak the reference taken above */
++		return NULL;
++	}
++	INIT_LIST_HEAD(&dcp->dc_hash);
++	INIT_LIST_HEAD(&dcp->dc_stateid);
++	INIT_LIST_HEAD(&dcp->dc_permdsid);
++	list_add(&dcp->dc_hash, &mds_clid_hashtbl[hashval]);
++	list_add(&dcp->dc_permdsid, &mdp->di_mdsclid);
++	dcp->dc_mdsclid = *clid;
++	kref_init(&dcp->dc_ref);
++	dcp->dc_mdsid = gsp->dsid;
++	return dcp;
++}
++
++/* Allocate a DS stateid flagged NEW (not VALID) and hash it; NULL on OOM. */
++static struct pnfs_ds_stateid *
++alloc_init_ds_stateid(struct svc_fh *cfh, stateid_t *stidp)
++{
++	unsigned int hashval;
++	struct pnfs_ds_stateid *dsp;
++
++	dprintk("pNFSD: %s\n", __func__);
++
++	dsp = kmalloc(sizeof(*dsp), GFP_KERNEL);
++	if (!dsp)
++		return NULL;
++
++	kref_init(&dsp->ds_ref);
++	init_waitqueue_head(&dsp->ds_waitq);
++	INIT_LIST_HEAD(&dsp->ds_hash);
++	INIT_LIST_HEAD(&dsp->ds_perclid);
++	memcpy(&dsp->ds_stid, stidp, sizeof(stateid_t));
++	fh_copy_shallow(&dsp->ds_fh, &cfh->fh_handle);
++
++	dsp->ds_access = 0;
++	dsp->ds_status = 0;
++	dsp->ds_flags = 0L;
++	set_bit(DS_STATEID_NEW, &dsp->ds_flags);
++	clear_bit(DS_STATEID_VALID, &dsp->ds_flags);
++	clear_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++
++	hashval = stateid_hashval(stidp->si_stateownerid, stidp->si_fileid);
++	list_add(&dsp->ds_hash, &ds_stid_hashtbl[hashval]);
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, dsp);
++	return dsp;
++}
++
++static int
++update_ds_stateid(struct pnfs_ds_stateid *dsp, struct svc_fh *cfh,
++		  struct pnfs_get_state *gsp)
++{
++	struct pnfs_ds_clientid *dcp;
++	int new = 0;	/* set when dcp was allocated by this call */
++
++	dprintk("pNFSD: %s dsp %p\n", __func__, dsp);
++
++	dcp = find_pnfs_ds_clientid((clientid_t *)&gsp->clid);
++	if (!dcp) {
++		dcp = alloc_init_ds_clientid(gsp);
++		if (!dcp)
++			return 1;	/* nonzero: the caller treats this as an error */
++		new = 1;
++	}
++	if (test_bit(DS_STATEID_NEW, &dsp->ds_flags)) {
++		list_add(&dsp->ds_perclid, &dcp->dc_stateid);
++		if (!new)	/* a fresh dcp already carries its initial kref */
++			get_ds_clientid(dcp);
++	}
++
++	memcpy(&dsp->ds_stid, &gsp->stid, sizeof(stateid_t));
++	dsp->ds_access = gsp->access;
++	dsp->ds_status = 0;
++	dsp->ds_verifier[0] = gsp->verifier[0];
++	dsp->ds_verifier[1] = gsp->verifier[1];
++	memcpy(&dsp->ds_mdsclid, &gsp->clid, sizeof(clientid_t));
++	set_bit(DS_STATEID_VALID, &dsp->ds_flags);	/* now usable by lookups */
++	clear_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++	clear_bit(DS_STATEID_NEW, &dsp->ds_flags);
++	return 0;
++}
++
++/* Drop a reference on the DS stateid matching gs->stid; -ENOENT if none. */
++int nfs4_pnfs_cb_change_state(struct pnfs_get_state *gs)
++{
++	stateid_t *stid = (stateid_t *)&gs->stid;
++	struct pnfs_ds_stateid *dsp;
++
++	dprintk("pNFSD: %s stateid=" STATEID_FMT "\n", __func__,
++		STATEID_VAL(stid));
++
++	ds_lock_state();
++	dsp = find_pnfs_ds_stateid(stid);
++	if (dsp)
++		put_ds_stateid(dsp);
++	ds_unlock_state();
++
++	dprintk("pNFSD: %s dsp %p\n", __func__, dsp);
++
++	if (!dsp)
++		return -ENOENT;
++	return 0;
++}
++
++/* Retrieves and validates stateid.
++ * If stateid exists and its fields match, return it.
++ * If stateid exists but either the generation or
++ * ownerids don't match, check with mds to see if it is valid.
++ * If the stateid doesn't exist, the first thread creates a
++ * invalid *marker* stateid, then checks to see if the
++ * stateid exists on the mds.  If so, it validates the *marker*
++ * stateid and updates its fields.  Subsequent threads that
++ * find the *marker* stateid wait until it is valid or an error
++ * occurs.
++ * Called with ds_lock_state() held; may drop and retake it.
++ */
++static struct pnfs_ds_stateid *
++nfsv4_ds_get_state(struct svc_fh *cfh, stateid_t *stidp)
++{
++	struct inode *ino = cfh->fh_dentry->d_inode;
++	struct super_block *sb;
++	struct pnfs_ds_stateid *dsp = NULL;
++	struct pnfs_get_state gs = { .access = 0 };
++	int status = 0, waiter = 0, failed;
++
++	dprintk("pNFSD: %s -->\n", __func__);
++
++	dsp = find_pnfs_ds_stateid(stidp);
++	if (dsp && test_bit(DS_STATEID_VALID, &dsp->ds_flags) &&
++	    (stidp->si_generation == dsp->ds_stid.si_generation))
++		goto out_noput;
++
++	sb = ino->i_sb;
++	if (!sb || !sb->s_pnfs_op->get_state)
++		goto out_noput;
++
++	/* Uninitialize current state if it exists yet it doesn't match.
++	 * If it is already invalid, another thread is checking state */
++	if (dsp) {
++		if (!test_and_clear_bit(DS_STATEID_VALID, &dsp->ds_flags))
++			waiter = 1;
++	} else {
++		dsp = alloc_init_ds_stateid(cfh, stidp);
++		if (!dsp)
++			goto out_noput;
++	}
++
++	dprintk("pNFSD: %s Starting loop\n", __func__);
++	get_ds_stateid(dsp);
++	while (!test_bit(DS_STATEID_VALID, &dsp->ds_flags)) {
++		ds_unlock_state();
++
++		/* Another thread is checking the state */
++		if (waiter) {
++			dprintk("pNFSD: %s waiting\n", __func__);
++			wait_event_interruptible_timeout(dsp->ds_waitq,
++				(test_bit(DS_STATEID_VALID, &dsp->ds_flags) ||
++				 test_bit(DS_STATEID_ERROR, &dsp->ds_flags)),
++				 msecs_to_jiffies(1024));
++			dprintk("pNFSD: %s awake\n", __func__);
++			ds_lock_state();
++			if (test_bit(DS_STATEID_ERROR, &dsp->ds_flags))
++				goto out;
++			continue;
++		}
++
++		/* Validate stateid on mds */
++		dprintk("pNFSD: %s Checking state on MDS\n", __func__);
++		memcpy(&gs.stid, stidp, sizeof(stateid_t));
++		status = sb->s_pnfs_op->get_state(ino, &cfh->fh_handle, &gs);
++		dprintk("pNFSD: %s from MDS status %d\n", __func__, status);
++		ds_lock_state();
++		/* if !status and stateid is valid, update id and mark valid */
++		if (status || update_ds_stateid(dsp, cfh, &gs)) {
++			set_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++			/* remove invalid stateid from list */
++			put_ds_stateid(dsp);
++			wake_up(&dsp->ds_waitq);
++			goto out;
++		}
++
++		wake_up(&dsp->ds_waitq);
++	}
++out:
++	/* Sample the error bit first: the put below may free dsp */
++	failed = test_bit(DS_STATEID_ERROR, &dsp->ds_flags);
++	put_ds_stateid(dsp);
++	if (failed)
++		dsp = NULL;
++out_noput:
++	if (dsp)
++		dprintk("pNFSD: %s <-- dsp %p ds_flags %lx " STATEID_FMT "\n",
++			__func__, dsp, dsp->ds_flags, STATEID_VAL(&dsp->ds_stid));
++	/* If error, return null */
++	if (dsp && test_bit(DS_STATEID_ERROR, &dsp->ds_flags))
++		dsp = NULL;
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, dsp);
++	return dsp;
++}
++
++int
++nfs4_preprocess_pnfs_ds_stateid(struct svc_fh *cfh, stateid_t *stateid)
++{
++	struct pnfs_ds_stateid *dsp;
++	int status = 0;	/* NOTE(review): assigned nfserr_* values below */
++
++	dprintk("pNFSD: %s --> " STATEID_FMT "\n", __func__,
++		STATEID_VAL(stateid));
++
++	/* Must release state lock while verifying stateid on mds */
++	nfs4_unlock_state();
++	ds_lock_state();
++	dsp = nfsv4_ds_get_state(cfh, stateid);
++	if (dsp) {
++		get_ds_stateid(dsp);	/* held until the checks below are done */
++		dprintk("pNFSD: %s Found " STATEID_FMT "\n", __func__,
++			STATEID_VAL(&dsp->ds_stid));
++
++		dprintk("NFSD: %s: dsp %p fh_size %u:%u "
++			"fh [%08x:%08x:%08x:%08x]:[%08x:%08x:%08x:%08x] "
++			"gen %x:%x\n",
++			__func__, dsp,
++			cfh->fh_handle.fh_size, dsp->ds_fh.fh_size,
++			((unsigned *)&cfh->fh_handle.fh_base)[0],
++			((unsigned *)&cfh->fh_handle.fh_base)[1],
++			((unsigned *)&cfh->fh_handle.fh_base)[2],
++			((unsigned *)&cfh->fh_handle.fh_base)[3],
++			((unsigned *)&dsp->ds_fh.fh_base)[0],
++			((unsigned *)&dsp->ds_fh.fh_base)[1],
++			((unsigned *)&dsp->ds_fh.fh_base)[2],
++			((unsigned *)&dsp->ds_fh.fh_base)[3],
++			stateid->si_generation, dsp->ds_stid.si_generation);
++	}
++
++	if (!dsp ||	/* the remaining tests run only when dsp is non-NULL */
++	    (cfh->fh_handle.fh_size != dsp->ds_fh.fh_size) ||
++	    (memcmp(&cfh->fh_handle.fh_base, &dsp->ds_fh.fh_base,
++		    dsp->ds_fh.fh_size) != 0) ||
++	    (stateid->si_generation > dsp->ds_stid.si_generation))
++		status = nfserr_bad_stateid;
++	else if (stateid->si_generation < dsp->ds_stid.si_generation)
++		status = nfserr_old_stateid;
++
++	if (dsp)
++		put_ds_stateid(dsp);
++	ds_unlock_state();
++	nfs4_lock_state();	/* retake the lock that was dropped on entry */
++	dprintk("pNFSD: %s <-- status %d\n", __func__, be32_to_cpu(status));
++	return status;
++}
++
++/* Store the two-word verifier for @stateid (or the filesystem's) at @p. */
++void
++nfs4_ds_get_verifier(stateid_t *stateid, struct super_block *sb, u32 *p)
++{
++	struct pnfs_ds_stateid *dsp = NULL;
++
++	dprintk("pNFSD: %s --> stid %p\n", __func__, stateid);
++
++	ds_lock_state();
++	if (stateid != NULL) {
++		dsp = find_pnfs_ds_stateid(stateid);
++		if (dsp)
++			get_ds_stateid(dsp);
++	}
++
++	/* XXX: Should we fetch the stateid or wait if some other
++	 * thread is currently retrieving the stateid ? */
++	if (dsp && test_bit(DS_STATEID_VALID, &dsp->ds_flags)) {
++		p[0] = dsp->ds_verifier[0];
++		p[1] = dsp->ds_verifier[1];
++	} else {
++		/* must be on MDS */
++		ds_unlock_state();
++		sb->s_pnfs_op->get_verifier(sb, p);
++		ds_lock_state();
++	}
++	if (dsp)	/* drop our reference whether or not dsp was VALID */
++		put_ds_stateid(dsp);
++	ds_unlock_state();
++	dprintk("pNFSD: %s <-- dsp %p\n", __func__, dsp);
++}
++
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4proc.c.orig	2010-09-30 10:15:18.334728000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4proc.c	2010-09-30 10:17:08.878998000 -0400
+@@ -34,10 +34,14 @@
+  */
+ #include <linux/file.h>
+ #include <linux/slab.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd4_block.h>
+ 
+ #include "cache.h"
+ #include "xdr4.h"
+ #include "vfs.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY		NFSDDBG_PROC
+ 
+@@ -372,6 +376,24 @@ nfsd4_open(struct svc_rqst *rqstp, struc
+ 	 * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
+ 	 */
+ 	status = nfsd4_process_open2(rqstp, &cstate->current_fh, open);
++#if defined(CONFIG_SPNFS)
++	if (!status && spnfs_enabled()) {
++		struct inode *inode = cstate->current_fh.fh_dentry->d_inode;
++
++		status = spnfs_open(inode, open);
++		if (status) {
++			dprintk(
++			     "nfsd: pNFS could not be enabled for inode: %lu\n",
++			     inode->i_ino);
++			/*
++			 * XXX When there's a failure then need to indicate to
++			 * future ops that no pNFS is available.  Should I save
++			 * the status in the inode?  It's kind of a big hammer.
++			 * But there may be no stripes available?
++			 */
++		}
++	}
++#endif /* CONFIG_SPNFS */
+ out:
+ 	if (open->op_stateowner) {
+ 		nfs4_get_stateowner(open->op_stateowner);
+@@ -454,16 +476,30 @@ nfsd4_access(struct svc_rqst *rqstp, str
+ 			   &access->ac_supported);
+ }
+ 
++static void
++nfsd4_get_verifier(struct super_block *sb, nfs4_verifier *verf)
++{
++	u32 *p = (u32 *)verf->data;
++
++#if defined(CONFIG_PNFSD)
++	if (sb->s_pnfs_op && sb->s_pnfs_op->get_verifier) {
++		nfs4_ds_get_verifier(NULL, sb, p);
++		return;
++	}
++#endif /* CONFIG_PNFSD */
++
++	*p++ = nfssvc_boot.tv_sec;
++	*p++ = nfssvc_boot.tv_usec;
++}
++
+ static __be32
+ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ 	     struct nfsd4_commit *commit)
+ {
+ 	__be32 status;
+ 
+-	u32 *p = (u32 *)commit->co_verf.data;
+-	*p++ = nfssvc_boot.tv_sec;
+-	*p++ = nfssvc_boot.tv_usec;
+-
++	nfsd4_get_verifier(cstate->current_fh.fh_dentry->d_inode->i_sb,
++			   &commit->co_verf);
+ 	status = nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
+ 			     commit->co_count);
+ 	if (status == nfserr_symlink)
+@@ -816,7 +852,6 @@ nfsd4_write(struct svc_rqst *rqstp, stru
+ {
+ 	stateid_t *stateid = &write->wr_stateid;
+ 	struct file *filp = NULL;
+-	u32 *p;
+ 	__be32 status = nfs_ok;
+ 	unsigned long cnt;
+ 
+@@ -838,13 +873,49 @@ nfsd4_write(struct svc_rqst *rqstp, stru
+ 
+ 	cnt = write->wr_buflen;
+ 	write->wr_how_written = write->wr_stable_how;
+-	p = (u32 *)write->wr_verifier.data;
+-	*p++ = nfssvc_boot.tv_sec;
+-	*p++ = nfssvc_boot.tv_usec;
+ 
++	nfsd4_get_verifier(cstate->current_fh.fh_dentry->d_inode->i_sb,
++			   &write->wr_verifier);
++#if defined(CONFIG_SPNFS)
++#if defined(CONFIG_SPNFS_BLOCK)
++	if (pnfs_block_enabled(cstate->current_fh.fh_dentry->d_inode, 0)) {
++                status = bl_layoutrecall(cstate->current_fh.fh_dentry->d_inode,
++		    RETURN_FILE, write->wr_offset, write->wr_buflen);
++                if (!status) {
++                        status =  nfsd_write(rqstp, &cstate->current_fh, filp,
++			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
++			     &cnt, &write->wr_how_written);
++                }
++        } else
++#endif
++		
++	if (spnfs_enabled()) {
++		status = spnfs_write(cstate->current_fh.fh_dentry->d_inode,
++			write->wr_offset, write->wr_buflen, write->wr_vlen,
++			rqstp);
++		if (status == nfs_ok) {
++			/* DMXXX: HACK to get filesize set */
++			/* write one byte at offset+length-1 */
++			struct kvec k[1];
++			char zero = 0;
++			unsigned long cnt = 1;
++
++			k[0].iov_base = (void *)&zero;
++			k[0].iov_len = 1;
++			nfsd_write(rqstp, &cstate->current_fh, filp,
++				   write->wr_offset+write->wr_buflen-1, k, 1,
++				   &cnt, &write->wr_how_written);
++		}
++	} else /* we're not an MDS */
++		status =  nfsd_write(rqstp, &cstate->current_fh, filp,
++			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
++			     &cnt, &write->wr_how_written);
++#else
+ 	status =  nfsd_write(rqstp, &cstate->current_fh, filp,
+ 			     write->wr_offset, rqstp->rq_vec, write->wr_vlen,
+ 			     &cnt, &write->wr_how_written);
++#endif /* CONFIG_SPNFS */
++
+ 	if (filp)
+ 		fput(filp);
+ 
+@@ -935,6 +1006,306 @@ nfsd4_verify(struct svc_rqst *rqstp, str
+ 	return status == nfserr_same ? nfs_ok : status;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++static __be32
++nfsd4_layout_verify(struct super_block *sb, struct svc_export *exp,
++		    unsigned int layout_type)
++{
++	int status, type;
++
++	/* check to see if pNFS  is supported. */
++	status = nfserr_layoutunavailable;
++	if (exp && exp->ex_pnfs == 0) {
++		dprintk("%s: Underlying file system "
++			"is not exported over pNFS\n", __func__);
++		goto out;
++	}
++	if (!sb->s_pnfs_op || !sb->s_pnfs_op->layout_type) {
++		dprintk("%s: Underlying file system "
++			"does not support pNFS\n", __func__);
++		goto out;
++	}
++
++	type = sb->s_pnfs_op->layout_type(sb);
++
++	/* check to see if requested layout type is supported. */
++	status = nfserr_unknown_layouttype;
++	if (!type)
++		dprintk("BUG: %s: layout_type 0 is reserved and must not be "
++			"used by filesystem\n", __func__);
++	else if (type != layout_type)
++		dprintk("%s: requested layout type %d "
++		       "does not match supported type %d\n",
++			__func__, layout_type, type);
++	else
++		status = nfs_ok;
++out:
++	return status;
++}
++
++static __be32
++nfsd4_getdevlist(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_getdevlist *gdlp)
++{
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++	int status;
++
++	dprintk("%s: type %u maxdevices %u cookie %llu verf %llu\n",
++		__func__, gdlp->gd_layout_type, gdlp->gd_maxdevices,
++		gdlp->gd_cookie, gdlp->gd_verf);
++
++
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* We must be able to encode at least one device */
++	if (!gdlp->gd_maxdevices)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     gdlp->gd_layout_type);
++	if (status)
++		goto out;
++
++	/* Do nothing if underlying file system does not support
++	 * getdevicelist */
++	if (!sb->s_pnfs_op->get_device_iter) {
++		status = nfserr_notsupp;
++		goto out;
++	}
++
++	/* Set up arguments so device can be retrieved at encode time */
++	gdlp->gd_fhp = &cstate->current_fh;
++out:
++	return status;
++}
++
++static __be32
++nfsd4_getdevinfo(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_getdevinfo *gdp)
++{
++	struct super_block *sb;
++	int status;
++	clientid_t clid;
++
++	dprintk("%s: layout_type %u dev_id %llx:%llx maxcnt %u\n",
++	       __func__, gdp->gd_layout_type, gdp->gd_devid.sbid,
++	       gdp->gd_devid.devid, gdp->gd_maxcount);
++
++	status = nfserr_inval;
++	sb = find_sbid_id(gdp->gd_devid.sbid);
++	dprintk("%s: sb %p\n", __func__, sb);
++	if (!sb) {
++		status = nfserr_noent;
++		goto out;
++	}
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, NULL, gdp->gd_layout_type);
++	if (status)
++		goto out;
++
++	/* Set up arguments so device can be retrieved at encode time */
++	gdp->gd_sb = sb;
++
++	/* Update notifications */
++	copy_clientid(&clid, cstate->session);
++	pnfs_set_device_notify(&clid, gdp->gd_notify_types);
++out:
++	return status;
++}
++
++static __be32
++nfsd4_layoutget(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutget *lgp)
++{
++	int status;
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lgp->lg_seg.layout_type);
++	if (status)
++		goto out;
++
++	status = nfserr_badiomode;
++	if (lgp->lg_seg.iomode != IOMODE_READ &&
++	    lgp->lg_seg.iomode != IOMODE_RW) {
++		dprintk("pNFS %s: invalid iomode %d\n", __func__,
++			lgp->lg_seg.iomode);
++		goto out;
++	}
++
++	/* Set up arguments so layout can be retrieved at encode time */
++	lgp->lg_fhp = current_fh;
++	copy_clientid((clientid_t *)&lgp->lg_seg.clientid, cstate->session);
++	status = nfs_ok;
++out:
++	return status;
++}
++
++static __be32
++nfsd4_layoutcommit(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	int status;
++	struct inode *ino = NULL;
++	struct iattr ia;
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	dprintk("NFSD: nfsd4_layoutcommit \n");
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	ino = current_fh->fh_dentry->d_inode;
++	if (!ino)
++		goto out;
++
++	status = nfserr_inval;
++	sb = ino->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lcp->args.lc_seg.layout_type);
++	if (status)
++		goto out;
++
++	/* This will only extend the file length.  Do a quick
++	 * check to see if there is any point in waiting for the update
++	 * locks.
++	 * TODO: Is this correct for all back ends?
++	 */
++	dprintk("%s:new offset: %d new size: %llu old size: %lld\n",
++		__func__, lcp->args.lc_newoffset, lcp->args.lc_last_wr + 1,
++		ino->i_size);
++
++	/* Set clientid from sessionid */
++	copy_clientid((clientid_t *)&lcp->args.lc_seg.clientid, cstate->session);
++	lcp->res.lc_size_chg = 0;
++	if (sb->s_pnfs_op->layout_commit) {
++		status = sb->s_pnfs_op->layout_commit(ino, &lcp->args, &lcp->res);
++		dprintk("%s:layout_commit result %d\n", __func__, status);
++	} else {
++		fh_lock(current_fh);
++		if ((lcp->args.lc_newoffset == 0) ||
++		    ((lcp->args.lc_last_wr + 1) <= ino->i_size)) {
++			status = 0;
++			lcp->res.lc_size_chg = 0;
++			fh_unlock(current_fh);
++			goto out;
++		}
++
++		/* Try our best to update the file size; map -errno to nfserr */
++		dprintk("%s: Modifying file size\n", __func__);
++		ia.ia_valid = ATTR_SIZE;
++		ia.ia_size = lcp->args.lc_last_wr + 1;
++		status = nfserrno(notify_change(current_fh->fh_dentry, &ia));
++		fh_unlock(current_fh);
++		dprintk("%s:notify_change result %d\n", __func__, status);
++	}
++
++	if (!status && lcp->res.lc_size_chg &&
++	    EX_ISSYNC(current_fh->fh_export)) {
++		dprintk("%s: Synchronously writing inode size %llu\n",
++			__func__, ino->i_size);
++		write_inode_now(ino, 1);
++		lcp->res.lc_newsize = i_size_read(ino);
++	}
++out:
++	return status;
++}
++
++static __be32
++nfsd4_layoutreturn(struct svc_rqst *rqstp,
++		struct nfsd4_compound_state *cstate,
++		struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	int status;
++	struct super_block *sb;
++	struct svc_fh *current_fh = &cstate->current_fh;
++
++	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (!sb)
++		goto out;
++
++	/* Ensure underlying file system supports pNFS and,
++	 * if so, the requested layout type
++	 */
++	status = nfsd4_layout_verify(sb, current_fh->fh_export,
++				     lrp->args.lr_seg.layout_type);
++	if (status)
++		goto out;
++
++	status = nfserr_inval;
++	if (lrp->args.lr_return_type != RETURN_FILE &&
++	    lrp->args.lr_return_type != RETURN_FSID &&
++	    lrp->args.lr_return_type != RETURN_ALL) {
++		dprintk("pNFS %s: invalid return_type %d\n", __func__,
++			lrp->args.lr_return_type);
++		goto out;
++	}
++
++	status = nfserr_inval;
++	if (lrp->args.lr_seg.iomode != IOMODE_READ &&
++	    lrp->args.lr_seg.iomode != IOMODE_RW &&
++	    lrp->args.lr_seg.iomode != IOMODE_ANY) {
++		dprintk("pNFS %s: invalid iomode %d\n", __func__,
++			lrp->args.lr_seg.iomode);
++		goto out;
++	}
++
++	/* Set clientid from sessionid */
++	copy_clientid((clientid_t *)&lrp->args.lr_seg.clientid, cstate->session);
++	lrp->lrs_present = (lrp->args.lr_return_type == RETURN_FILE);
++	status = nfs4_pnfs_return_layout(sb, current_fh, lrp);
++out:
++	dprintk("pNFS %s: status %d return_type 0x%x lrs_present %d\n",
++		__func__, status, lrp->args.lr_return_type, lrp->lrs_present);
++	return status;
++}
++#endif /* CONFIG_PNFSD */
++
+ /*
+  * NULL call.
+  */
+@@ -1317,6 +1688,29 @@ static struct nfsd4_operation nfsd4_ops[
+ 		.op_flags = ALLOWED_WITHOUT_FH,
+ 		.op_name = "OP_RECLAIM_COMPLETE",
+ 	},
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICELIST] = {
++		.op_func = (nfsd4op_func)nfsd4_getdevlist,
++		.op_name = "OP_GETDEVICELIST",
++	},
++	[OP_GETDEVICEINFO] = {
++		.op_func = (nfsd4op_func)nfsd4_getdevinfo,
++		.op_flags = ALLOWED_WITHOUT_FH,
++		.op_name = "OP_GETDEVICEINFO",
++	},
++	[OP_LAYOUTGET] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutget,
++		.op_name = "OP_LAYOUTGET",
++	},
++	[OP_LAYOUTCOMMIT] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutcommit,
++		.op_name = "OP_LAYOUTCOMMIT",
++	},
++	[OP_LAYOUTRETURN] = {
++		.op_func = (nfsd4op_func)nfsd4_layoutreturn,
++		.op_name = "OP_LAYOUTRETURN",
++	},
++#endif /* CONFIG_PNFSD */
+ };
+ 
+ static const char *nfsd4_op_name(unsigned opnum)
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4state.c.orig	2010-09-30 10:15:18.345729000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4state.c	2010-09-30 10:17:08.887003000 -0400
+@@ -42,6 +42,8 @@
+ #include "xdr4.h"
+ #include "vfs.h"
+ 
++#include "pnfsd.h"
++
+ #define NFSDDBG_FACILITY                NFSDDBG_PROC
+ 
+ /* Globals */
+@@ -60,8 +62,6 @@ static u64 current_sessionid = 1;
+ #define ONE_STATEID(stateid)  (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
+ 
+ /* forward declarations */
+-static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
+-static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
+ static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
+ static void nfs4_set_recdir(char *recdir);
+ 
+@@ -69,6 +69,7 @@ static void nfs4_set_recdir(char *recdir
+ 
+ /* Currently used for almost all code touching nfsv4 state: */
+ static DEFINE_MUTEX(client_mutex);
++struct task_struct *client_mutex_owner;
+ 
+ /*
+  * Currently used for the del_recall_lru and file hash table.  In an
+@@ -86,11 +87,21 @@ void
+ nfs4_lock_state(void)
+ {
+ 	mutex_lock(&client_mutex);
++	client_mutex_owner = current;
++}
++
++#define BUG_ON_UNLOCKED_STATE() BUG_ON(client_mutex_owner != current)
++
++void
++nfs4_bug_on_unlocked_state(void)
++{
++	BUG_ON(client_mutex_owner != current);
+ }
+ 
+ void
+ nfs4_unlock_state(void)
+ {
++	client_mutex_owner = NULL;
+ 	mutex_unlock(&client_mutex);
+ }
+ 
+@@ -109,7 +120,7 @@ opaque_hashval(const void *ptr, int nbyt
+ 
+ static struct list_head del_recall_lru;
+ 
+-static inline void
++inline void
+ put_nfs4_file(struct nfs4_file *fi)
+ {
+ 	if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) {
+@@ -120,7 +131,7 @@ put_nfs4_file(struct nfs4_file *fi)
+ 	}
+ }
+ 
+-static inline void
++inline void
+ get_nfs4_file(struct nfs4_file *fi)
+ {
+ 	atomic_inc(&fi->fi_ref);
+@@ -230,7 +241,10 @@ nfs4_close_delegation(struct nfs4_delega
+ 	 * but we want to remove the lease in any case. */
+ 	if (dp->dl_flock)
+ 		vfs_setlease(filp, F_UNLCK, &dp->dl_flock);
++	BUG_ON_UNLOCKED_STATE();
++	nfs4_unlock_state();	/* allow nested layout recall/return */
+ 	nfsd_close(filp);
++	nfs4_lock_state();
+ }
+ 
+ /* Called under the state lock. */
+@@ -266,8 +280,8 @@ static DEFINE_SPINLOCK(client_lock);
+  * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
+  * used in reboot/reset lease grace period processing
+  *
+- * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed
+- * setclientid_confirmed info. 
++ * conf_id_hashtbl[], and conf_str_hashtbl[] hold
++ * confirmed setclientid_confirmed info.
+  *
+  * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed 
+  * setclientid info.
+@@ -292,6 +306,7 @@ static void unhash_generic_stateid(struc
+ 	list_del(&stp->st_hash);
+ 	list_del(&stp->st_perfile);
+ 	list_del(&stp->st_perstateowner);
++	release_pnfs_ds_dev_list(stp);
+ }
+ 
+ static void free_generic_stateid(struct nfs4_stateid *stp)
+@@ -345,7 +360,10 @@ static void release_open_stateid(struct 
+ {
+ 	unhash_generic_stateid(stp);
+ 	release_stateid_lockowners(stp);
++	BUG_ON_UNLOCKED_STATE();
++	nfs4_unlock_state();	/* allow nested layout recall/return */
+ 	nfsd_close(stp->st_vfs_file);
++	nfs4_lock_state();
+ 	free_generic_stateid(stp);
+ }
+ 
+@@ -739,6 +757,8 @@ expire_client(struct nfs4_client *clp)
+ 	struct nfs4_delegation *dp;
+ 	struct list_head reaplist;
+ 
++	BUG_ON_UNLOCKED_STATE();
++
+ 	INIT_LIST_HEAD(&reaplist);
+ 	spin_lock(&recall_lock);
+ 	while (!list_empty(&clp->cl_delegations)) {
+@@ -758,6 +778,7 @@ expire_client(struct nfs4_client *clp)
+ 		sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
+ 		release_openowner(sop);
+ 	}
++	pnfs_expire_client(clp);
+ 	nfsd4_set_callback_client(clp, NULL);
+ 	if (clp->cl_cb_conn.cb_xprt)
+ 		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
+@@ -770,6 +791,13 @@ expire_client(struct nfs4_client *clp)
+ 	spin_unlock(&client_lock);
+ }
+ 
++void expire_client_lock(struct nfs4_client *clp)
++{
++	nfs4_lock_state();
++	expire_client(clp);
++	nfs4_unlock_state();
++}
++
+ static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
+ {
+ 	memcpy(target->cl_verifier.data, source->data,
+@@ -859,6 +887,11 @@ static struct nfs4_client *create_client
+ 	INIT_LIST_HEAD(&clp->cl_strhash);
+ 	INIT_LIST_HEAD(&clp->cl_openowners);
+ 	INIT_LIST_HEAD(&clp->cl_delegations);
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&clp->cl_layouts);
++	INIT_LIST_HEAD(&clp->cl_layoutrecalls);
++	atomic_set(&clp->cl_deviceref, 0);
++#endif /* CONFIG_PNFSD */
+ 	INIT_LIST_HEAD(&clp->cl_sessions);
+ 	INIT_LIST_HEAD(&clp->cl_lru);
+ 	clp->cl_time = get_seconds();
+@@ -908,7 +941,7 @@ move_to_confirmed(struct nfs4_client *cl
+ 	renew_client(clp);
+ }
+ 
+-static struct nfs4_client *
++struct nfs4_client *
+ find_confirmed_client(clientid_t *clid)
+ {
+ 	struct nfs4_client *clp;
+@@ -978,6 +1011,24 @@ find_unconfirmed_client_by_str(const cha
+ 	return NULL;
+ }
+ 
++int
++filter_confirmed_clients(int (* func)(struct nfs4_client *, void *),
++			 void *arg)
++{
++	struct nfs4_client *clp, *next;
++	int i, status = 0;
++
++	for (i = 0; i < CLIENT_HASH_SIZE; i++)
++		list_for_each_entry_safe (clp, next, &conf_str_hashtbl[i],
++					  cl_strhash) {
++			status = func(clp, arg);
++			if (status)	/* stop the whole walk, not one bucket */
++				return status;
++		}
++
++	return status;
++}
++
+ static void
+ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
+ {
+@@ -1110,8 +1161,12 @@ nfsd4_replay_cache_entry(struct nfsd4_co
+ static void
+ nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
+ {
+-	/* pNFS is not supported */
++#if defined(CONFIG_PNFSD)
++	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS |
++				  EXCHGID4_FLAG_USE_PNFS_DS;
++#else  /* CONFIG_PNFSD */
+ 	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
++#endif /* CONFIG_PNFSD */
+ 
+ 	/* Referrals are supported, Migration is not. */
+ 	new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
+@@ -1301,6 +1356,13 @@ nfsd4_create_session(struct svc_rqst *rq
+ 	struct nfsd4_clid_slot *cs_slot = NULL;
+ 	int status = 0;
+ 
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++	/* XXX hack to get local ip address */
++	memcpy(&pnfsd_lexp_addr, &rqstp->rq_xprt->xpt_local,
++		sizeof(pnfsd_lexp_addr));
++	pnfs_lexp_addr_len = rqstp->rq_xprt->xpt_locallen;
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
+ 	nfs4_lock_state();
+ 	unconf = find_unconfirmed_client(&cr_ses->clientid);
+ 	conf = find_confirmed_client(&cr_ses->clientid);
+@@ -1340,25 +1402,26 @@ nfsd4_create_session(struct svc_rqst *rq
+ 		cs_slot->sl_seqid++; /* from 0 to 1 */
+ 		move_to_confirmed(unconf);
+ 
+-		if (cr_ses->flags & SESSION4_BACK_CHAN) {
+-			unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
+-			svc_xprt_get(rqstp->rq_xprt);
+-			rpc_copy_addr(
+-				(struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
+-				sa);
+-			unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
+-			unconf->cl_cb_conn.cb_minorversion =
+-				cstate->minorversion;
+-			unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
+-			unconf->cl_cb_seq_nr = 1;
+-			nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
+-		}
++		if (is_ds_only_session(unconf->cl_exchange_flags))
++			cr_ses->flags &= ~SESSION4_BACK_CHAN;
++
+ 		conf = unconf;
+ 	} else {
+ 		status = nfserr_stale_clientid;
+ 		goto out;
+ 	}
+ 
++	if (cr_ses->flags & SESSION4_BACK_CHAN) {
++		conf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
++		svc_xprt_get(rqstp->rq_xprt);
++		rpc_copy_addr((struct sockaddr *)&conf->cl_cb_conn.cb_addr, sa);
++		conf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
++		conf->cl_cb_conn.cb_minorversion = cstate->minorversion;
++		conf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
++		conf->cl_cb_seq_nr = 1;
++		nfsd4_probe_callback(conf, &conf->cl_cb_conn);
++	}
++
+ 	/*
+ 	 * We do not support RDMA or persistent sessions
+ 	 */
+@@ -1746,7 +1809,7 @@ out:
+ 
+ /* OPEN Share state helper functions */
+ static inline struct nfs4_file *
+-alloc_init_file(struct inode *ino)
++alloc_init_file(struct inode *ino, struct svc_fh *current_fh)
+ {
+ 	struct nfs4_file *fp;
+ 	unsigned int hashval = file_hashval(ino);
+@@ -1760,6 +1823,16 @@ alloc_init_file(struct inode *ino)
+ 		fp->fi_inode = igrab(ino);
+ 		fp->fi_id = current_fileid++;
+ 		fp->fi_had_conflict = false;
++#if defined(CONFIG_PNFSD)
++		INIT_LIST_HEAD(&fp->fi_layouts);
++		INIT_LIST_HEAD(&fp->fi_layout_states);
++		fp->fi_fsid.major = current_fh->fh_export->ex_fsid;
++		fp->fi_fsid.minor = 0;
++		fp->fi_fhlen = current_fh->fh_handle.fh_size;
++		BUG_ON(fp->fi_fhlen > sizeof(fp->fi_fhval));
++		memcpy(fp->fi_fhval, &current_fh->fh_handle.fh_base,
++		       fp->fi_fhlen);
++#endif /* CONFIG_PNFSD */
+ 		spin_lock(&recall_lock);
+ 		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
+ 		spin_unlock(&recall_lock);
+@@ -1768,7 +1841,7 @@ alloc_init_file(struct inode *ino)
+ 	return NULL;
+ }
+ 
+-static void
++void
+ nfsd4_free_slab(struct kmem_cache **slab)
+ {
+ 	if (*slab == NULL)
+@@ -1784,6 +1857,7 @@ nfsd4_free_slabs(void)
+ 	nfsd4_free_slab(&file_slab);
+ 	nfsd4_free_slab(&stateid_slab);
+ 	nfsd4_free_slab(&deleg_slab);
++	nfsd4_free_pnfs_slabs();
+ }
+ 
+ static int
+@@ -1805,6 +1879,8 @@ nfsd4_init_slabs(void)
+ 			sizeof(struct nfs4_delegation), 0, 0, NULL);
+ 	if (deleg_slab == NULL)
+ 		goto out_nomem;
++	if (nfsd4_init_pnfs_slabs())
++		goto out_nomem;
+ 	return 0;
+ out_nomem:
+ 	nfsd4_free_slabs();
+@@ -1878,6 +1954,9 @@ init_stateid(struct nfs4_stateid *stp, s
+ 	INIT_LIST_HEAD(&stp->st_perstateowner);
+ 	INIT_LIST_HEAD(&stp->st_lockowners);
+ 	INIT_LIST_HEAD(&stp->st_perfile);
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&stp->st_pnfs_ds_id);
++#endif /* CONFIG_PNFSD */
+ 	list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
+ 	list_add(&stp->st_perstateowner, &sop->so_stateids);
+ 	list_add(&stp->st_perfile, &fp->fi_stateids);
+@@ -1919,6 +1998,7 @@ find_openstateowner_str(unsigned int has
+ {
+ 	struct nfs4_stateowner *so = NULL;
+ 
++	BUG_ON_UNLOCKED_STATE();
+ 	list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
+ 		if (same_owner_str(so, &open->op_owner, &open->op_clientid))
+ 			return so;
+@@ -1927,7 +2007,7 @@ find_openstateowner_str(unsigned int has
+ }
+ 
+ /* search file_hashtbl[] for file */
+-static struct nfs4_file *
++struct nfs4_file *
+ find_file(struct inode *ino)
+ {
+ 	unsigned int hashval = file_hashval(ino);
+@@ -1945,6 +2025,18 @@ find_file(struct inode *ino)
+ 	return NULL;
+ }
+ 
++struct nfs4_file *
++find_alloc_file(struct inode *ino, struct svc_fh *current_fh)
++{
++	struct nfs4_file *fp;
++
++	fp = find_file(ino);
++	if (fp)
++		return fp;
++
++	return alloc_init_file(ino, current_fh);
++}
++
+ static inline int access_valid(u32 x, u32 minorversion)
+ {
+ 	if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ)
+@@ -2503,7 +2595,7 @@ nfsd4_process_open2(struct svc_rqst *rqs
+ 		if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
+ 			goto out;
+ 		status = nfserr_resource;
+-		fp = alloc_init_file(ino);
++		fp = alloc_init_file(ino, current_fh);
+ 		if (fp == NULL)
+ 			goto out;
+ 	}
+@@ -2730,7 +2822,7 @@ nfs4_check_fh(struct svc_fh *fhp, struct
+ 	return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_path.dentry->d_inode;
+ }
+ 
+-static int
++int
+ STALE_STATEID(stateid_t *stateid)
+ {
+ 	if (stateid->si_boot == boot_time)
+@@ -2740,6 +2832,16 @@ STALE_STATEID(stateid_t *stateid)
+ 	return 1;
+ }
+ 
++__be32
++nfs4_check_stateid(stateid_t *stateid)
++{
++	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
++		return nfserr_bad_stateid;
++	if (STALE_STATEID(stateid))
++		return nfserr_stale_stateid;
++	return 0;
++}
++
+ static inline int
+ access_permit_read(unsigned long access_bmap)
+ {
+@@ -2848,6 +2950,24 @@ nfs4_preprocess_stateid_op(struct nfsd4_
+ 	if (grace_disallows_io(ino))
+ 		return nfserr_grace;
+ 
++#if defined(CONFIG_PNFSD)
++	if (pnfs_fh_is_ds(&current_fh->fh_handle)) {
++		if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
++			status = nfserr_bad_stateid;
++		else
++#ifdef CONFIG_GFS2_FS_LOCKING_DLM
++		{
++			dprintk("%s Don't check DS stateid\n", __func__);
++			return 0;
++		}
++#else /* CONFIG_GFS2_FS_LOCKING_DLM */
++			status = nfs4_preprocess_pnfs_ds_stateid(current_fh,
++								 stateid);
++#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
++		goto out;
++	}
++#endif /* CONFIG_PNFSD */
++
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+ 
+@@ -2924,13 +3044,9 @@ nfs4_preprocess_seqid_op(struct nfsd4_co
+ 	*stpp = NULL;
+ 	*sopp = NULL;
+ 
+-	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
+-		dprintk("NFSD: preprocess_seqid_op: magic stateid!\n");
+-		return nfserr_bad_stateid;
+-	}
+-
+-	if (STALE_STATEID(stateid))
+-		return nfserr_stale_stateid;
++	status = nfs4_check_stateid(stateid);
++	if (status)
++		return status;
+ 
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+@@ -3205,11 +3321,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp
+ 	if (nfsd4_has_session(cstate))
+ 		flags |= HAS_SESSION;
+ 	nfs4_lock_state();
+-	status = nfserr_bad_stateid;
+-	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
+-		goto out;
+-	status = nfserr_stale_stateid;
+-	if (STALE_STATEID(stateid))
++	status = nfs4_check_stateid(stateid);
++	if (status)
+ 		goto out;
+ 	status = nfserr_bad_stateid;
+ 	if (!is_delegation_stateid(stateid))
+@@ -3238,26 +3351,6 @@ out:
+ #define LOCK_HASH_SIZE             (1 << LOCK_HASH_BITS)
+ #define LOCK_HASH_MASK             (LOCK_HASH_SIZE - 1)
+ 
+-static inline u64
+-end_offset(u64 start, u64 len)
+-{
+-	u64 end;
+-
+-	end = start + len;
+-	return end >= start ? end: NFS4_MAX_UINT64;
+-}
+-
+-/* last octet in a range */
+-static inline u64
+-last_byte_offset(u64 start, u64 len)
+-{
+-	u64 end;
+-
+-	BUG_ON(!len);
+-	end = start + len;
+-	return end > start ? end - 1: NFS4_MAX_UINT64;
+-}
+-
+ #define lockownerid_hashval(id) \
+         ((id) & LOCK_HASH_MASK)
+ 
+@@ -3274,7 +3367,7 @@ static struct list_head lock_ownerid_has
+ static struct list_head	lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
+ static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
+ 
+-static struct nfs4_stateid *
++struct nfs4_stateid *
+ find_stateid(stateid_t *stid, int flags)
+ {
+ 	struct nfs4_stateid *local;
+@@ -3303,7 +3396,7 @@ find_stateid(stateid_t *stid, int flags)
+ 	return NULL;
+ }
+ 
+-static struct nfs4_delegation *
++struct nfs4_delegation *
+ find_delegation_stateid(struct inode *ino, stateid_t *stid)
+ {
+ 	struct nfs4_file *fp;
+@@ -3436,6 +3529,9 @@ alloc_init_lock_stateid(struct nfs4_stat
+ 	INIT_LIST_HEAD(&stp->st_perfile);
+ 	INIT_LIST_HEAD(&stp->st_perstateowner);
+ 	INIT_LIST_HEAD(&stp->st_lockowners); /* not used */
++#if defined(CONFIG_PNFSD)
++	INIT_LIST_HEAD(&stp->st_pnfs_ds_id);
++#endif /* CONFIG_PNFSD */
+ 	list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
+ 	list_add(&stp->st_perfile, &fp->fi_stateids);
+ 	list_add(&stp->st_perstateowner, &sop->so_stateids);
+@@ -3998,6 +4094,9 @@ nfs4_state_init(void)
+ 	INIT_LIST_HEAD(&client_lru);
+ 	INIT_LIST_HEAD(&del_recall_lru);
+ 	reclaim_str_hashtbl_size = 0;
++#if defined(CONFIG_PNFSD)
++	nfs4_pnfs_state_init();
++#endif /* CONFIG_PNFSD */
+ 	return 0;
+ }
+ 
+@@ -4110,6 +4209,7 @@ __nfs4_state_shutdown(void)
+ 	}
+ 
+ 	nfsd4_shutdown_recdir();
++	nfs4_pnfs_state_shutdown();
+ 	nfs4_init = 0;
+ }
+ 
+diff -up linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c.orig	2010-09-30 10:15:18.353734000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfs4xdr.c	2010-09-30 10:17:08.894999000 -0400
+@@ -47,9 +47,14 @@
+ #include <linux/nfsd_idmap.h>
+ #include <linux/nfs4_acl.h>
+ #include <linux/sunrpc/svcauth_gss.h>
++#include <linux/exportfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd4_block.h>
+ 
+ #include "xdr4.h"
+ #include "vfs.h"
++#include "pnfsd.h"
+ 
+ #define NFSDDBG_FACILITY		NFSDDBG_XDR
+ 
+@@ -1234,6 +1239,138 @@ nfsd4_decode_sequence(struct nfsd4_compo
+ 	DECODE_TAIL;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++static __be32
++nfsd4_decode_getdevlist(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_getdevlist *gdevl)
++{
++	DECODE_HEAD;
++
++	READ_BUF(16 + sizeof(nfs4_verifier));
++	READ32(gdevl->gd_layout_type);
++	READ32(gdevl->gd_maxdevices);
++	READ64(gdevl->gd_cookie);
++	COPYMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
++
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_getdevinfo(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_getdevinfo *gdev)
++{
++	u32 num;
++	DECODE_HEAD;
++
++	READ_BUF(12 + sizeof(struct nfsd4_pnfs_deviceid));
++	READ64(gdev->gd_devid.sbid);
++	READ64(gdev->gd_devid.devid);
++	READ32(gdev->gd_layout_type);
++	READ32(gdev->gd_maxcount);
++	READ32(num);
++	gdev->gd_notify_types = 0;
++	if (num) {
++		if (num > 1000)	/* sanity bound before computing 4 * num */
++			goto xdr_error;
++		READ_BUF(4 * num);	/* consume the whole bitmap4 */
++		READ32(gdev->gd_notify_types);
++	}
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
++			struct nfsd4_pnfs_layoutget *lgp)
++{
++	DECODE_HEAD;
++	READ_BUF(36);
++	READ32(lgp->lg_signal);
++	READ32(lgp->lg_seg.layout_type);
++	READ32(lgp->lg_seg.iomode);
++	READ64(lgp->lg_seg.offset);
++	READ64(lgp->lg_seg.length);
++	READ64(lgp->lg_minlength);
++	status = nfsd4_decode_stateid(argp, &lgp->lg_sid);
++	if (status)	/* do not keep decoding past a bad stateid */
++		return status;
++	READ_BUF(4);
++	READ32(lgp->lg_maxcount);
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
++			  struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	DECODE_HEAD;
++	u32 timechange;
++
++	READ_BUF(20);
++	READ64(lcp->args.lc_seg.offset);
++	READ64(lcp->args.lc_seg.length);
++	READ32(lcp->args.lc_reclaim);
++	status = nfsd4_decode_stateid(argp, &lcp->lc_sid);
++	if (status)	/* do not keep decoding past a bad stateid */
++		return status;
++	READ_BUF(4);
++	READ32(lcp->args.lc_newoffset);
++	if (lcp->args.lc_newoffset) {
++		READ_BUF(8);
++		READ64(lcp->args.lc_last_wr);
++	} else
++		lcp->args.lc_last_wr = 0;
++	READ_BUF(4);
++	READ32(timechange);
++	lcp->args.lc_mtime.seconds = 0;
++	lcp->args.lc_mtime.nseconds = 0;
++	if (timechange) {
++		READ_BUF(12);
++		READ64(lcp->args.lc_mtime.seconds);
++		READ32(lcp->args.lc_mtime.nseconds);
++	}
++	READ_BUF(8);
++	READ32(lcp->args.lc_seg.layout_type);
++	/* XXX: saving XDR'ed layout update. Since we don't have the
++	 * current_fh yet, and therefore no export_ops, we can't call
++	 * the layout specific decode routines. File and pVFS2
++	 * do not use the layout update....
++	 */
++	READ32(lcp->args.lc_up_len);
++	if (lcp->args.lc_up_len > 0) {
++		READ_BUF(lcp->args.lc_up_len);
++		READMEM(lcp->args.lc_up_layout, lcp->args.lc_up_len);
++	}
++	DECODE_TAIL;
++}
++
++static __be32
++nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
++			  struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	DECODE_HEAD;
++
++	READ_BUF(16);	/* reclaim, layout type, iomode, return type */
++	READ32(lrp->args.lr_reclaim);
++	READ32(lrp->args.lr_seg.layout_type);
++	READ32(lrp->args.lr_seg.iomode);
++	READ32(lrp->args.lr_return_type);
++	if (lrp->args.lr_return_type == RETURN_FILE) {
++		READ_BUF(16);	/* offset, length */
++		READ64(lrp->args.lr_seg.offset);
++		READ64(lrp->args.lr_seg.length);
++		if (nfsd4_decode_stateid(argp, &lrp->lr_sid))
++			goto xdr_error;	/* truncated stateid: stop here */
++		READ_BUF(4);
++		READ32(lrp->args.lrf_body_len);
++		if (lrp->args.lrf_body_len > 0) {
++			READ_BUF(lrp->args.lrf_body_len);
++			READMEM(lrp->args.lrf_body, lrp->args.lrf_body_len);
++		}
++	}
++	DECODE_TAIL;
++}
++#endif /* CONFIG_PNFSD */
++
+ static __be32
+ nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
+ {
+@@ -1335,11 +1472,19 @@ static nfsd4_dec nfsd41_dec_ops[] = {
+ 	[OP_DESTROY_SESSION]	= (nfsd4_dec)nfsd4_decode_destroy_session,
+ 	[OP_FREE_STATEID]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_GET_DIR_DELEGATION]	= (nfsd4_dec)nfsd4_decode_notsupp,
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICEINFO]	= (nfsd4_dec)nfsd4_decode_getdevinfo,
++	[OP_GETDEVICELIST]	= (nfsd4_dec)nfsd4_decode_getdevlist,
++	[OP_LAYOUTCOMMIT]	= (nfsd4_dec)nfsd4_decode_layoutcommit,
++	[OP_LAYOUTGET]		= (nfsd4_dec)nfsd4_decode_layoutget,
++	[OP_LAYOUTRETURN]	= (nfsd4_dec)nfsd4_decode_layoutreturn,
++#else  /* CONFIG_PNFSD */
+ 	[OP_GETDEVICEINFO]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_GETDEVICELIST]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTCOMMIT]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTGET]		= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_LAYOUTRETURN]	= (nfsd4_dec)nfsd4_decode_notsupp,
++#endif /* CONFIG_PNFSD */
+ 	[OP_SECINFO_NO_NAME]	= (nfsd4_dec)nfsd4_decode_notsupp,
+ 	[OP_SEQUENCE]		= (nfsd4_dec)nfsd4_decode_sequence,
+ 	[OP_SET_SSV]		= (nfsd4_dec)nfsd4_decode_notsupp,
+@@ -2136,6 +2281,36 @@ out_acl:
+ 		}
+ 		WRITE64(stat.ino);
+ 	}
++#if defined(CONFIG_PNFSD)
++	if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) {
++		struct super_block *sb = dentry->d_inode->i_sb;
++		int type = 0;
++
++		/* Query the filesystem for supported pNFS layout types.
++		 * Currently, we only support one layout type per file system.
++		 * The export_ops->layout_type() returns the pnfs_layouttype4.
++		 */
++		buflen -= 4;
++		if (buflen < 0)		/* length */
++			goto out_resource;
++
++		if (sb && sb->s_pnfs_op && sb->s_pnfs_op->layout_type)
++			type = sb->s_pnfs_op->layout_type(sb);
++		if (type) {
++			if ((buflen -= 4) < 0)	/* type */
++				goto out_resource;
++			WRITE32(1); 	/* length */
++			WRITE32(type);  /* type */
++		} else
++			WRITE32(0);  /* length */
++	}
++
++	if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) {
++		if ((buflen -= 4) < 0)
++			goto out_resource;
++		WRITE32(stat.blksize);
++	}
++#endif /* CONFIG_PNFSD */
+ 	if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
+ 		WRITE32(3);
+ 		WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
+@@ -2366,6 +2541,10 @@ nfsd4_encode_commit(struct nfsd4_compoun
+ 	if (!nfserr) {
+ 		RESERVE_SPACE(8);
+ 		WRITEMEM(commit->co_verf.data, 8);
++		dprintk("NFSD: nfsd4_encode_commit: verifier %x:%x\n",
++			((u32 *)(&commit->co_verf.data))[0],
++			((u32 *)(&commit->co_verf.data))[1]);
++
+ 		ADJUST_ARGS();
+ 	}
+ 	return nfserr;
+@@ -2620,9 +2799,20 @@ nfsd4_encode_read(struct nfsd4_compoundr
+ 	}
+ 	read->rd_vlen = v;
+ 
++#if defined(CONFIG_SPNFS)
++	if (spnfs_enabled())
++		nfserr = spnfs_read(read->rd_fhp->fh_dentry->d_inode,
++				    read->rd_offset, &maxcount, read->rd_vlen,
++				    resp->rqstp);
++	else /* we're not an MDS */
++		nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
++			read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
++			&maxcount);
++#else
+ 	nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
+ 			read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
+ 			&maxcount);
++#endif /* CONFIG_SPNFS */
+ 
+ 	if (nfserr == nfserr_symlink)
+ 		nfserr = nfserr_inval;
+@@ -2926,6 +3116,9 @@ nfsd4_encode_write(struct nfsd4_compound
+ 		WRITE32(write->wr_bytes_written);
+ 		WRITE32(write->wr_how_written);
+ 		WRITEMEM(write->wr_verifier.data, 8);
++		dprintk("NFSD: nfsd4_encode_write: verifier %x:%x\n",
++			((u32 *)(&write->wr_verifier.data))[0],
++			((u32 *)(&write->wr_verifier.data))[1]);
+ 		ADJUST_ARGS();
+ 	}
+ 	return nfserr;
+@@ -3069,6 +3262,343 @@ nfsd4_encode_sequence(struct nfsd4_compo
+ 	return 0;
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++/* Encode one (sbid, devid) pair per device from s_pnfs_op->get_device_iter(),
++ * counting them in *dev_count.  NOTE(review): being a do/while, the loop body
++ * runs once even when gd_maxdevices is 0. */
++static  __be32
++nfsd4_encode_devlist_iterator(struct nfsd4_compoundres *resp,
++			      struct nfsd4_pnfs_getdevlist *gdevl,
++			      unsigned int *dev_count)
++{
++	struct super_block *sb = gdevl->gd_fhp->fh_dentry->d_inode->i_sb;
++	__be32 nfserr;
++	int status;
++	__be32 *p;
++	struct nfsd4_pnfs_dev_iter_res res = {
++		.gd_cookie = gdevl->gd_cookie,
++		.gd_verf = gdevl->gd_verf,
++		.gd_eof = 0
++	};
++	u64 sbid;
++
++	dprintk("%s: Begin\n", __func__);
++
++	sbid = find_create_sbid(sb);
++	*dev_count = 0;
++	do {
++		status = sb->s_pnfs_op->get_device_iter(sb,
++							gdevl->gd_layout_type,
++							&res);
++		if (status) {
++			if (status == -ENOENT) {
++				res.gd_eof = 1;
++				/* -ENOENT ends the list; it is not an error */
++				break;
++			}
++			nfserr = nfserrno(status);
++			goto out_err;
++		}
++
++		/* Encode the device id: sbid first, then the devid */
++		RESERVE_SPACE(sizeof(struct nfsd4_pnfs_deviceid));
++		WRITE64((__be64)sbid);
++		WRITE64(res.gd_devid);	/* devid minor */
++		ADJUST_ARGS();
++		(*dev_count)++;
++	} while (*dev_count < gdevl->gd_maxdevices && !res.gd_eof);
++	gdevl->gd_cookie = res.gd_cookie;
++	gdevl->gd_verf = res.gd_verf;
++	gdevl->gd_eof = res.gd_eof;
++	nfserr = nfs_ok;
++out_err:
++	dprintk("%s: Encoded %u devices\n", __func__, *dev_count);
++	return nfserr;
++}
++
++/* Encode the GETDEVICELIST reply: cookie, verifier, device id list, eof.
++ * The leading fields are backfilled once the devices have been encoded. */
++static __be32
++nfsd4_encode_getdevlist(struct nfsd4_compoundres *resp, __be32 nfserr,
++			struct nfsd4_pnfs_getdevlist *gdevl)
++{
++	unsigned int dev_count = 0, lead_count;
++	__be32 *p_in = resp->p;	/* start of reply body, for backfill/rewind */
++	__be32 *p;
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	/* Ensure we have room for cookie, verifier, and devlist len,
++	 * which we will backfill in after we encode as many devices as possible
++	 */
++	lead_count = 8 + sizeof(nfs4_verifier) + 4;
++	RESERVE_SPACE(lead_count);
++	/* skip past these values */
++	p += XDR_QUADLEN(lead_count);
++	ADJUST_ARGS();
++
++	/* Iterate over as many device ids as possible on the xdr stream */
++	nfserr = nfsd4_encode_devlist_iterator(resp, gdevl, &dev_count);
++	if (nfserr)
++		goto out_err;
++
++	/* Backfill in cookie, verf and number of devices encoded */
++	p = p_in;
++	WRITE64(gdevl->gd_cookie);
++	WRITEMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
++	WRITE32(dev_count);
++
++	/* Skip over devices */
++	p += XDR_QUADLEN(dev_count * sizeof(struct nfsd4_pnfs_deviceid));
++	ADJUST_ARGS();
++
++	/* are we at the end of devices? */
++	RESERVE_SPACE(4);
++	WRITE32(gdevl->gd_eof);
++	ADJUST_ARGS();
++
++	dprintk("%s: done.\n", __func__);
++
++	nfserr = nfs_ok;
++out:
++	return nfserr;
++out_err:
++	p = p_in;
++	ADJUST_ARGS();
++	goto out;
++}
++
++/* For a given device id, have the file system retrieve and encode the
++ * associated device.  The file system writes its encoding straight onto
++ * the response stream through the exp_xdr_stream handed to
++ * s_pnfs_op->get_device_info(), bounded by gd_maxcount and the max payload.
++ *
++ * Note: when the file system returns -ETOOSMALL it must leave xdr->p
++ * advanced by the XDR size of struct device_addr4 da_addr_body; that
++ * distance is what the gdir_mincount calculation below is based on.
++ */
++static __be32
++nfsd4_encode_getdevinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
++			struct nfsd4_pnfs_getdevinfo *gdev)
++{
++	struct super_block *sb;
++	int maxcount = 0, type_notify_len = 12;
++	__be32 *p, *p_save = NULL, *p_in = resp->p;
++	struct exp_xdr_stream xdr;
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	sb = gdev->gd_sb;
++
++	if (gdev->gd_maxcount != 0) {
++		/* FIXME: this will be bound by the session max response */
++		maxcount = svc_max_payload(resp->rqstp);
++		if (maxcount > gdev->gd_maxcount)
++			maxcount = gdev->gd_maxcount;
++
++		/* Ensure have room for type and notify field */
++		maxcount -= type_notify_len;
++		if (maxcount < 0) {
++			nfserr = -ETOOSMALL;
++			goto toosmall;
++		}
++	}
++
++	RESERVE_SPACE(4);
++	WRITE32(gdev->gd_layout_type);
++	ADJUST_ARGS();
++
++	/* If maxcount is 0 then just update notifications */
++	if (gdev->gd_maxcount == 0)
++		goto handle_notifications;
++
++	xdr.p = p_save = resp->p;
++	xdr.end = resp->end;
++	if (xdr.end - xdr.p > exp_xdr_qwords(maxcount & ~3))
++		xdr.end = xdr.p + exp_xdr_qwords(maxcount & ~3);
++
++	nfserr = sb->s_pnfs_op->get_device_info(sb, &xdr, gdev->gd_layout_type,
++						&gdev->gd_devid);
++	if (nfserr)
++		goto err;
++
++	/* The file system should never write 0 bytes without
++	 * returning an error
++	 */
++	BUG_ON(xdr.p == p_save);
++	BUG_ON(xdr.p > xdr.end);
++
++	/* Update the xdr stream with the number of bytes encoded
++	 * by the file system.
++	 */
++	p = xdr.p;
++	ADJUST_ARGS();
++
++handle_notifications:
++	/* Encode supported device notifications */
++	RESERVE_SPACE(4);
++	if (sb->s_pnfs_op->set_device_notify) {
++		struct pnfs_devnotify_arg dn_args;
++
++		dn_args.dn_layout_type = gdev->gd_layout_type;
++		dn_args.dn_devid = gdev->gd_devid;
++		dn_args.dn_notify_types = gdev->gd_notify_types;
++		nfserr = sb->s_pnfs_op->set_device_notify(sb, &dn_args);
++		if (nfserr)
++			goto err;
++		WRITE32(dn_args.dn_notify_types);
++	} else {
++		WRITE32(0);
++	}
++	ADJUST_ARGS();
++
++out:
++	return nfserrno(nfserr);
++toosmall:
++	dprintk("%s: maxcount too small\n", __func__);
++	RESERVE_SPACE(4);
++	WRITE32((p_save ? (xdr.p - p_save) * 4 : 0) + type_notify_len);
++	ADJUST_ARGS();
++	goto out;
++err:
++	/* Rewind to the beginning */
++	p = p_in;
++	ADJUST_ARGS();
++	if (nfserr == -ETOOSMALL)
++		goto toosmall;
++	printk(KERN_ERR "%s: export ERROR %d\n", __func__, nfserr);
++	goto out;
++}
++
++static __be32
++nfsd4_encode_layoutget(struct nfsd4_compoundres *resp,
++		       __be32 nfserr,
++		       struct nfsd4_pnfs_layoutget *lgp)
++{
++	int maxcount, leadcount;
++	struct super_block *sb;
++	struct exp_xdr_stream xdr;
++	__be32 *p, *p_save, *p_start = resp->p;
++
++	dprintk("%s: err %d\n", __func__, nfserr);
++	if (nfserr)
++		return nfserr;
++
++	sb = lgp->lg_fhp->fh_dentry->d_inode->i_sb;
++	maxcount = PAGE_SIZE;
++	if (maxcount > lgp->lg_maxcount)
++		maxcount = lgp->lg_maxcount;
++
++	/* Check for space on xdr stream */
++	leadcount = 36 + sizeof(stateid_opaque_t);
++	RESERVE_SPACE(leadcount);
++	/* encode layout metadata after file system encodes layout */
++	p += XDR_QUADLEN(leadcount);
++	ADJUST_ARGS();
++
++	/* Ensure have room for ret_on_close, off, len, iomode, type */
++	maxcount -= leadcount;
++	if (maxcount < 0) {
++		printk(KERN_ERR "%s: buffer too small\n", __func__);
++		nfserr = nfserr_toosmall;
++		goto err;
++	}
++
++	/* Set xdr info so file system can encode layout */
++	xdr.p = p_save = resp->p;
++	xdr.end = resp->end;
++	if (xdr.end - xdr.p > exp_xdr_qwords(maxcount & ~3))
++		xdr.end = xdr.p + exp_xdr_qwords(maxcount & ~3);
++
++	/* Retrieve, encode, and merge layout; process stateid */
++	nfserr = nfs4_pnfs_get_layout(lgp, &xdr);
++	if (nfserr)
++		goto err;
++
++	/* Ensure file system returned enough bytes for the client
++	 * to access.
++	 */
++	if (lgp->lg_seg.length < lgp->lg_minlength) {
++		nfserr = nfserr_badlayout;
++		goto err;
++	}
++
++	/* The file system should never write 0 bytes without
++	 * returning an error
++	 */
++	BUG_ON(xdr.p == p_save);
++
++	/* Rewind to beginning and encode attrs */
++	resp->p = p_start;
++	RESERVE_SPACE(4);
++	WRITE32(lgp->lg_roc);	/* return on close */
++	ADJUST_ARGS();
++	nfsd4_encode_stateid(resp, &lgp->lg_sid);
++	RESERVE_SPACE(28);
++	/* Note: response logr_layout array count, always one for now */
++	WRITE32(1);
++	WRITE64(lgp->lg_seg.offset);
++	WRITE64(lgp->lg_seg.length);
++	WRITE32(lgp->lg_seg.iomode);
++	WRITE32(lgp->lg_seg.layout_type);
++
++	/* Update the xdr stream with the number of bytes written
++	 * by the file system
++	 */
++	p = xdr.p;
++	ADJUST_ARGS();
++
++	return nfs_ok;
++err:
++	resp->p = p_start;
++	return nfserr;
++}
++
++static __be32
++nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
++			  struct nfsd4_pnfs_layoutcommit *lcp)
++{
++	__be32 *p;
++
++	if (nfserr)
++		goto out;
++
++	RESERVE_SPACE(4);
++	WRITE32(lcp->res.lc_size_chg);
++	ADJUST_ARGS();
++	if (lcp->res.lc_size_chg) {
++		RESERVE_SPACE(8);
++		WRITE64(lcp->res.lc_newsize);
++		ADJUST_ARGS();
++	}
++out:
++	return nfserr;
++}
++
++static __be32
++nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
++			  struct nfsd4_pnfs_layoutreturn *lrp)
++{
++	__be32 *p;
++
++	if (nfserr)
++		goto out;
++
++	RESERVE_SPACE(4);
++	WRITE32(lrp->lrs_present != 0);    /* got stateid? */
++	ADJUST_ARGS();
++	if (lrp->lrs_present)
++		nfsd4_encode_stateid(resp, &lrp->lr_sid);
++out:
++	return nfserr;
++}
++#endif /* CONFIG_PNFSD */
++
+ static __be32
+ nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
+ {
+@@ -3129,11 +3659,19 @@ static nfsd4_enc nfsd4_enc_ops[] = {
+ 	[OP_DESTROY_SESSION]	= (nfsd4_enc)nfsd4_encode_destroy_session,
+ 	[OP_FREE_STATEID]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_GET_DIR_DELEGATION]	= (nfsd4_enc)nfsd4_encode_noop,
++#if defined(CONFIG_PNFSD)
++	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_getdevinfo,
++	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_getdevlist,
++	[OP_LAYOUTCOMMIT]	= (nfsd4_enc)nfsd4_encode_layoutcommit,
++	[OP_LAYOUTGET]		= (nfsd4_enc)nfsd4_encode_layoutget,
++	[OP_LAYOUTRETURN]	= (nfsd4_enc)nfsd4_encode_layoutreturn,
++#else  /* CONFIG_PNFSD */
+ 	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTCOMMIT]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTGET]		= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_LAYOUTRETURN]	= (nfsd4_enc)nfsd4_encode_noop,
++#endif /* CONFIG_PNFSD */
+ 	[OP_SECINFO_NO_NAME]	= (nfsd4_enc)nfsd4_encode_noop,
+ 	[OP_SEQUENCE]		= (nfsd4_enc)nfsd4_encode_sequence,
+ 	[OP_SET_SSV]		= (nfsd4_enc)nfsd4_encode_noop,
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig linux-2.6.34.noarch/fs/nfsd/nfsctl.c
+--- linux-2.6.34.noarch/fs/nfsd/nfsctl.c.orig	2010-09-30 10:15:18.364728000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsctl.c	2010-09-30 10:17:08.900002000 -0400
+@@ -13,10 +13,15 @@
+ #include <linux/nfsd/syscall.h>
+ #include <linux/lockd/lockd.h>
+ #include <linux/sunrpc/clnt.h>
++#include <linux/nfsd/nfs4pnfsdlm.h>
+ 
+ #include "nfsd.h"
+ #include "cache.h"
+ 
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++#include <linux/nfsd4_spnfs.h>
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
+ /*
+  *	We have a single directory with 9 nodes in it.
+  */
+@@ -49,6 +54,9 @@ enum {
+ 	NFSD_Gracetime,
+ 	NFSD_RecoveryDir,
+ #endif
++#ifdef CONFIG_PNFSD
++	NFSD_pnfs_dlm_device,
++#endif
+ };
+ 
+ /*
+@@ -74,6 +82,9 @@ static ssize_t write_leasetime(struct fi
+ static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
+ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
+ #endif
++#ifdef CONFIG_PNFSD
++static ssize_t write_pnfs_dlm_device(struct file *file, char *buf, size_t size);
++#endif
+ 
+ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
+ 	[NFSD_Svc] = write_svc,
+@@ -96,6 +107,9 @@ static ssize_t (*write_op[])(struct file
+ 	[NFSD_Gracetime] = write_gracetime,
+ 	[NFSD_RecoveryDir] = write_recoverydir,
+ #endif
++#ifdef CONFIG_PNFSD
++	[NFSD_pnfs_dlm_device] = write_pnfs_dlm_device,
++#endif
+ };
+ 
+ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
+@@ -1349,6 +1363,68 @@ static ssize_t write_recoverydir(struct 
+ 
+ #endif
+ 
++#ifdef CONFIG_PNFSD
++
++static ssize_t __write_pnfs_dlm_device(struct file *file, char *buf,
++				       size_t size)
++{
++	char *mesg = buf;
++	char *pnfs_dlm_device;
++	int max_size = NFSD_PNFS_DLM_DEVICE_MAX;
++	int len, ret = 0;
++
++	if (size > 0) {
++		ret = -EINVAL;
++		if (size > max_size || buf[size-1] != '\n')
++			return ret;
++		buf[size-1] = 0;
++
++		pnfs_dlm_device = mesg;
++		len = qword_get(&mesg, pnfs_dlm_device, size);
++		if (len <= 0)
++			return ret;
++
++		ret = nfsd4_set_pnfs_dlm_device(pnfs_dlm_device, len);
++	} else
++		return nfsd4_get_pnfs_dlm_device_list(buf, SIMPLE_TRANSACTION_LIMIT);
++
++	return ret <= 0 ? ret : strlen(buf);
++}
++
++/**
++ * write_pnfs_dlm_device - Set or report the current pNFS data server list
++ *
++ * Input:
++ *			buf:		ignored
++ *			size:		zero
++ *
++ * OR
++ *
++ * Input:
++ *			buf:		C string containing a block device name,
++ *					a colon, and then a comma separated
++ *					list of pNFS data server IPv4 addresses
++ *			size:		non-zero length of C string in @buf
++ * Output:
++ *	On success:	passed-in buffer filled with '\n'-terminated C
++ *			string containing a block device name, a colon, and
++ *			then a comma separated list of pNFS
++ *			data server IPv4 addresses.
++ *			return code is the size in bytes of the string
++ *	On error:	return code is a negative errno value
++ */
++static ssize_t write_pnfs_dlm_device(struct file *file, char *buf, size_t size)
++{
++	ssize_t rv;
++
++	mutex_lock(&nfsd_mutex);
++	rv = __write_pnfs_dlm_device(file, buf, size);
++	mutex_unlock(&nfsd_mutex);
++	return rv;
++}
++
++#endif /* CONFIG_PNFSD */
++
+ /*----------------------------------------------------------------------------*/
+ /*
+  *	populating the filesystem.
+@@ -1383,6 +1459,10 @@ static int nfsd_fill_super(struct super_
+ 		[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
+ 		[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
+ #endif
++#ifdef CONFIG_PNFSD
++		[NFSD_pnfs_dlm_device] = {"pnfs_dlm_device", &transaction_ops,
++					   S_IWUSR|S_IRUSR},
++#endif
+ 		/* last one */ {""}
+ 	};
+ 	return simple_fill_super(sb, 0x6e667364, nfsd_files);
+@@ -1421,6 +1501,9 @@ static int create_proc_exports_entry(voi
+ }
+ #endif
+ 
++#if defined(CONFIG_SPNFS_BLOCK)
++int nfsd_bl_init(void);
++#endif
+ static int __init init_nfsd(void)
+ {
+ 	int retval;
+@@ -1443,6 +1526,15 @@ static int __init init_nfsd(void)
+ 	retval = create_proc_exports_entry();
+ 	if (retval)
+ 		goto out_free_idmap;
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++	retval = spnfs_init_proc();
++	if (retval != 0)
++		goto out_free_all;	/* exports proc entry exists by now */
++#if defined(CONFIG_SPNFS_BLOCK)
++	nfsd_bl_init();
++#endif /* CONFIG_SPNFS_BLOCK */
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
+ 	retval = register_filesystem(&nfsd_fs_type);
+ 	if (retval)
+ 		goto out_free_all;
+@@ -1465,7 +1557,22 @@ out_free_stat:
+ 
+ static void __exit exit_nfsd(void)
+ {
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS_LAYOUTSEGMENTS)
++	remove_proc_entry("fs/nfs/spnfs/layoutsegsize", NULL);
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++#if defined(CONFIG_PROC_FS) && defined(CONFIG_SPNFS)
++	/* Children first: a proc directory must be empty when removed. */
++	remove_proc_entry("fs/nfs/spnfs/recall", NULL);
++	remove_proc_entry("fs/nfs/spnfs/layoutseg", NULL);
++	remove_proc_entry("fs/nfs/spnfs/getfh", NULL);
++	remove_proc_entry("fs/nfs/spnfs/config", NULL);
++	remove_proc_entry("fs/nfs/spnfs/ctl", NULL);
++	remove_proc_entry("fs/nfs/spnfs", NULL);
++#endif /* CONFIG_PROC_FS && CONFIG_SPNFS */
++
+ 	nfsd_export_shutdown();
++	nfsd4_pnfs_dlm_shutdown();
+ 	nfsd_reply_cache_shutdown();
+ 	remove_proc_entry("fs/nfs/exports", NULL);
+ 	remove_proc_entry("fs/nfs", NULL);
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig linux-2.6.34.noarch/fs/nfsd/nfsd.h
+--- linux-2.6.34.noarch/fs/nfsd/nfsd.h.orig	2010-09-30 10:15:18.370728000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsd.h	2010-09-30 10:17:08.906000000 -0400
+@@ -285,11 +285,17 @@ extern time_t nfsd4_grace;
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
+ 	NFSD4_SUPPORTED_ATTRS_WORD0
+ 
++#if defined(CONFIG_PNFSD)
++#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
++	(NFSD4_SUPPORTED_ATTRS_WORD1 | FATTR4_WORD1_FS_LAYOUT_TYPES)
++#else /* CONFIG_PNFSD */
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
+ 	NFSD4_SUPPORTED_ATTRS_WORD1
++#endif /* CONFIG_PNFSD */
+ 
+ #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
+-	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
++	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT | \
++	 FATTR4_WORD2_LAYOUT_BLKSIZE)
+ 
+ static inline u32 nfsd_suppattrs0(u32 minorversion)
+ {
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.c
+--- linux-2.6.34.noarch/fs/nfsd/nfsfh.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.c	2010-09-30 10:17:08.911003000 -0400
+@@ -10,6 +10,7 @@
+ #include <linux/exportfs.h>
+ 
+ #include <linux/sunrpc/svcauth_gss.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
+ #include "nfsd.h"
+ #include "vfs.h"
+ #include "auth.h"
+@@ -139,6 +140,7 @@ static inline __be32 check_pseudo_root(s
+ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
+ {
+ 	struct knfsd_fh	*fh = &fhp->fh_handle;
++	int fsid_type;
+ 	struct fid *fid = NULL, sfid;
+ 	struct svc_export *exp;
+ 	struct dentry *dentry;
+@@ -159,7 +161,8 @@ static __be32 nfsd_set_fh_dentry(struct 
+ 			return error;
+ 		if (fh->fh_auth_type != 0)
+ 			return error;
+-		len = key_len(fh->fh_fsid_type) / 4;
++		fsid_type = pnfs_fh_fsid_type(fh);
++		len = key_len(fsid_type) / 4;
+ 		if (len == 0)
+ 			return error;
+ 		if  (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
+@@ -172,7 +175,7 @@ static __be32 nfsd_set_fh_dentry(struct 
+ 		data_left -= len;
+ 		if (data_left < 0)
+ 			return error;
+-		exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth);
++		exp = rqst_exp_find(rqstp, fsid_type, fh->fh_auth);
+ 		fid = (struct fid *)(fh->fh_auth + len);
+ 	} else {
+ 		__u32 tfh[2];
+diff -up linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig linux-2.6.34.noarch/fs/nfsd/nfsfh.h
+--- linux-2.6.34.noarch/fs/nfsd/nfsfh.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfsfh.h	2010-09-30 10:17:08.917000000 -0400
+@@ -14,6 +14,7 @@ enum nfsd_fsid {
+ 	FSID_UUID8,
+ 	FSID_UUID16,
+ 	FSID_UUID16_INUM,
++	FSID_MAX
+ };
+ 
+ enum fsid_source {
+@@ -205,4 +206,42 @@ fh_unlock(struct svc_fh *fhp)
+ 	}
+ }
+ 
++#if defined(CONFIG_PNFSD)
++
++/*
++ * fh_fsid_type is overloaded to indicate whether a filehandle was one supplied
++ * to a DS by LAYOUTGET.  nfs4_preprocess_stateid_op() uses this to decide how
++ * to handle a given stateid.
++ */
++static inline int pnfs_fh_is_ds(struct knfsd_fh *fh)
++{
++	return fh->fh_fsid_type >= FSID_MAX;
++}
++
++static inline void pnfs_fh_mark_ds(struct knfsd_fh *fh)
++{
++	BUG_ON(fh->fh_version != 1);
++	BUG_ON(pnfs_fh_is_ds(fh));
++	fh->fh_fsid_type += FSID_MAX;
++}
++
++#else  /* CONFIG_PNFSD */
++
++static inline int pnfs_fh_is_ds(struct knfsd_fh *fh)
++{
++	return 0;
++}
++
++#endif /* CONFIG_PNFSD */
++
++/* allows fh_verify() to check the real fsid_type (i.e., not overloaded). */
++static inline int pnfs_fh_fsid_type(struct knfsd_fh *fh)
++{
++	int fsid_type = fh->fh_fsid_type;
++
++	if (pnfs_fh_is_ds(fh))
++		return fsid_type - FSID_MAX;
++	return fsid_type;
++}
++
+ #endif /* _LINUX_NFSD_FH_INT_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig linux-2.6.34.noarch/fs/nfsd/nfssvc.c
+--- linux-2.6.34.noarch/fs/nfsd/nfssvc.c.orig	2010-09-30 10:15:05.063337000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/nfssvc.c	2010-09-30 10:17:08.922000000 -0400
+@@ -115,7 +115,7 @@ struct svc_program		nfsd_program = {
+ 
+ };
+ 
+-u32 nfsd_supported_minorversion;
++u32 nfsd_supported_minorversion = NFSD_SUPPORTED_MINOR_VERSION;
+ 
+ int nfsd_vers(int vers, enum vers_op change)
+ {
+diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig linux-2.6.34.noarch/fs/nfsd/pnfsd.h
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd.h.orig	2010-09-30 10:17:08.924003000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd.h	2010-09-30 10:17:08.926004000 -0400
+@@ -0,0 +1,143 @@
++/*
++ *  Copyright (c) 2005 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef LINUX_NFSD_PNFSD_H
++#define LINUX_NFSD_PNFSD_H
++
++#include <linux/list.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++#include "state.h"
++#include "xdr4.h"
++
++/* outstanding layout stateid */
++struct nfs4_layout_state {
++	struct list_head	ls_perfile;
++	struct list_head	ls_layouts; /* list of nfs4_layouts */
++	struct kref		ls_ref;
++	struct nfs4_client	*ls_client;
++	struct nfs4_file	*ls_file;
++	stateid_t		ls_stateid;
++};
++
++/* outstanding layout */
++struct nfs4_layout {
++	struct list_head		lo_perfile;	/* hash by f_id */
++	struct list_head		lo_perclnt;	/* hash by clientid */
++	struct list_head		lo_perstate;
++	struct nfs4_file		*lo_file;	/* backpointer */
++	struct nfs4_client		*lo_client;
++	struct nfs4_layout_state	*lo_state;
++	struct nfsd4_layout_seg 	lo_seg;
++};
++
++struct pnfs_inval_state {
++	struct knfsd_fh		mdsfh; /* needed only by invalidate all */
++	stateid_t		stid;
++	clientid_t		clid;
++	u32			status;
++};
++
++/* pNFS Data Server state */
++#define DS_STATEID_VALID   0
++#define DS_STATEID_ERROR   1
++#define DS_STATEID_NEW     2
++
++struct pnfs_ds_stateid {
++	struct list_head	ds_hash;        /* ds_stateid hash entry */
++	struct list_head	ds_perclid;     /* per client hash entry */
++	stateid_t		ds_stid;
++	struct knfsd_fh		ds_fh;
++	unsigned long		ds_access;
++	u32			ds_status;      /* from MDS */
++	u32			ds_verifier[2]; /* from MDS */
++	wait_queue_head_t	ds_waitq;
++	unsigned long		ds_flags;
++	struct kref		ds_ref;
++	clientid_t		ds_mdsclid;
++};
++
++struct pnfs_ds_clientid {
++	struct list_head	dc_hash;        /* mds_clid_hashtbl entry */
++	struct list_head	dc_stateid;     /* ds_stateid head */
++	struct list_head	dc_permdsid;    /* per mdsid hash entry */
++	clientid_t		dc_mdsclid;
++	struct kref		dc_ref;
++	uint32_t		dc_mdsid;
++};
++
++struct pnfs_mds_id {
++	struct list_head	di_hash;        /* mds_nodeid list entry */
++	struct list_head	di_mdsclid;     /* mds_clientid head */
++	uint32_t		di_mdsid;
++	time_t			di_mdsboot;	/* mds boot time */
++	struct kref		di_ref;
++};
++
++/* notify device request (from exported filesystem) */
++struct nfs4_notify_device {
++	struct nfsd4_pnfs_cb_dev_list  *nd_list;
++	struct nfs4_client	       *nd_client;
++	struct list_head	        nd_perclnt;
++
++	void				*nd_args;	/* nfsd internal */
++};
++
++u64 find_create_sbid(struct super_block *);
++struct super_block *find_sbid_id(u64);
++__be32 nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *, struct exp_xdr_stream *);
++int nfs4_pnfs_return_layout(struct super_block *, struct svc_fh *,
++					struct nfsd4_pnfs_layoutreturn *);
++int nfs4_pnfs_cb_get_state(struct super_block *, struct pnfs_get_state *);
++int nfs4_pnfs_cb_change_state(struct pnfs_get_state *);
++void nfs4_ds_get_verifier(stateid_t *, struct super_block *, u32 *);
++int put_layoutrecall(struct nfs4_layoutrecall *);
++void nomatching_layout(struct nfs4_layoutrecall *);
++void *layoutrecall_done(struct nfs4_layoutrecall *);
++int nfsd4_cb_layout(struct nfs4_layoutrecall *);
++int nfsd_layout_recall_cb(struct super_block *, struct inode *,
++			  struct nfsd4_pnfs_cb_layout *);
++int nfsd_device_notify_cb(struct super_block *,
++			  struct nfsd4_pnfs_cb_dev_list *);
++int nfsd4_cb_notify_device(struct nfs4_notify_device *);
++void pnfs_set_device_notify(clientid_t *, unsigned int types);
++void pnfs_clear_device_notify(struct nfs4_client *);
++
++#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
++extern struct sockaddr pnfsd_lexp_addr;
++extern size_t pnfs_lexp_addr_len;
++
++extern void pnfsd_lexp_init(struct inode *);
++#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
++
++#endif /* LINUX_NFSD_PNFSD_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c
+--- linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c.orig	2010-09-30 10:17:08.928999000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/pnfsd_lexp.c	2010-09-30 10:17:08.930006000 -0400
+@@ -0,0 +1,225 @@
++/*
++ * linux/fs/nfsd/pnfs_lexp.c
++ *
++ * pNFS export of local filesystems.
++ *
++ * Export local file systems over the files layout type.
++ * The MDS (metadata server) functions also as a single DS (data server).
++ * This is mostly useful for development and debugging purposes.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * Copyright (C) 2008 Benny Halevy, <bhalevy@panasas.com>
++ *
++ * Initial implementation was based on the pnfs-gfs2 patches done
++ * by David M. Richter <richterd@citi.umich.edu>
++ */
++
++#include <linux/sunrpc/svc_xprt.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++#include "pnfsd.h"
++
++#define NFSDDBG_FACILITY NFSDDBG_PNFS
++
++struct sockaddr pnfsd_lexp_addr;
++size_t pnfs_lexp_addr_len;
++
++/* Report the layout type served by the local export: NFSv4.1 files. */
++static int pnfsd_lexp_layout_type(struct super_block *sb)
++{
++	const int type = LAYOUT_NFSV4_1_FILES;
++	dprintk("<-- %s: return %d\n", __func__, type);
++	return type;
++}
++
++/* Device iterator for the local export: it knows a single device, id 1. */
++static int pnfsd_lexp_get_device_iter(struct super_block *sb,
++				      u32 layout_type,
++				      struct nfsd4_pnfs_dev_iter_res *res)
++{
++	dprintk("--> %s: sb=%p\n", __func__, sb);
++	BUG_ON(layout_type != LAYOUT_NFSV4_1_FILES);
++
++	/* EOF is flagged before the cookie check, so on -ENOENT as well. */
++	res->gd_eof = 1;
++	if (res->gd_cookie != 0)
++		return -ENOENT;
++	/* Cookie 0: hand back device 1 and leave a non-zero cookie behind. */
++	res->gd_devid = 1;
++	res->gd_verf = 1;
++	res->gd_cookie = 1;
++	dprintk("<-- %s: return 0\n", __func__);
++	return 0;
++}
++
++static int
++pnfsd_lexp_get_device_info(struct super_block *sb,
++			   struct exp_xdr_stream *xdr,
++			   u32 layout_type,
++			   const struct nfsd4_pnfs_deviceid *devid)
++{
++	int err;
++	struct pnfs_filelayout_device fdev;
++	struct pnfs_filelayout_multipath fl_devices[1];
++	u32 fl_stripe_indices[1] = { 0 };
++	struct pnfs_filelayout_devaddr daddr;
++	/* %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x.%03u.%03u */
++	char daddr_buf[8*4 + 2*3 + 10];
++
++	dprintk("--> %s: sb=%p\n", __func__, sb);
++
++	BUG_ON(layout_type != LAYOUT_NFSV4_1_FILES);
++
++	memset(&fdev, '\0', sizeof(fdev));
++	/* Device id 1 is the only id pnfsd_lexp_get_device_iter() hands out. */
++	if (devid->devid != 1) {
++		printk(KERN_ERR "%s: WARNING: didn't receive a deviceid of 1 "
++			"(got: 0x%llx)\n", __func__, devid->devid);
++		err = -EINVAL;
++		goto out;
++	}
++
++	/* one device entry and one stripe index, both in on-stack arrays */
++	fdev.fl_device_length = 1;
++	fdev.fl_device_list = fl_devices;
++
++	fdev.fl_stripeindices_length = fdev.fl_device_length;
++	fdev.fl_stripeindices_list = fl_stripe_indices;
++	/* Format pnfsd_lexp_addr into daddr_buf; a result >= 0 becomes r_addr.len. */
++	daddr.r_addr.data = daddr_buf;
++	daddr.r_addr.len = sizeof(daddr_buf);
++	err = __svc_print_netaddr(&pnfsd_lexp_addr, &daddr.r_addr);
++	if (err < 0)
++		goto out;
++	daddr.r_addr.len = err;
++	switch (pnfsd_lexp_addr.sa_family) {
++	case AF_INET:
++		daddr.r_netid.data = "tcp";
++		daddr.r_netid.len = 3;
++		break;
++	case AF_INET6:
++		daddr.r_netid.data = "tcp6";
++		daddr.r_netid.len = 4;
++		break;
++	default:
++		BUG();
++	}
++	fdev.fl_device_list[0].fl_multipath_length = 1;
++	fdev.fl_device_list[0].fl_multipath_list = &daddr;
++
++	/* have nfsd encode the device info */
++	err = filelayout_encode_devinfo(xdr, &fdev);
++out:
++	dprintk("<-- %s: return %d\n", __func__, err);
++	return err;
++}
++/* A size below NFSSVC_MAXBLKSIZE becomes its largest multiple that fits. */
++static int get_stripe_unit(int blocksize)
++{
++	if (blocksize < NFSSVC_MAXBLKSIZE)
++		blocksize *= NFSSVC_MAXBLKSIZE / blocksize;
++	dprintk("%s: return %d\n", __func__, blocksize);
++	return blocksize;
++}
++/* Encode a whole-file layout with one DS filehandle copied from arg->lg_fh. */
++static enum nfsstat4
++pnfsd_lexp_layout_get(struct inode *inode,
++		      struct exp_xdr_stream *xdr,
++		      const struct nfsd4_pnfs_layoutget_arg *arg,
++		      struct nfsd4_pnfs_layoutget_res *res)
++{
++	enum nfsstat4 rc = NFS4_OK;
++	struct pnfs_filelayout_layout *layout = NULL;
++	struct knfsd_fh *fhp = NULL;
++
++	dprintk("--> %s: inode=%p\n", __func__, inode);
++
++	res->lg_seg.layout_type = LAYOUT_NFSV4_1_FILES;
++	res->lg_seg.offset = 0;
++	res->lg_seg.length = NFS4_MAX_UINT64;
++
++	layout = kzalloc(sizeof(*layout), GFP_KERNEL);
++	if (layout == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;	/* an nfsstat4, not a -errno */
++		goto error;
++	}
++
++	/* Set file layout response args */
++	layout->lg_layout_type = LAYOUT_NFSV4_1_FILES;
++	layout->lg_stripe_type = STRIPE_SPARSE;
++	layout->lg_commit_through_mds = true;
++	layout->lg_stripe_unit = get_stripe_unit(inode->i_sb->s_blocksize);
++	layout->lg_fh_length = 1;
++	layout->device_id.sbid = arg->lg_sbid;
++	layout->device_id.devid = 1;				/*FSFTEMP*/
++	layout->lg_first_stripe_index = 0;			/*FSFTEMP*/
++	layout->lg_pattern_offset = 0;
++
++	fhp = kmalloc(sizeof(*fhp), GFP_KERNEL);
++	if (fhp == NULL) {
++		rc = NFS4ERR_LAYOUTTRYLATER;	/* same code spnfs_layoutget uses */
++		goto error;
++	}
++
++	memcpy(fhp, arg->lg_fh, sizeof(*fhp));
++	pnfs_fh_mark_ds(fhp);
++	layout->lg_fh_list = fhp;
++
++	/* Call nfsd to encode layout */
++	rc = filelayout_encode_layout(xdr, layout);
++exit:
++	kfree(layout);
++	kfree(fhp);
++	dprintk("<-- %s: return %d\n", __func__, rc);
++	return rc;
++
++error:
++	res->lg_seg.length = 0;
++	goto exit;
++}
++
++static int
++pnfsd_lexp_layout_commit(struct inode *inode,
++			 const struct nfsd4_pnfs_layoutcommit_arg *args,
++			 struct nfsd4_pnfs_layoutcommit_res *res)
++{
++	dprintk("%s: (unimplemented)\n", __func__);
++	/* Stub: neither args nor res is touched; success is always reported. */
++	return 0;
++}
++
++static int
++pnfsd_lexp_layout_return(struct inode *inode,
++			 const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	dprintk("%s: (unimplemented)\n", __func__);
++	/* Stub: args is ignored; success is always reported. */
++	return 0;
++}
++/* get_state hook: leaves *p untouched and always returns 0. */
++static int pnfsd_lexp_get_state(struct inode *inode, struct knfsd_fh *fh,
++				struct pnfs_get_state *p)
++{
++	return 0;	/* just use the current stateid */
++}
++/* Export operations that pnfsd_lexp_init() installs in sb->s_pnfs_op. */
++static struct pnfs_export_operations pnfsd_lexp_ops = {
++	.layout_type = pnfsd_lexp_layout_type,
++	.get_device_info = pnfsd_lexp_get_device_info,
++	.get_device_iter = pnfsd_lexp_get_device_iter,
++	.layout_get = pnfsd_lexp_layout_get,
++	.layout_commit = pnfsd_lexp_layout_commit,
++	.layout_return = pnfsd_lexp_layout_return,
++	.get_state = pnfsd_lexp_get_state,
++};
++/* Point the superblock owning @inode at the local-export pNFS operations. */
++void
++pnfsd_lexp_init(struct inode *inode)
++{
++	dprintk("%s: &pnfsd_lexp_ops=%p\n", __func__, &pnfsd_lexp_ops);
++	inode->i_sb->s_pnfs_op = &pnfsd_lexp_ops;
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_com.c
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_com.c.orig	2010-09-30 10:17:08.933003000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_com.c	2010-09-30 10:17:08.935000000 -0400
+@@ -0,0 +1,535 @@
++/*
++ * fs/nfsd/spnfs_com.c
++ *
++ * Communication layer between spNFS kernel and userspace
++ * Based heavily on idmap.c
++ *
++ */
++
++/*
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Marius Aamodt Eriksen <marius@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#include <linux/namei.h>
++#include <linux/mount.h>
++#include <linux/path.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++#include <linux/nfsd/debug.h>
++
++#include <linux/nfsd4_spnfs.h>
++
++#define	NFSDDBG_FACILITY		NFSDDBG_PROC
++
++static ssize_t   spnfs_pipe_upcall(struct file *, struct rpc_pipe_msg *,
++		     char __user *, size_t);
++static ssize_t   spnfs_pipe_downcall(struct file *, const char __user *,
++		     size_t);
++static void      spnfs_pipe_destroy_msg(struct rpc_pipe_msg *);
++
++static struct rpc_pipe_ops spnfs_upcall_ops = {
++	.upcall		= spnfs_pipe_upcall,
++	.downcall	= spnfs_pipe_downcall,
++	.destroy_msg	= spnfs_pipe_destroy_msg,
++};
++
++/* evil global variable */
++struct spnfs *global_spnfs;
++struct spnfs_config *spnfs_config;
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++int spnfs_use_layoutsegments;
++uint64_t layoutsegment_size;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++/*
++ * Used by spnfs_enabled()
++ * Tracks if the subsystem has been initialized at some point.  It doesn't
++ * matter if it's not currently initialized.
++ */
++static int spnfs_enabled_at_some_point;
++
++/* call this to start the ball rolling */
++/* code it like we're going to avoid the global variable in the future */
++int
++nfsd_spnfs_new(void)
++{
++	struct spnfs *spnfs;
++	struct vfsmount *mnt;
++	struct nameidata nd;
++	int rc;
++
++	if (global_spnfs != NULL)
++		return -EEXIST;
++
++	spnfs = kzalloc(sizeof(*spnfs), GFP_KERNEL);
++	if (spnfs == NULL)
++		return -ENOMEM;
++	/* Set up locks first: downcalls can arrive once the pipe exists. */
++	mutex_init(&spnfs->spnfs_lock);
++	mutex_init(&spnfs->spnfs_plock);
++	init_waitqueue_head(&spnfs->spnfs_wq);
++
++	mnt = rpc_get_mount();
++	if (IS_ERR(mnt)) {
++		rc = PTR_ERR(mnt);
++		goto err_free;
++	}
++	/* FIXME: do not abuse rpc_pipefs/nfs */
++	rc = vfs_path_lookup(mnt->mnt_root, mnt, "/nfs", 0, &nd);
++	if (rc)
++		goto err_put;
++
++	spnfs->spnfs_dentry = rpc_mkpipe(nd.path.dentry, "spnfs", spnfs,
++					 &spnfs_upcall_ops, 0);
++	/* The lookup reference is only needed while the pipe is created. */
++	path_put(&nd.path);
++	if (IS_ERR(spnfs->spnfs_dentry)) {
++		rc = -EPIPE;
++		goto err_put;
++	}
++	global_spnfs = spnfs;
++	spnfs_enabled_at_some_point = 1;
++	return 0;
++err_put:
++	rpc_put_mount();
++err_free:
++	kfree(spnfs);
++	return rc;
++}
++
++/* undo nfsd_spnfs_new(); coded as if the global variable will go away */
++void
++nfsd_spnfs_delete(void)
++{
++	struct spnfs *spnfs = global_spnfs;
++
++	if (!spnfs)
++		return;
++	rpc_unlink(spnfs->spnfs_dentry);
++	rpc_put_mount();
++	global_spnfs = NULL;
++	kfree(spnfs);
++}
++
++/* RPC pipefs upcall/downcall routines */
++/* looks like this code is invoked by the rpc_pipe code */
++/* to handle upcalls on things we've queued elsewhere */
++/* See nfs_idmap_id for an example of enqueueing */
++static ssize_t
++spnfs_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
++    char __user *dst, size_t buflen)
++{
++	char *data = (char *)msg->data + msg->copied;
++	size_t mlen = min_t(size_t, msg->len - msg->copied, buflen);
++	unsigned long left;
++
++	/* copy_to_user() returns the number of bytes NOT copied, never < 0 */
++	left = copy_to_user(dst, data, mlen);
++	if (left == mlen) {
++		/* nothing reached userspace: fail the message */
++		msg->errno = -EFAULT;
++		return -EFAULT;
++	}
++
++	mlen -= left;
++	msg->copied += mlen;
++	msg->errno = 0;
++	return mlen;
++}
++
++/* Downcall: store the daemon's reply in spnfs_im and wake the waiter. */
++static ssize_t
++spnfs_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
++{
++	struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
++	struct spnfs *spnfs = (struct spnfs *)rpci->private;
++	struct spnfs_msg *im_in = NULL, *im = &spnfs->spnfs_im;
++	int ret;
++
++	if (mlen != sizeof(struct spnfs_msg))
++		return -ENOSPC;
++
++	im_in = kmalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im_in == NULL)
++		return -ENOMEM;
++	if (copy_from_user(im_in, src, mlen) != 0) {
++		kfree(im_in);	/* do not leak the staging buffer */
++		return -EFAULT;
++	}
++
++	mutex_lock(&spnfs->spnfs_plock);
++	ret = mlen;
++	im->im_status = im_in->im_status;
++	/* If we got an error, terminate now, and wake up pending upcalls */
++	if (!(im_in->im_status & SPNFS_STATUS_SUCCESS)) {
++		wake_up(&spnfs->spnfs_wq);
++		goto out;
++	}
++
++	ret = -EINVAL;
++	/* Did we match the current upcall? */
++	/* DMXXX: do not understand the comment above, from original code */
++	/* DMXXX: when do we _not_ match the current upcall? */
++	/* DMXXX: anyway, let's do a simplistic check */
++	if (im_in->im_type == im->im_type) {
++		/* copy the response into the spnfs struct */
++		memcpy(&im->im_res, &im_in->im_res, sizeof(im->im_res));
++		ret = mlen;
++	} else
++		dprintk("spnfs: downcall type != upcall type\n");
++
++	wake_up(&spnfs->spnfs_wq);
++/* DMXXX handle rval processing */
++out:
++	mutex_unlock(&spnfs->spnfs_plock);
++	kfree(im_in);
++	return ret;
++}
++
++static void
++spnfs_pipe_destroy_msg(struct rpc_pipe_msg *msg)
++{
++	struct spnfs_msg *im = msg->data;
++	struct spnfs *spnfs = container_of(im, struct spnfs, spnfs_im);
++	/* Messages without a negative errno need no failure notification. */
++	if (msg->errno >= 0)
++		return;
++	mutex_lock(&spnfs->spnfs_plock);
++	im->im_status = SPNFS_STATUS_FAIL;  /* DMXXX */
++	wake_up(&spnfs->spnfs_wq);
++	mutex_unlock(&spnfs->spnfs_plock);
++}
++
++/* generic upcall.  called by functions in spnfs_ops.c  */
++int
++spnfs_upcall(struct spnfs *spnfs, struct spnfs_msg *upmsg,
++		union spnfs_msg_res *res)
++{
++	struct rpc_pipe_msg msg;
++	struct spnfs_msg *im;
++	DECLARE_WAITQUEUE(wq, current);
++	int ret = -EIO;
++	int rval;
++
++	/* No instance registered (never started, or deleted): fail with -EIO. */
++	if (spnfs == NULL)
++		return ret;
++	im = &spnfs->spnfs_im;
++	mutex_lock(&spnfs->spnfs_lock);
++	mutex_lock(&spnfs->spnfs_plock);
++
++	memset(im, 0, sizeof(*im));
++	memcpy(im, upmsg, sizeof(*upmsg));
++	memset(&msg, 0, sizeof(msg));
++	msg.data = im;
++	msg.len = sizeof(*im);
++	add_wait_queue(&spnfs->spnfs_wq, &wq);
++	rval = rpc_queue_upcall(spnfs->spnfs_dentry->d_inode, &msg);
++	if (rval < 0) {
++		remove_wait_queue(&spnfs->spnfs_wq, &wq);
++		goto out;
++	}
++
++	/* Sleep until a downcall or a failed message wakes spnfs_wq. */
++	set_current_state(TASK_UNINTERRUPTIBLE);
++	mutex_unlock(&spnfs->spnfs_plock);
++	schedule();
++	current->state = TASK_RUNNING;
++	remove_wait_queue(&spnfs->spnfs_wq, &wq);
++	mutex_lock(&spnfs->spnfs_plock);
++
++	if (im->im_status & SPNFS_STATUS_SUCCESS) {
++		/* copy our result from the upcall */
++		memcpy(res, &im->im_res, sizeof(*res));
++		ret = 0;
++	}
++out:
++	memset(im, 0, sizeof(*im));
++	mutex_unlock(&spnfs->spnfs_plock);
++	mutex_unlock(&spnfs->spnfs_lock);
++	return ret;
++}
++
++/*
++ * This is used to determine if the spnfsd daemon has been started at
++ * least once since the system came up.  It is used by the export
++ * mechanism to decide if spnfs is in use.
++ *
++ * Returns non-zero if the spnfsd has initialized the communication pipe
++ * at least once.
++ */
++int spnfs_enabled(void)
++{
++	return spnfs_enabled_at_some_point;
++}
++
++#ifdef CONFIG_PROC_FS
++
++/*
++ * procfs virtual files for user/kernel space communication:
++ *
++ * ctl - currently just an on/off switch...can be expanded
++ * getfh - fd to fh conversion
++ * recall - recall a layout from the command line, for example:
++ *		echo <path> > /proc/fs/spnfs/recall
++ * config - configuration info, e.g., stripe size, num ds, etc.
++ */
++
++/*-------------- start ctl -------------------------*/
++static ssize_t ctl_write(struct file *file, const char __user *buf,
++			 size_t count, loff_t *offset)
++{
++	int enable, err;
++
++	/* The first sizeof(int) bytes are a flag: non-zero starts spnfs. */
++	if (copy_from_user(&enable, buf, sizeof(enable)))
++		return -EFAULT;
++	if (!enable) {
++		nfsd_spnfs_delete();
++		return count;
++	}
++
++	err = nfsd_spnfs_new();
++	return err != 0 ? err : count;
++}
++
++static const struct file_operations ctl_ops = {
++	.write		= ctl_write,
++};
++/*-------------- end ctl ---------------------------*/
++
++/*-------------- start config -------------------------*/
++static ssize_t config_write(struct file *file, const char __user *buf,
++			    size_t count, loff_t *offset)
++{
++	static struct spnfs_config cfg;
++	/* count comes from the writer: never copy more than cfg can hold */
++	if (count > sizeof(cfg))
++		return -EINVAL;
++	if (copy_from_user(&cfg, buf, count))
++		return -EFAULT;
++	spnfs_config = &cfg;
++	return 0;	/* NOTE(review): returns 0, not count, as before */
++}
++
++static const struct file_operations config_ops = {
++	.write		= config_write,
++};
++/*-------------- end config ---------------------------*/
++
++/*-------------- start getfh -----------------------*/
++static int getfh_open(struct inode *inode, struct file *file)
++{
++	/* Zeroed: getfh_read() copies this buffer out even before any write. */
++	file->private_data = kzalloc(sizeof(struct nfs_fh), GFP_KERNEL);
++	if (file->private_data == NULL)
++		return -ENOMEM;
++	return 0;
++}
++
++static ssize_t getfh_read(struct file *file, char __user *buf, size_t count,
++			  loff_t *offset)
++{
++	if (copy_to_user(buf, file->private_data, sizeof(struct nfs_fh)))
++		return -EFAULT;
++	/* NOTE(review): sizeof(struct nfs_fh) bytes are copied whatever count is. */
++	return count;
++}
++
++static ssize_t getfh_write(struct file *file, const char __user *buf,
++			   size_t count, loff_t *offset)
++{
++	int fd;
++
++	/* The first sizeof(int) bytes written are taken as a file descriptor. */
++	if (copy_from_user(&fd, buf, sizeof(fd)))
++		return -EFAULT;
++
++	/* Any spnfs_getfh() failure is reported to the writer as -EIO. */
++	return spnfs_getfh(fd, file->private_data) ? -EIO : count;
++}
++/* Free the filehandle buffer allocated in getfh_open(). */
++static int getfh_release(struct inode *inode, struct file *file)
++{
++	kfree(file->private_data);
++	return 0;
++}
++
++static const struct file_operations getfh_ops = {
++	.open		= getfh_open,
++	.read		= getfh_read,
++	.write		= getfh_write,
++	.release	= getfh_release,
++};
++/*-------------- end getfh ------------------------*/
++
++
++/*-------------- start recall layout --------------*/
++static ssize_t recall_write(struct file *file, const char __user *buf,
++			    size_t count, loff_t *offset)
++{
++	char input[128];
++	char *path, *str, *p;
++	int rc;
++	u64 off = 0, len = 0;
++	/* Input is "<path>[ <offset>[ <length>]]\n" and must fit in input[]. */
++	if (count > 128)
++		return -EINVAL;
++
++	if (copy_from_user(input, buf, count))
++		return -EFAULT;
++
++	/* the first newline ends the command; input without one is rejected */
++	p = memchr(input, '\n', count);
++	if (p == NULL)
++		return -EINVAL;
++	*p = '\0';
++
++	/*
++	 * Scan for path and, optionally, an offset and length
++	 * of a layout segment to be recalled; if there are two
++	 * fields, they're assumed to be path and offset.
++	 */
++	p = input;
++	path = strsep(&p, " ");
++	if (path == NULL)
++		return -EINVAL;
++
++	str = strsep(&p, " ");
++	if (str != NULL) {
++		rc = strict_strtoull(str, 10, &off);
++		if (rc != 0)
++			return -EINVAL;
++
++		str = strsep(&p, " ");
++		if (str != NULL) {
++			rc = strict_strtoull(str, 10, &len);
++			if (rc != 0)
++				return -EINVAL;
++		}
++	}
++	/* off and len are still 0 when the optional fields were left out */
++	rc = spnfs_test_layoutrecall(path, off, len);
++	if (rc != 0)
++		return rc;
++
++	return count;
++}
++
++static const struct file_operations recall_ops = {
++	.write		= recall_write,
++};
++/*-------------- end recall layout --------------*/
++
++
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++/*-------------- start layoutseg -------------------------*/
++static ssize_t layoutseg_write(struct file *file, const char __user *buf,
++			       size_t count, loff_t *offset)
++{
++	char flag;
++
++	/* Only the first byte is examined: '0' turns layout segments off, */
++	/* anything else turns them on.					   */
++	if (copy_from_user(&flag, buf, 1))
++		return -EFAULT;
++
++	spnfs_use_layoutsegments = (flag == '0') ? 0 : 1;
++
++	return count;
++}
++
++static const struct file_operations layoutseg_ops = {
++	.write		= layoutseg_write,
++};
++/*-------------- end layoutseg ---------------------------*/
++
++/*-------------- start layoutsegsize -------------------------*/
++static ssize_t layoutsegsize_write(struct file *file, const char __user *buf,
++				   size_t count, loff_t *offset)
++{
++	char cmd[50] = "";	/* zero-filled, so the copy is always terminated */
++
++	/* Copy only what was written, leaving room for the trailing NUL. */
++	if (copy_from_user(cmd, buf, min_t(size_t, count, sizeof(cmd) - 1)))
++		return -EFAULT;
++	layoutsegment_size = simple_strtoull(cmd, NULL, 10);
++	return count;
++}
++
++static const struct file_operations layoutsegsize_ops = {
++	.write		= layoutsegsize_write,
++};
++/*-------------- end layoutsegsize ---------------------------*/
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++int
++spnfs_init_proc(void)
++{
++	/* Every control file created under /proc/fs/spnfs, with its handlers. */
++	static const struct {
++		const char *path;
++		const struct file_operations *fops;
++	} files[] = {
++		{ "fs/spnfs/ctl",		&ctl_ops },
++		{ "fs/spnfs/config",		&config_ops },
++		{ "fs/spnfs/getfh",		&getfh_ops },
++		{ "fs/spnfs/recall",		&recall_ops },
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++		{ "fs/spnfs/layoutseg",		&layoutseg_ops },
++		{ "fs/spnfs/layoutsegsize",	&layoutsegsize_ops },
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++	};
++	struct proc_dir_entry *entry;
++	unsigned int i;
++
++	entry = proc_mkdir("fs/spnfs", NULL);
++	if (!entry)
++		return -ENOMEM;
++
++	for (i = 0; i < ARRAY_SIZE(files); i++) {
++		entry = create_proc_entry(files[i].path, 0, NULL);
++		if (!entry)
++			goto unwind;
++		entry->proc_fops = files[i].fops;
++	}
++
++	return 0;
++
++unwind:
++	/*
++	 * Remove whatever was created so that a failed init leaves
++	 * nothing behind in /proc.
++	 */
++	while (i-- > 0)
++		remove_proc_entry(files[i].path, NULL);
++	remove_proc_entry("fs/spnfs", NULL);
++	return -ENOMEM;
++}
++#endif /* CONFIG_PROC_FS */
+diff -up linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c
+--- linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c.orig	2010-09-30 10:17:08.938003000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/spnfs_ops.c	2010-09-30 10:17:08.940000000 -0400
+@@ -0,0 +1,878 @@
++/*
++ * fs/nfsd/spnfs_ops.c
++ *
++ * Communication layer between spNFS kernel and userspace
++ *
++ */
++/******************************************************************************
++
++(c) 2007 Network Appliance, Inc.  All Rights Reserved.
++
++Network Appliance provides this source code under the GPL v2 License.
++The GPL v2 license is available at
++http://opensource.org/licenses/gpl-license.php.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++******************************************************************************/
++
++#include <linux/sched.h>
++#include <linux/file.h>
++#include <linux/namei.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfsd4_spnfs.h>
++#include <linux/nfsd/debug.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++#include <linux/nfsd/nfs4layoutxdr.h>
++
++#include "pnfsd.h"
++
++/* comment out CONFIG_SPNFS_TEST for non-test behaviour */
++/* #define CONFIG_SPNFS_TEST 1 */
++
++#define	NFSDDBG_FACILITY		NFSDDBG_PNFS
++
++/*
++ * The functions that are called from elsewhere in the kernel
++ * to perform tasks in userspace
++ *
++ */
++
++#ifdef CONFIG_SPNFS_LAYOUTSEGMENTS
++extern int spnfs_use_layoutsegments;
++extern uint64_t layoutsegment_size;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++extern struct spnfs *global_spnfs;
++/* spNFS always reports the NFSv4.1 files layout type; sb is not consulted. */
++int
++spnfs_layout_type(struct super_block *sb)
++{
++	return LAYOUT_NFSV4_1_FILES;
++}
++
++/* LAYOUTGET: ask the spnfs daemon for the file's layout and encode it. */
++enum nfsstat4
++spnfs_layoutget(struct inode *inode, struct exp_xdr_stream *xdr,
++		const struct nfsd4_pnfs_layoutget_arg *lg_arg,
++		struct nfsd4_pnfs_layoutget_res *lg_res)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	struct pnfs_filelayout_layout *flp = NULL;
++	int status, i;
++	enum nfsstat4 nfserr;
++	/* Zeroed: the whole message is copied out to userspace by the upcall. */
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++
++	im->im_type = SPNFS_TYPE_LAYOUTGET;
++	im->im_args.layoutget_args.inode = inode->i_ino;
++	im->im_args.layoutget_args.generation = inode->i_generation;
++
++	/* call function to queue the msg for upcall */
++	if (spnfs_upcall(spnfs, im, res) != 0) {
++		dprintk("failed spnfs upcall: layoutget\n");
++		nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		goto layoutget_cleanup;
++	}
++	status = res->layoutget_res.status;
++	if (status != 0) {
++		/* FIXME? until user mode is fixed, translate system error */
++		switch (status) {
++		case -E2BIG:
++		case -ETOOSMALL:
++			nfserr = NFS4ERR_TOOSMALL;
++			break;
++		case -ENOMEM:
++		case -EAGAIN:
++		case -EINTR:
++			nfserr = NFS4ERR_LAYOUTTRYLATER;
++			break;
++		case -ENOENT:
++			nfserr = NFS4ERR_BADLAYOUT;
++			break;
++		default:
++			nfserr = NFS4ERR_LAYOUTUNAVAILABLE;
++		}
++		dprintk("spnfs layout_get upcall: status=%d nfserr=%u\n",
++			status, nfserr);
++		goto layoutget_cleanup;
++	}
++
++	lg_res->lg_return_on_close = 0;
++#if defined(CONFIG_SPNFS_LAYOUTSEGMENTS)
++	/* if spnfs_use_layoutsegments & layoutsegment_size == 0, use */
++	/* the amount requested by the client.			      */
++	if (spnfs_use_layoutsegments) {
++		if (layoutsegment_size != 0)
++			lg_res->lg_seg.length = layoutsegment_size;
++	} else
++		lg_res->lg_seg.length = NFS4_MAX_UINT64;
++#else
++	lg_res->lg_seg.length = NFS4_MAX_UINT64;
++#endif /* CONFIG_SPNFS_LAYOUTSEGMENTS */
++
++	flp = kmalloc(sizeof(struct pnfs_filelayout_layout), GFP_KERNEL);
++	if (flp == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	flp->device_id.sbid = lg_arg->lg_sbid;
++	flp->device_id.devid = res->layoutget_res.devid;
++	flp->lg_layout_type = 1; /* XXX */
++	flp->lg_stripe_type = res->layoutget_res.stripe_type;
++	flp->lg_commit_through_mds = 0;
++	flp->lg_stripe_unit =  res->layoutget_res.stripe_size;
++	flp->lg_first_stripe_index = 0;
++	flp->lg_pattern_offset = 0;
++	flp->lg_fh_length = res->layoutget_res.stripe_count;
++
++	flp->lg_fh_list = kmalloc(flp->lg_fh_length * sizeof(struct knfsd_fh),
++				  GFP_KERNEL);
++	if (flp->lg_fh_list == NULL) {
++		nfserr = NFS4ERR_LAYOUTTRYLATER;
++		goto layoutget_cleanup;
++	}
++	/*
++	 * FIX: Doing an extra copy here.  Should group res.flist's fh_len
++	 * and fh_val into a knfsd_fh structure.
++	 */
++	for (i = 0; i < flp->lg_fh_length; i++) {
++		flp->lg_fh_list[i].fh_size = res->layoutget_res.flist[i].fh_len;
++		memcpy(&flp->lg_fh_list[i].fh_base,
++		       res->layoutget_res.flist[i].fh_val,
++		       res->layoutget_res.flist[i].fh_len);
++	}
++
++	/* encode the layoutget body */
++	nfserr = filelayout_encode_layout(xdr, flp);
++
++layoutget_cleanup:
++	if (flp) {
++		kfree(flp->lg_fh_list);
++		kfree(flp);
++	}
++	kfree(im);
++	kfree(res);
++
++	return nfserr;
++}
++/* Placeholder: takes no arguments and always returns 0. */
++int
++spnfs_layoutcommit(void)
++{
++	return 0;
++}
++/* Placeholder: inode and args are ignored; always returns 0. */
++int
++spnfs_layoutreturn(struct inode *inode,
++		   const struct nfsd4_pnfs_layoutreturn_arg *args)
++{
++	return 0;
++}
++
++int
++spnfs_layoutrecall(struct inode *inode, int type, u64 offset, u64 len)
++{
++	struct super_block *sb;
++	struct nfsd4_pnfs_cb_layout lr;
++	/* Only RETURN_FILE recalls anything; RETURN_FSID and RETURN_ALL just log. */
++	switch (type) {
++	case RETURN_FILE:
++		sb = inode->i_sb;
++		dprintk("%s: recalling layout for ino = %lu\n",
++			__func__, inode->i_ino);
++		break;
++	case RETURN_FSID:
++		sb = inode->i_sb;
++		dprintk("%s: recalling layout for fsid x (unimplemented)\n",
++			__func__);
++		return 0;
++	case RETURN_ALL:
++		/* XXX figure out how to get a sb since there's no inode ptr */
++		dprintk("%s: recalling all layouts (unimplemented)\n",
++			__func__);
++		return 0;
++	default:
++		return -EINVAL;
++	}
++
++	lr.cbl_recall_type = type;
++	lr.cbl_seg.layout_type = LAYOUT_NFSV4_1_FILES;
++	lr.cbl_seg.clientid = 0;
++	lr.cbl_seg.offset = offset;
++	lr.cbl_seg.length = len;
++	lr.cbl_seg.iomode = IOMODE_ANY;
++	lr.cbl_layoutchanged = 0;
++	/* NOTE(review): the callback's return value is not checked. */
++	nfsd_layout_recall_cb(sb, inode, &lr);
++
++	return 0;
++}
++
++
++int
++spnfs_test_layoutrecall(char *path, u64 offset, u64 len)
++{
++	struct nameidata nd;
++	struct inode *inode;
++	int type, rc;
++
++	dprintk("%s: path=%s, offset=%llu, len=%llu\n",
++		__func__, path, offset, len);
++	/* The literal path "all" selects RETURN_ALL; anything else is looked up. */
++	if (strcmp(path, "all") == 0) {
++		inode = NULL;
++		type = RETURN_ALL;
++	} else {
++		rc = path_lookup(path, 0, &nd);
++		if (rc != 0)
++			return -ENOENT;
++
++		/*
++		 * XXX todo: add a RETURN_FSID scenario here...maybe if
++		 * inode is a dir...
++		 */
++
++		inode = nd.path.dentry->d_inode;
++		type = RETURN_FILE;
++	}
++	/* A zero length is widened to NFS4_MAX_UINT64. */
++	if (len == 0)
++		len = NFS4_MAX_UINT64;
++
++	rc = spnfs_layoutrecall(inode, type, offset, len);
++	/* nd was only filled in by path_lookup() on the non-"all" path */
++	if (type != RETURN_ALL)
++		path_put(&nd.path);
++	return rc;
++}
++
++/* Forward cookie and verifier to the daemon and return the next device. */
++int
++spnfs_getdeviceiter(struct super_block *sb,
++		    u32 layout_type,
++		    struct nfsd4_pnfs_dev_iter_res *gd_res)
++{
++	struct spnfs *spnfs = global_spnfs;   /* XXX keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++	/* Zeroed: the whole message is copied out to userspace by the upcall. */
++	im = kzalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto getdeviceiter_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto getdeviceiter_out;
++	}
++
++	im->im_type = SPNFS_TYPE_GETDEVICEITER;
++	im->im_args.getdeviceiter_args.cookie = gd_res->gd_cookie;
++	im->im_args.getdeviceiter_args.verf = gd_res->gd_verf;
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto getdeviceiter_out;
++	}
++	status = res->getdeviceiter_res.status;
++
++	if (res->getdeviceiter_res.eof)
++		gd_res->gd_eof = 1;
++	else {
++		gd_res->gd_devid = res->getdeviceiter_res.devid;
++		gd_res->gd_cookie = res->getdeviceiter_res.cookie;
++		gd_res->gd_verf = res->getdeviceiter_res.verf;
++		gd_res->gd_eof = 0;
++	}
++
++getdeviceiter_out:
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++#ifdef CONFIG_SPNFS_TEST
++/*
++ * Setup the rq_res xdr_buf.  The svc_rqst rq_respages[1] page contains the
++ * 1024 encoded stripe indices.
++ *
++ * Skip the devaddr4 length and encode the indices count (1024) in the
++ * rq_res.head and set the rq_res.head length.
++ *
++ * Set the rq_res page_len to 4096 (for the 1024 stripe indices).
++ * Set the rq_res xdr_buf tail base to rq_respages[0] just after the
++ * rq_res head to hold the rest of the getdeviceinfo return.
++ *
++ * So rq_respages[rq_resused - 1] contains the rq_res.head and rq_res.tail and
++ * rq_respages[rq_resused] contains the rq_res.pages.
++ */
++static int spnfs_test_indices_xdr(struct pnfs_xdr_info *info,
++				  const struct pnfs_filelayout_device *fdev)
++{
++	struct nfsd4_compoundres *resp = info->resp;
++	struct svc_rqst *rqstp = resp->rqstp;
++	struct xdr_buf *xb = &resp->rqstp->rq_res;
++	__be32 *p;
++	/* Reserve two words: a length placeholder and the index count. */
++	p = nfsd4_xdr_reserve_space(resp, 8);
++	p++; /* Fill in length later */
++	*p++ = cpu_to_be32(fdev->fl_stripeindices_length); /* 1024 */
++	resp->p = p;
++
++	xb->head[0].iov_len = (char *)resp->p - (char *)xb->head[0].iov_base;
++	xb->pages = &rqstp->rq_respages[rqstp->rq_resused];
++	xb->page_base = 0;
++	xb->page_len = PAGE_SIZE; /* page of 1024 encoded indices */
++	xb->tail[0].iov_base = resp->p;
++	resp->end = xb->head[0].iov_base + PAGE_SIZE;
++	xb->tail[0].iov_len = (char *)resp->end - (char *)resp->p;
++	return 0;
++}
++/*
++ * Return a stripeindices of length 1024 to test
++ * the pNFS client multipage getdeviceinfo implementation.
++ *
++ * Encode a page of stripe indices.
++ */
++static void spnfs_set_test_indices(struct pnfs_filelayout_device *fldev,
++				  struct spnfs_device *dev,
++				  struct pnfs_devinfo_arg *info)
++{
++	struct svc_rqst *rqstp = info->xdr.resp->rqstp;
++	__be32 *p;
++	int i, j = 0;
++
++	p = (__be32 *)page_address(rqstp->rq_respages[rqstp->rq_resused]);
++	fldev->fl_stripeindices_length = 1024;
++	/* round-robin the data server device indices into the stripe indices */
++	for (i = 0; i < 1024; i++) {
++		*p++ = cpu_to_be32(j);
++		if (j < dev->dscount - 1)
++			j++;
++		else
++			j = 0;
++	}
++	fldev->fl_stripeindices_list = NULL;
++}
++#endif /* CONFIG_SPNFS_TEST */
++
++/*
++ * Upcall to spnfsd for the data servers behind @devid and encode them
++ * into @xdr as a file layout device (@sb and @layout_type are unused).
++ * Returns 0 on success; -ENOMEM or -EIO for local failures, otherwise
++ * the status reported by spnfsd or by filelayout_encode_devinfo().
++ */
++int
++spnfs_getdeviceinfo(struct super_block *sb, struct exp_xdr_stream *xdr,
++		    u32 layout_type,
++		    const struct nfsd4_pnfs_deviceid *devid)
++{
++	struct spnfs *spnfs = global_spnfs;
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	struct spnfs_device *dev;
++	struct pnfs_filelayout_device *fldev = NULL;
++	struct pnfs_filelayout_multipath *mp = NULL;
++	struct pnfs_filelayout_devaddr *fldap = NULL;
++	int status = 0, i, len;
++
++	im = kmalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++
++	im->im_type = SPNFS_TYPE_GETDEVICEINFO;
++	/* XXX FIX: figure out what to do about fsid */
++	im->im_args.getdeviceinfo_args.devid = devid->devid;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto getdeviceinfo_out;
++	}
++	status = res->getdeviceinfo_res.status;
++	if (status != 0)
++		goto getdeviceinfo_out;
++
++	dev = &res->getdeviceinfo_res.devinfo;
++
++	/* Fill in the device data, i.e., nfs4_1_file_layout_ds_addr4 */
++	fldev = kzalloc(sizeof(struct pnfs_filelayout_device), GFP_KERNEL);
++	if (fldev == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++
++	/* Stripe count is the same as data server count for our purposes */
++	fldev->fl_stripeindices_length = dev->dscount;
++	fldev->fl_device_length = dev->dscount;
++
++	/* Set stripe indices.  NOTE(review): 'info' is not declared locally. */
++#ifdef CONFIG_SPNFS_TEST
++	spnfs_set_test_indices(fldev, dev, info);
++	fldev->fl_enc_stripe_indices = spnfs_test_indices_xdr;
++#else /* CONFIG_SPNFS_TEST */
++	fldev->fl_stripeindices_list =
++		kcalloc(fldev->fl_stripeindices_length, sizeof(u32),
++			GFP_KERNEL);
++	if (fldev->fl_stripeindices_list == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++	for (i = 0; i < fldev->fl_stripeindices_length; i++)
++		fldev->fl_stripeindices_list[i] = i;
++#endif /* CONFIG_SPNFS_TEST */
++
++	/*
++	 * Data server addresses: no multipath for spnfs, so mp length is
++	 * always 1.  Zeroed so a partial failure is safe to clean up.
++	 */
++	fldev->fl_device_list =
++		kcalloc(fldev->fl_device_length,
++			sizeof(struct pnfs_filelayout_multipath),
++			GFP_KERNEL);
++	if (fldev->fl_device_list == NULL) {
++		status = -ENOMEM;
++		goto getdeviceinfo_out;
++	}
++	for (i = 0; i < fldev->fl_device_length; i++) {
++		mp = &fldev->fl_device_list[i];
++		mp->fl_multipath_length = 1;
++		mp->fl_multipath_list =
++			kzalloc(sizeof(struct pnfs_filelayout_devaddr),
++				GFP_KERNEL);
++		if (mp->fl_multipath_list == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		fldap = mp->fl_multipath_list;
++
++		/* Copy the netid into the device address, e.g. "tcp" */
++		len = strlen(dev->dslist[i].netid);
++		fldap->r_netid.data = kmalloc(len, GFP_KERNEL);
++		if (fldap->r_netid.data == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		memcpy(fldap->r_netid.data, dev->dslist[i].netid, len);
++		fldap->r_netid.len = len;
++
++		/* Copy the network address, for example: "10.35.9.16.08.01" */
++		len = strlen(dev->dslist[i].addr);
++		fldap->r_addr.data = kmalloc(len, GFP_KERNEL);
++		if (fldap->r_addr.data == NULL) {
++			status = -ENOMEM;
++			goto getdeviceinfo_out;
++		}
++		memcpy(fldap->r_addr.data, dev->dslist[i].addr, len);
++		fldap->r_addr.len = len;
++	}
++
++	/* encode the device data */
++	status = filelayout_encode_devinfo(xdr, fldev);
++
++getdeviceinfo_out:
++	if (fldev) {
++		kfree(fldev->fl_stripeindices_list);
++		if (fldev->fl_device_list) {
++			for (i = 0; i < fldev->fl_device_length; i++) {
++				fldap =
++				    fldev->fl_device_list[i].fl_multipath_list;
++				if (fldap == NULL)
++					continue; /* never allocated */
++				kfree(fldap->r_netid.data);
++				kfree(fldap->r_addr.data);
++				kfree(fldap);
++			}
++			kfree(fldev->fl_device_list);
++		}
++		kfree(fldev);
++	}
++
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++int
++spnfs_setattr(void)
++{
++	return 0;
++}
++
++int
++spnfs_open(struct inode *inode, struct nfsd4_open *open)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++
++	im = kmalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto open_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto open_out;
++	}
++
++	im->im_type = SPNFS_TYPE_OPEN;
++	im->im_args.open_args.inode = inode->i_ino;
++	im->im_args.open_args.generation = inode->i_generation;
++	im->im_args.open_args.create = open->op_create;
++	im->im_args.open_args.createmode = open->op_createmode;
++	im->im_args.open_args.truncate = open->op_truncate;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto open_out;
++	}
++	status = res->open_res.status;
++
++open_out:
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++int
++spnfs_create(void)
++{
++	return 0;
++}
++
++/*
++ * Invokes the spnfsd with the inode number and generation of the object to
++ * remove.  The file has already been removed on the MDS, so all the spnfsd
++ * daemon does is remove the stripes.  Returns 0 on success, otherwise
++ * -ENOMEM, -EIO or the status reported by spnfsd.
++ */
++int
++spnfs_remove(unsigned long ino, unsigned long generation)
++{
++	struct spnfs *spnfs = global_spnfs; /* keep up the pretence */
++	struct spnfs_msg *im = NULL;
++	union spnfs_msg_res *res = NULL;
++	int status = 0;
++
++	im = kmalloc(sizeof(struct spnfs_msg), GFP_KERNEL);
++	if (im == NULL) {
++		status = -ENOMEM;
++		goto remove_out;
++	}
++
++	res = kmalloc(sizeof(union spnfs_msg_res), GFP_KERNEL);
++	if (res == NULL) {
++		status = -ENOMEM;
++		goto remove_out;
++	}
++
++	im->im_type = SPNFS_TYPE_REMOVE;
++	im->im_args.remove_args.inode = ino;
++	im->im_args.remove_args.generation = generation;
++
++	/* call function to queue the msg for upcall */
++	status = spnfs_upcall(spnfs, im, res);
++	if (status != 0) {
++		dprintk("%s spnfs upcall failure: %d\n", __func__, status);
++		status = -EIO;
++		goto remove_out;
++	}
++	status = res->remove_res.status;
++
++remove_out:
++	kfree(im);
++	kfree(res);
++
++	return status;
++}
++
++static int
++read_one(struct inode *inode, loff_t offset, size_t len, char *buf,
++	 struct file **filp)
++{
++	loff_t bufoffset = 0, soffset, pos, snum, soff, tmp;
++	size_t iolen;
++	int completed = 0, ds, err;
++
++	while (len > 0) {
++		tmp = offset;
++		soff = do_div(tmp, spnfs_config->stripe_size);
++		snum = tmp;
++		ds = do_div(tmp, spnfs_config->num_ds);
++		if (spnfs_config->dense_striping == 0)
++			soffset = offset;
++		else {
++			tmp = snum;
++			do_div(tmp, spnfs_config->num_ds);
++			soffset = tmp * spnfs_config->stripe_size + soff;
++		}
++		if (len < spnfs_config->stripe_size - soff)
++			iolen = len;
++		else
++			iolen = spnfs_config->stripe_size - soff;
++
++		pos = soffset;
++		err = vfs_read(filp[ds], buf + bufoffset, iolen, &pos);
++		if (err < 0)
++			return -EIO;
++		if (err == 0)
++			break;
++		filp[ds]->f_pos = pos;
++		iolen = err;
++		completed += iolen;
++		len -= iolen;
++		offset += iolen;
++		bufoffset += iolen;
++	}
++
++	return completed;
++}
++
++/* Striped read into rq_vec; *lenp gets the byte count; nfserr_io on error */
++static __be32
++read(struct inode *inode, loff_t offset, unsigned long *lenp, int vlen,
++     struct svc_rqst *rqstp)
++{
++	int i, vnum, err, bytecount = 0;
++	char path[128];
++	struct file *filp[SPNFS_MAX_DATA_SERVERS];
++	size_t iolen;
++	__be32 status = nfs_ok;
++
++	/*
++	 * XXX We should just be doing this at open time, but it gets
++	 * kind of messy storing this info in nfsd's state structures
++	 * and piggybacking its path through the various state handling
++	 * functions.  Revisit this.
++	 */
++	memset(filp, 0, SPNFS_MAX_DATA_SERVERS * sizeof(struct file *));
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		snprintf(path, sizeof(path), "%s/%ld.%u",
++			 spnfs_config->ds_dir[i], inode->i_ino,
++			 inode->i_generation);
++		filp[i] = filp_open(path, O_RDONLY | O_LARGEFILE, 0);
++		if (IS_ERR(filp[i])) {	/* filp_open() never returns NULL */
++			status = nfserr_io;
++			goto read_out;
++		}
++		get_file(filp[i]);
++	}
++
++	for (vnum = 0 ; vnum < vlen ; vnum++) {
++		iolen = rqstp->rq_vec[vnum].iov_len;
++		err = read_one(inode, offset + bytecount, iolen,
++			       (char *)rqstp->rq_vec[vnum].iov_base, filp);
++		if (err < 0) {
++			status = nfserr_io;
++			goto read_out;
++		}
++		bytecount += err;
++		if (err < iolen)	/* short read: stop and report what we got */
++			break;
++	}
++
++read_out:
++	*lenp = bytecount;
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		if (filp[i] && !IS_ERR(filp[i])) {
++			filp_close(filp[i], current->files);
++			fput(filp[i]);
++		}
++	}
++	return status;
++}
++
++__be32
++spnfs_read(struct inode *inode, loff_t offset, unsigned long *lenp, int vlen,
++	   struct svc_rqst *rqstp)
++{
++	if (spnfs_config)
++		return read(inode, offset, lenp, vlen, rqstp);
++	else {
++		printk(KERN_ERR "Please upgrade to latest spnfsd\n");
++		return nfserr_notsupp;
++	}
++}
++
++/* Write @len bytes of @buf at file @offset, one stripe unit at a time */
++static int
++write_one(struct inode *inode, loff_t offset, size_t len, char *buf,
++	  struct file **filp)
++{
++	loff_t bufoffset = 0, soffset, pos, soff, tmp;
++	size_t iolen;
++	int completed = 0, ds, err;
++
++	while (len > 0) {
++		/* do_div() leaves the quotient in tmp and returns the remainder */
++		tmp = offset;
++		soff = do_div(tmp, spnfs_config->stripe_size);
++		ds = do_div(tmp, spnfs_config->num_ds);
++		if (spnfs_config->dense_striping == 0)
++			soffset = offset;
++		else
++			soffset = tmp * spnfs_config->stripe_size + soff;
++		if (len < spnfs_config->stripe_size - soff)
++			iolen = len;
++		else
++			iolen = spnfs_config->stripe_size - soff;
++
++		pos = soffset;
++		err = vfs_write(filp[ds], buf + bufoffset, iolen, &pos);
++		if (err < 0)
++			return -EIO;
++		if (err == 0)	/* no progress: stop rather than spin forever */
++			break;
++		filp[ds]->f_pos = pos;
++		iolen = err;
++		completed += iolen;
++		len -= iolen;
++		offset += iolen;
++		bufoffset += iolen;
++	}
++
++	return completed;
++}
++
++/* Striped write of rq_vec; any failed or short write yields nfserr_io */
++static __be32
++write(struct inode *inode, loff_t offset, size_t len, int vlen,
++      struct svc_rqst *rqstp)
++{
++	int i, vnum, err, bytecount = 0;
++	char path[128];
++	struct file *filp[SPNFS_MAX_DATA_SERVERS];
++	size_t iolen;
++	__be32 status = nfs_ok;
++
++	/*
++	 * XXX We should just be doing this at open time, but it gets kind of
++	 * messy storing this info in nfsd's state structures and piggybacking
++	 * its path through the various state handling functions. Revisit this.
++	 */
++	memset(filp, 0, SPNFS_MAX_DATA_SERVERS * sizeof(struct file *));
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		snprintf(path, sizeof(path), "%s/%ld.%u",
++			 spnfs_config->ds_dir[i], inode->i_ino,
++			 inode->i_generation);
++		filp[i] = filp_open(path, O_RDWR | O_LARGEFILE, 0);
++		if (IS_ERR(filp[i])) {	/* filp_open() never returns NULL */
++			status = nfserr_io;
++			goto write_out;
++		}
++		get_file(filp[i]);
++	}
++
++	for (vnum = 0; vnum < vlen; vnum++) {
++		iolen = rqstp->rq_vec[vnum].iov_len;
++		err = write_one(inode, offset + bytecount, iolen,
++				(char *)rqstp->rq_vec[vnum].iov_base, filp);
++		if (err != iolen) {
++			dprintk("spnfs_write: err=%d expected %Zd\n", err, iolen);
++			status = nfserr_io;
++			goto write_out;
++		}
++		bytecount += rqstp->rq_vec[vnum].iov_len;
++	}
++
++write_out:
++	for (i = 0; i < spnfs_config->num_ds; i++) {
++		if (filp[i] && !IS_ERR(filp[i])) {
++			filp_close(filp[i], current->files);
++			fput(filp[i]);
++		}
++	}
++	return status;
++}
++
++__be32
++spnfs_write(struct inode *inode, loff_t offset, size_t len, int vlen,
++	    struct svc_rqst *rqstp)
++{
++	if (spnfs_config)
++		return write(inode, offset, len, vlen, rqstp);
++	else {
++		printk(KERN_ERR "Please upgrade to latest spnfsd\n");
++		return nfserr_notsupp;
++	}
++}
++
++int
++spnfs_commit(void)
++{
++	return 0;
++}
++
++/*
++ * Return the state for this object.
++ * At this time simply return 0 to indicate success and use the existing state
++ */
++int
++spnfs_get_state(struct inode *inode, struct knfsd_fh *fh, struct pnfs_get_state *arg)
++{
++	return 0;
++}
++
++/*
++ * Return the filehandle for descriptor @fd in @fh, or -EIO if fget() fails.
++ */
++int
++spnfs_getfh(int fd, struct nfs_fh *fh)
++{
++	struct file *file;
++
++	file = fget(fd);
++	if (file == NULL)
++		return -EIO;
++
++	memcpy(fh, NFS_FH(file->f_dentry->d_inode), sizeof(struct nfs_fh));
++	fput(file);
++	return 0;
++}
+diff -up linux-2.6.34.noarch/fs/nfsd/state.h.orig linux-2.6.34.noarch/fs/nfsd/state.h
+--- linux-2.6.34.noarch/fs/nfsd/state.h.orig	2010-09-30 10:15:18.375737000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/state.h	2010-09-30 10:17:08.964002000 -0400
+@@ -242,6 +242,12 @@ struct nfs4_client {
+ 	u32			cl_cb_seq_nr;
+ 	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
+ 						/* wait here for slots */
++#if defined(CONFIG_PNFSD)
++	struct list_head	cl_layouts;	/* outstanding layouts */
++	struct list_head	cl_layoutrecalls; /* outstanding layoutrecall
++						     callbacks */
++	atomic_t		cl_deviceref;	/* Num outstanding devs */
++#endif /* CONFIG_PNFSD */
+ };
+ 
+ static inline void
+@@ -342,12 +348,31 @@ struct nfs4_file {
+ 	struct list_head        fi_hash;    /* hash by "struct inode *" */
+ 	struct list_head        fi_stateids;
+ 	struct list_head	fi_delegations;
++#if defined(CONFIG_PNFSD)
++	struct list_head	fi_layouts;
++	struct list_head	fi_layout_states;
++#endif /* CONFIG_PNFSD */
+ 	struct inode		*fi_inode;
+ 	u32                     fi_id;      /* used with stateowner->so_id 
+ 					     * for stateid_hashtbl hash */
+ 	bool			fi_had_conflict;
++#if defined(CONFIG_PNFSD)
++	/* used by layoutget / layoutrecall */
++	struct nfs4_fsid	fi_fsid;
++	u32			fi_fhlen;
++	u8			fi_fhval[NFS4_FHSIZE];
++#endif /* CONFIG_PNFSD */
+ };
+ 
++#if defined(CONFIG_PNFSD)
++/* pNFS Metadata server state */
++
++struct pnfs_ds_dev_entry {
++	struct list_head	dd_dev_entry; /* st_pnfs_ds_id entry */
++	u32			dd_dsid;
++};
++#endif /* CONFIG_PNFSD */
++
+ /*
+ * nfs4_stateid can either be an open stateid or (eventually) a lock stateid
+ *
+@@ -370,6 +395,9 @@ struct nfs4_stateid {
+ 	struct list_head              st_perfile;
+ 	struct list_head              st_perstateowner;
+ 	struct list_head              st_lockowners;
++#if defined(CONFIG_PNFSD)
++	struct list_head              st_pnfs_ds_id;
++#endif /* CONFIG_PNFSD */
+ 	struct nfs4_stateowner      * st_stateowner;
+ 	struct nfs4_file            * st_file;
+ 	stateid_t                     st_stateid;
+@@ -421,6 +449,34 @@ extern void nfsd4_recdir_purge_old(void)
+ extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
+ extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
+ extern void release_session_client(struct nfsd4_session *);
++extern void nfsd4_free_slab(struct kmem_cache **);
++extern struct nfs4_file *find_file(struct inode *);
++extern struct nfs4_file *find_alloc_file(struct inode *, struct svc_fh *);
++extern void put_nfs4_file(struct nfs4_file *);
++extern void get_nfs4_file(struct nfs4_file *);
++extern struct nfs4_client *find_confirmed_client(clientid_t *);
++extern struct nfs4_stateid *find_stateid(stateid_t *, int flags);
++extern struct nfs4_delegation *find_delegation_stateid(struct inode *, stateid_t *);
++extern __be32 nfs4_check_stateid(stateid_t *);
++extern void expire_client_lock(struct nfs4_client *);
++extern int filter_confirmed_clients(int (* func)(struct nfs4_client *, void *), void *);
++
++#if defined(CONFIG_PNFSD)
++extern int nfsd4_init_pnfs_slabs(void);
++extern void nfsd4_free_pnfs_slabs(void);
++extern void pnfs_expire_client(struct nfs4_client *);
++extern void release_pnfs_ds_dev_list(struct nfs4_stateid *);
++extern void nfs4_pnfs_state_init(void);
++extern void nfs4_pnfs_state_shutdown(void);
++extern void nfs4_ds_get_verifier(stateid_t *, struct super_block *, u32 *);
++extern int nfs4_preprocess_pnfs_ds_stateid(struct svc_fh *, stateid_t *);
++#else /* CONFIG_PNFSD */
++static inline void nfsd4_free_pnfs_slabs(void) {}
++static inline int nfsd4_init_pnfs_slabs(void) { return 0; }
++static inline void pnfs_expire_client(struct nfs4_client *clp) {}
++static inline void release_pnfs_ds_dev_list(struct nfs4_stateid *stp) {}
++static inline void nfs4_pnfs_state_shutdown(void) {}
++#endif /* CONFIG_PNFSD */
+ 
+ static inline void
+ nfs4_put_stateowner(struct nfs4_stateowner *so)
+@@ -434,4 +490,24 @@ nfs4_get_stateowner(struct nfs4_stateown
+ 	kref_get(&so->so_ref);
+ }
+ 
++static inline u64
++end_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	end = start + len;
++	return end >= start ? end : NFS4_MAX_UINT64;
++}
++
++/* last octet in a range */
++static inline u64
++last_byte_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	BUG_ON(!len);
++	end = start + len;
++	return end > start ? end - 1 : NFS4_MAX_UINT64;
++}
++
+ #endif   /* NFSD4_STATE_H */
+diff -up linux-2.6.34.noarch/fs/nfsd/vfs.c.orig linux-2.6.34.noarch/fs/nfsd/vfs.c
+--- linux-2.6.34.noarch/fs/nfsd/vfs.c.orig	2010-09-30 10:15:05.090335000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/vfs.c	2010-09-30 10:17:08.970001000 -0400
+@@ -37,7 +37,12 @@
+ #ifdef CONFIG_NFSD_V4
+ #include <linux/nfs4_acl.h>
+ #include <linux/nfsd_idmap.h>
++#include <linux/security.h>
++#include <linux/nfsd4_spnfs.h>
+ #endif /* CONFIG_NFSD_V4 */
++#if defined(CONFIG_SPNFS_BLOCK)
++#include <linux/nfsd4_block.h>
++#endif
+ 
+ #include "nfsd.h"
+ #include "vfs.h"
+@@ -383,6 +388,12 @@ nfsd_setattr(struct svc_rqst *rqstp, str
+ 					NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
+ 			if (err)
+ 				goto out;
++#if defined(CONFIG_SPNFS_BLOCK)
++			if (pnfs_block_enabled(inode, 0)) {
++				err = bl_layoutrecall(inode, RETURN_FILE,
++				    iap->ia_size, inode->i_size - iap->ia_size);
++			}
++#endif /* CONFIG_SPNFS_BLOCK */
+ 		}
+ 
+ 		/*
+@@ -1703,6 +1714,11 @@ nfsd_rename(struct svc_rqst *rqstp, stru
+ 	struct inode	*fdir, *tdir;
+ 	__be32		err;
+ 	int		host_err;
++#ifdef CONFIG_SPNFS
++	unsigned long ino = 0;
++	unsigned long generation = 0;
++	unsigned int nlink = 0;
++#endif /* CONFIG_SPNFS */
+ 
+ 	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
+ 	if (err)
+@@ -1766,7 +1782,26 @@ nfsd_rename(struct svc_rqst *rqstp, stru
+ 	if (host_err)
+ 		goto out_dput_new;
+ 
++#ifdef CONFIG_SPNFS
++	/*
++	 * if the target is a preexisting regular file, remember the
++	 * inode number and generation so we can delete the stripes;
++	 * save the link count as well so that the stripes only
++	 * get deleted when the last link is deleted
++	 */
++	if (ndentry && ndentry->d_inode && S_ISREG(ndentry->d_inode->i_mode)) {
++		ino = ndentry->d_inode->i_ino;
++		generation = ndentry->d_inode->i_generation;
++		nlink = ndentry->d_inode->i_nlink;
++	}
++#endif /* CONFIG_SPNFS */
++
+ 	host_err = vfs_rename(fdir, odentry, tdir, ndentry);
++#ifdef CONFIG_SPNFS
++	if (spnfs_enabled() && (!host_err && ino && nlink == 1))
++		spnfs_remove(ino, generation);
++#endif /* CONFIG_SPNFS */
++
+ 	if (!host_err) {
+ 		host_err = commit_metadata(tfhp);
+ 		if (!host_err)
+@@ -1807,6 +1842,11 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 	struct inode	*dirp;
+ 	__be32		err;
+ 	int		host_err;
++#if defined(CONFIG_SPNFS)
++	unsigned long	ino;
++	unsigned long	generation;
++	unsigned int	nlink;
++#endif /* defined(CONFIG_SPNFS) */
+ 
+ 	err = nfserr_acces;
+ 	if (!flen || isdotent(fname, flen))
+@@ -1830,6 +1870,17 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 		goto out;
+ 	}
+ 
++#if defined(CONFIG_SPNFS)
++	/*
++	 * Remember the inode number to communicate to the spnfsd
++	 * for removal of stripes; save the link count as well so that
++	 * the stripes only get deleted when the last link is deleted
++	 */
++	ino = rdentry->d_inode->i_ino;
++	generation = rdentry->d_inode->i_generation;
++	nlink = rdentry->d_inode->i_nlink;
++#endif /* defined(CONFIG_SPNFS) */
++
+ 	if (!type)
+ 		type = rdentry->d_inode->i_mode & S_IFMT;
+ 
+@@ -1854,6 +1905,29 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
+ 	if (!host_err)
+ 		host_err = commit_metadata(fhp);
+ 
++#if defined(CONFIG_SPNFS)
++	/*
++	 * spnfs: notify spnfsd of removal to destroy stripes
++	 */
++/*
++	sb = current_fh->fh_dentry->d_inode->i_sb;
++	if (sb->s_export_op->spnfs_remove) {
++*/
++	dprintk("%s check if spnfs_enabled\n", __FUNCTION__);
++	if (spnfs_enabled() && nlink == 1) {
++		BUG_ON(ino == 0);
++		dprintk("%s calling spnfs_remove inumber=%ld\n",
++			__FUNCTION__, ino);
++		if (spnfs_remove(ino, generation) == 0) {
++			dprintk("%s spnfs_remove success\n", __FUNCTION__);
++		} else {
++			/* XXX How do we make this atomic? */
++			printk(KERN_WARNING "nfsd: pNFS could not "
++				"remove stripes for inode: %ld\n", ino);
++		}
++	}
++#endif /* defined(CONFIG_SPNFS) */
++
+ 	mnt_drop_write(fhp->fh_export->ex_path.mnt);
+ out_nfserr:
+ 	err = nfserrno(host_err);
+diff -up linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig linux-2.6.34.noarch/fs/nfsd/xdr4.h
+--- linux-2.6.34.noarch/fs/nfsd/xdr4.h.orig	2010-09-30 10:15:18.395731000 -0400
++++ linux-2.6.34.noarch/fs/nfsd/xdr4.h	2010-09-30 10:17:08.978004000 -0400
+@@ -37,6 +37,8 @@
+ #ifndef _LINUX_NFSD_XDR4_H
+ #define _LINUX_NFSD_XDR4_H
+ 
++#include <linux/nfsd/nfsd4_pnfs.h>
++
+ #include "state.h"
+ #include "nfsd.h"
+ 
+@@ -385,6 +387,51 @@ struct nfsd4_reclaim_complete {
+ 	u32 rca_one_fs;
+ };
+ 
++struct nfsd4_pnfs_getdevinfo {
++	struct nfsd4_pnfs_deviceid gd_devid;	/* request */
++	u32			gd_layout_type;	/* request */
++	u32			gd_maxcount;	/* request */
++	u32			gd_notify_types;/* request */
++	struct super_block	*gd_sb;
++};
++
++struct nfsd4_pnfs_getdevlist {
++	u32             gd_layout_type;	/* request */
++	u32		gd_maxdevices;	/* request */
++	u64		gd_cookie;	/* request - response */
++	u64		gd_verf;	/* request - response */
++	struct svc_fh 	*gd_fhp;	/* response */
++	u32		gd_eof;		/* response */
++};
++
++struct nfsd4_pnfs_layoutget {
++	u64			lg_minlength;	/* request */
++	u32			lg_signal;	/* request */
++	u32			lg_maxcount;	/* request */
++	struct svc_fh		*lg_fhp;	/* request */
++	stateid_t		lg_sid;		/* request/response */
++	struct nfsd4_layout_seg	lg_seg;		/* request/response */
++	u32			lg_roc;		/* response */
++};
++
++struct nfsd4_pnfs_layoutcommit {
++	struct nfsd4_pnfs_layoutcommit_arg args;
++	stateid_t		lc_sid;		/* request */
++	struct nfsd4_pnfs_layoutcommit_res res;
++};
++
++enum layoutreturn_flags {
++	LR_FLAG_INTERN = 1 << 0,	/* internal return */
++	LR_FLAG_EXPIRE = 1 << 1,	/* return on client expiration */
++};
++
++struct nfsd4_pnfs_layoutreturn {
++	struct nfsd4_pnfs_layoutreturn_arg args;
++	u32			lr_flags;
++	stateid_t		lr_sid;		/* request/response */
++	u32			lrs_present;	/* response */
++};
++
+ struct nfsd4_op {
+ 	int					opnum;
+ 	__be32					status;
+@@ -426,6 +473,13 @@ struct nfsd4_op {
+ 		struct nfsd4_destroy_session	destroy_session;
+ 		struct nfsd4_sequence		sequence;
+ 		struct nfsd4_reclaim_complete	reclaim_complete;
++#if defined(CONFIG_PNFSD)
++		struct nfsd4_pnfs_getdevlist	pnfs_getdevlist;
++		struct nfsd4_pnfs_getdevinfo	pnfs_getdevinfo;
++		struct nfsd4_pnfs_layoutget	pnfs_layoutget;
++		struct nfsd4_pnfs_layoutcommit	pnfs_layoutcommit;
++		struct nfsd4_pnfs_layoutreturn	pnfs_layoutreturn;
++#endif /* CONFIG_PNFSD */
+ 	} u;
+ 	struct nfs4_replay *			replay;
+ };
+diff -up linux-2.6.34.noarch/fs/nfs/file.c.orig linux-2.6.34.noarch/fs/nfs/file.c
+--- linux-2.6.34.noarch/fs/nfs/file.c.orig	2010-09-30 10:15:17.741713000 -0400
++++ linux-2.6.34.noarch/fs/nfs/file.c	2010-09-30 10:17:08.626991000 -0400
+@@ -36,6 +36,7 @@
+ #include "internal.h"
+ #include "iostat.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_FILE
+ 
+@@ -388,12 +389,17 @@ static int nfs_write_begin(struct file *
+ 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ 	struct page *page;
+ 	int once_thru = 0;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
+ 		file->f_path.dentry->d_parent->d_name.name,
+ 		file->f_path.dentry->d_name.name,
+ 		mapping->host->i_ino, len, (long long) pos);
+ 
++	pnfs_update_layout(mapping->host,
++			   nfs_file_open_context(file),
++			   0, NFS4_MAX_UINT64, IOMODE_RW,
++			   &lseg);
+ start:
+ 	/*
+ 	 * Prevent starvation issues if someone is doing a consistency
+@@ -402,17 +408,22 @@ start:
+ 	ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
+ 			nfs_wait_bit_killable, TASK_KILLABLE);
+ 	if (ret)
+-		return ret;
++		goto out;
+ 
+ 	page = grab_cache_page_write_begin(mapping, index, flags);
+-	if (!page)
+-		return -ENOMEM;
++	if (!page) {
++		ret = -ENOMEM;
++		goto out;
++	}
+ 	*pagep = page;
+ 
+-	ret = nfs_flush_incompatible(file, page);
++	ret = nfs_flush_incompatible(file, page, lseg);
+ 	if (ret) {
+ 		unlock_page(page);
+ 		page_cache_release(page);
++		*pagep = NULL;
++		*fsdata = NULL;
++		goto out;
+ 	} else if (!once_thru &&
+ 		   nfs_want_read_modify_write(file, page, pos, len)) {
+ 		once_thru = 1;
+@@ -421,6 +432,12 @@ start:
+ 		if (!ret)
+ 			goto start;
+ 	}
++	ret = pnfs_write_begin(file, page, pos, len, lseg, fsdata);
++ out:
++	if (ret) {
++		put_lseg(lseg);
++		*fsdata = NULL;
++	}
+ 	return ret;
+ }
+ 
+@@ -430,6 +447,7 @@ static int nfs_write_end(struct file *fi
+ {
+ 	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+ 	int status;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n",
+ 		file->f_path.dentry->d_parent->d_name.name,
+@@ -456,10 +474,17 @@ static int nfs_write_end(struct file *fi
+ 			zero_user_segment(page, pglen, PAGE_CACHE_SIZE);
+ 	}
+ 
+-	status = nfs_updatepage(file, page, offset, copied);
++	lseg = nfs4_pull_lseg_from_fsdata(file, fsdata);
++	status = pnfs_write_end(file, page, pos, len, copied, lseg);
++	if (status)
++		goto out;
++	status = nfs_updatepage(file, page, offset, copied, lseg, fsdata);
+ 
++ out:
+ 	unlock_page(page);
+ 	page_cache_release(page);
++	pnfs_write_end_cleanup(file, fsdata);
++	put_lseg(lseg);
+ 
+ 	if (status < 0)
+ 		return status;
+@@ -570,6 +595,8 @@ static int nfs_vm_page_mkwrite(struct vm
+ 	/* make sure the cache has finished storing the page */
+ 	nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page);
+ 
++	/* XXX Do we want to call pnfs_update_layout here? */
++
+ 	lock_page(page);
+ 	mapping = page->mapping;
+ 	if (mapping != dentry->d_inode->i_mapping)
+@@ -580,11 +607,11 @@ static int nfs_vm_page_mkwrite(struct vm
+ 	if (pagelen == 0)
+ 		goto out_unlock;
+ 
+-	ret = nfs_flush_incompatible(filp, page);
++	ret = nfs_flush_incompatible(filp, page, NULL);
+ 	if (ret != 0)
+ 		goto out_unlock;
+ 
+-	ret = nfs_updatepage(filp, page, 0, pagelen);
++	ret = nfs_updatepage(filp, page, 0, pagelen, NULL, NULL);
+ out_unlock:
+ 	if (!ret)
+ 		return VM_FAULT_LOCKED;
+diff -up linux-2.6.34.noarch/fs/nfs/inode.c.orig linux-2.6.34.noarch/fs/nfs/inode.c
+--- linux-2.6.34.noarch/fs/nfs/inode.c.orig	2010-09-30 10:15:17.769716000 -0400
++++ linux-2.6.34.noarch/fs/nfs/inode.c	2010-09-30 10:17:08.632991000 -0400
+@@ -48,6 +48,7 @@
+ #include "internal.h"
+ #include "fscache.h"
+ #include "dns_resolve.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_VFS
+ 
+@@ -278,7 +279,7 @@ nfs_fhget(struct super_block *sb, struct
+ 		 */
+ 		inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
+ 		if (S_ISREG(inode->i_mode)) {
+-			inode->i_fop = &nfs_file_operations;
++			inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
+ 			inode->i_data.a_ops = &nfs_file_aops;
+ 			inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
+ 		} else if (S_ISDIR(inode->i_mode)) {
+@@ -530,6 +531,68 @@ out:
+ 	return err;
+ }
+ 
++static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
++{
++	atomic_set(&l_ctx->count, 1);
++	l_ctx->lockowner = current->files;
++	l_ctx->pid = current->tgid;
++	INIT_LIST_HEAD(&l_ctx->list);
++}
++
++static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
++{
++	struct nfs_lock_context *pos;
++
++	list_for_each_entry(pos, &ctx->lock_context.list, list) {
++		if (pos->lockowner != current->files)
++			continue;
++		if (pos->pid != current->tgid)
++			continue;
++		atomic_inc(&pos->count);
++		return pos;
++	}
++	return NULL;
++}
++
++struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
++{
++	struct nfs_lock_context *res, *new = NULL;
++	struct inode *inode = ctx->path.dentry->d_inode;
++
++	spin_lock(&inode->i_lock);
++	res = __nfs_find_lock_context(ctx);
++	if (res == NULL) {
++		spin_unlock(&inode->i_lock);
++		new = kmalloc(sizeof(*new), GFP_KERNEL);
++		if (new == NULL)
++			return NULL;
++		nfs_init_lock_context(new);
++		spin_lock(&inode->i_lock);
++		res = __nfs_find_lock_context(ctx);
++		if (res == NULL) {
++			list_add_tail(&new->list, &ctx->lock_context.list);
++			new->open_context = ctx;
++			res = new;
++			new = NULL;
++		}
++	}
++	spin_unlock(&inode->i_lock);
++	kfree(new);
++	return res;
++}
++
++void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
++{
++	struct nfs_open_context *ctx = l_ctx->open_context;
++	struct inode *inode = ctx->path.dentry->d_inode;
++
++	if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock))
++		return;
++	list_del(&l_ctx->list);
++	spin_unlock(&inode->i_lock);
++	kfree(l_ctx);
++}
++
+ /**
+  * nfs_close_context - Common close_context() routine NFSv2/v3
+  * @ctx: pointer to context
+@@ -566,11 +629,11 @@ static struct nfs_open_context *alloc_nf
+ 		path_get(&ctx->path);
+ 		ctx->cred = get_rpccred(cred);
+ 		ctx->state = NULL;
+-		ctx->lockowner = current->files;
+ 		ctx->flags = 0;
+ 		ctx->error = 0;
+ 		ctx->dir_cookie = 0;
+-		atomic_set(&ctx->count, 1);
++		nfs_init_lock_context(&ctx->lock_context);
++		ctx->lock_context.open_context = ctx;
+ 	}
+ 	return ctx;
+ }
+@@ -578,15 +641,16 @@ static struct nfs_open_context *alloc_nf
+ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
+ {
+ 	if (ctx != NULL)
+-		atomic_inc(&ctx->count);
++		atomic_inc(&ctx->lock_context.count);
+ 	return ctx;
+ }
++EXPORT_SYMBOL(get_nfs_open_context);
+ 
+ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
+ {
+ 	struct inode *inode = ctx->path.dentry->d_inode;
+ 
+-	if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
++	if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
+ 		return;
+ 	list_del(&ctx->list);
+ 	spin_unlock(&inode->i_lock);
+@@ -933,6 +997,7 @@ void nfs_fattr_init(struct nfs_fattr *fa
+ 	fattr->time_start = jiffies;
+ 	fattr->gencount = nfs_inc_attr_generation_counter();
+ }
++EXPORT_SYMBOL(nfs_fattr_init);
+ 
+ struct nfs_fattr *nfs_alloc_fattr(void)
+ {
+@@ -1142,6 +1207,14 @@ static int nfs_update_inode(struct inode
+ 		server->fsid = fattr->fsid;
+ 
+ 	/*
++	 * file needs layout commit, server attributes may be stale
++	 */
++	if (layoutcommit_needed(nfsi) && nfsi->change_attr >= fattr->change_attr) {
++		dprintk("NFS: %s: layoutcommit is needed for file %s/%ld\n",
++			__func__, inode->i_sb->s_id, inode->i_ino);
++		return 0;
++	}
++	/*
+ 	 * Update the read time so we don't revalidate too often.
+ 	 */
+ 	nfsi->read_cache_jiffies = fattr->time_start;
+@@ -1340,9 +1413,10 @@ static int nfs_update_inode(struct inode
+  */
+ void nfs4_clear_inode(struct inode *inode)
+ {
++	pnfs_return_layout(inode, NULL, NULL, RETURN_FILE, true);
++
+ 	/* If we are holding a delegation, return it! */
+ 	nfs_inode_return_delegation_noreclaim(inode);
+-	/* First call standard NFS clear_inode() code */
+ 	nfs_clear_inode(inode);
+ }
+ #endif
+@@ -1367,7 +1441,10 @@ struct inode *nfs_alloc_inode(struct sup
+ 
+ void nfs_destroy_inode(struct inode *inode)
+ {
+-	kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
++	struct nfs_inode *nfsi = NFS_I(inode);
++
++	pnfs_destroy_layout(nfsi);
++	kmem_cache_free(nfs_inode_cachep, nfsi);
+ }
+ 
+ static inline void nfs4_init_once(struct nfs_inode *nfsi)
+@@ -1377,6 +1454,11 @@ static inline void nfs4_init_once(struct
+ 	nfsi->delegation = NULL;
+ 	nfsi->delegation_state = 0;
+ 	init_rwsem(&nfsi->rwsem);
++#ifdef CONFIG_NFS_V4_1
++	init_waitqueue_head(&nfsi->lo_waitq);
++	nfsi->pnfs_layout_suspend = 0;
++	nfsi->layout = NULL;
++#endif /* CONFIG_NFS_V4_1 */
+ #endif
+ }
+ 
+@@ -1488,6 +1570,12 @@ static int __init init_nfs_fs(void)
+ 	if (err)
+ 		goto out0;
+ 
++#ifdef CONFIG_NFS_V4_1
++	err = pnfs_initialize();
++	if (err)
++		goto out00;
++#endif /* CONFIG_NFS_V4_1 */
++
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_register(&nfs_rpcstat);
+ #endif
+@@ -1498,6 +1586,10 @@ out:
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_unregister("nfs");
+ #endif
++#ifdef CONFIG_NFS_V4_1
++out00:
++	pnfs_uninitialize();
++#endif /* CONFIG_NFS_V4_1 */
+ 	nfs_destroy_directcache();
+ out0:
+ 	nfs_destroy_writepagecache();
+@@ -1531,6 +1623,9 @@ static void __exit exit_nfs_fs(void)
+ #ifdef CONFIG_PROC_FS
+ 	rpc_proc_unregister("nfs");
+ #endif
++#ifdef CONFIG_NFS_V4_1
++	pnfs_uninitialize();
++#endif
+ 	unregister_nfs_fs();
+ 	nfs_fs_proc_exit();
+ 	nfsiod_stop();
+diff -up linux-2.6.34.noarch/fs/nfs/internal.h.orig linux-2.6.34.noarch/fs/nfs/internal.h
+--- linux-2.6.34.noarch/fs/nfs/internal.h.orig	2010-09-30 10:15:17.775713000 -0400
++++ linux-2.6.34.noarch/fs/nfs/internal.h	2010-09-30 10:17:08.637996000 -0400
+@@ -139,6 +139,16 @@ extern struct nfs_server *nfs_clone_serv
+ 					   struct nfs_fattr *);
+ extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
+ extern int nfs4_check_client_ready(struct nfs_client *clp);
++extern int nfs_sockaddr_cmp(const struct sockaddr *sa1,
++		const struct sockaddr *sa2);
++extern int nfs4_set_client(struct nfs_server *server,
++		const char *hostname,
++		const struct sockaddr *addr,
++		const size_t addrlen,
++		const char *ip_addr,
++		rpc_authflavor_t authflavour,
++		int proto, const struct rpc_timeout *timeparms,
++		u32 minorversion);
+ #ifdef CONFIG_PROC_FS
+ extern int __init nfs_fs_proc_init(void);
+ extern void nfs_fs_proc_exit(void);
+@@ -201,6 +211,8 @@ extern const u32 nfs41_maxwrite_overhead
+ extern struct rpc_procinfo nfs4_procedures[];
+ #endif
+ 
++extern int nfs4_recover_expired_lease(struct nfs_client *clp);
++
+ /* proc.c */
+ void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
+ 
+@@ -248,10 +260,31 @@ extern int nfs4_get_rootfh(struct nfs_se
+ #endif
+ 
+ /* read.c */
++extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++			     const struct rpc_call_ops *call_ops);
++extern int pnfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++			     const struct rpc_call_ops *call_ops);
+ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
+ 
+ /* write.c */
++extern int nfs_initiate_write(struct nfs_write_data *data,
++			      struct rpc_clnt *clnt,
++			      const struct rpc_call_ops *call_ops,
++			      int how);
++extern int pnfs_initiate_write(struct nfs_write_data *data,
++			      struct rpc_clnt *clnt,
++			      const struct rpc_call_ops *call_ops,
++			      int how);
++extern int nfs_initiate_commit(struct nfs_write_data *data,
++			       struct rpc_clnt *clnt,
++			       const struct rpc_call_ops *call_ops,
++			       int how);
++extern int pnfs_initiate_commit(struct nfs_write_data *data,
++			       struct rpc_clnt *clnt,
++			       const struct rpc_call_ops *call_ops,
++				int how, int pnfs);
+ extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
++extern void nfs_mark_list_commit(struct list_head *head);
+ #ifdef CONFIG_MIGRATION
+ extern int nfs_migrate_page(struct address_space *,
+ 		struct page *, struct page *);
+diff -up linux-2.6.34.noarch/fs/nfs/Kconfig.orig linux-2.6.34.noarch/fs/nfs/Kconfig
+--- linux-2.6.34.noarch/fs/nfs/Kconfig.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/Kconfig	2010-09-30 10:17:08.515988000 -0400
+@@ -79,10 +79,48 @@ config NFS_V4_1
+ 	depends on NFS_V4 && EXPERIMENTAL
+ 	help
+ 	  This option enables support for minor version 1 of the NFSv4 protocol
+-	  (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client.
++	  (RFC5661) including support for the parallel NFS (pNFS) features
++	  in the kernel's NFS client.
+ 
+ 	  Unless you're an NFS developer, say N.
+ 
++config PNFS_FILE_LAYOUT
++	tristate "NFS client support for the pNFS nfs-files layout (DEVELOPER ONLY)"
++	depends on NFS_FS && NFS_V4_1
++	default y
++	help
++	  This option enables support for the pNFS nfs-files layout.
++
++	  Unless you're an NFS developer, say N.
++
++config PNFS_OBJLAYOUT
++	tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
++	depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD
++	help
++	  Say M here if you want your pNFS client to support the Objects Layout Driver.
++	  Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and
++	  upper level driver (SCSI_OSD_ULD).
++
++	  If unsure, say N.
++
++config PNFS_PANLAYOUT
++	tristate "Provide support for the Panasas OSD Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
++	depends on PNFS_OBJLAYOUT
++	help
++	  Say M or y here if you want your pNFS client to support the Panasas OSD Layout Driver.
++
++	  If unsure, say N.
++
++config PNFS_BLOCK
++	tristate "Provide a pNFS block client (EXPERIMENTAL)"
++	depends on NFS_FS && NFS_V4_1
++	select MD
++	select BLK_DEV_DM
++	help
++	  Say M or y here if you want your pNFS client to support the block protocol.
++
++	  If unsure, say N.
++
+ config ROOT_NFS
+ 	bool "Root file system on NFS"
+ 	depends on NFS_FS=y && IP_PNP
+diff -up linux-2.6.34.noarch/fs/nfs/Makefile.orig linux-2.6.34.noarch/fs/nfs/Makefile
+--- linux-2.6.34.noarch/fs/nfs/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/Makefile	2010-09-30 10:17:08.520988000 -0400
+@@ -15,5 +15,12 @@ nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4x
+ 			   delegation.o idmap.o \
+ 			   callback.o callback_xdr.o callback_proc.o \
+ 			   nfs4namespace.o
++nfs-$(CONFIG_NFS_V4_1)	+= pnfs.o
+ nfs-$(CONFIG_SYSCTL) += sysctl.o
+ nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
++
++obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
++nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
++
++obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
++obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
+diff -up linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs3proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs3proc.c.orig	2010-09-30 10:15:17.806716000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs3proc.c	2010-09-30 10:17:08.643994000 -0400
+@@ -833,6 +833,7 @@ const struct nfs_rpc_ops nfs_v3_clientop
+ 	.dentry_ops	= &nfs_dentry_operations,
+ 	.dir_inode_ops	= &nfs3_dir_inode_operations,
+ 	.file_inode_ops	= &nfs3_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs3_proc_get_root,
+ 	.getattr	= nfs3_proc_getattr,
+ 	.setattr	= nfs3_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c.orig	2010-09-30 10:17:08.652995000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.c	2010-09-30 10:17:08.654992000 -0400
+@@ -0,0 +1,768 @@
++/*
++ *  linux/fs/nfs/nfs4filelayout.c
++ *
++ *  Module for the pnfs nfs4 file layout driver.
++ *  Defines all I/O and Policy interface operations, plus code
++ *  to register itself with the pNFS client.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/time.h>
++#include <linux/kernel.h>
++#include <linux/mm.h>
++#include <linux/string.h>
++#include <linux/vmalloc.h>
++#include <linux/stat.h>
++#include <linux/errno.h>
++#include <linux/unistd.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_page.h>
++#include <linux/nfs4_pnfs.h>
++
++#include "nfs4filelayout.h"
++#include "nfs4_fs.h"
++#include "internal.h"
++#include "pnfs.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Dean Hildebrand <dhildebz@eecs.umich.edu>");
++MODULE_DESCRIPTION("The NFSv4 file layout driver");
++
++/* Callback operations to the pNFS client */
++struct pnfs_client_operations *pnfs_callback_ops;
++
++/* Forward declaration */
++struct layoutdriver_io_operations filelayout_io_operations;
++
++int
++filelayout_initialize_mountpoint(struct nfs_server *nfss,
++				 const struct nfs_fh *mntfh)
++{
++	int status = nfs4_alloc_init_deviceid_cache(nfss->nfs_client,
++						nfs4_fl_free_deviceid_callback);
++	if (status) {
++		printk(KERN_WARNING "%s: deviceid cache could not be "
++			"initialized\n", __func__);
++		return status;
++	}
++	dprintk("%s: deviceid cache has been initialized successfully\n",
++		__func__);
++	return 0;
++}
++
++/* Uninitialize a mountpoint by destroying its device list */
++int
++filelayout_uninitialize_mountpoint(struct nfs_server *nfss)
++{
++	dprintk("--> %s\n", __func__);
++
++	if (nfss->pnfs_curr_ld && nfss->nfs_client->cl_devid_cache)
++		nfs4_put_deviceid_cache(nfss->nfs_client);
++	return 0;
++}
++
++/* This function is used by the layout driver to calculate the
++ * offset of the file on the dserver based on whether the
++ * layout type is STRIPE_DENSE or STRIPE_SPARSE
++ */
++static loff_t
++filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++
++	switch (flseg->stripe_type) {
++	case STRIPE_SPARSE:
++		return offset;
++
++	case STRIPE_DENSE:
++	{
++		u32 stripe_width;
++		u64 tmp, off;
++		u32 unit = flseg->stripe_unit;
++
++		stripe_width = unit * FILE_DSADDR(lseg)->stripe_count;
++		tmp = off = offset - flseg->pattern_offset;
++		do_div(tmp, stripe_width);
++		return tmp * unit + do_div(off, unit);
++	}
++	default:
++		BUG();
++	}
++
++	/* We should never get here... just to stop the gcc warning */
++	return 0;
++}
++
++/*
++ * Call ops for the async read/write cases
++ * In the case of dense layouts, the offset needs to be reset to its
++ * original value.
++ */
++static void filelayout_read_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_read_data *rdata = (struct nfs_read_data *)data;
++
++	if (rdata->fldata.orig_offset) {
++		dprintk("%s new off %llu orig offset %llu\n", __func__,
++			rdata->args.offset, rdata->fldata.orig_offset);
++		rdata->args.offset = rdata->fldata.orig_offset;
++	}
++
++	/* Note this may cause RPC to be resent */
++	rdata->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static void filelayout_read_release(void *data)
++{
++	struct nfs_read_data *rdata = (struct nfs_read_data *)data;
++
++	put_lseg(rdata->pdata.lseg);
++	rdata->pdata.lseg = NULL;
++	rdata->pdata.call_ops->rpc_release(data);
++}
++
++static void filelayout_write_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
++
++	if (wdata->fldata.orig_offset) {
++		dprintk("%s new off %llu orig offset %llu\n", __func__,
++			wdata->args.offset, wdata->fldata.orig_offset);
++		wdata->args.offset = wdata->fldata.orig_offset;
++	}
++
++	/* Note this may cause RPC to be resent */
++	wdata->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static void filelayout_write_release(void *data)
++{
++	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
++
++	put_lseg(wdata->pdata.lseg);
++	wdata->pdata.lseg = NULL;
++	wdata->pdata.call_ops->rpc_release(data);
++}
++
++struct rpc_call_ops filelayout_read_call_ops = {
++	.rpc_call_prepare = nfs_read_prepare,
++	.rpc_call_done = filelayout_read_call_done,
++	.rpc_release = filelayout_read_release,
++};
++
++struct rpc_call_ops filelayout_write_call_ops = {
++	.rpc_call_prepare = nfs_write_prepare,
++	.rpc_call_done = filelayout_write_call_done,
++	.rpc_release = filelayout_write_release,
++};
++
++/* Issue an asynchronous read to the data server chosen for the byte
++ * range starting at data->args.offset.
++ *
++ * Returns PNFS_NOT_ATTEMPTED when no data server could be prepared,
++ * PNFS_ATTEMPTED once the read has been handed to nfs_initiate_read().
++ *
++ * TODO: join with write_pagelist?
++ */
++static enum pnfs_try_status
++filelayout_read_pagelist(struct nfs_read_data *data, unsigned nr_pages)
++{
++	struct pnfs_layout_segment *lseg = data->pdata.lseg;
++	struct nfs4_pnfs_ds *ds;
++	loff_t offset = data->args.offset;
++	u32 idx;
++	struct nfs_fh *fh;
++
++	dprintk("--> %s ino %lu nr_pages %d pgbase %u req %Zu@%llu\n",
++		__func__, data->inode->i_ino, nr_pages,
++		data->args.pgbase, (size_t)data->args.count, offset);
++
++	/* Retrieve the correct rpc_client for the byte range */
++	idx = nfs4_fl_calc_ds_index(lseg, offset);
++	ds = nfs4_fl_prepare_ds(lseg, idx);
++	if (!ds) {
++		printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	dprintk("%s USE DS:ip %x %s\n", __func__,
++		htonl(ds->ds_ip_addr), ds->r_addr);
++
++	/* Use the data server's client; keep args.fh if no DS fh is found */
++	data->fldata.ds_nfs_client = ds->ds_clp;
++	fh = nfs4_fl_select_ds_fh(lseg, offset);
++	if (fh)
++		data->args.fh = fh;
++
++	/*
++	 * Now get the file offset on the dserver
++	 * Set the read offset to this offset, and
++	 * save the original offset in orig_offset
++	 * In the case of async reads, the offset will be reset in the
++	 * call_ops->rpc_call_done() routine.
++	 */
++	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
++	data->fldata.orig_offset = offset;
++
++	/* Perform an asynchronous read */
++	nfs_initiate_read(data, ds->ds_clp->cl_rpcclient,
++			  &filelayout_read_call_ops);
++
++	data->pdata.pnfs_error = 0;
++
++	return PNFS_ATTEMPTED;
++}
++
++/* Perform async writes. */
++static enum pnfs_try_status
++filelayout_write_pagelist(struct nfs_write_data *data, unsigned nr_pages, int sync)
++{
++	struct pnfs_layout_segment *lseg = data->pdata.lseg;
++	struct nfs4_pnfs_ds *ds;
++	loff_t offset = data->args.offset;
++	u32 idx;
++	struct nfs_fh *fh;
++
++	/* Retrieve the correct rpc_client for the byte range */
++	idx = nfs4_fl_calc_ds_index(lseg, offset);
++	ds = nfs4_fl_prepare_ds(lseg, idx);
++	if (!ds) {
++		printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
++		return PNFS_NOT_ATTEMPTED;
++	}
++	dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu %s\n", __func__,
++		data->inode->i_ino, sync, (size_t) data->args.count, offset,
++		htonl(ds->ds_ip_addr), ntohs(ds->ds_port), ds->r_addr);
++
++	data->fldata.ds_nfs_client = ds->ds_clp;
++	fh = nfs4_fl_select_ds_fh(lseg, offset);
++	if (fh)
++		data->args.fh = fh;
++	/*
++	 * Get the file offset on the dserver. Set the write offset to
++	 * this offset and save the original offset.
++	 */
++	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
++	data->fldata.orig_offset = offset;
++
++	/*
++	 * Perform an asynchronous write.  The offset will be reset in the
++	 * call_ops->rpc_call_done() routine
++	 */
++	nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
++			   &filelayout_write_call_ops, sync);
++
++	data->pdata.pnfs_error = 0;
++	return PNFS_ATTEMPTED;
++}
++
++/*
++ * Create a filelayout layout structure and return it.  The pNFS client
++ * will use the pnfs_layout_hdr type to refer to the layout for this
++ * inode from now on.
++ */
++static struct pnfs_layout_hdr *
++filelayout_alloc_layout(struct inode *inode)
++{
++	struct nfs4_filelayout *flp;
++
++	dprintk("NFS_FILELAYOUT: allocating layout\n");
++	flp =  kzalloc(sizeof(struct nfs4_filelayout), GFP_KERNEL);
++	return flp ? &flp->fl_layout : NULL;
++}
++
++/* Free a filelayout layout structure */
++static void
++filelayout_free_layout(struct pnfs_layout_hdr *lo)
++{
++	dprintk("NFS_FILELAYOUT: freeing layout\n");
++	kfree(FILE_LO(lo));
++}
++
++/*
++ * filelayout_check_layout()
++ *
++ * Make sure layout segment parameters are sane WRT the device.
++ * Returns 0 on success, -EINVAL if the device or a parameter is bad.
++ * Notes:
++ * 1) current code insists that # stripe index = # data servers in ds_list
++ *    which is wrong.
++ * 2) pattern_offset is ignored and must == 0 which is wrong;
++ * 3) the pattern_offset needs to be a multiple of the stripe unit.
++ * 4) stripe unit is multiple of page size
++ */
++
++static int
++filelayout_check_layout(struct pnfs_layout_hdr *lo,
++			struct pnfs_layout_segment *lseg)
++{
++	struct nfs4_filelayout_segment *fl = LSEG_LD_DATA(lseg);
++	struct nfs4_file_layout_dsaddr *dsaddr;
++	int status = -EINVAL;
++	struct nfs_server *nfss = NFS_SERVER(PNFS_INODE(lo));
++
++	dprintk("--> %s\n", __func__);
++	/* find in list or get from server and reference the deviceid */
++	dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, &fl->dev_id);
++	if (dsaddr == NULL) {
++		dsaddr = get_device_info(PNFS_INODE(lo), &fl->dev_id);
++		if (dsaddr == NULL) {
++			dprintk("%s NO device for dev_id %s\n",
++				__func__, deviceid_fmt(&fl->dev_id));
++			goto out;
++		}
++	}
++	if (fl->first_stripe_index < 0 ||
++	    fl->first_stripe_index >= dsaddr->stripe_count) {
++		dprintk("%s Bad first_stripe_index %d\n",
++				__func__, fl->first_stripe_index);
++		goto out_put;
++	}
++
++	if (fl->pattern_offset != 0) {
++		dprintk("%s Unsupported no-zero pattern_offset %Ld\n",
++				__func__, fl->pattern_offset);
++		goto out_put;
++	}
++
++	if (fl->stripe_unit % PAGE_SIZE) {
++		dprintk("%s Stripe unit (%u) not page aligned\n",
++			__func__, fl->stripe_unit);
++		goto out_put;
++	}
++
++	/* XXX only support SPARSE packing. Don't support use MDS open fh */
++	if (!(fl->num_fh == 1 || fl->num_fh == dsaddr->ds_num)) {
++		dprintk("%s num_fh %u not equal to 1 or ds_num %u\n",
++			__func__, fl->num_fh, dsaddr->ds_num);
++		goto out_put;
++	}
++
++	if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
++		dprintk("%s Stripe unit (%u) not aligned with rsize %u "
++			"wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
++			nfss->wsize);
++	}
++
++	nfs4_set_layout_deviceid(lseg, &dsaddr->deviceid);
++
++	status = 0;
++out:
++	dprintk("--> %s returns %d\n", __func__, status);
++	return status;
++out_put:
++	nfs4_put_unset_layout_deviceid(lseg, &dsaddr->deviceid,
++				       nfs4_fl_free_deviceid_callback);
++	goto out;
++}
++
++static void _filelayout_free_lseg(struct pnfs_layout_segment *lseg);
++static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl);
++
++/* Decode the XDR file layout in @lgr into @fl and refresh the stripe
++ * unit cached in @flo.  Returns 0, or -ENOMEM on allocation failure.
++ */
++static int
++filelayout_set_layout(struct nfs4_filelayout *flo,
++		      struct nfs4_filelayout_segment *fl,
++		      struct nfs4_layoutget_res *lgr)
++{
++	uint32_t *p = (uint32_t *)lgr->layout.buf;
++	uint32_t nfl_util;
++	int i;
++
++	dprintk("%s: set_layout_map Begin\n", __func__);
++
++	memcpy(&fl->dev_id, p, NFS4_PNFS_DEVICEID4_SIZE);
++	p += XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE);
++	nfl_util = be32_to_cpup(p++);
++	if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
++		fl->commit_through_mds = 1;
++	if (nfl_util & NFL4_UFLG_DENSE)
++		fl->stripe_type = STRIPE_DENSE;
++	else
++		fl->stripe_type = STRIPE_SPARSE;
++	fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;
++
++	if (!flo->stripe_unit)
++		flo->stripe_unit = fl->stripe_unit;
++	else if (flo->stripe_unit != fl->stripe_unit) {
++		printk(KERN_NOTICE "%s: updating strip_unit from %u to %u\n",
++			__func__, flo->stripe_unit, fl->stripe_unit);
++		flo->stripe_unit = fl->stripe_unit;
++	}
++
++	fl->first_stripe_index = be32_to_cpup(p++);
++	p = xdr_decode_hyper(p, &fl->pattern_offset);
++	fl->num_fh = be32_to_cpup(p++);
++
++	dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu dev_id %s\n",
++		__func__, nfl_util, fl->num_fh, fl->first_stripe_index,
++		fl->pattern_offset, deviceid_fmt(&fl->dev_id));
++
++	if (fl->num_fh * sizeof(struct nfs_fh) > 2*PAGE_SIZE) {
++		fl->fh_array = vmalloc(fl->num_fh * sizeof(struct nfs_fh));
++		if (fl->fh_array)
++			memset(fl->fh_array, 0,
++				fl->num_fh * sizeof(struct nfs_fh));
++	} else {
++		fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh),
++					GFP_KERNEL);
++	}
++	if (!fl->fh_array)
++		return -ENOMEM;
++
++	for (i = 0; i < fl->num_fh; i++) {
++		/* fh: the length must fit the data buffer, not the struct */
++		fl->fh_array[i].size = be32_to_cpup(p++);
++		if (fl->fh_array[i].size > sizeof(fl->fh_array[i].data)) {
++			printk(KERN_ERR "Too big fh %d received %d\n",
++				i, fl->fh_array[i].size);
++			/* Layout is now invalid, pretend it doesn't exist */
++			filelayout_free_fh_array(fl);
++			fl->num_fh = 0;
++			break;
++		}
++		memcpy(fl->fh_array[i].data, p, fl->fh_array[i].size);
++		p += XDR_QUADLEN(fl->fh_array[i].size);
++		dprintk("DEBUG: %s: fh len %d\n", __func__,
++					fl->fh_array[i].size);
++	}
++
++	return 0;
++}
++
++static struct pnfs_layout_segment *
++filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
++		      struct nfs4_layoutget_res *lgr)
++{
++	struct nfs4_filelayout *flo = FILE_LO(layoutid);
++	struct pnfs_layout_segment *lseg;
++	int rc;
++
++	dprintk("--> %s\n", __func__);
++	lseg = kzalloc(sizeof(struct pnfs_layout_segment) +
++		       sizeof(struct nfs4_filelayout_segment), GFP_KERNEL);
++	if (!lseg)
++		return NULL;
++
++	rc = filelayout_set_layout(flo, LSEG_LD_DATA(lseg), lgr);
++
++	if (rc != 0 || filelayout_check_layout(layoutid, lseg)) {
++		_filelayout_free_lseg(lseg);
++		lseg = NULL;
++	}
++	return lseg;
++}
++
++static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl)
++{
++	if (fl->num_fh * sizeof(struct nfs_fh) > 2*PAGE_SIZE)
++		vfree(fl->fh_array);
++	else
++		kfree(fl->fh_array);
++
++	fl->fh_array = NULL;
++}
++
++static void
++_filelayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	filelayout_free_fh_array(LSEG_LD_DATA(lseg));
++	kfree(lseg);
++}
++
++static void
++filelayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	dprintk("--> %s\n", __func__);
++	nfs4_put_unset_layout_deviceid(lseg, lseg->deviceid,
++				   nfs4_fl_free_deviceid_callback);
++	_filelayout_free_lseg(lseg);
++}
++
++/* Allocate a clone of @old (holds a ref on it); returns NULL on failure */
++static struct nfs_write_data *
++filelayout_clone_write_data(struct nfs_write_data *old)
++{
++	struct nfs_write_data *new;	/* per-call, not shared state */
++
++	new = nfs_commitdata_alloc();
++	if (!new)
++		goto out;
++	kref_init(&new->refcount);
++	new->parent      = old;
++	kref_get(&old->refcount);
++	new->inode       = old->inode;
++	new->cred        = old->cred;
++	new->args.offset = 0;
++	new->args.count  = 0;
++	new->res.count   = 0;
++	new->res.fattr   = &new->fattr;
++	nfs_fattr_init(&new->fattr);
++	new->res.verf    = &new->verf;
++	new->args.context = get_nfs_open_context(old->args.context);
++	new->pdata.lseg = NULL;
++	new->pdata.call_ops = old->pdata.call_ops;
++	new->pdata.how = old->pdata.how;
++out:
++	return new;
++}
++
++static void filelayout_commit_call_done(struct rpc_task *task, void *data)
++{
++	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
++
++	wdata->pdata.call_ops->rpc_call_done(task, data);
++}
++
++static struct rpc_call_ops filelayout_commit_call_ops = {
++	.rpc_call_prepare = nfs_write_prepare,
++	.rpc_call_done = filelayout_commit_call_done,
++	.rpc_release = filelayout_write_release,
++};
++
++/*
++ * Execute a COMMIT op to the MDS or to each data server on which a page
++ * in 'pages' exists.
++ * Invoke the pnfs_commit_complete callback.
++ */
++enum pnfs_try_status
++filelayout_commit(struct nfs_write_data *data, int sync)
++{
++	LIST_HEAD(head);
++	struct nfs_page *req;
++	loff_t file_offset = 0;
++	u16 idx, i;
++	struct list_head **ds_page_list = NULL;
++	u16 *indices_used;
++	int num_indices_seen = 0;
++	const struct rpc_call_ops *call_ops;
++	struct rpc_clnt *clnt;
++	struct nfs_write_data **clone_list = NULL;
++	struct nfs_write_data *dsdata;
++	struct nfs4_pnfs_ds *ds;
++
++	dprintk("%s data %p sync %d\n", __func__, data, sync);
++
++	/* Alloc room for both in one go */
++	ds_page_list = kzalloc((NFS4_PNFS_MAX_MULTI_CNT + 1) *
++			       (sizeof(u16) + sizeof(struct list_head *)),
++			       GFP_KERNEL);
++	if (!ds_page_list)
++		goto mem_error;
++	indices_used = (u16 *) (ds_page_list + NFS4_PNFS_MAX_MULTI_CNT + 1);
++	/*
++	 * Sort pages based on which ds to send to.
++	 * MDS is given index equal to NFS4_PNFS_MAX_MULTI_CNT.
++	 * Note we are assuming there is only a single lseg in play.
++	 * When that is not true, we could first sort on lseg, then
++	 * sort within each as we do here.
++	 */
++	while (!list_empty(&data->pages)) {
++		req = nfs_list_entry(data->pages.next);
++		nfs_list_remove_request(req);
++		if (!req->wb_lseg ||
++		    ((struct nfs4_filelayout_segment *)
++		     LSEG_LD_DATA(req->wb_lseg))->commit_through_mds)
++			idx = NFS4_PNFS_MAX_MULTI_CNT;
++		else {
++			file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
++			idx = nfs4_fl_calc_ds_index(req->wb_lseg, file_offset);
++		}
++		if (ds_page_list[idx]) {
++			/* Already seen this idx */
++			list_add(&req->wb_list, ds_page_list[idx]);
++		} else {
++			/* New idx not seen so far */
++			list_add_tail(&req->wb_list, &head);
++			indices_used[num_indices_seen++] = idx;
++		}
++		ds_page_list[idx] = &req->wb_list;
++	}
++	/* Once created, clone must be released via call_op */
++	clone_list = kzalloc(num_indices_seen *
++			     sizeof(struct nfs_write_data *), GFP_KERNEL);
++	if (!clone_list)
++		goto mem_error;
++	for (i = 0; i < num_indices_seen - 1; i++) {
++		clone_list[i] = filelayout_clone_write_data(data);
++		if (!clone_list[i])
++			goto mem_error;
++	}
++	clone_list[i] = data;
++	/*
++	 * Now send off the RPCs to each ds.  Note that it is important
++	 * that any RPC to the MDS be sent last (or at least after all
++	 * clones have been made.)
++	 */
++	for (i = 0; i < num_indices_seen; i++) {
++		dsdata = clone_list[i];
++		idx = indices_used[i];
++		list_cut_position(&dsdata->pages, &head, ds_page_list[idx]);
++		if (idx == NFS4_PNFS_MAX_MULTI_CNT) {
++			call_ops = data->pdata.call_ops;;
++			clnt = NFS_CLIENT(dsdata->inode);
++			ds = NULL;
++		} else {
++			struct nfs_fh *fh;
++
++			call_ops = &filelayout_commit_call_ops;
++			req = nfs_list_entry(dsdata->pages.next);
++			ds = nfs4_fl_prepare_ds(req->wb_lseg, idx);
++			if (!ds) {
++				/* Trigger retry of this chunk through MDS */
++				dsdata->task.tk_status = -EIO;
++				data->pdata.call_ops->rpc_release(dsdata);
++				continue;
++			}
++			clnt = ds->ds_clp->cl_rpcclient;
++			dsdata->fldata.ds_nfs_client = ds->ds_clp;
++			file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
++			fh = nfs4_fl_select_ds_fh(req->wb_lseg, file_offset);
++			if (fh)
++				dsdata->args.fh = fh;
++		}
++		dprintk("%s: Initiating commit: %llu USE DS:\n",
++			__func__, file_offset);
++		print_ds(ds);
++
++		/* Send COMMIT to data server */
++		nfs_initiate_commit(dsdata, clnt, call_ops, sync);
++	}
++	kfree(clone_list);
++	kfree(ds_page_list);
++	data->pdata.pnfs_error = 0;
++	return PNFS_ATTEMPTED;
++
++ mem_error:
++	if (clone_list) {
++		for (i = 0; i < num_indices_seen - 1; i++) {
++			if (!clone_list[i])
++				break;
++			data->pdata.call_ops->rpc_release(clone_list[i]);
++		}
++		kfree(clone_list);
++	}
++	kfree(ds_page_list);
++	/* One of these will be empty, but doesn't hurt to do both */
++	nfs_mark_list_commit(&head);
++	nfs_mark_list_commit(&data->pages);
++	data->pdata.call_ops->rpc_release(data);
++	return PNFS_ATTEMPTED;
++}
++
++/* Return the stripesize for the specified file */
++ssize_t
++filelayout_get_stripesize(struct pnfs_layout_hdr *lo)
++{
++	struct nfs4_filelayout *flo = FILE_LO(lo);
++
++	return flo->stripe_unit;
++}
++
++/*
++ * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
++ *
++ * return 1 :  coalesce page
++ * return 0 :  don't coalesce page
++ */
++int
++filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
++		   struct nfs_page *req)
++{
++	u64 p_stripe, r_stripe;
++
++	if (pgio->pg_boundary == 0)
++		return 1;
++	p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
++	r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
++
++	do_div(p_stripe, pgio->pg_boundary);
++	do_div(r_stripe, pgio->pg_boundary);
++
++	return (p_stripe == r_stripe);
++}
++
++struct layoutdriver_io_operations filelayout_io_operations = {
++	.commit                  = filelayout_commit,
++	.read_pagelist           = filelayout_read_pagelist,
++	.write_pagelist          = filelayout_write_pagelist,
++	.alloc_layout            = filelayout_alloc_layout,
++	.free_layout             = filelayout_free_layout,
++	.alloc_lseg              = filelayout_alloc_lseg,
++	.free_lseg               = filelayout_free_lseg,
++	.initialize_mountpoint   = filelayout_initialize_mountpoint,
++	.uninitialize_mountpoint = filelayout_uninitialize_mountpoint,
++};
++
++struct layoutdriver_policy_operations filelayout_policy_operations = {
++	.flags                 = PNFS_USE_RPC_CODE,
++	.get_stripesize        = filelayout_get_stripesize,
++	.pg_test               = filelayout_pg_test,
++};
++
++struct pnfs_layoutdriver_type filelayout_type = {
++	.id = LAYOUT_NFSV4_1_FILES,
++	.name = "LAYOUT_NFSV4_1_FILES",
++	.ld_io_ops = &filelayout_io_operations,
++	.ld_policy_ops = &filelayout_policy_operations,
++};
++
++static int __init nfs4filelayout_init(void)
++{
++	printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n",
++	       __func__);
++
++	/*
++	 * Need to register file_operations struct with global list to indicate
++	 * that NFS4 file layout is a possible pNFS I/O module
++	 */
++	pnfs_callback_ops = pnfs_register_layoutdriver(&filelayout_type);
++
++	return 0;
++}
++
++static void __exit nfs4filelayout_exit(void)
++{
++	printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n",
++	       __func__);
++
++	/* Unregister NFS4 file layout driver with pNFS client*/
++	pnfs_unregister_layoutdriver(&filelayout_type);
++}
++
++module_init(nfs4filelayout_init);
++module_exit(nfs4filelayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c.orig	2010-09-30 10:17:08.661995000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayoutdev.c	2010-09-30 10:17:08.663993000 -0400
+@@ -0,0 +1,635 @@
++/*
++ *  linux/fs/nfs/nfs4filelayoutdev.c
++ *
++ *  Device operations for the pnfs nfs4 file layout driver.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *  Garth Goodson   <Garth.Goodson@netapp.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/hash.h>
++
++#include <linux/nfs4.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_xdr.h>
++
++#include <asm/div64.h>
++
++#include <linux/utsname.h>
++#include <linux/vmalloc.h>
++#include <linux/nfs4_pnfs.h>
++#include "nfs4filelayout.h"
++#include "internal.h"
++#include "nfs4_fs.h"
++
++#define NFSDBG_FACILITY		NFSDBG_PNFS_LD
++
++DEFINE_SPINLOCK(nfs4_ds_cache_lock);
++static LIST_HEAD(nfs4_data_server_cache);
++
++/* Debug helper: dump the fields of one data server through dprintk. */
++void print_ds(struct nfs4_pnfs_ds *ds)
++{
++	if (!ds) {
++		dprintk("%s NULL device \n", __func__);
++		return;
++	}
++	dprintk("        ip_addr %x\n", ntohl(ds->ds_ip_addr));
++	dprintk("        port %hu\n", ntohs(ds->ds_port));
++	dprintk("        client %p\n", ds->ds_clp);
++	dprintk("        ref count %d\n", atomic_read(&ds->ds_count));
++	if (ds->ds_clp != NULL)
++		dprintk("        cl_exchange_flags %x\n",
++			ds->ds_clp->cl_exchange_flags);
++	dprintk("        ip:port %s\n", ds->r_addr);
++}
++
++/* Debug helper: print ds_num of @dsaddr, then every ds_list[] entry. */
++void print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
++{
++	int idx = 0;
++
++	dprintk("%s dsaddr->ds_num %d\n", __func__,
++		dsaddr->ds_num);
++	while (idx < dsaddr->ds_num)
++		print_ds(dsaddr->ds_list[idx++]);
++}
++
++/* Debug aid: deviceid as 64bit major/minor, in a shared static buffer */
++char *
++deviceid_fmt(const struct pnfs_deviceid *dev_id)
++{
++	/* two decimal u64s (up to 20 digits each), a space and the NUL */
++	static char buf[42];
++	uint32_t *p = (uint32_t *)dev_id->data;
++	uint64_t major, minor;
++
++	p = xdr_decode_hyper(p, &major);
++	p = xdr_decode_hyper(p, &minor);
++	snprintf(buf, sizeof(buf), "%08llu %08llu", major, minor);
++	return buf;
++}
++
++/* nfs4_ds_cache_lock is held; returns the matching cache entry or NULL */
++static inline struct nfs4_pnfs_ds *
++_data_server_lookup(u32 ip_addr, u32 port)
++{
++	struct nfs4_pnfs_ds *cur;
++
++	dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
++			ntohl(ip_addr), ntohs(port));
++
++	list_for_each_entry(cur, &nfs4_data_server_cache, ds_node) {
++		/* an entry matches only when address and port both agree */
++		if (cur->ds_ip_addr != ip_addr || cur->ds_port != port)
++			continue;
++		return cur;
++	}
++	return NULL;
++}
++
++/* Create an rpc client for data server 'ds'; on success ds->ds_clp is set */
++static int
++nfs4_pnfs_ds_create(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
++{
++	struct nfs_server	*tmp;
++	struct sockaddr_in	sin;
++	struct rpc_clnt 	*mds_clnt = mds_srv->client;
++	struct nfs_client	*clp = mds_srv->nfs_client;
++	struct sockaddr		*mds_addr;
++	int err = 0;
++
++	dprintk("--> %s ip:port %s au_flavor %d\n", __func__,
++		ds->r_addr, mds_clnt->cl_auth->au_flavor);
++
++	sin.sin_family = AF_INET;
++	sin.sin_addr.s_addr = ds->ds_ip_addr;
++	sin.sin_port = ds->ds_port;
++
++	/*
++	 * If this DS is also the MDS, use the MDS session only if the
++	 * MDS exchangeid flags show the EXCHGID4_FLAG_USE_PNFS_DS pNFS role.
++	 */
++	mds_addr = (struct sockaddr *)&clp->cl_addr;
++	if (nfs_sockaddr_cmp((struct sockaddr *)&sin, mds_addr)) {
++		if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS)) {
++			printk(KERN_INFO "ip:port %s is not a pNFS Data "
++				"Server\n", ds->r_addr);
++			err = -ENODEV;
++		} else {
++			atomic_inc(&clp->cl_count);
++			ds->ds_clp = clp;
++			dprintk("%s Using MDS Session for DS\n", __func__);
++		}
++		goto out;
++	}
++
++	/* Temporary server for nfs4_set_client */
++	err = -ENOMEM;	/* reported when the allocation below fails */
++	tmp = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
++	if (!tmp)
++		goto out;
++
++	/*
++	 * Set a retrans, timeout interval, and authflavor equal to the MDS
++	 * values. Use the MDS nfs_client cl_ipaddr field so as to use the
++	 * same co_ownerid as the MDS.
++	 */
++	err = nfs4_set_client(tmp,
++			      mds_srv->nfs_client->cl_hostname,
++			      (struct sockaddr *)&sin,
++			      sizeof(struct sockaddr),
++			      mds_srv->nfs_client->cl_ipaddr,
++			      mds_clnt->cl_auth->au_flavor,
++			      IPPROTO_TCP,
++			      mds_clnt->cl_xprt->timeout,
++			      1 /* minorversion */);
++	if (err < 0)
++		goto out_free;
++
++	clp = tmp->nfs_client;
++
++	/* Ask for only the EXCHGID4_FLAG_USE_PNFS_DS pNFS role */
++	dprintk("%s EXCHANGE_ID for clp %p\n", __func__, clp);
++	clp->cl_exchange_flags = EXCHGID4_FLAG_USE_PNFS_DS;
++
++	err = nfs4_recover_expired_lease(clp);
++	if (!err)
++		err = nfs4_check_client_ready(clp);
++	if (err)
++		goto out_put;
++
++	if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS)) {
++		printk(KERN_INFO "ip:port %s is not a pNFS Data Server\n",
++			ds->r_addr);
++		err = -ENODEV;
++		goto out_put;
++	}
++	/*
++	 * Mask the (possibly) returned EXCHGID4_FLAG_USE_PNFS_MDS pNFS role
++	 * The is_ds_only_session depends on this.
++	 */
++	clp->cl_exchange_flags &= ~EXCHGID4_FLAG_USE_PNFS_MDS;
++	/*
++	 * Set DS lease equal to the MDS lease, renewal is scheduled in
++	 * create_session
++	 */
++	spin_lock(&mds_srv->nfs_client->cl_lock);
++	clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
++	spin_unlock(&mds_srv->nfs_client->cl_lock);
++	clp->cl_last_renewal = jiffies;
++
++	clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
++	ds->ds_clp = clp;
++
++	dprintk("%s: ip=%x, port=%hu, rpcclient %p\n", __func__,
++		ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), clp->cl_rpcclient);
++out_free:
++	kfree(tmp);
++out:
++	dprintk("%s Returns %d\n", __func__, err);
++	return err;
++out_put:
++	nfs_put_client(clp);
++	goto out_free;
++}
++
++/* Release the client reference held by @ds (if any) and free @ds. */
++static void destroy_ds(struct nfs4_pnfs_ds *ds)
++{
++	dprintk("--> %s\n", __func__);
++	print_ds(ds);
++
++	if (ds->ds_clp != NULL)
++		nfs_put_client(ds->ds_clp);
++	kfree(ds);
++}
++
++/* Drop one reference on each data server of @dsaddr, then free @dsaddr. */
++static void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
++{
++	int i;
++
++	dprintk("%s: device id=%s\n", __func__,
++		deviceid_fmt(&dsaddr->deviceid.de_id));
++
++	for (i = 0; i < dsaddr->ds_num; i++) {
++		struct nfs4_pnfs_ds *ds = dsaddr->ds_list[i];
++
++		if (ds == NULL)
++			continue;
++		/* the cache lock is taken only when the count reaches zero */
++		if (!atomic_dec_and_lock(&ds->ds_count, &nfs4_ds_cache_lock))
++			continue;
++		list_del_init(&ds->ds_node);
++		spin_unlock(&nfs4_ds_cache_lock);
++		destroy_ds(ds);
++	}
++	kfree(dsaddr->stripe_indices);
++	kfree(dsaddr);
++}
++
++/* Map @kref back to the dsaddr that embeds it and free that dsaddr. */
++void nfs4_fl_free_deviceid_callback(struct kref *kref)
++{
++	struct nfs4_deviceid *devid;
++
++	devid = container_of(kref, struct nfs4_deviceid, de_kref);
++	nfs4_fl_free_deviceid(container_of(devid,
++					   struct nfs4_file_layout_dsaddr,
++					   deviceid));
++}
++
++/* Set *dsp to the cached DS for ip_addr/port, adding one if needed. */
++static void
++nfs4_pnfs_ds_add(struct inode *inode, struct nfs4_pnfs_ds **dsp,
++		 u32 ip_addr, u32 port, char *r_addr, int len)
++{
++	struct nfs4_pnfs_ds *cached, *fresh;
++
++	*dsp = NULL;
++	fresh = kzalloc(sizeof(*fresh), GFP_KERNEL);
++	if (fresh == NULL)
++		return;
++
++	spin_lock(&nfs4_ds_cache_lock);
++	cached = _data_server_lookup(ip_addr, port);
++	if (cached != NULL) {
++		atomic_inc(&cached->ds_count);
++		*dsp = cached;
++		dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
++				__func__, cached->ds_ip_addr,
++				atomic_read(&cached->ds_count));
++		spin_unlock(&nfs4_ds_cache_lock);
++		kfree(fresh);
++		return;
++	}
++	fresh->ds_ip_addr = ip_addr;
++	fresh->ds_port = port;
++	strncpy(fresh->r_addr, r_addr, len);
++	atomic_set(&fresh->ds_count, 1);
++	INIT_LIST_HEAD(&fresh->ds_node);
++	fresh->ds_clp = NULL;
++	list_add(&fresh->ds_node, &nfs4_data_server_cache);
++	*dsp = fresh;
++	dprintk("%s add new data server ip 0x%x\n", __func__,
++			fresh->ds_ip_addr);
++	spin_unlock(&nfs4_ds_cache_lock);
++}
++
++/* Decode one (r_netid, r_addr) pair at *pp and look up or add its DS. */
++static struct nfs4_pnfs_ds *
++decode_and_add_ds(uint32_t **pp, struct inode *inode)
++{
++	struct nfs4_pnfs_ds *ds = NULL;
++	char r_addr[29]; /* max size of ip/port string */
++	int len, i;
++	u32 ip_addr, port;
++	int tmp[6];
++	uint32_t *p = *pp;
++
++	dprintk("%s enter\n", __func__);
++	/* r_netid: only a 3-byte id (expected to be "tcp") is accepted */
++	len = be32_to_cpup(p++);
++	if (len != 3) {
++		printk("%s: ERROR: non TCP r_netid len %d\n",
++			__func__, len);
++		goto out_err;
++	}
++	/* skip the padded netid word itself; its contents are not checked */
++	p++;
++	len = be32_to_cpup(p++);
++	if (len >= sizeof(r_addr)) {
++		printk("%s: ERROR: Device ip/port too long (%d)\n",
++			__func__, len);
++		goto out_err;
++	}
++	memcpy(r_addr, p, len);
++	p += XDR_QUADLEN(len);
++	*pp = p;
++	r_addr[len] = '\0';
++	/* all six fields must parse, else tmp[] would be used uninitialized */
++	if (sscanf(r_addr, "%d.%d.%d.%d.%d.%d", &tmp[0], &tmp[1],
++		   &tmp[2], &tmp[3], &tmp[4], &tmp[5]) != 6)
++		goto out_err;
++	for (i = 0; i < 6; i++)
++		if (tmp[i] < 0 || tmp[i] > 255)
++			goto out_err;
++	ip_addr = htonl((tmp[0]<<24) | (tmp[1]<<16) | (tmp[2]<<8) | (tmp[3]));
++	port = htons((tmp[4] << 8) | (tmp[5]));
++
++	nfs4_pnfs_ds_add(inode, &ds, ip_addr, port, r_addr, len);
++	dprintk("%s: addr:port string = %s\n", __func__, r_addr);
++	return ds;
++out_err:
++	dprintk("%s returned NULL\n", __func__);
++	return NULL;
++}
++
++/* Decode and validate opaque device data; returns NULL on any error */
++static struct nfs4_file_layout_dsaddr*
++decode_device(struct inode *ino, struct pnfs_device *pdev)
++{
++	int i, dummy;
++	u32 cnt, num;
++	uint32_t *p = (u32 *)pdev->area, *indicesp;
++	struct nfs4_file_layout_dsaddr *dsaddr;
++
++	/* Get the stripe count; zero would later divide by zero in striping */
++	cnt = be32_to_cpup(p++);
++	dprintk("%s stripe count  %d\n", __func__, cnt);
++	if (cnt == 0 || cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
++		printk(KERN_WARNING "%s: stripe count %d outside "
++		       "supported range 1..%d\n", __func__,
++			cnt, NFS4_PNFS_MAX_STRIPE_CNT);
++		goto out_err;
++	}
++
++	/* Check the multipath list count; an empty list has no usable DS */
++	indicesp = p;
++	p += XDR_QUADLEN(cnt << 2);
++	num = be32_to_cpup(p++);
++	dprintk("%s ds_num %u\n", __func__, num);
++	if (num == 0 || num > NFS4_PNFS_MAX_MULTI_CNT) {
++		printk(KERN_WARNING "%s: multipath count %d outside "
++			"supported range 1..%d\n", __func__,
++			num, NFS4_PNFS_MAX_MULTI_CNT);
++		goto out_err;
++	}
++	dsaddr = kzalloc(sizeof(*dsaddr) +
++			(sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
++			GFP_KERNEL);
++	if (!dsaddr)
++		goto out_err;
++
++	dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
++	if (!dsaddr->stripe_indices)
++		goto out_err_free;
++
++	dsaddr->stripe_count = cnt;
++	dsaddr->ds_num = num;
++
++	memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id,
++	       NFS4_PNFS_DEVICEID4_SIZE);
++
++	/* Go back and read stripe indices */
++	p = indicesp;
++	for (i = 0; i < dsaddr->stripe_count; i++) {
++		dummy = be32_to_cpup(p++);
++		/* a stripe index names a data server: it must be below num */
++		if ((u32)dummy >= num)
++			goto out_err_free;
++		dsaddr->stripe_indices[i] = dummy;
++	}
++	/* Skip already read multipath list count */
++	p++;
++
++	for (i = 0; i < dsaddr->ds_num; i++) {
++		int j;
++
++		dummy = be32_to_cpup(p++); /* multipath count */
++		if (dummy > 1) {
++			printk(KERN_WARNING
++			       "%s: Multipath count %d not supported, "
++			       "skipping all greater than 1\n", __func__,
++				dummy);
++		}
++		for (j = 0; j < dummy; j++) {
++			if (j == 0) {
++				dsaddr->ds_list[i] = decode_and_add_ds(&p, ino);
++				if (dsaddr->ds_list[i] == NULL)
++					goto out_err_free;
++			} else {
++				u32 len;
++				/* skip extra multipath */
++				len = be32_to_cpup(p++);
++				p += XDR_QUADLEN(len);
++				len = be32_to_cpup(p++);
++				p += XDR_QUADLEN(len);
++				continue;
++			}
++		}
++	}
++	nfs4_init_deviceid_node(&dsaddr->deviceid);
++
++	return dsaddr;
++
++out_err_free:
++	nfs4_fl_free_deviceid(dsaddr);
++out_err:
++	dprintk("%s ERROR: returning NULL\n", __func__);
++	return NULL;
++}
++
++/*
++ * Decode the opaque device specified in 'dev' and pass the result to
++ * nfs4_add_get_deviceid() for the client's deviceid cache.
++ * The dsaddr returned here is the one embedding whatever entry that
++ * call hands back, which may be an already cached one.
++ */
++static struct nfs4_file_layout_dsaddr*
++decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
++{
++	struct nfs4_file_layout_dsaddr *decoded = decode_device(inode, dev);
++	struct nfs_client *clp;
++	struct nfs4_deviceid *entry;
++
++	if (decoded == NULL) {
++		printk(KERN_WARNING "%s: Could not decode or add device\n",
++			__func__);
++		return NULL;
++	}
++
++	clp = NFS_SERVER(inode)->nfs_client;
++	entry = nfs4_add_get_deviceid(clp->cl_devid_cache,
++				      &decoded->deviceid);
++	return container_of(entry, struct nfs4_file_layout_dsaddr, deviceid);
++}
++
++/*
++ * Retrieve the information for dev_id, add it to the list
++ * of available devices, and return it.
++ * Returns NULL if an allocation, the GETDEVICEINFO call or the
++ * decode fails; the temporary pages are released in every case.
++ */
++struct nfs4_file_layout_dsaddr *
++get_device_info(struct inode *inode, struct pnfs_deviceid *dev_id)
++{
++	struct pnfs_device *pdev = NULL;
++	u32 max_resp_sz;
++	int max_pages;
++	struct page **pages = NULL;
++	struct nfs4_file_layout_dsaddr *dsaddr = NULL;
++	int rc, i;
++	struct nfs_server *server = NFS_SERVER(inode);
++
++	/* Size the GETDEVICEINFO reply buffer from the session max_resp_sz */
++	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
++	max_pages = max_resp_sz >> PAGE_SHIFT;
++	dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
++		__func__, inode, max_resp_sz, max_pages);
++
++	pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
++	if (pdev == NULL)
++		return NULL;
++
++	pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
++	if (pages == NULL) {
++		kfree(pdev);
++		return NULL;
++	}
++	for (i = 0; i < max_pages; i++) {
++		pages[i] = alloc_page(GFP_KERNEL);
++		if (!pages[i])
++			goto out_free;
++	}
++
++	/* set pdev->area */
++	pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
++	if (!pdev->area)
++		goto out_free;
++
++	memcpy(&pdev->dev_id, dev_id, NFS4_PNFS_DEVICEID4_SIZE);
++	pdev->layout_type = LAYOUT_NFSV4_1_FILES;
++	pdev->pages = pages;
++	pdev->pgbase = 0;
++	pdev->pglen = PAGE_SIZE * max_pages;
++	pdev->mincount = 0;
++	/* TODO: Update types when CB_NOTIFY_DEVICEID is available */
++	pdev->dev_notify_types = 0;
++
++	rc = pnfs_callback_ops->nfs_getdeviceinfo(server, pdev);
++	dprintk("%s getdevice info returns %d\n", __func__, rc);
++	if (rc)
++		goto out_free;
++
++	/*
++	 * Found new device, need to decode it and then add it to the
++	 * list of known devices for this mountpoint.
++	 */
++	dsaddr = decode_and_add_device(inode, pdev);
++out_free:
++	if (pdev->area != NULL)
++		vunmap(pdev->area);
++	/* pages[] was zeroed and filled in order; stop at the first hole */
++	for (i = 0; i < max_pages && pages[i]; i++)
++		__free_page(pages[i]);
++	kfree(pages);
++	kfree(pdev);
++	dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
++	return dsaddr;
++}
++
++struct nfs4_file_layout_dsaddr *
++nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *found;	/* may be NULL, see the return below */
++
++	found = nfs4_find_get_deviceid(clp->cl_devid_cache, id);
++	dprintk("%s device id (%s) nfs4_deviceid %p\n", __func__,
++		deviceid_fmt(id), found);
++	return found ? container_of(found, struct nfs4_file_layout_dsaddr,
++				    deviceid) : NULL;
++}
++
++/*
++ * res = (offset - flseg->pattern_offset) / flseg->stripe_unit, then
++ * return (res + flseg->first_stripe_index) % dsaddr->stripe_count
++ */
++static inline u32
++_nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	u64 tmp;
++
++	tmp = offset - flseg->pattern_offset;
++	do_div(tmp, flseg->stripe_unit); /* tmp now holds the quotient */
++	tmp += flseg->first_stripe_index;
++	return do_div(tmp, FILE_DSADDR(lseg)->stripe_count); /* remainder */
++}
++
++/* Map @offset to its stripe slot and return that slot's stripe index. */
++u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_file_layout_dsaddr *dsaddr = FILE_DSADDR(lseg);
++	u32 slot = _nfs4_fl_calc_j_index(lseg, offset);
++
++	return dsaddr->stripe_indices[slot];
++}
++
++/* Pick the filehandle for @offset; NULL if a sparse layout has none. */
++struct nfs_fh *
++nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, loff_t offset)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	u32 idx = 0;
++
++	if (flseg->stripe_type != STRIPE_SPARSE) {
++		idx = _nfs4_fl_calc_j_index(lseg, offset);
++	} else if (flseg->num_fh == 0) {
++		return NULL;
++	} else if (flseg->num_fh != 1) {
++		idx = nfs4_fl_calc_ds_index(lseg, offset);
++	}
++
++	return &flseg->fh_array[idx];
++}
++
++/* Return the DS at @ds_idx with a usable nfs_client, or NULL on failure. */
++struct nfs4_pnfs_ds *
++nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
++{
++	struct nfs4_filelayout_segment *flseg = LSEG_LD_DATA(lseg);
++	struct nfs4_file_layout_dsaddr *dsaddr = FILE_DSADDR(lseg);
++	struct nfs4_pnfs_ds *ds;
++
++	/* ds_idx may originate from decoded stripe_indices[]: bound it */
++	if (ds_idx >= dsaddr->ds_num || dsaddr->ds_list[ds_idx] == NULL) {
++		printk(KERN_ERR "%s: No data server for device id (%s)!!\n",
++			__func__, deviceid_fmt(&flseg->dev_id));
++		return NULL;
++	}
++	ds = dsaddr->ds_list[ds_idx];
++	if (!ds->ds_clp) {
++		int err;
++
++		err = nfs4_pnfs_ds_create(PNFS_NFS_SERVER(lseg->layout), ds);
++		if (err) {
++			printk(KERN_ERR "%s nfs4_pnfs_ds_create error %d\n",
++			       __func__, err);
++			return NULL;
++		}
++	}
++	dprintk("%s: dev_id=%s, ds_idx=%u\n",
++		__func__, deviceid_fmt(&flseg->dev_id), ds_idx);
++	return ds;
++}
++
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h
+--- linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h.orig	2010-09-30 10:17:08.657991000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4filelayout.h	2010-09-30 10:17:08.658997000 -0400
+@@ -0,0 +1,96 @@
++/*
++ *  linux/fs/nfs/nfs4filelayout.h
++ *
++ *  NFSv4 file layout driver data structures.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef FS_NFS_NFS4FILELAYOUT_H
++#define FS_NFS_NFS4FILELAYOUT_H
++
++#include <linux/kref.h>
++#include <linux/nfs4_pnfs.h>
++
++#define NFS4_PNFS_DEV_HASH_BITS 5
++#define NFS4_PNFS_DEV_HASH_SIZE (1 << NFS4_PNFS_DEV_HASH_BITS)
++#define NFS4_PNFS_DEV_HASH_MASK (NFS4_PNFS_DEV_HASH_SIZE - 1)
++
++#define NFS4_PNFS_MAX_STRIPE_CNT 4096
++#define NFS4_PNFS_MAX_MULTI_CNT  64 /* 256 fit into a u8 stripe_index */
++#define NFS4_PNFS_MAX_MULTI_DS   2
++
++#define FILE_DSADDR(lseg) (container_of((lseg)->deviceid, \
++					struct nfs4_file_layout_dsaddr, \
++					deviceid)) /* lseg -> enclosing dsaddr */
++
++enum stripetype4 {
++	STRIPE_SPARSE = 1,
++	STRIPE_DENSE = 2
++};
++
++/* One data server (IPv4 address and port), shared through a global cache */
++struct nfs4_pnfs_ds {
++	struct list_head	ds_node;  /* link in nfs4_data_server_cache */
++	u32 			ds_ip_addr; /* as produced by htonl() */
++	u32 			ds_port; /* as produced by htons() */
++	struct nfs_client	*ds_clp; /* NULL until nfs4_pnfs_ds_create() */
++	atomic_t		ds_count; /* DS is destroyed when this hits 0 */
++	char r_addr[29]; /* NUL-terminated ip/port string as received */
++};
++
++struct nfs4_file_layout_dsaddr {
++	struct nfs4_deviceid	deviceid; /* embedded; see FILE_DSADDR() */
++	u32 			stripe_count; /* entries in stripe_indices */
++	u8			*stripe_indices; /* separately kzalloc'ed array */
++	u32			ds_num; /* entries in ds_list */
++	struct nfs4_pnfs_ds	*ds_list[1]; /* allocated with ds_num slots */
++};
++
++struct nfs4_pnfs_dev_hlist {
++	rwlock_t		dev_lock;
++	struct hlist_head	dev_list[NFS4_PNFS_DEV_HASH_SIZE];
++};
++
++struct nfs4_filelayout_segment {
++	u32 stripe_type;
++	u32 commit_through_mds;
++	u32 stripe_unit;
++	u32 first_stripe_index;
++	u64 pattern_offset;
++	struct pnfs_deviceid dev_id;
++	unsigned int num_fh;
++	struct nfs_fh *fh_array;
++};
++
++struct nfs4_filelayout {
++	struct pnfs_layout_hdr fl_layout;
++	u32 stripe_unit;
++};
++
++extern struct nfs_fh *
++nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, loff_t offset);
++
++static inline struct nfs4_filelayout *
++FILE_LO(struct pnfs_layout_hdr *lo)
++{
++	return container_of(lo, struct nfs4_filelayout, fl_layout);
++}
++
++extern struct pnfs_client_operations *pnfs_callback_ops;
++
++extern void nfs4_fl_free_deviceid_callback(struct kref *);
++extern void print_ds(struct nfs4_pnfs_ds *ds);
++char *deviceid_fmt(const struct pnfs_deviceid *dev_id);
++u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, loff_t offset);
++struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
++					u32 ds_idx);
++extern struct nfs4_file_layout_dsaddr *
++nfs4_fl_find_get_deviceid(struct nfs_client *, struct pnfs_deviceid *dev_id);
++struct nfs4_file_layout_dsaddr *
++get_device_info(struct inode *inode, struct pnfs_deviceid *dev_id);
++
++#endif /* FS_NFS_NFS4FILELAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig linux-2.6.34.noarch/fs/nfs/nfs4_fs.h
+--- linux-2.6.34.noarch/fs/nfs/nfs4_fs.h.orig	2010-09-30 10:15:17.839715000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4_fs.h	2010-09-30 10:17:08.649992000 -0400
+@@ -45,8 +45,28 @@ enum nfs4_client_state {
+ 	NFS4CLNT_RECLAIM_NOGRACE,
+ 	NFS4CLNT_DELEGRETURN,
+ 	NFS4CLNT_SESSION_RESET,
+-	NFS4CLNT_SESSION_DRAINING,
+ 	NFS4CLNT_RECALL_SLOT,
++	NFS4CLNT_LAYOUT_RECALL,
++};
++
++enum nfs4_session_state {
++	NFS4_SESSION_INITING,
++	NFS4_SESSION_DRAINING,
++};
++
++struct nfs4_minor_version_ops {
++	u32	minor_version;
++
++	int	(*call_sync)(struct nfs_server *server,
++			struct rpc_message *msg,
++			struct nfs4_sequence_args *args,
++			struct nfs4_sequence_res *res,
++			int cache_reply);
++	int	(*validate_stateid)(struct nfs_delegation *,
++			const nfs4_stateid *);
++	const struct nfs4_state_recovery_ops *reboot_recovery_ops;
++	const struct nfs4_state_recovery_ops *nograce_recovery_ops;
++	const struct nfs4_state_maintenance_ops *state_renewal_ops;
+ };
+ 
+ /*
+@@ -89,7 +109,6 @@ struct nfs_unique_id {
+  */
+ struct nfs4_state_owner {
+ 	struct nfs_unique_id so_owner_id;
+-	struct nfs_client    *so_client;
+ 	struct nfs_server    *so_server;
+ 	struct rb_node	     so_client_node;
+ 
+@@ -99,7 +118,6 @@ struct nfs4_state_owner {
+ 	atomic_t	     so_count;
+ 	unsigned long	     so_flags;
+ 	struct list_head     so_states;
+-	struct list_head     so_delegations;
+ 	struct nfs_seqid_counter so_seqid;
+ 	struct rpc_sequence  so_sequence;
+ };
+@@ -125,10 +143,20 @@ enum {
+  * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
+  */
+ 
++struct nfs4_lock_owner {
++	unsigned int lo_type;
++#define NFS4_ANY_LOCK_TYPE	(0U)
++#define NFS4_FLOCK_LOCK_TYPE	(1U << 0)
++#define NFS4_POSIX_LOCK_TYPE	(1U << 1)
++	union {
++		fl_owner_t posix_owner;
++		pid_t flock_owner;
++	} lo_u;
++};
++
+ struct nfs4_lock_state {
+ 	struct list_head	ls_locks;	/* Other lock stateids */
+ 	struct nfs4_state *	ls_state;	/* Pointer to open state */
+-	fl_owner_t		ls_owner;	/* POSIX lock owner */
+ #define NFS_LOCK_INITIALIZED 1
+ 	int			ls_flags;
+ 	struct nfs_seqid_counter	ls_seqid;
+@@ -136,6 +164,7 @@ struct nfs4_lock_state {
+ 	struct nfs_unique_id	ls_id;
+ 	nfs4_stateid		ls_stateid;
+ 	atomic_t		ls_count;
++	struct nfs4_lock_owner	ls_owner;
+ };
+ 
+ /* bits for nfs4_state->flags */
+@@ -219,22 +248,34 @@ extern int nfs4_open_revalidate(struct i
+ extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
+ extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
+ 		struct nfs4_fs_locations *fs_locations, struct page *page);
++extern void nfs4_release_lockowner(const struct nfs4_lock_state *);
+ 
+-extern struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[];
+-extern struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[];
+ #if defined(CONFIG_NFS_V4_1)
+-extern int nfs4_setup_sequence(struct nfs_client *clp,
++static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
++{
++	return server->nfs_client->cl_session;
++}
++
++extern int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
+ 		int cache_reply, struct rpc_task *task);
+ extern void nfs4_destroy_session(struct nfs4_session *session);
+ extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
++extern int nfs4_proc_exchange_id(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_proc_create_session(struct nfs_client *);
+ extern int nfs4_proc_destroy_session(struct nfs4_session *);
+ extern int nfs4_init_session(struct nfs_server *server);
+ extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
+ 		struct nfs_fsinfo *fsinfo);
+ #else /* CONFIG_NFS_v4_1 */
+-static inline int nfs4_setup_sequence(struct nfs_client *clp,
++static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
++{
++	return NULL;
++}
++
++static inline int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
+ 		int cache_reply, struct rpc_task *task)
+ {
+@@ -247,12 +288,12 @@ static inline int nfs4_init_session(stru
+ }
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+-extern struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[];
++extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
+ 
+ extern const u32 nfs4_fattr_bitmap[2];
+ extern const u32 nfs4_statfs_bitmap[2];
+ extern const u32 nfs4_pathconf_bitmap[2];
+-extern const u32 nfs4_fsinfo_bitmap[2];
++extern const u32 nfs4_fsinfo_bitmap[3];
+ extern const u32 nfs4_fs_locations_bitmap[2];
+ 
+ /* nfs4renewd.c */
+@@ -284,7 +325,7 @@ extern void nfs41_handle_sequence_flag_e
+ extern void nfs41_handle_recall_slot(struct nfs_client *clp);
+ extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
+ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
+-extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
++extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
+ 
+ extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
+ extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
+@@ -293,6 +334,7 @@ extern void nfs_increment_lock_seqid(int
+ extern void nfs_release_seqid(struct nfs_seqid *seqid);
+ extern void nfs_free_seqid(struct nfs_seqid *seqid);
+ 
++/* write.c */
+ extern const nfs4_stateid zero_stateid;
+ 
+ /* nfs4xdr.c */
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig linux-2.6.34.noarch/fs/nfs/nfs4proc.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4proc.c.orig	2010-09-30 10:15:17.855715000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4proc.c	2010-09-30 10:17:08.673994000 -0400
+@@ -49,12 +49,14 @@
+ #include <linux/mount.h>
+ #include <linux/module.h>
+ #include <linux/sunrpc/bc_xprt.h>
++#include <linux/nfs4_pnfs.h>
+ 
+ #include "nfs4_fs.h"
+ #include "delegation.h"
+ #include "internal.h"
+ #include "iostat.h"
+ #include "callback.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_PROC
+ 
+@@ -67,7 +69,7 @@ struct nfs4_opendata;
+ static int _nfs4_proc_open(struct nfs4_opendata *data);
+ static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
+ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
+-static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
++static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, struct nfs_client *);
+ static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
+ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
+ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
+@@ -125,11 +127,16 @@ const u32 nfs4_pathconf_bitmap[2] = {
+ 	0
+ };
+ 
+-const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
++const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE
+ 			| FATTR4_WORD0_MAXREAD
+ 			| FATTR4_WORD0_MAXWRITE
+ 			| FATTR4_WORD0_LEASE_TIME,
++#ifdef CONFIG_NFS_V4_1
++			FATTR4_WORD1_FS_LAYOUT_TYPES,
++			FATTR4_WORD2_LAYOUT_BLKSIZE
++#else /* CONFIG_NFS_V4_1 */
+ 			0
++#endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+ const u32 nfs4_fs_locations_bitmap[2] = {
+@@ -356,7 +363,7 @@ static void nfs41_check_drain_session_co
+ {
+ 	struct rpc_task *task;
+ 
+-	if (!test_bit(NFS4CLNT_SESSION_DRAINING, &ses->clp->cl_state)) {
++	if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
+ 		task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq);
+ 		if (task)
+ 			rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+@@ -370,12 +377,11 @@ static void nfs41_check_drain_session_co
+ 	complete(&ses->complete);
+ }
+ 
+-static void nfs41_sequence_free_slot(const struct nfs_client *clp,
+-			      struct nfs4_sequence_res *res)
++static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
+ {
+ 	struct nfs4_slot_table *tbl;
+ 
+-	tbl = &clp->cl_session->fc_slot_table;
++	tbl = &res->sr_session->fc_slot_table;
+ 	if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) {
+ 		/* just wake up the next guy waiting since
+ 		 * we may have not consumed a slot after all */
+@@ -385,18 +391,17 @@ static void nfs41_sequence_free_slot(con
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+ 	nfs4_free_slot(tbl, res->sr_slotid);
+-	nfs41_check_drain_session_complete(clp->cl_session);
++	nfs41_check_drain_session_complete(res->sr_session);
+ 	spin_unlock(&tbl->slot_tbl_lock);
+ 	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+ }
+ 
+-static void nfs41_sequence_done(struct nfs_client *clp,
+-				struct nfs4_sequence_res *res,
+-				int rpc_status)
++static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
+ {
+ 	unsigned long timestamp;
+ 	struct nfs4_slot_table *tbl;
+ 	struct nfs4_slot *slot;
++	struct nfs_client *clp;
+ 
+ 	/*
+ 	 * sr_status remains 1 if an RPC level error occurred. The server
+@@ -411,13 +416,16 @@ static void nfs41_sequence_done(struct n
+ 	if (res->sr_slotid == NFS4_MAX_SLOT_TABLE)
+ 		goto out;
+ 
++	tbl = &res->sr_session->fc_slot_table;
++	slot = tbl->slots + res->sr_slotid;
++
+ 	/* Check the SEQUENCE operation status */
+-	if (res->sr_status == 0) {
+-		tbl = &clp->cl_session->fc_slot_table;
+-		slot = tbl->slots + res->sr_slotid;
++	switch (res->sr_status) {
++	case 0:
+ 		/* Update the slot's sequence and clientid lease timer */
+ 		++slot->seq_nr;
+ 		timestamp = res->sr_renewal_time;
++		clp = res->sr_session->clp;
+ 		spin_lock(&clp->cl_lock);
+ 		if (time_before(clp->cl_last_renewal, timestamp))
+ 			clp->cl_last_renewal = timestamp;
+@@ -425,11 +433,39 @@ static void nfs41_sequence_done(struct n
+ 		/* Check sequence flags */
+ 		if (atomic_read(&clp->cl_count) > 1)
+ 			nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
++		break;
++	case -NFS4ERR_DELAY:
++		/* The server detected a resend of the RPC call and
++		 * returned NFS4ERR_DELAY as per Section 2.10.6.2
++		 * of RFC5661.
++		 */
++		dprintk("%s: slot=%d seq=%d: Operation in progress\n",
++				__func__, res->sr_slotid, slot->seq_nr);
++		goto out_retry;
++	default:
++		/* Just update the slot sequence no. */
++		++slot->seq_nr;
+ 	}
+ out:
+ 	/* The session may be reset by one of the error handlers. */
+ 	dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
+-	nfs41_sequence_free_slot(clp, res);
++	nfs41_sequence_free_slot(res);
++	return 1;
++out_retry:
++	rpc_delay(task, NFS4_POLL_RETRY_MAX);
++	rpc_restart_call(task);
++	/* FIXME: rpc_restart_call() should be made to return success/fail */
++	if (RPC_ASSASSINATED(task))
++		goto out;
++	return 0;
++}
++
++static int nfs4_sequence_done(struct rpc_task *task,
++			       struct nfs4_sequence_res *res)
++{
++	if (res->sr_session == NULL)
++		return 1;
++	return nfs41_sequence_done(task, res);
+ }
+ 
+ /*
+@@ -480,12 +516,11 @@ static int nfs41_setup_sequence(struct n
+ 	if (res->sr_slotid != NFS4_MAX_SLOT_TABLE)
+ 		return 0;
+ 
+-	memset(res, 0, sizeof(*res));
+ 	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+ 	tbl = &session->fc_slot_table;
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+-	if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state) &&
++	if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
+ 	    !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
+ 		/*
+ 		 * The state manager will wait until the slot table is empty.
+@@ -525,6 +560,7 @@ static int nfs41_setup_sequence(struct n
+ 	res->sr_session = session;
+ 	res->sr_slotid = slotid;
+ 	res->sr_renewal_time = jiffies;
++	res->sr_status_flags = 0;
+ 	/*
+ 	 * sr_status is only set in decode_sequence, and so will remain
+ 	 * set to 1 if an rpc level failure occurs.
+@@ -533,33 +569,36 @@ static int nfs41_setup_sequence(struct n
+ 	return 0;
+ }
+ 
+-int nfs4_setup_sequence(struct nfs_client *clp,
++int nfs4_setup_sequence(const struct nfs_server *server,
++		struct nfs4_session *ds_session,
+ 			struct nfs4_sequence_args *args,
+ 			struct nfs4_sequence_res *res,
+ 			int cache_reply,
+ 			struct rpc_task *task)
+ {
++	struct nfs4_session *session = nfs4_get_session(server);
+ 	int ret = 0;
+ 
++	if (ds_session)
++		session = ds_session;
++	if (session == NULL) {
++		args->sa_session = NULL;
++		res->sr_session = NULL;
++		goto out;
++	}
++
+ 	dprintk("--> %s clp %p session %p sr_slotid %d\n",
+-		__func__, clp, clp->cl_session, res->sr_slotid);
++		__func__, session->clp, session, res->sr_slotid);
+ 
+-	if (!nfs4_has_session(clp))
+-		goto out;
+-	ret = nfs41_setup_sequence(clp->cl_session, args, res, cache_reply,
++	ret = nfs41_setup_sequence(session, args, res, cache_reply,
+ 				   task);
+-	if (ret && ret != -EAGAIN) {
+-		/* terminate rpc task */
+-		task->tk_status = ret;
+-		task->tk_action = NULL;
+-	}
+ out:
+ 	dprintk("<-- %s status=%d\n", __func__, ret);
+ 	return ret;
+ }
+ 
+ struct nfs41_call_sync_data {
+-	struct nfs_client *clp;
++	const struct nfs_server *seq_server;
+ 	struct nfs4_sequence_args *seq_args;
+ 	struct nfs4_sequence_res *seq_res;
+ 	int cache_reply;
+@@ -569,9 +608,9 @@ static void nfs41_call_sync_prepare(stru
+ {
+ 	struct nfs41_call_sync_data *data = calldata;
+ 
+-	dprintk("--> %s data->clp->cl_session %p\n", __func__,
+-		data->clp->cl_session);
+-	if (nfs4_setup_sequence(data->clp, data->seq_args,
++	dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server);
++
++	if (nfs4_setup_sequence(data->seq_server, NULL, data->seq_args,
+ 				data->seq_res, data->cache_reply, task))
+ 		return;
+ 	rpc_call_start(task);
+@@ -587,7 +626,7 @@ static void nfs41_call_sync_done(struct 
+ {
+ 	struct nfs41_call_sync_data *data = calldata;
+ 
+-	nfs41_sequence_done(data->clp, data->seq_res, task->tk_status);
++	nfs41_sequence_done(task, data->seq_res);
+ }
+ 
+ struct rpc_call_ops nfs41_call_sync_ops = {
+@@ -600,8 +639,7 @@ struct rpc_call_ops nfs41_call_priv_sync
+ 	.rpc_call_done = nfs41_call_sync_done,
+ };
+ 
+-static int nfs4_call_sync_sequence(struct nfs_client *clp,
+-				   struct rpc_clnt *clnt,
++static int nfs4_call_sync_sequence(struct nfs_server *server,
+ 				   struct rpc_message *msg,
+ 				   struct nfs4_sequence_args *args,
+ 				   struct nfs4_sequence_res *res,
+@@ -611,13 +649,13 @@ static int nfs4_call_sync_sequence(struc
+ 	int ret;
+ 	struct rpc_task *task;
+ 	struct nfs41_call_sync_data data = {
+-		.clp = clp,
++		.seq_server = server,
+ 		.seq_args = args,
+ 		.seq_res = res,
+ 		.cache_reply = cache_reply,
+ 	};
+ 	struct rpc_task_setup task_setup = {
+-		.rpc_client = clnt,
++		.rpc_client = server->client,
+ 		.rpc_message = msg,
+ 		.callback_ops = &nfs41_call_sync_ops,
+ 		.callback_data = &data
+@@ -642,10 +680,15 @@ int _nfs4_call_sync_session(struct nfs_s
+ 			    struct nfs4_sequence_res *res,
+ 			    int cache_reply)
+ {
+-	return nfs4_call_sync_sequence(server->nfs_client, server->client,
+-				       msg, args, res, cache_reply, 0);
++	return nfs4_call_sync_sequence(server, msg, args, res, cache_reply, 0);
+ }
+ 
++#else
++static int nfs4_sequence_done(struct rpc_task *task,
++			       struct nfs4_sequence_res *res)
++{
++	return 1;
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ int _nfs4_call_sync(struct nfs_server *server,
+@@ -659,18 +702,9 @@ int _nfs4_call_sync(struct nfs_server *s
+ }
+ 
+ #define nfs4_call_sync(server, msg, args, res, cache_reply) \
+-	(server)->nfs_client->cl_call_sync((server), (msg), &(args)->seq_args, \
++	(server)->nfs_client->cl_mvops->call_sync((server), (msg), &(args)->seq_args, \
+ 			&(res)->seq_res, (cache_reply))
+ 
+-static void nfs4_sequence_done(const struct nfs_server *server,
+-			       struct nfs4_sequence_res *res, int rpc_status)
+-{
+-#ifdef CONFIG_NFS_V4_1
+-	if (nfs4_has_session(server->nfs_client))
+-		nfs41_sequence_done(server->nfs_client, res, rpc_status);
+-#endif /* CONFIG_NFS_V4_1 */
+-}
+-
+ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
+ {
+ 	struct nfs_inode *nfsi = NFS_I(dir);
+@@ -745,19 +779,14 @@ static struct nfs4_opendata *nfs4_openda
+ 	p->o_arg.server = server;
+ 	p->o_arg.bitmask = server->attr_bitmask;
+ 	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
+-	if (flags & O_EXCL) {
+-		if (nfs4_has_persistent_session(server->nfs_client)) {
+-			/* GUARDED */
+-			p->o_arg.u.attrs = &p->attrs;
+-			memcpy(&p->attrs, attrs, sizeof(p->attrs));
+-		} else { /* EXCLUSIVE4_1 */
+-			u32 *s = (u32 *) p->o_arg.u.verifier.data;
+-			s[0] = jiffies;
+-			s[1] = current->pid;
+-		}
+-	} else if (flags & O_CREAT) {
++	if (flags & O_CREAT) {
++		u32 *s;
++
+ 		p->o_arg.u.attrs = &p->attrs;
+ 		memcpy(&p->attrs, attrs, sizeof(p->attrs));
++		s = (u32 *) p->o_arg.u.verifier.data;
++		s[0] = jiffies;
++		s[1] = current->pid;
+ 	}
+ 	p->c_arg.fh = &p->o_res.fh;
+ 	p->c_arg.stateid = &p->o_res.stateid;
+@@ -851,8 +880,10 @@ static void update_open_stateflags(struc
+ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
+ {
+ 	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+-		memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
+-	memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
++		memcpy(state->stateid.u.data, stateid->u.data,
++		       sizeof(state->stateid.u.data));
++	memcpy(state->open_stateid.u.data, stateid->u.data,
++	       sizeof(state->open_stateid.u.data));
+ 	switch (fmode) {
+ 		case FMODE_READ:
+ 			set_bit(NFS_O_RDONLY_STATE, &state->flags);
+@@ -880,7 +911,8 @@ static void __update_open_stateid(struct
+ 	 */
+ 	write_seqlock(&state->seqlock);
+ 	if (deleg_stateid != NULL) {
+-		memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
++		memcpy(state->stateid.u.data, deleg_stateid->u.data,
++		       sizeof(state->stateid.u.data));
+ 		set_bit(NFS_DELEGATED_STATE, &state->flags);
+ 	}
+ 	if (open_stateid != NULL)
+@@ -911,7 +943,8 @@ static int update_open_stateid(struct nf
+ 
+ 	if (delegation == NULL)
+ 		delegation = &deleg_cur->stateid;
+-	else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0)
++	else if (memcmp(deleg_cur->stateid.u.data, delegation->u.data,
++			NFS4_STATEID_SIZE) != 0)
+ 		goto no_delegation_unlock;
+ 
+ 	nfs_mark_delegation_referenced(deleg_cur);
+@@ -973,7 +1006,8 @@ static struct nfs4_state *nfs4_try_open_
+ 			break;
+ 		}
+ 		/* Save the delegation */
+-		memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
++		memcpy(stateid.u.data, delegation->stateid.u.data,
++		       sizeof(stateid.u.data));
+ 		rcu_read_unlock();
+ 		ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
+ 		if (ret != 0)
+@@ -1127,10 +1161,13 @@ static int nfs4_open_recover(struct nfs4
+ 	 * Check if we need to update the current stateid.
+ 	 */
+ 	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
+-	    memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
++	    memcmp(state->stateid.u.data, state->open_stateid.u.data,
++		   sizeof(state->stateid.u.data)) != 0) {
+ 		write_seqlock(&state->seqlock);
+ 		if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+-			memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
++			memcpy(state->stateid.u.data,
++			       state->open_stateid.u.data,
++			       sizeof(state->stateid.u.data));
+ 		write_sequnlock(&state->seqlock);
+ 	}
+ 	return 0;
+@@ -1199,8 +1236,8 @@ static int _nfs4_open_delegation_recall(
+ 	if (IS_ERR(opendata))
+ 		return PTR_ERR(opendata);
+ 	opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
+-	memcpy(opendata->o_arg.u.delegation.data, stateid->data,
+-			sizeof(opendata->o_arg.u.delegation.data));
++	memcpy(opendata->o_arg.u.delegation.u.data, stateid->u.data,
++			sizeof(opendata->o_arg.u.delegation.u.data));
+ 	ret = nfs4_open_recover(opendata, state);
+ 	nfs4_opendata_put(opendata);
+ 	return ret;
+@@ -1258,8 +1295,8 @@ static void nfs4_open_confirm_done(struc
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+ 	if (data->rpc_status == 0) {
+-		memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
+-				sizeof(data->o_res.stateid.data));
++		memcpy(data->o_res.stateid.u.data, data->c_res.stateid.u.data,
++				sizeof(data->o_res.stateid.u.data));
+ 		nfs_confirm_seqid(&data->owner->so_seqid, 0);
+ 		renew_lease(data->o_res.server, data->timestamp);
+ 		data->rpc_done = 1;
+@@ -1356,13 +1393,13 @@ static void nfs4_open_prepare(struct rpc
+ 	}
+ 	/* Update sequence id. */
+ 	data->o_arg.id = sp->so_owner_id.id;
+-	data->o_arg.clientid = sp->so_client->cl_clientid;
++	data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid;
+ 	if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
+ 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
+ 		nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
+ 	}
+ 	data->timestamp = jiffies;
+-	if (nfs4_setup_sequence(data->o_arg.server->nfs_client,
++	if (nfs4_setup_sequence(data->o_arg.server, NULL,
+ 				&data->o_arg.seq_args,
+ 				&data->o_res.seq_res, 1, task))
+ 		return;
+@@ -1385,8 +1422,8 @@ static void nfs4_open_done(struct rpc_ta
+ 
+ 	data->rpc_status = task->tk_status;
+ 
+-	nfs4_sequence_done(data->o_arg.server, &data->o_res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->o_res.seq_res))
++		return;
+ 
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+@@ -1539,9 +1576,8 @@ static int _nfs4_proc_open(struct nfs4_o
+ 	return 0;
+ }
+ 
+-static int nfs4_recover_expired_lease(struct nfs_server *server)
++int nfs4_recover_expired_lease(struct nfs_client *clp)
+ {
+-	struct nfs_client *clp = server->nfs_client;
+ 	unsigned int loop;
+ 	int ret;
+ 
+@@ -1557,6 +1593,7 @@ static int nfs4_recover_expired_lease(st
+ 	}
+ 	return ret;
+ }
++EXPORT_SYMBOL(nfs4_recover_expired_lease);
+ 
+ /*
+  * OPEN_EXPIRED:
+@@ -1646,7 +1683,7 @@ static int _nfs4_do_open(struct inode *d
+ 		dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
+ 		goto out_err;
+ 	}
+-	status = nfs4_recover_expired_lease(server);
++	status = nfs4_recover_expired_lease(server->nfs_client);
+ 	if (status != 0)
+ 		goto err_put_state_owner;
+ 	if (path->dentry->d_inode != NULL)
+@@ -1773,7 +1810,7 @@ static int _nfs4_do_setattr(struct inode
+ 	if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
+ 		/* Use that stateid */
+ 	} else if (state != NULL) {
+-		nfs4_copy_stateid(&arg.stateid, state, current->files);
++		nfs4_copy_stateid(&arg.stateid, state, current->files, current->tgid);
+ 	} else
+ 		memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
+ 
+@@ -1838,7 +1875,8 @@ static void nfs4_close_done(struct rpc_t
+ 	struct nfs4_state *state = calldata->state;
+ 	struct nfs_server *server = NFS_SERVER(calldata->inode);
+ 
+-	nfs4_sequence_done(server, &calldata->res.seq_res, task->tk_status);
++	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
++		return;
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+         /* hmm. we are done with the inode, and in the process of freeing
+@@ -1858,7 +1896,7 @@ static void nfs4_close_done(struct rpc_t
+ 			if (calldata->arg.fmode == 0)
+ 				break;
+ 		default:
+-			if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
++			if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
+ 				rpc_restart_call_prepare(task);
+ 	}
+ 	nfs_release_seqid(calldata->arg.seqid);
+@@ -1903,7 +1941,7 @@ static void nfs4_close_prepare(struct rp
+ 
+ 	nfs_fattr_init(calldata->res.fattr);
+ 	calldata->timestamp = jiffies;
+-	if (nfs4_setup_sequence((NFS_SERVER(calldata->inode))->nfs_client,
++	if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), NULL,
+ 				&calldata->arg.seq_args, &calldata->res.seq_res,
+ 				1, task))
+ 		return;
+@@ -2325,6 +2363,9 @@ nfs4_proc_setattr(struct dentry *dentry,
+ 	struct nfs4_state *state = NULL;
+ 	int status;
+ 
++	if (pnfs_ld_layoutret_on_setattr(inode))
++		pnfs_return_layout(inode, NULL, NULL, RETURN_FILE, true);
++
+ 	nfs_fattr_init(fattr);
+ 	
+ 	/* Search for an existing open(O_WRITE) file */
+@@ -2650,8 +2691,9 @@ static int nfs4_proc_unlink_done(struct 
+ {
+ 	struct nfs_removeres *res = task->tk_msg.rpc_resp;
+ 
+-	nfs4_sequence_done(res->server, &res->seq_res, task->tk_status);
+-	if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
++	if (!nfs4_sequence_done(task, &res->seq_res))
++		return 0;
++	if (nfs4_async_handle_error(task, res->server, NULL, NULL) == -EAGAIN)
+ 		return 0;
+ 	update_changeattr(dir, &res->cinfo);
+ 	nfs_post_op_update_inode(dir, res->dir_attr);
+@@ -3092,18 +3134,31 @@ static int nfs4_proc_pathconf(struct nfs
+ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
+ {
+ 	struct nfs_server *server = NFS_SERVER(data->inode);
++	struct nfs_client *client = server->nfs_client;
+ 
+ 	dprintk("--> %s\n", __func__);
+ 
+-	nfs4_sequence_done(server, &data->res.seq_res, task->tk_status);
++#ifdef CONFIG_NFS_V4_1
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS (data server) session? */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
+ 
+-	if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
+-		nfs_restart_rpc(task, server->nfs_client);
++	if (nfs4_async_handle_error(task, server, data->args.context->state, client) == -EAGAIN) {
++		nfs_restart_rpc(task, client);
+ 		return -EAGAIN;
+ 	}
+ 
+ 	nfs_invalidate_atime(data->inode);
+-	if (task->tk_status > 0)
++	if (task->tk_status > 0 && client == server->nfs_client)
+ 		renew_lease(server, data->timestamp);
+ 	return 0;
+ }
+@@ -3114,20 +3169,56 @@ static void nfs4_proc_read_setup(struct 
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+ }
+ 
++static void pnfs4_update_write_done(struct nfs_inode *nfsi, struct nfs_write_data *data)
++{
++#ifdef CONFIG_NFS_V4_1
++	pnfs_update_last_write(nfsi, data->args.offset, data->res.count);
++	pnfs_need_layoutcommit(nfsi, data->args.context);
++#endif /* CONFIG_NFS_V4_1 */
++}
++
+ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+ {
+ 	struct inode *inode = data->inode;
+-	
+-	nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
+-			   task->tk_status);
++	struct nfs_server *server = NFS_SERVER(inode);
++	struct nfs_client *client = server->nfs_client;
+ 
+-	if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
+-		nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
++
++#ifdef CONFIG_NFS_V4_1
++	/* restore original count after retry? */
++	if (data->pdata.orig_count) {
++		dprintk("%s: restoring original count %u\n", __func__,
++			data->pdata.orig_count);
++		data->args.count = data->pdata.orig_count;
++	}
++
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS (data server) session? */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (nfs4_async_handle_error(task, server, data->args.context->state, client) == -EAGAIN) {
++		nfs_restart_rpc(task, client);
+ 		return -EAGAIN;
+ 	}
++
++	/*
++	 * MDS write: renew lease
++	 * DS write: update lastbyte written, mark for layout commit
++	 */
+ 	if (task->tk_status >= 0) {
+-		renew_lease(NFS_SERVER(inode), data->timestamp);
+-		nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
++		if (client == server->nfs_client) {
++			renew_lease(server, data->timestamp);
++			nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
++		} else
++			pnfs4_update_write_done(NFS_I(inode), data);
+ 	}
+ 	return 0;
+ }
+@@ -3140,20 +3231,42 @@ static void nfs4_proc_write_setup(struct
+ 	data->res.server = server;
+ 	data->timestamp   = jiffies;
+ 
++#ifdef CONFIG_NFS_V4_1
++	/* Writes to a DS (data server) use the pNFS write procedure */
++	if (data->fldata.ds_nfs_client) {
++		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_WRITE];
++		return;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
+ }
+ 
+ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+ {
+ 	struct inode *inode = data->inode;
+-	
+-	nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res,
+-			   task->tk_status);
+-	if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
++	struct nfs_server *server = NFS_SERVER(data->inode);
++	struct nfs_client *client = server->nfs_client;
++
++#ifdef CONFIG_NFS_V4_1
++	if (data->pdata.pnfsflags & PNFS_NO_RPC)
++		return 0;
++
++	/* Is this a DS (data server) session? */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS commit\n", __func__);
++		client = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
++
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return -EAGAIN;
++
++	if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL, NULL) == -EAGAIN) {
+ 		nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
+ 		return -EAGAIN;
+ 	}
+-	nfs_refresh_inode(inode, data->res.fattr);
++	if (client == server->nfs_client)
++		nfs_refresh_inode(inode, data->res.fattr);
+ 	return 0;
+ }
+ 
+@@ -3163,6 +3276,12 @@ static void nfs4_proc_commit_setup(struc
+ 	
+ 	data->args.bitmask = server->cache_consistency_bitmask;
+ 	data->res.server = server;
++#if defined(CONFIG_NFS_V4_1)
++	if (data->fldata.ds_nfs_client) {
++		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PNFS_COMMIT];
++		return;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
+ }
+ 
+@@ -3466,9 +3585,12 @@ static int nfs4_proc_set_acl(struct inod
+ }
+ 
+ static int
+-_nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs_client *clp, struct nfs4_state *state)
++nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state, struct nfs_client *clp)
+ {
+-	if (!clp || task->tk_status >= 0)
++	if (!clp)
++		clp = server->nfs_client;
++
++	if (task->tk_status >= 0)
+ 		return 0;
+ 	switch(task->tk_status) {
+ 		case -NFS4ERR_ADMIN_REVOKED:
+@@ -3493,8 +3615,9 @@ _nfs4_async_handle_error(struct rpc_task
+ 		case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ 		case -NFS4ERR_SEQ_FALSE_RETRY:
+ 		case -NFS4ERR_SEQ_MISORDERED:
+-			dprintk("%s ERROR %d, Reset session\n", __func__,
+-				task->tk_status);
++			dprintk("%s ERROR %d, Reset session. Exchangeid "
++				"flags 0x%x\n", __func__, task->tk_status,
++				clp->cl_exchange_flags);
+ 			nfs4_schedule_state_recovery(clp);
+ 			task->tk_status = 0;
+ 			return -EAGAIN;
+@@ -3514,6 +3637,8 @@ _nfs4_async_handle_error(struct rpc_task
+ 	task->tk_status = nfs4_map_errors(task->tk_status);
+ 	return 0;
+ do_state_recovery:
++	if (is_ds_only_client(clp))
++		return 0;
+ 	rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
+ 	nfs4_schedule_state_recovery(clp);
+ 	if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
+@@ -3522,12 +3647,6 @@ do_state_recovery:
+ 	return -EAGAIN;
+ }
+ 
+-static int
+-nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
+-{
+-	return _nfs4_async_handle_error(task, server, server->nfs_client, state);
+-}
+-
+ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
+ 		unsigned short port, struct rpc_cred *cred,
+ 		struct nfs4_setclientid_res *res)
+@@ -3643,8 +3762,8 @@ static void nfs4_delegreturn_done(struct
+ {
+ 	struct nfs4_delegreturndata *data = calldata;
+ 
+-	nfs4_sequence_done(data->res.server, &data->res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return;
+ 
+ 	switch (task->tk_status) {
+ 	case -NFS4ERR_STALE_STATEID:
+@@ -3653,8 +3772,8 @@ static void nfs4_delegreturn_done(struct
+ 		renew_lease(data->res.server, data->timestamp);
+ 		break;
+ 	default:
+-		if (nfs4_async_handle_error(task, data->res.server, NULL) ==
+-				-EAGAIN) {
++		if (nfs4_async_handle_error(task, data->res.server, NULL, NULL)
++				== -EAGAIN) {
+ 			nfs_restart_rpc(task, data->res.server->nfs_client);
+ 			return;
+ 		}
+@@ -3674,7 +3793,7 @@ static void nfs4_delegreturn_prepare(str
+ 
+ 	d_data = (struct nfs4_delegreturndata *)data;
+ 
+-	if (nfs4_setup_sequence(d_data->res.server->nfs_client,
++	if (nfs4_setup_sequence(d_data->res.server, NULL,
+ 				&d_data->args.seq_args,
+ 				&d_data->res.seq_res, 1, task))
+ 		return;
+@@ -3894,15 +4013,16 @@ static void nfs4_locku_done(struct rpc_t
+ {
+ 	struct nfs4_unlockdata *calldata = data;
+ 
+-	nfs4_sequence_done(calldata->server, &calldata->res.seq_res,
+-			   task->tk_status);
++	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
++		return;
+ 	if (RPC_ASSASSINATED(task))
+ 		return;
+ 	switch (task->tk_status) {
+ 		case 0:
+-			memcpy(calldata->lsp->ls_stateid.data,
+-					calldata->res.stateid.data,
+-					sizeof(calldata->lsp->ls_stateid.data));
++			memcpy(calldata->lsp->ls_stateid.u.data,
++					calldata->res.stateid.u.data,
++					sizeof(calldata->lsp->ls_stateid.u.
++					       data));
+ 			renew_lease(calldata->server, calldata->timestamp);
+ 			break;
+ 		case -NFS4ERR_BAD_STATEID:
+@@ -3911,7 +4031,7 @@ static void nfs4_locku_done(struct rpc_t
+ 		case -NFS4ERR_EXPIRED:
+ 			break;
+ 		default:
+-			if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
++			if (nfs4_async_handle_error(task, calldata->server, NULL, NULL) == -EAGAIN)
+ 				nfs_restart_rpc(task,
+ 						 calldata->server->nfs_client);
+ 	}
+@@ -3929,7 +4049,7 @@ static void nfs4_locku_prepare(struct rp
+ 		return;
+ 	}
+ 	calldata->timestamp = jiffies;
+-	if (nfs4_setup_sequence(calldata->server->nfs_client,
++	if (nfs4_setup_sequence(calldata->server, NULL,
+ 				&calldata->arg.seq_args,
+ 				&calldata->res.seq_res, 1, task))
+ 		return;
+@@ -4084,7 +4204,8 @@ static void nfs4_lock_prepare(struct rpc
+ 	} else
+ 		data->arg.new_lock_owner = 0;
+ 	data->timestamp = jiffies;
+-	if (nfs4_setup_sequence(data->server->nfs_client, &data->arg.seq_args,
++	if (nfs4_setup_sequence(data->server, NULL,
++				&data->arg.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+@@ -4103,8 +4224,8 @@ static void nfs4_lock_done(struct rpc_ta
+ 
+ 	dprintk("%s: begin!\n", __func__);
+ 
+-	nfs4_sequence_done(data->server, &data->res.seq_res,
+-			task->tk_status);
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return;
+ 
+ 	data->rpc_status = task->tk_status;
+ 	if (RPC_ASSASSINATED(task))
+@@ -4116,8 +4237,8 @@ static void nfs4_lock_done(struct rpc_ta
+ 			goto out;
+ 	}
+ 	if (data->rpc_status == 0) {
+-		memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
+-					sizeof(data->lsp->ls_stateid.data));
++		memcpy(data->lsp->ls_stateid.u.data, data->res.stateid.u.data,
++					sizeof(data->lsp->ls_stateid.u.data));
+ 		data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+ 		renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
+ 	}
+@@ -4426,6 +4547,34 @@ out:
+ 	return err;
+ }
+ 
++static void nfs4_release_lockowner_release(void *calldata)
++{
++	kfree(calldata);
++}
++
++const struct rpc_call_ops nfs4_release_lockowner_ops = {
++	.rpc_release = nfs4_release_lockowner_release,
++};
++
++void nfs4_release_lockowner(const struct nfs4_lock_state *lsp)
++{
++	struct nfs_server *server = lsp->ls_state->owner->so_server;
++	struct nfs_release_lockowner_args *args;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER],
++	};
++
++	if (server->nfs_client->cl_mvops->minor_version != 0)
++		return;
++	args = kmalloc(sizeof(*args), GFP_NOFS);
++	if (!args)
++		return;
++	args->lock_owner.clientid = server->nfs_client->cl_clientid;
++	args->lock_owner.id = lsp->ls_id.id;
++	msg.rpc_argp = args;
++	rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args);
++}
++
+ #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
+ 
+ int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
+@@ -4528,7 +4677,7 @@ int nfs4_proc_exchange_id(struct nfs_cli
+ 	nfs4_verifier verifier;
+ 	struct nfs41_exchange_id_args args = {
+ 		.client = clp,
+-		.flags = clp->cl_exchange_flags,
++		.flags = clp->cl_exchange_flags & ~EXCHGID4_FLAG_CONFIRMED_R,
+ 	};
+ 	struct nfs41_exchange_id_res res = {
+ 		.client = clp,
+@@ -4576,6 +4725,7 @@ int nfs4_proc_exchange_id(struct nfs_cli
+ 	dprintk("<-- %s status= %d\n", __func__, status);
+ 	return status;
+ }
++EXPORT_SYMBOL(nfs4_proc_exchange_id);
+ 
+ struct nfs4_get_lease_time_data {
+ 	struct nfs4_get_lease_time_args *args;
+@@ -4613,7 +4763,8 @@ static void nfs4_get_lease_time_done(str
+ 			(struct nfs4_get_lease_time_data *)calldata;
+ 
+ 	dprintk("--> %s\n", __func__);
+-	nfs41_sequence_done(data->clp, &data->res->lr_seq_res, task->tk_status);
++	if (!nfs41_sequence_done(task, &data->res->lr_seq_res))
++		return;
+ 	switch (task->tk_status) {
+ 	case -NFS4ERR_DELAY:
+ 	case -NFS4ERR_GRACE:
+@@ -4807,13 +4958,6 @@ struct nfs4_session *nfs4_alloc_session(
+ 	if (!session)
+ 		return NULL;
+ 
+-	/*
+-	 * The create session reply races with the server back
+-	 * channel probe. Mark the client NFS_CS_SESSION_INITING
+-	 * so that the client back channel can find the
+-	 * nfs_client struct
+-	 */
+-	clp->cl_cons_state = NFS_CS_SESSION_INITING;
+ 	init_completion(&session->complete);
+ 
+ 	tbl = &session->fc_slot_table;
+@@ -4826,6 +4970,8 @@ struct nfs4_session *nfs4_alloc_session(
+ 	spin_lock_init(&tbl->slot_tbl_lock);
+ 	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
+ 
++	session->session_state = 1<<NFS4_SESSION_INITING;
++
+ 	session->clp = clp;
+ 	return session;
+ }
+@@ -5042,6 +5188,10 @@ int nfs4_init_session(struct nfs_server 
+ 	if (!nfs4_has_session(clp))
+ 		return 0;
+ 
++	session = clp->cl_session;
++	if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
++		return 0;
++
+ 	rsize = server->rsize;
+ 	if (rsize == 0)
+ 		rsize = NFS_MAX_FILE_IO_SIZE;
+@@ -5049,11 +5199,10 @@ int nfs4_init_session(struct nfs_server 
+ 	if (wsize == 0)
+ 		wsize = NFS_MAX_FILE_IO_SIZE;
+ 
+-	session = clp->cl_session;
+ 	session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
+ 	session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
+ 
+-	ret = nfs4_recover_expired_lease(server);
++	ret = nfs4_recover_expired_lease(server->nfs_client);
+ 	if (!ret)
+ 		ret = nfs4_check_client_ready(clp);
+ 	return ret;
+@@ -5062,69 +5211,70 @@ int nfs4_init_session(struct nfs_server 
+ /*
+  * Renew the cl_session lease.
+  */
+-static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+-{
++struct nfs4_sequence_data {
++	struct nfs_client *clp;
+ 	struct nfs4_sequence_args args;
+ 	struct nfs4_sequence_res res;
+-
+-	struct rpc_message msg = {
+-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
+-		.rpc_argp = &args,
+-		.rpc_resp = &res,
+-		.rpc_cred = cred,
+-	};
+-
+-	args.sa_cache_this = 0;
+-
+-	return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args,
+-				       &res, args.sa_cache_this, 1);
+-}
++};
+ 
+ static void nfs41_sequence_release(void *data)
+ {
+-	struct nfs_client *clp = (struct nfs_client *)data;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 
+ 	if (atomic_read(&clp->cl_count) > 1)
+ 		nfs4_schedule_state_renewal(clp);
+ 	nfs_put_client(clp);
++	kfree(calldata);
++}
++
++static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client *clp)
++{
++	switch(task->tk_status) {
++	case -NFS4ERR_DELAY:
++	case -EKEYEXPIRED:
++		rpc_delay(task, NFS4_POLL_RETRY_MAX);
++		return -EAGAIN;
++	default:
++		nfs4_schedule_state_recovery(clp);
++	}
++	return 0;
+ }
+ 
+ static void nfs41_sequence_call_done(struct rpc_task *task, void *data)
+ {
+-	struct nfs_client *clp = (struct nfs_client *)data;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 
+-	nfs41_sequence_done(clp, task->tk_msg.rpc_resp, task->tk_status);
++	if (!nfs41_sequence_done(task, task->tk_msg.rpc_resp))
++		return;
+ 
+ 	if (task->tk_status < 0) {
+ 		dprintk("%s ERROR %d\n", __func__, task->tk_status);
+ 		if (atomic_read(&clp->cl_count) == 1)
+ 			goto out;
+ 
+-		if (_nfs4_async_handle_error(task, NULL, clp, NULL)
+-								== -EAGAIN) {
+-			nfs_restart_rpc(task, clp);
++		if (nfs41_sequence_handle_errors(task, clp) == -EAGAIN) {
++			rpc_restart_call_prepare(task);
+ 			return;
+ 		}
+ 	}
+ 	dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred);
+ out:
+-	kfree(task->tk_msg.rpc_argp);
+-	kfree(task->tk_msg.rpc_resp);
+-
+ 	dprintk("<-- %s\n", __func__);
+ }
+ 
+ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
+ {
+-	struct nfs_client *clp;
++	struct nfs4_sequence_data *calldata = data;
++	struct nfs_client *clp = calldata->clp;
+ 	struct nfs4_sequence_args *args;
+ 	struct nfs4_sequence_res *res;
+ 
+-	clp = (struct nfs_client *)data;
+ 	args = task->tk_msg.rpc_argp;
+ 	res = task->tk_msg.rpc_resp;
+ 
+-	if (nfs4_setup_sequence(clp, args, res, 0, task))
++	if (nfs41_setup_sequence(clp->cl_session, args, res, 0, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
+@@ -5135,32 +5285,67 @@ static const struct rpc_call_ops nfs41_s
+ 	.rpc_release = nfs41_sequence_release,
+ };
+ 
+-static int nfs41_proc_async_sequence(struct nfs_client *clp,
+-				     struct rpc_cred *cred)
++static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+ {
+-	struct nfs4_sequence_args *args;
+-	struct nfs4_sequence_res *res;
++	struct nfs4_sequence_data *calldata;
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE],
+ 		.rpc_cred = cred,
+ 	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = clp->cl_rpcclient,
++		.rpc_message = &msg,
++		.callback_ops = &nfs41_sequence_ops,
++		.flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
++	};
+ 
+ 	if (!atomic_inc_not_zero(&clp->cl_count))
+-		return -EIO;
+-	args = kzalloc(sizeof(*args), GFP_NOFS);
+-	res = kzalloc(sizeof(*res), GFP_NOFS);
+-	if (!args || !res) {
+-		kfree(args);
+-		kfree(res);
++		return ERR_PTR(-EIO);
++	calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
++	if (calldata == NULL) {
+ 		nfs_put_client(clp);
+-		return -ENOMEM;
++		return ERR_PTR(-ENOMEM);
+ 	}
+-	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+-	msg.rpc_argp = args;
+-	msg.rpc_resp = res;
++	calldata->res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	msg.rpc_argp = &calldata->args;
++	msg.rpc_resp = &calldata->res;
++	calldata->clp = clp;
++	task_setup_data.callback_data = calldata;
+ 
+-	return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
+-			      &nfs41_sequence_ops, (void *)clp);
++	return rpc_run_task(&task_setup_data);
++}
++
++static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred)
++{
++	struct rpc_task *task;
++	int ret = 0;
++
++	task = _nfs41_proc_sequence(clp, cred);
++	if (IS_ERR(task))
++		ret = PTR_ERR(task);
++	else
++		rpc_put_task(task);
++	dprintk("<-- %s status=%d\n", __func__, ret);
++	return ret;
++}
++
++static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) /* start a SEQUENCE call and wait for its result */
++{
++	struct rpc_task *task;
++	int ret;
++
++	task = _nfs41_proc_sequence(clp, cred);
++	if (IS_ERR(task)) {
++		ret = PTR_ERR(task);
++		goto out;
++	}
++	ret = rpc_wait_for_completion_task(task);
++	if (!ret) /* the wait itself succeeded: report the task's own status */
++		ret = task->tk_status;
++	rpc_put_task(task);
++out:
++	dprintk("<-- %s status=%d\n", __func__, ret);
++	return ret;
+ }
+ 
+ struct nfs4_reclaim_complete_data {
+@@ -5174,13 +5359,31 @@ static void nfs4_reclaim_complete_prepar
+ 	struct nfs4_reclaim_complete_data *calldata = data;
+ 
+ 	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+-	if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args,
++	if (nfs41_setup_sequence(calldata->clp->cl_session,
++				&calldata->arg.seq_args,
+ 				&calldata->res.seq_res, 0, task))
+ 		return;
+ 
+ 	rpc_call_start(task);
+ }
+ 
++static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nfs_client *clp) /* returns -EAGAIN when the call should be restarted, else 0 */
++{
++	switch(task->tk_status) {
++	case 0:
++	case -NFS4ERR_COMPLETE_ALREADY:
++	case -NFS4ERR_WRONG_CRED: /* What to do here? */
++		break;
++	case -NFS4ERR_DELAY:
++	case -EKEYEXPIRED:
++		rpc_delay(task, NFS4_POLL_RETRY_MAX); /* back off; nfs4_reclaim_complete_done() restarts the call on -EAGAIN */
++		return -EAGAIN;
++	default:
++		nfs4_schedule_state_recovery(clp); /* any other status: schedule state recovery, no restart */
++	}
++	return 0;
++}
++
+ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data)
+ {
+ 	struct nfs4_reclaim_complete_data *calldata = data;
+@@ -5188,32 +5391,13 @@ static void nfs4_reclaim_complete_done(s
+ 	struct nfs4_sequence_res *res = &calldata->res.seq_res;
+ 
+ 	dprintk("--> %s\n", __func__);
+-	nfs41_sequence_done(clp, res, task->tk_status);
+-	switch (task->tk_status) {
+-	case 0:
+-	case -NFS4ERR_COMPLETE_ALREADY:
+-		break;
+-	case -NFS4ERR_BADSESSION:
+-	case -NFS4ERR_DEADSESSION:
+-		/*
+-		 * Handle the session error, but do not retry the operation, as
+-		 * we have no way of telling whether the clientid had to be
+-		 * reset before we got our reply.  If reset, a new wave of
+-		 * reclaim operations will follow, containing their own reclaim
+-		 * complete.  We don't want our retry to get on the way of
+-		 * recovery by incorrectly indicating to the server that we're
+-		 * done reclaiming state since the process had to be restarted.
+-		 */
+-		_nfs4_async_handle_error(task, NULL, clp, NULL);
+-		break;
+-	default:
+-		if (_nfs4_async_handle_error(
+-				task, NULL, clp, NULL) == -EAGAIN) {
+-			rpc_restart_call_prepare(task);
+-			return;
+-		}
+-	}
++	if (!nfs41_sequence_done(task, res))
++		return;
+ 
++	if (nfs41_reclaim_complete_handle_errors(task, clp) == -EAGAIN) {
++		rpc_restart_call_prepare(task);
++		return;
++	}
+ 	dprintk("<-- %s\n", __func__);
+ }
+ 
+@@ -5270,6 +5454,404 @@ out:
+ 	dprintk("<-- %s status=%d\n", __func__, status);
+ 	return status;
+ }
++
++static void /* rpc_call_prepare callback of nfs4_layoutget_call_ops */
++nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutget *lgp = calldata;
++	struct inode *ino = lgp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++	if (nfs4_setup_sequence(server, NULL, &lgp->args.seq_args,
++				&lgp->res.seq_res, 0, task))
++		return; /* non-zero from nfs4_setup_sequence(): the call is not started from here */
++	rpc_call_start(task);
++}
++
++static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) /* rpc_call_done callback of nfs4_layoutget_call_ops */
++{
++	struct nfs4_layoutget *lgp = calldata;
++	struct inode *ino = lgp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++
++	if (!nfs4_sequence_done(task, &lgp->res.seq_res))
++		return;
++
++	if (RPC_ASSASSINATED(task))
++		return;
++
++	pnfs_get_layout_done(lgp, task->tk_status);
++
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client);
++
++	lgp->status = task->tk_status; /* read back by _nfs4_proc_layoutget() once its wait returns */
++	dprintk("<-- %s\n", __func__);
++}
++
++static void nfs4_layoutget_release(void *calldata) /* rpc_release callback; also called directly by _nfs4_proc_layoutget() on ENOMEM */
++{
++	struct nfs4_layoutget *lgp = calldata;
++
++	dprintk("--> %s\n", __func__);
++	pnfs_layout_release(NFS_I(lgp->args.inode)->layout, NULL);
++	if (lgp->res.layout.buf != NULL)
++		free_page((unsigned long) lgp->res.layout.buf); /* page obtained with __get_free_page() in _nfs4_proc_layoutget() */
++	kfree(calldata); /* lgp itself is freed here; it must not be touched afterwards */
++	dprintk("<-- %s\n", __func__);
++}
++
++static const struct rpc_call_ops nfs4_layoutget_call_ops = { /* callbacks for the task started in _nfs4_proc_layoutget() */
++	.rpc_call_prepare = nfs4_layoutget_prepare,
++	.rpc_call_done = nfs4_layoutget_done,
++	.rpc_release = nfs4_layoutget_release,
++};
++
++/* FIXME: We need to call nfs4_handle_exception
++ * and deal with retries.
++ * Currently we can't since we release lgp and its contents.
++ */
++static int _nfs4_proc_layoutget(struct nfs4_layoutget *lgp) /* one LAYOUTGET attempt: run the task, wait, then process the reply */
++{
++	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
++	struct rpc_task *task;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
++		.rpc_argp = &lgp->args,
++		.rpc_resp = &lgp->res,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = server->client,
++		.rpc_message = &msg,
++		.callback_ops = &nfs4_layoutget_call_ops,
++		.callback_data = lgp,
++		.flags = RPC_TASK_ASYNC, /* started asynchronously; completion is awaited explicitly below */
++	};
++	int status = 0;
++
++	dprintk("--> %s\n", __func__);
++
++	lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS); /* released with free_page() in nfs4_layoutget_release() */
++	if (lgp->res.layout.buf == NULL) {
++		nfs4_layoutget_release(lgp); /* this also kfree()s lgp */
++		return -ENOMEM;
++	}
++
++	lgp->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	status = nfs4_wait_for_completion_rpc_task(task);
++	if (status != 0)
++		goto out;
++	status = lgp->status; /* stored by nfs4_layoutget_done() */
++	if (status != 0)
++		goto out;
++	status = pnfs_layout_process(lgp); /* reached only when both the wait and the RPC reported 0 */
++out:
++	rpc_put_task(task);
++	dprintk("<-- %s status=%d\n", __func__, status);
++	return status;
++}
++
++int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) /* LAYOUTGET wrapped in the nfs4_handle_exception() retry loop */
++{
++	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
++	struct nfs4_exception exception = { };
++	int err;
++	do { /* NOTE(review): a retry passes the same lgp again, yet nfs4_layoutget_release() kfree()s it - see the FIXME on _nfs4_proc_layoutget(); confirm */
++		err = nfs4_handle_exception(server, _nfs4_proc_layoutget(lgp),
++					    &exception);
++	} while (exception.retry);
++	return err;
++}
++
++static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *data) /* rpc_call_prepare callback of nfs4_layoutcommit_ops */
++{
++	struct nfs4_layoutcommit_data *ldata =
++		(struct nfs4_layoutcommit_data *)data;
++	struct nfs_server *server = NFS_SERVER(ldata->args.inode);
++
++	if (nfs4_setup_sequence(server, NULL, &ldata->args.seq_args, /* NOTE(review): passes 1 where the other layout ops pass 0 - presumably a cache-reply flag; confirm */
++				&ldata->res.seq_res, 1, task))
++		return;
++	rpc_call_start(task);
++}
++
++static void /* rpc_call_done callback of nfs4_layoutcommit_ops */
++nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutcommit_data *data =
++		(struct nfs4_layoutcommit_data *)calldata;
++	struct nfs_server *server = NFS_SERVER(data->args.inode);
++
++	if (!nfs4_sequence_done(task, &data->res.seq_res))
++		return;
++
++	if (RPC_ASSASSINATED(task))
++		return;
++
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client);
++
++	data->status = task->tk_status; /* read back by _nfs4_proc_layoutcommit() in the issync case */
++}
++
++static void nfs4_layoutcommit_release(void *lcdata) /* rpc_release callback of nfs4_layoutcommit_ops */
++{
++	struct nfs4_layoutcommit_data *data = lcdata;
++	struct inode *inode = data->args.inode; /* saved now: *data is handed to pnfs_layoutcommit_free() before the final put_layout() */
++
++	put_rpccred(data->cred);
++	pnfs_cleanup_layoutcommit(lcdata);
++	pnfs_layoutcommit_free(lcdata); /* expected to release *data; do not dereference data after this */
++	/* Matched by get_layout in pnfs_layoutcommit_inode */
++	put_layout(inode);
++}
++
++static const struct rpc_call_ops nfs4_layoutcommit_ops = { /* callbacks for the task started in _nfs4_proc_layoutcommit() */
++	.rpc_call_prepare = nfs4_layoutcommit_prepare,
++	.rpc_call_done = nfs4_layoutcommit_done,
++	.rpc_release = nfs4_layoutcommit_release,
++};
++
++/* Execute a layoutcommit to the server; completion is awaited only when issync is non-zero */
++static int
++_nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, int issync)
++{
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTCOMMIT],
++		.rpc_argp = &data->args,
++		.rpc_resp = &data->res,
++		.rpc_cred = data->cred,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.task = &data->task, /* the rpc_task storage is embedded in *data */
++		.rpc_client = NFS_CLIENT(data->args.inode),
++		.rpc_message = &msg,
++		.callback_ops = &nfs4_layoutcommit_ops,
++		.callback_data = data,
++		.flags = RPC_TASK_ASYNC,
++	};
++	struct rpc_task *task;
++	int status = 0;
++
++	dprintk("NFS: %4d initiating layoutcommit call. %llu@%llu lbw: %llu "
++		"type: %d issync %d\n",
++		data->task.tk_pid,
++		data->args.range.length,
++		data->args.range.offset,
++		data->args.lastbytewritten,
++		data->args.layout_type, issync);
++
++	data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	if (!issync) /* asynchronous request: leave with status still 0 */
++		goto out;
++	status = nfs4_wait_for_completion_rpc_task(task);
++	if (status != 0)
++		goto out;
++	status = data->status; /* stored by nfs4_layoutcommit_done() */
++out:
++	dprintk("%s: status %d\n", __func__, status);
++	rpc_put_task(task);
++	return 0; /* NOTE(review): status is logged above but 0 is returned regardless - confirm this is intentional */
++}
++
++int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, int issync) /* LAYOUTCOMMIT wrapped in the nfs4_handle_exception() loop */
++{
++	struct nfs4_exception exception = { };
++	struct nfs_server *server = NFS_SERVER(data->args.inode);
++	int err;
++
++	do { /* once the task has started, _nfs4_proc_layoutcommit() returns 0, so only start-up errors reach nfs4_handle_exception() */
++		err = nfs4_handle_exception(server,
++					_nfs4_proc_layoutcommit(data, issync),
++					&exception);
++	} while (exception.retry);
++	return err;
++}
++
++static void /* rpc_call_prepare callback of nfs4_layoutreturn_call_ops */
++nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
++{
++	struct nfs4_layoutreturn *lrp = calldata;
++	struct inode *ino = lrp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++	if (nfs4_setup_sequence(server, NULL, &lrp->args.seq_args,
++				&lrp->res.seq_res, 0, task))
++		return; /* non-zero from nfs4_setup_sequence(): the call is not started from here */
++	rpc_call_start(task);
++}
++
++static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) /* rpc_call_done callback of nfs4_layoutreturn_call_ops */
++{
++	struct nfs4_layoutreturn *lrp = calldata;
++	struct inode *ino = lrp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++
++	dprintk("--> %s\n", __func__);
++
++	if (!nfs4_sequence_done(task, &lrp->res.seq_res))
++		return;
++
++	if (RPC_ASSASSINATED(task))
++		return;
++
++	if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
++		nfs_restart_rpc(task, server->nfs_client); /* unlike the layoutget/layoutcommit callbacks, no status is copied into lrp */
++
++	dprintk("<-- %s\n", __func__);
++}
++
++static void nfs4_layoutreturn_release(void *calldata) /* rpc_release callback of nfs4_layoutreturn_call_ops */
++{
++	struct nfs4_layoutreturn *lrp = calldata;
++	struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
++
++	dprintk("--> %s return_type %d lo %p\n", __func__,
++		lrp->args.return_type, lo);
++
++	if (lrp->args.return_type == RETURN_FILE) { /* layout bookkeeping is done only for RETURN_FILE */
++		if (!lrp->res.lrs_present) /* presumably: the reply carried no stateid - confirm */
++			pnfs_set_layout_stateid(lo, &zero_stateid);
++		pnfs_layout_release(lo, &lrp->args.range);
++	}
++	kfree(calldata); /* lrp is freed here for every return type */
++	dprintk("<-- %s\n", __func__);
++}
++
++static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { /* callbacks for the task started in _nfs4_proc_layoutreturn() */
++	.rpc_call_prepare = nfs4_layoutreturn_prepare,
++	.rpc_call_done = nfs4_layoutreturn_done,
++	.rpc_release = nfs4_layoutreturn_release,
++};
++
++int _nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync) /* one LAYOUTRETURN attempt; waits for completion only when issync is true */
++{
++	struct inode *ino = lrp->args.inode;
++	struct nfs_server *server = NFS_SERVER(ino);
++	struct rpc_task *task;
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN],
++		.rpc_argp = &lrp->args,
++		.rpc_resp = &lrp->res,
++	};
++	struct rpc_task_setup task_setup_data = {
++		.rpc_client = server->client,
++		.rpc_message = &msg,
++		.callback_ops = &nfs4_layoutreturn_call_ops,
++		.callback_data = lrp, /* kfree()d by nfs4_layoutreturn_release() */
++		.flags = RPC_TASK_ASYNC,
++	};
++	int status = 0;
++
++	dprintk("--> %s\n", __func__);
++	lrp->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	if (!issync) /* asynchronous request: return 0 without waiting */
++		goto out;
++	status = nfs4_wait_for_completion_rpc_task(task);
++	if (status != 0)
++		goto out;
++	status = task->tk_status; /* taken from the task itself, not from lrp */
++out:
++	dprintk("<-- %s\n", __func__);
++	rpc_put_task(task);
++	return status;
++}
++
++int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync) /* LAYOUTRETURN wrapped in the nfs4_handle_exception() retry loop */
++{
++	struct nfs_server *server = NFS_SERVER(lrp->args.inode);
++	struct nfs4_exception exception = { };
++	int err;
++	do { /* NOTE(review): a retry passes the same lrp again, yet nfs4_layoutreturn_release() kfree()s it - confirm */
++		err = nfs4_handle_exception(server,
++				_nfs4_proc_layoutreturn(lrp, issync),
++				&exception);
++	} while (exception.retry);
++
++	return err;
++}
++
++/*
++ * Retrieve the list of Data Server devices from the MDS (one synchronous attempt).
++ */
++static int _nfs4_getdevicelist(struct nfs_server *server,
++				    const struct nfs_fh *fh,
++				    struct pnfs_devicelist *devlist)
++{
++	struct nfs4_getdevicelist_args args = {
++		.fh = fh,
++		.layoutclass = server->pnfs_curr_ld->id, /* id of the server's current pnfs_curr_ld */
++	};
++	struct nfs4_getdevicelist_res res = {
++		.devlist = devlist, /* the reply is decoded into the caller's devlist */
++	};
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICELIST],
++		.rpc_argp = &args,
++		.rpc_resp = &res,
++	};
++	int status;
++
++	dprintk("--> %s\n", __func__);
++	status = nfs4_call_sync(server, &msg, &args, &res, 0);
++	dprintk("<-- %s status=%d\n", __func__, status);
++	return status;
++}
++
++int nfs4_proc_getdevicelist(struct nfs_server *server, /* GETDEVICELIST wrapped in the nfs4_handle_exception() retry loop */
++			    const struct nfs_fh *fh,
++			    struct pnfs_devicelist *devlist)
++{
++	struct nfs4_exception exception = { };
++	int err;
++
++	do {
++		err = nfs4_handle_exception(server,
++				_nfs4_getdevicelist(server, fh, devlist),
++				&exception);
++	} while (exception.retry);
++
++	dprintk("nfs4_pnfs_getdevlist: err=%d, num_devs=%u\n",
++		err, devlist->num_devs); /* the log tag is a literal and differs from this function's name */
++
++	return err;
++}
++
++int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) /* single synchronous GETDEVICEINFO; no nfs4_handle_exception() loop here */
++{
++	struct nfs4_getdeviceinfo_args args = {
++		.pdev = pdev,
++	};
++	struct nfs4_getdeviceinfo_res res = {
++		.pdev = pdev, /* the same pdev object is used for both the request and the reply */
++	};
++	struct rpc_message msg = {
++		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO],
++		.rpc_argp = &args,
++		.rpc_resp = &res,
++	};
++	int status;
++
++	dprintk("--> %s\n", __func__);
++	status = nfs4_call_sync(server, &msg, &args, &res, 0);
++	dprintk("<-- %s status=%d\n", __func__, status);
++
++	return status;
++}
++
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
+@@ -5327,28 +5909,30 @@ struct nfs4_state_maintenance_ops nfs41_
+ };
+ #endif
+ 
+-/*
+- * Per minor version reboot and network partition recovery ops
+- */
+-
+-struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[] = {
+-	&nfs40_reboot_recovery_ops,
+-#if defined(CONFIG_NFS_V4_1)
+-	&nfs41_reboot_recovery_ops,
+-#endif
++static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
++	.minor_version = 0,
++	.call_sync = _nfs4_call_sync,
++	.validate_stateid = nfs4_validate_delegation_stateid,
++	.reboot_recovery_ops = &nfs40_reboot_recovery_ops,
++	.nograce_recovery_ops = &nfs40_nograce_recovery_ops,
++	.state_renewal_ops = &nfs40_state_renewal_ops,
+ };
+ 
+-struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[] = {
+-	&nfs40_nograce_recovery_ops,
+ #if defined(CONFIG_NFS_V4_1)
+-	&nfs41_nograce_recovery_ops,
+-#endif
++static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
++	.minor_version = 1,
++	.call_sync = _nfs4_call_sync_session,
++	.validate_stateid = nfs41_validate_delegation_stateid,
++	.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
++	.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
++	.state_renewal_ops = &nfs41_state_renewal_ops,
+ };
++#endif
+ 
+-struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[] = {
+-	&nfs40_state_renewal_ops,
++const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
++	[0] = &nfs_v4_0_minor_ops,
+ #if defined(CONFIG_NFS_V4_1)
+-	&nfs41_state_renewal_ops,
++	[1] = &nfs_v4_1_minor_ops,
+ #endif
+ };
+ 
+@@ -5366,6 +5950,7 @@ const struct nfs_rpc_ops nfs_v4_clientop
+ 	.dentry_ops	= &nfs4_dentry_operations,
+ 	.dir_inode_ops	= &nfs4_dir_inode_operations,
+ 	.file_inode_ops	= &nfs4_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs4_proc_get_root,
+ 	.getattr	= nfs4_proc_getattr,
+ 	.setattr	= nfs4_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig linux-2.6.34.noarch/fs/nfs/nfs4renewd.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4renewd.c.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4renewd.c	2010-09-30 10:17:08.679993000 -0400
+@@ -54,17 +54,17 @@
+ void
+ nfs4_renew_state(struct work_struct *work)
+ {
+-	struct nfs4_state_maintenance_ops *ops;
++	const struct nfs4_state_maintenance_ops *ops;
+ 	struct nfs_client *clp =
+ 		container_of(work, struct nfs_client, cl_renewd.work);
+ 	struct rpc_cred *cred;
+ 	long lease;
+ 	unsigned long last, now;
+ 
+-	ops = nfs4_state_renewal_ops[clp->cl_minorversion];
++	ops = clp->cl_mvops->state_renewal_ops;
+ 	dprintk("%s: start\n", __func__);
+ 	/* Are there any active superblocks? */
+-	if (list_empty(&clp->cl_superblocks))
++	if (list_empty(&clp->cl_superblocks) && !is_ds_only_client(clp))
+ 		goto out;
+ 	spin_lock(&clp->cl_lock);
+ 	lease = clp->cl_lease_time;
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig linux-2.6.34.noarch/fs/nfs/nfs4state.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4state.c.orig	2010-09-30 10:15:17.863715000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4state.c	2010-09-30 10:17:08.685993000 -0400
+@@ -48,11 +48,13 @@
+ #include <linux/random.h>
+ #include <linux/workqueue.h>
+ #include <linux/bitops.h>
++#include <linux/nfs4_pnfs.h>
+ 
+ #include "nfs4_fs.h"
+ #include "callback.h"
+ #include "delegation.h"
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ #define OPENOWNER_POOL_SIZE	8
+ 
+@@ -126,6 +128,11 @@ static int nfs41_setup_state_renewal(str
+ 	int status;
+ 	struct nfs_fsinfo fsinfo;
+ 
++	if (is_ds_only_client(clp)) {
++		nfs4_schedule_state_renewal(clp);
++		return 0;
++	}
++
+ 	status = nfs4_proc_get_lease_time(clp, &fsinfo);
+ 	if (status == 0) {
+ 		/* Update lease time and schedule renewal */
+@@ -145,7 +152,9 @@ static void nfs4_end_drain_session(struc
+ 	struct nfs4_session *ses = clp->cl_session;
+ 	int max_slots;
+ 
+-	if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) {
++	if (ses == NULL)
++		return;
++	if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
+ 		spin_lock(&ses->fc_slot_table.slot_tbl_lock);
+ 		max_slots = ses->fc_slot_table.max_slots;
+ 		while (max_slots--) {
+@@ -167,7 +176,7 @@ static int nfs4_begin_drain_session(stru
+ 	struct nfs4_slot_table *tbl = &ses->fc_slot_table;
+ 
+ 	spin_lock(&tbl->slot_tbl_lock);
+-	set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state);
++	set_bit(NFS4_SESSION_DRAINING, &ses->session_state);
+ 	if (tbl->highest_used_slotid != -1) {
+ 		INIT_COMPLETION(ses->complete);
+ 		spin_unlock(&tbl->slot_tbl_lock);
+@@ -371,7 +380,6 @@ nfs4_alloc_state_owner(void)
+ 		return NULL;
+ 	spin_lock_init(&sp->so_lock);
+ 	INIT_LIST_HEAD(&sp->so_states);
+-	INIT_LIST_HEAD(&sp->so_delegations);
+ 	rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue");
+ 	sp->so_seqid.sequence = &sp->so_sequence;
+ 	spin_lock_init(&sp->so_sequence.lock);
+@@ -384,7 +392,7 @@ static void
+ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
+ {
+ 	if (!RB_EMPTY_NODE(&sp->so_client_node)) {
+-		struct nfs_client *clp = sp->so_client;
++		struct nfs_client *clp = sp->so_server->nfs_client;
+ 
+ 		spin_lock(&clp->cl_lock);
+ 		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
+@@ -406,7 +414,6 @@ struct nfs4_state_owner *nfs4_get_state_
+ 	new = nfs4_alloc_state_owner();
+ 	if (new == NULL)
+ 		return NULL;
+-	new->so_client = clp;
+ 	new->so_server = server;
+ 	new->so_cred = cred;
+ 	spin_lock(&clp->cl_lock);
+@@ -423,7 +430,7 @@ struct nfs4_state_owner *nfs4_get_state_
+ 
+ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
+ {
+-	struct nfs_client *clp = sp->so_client;
++	struct nfs_client *clp = sp->so_server->nfs_client;
+ 	struct rpc_cred *cred = sp->so_cred;
+ 
+ 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
+@@ -583,8 +590,24 @@ static void __nfs4_close(struct path *pa
+ 	if (!call_close) {
+ 		nfs4_put_open_state(state);
+ 		nfs4_put_state_owner(owner);
+-	} else
++	} else {
++		u32 roc_iomode;
++		struct nfs_inode *nfsi = NFS_I(state->inode);
++
++		if (has_layout(nfsi) &&
++		    (roc_iomode = pnfs_layout_roc_iomode(nfsi)) != 0) {
++			struct pnfs_layout_range range = {
++				.iomode = roc_iomode,
++				.offset = 0,
++				.length = NFS4_MAX_UINT64,
++			};
++
++			pnfs_return_layout(state->inode, &range, NULL,
++					   RETURN_FILE, wait);
++		}
++
+ 		nfs4_do_close(path, state, gfp_mask, wait);
++	}
+ }
+ 
+ void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
+@@ -602,12 +625,21 @@ void nfs4_close_sync(struct path *path, 
+  * that is compatible with current->files
+  */
+ static struct nfs4_lock_state *
+-__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
++__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *pos;
+ 	list_for_each_entry(pos, &state->lock_states, ls_locks) {
+-		if (pos->ls_owner != fl_owner)
++		if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type)
+ 			continue;
++		switch (pos->ls_owner.lo_type) {
++		case NFS4_POSIX_LOCK_TYPE:
++			if (pos->ls_owner.lo_u.posix_owner != fl_owner)
++				continue;
++			break;
++		case NFS4_FLOCK_LOCK_TYPE:
++			if (pos->ls_owner.lo_u.flock_owner != fl_pid)
++				continue;
++		}
+ 		atomic_inc(&pos->ls_count);
+ 		return pos;
+ 	}
+@@ -619,10 +651,10 @@ __nfs4_find_lock_state(struct nfs4_state
+  * exists, return an uninitialized one.
+  *
+  */
+-static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
++static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *lsp;
+-	struct nfs_client *clp = state->owner->so_client;
++	struct nfs_client *clp = state->owner->so_server->nfs_client;
+ 
+ 	lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
+ 	if (lsp == NULL)
+@@ -633,7 +665,18 @@ static struct nfs4_lock_state *nfs4_allo
+ 	lsp->ls_seqid.sequence = &lsp->ls_sequence;
+ 	atomic_set(&lsp->ls_count, 1);
+ 	lsp->ls_state = state;
+-	lsp->ls_owner = fl_owner;
++	lsp->ls_owner.lo_type = type;
++	switch (lsp->ls_owner.lo_type) {
++	case NFS4_FLOCK_LOCK_TYPE:
++		lsp->ls_owner.lo_u.flock_owner = fl_pid;
++		break;
++	case NFS4_POSIX_LOCK_TYPE:
++		lsp->ls_owner.lo_u.posix_owner = fl_owner;
++		break;
++	default:
++		kfree(lsp);
++		return NULL;
++	}
+ 	spin_lock(&clp->cl_lock);
+ 	nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
+ 	spin_unlock(&clp->cl_lock);
+@@ -643,7 +686,7 @@ static struct nfs4_lock_state *nfs4_allo
+ 
+ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
+ {
+-	struct nfs_client *clp = lsp->ls_state->owner->so_client;
++	struct nfs_client *clp = lsp->ls_state->owner->so_server->nfs_client;
+ 
+ 	spin_lock(&clp->cl_lock);
+ 	nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
+@@ -657,13 +700,13 @@ static void nfs4_free_lock_state(struct 
+  * exists, return an uninitialized one.
+  *
+  */
+-static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
++static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type)
+ {
+ 	struct nfs4_lock_state *lsp, *new = NULL;
+ 	
+ 	for(;;) {
+ 		spin_lock(&state->state_lock);
+-		lsp = __nfs4_find_lock_state(state, owner);
++		lsp = __nfs4_find_lock_state(state, owner, pid, type);
+ 		if (lsp != NULL)
+ 			break;
+ 		if (new != NULL) {
+@@ -674,7 +717,7 @@ static struct nfs4_lock_state *nfs4_get_
+ 			break;
+ 		}
+ 		spin_unlock(&state->state_lock);
+-		new = nfs4_alloc_lock_state(state, owner);
++		new = nfs4_alloc_lock_state(state, owner, pid, type);
+ 		if (new == NULL)
+ 			return NULL;
+ 	}
+@@ -701,6 +744,8 @@ void nfs4_put_lock_state(struct nfs4_loc
+ 	if (list_empty(&state->lock_states))
+ 		clear_bit(LK_STATE_IN_USE, &state->flags);
+ 	spin_unlock(&state->state_lock);
++	if (lsp->ls_flags & NFS_LOCK_INITIALIZED)
++		nfs4_release_lockowner(lsp);
+ 	nfs4_free_lock_state(lsp);
+ }
+ 
+@@ -728,7 +773,12 @@ int nfs4_set_lock_state(struct nfs4_stat
+ 
+ 	if (fl->fl_ops != NULL)
+ 		return 0;
+-	lsp = nfs4_get_lock_state(state, fl->fl_owner);
++	if (fl->fl_flags & FL_POSIX)
++		lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
++	else if (fl->fl_flags & FL_FLOCK)
++		lsp = nfs4_get_lock_state(state, 0, fl->fl_pid, NFS4_FLOCK_LOCK_TYPE);
++	else
++		return -EINVAL;
+ 	if (lsp == NULL)
+ 		return -ENOMEM;
+ 	fl->fl_u.nfs4_fl.owner = lsp;
+@@ -740,7 +790,7 @@ int nfs4_set_lock_state(struct nfs4_stat
+  * Byte-range lock aware utility to initialize the stateid of read/write
+  * requests.
+  */
+-void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
++void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid)
+ {
+ 	struct nfs4_lock_state *lsp;
+ 	int seq;
+@@ -753,7 +803,7 @@ void nfs4_copy_stateid(nfs4_stateid *dst
+ 		return;
+ 
+ 	spin_lock(&state->state_lock);
+-	lsp = __nfs4_find_lock_state(state, fl_owner);
++	lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
+ 	if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
+ 		memcpy(dst, &lsp->ls_stateid, sizeof(*dst));
+ 	spin_unlock(&state->state_lock);
+@@ -1031,8 +1081,8 @@ restart:
+ 				 * Open state on this file cannot be recovered
+ 				 * All we can do is revert to using the zero stateid.
+ 				 */
+-				memset(state->stateid.data, 0,
+-					sizeof(state->stateid.data));
++				memset(state->stateid.u.data, 0,
++					sizeof(state->stateid.u.data));
+ 				/* Mark the file as being 'closed' */
+ 				state->state = 0;
+ 				break;
+@@ -1041,11 +1091,11 @@ restart:
+ 			case -NFS4ERR_BAD_STATEID:
+ 			case -NFS4ERR_RECLAIM_BAD:
+ 			case -NFS4ERR_RECLAIM_CONFLICT:
+-				nfs4_state_mark_reclaim_nograce(sp->so_client, state);
++				nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ 				break;
+ 			case -NFS4ERR_EXPIRED:
+ 			case -NFS4ERR_NO_GRACE:
+-				nfs4_state_mark_reclaim_nograce(sp->so_client, state);
++				nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ 			case -NFS4ERR_STALE_CLIENTID:
+ 			case -NFS4ERR_BADSESSION:
+ 			case -NFS4ERR_BADSLOT:
+@@ -1120,8 +1170,7 @@ static void nfs4_state_end_reclaim_reboo
+ 	if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+ 		return;
+ 
+-	nfs4_reclaim_complete(clp,
+-		nfs4_reboot_recovery_ops[clp->cl_minorversion]);
++	nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
+ 
+ 	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+ 		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
+@@ -1211,8 +1260,8 @@ restart:
+ static int nfs4_check_lease(struct nfs_client *clp)
+ {
+ 	struct rpc_cred *cred;
+-	struct nfs4_state_maintenance_ops *ops =
+-		nfs4_state_renewal_ops[clp->cl_minorversion];
++	const struct nfs4_state_maintenance_ops *ops =
++		clp->cl_mvops->state_renewal_ops;
+ 	int status = -NFS4ERR_EXPIRED;
+ 
+ 	/* Is the client already known to have an expired lease? */
+@@ -1235,8 +1284,8 @@ out:
+ static int nfs4_reclaim_lease(struct nfs_client *clp)
+ {
+ 	struct rpc_cred *cred;
+-	struct nfs4_state_recovery_ops *ops =
+-		nfs4_reboot_recovery_ops[clp->cl_minorversion];
++	const struct nfs4_state_recovery_ops *ops =
++		clp->cl_mvops->reboot_recovery_ops;
+ 	int status = -ENOENT;
+ 
+ 	cred = ops->get_clid_cred(clp);
+@@ -1421,6 +1470,7 @@ static void nfs4_state_manager(struct nf
+ 			}
+ 			clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
+ 			set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
++			pnfs_destroy_all_layouts(clp);
+ 		}
+ 
+ 		if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
+@@ -1444,7 +1494,7 @@ static void nfs4_state_manager(struct nf
+ 		/* First recover reboot state... */
+ 		if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
+ 			status = nfs4_do_reclaim(clp,
+-				nfs4_reboot_recovery_ops[clp->cl_minorversion]);
++				clp->cl_mvops->reboot_recovery_ops);
+ 			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
+ 				continue;
+@@ -1458,7 +1508,7 @@ static void nfs4_state_manager(struct nf
+ 		/* Now recover expired state... */
+ 		if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
+ 			status = nfs4_do_reclaim(clp,
+-				nfs4_nograce_recovery_ops[clp->cl_minorversion]);
++				clp->cl_mvops->nograce_recovery_ops);
+ 			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
+ 			    test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+diff -up linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig linux-2.6.34.noarch/fs/nfs/nfs4xdr.c
+--- linux-2.6.34.noarch/fs/nfs/nfs4xdr.c.orig	2010-09-30 10:15:17.872720000 -0400
++++ linux-2.6.34.noarch/fs/nfs/nfs4xdr.c	2010-09-30 10:17:08.709998000 -0400
+@@ -50,8 +50,10 @@
+ #include <linux/nfs4.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/nfs_idmap.h>
++#include <linux/nfs4_pnfs.h>
+ #include "nfs4_fs.h"
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_XDR
+ 
+@@ -89,7 +91,7 @@ static int nfs4_stat_to_errno(int);
+ #define encode_getfh_maxsz      (op_encode_hdr_maxsz)
+ #define decode_getfh_maxsz      (op_decode_hdr_maxsz + 1 + \
+ 				((3+NFS4_FHSIZE) >> 2))
+-#define nfs4_fattr_bitmap_maxsz 3
++#define nfs4_fattr_bitmap_maxsz 4
+ #define encode_getattr_maxsz    (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
+ #define nfs4_name_maxsz		(1 + ((3 + NFS4_MAXNAMLEN) >> 2))
+ #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
+@@ -111,7 +113,11 @@ static int nfs4_stat_to_errno(int);
+ #define encode_restorefh_maxsz  (op_encode_hdr_maxsz)
+ #define decode_restorefh_maxsz  (op_decode_hdr_maxsz)
+ #define encode_fsinfo_maxsz	(encode_getattr_maxsz)
+-#define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + 11)
++/* The 5 accounts for the PNFS attributes, and assumes that at most three
++ * layout types will be returned.
++ */
++#define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + \
++				 nfs4_fattr_bitmap_maxsz + 8 + 5)
+ #define encode_renew_maxsz	(op_encode_hdr_maxsz + 3)
+ #define decode_renew_maxsz	(op_decode_hdr_maxsz)
+ #define encode_setclientid_maxsz \
+@@ -202,14 +208,17 @@ static int nfs4_stat_to_errno(int);
+ #define encode_link_maxsz	(op_encode_hdr_maxsz + \
+ 				nfs4_name_maxsz)
+ #define decode_link_maxsz	(op_decode_hdr_maxsz + decode_change_info_maxsz)
++#define encode_lockowner_maxsz	(7)
+ #define encode_lock_maxsz	(op_encode_hdr_maxsz + \
+ 				 7 + \
+-				 1 + encode_stateid_maxsz + 8)
++				 1 + encode_stateid_maxsz + 1 + \
++				 encode_lockowner_maxsz)
+ #define decode_lock_denied_maxsz \
+ 				(8 + decode_lockowner_maxsz)
+ #define decode_lock_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_lock_denied_maxsz)
+-#define encode_lockt_maxsz	(op_encode_hdr_maxsz + 12)
++#define encode_lockt_maxsz	(op_encode_hdr_maxsz + 5 + \
++				encode_lockowner_maxsz)
+ #define decode_lockt_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_lock_denied_maxsz)
+ #define encode_locku_maxsz	(op_encode_hdr_maxsz + 3 + \
+@@ -217,6 +226,11 @@ static int nfs4_stat_to_errno(int);
+ 				 4)
+ #define decode_locku_maxsz	(op_decode_hdr_maxsz + \
+ 				 decode_stateid_maxsz)
++#define encode_release_lockowner_maxsz \
++				(op_encode_hdr_maxsz + \
++				 encode_lockowner_maxsz)
++#define decode_release_lockowner_maxsz \
++				(op_decode_hdr_maxsz)
+ #define encode_access_maxsz	(op_encode_hdr_maxsz + 1)
+ #define decode_access_maxsz	(op_decode_hdr_maxsz + 2)
+ #define encode_symlink_maxsz	(op_encode_hdr_maxsz + \
+@@ -302,6 +316,35 @@ static int nfs4_stat_to_errno(int);
+ 				XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
+ #define encode_reclaim_complete_maxsz	(op_encode_hdr_maxsz + 4)
+ #define decode_reclaim_complete_maxsz	(op_decode_hdr_maxsz + 4)
++#define encode_getdevicelist_maxsz (op_encode_hdr_maxsz + 4 + \
++				encode_verifier_maxsz)
++#define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 +  \
++				decode_verifier_maxsz +             \
++				XDR_QUADLEN(NFS4_PNFS_GETDEVLIST_MAXNUM *  \
++				NFS4_PNFS_DEVICEID4_SIZE))
++#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \
++				XDR_QUADLEN(NFS4_PNFS_DEVICEID4_SIZE))
++#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \
++				4 /*layout type */ + \
++				4 /* opaque devaddr4 length */ +\
++				4 /* notification bitmap length */ + \
++				4 /* notification bitmap */)
++#define encode_layoutget_maxsz	(op_encode_hdr_maxsz + 10 + \
++				encode_stateid_maxsz)
++#define decode_layoutget_maxsz	(op_decode_hdr_maxsz + 8 + \
++				decode_stateid_maxsz + \
++				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
++#define encode_layoutcommit_maxsz (18 +                           \
++				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + \
++				op_encode_hdr_maxsz +          \
++				encode_stateid_maxsz)
++#define decode_layoutcommit_maxsz (3 + op_decode_hdr_maxsz)
++#define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \
++				encode_stateid_maxsz + \
++				1 /* FIXME: opaque lrf_body always empty at
++				   *the moment */)
++#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
++				1 + decode_stateid_maxsz)
+ #else /* CONFIG_NFS_V4_1 */
+ #define encode_sequence_maxsz	0
+ #define decode_sequence_maxsz	0
+@@ -471,6 +514,12 @@ static int nfs4_stat_to_errno(int);
+ 				decode_sequence_maxsz + \
+ 				decode_putfh_maxsz + \
+ 				decode_locku_maxsz)
++#define NFS4_enc_release_lockowner_sz /* op header + lock owner */ \
++				(compound_encode_hdr_maxsz + \
++				 encode_release_lockowner_maxsz)
++#define NFS4_dec_release_lockowner_sz /* reply is op header only */ \
++				(compound_decode_hdr_maxsz + \
++				 decode_release_lockowner_maxsz)
+ #define NFS4_enc_access_sz	(compound_encode_hdr_maxsz + \
+ 				encode_sequence_maxsz + \
+ 				encode_putfh_maxsz + \
+@@ -685,6 +734,60 @@ static int nfs4_stat_to_errno(int);
+ #define NFS4_dec_reclaim_complete_sz	(compound_decode_hdr_maxsz + \
+ 					 decode_sequence_maxsz + \
+ 					 decode_reclaim_complete_maxsz)
++#define NFS4_enc_getdevicelist_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz + \
++				encode_getdevicelist_maxsz)
++#define NFS4_dec_getdevicelist_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_getdevicelist_maxsz)
++#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz +    \
++				encode_sequence_maxsz +\
++				encode_getdeviceinfo_maxsz)
++#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz +    \
++				decode_sequence_maxsz + \
++				decode_getdeviceinfo_maxsz)
++#define NFS4_enc_layoutget_sz	(compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz +        \
++				encode_layoutget_maxsz)
++#define NFS4_dec_layoutget_sz	(compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz +        \
++				decode_layoutget_maxsz)
++#define NFS4_enc_layoutcommit_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz +\
++				encode_putfh_maxsz + \
++				encode_layoutcommit_maxsz + \
++				encode_getattr_maxsz)
++#define NFS4_dec_layoutcommit_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_layoutcommit_maxsz + \
++				decode_getattr_maxsz)
++#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + \
++				encode_putfh_maxsz + \
++				encode_layoutreturn_maxsz)
++#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_layoutreturn_maxsz)
++#define NFS4_enc_dswrite_sz	(compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz +\
++				encode_putfh_maxsz + \
++				encode_write_maxsz)
++#define NFS4_dec_dswrite_sz	(compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + \
++				decode_putfh_maxsz + \
++				decode_write_maxsz)
++#define NFS4_enc_dscommit_sz	(compound_encode_hdr_maxsz + \
++				encode_sequence_maxsz + encode_putfh_maxsz + \
++				encode_commit_maxsz)
++#define NFS4_dec_dscommit_sz	(compound_decode_hdr_maxsz + \
++				decode_sequence_maxsz + decode_putfh_maxsz + \
++				decode_commit_maxsz)
+ 
+ const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
+ 				      compound_encode_hdr_maxsz +
+@@ -915,7 +1018,7 @@ static void encode_close(struct xdr_stre
+ 	p = reserve_space(xdr, 8+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_CLOSE);
+ 	*p++ = cpu_to_be32(arg->seqid->sequence->counter);
+-	xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_close_maxsz;
+ }
+@@ -989,6 +1092,35 @@ static void encode_getattr_two(struct xd
+ 	hdr->replen += decode_getattr_maxsz;
+ }
+ 
++static void
++encode_getattr_three(struct xdr_stream *xdr,
++		     uint32_t bm0, uint32_t bm1, uint32_t bm2,
++		     struct compound_hdr *hdr)
++{
++	__be32 *p;
++
++	p = reserve_space(xdr, 4);
++	*p = cpu_to_be32(OP_GETATTR);
++	if (bm2) {
++		p = reserve_space(xdr, 16);
++		*p++ = cpu_to_be32(3);
++		*p++ = cpu_to_be32(bm0);
++		*p++ = cpu_to_be32(bm1);
++		*p = cpu_to_be32(bm2);
++	} else if (bm1) {
++		p = reserve_space(xdr, 12);
++		*p++ = cpu_to_be32(2);
++		*p++ = cpu_to_be32(bm0);
++		*p = cpu_to_be32(bm1);
++	} else {
++		p = reserve_space(xdr, 8);
++		*p++ = cpu_to_be32(1);
++		*p = cpu_to_be32(bm0);
++	}
++	hdr->nops++;
++	hdr->replen += decode_getattr_maxsz;
++}
++
+ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+ {
+ 	encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0],
+@@ -997,8 +1129,11 @@ static void encode_getfattr(struct xdr_s
+ 
+ static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+ {
+-	encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0],
+-			   bitmask[1] & nfs4_fsinfo_bitmap[1], hdr);
++	encode_getattr_three(xdr,
++			     bitmask[0] & nfs4_fsinfo_bitmap[0],
++			     bitmask[1] & nfs4_fsinfo_bitmap[1],
++			     bitmask[2] & nfs4_fsinfo_bitmap[2],
++			     hdr);
+ }
+ 
+ static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
+@@ -1042,6 +1177,17 @@ static inline uint64_t nfs4_lock_length(
+ 	return fl->fl_end - fl->fl_start + 1;
+ }
+ 
++static void encode_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner)
++{
++	__be32 *p;
++
++	p = reserve_space(xdr, 28);	/* 8 clientid + 4 length + 16 owner */
++	p = xdr_encode_hyper(p, lowner->clientid);
++	*p++ = cpu_to_be32(16);		/* owner length: 8-byte tag + 8-byte id */
++	p = xdr_encode_opaque_fixed(p, "lock id:", 8);
++	xdr_encode_hyper(p, lowner->id);
++}
++
+ /*
+  * opcode,type,reclaim,offset,length,new_lock_owner = 32
+  * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40
+@@ -1058,18 +1204,16 @@ static void encode_lock(struct xdr_strea
+ 	p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+ 	*p = cpu_to_be32(args->new_lock_owner);
+ 	if (args->new_lock_owner){
+-		p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32);
++		p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 		*p++ = cpu_to_be32(args->open_seqid->sequence->counter);
+-		p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
++		p = xdr_encode_opaque_fixed(p, args->open_stateid->u.data,
++					    NFS4_STATEID_SIZE);
+ 		*p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
+-		p = xdr_encode_hyper(p, args->lock_owner.clientid);
+-		*p++ = cpu_to_be32(16);
+-		p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+-		xdr_encode_hyper(p, args->lock_owner.id);
++		encode_lockowner(xdr, &args->lock_owner);
+ 	}
+ 	else {
+ 		p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
+-		p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE);
++		p = xdr_encode_opaque_fixed(p, args->lock_stateid->u.data, NFS4_STATEID_SIZE);
+ 		*p = cpu_to_be32(args->lock_seqid->sequence->counter);
+ 	}
+ 	hdr->nops++;
+@@ -1080,15 +1224,12 @@ static void encode_lockt(struct xdr_stre
+ {
+ 	__be32 *p;
+ 
+-	p = reserve_space(xdr, 52);
++	p = reserve_space(xdr, 24);
+ 	*p++ = cpu_to_be32(OP_LOCKT);
+ 	*p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
+ 	p = xdr_encode_hyper(p, args->fl->fl_start);
+ 	p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+-	p = xdr_encode_hyper(p, args->lock_owner.clientid);
+-	*p++ = cpu_to_be32(16);
+-	p = xdr_encode_opaque_fixed(p, "lock id:", 8);
+-	xdr_encode_hyper(p, args->lock_owner.id);
++	encode_lockowner(xdr, &args->lock_owner);
+ 	hdr->nops++;
+ 	hdr->replen += decode_lockt_maxsz;
+ }
+@@ -1101,13 +1242,25 @@ static void encode_locku(struct xdr_stre
+ 	*p++ = cpu_to_be32(OP_LOCKU);
+ 	*p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
+ 	*p++ = cpu_to_be32(args->seqid->sequence->counter);
+-	p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, args->stateid->u.data,
++				    NFS4_STATEID_SIZE);
+ 	p = xdr_encode_hyper(p, args->fl->fl_start);
+ 	xdr_encode_hyper(p, nfs4_lock_length(args->fl));
+ 	hdr->nops++;
+ 	hdr->replen += decode_locku_maxsz;
+ }
+ 
++static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr)
++{
++	__be32 *p;
++
++	p = reserve_space(xdr, 4);
++	*p = cpu_to_be32(OP_RELEASE_LOCKOWNER);
++	encode_lockowner(xdr, lowner);
++	hdr->nops++;
++	hdr->replen += decode_release_lockowner_maxsz;
++}
++
+ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
+ {
+ 	int len = name->len;
+@@ -1172,7 +1325,7 @@ static inline void encode_createmode(str
+ 		break;
+ 	default:
+ 		clp = arg->server->nfs_client;
+-		if (clp->cl_minorversion > 0) {
++		if (clp->cl_mvops->minor_version > 0) {
+ 			if (nfs4_has_persistent_session(clp)) {
+ 				*p = cpu_to_be32(NFS4_CREATE_GUARDED);
+ 				encode_attrs(xdr, arg->u.attrs, arg->server);
+@@ -1251,7 +1404,7 @@ static inline void encode_claim_delegate
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
+-	xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, stateid->u.data, NFS4_STATEID_SIZE);
+ 	encode_string(xdr, name->len, name->name);
+ }
+ 
+@@ -1282,7 +1435,7 @@ static void encode_open_confirm(struct x
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 	*p++ = cpu_to_be32(OP_OPEN_CONFIRM);
+-	p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	*p = cpu_to_be32(arg->seqid->sequence->counter);
+ 	hdr->nops++;
+ 	hdr->replen += decode_open_confirm_maxsz;
+@@ -1294,7 +1447,7 @@ static void encode_open_downgrade(struct
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
+ 	*p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
+-	p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
++	p = xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
+ 	*p = cpu_to_be32(arg->seqid->sequence->counter);
+ 	encode_share_access(xdr, arg->fmode);
+ 	hdr->nops++;
+@@ -1324,17 +1477,17 @@ static void encode_putrootfh(struct xdr_
+ 	hdr->replen += decode_putrootfh_maxsz;
+ }
+ 
+-static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
++static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx)
+ {
+ 	nfs4_stateid stateid;
+ 	__be32 *p;
+ 
+ 	p = reserve_space(xdr, NFS4_STATEID_SIZE);
+ 	if (ctx->state != NULL) {
+-		nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
+-		xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
++		nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid);
++		xdr_encode_opaque_fixed(p, stateid.u.data, NFS4_STATEID_SIZE);
+ 	} else
+-		xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
++		xdr_encode_opaque_fixed(p, zero_stateid.u.data, NFS4_STATEID_SIZE);
+ }
+ 
+ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
+@@ -1344,7 +1497,7 @@ static void encode_read(struct xdr_strea
+ 	p = reserve_space(xdr, 4);
+ 	*p = cpu_to_be32(OP_READ);
+ 
+-	encode_stateid(xdr, args->context);
++	encode_stateid(xdr, args->context, args->lock_context);
+ 
+ 	p = reserve_space(xdr, 12);
+ 	p = xdr_encode_hyper(p, args->offset);
+@@ -1448,7 +1601,7 @@ encode_setacl(struct xdr_stream *xdr, st
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_SETATTR);
+-	xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, zero_stateid.u.data, NFS4_STATEID_SIZE);
+ 	p = reserve_space(xdr, 2*4);
+ 	*p++ = cpu_to_be32(1);
+ 	*p = cpu_to_be32(FATTR4_WORD0_ACL);
+@@ -1479,7 +1632,7 @@ static void encode_setattr(struct xdr_st
+ 
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 	*p++ = cpu_to_be32(OP_SETATTR);
+-	xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, arg->stateid.u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_setattr_maxsz;
+ 	encode_attrs(xdr, arg->iap, server);
+@@ -1523,7 +1676,7 @@ static void encode_write(struct xdr_stre
+ 	p = reserve_space(xdr, 4);
+ 	*p = cpu_to_be32(OP_WRITE);
+ 
+-	encode_stateid(xdr, args->context);
++	encode_stateid(xdr, args->context, args->lock_context);
+ 
+ 	p = reserve_space(xdr, 16);
+ 	p = xdr_encode_hyper(p, args->offset);
+@@ -1542,7 +1695,7 @@ static void encode_delegreturn(struct xd
+ 	p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
+ 
+ 	*p++ = cpu_to_be32(OP_DELEGRETURN);
+-	xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
++	xdr_encode_opaque_fixed(p, stateid->u.data, NFS4_STATEID_SIZE);
+ 	hdr->nops++;
+ 	hdr->replen += decode_delegreturn_maxsz;
+ }
+@@ -1696,6 +1849,162 @@ static void encode_sequence(struct xdr_s
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
++#ifdef CONFIG_NFS_V4_1
++static void
++encode_getdevicelist(struct xdr_stream *xdr,
++		     const struct nfs4_getdevicelist_args *args,
++		     struct compound_hdr *hdr)
++{
++	__be32 *p;
++	nfs4_verifier dummy = {
++		.data = "dummmmmy",
++	};
++
++	p = reserve_space(xdr, 20);
++	*p++ = cpu_to_be32(OP_GETDEVICELIST);
++	*p++ = cpu_to_be32(args->layoutclass);
++	*p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM);
++	xdr_encode_hyper(p, 0ULL);                          /* cookie */
++	encode_nfs4_verifier(xdr, &dummy);
++	hdr->nops++;
++}
++
++static void
++encode_getdeviceinfo(struct xdr_stream *xdr,
++		     const struct nfs4_getdeviceinfo_args *args,
++		     struct compound_hdr *hdr)
++{
++	int has_bitmap = (args->pdev->dev_notify_types != 0);
++	int len = 16 + NFS4_PNFS_DEVICEID4_SIZE + (has_bitmap * 4);
++	__be32 *p;
++
++	p = reserve_space(xdr, len);
++	*p++ = cpu_to_be32(OP_GETDEVICEINFO);
++	p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
++				    NFS4_PNFS_DEVICEID4_SIZE);
++	*p++ = cpu_to_be32(args->pdev->layout_type);
++	*p++ = cpu_to_be32(args->pdev->pglen + len);	/* gdia_maxcount */
++	*p++ = cpu_to_be32(has_bitmap);			/* bitmap length [01] */
++	if (has_bitmap)
++		*p = cpu_to_be32(args->pdev->dev_notify_types);
++	hdr->nops++;
++}
++
++/*
++ * Encode LAYOUTGET for args->range with the layout stateid of args->inode.
++ */
++static void
++encode_layoutget(struct xdr_stream *xdr,
++		      const struct nfs4_layoutget_args *args,
++		      struct compound_hdr *hdr)
++{
++	nfs4_stateid stateid;
++	__be32 *p;
++
++	p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);	/* 44 = fixed fields */
++	*p++ = cpu_to_be32(OP_LAYOUTGET);
++	*p++ = cpu_to_be32(0);     /* Signal layout available */
++	*p++ = cpu_to_be32(args->type);
++	*p++ = cpu_to_be32(args->range.iomode);
++	p = xdr_encode_hyper(p, args->range.offset);
++	p = xdr_encode_hyper(p, args->range.length);
++	p = xdr_encode_hyper(p, args->minlength);
++	pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout);
++	p = xdr_encode_opaque_fixed(p, stateid.u.data, NFS4_STATEID_SIZE);
++	*p = cpu_to_be32(args->maxcount);
++
++	dprintk("%s: 1st type:0x%x iomode:%d off:%llu len:%llu mc:%d\n",
++		__func__, args->type, args->range.iomode,
++		(unsigned long long)args->range.offset,
++		(unsigned long long)args->range.length, args->maxcount);
++	hdr->nops++;
++	hdr->replen += decode_layoutget_maxsz;
++}
++
++static int
++encode_layoutcommit(struct xdr_stream *xdr,
++		    const struct nfs4_layoutcommit_args *args,
++		    struct compound_hdr *hdr)
++{
++	struct layoutdriver_io_operations *ld_io_ops =
++			NFS_SERVER(args->inode)->pnfs_curr_ld->ld_io_ops;
++	__be32 *p;
++
++	dprintk("%s: %llu@%llu lbw: %llu type: %d\n", __func__,
++		args->range.length, args->range.offset, args->lastbytewritten,
++		args->layout_type);
++
++	p = reserve_space(xdr, 40 + NFS4_STATEID_SIZE);
++	*p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
++	p = xdr_encode_hyper(p, args->range.offset);
++	p = xdr_encode_hyper(p, args->range.length);
++	*p++ = cpu_to_be32(0);     /* reclaim */
++	p = xdr_encode_opaque_fixed(p, args->stateid.u.data, NFS4_STATEID_SIZE);
++	*p++ = cpu_to_be32(1);     /* newoffset = TRUE */
++	p = xdr_encode_hyper(p, args->lastbytewritten);
++	*p = cpu_to_be32(args->time_modify_changed != 0);
++	if (args->time_modify_changed) {
++		p = reserve_space(xdr, 12);
++		*p++ = cpu_to_be32(0);
++		*p++ = cpu_to_be32(args->time_modify.tv_sec);
++		*p = cpu_to_be32(args->time_modify.tv_nsec);
++	}
++
++	p = reserve_space(xdr, 4);
++	*p = cpu_to_be32(args->layout_type);
++
++	if (ld_io_ops->encode_layoutcommit) {
++		ld_io_ops->encode_layoutcommit(NFS_I(args->inode)->layout,
++					       xdr, args);
++	} else {
++		p = reserve_space(xdr, 4);
++		xdr_encode_opaque(p, NULL, 0);
++	}
++
++	hdr->nops++;
++	hdr->replen += decode_layoutcommit_maxsz;
++	return 0;
++}
++
++static void
++encode_layoutreturn(struct xdr_stream *xdr,
++		    const struct nfs4_layoutreturn_args *args,
++		    struct compound_hdr *hdr)
++{
++	nfs4_stateid stateid;
++	__be32 *p;
++
++	p = reserve_space(xdr, 20);	/* five words common to all returns */
++	*p++ = cpu_to_be32(OP_LAYOUTRETURN);
++	*p++ = cpu_to_be32(args->reclaim);
++	*p++ = cpu_to_be32(args->layout_type);
++	*p++ = cpu_to_be32(args->range.iomode);
++	*p = cpu_to_be32(args->return_type);
++	if (args->return_type == RETURN_FILE) {	/* only case with a body */
++		struct layoutdriver_io_operations *ld_io_ops =
++			NFS_SERVER(args->inode)->pnfs_curr_ld->ld_io_ops;
++
++		p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
++		p = xdr_encode_hyper(p, args->range.offset);
++		p = xdr_encode_hyper(p, args->range.length);
++		pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout);
++		p = xdr_encode_opaque_fixed(p, stateid.u.data,
++					    NFS4_STATEID_SIZE);
++		dprintk("%s: call %pF\n", __func__,
++			ld_io_ops->encode_layoutreturn);
++		if (ld_io_ops->encode_layoutreturn) {
++			ld_io_ops->encode_layoutreturn(
++				NFS_I(args->inode)->layout, xdr, args);
++		} else {
++			p = reserve_space(xdr, 4);
++			*p = cpu_to_be32(0);	/* zero-length opaque body */
++		}
++	}
++	hdr->nops++;
++	hdr->replen += decode_layoutreturn_maxsz;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
+ /*
+  * END OF "GENERIC" ENCODE ROUTINES.
+  */
+@@ -1704,7 +2013,7 @@ static u32 nfs4_xdr_minorversion(const s
+ {
+ #if defined(CONFIG_NFS_V4_1)
+ 	if (args->sa_session)
+-		return args->sa_session->clp->cl_minorversion;
++		return args->sa_session->clp->cl_mvops->minor_version;
+ #endif /* CONFIG_NFS_V4_1 */
+ 	return 0;
+ }
+@@ -2048,6 +2357,20 @@ static int nfs4_xdr_enc_locku(struct rpc
+ 	return 0;
+ }
+ 
++static int nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, __be32 *p, struct nfs_release_lockowner_args *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = 0,
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_release_lockowner(&xdr, &args->lock_owner, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
+ /*
+  * Encode a READLINK request
+  */
+@@ -2330,7 +2653,7 @@ static int nfs4_xdr_enc_setclientid_conf
+ 	struct compound_hdr hdr = {
+ 		.nops	= 0,
+ 	};
+-	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
++	const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME, 0, 0 };
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ 	encode_compound_hdr(&xdr, req, &hdr);
+@@ -2395,7 +2718,7 @@ static int nfs4_xdr_enc_exchange_id(stru
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = args->client->cl_minorversion,
++		.minorversion = args->client->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2413,7 +2736,7 @@ static int nfs4_xdr_enc_create_session(s
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = args->client->cl_minorversion,
++		.minorversion = args->client->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2431,7 +2754,7 @@ static int nfs4_xdr_enc_destroy_session(
+ {
+ 	struct xdr_stream xdr;
+ 	struct compound_hdr hdr = {
+-		.minorversion = session->clp->cl_minorversion,
++		.minorversion = session->clp->cl_mvops->minor_version,
+ 	};
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+@@ -2469,7 +2792,7 @@ static int nfs4_xdr_enc_get_lease_time(s
+ 	struct compound_hdr hdr = {
+ 		.minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
+ 	};
+-	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
++	const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME, 0, 0 };
+ 
+ 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ 	encode_compound_hdr(&xdr, req, &hdr);
+@@ -2499,6 +2822,159 @@ static int nfs4_xdr_enc_reclaim_complete
+ 	return 0;
+ }
+ 
++/*
++ * Encode a GETDEVICELIST compound: SEQUENCE, PUTFH, GETDEVICELIST
++ */
++static int
++nfs4_xdr_enc_getdevicelist(struct rpc_rqst *req, __be32 *p,
++			   struct nfs4_getdevicelist_args *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_getdevicelist(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a GETDEVICEINFO compound (SEQUENCE, GETDEVICEINFO) and map reply pages
++ */
++static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, __be32 *p,
++				      struct nfs4_getdeviceinfo_args *args)
++{
++	struct xdr_stream xdr;
++	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++	int replen;
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_getdeviceinfo(&xdr, args, &hdr);
++
++	/* set up reply kvec. Subtract notification bitmap max size (8)
++	 * so that notification bitmap is put in xdr_buf tail */
++	replen = (RPC_REPHDRSIZE + auth->au_rslack +
++		  NFS4_dec_getdeviceinfo_sz - 8) << 2;
++	xdr_inline_pages(&req->rq_rcv_buf, replen, args->pdev->pages,
++			 args->pdev->pgbase, args->pdev->pglen);
++	dprintk("%s: inlined page args = (%u, %p, %u, %u)\n",
++		__func__, replen, args->pdev->pages,
++		args->pdev->pgbase, args->pdev->pglen);
++
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a LAYOUTGET compound: SEQUENCE, PUTFH (inode's fh), LAYOUTGET
++ */
++static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, __be32 *p,
++				  struct nfs4_layoutget_args *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
++	encode_layoutget(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a LAYOUTCOMMIT compound: SEQUENCE, PUTFH, LAYOUTCOMMIT, GETATTR
++ */
++static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, __be32 *p,
++				     struct nfs4_layoutcommit_args *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_layoutcommit(&xdr, args, &hdr);
++	encode_getfattr(&xdr, args->bitmask, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a LAYOUTRETURN compound: SEQUENCE, PUTFH (inode's fh), LAYOUTRETURN
++ */
++static int nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, __be32 *p,
++				     struct nfs4_layoutreturn_args *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
++	encode_layoutreturn(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a pNFS File Layout Data Server WRITE: SEQUENCE, PUTFH, WRITE
++ */
++static int nfs4_xdr_enc_dswrite(struct rpc_rqst *req, __be32 *p,
++				struct nfs_writeargs *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_write(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
++
++/*
++ * Encode a pNFS File Layout Data Server COMMIT: SEQUENCE, PUTFH, COMMIT
++ */
++static int nfs4_xdr_enc_dscommit(struct rpc_rqst *req, __be32 *p,
++				 struct nfs_writeargs *args)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr = {
++		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
++	};
++
++	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++	encode_compound_hdr(&xdr, req, &hdr);
++	encode_sequence(&xdr, &args->seq_args, &hdr);
++	encode_putfh(&xdr, args->fh, &hdr);
++	encode_commit(&xdr, args, &hdr);
++	encode_nops(&hdr);
++	return 0;
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+@@ -2599,14 +3075,17 @@ static int decode_attr_bitmap(struct xdr
+ 		goto out_overflow;
+ 	bmlen = be32_to_cpup(p);
+ 
+-	bitmap[0] = bitmap[1] = 0;
++	bitmap[0] = bitmap[1] = bitmap[2] = 0;
+ 	p = xdr_inline_decode(xdr, (bmlen << 2));
+ 	if (unlikely(!p))
+ 		goto out_overflow;
+ 	if (bmlen > 0) {
+ 		bitmap[0] = be32_to_cpup(p++);
+-		if (bmlen > 1)
+-			bitmap[1] = be32_to_cpup(p);
++		if (bmlen > 1) {
++			bitmap[1] = be32_to_cpup(p++);
++			if (bmlen > 2)
++				bitmap[2] = be32_to_cpup(p);
++		}
+ 	}
+ 	return 0;
+ out_overflow:
+@@ -2635,8 +3114,9 @@ static int decode_attr_supported(struct 
+ 		decode_attr_bitmap(xdr, bitmask);
+ 		bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS;
+ 	} else
+-		bitmask[0] = bitmask[1] = 0;
+-	dprintk("%s: bitmask=%08x:%08x\n", __func__, bitmask[0], bitmask[1]);
++		bitmask[0] = bitmask[1] = bitmask[2] = 0;
++	dprintk("%s: bitmask=%08x:%08x:%08x\n", __func__,
++		bitmask[0], bitmask[1], bitmask[2]);
+ 	return 0;
+ }
+ 
+@@ -3565,7 +4045,7 @@ static int decode_opaque_fixed(struct xd
+ 
+ static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+ {
+-	return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
++	return decode_opaque_fixed(xdr, stateid->u.data, NFS4_STATEID_SIZE);
+ }
+ 
+ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
+@@ -3621,7 +4101,7 @@ out_overflow:
+ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3647,7 +4127,7 @@ xdr_error:
+ static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3679,7 +4159,7 @@ xdr_error:
+ static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf)
+ {
+ 	__be32 *savep;
+-	uint32_t attrlen, bitmap[2] = {0};
++	uint32_t attrlen, bitmap[3] = {0};
+ 	int status;
+ 
+ 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+@@ -3705,7 +4185,7 @@ static int decode_getfattr(struct xdr_st
+ {
+ 	__be32 *savep;
+ 	uint32_t attrlen,
+-		 bitmap[2] = {0},
++		 bitmap[3] = {0},
+ 		 type;
+ 	int status;
+ 	umode_t fmode = 0;
+@@ -3824,24 +4304,101 @@ xdr_error:
+ 	return status;
+ }
+ 
+-
+-static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
++#if defined(CONFIG_NFS_V4_1)
++/*
++ * Decode potentially multiple layout types. Currently we only support
++ * one layout driver per file system.
++ */
++static int decode_pnfs_list(struct xdr_stream *xdr, uint32_t *layoutclass)
+ {
+-	__be32 *savep;
+-	uint32_t attrlen, bitmap[2];
+-	int status;
++	uint32_t *p;
++	int num;
+ 
+-	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
+-		goto xdr_error;
+-	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+-		goto xdr_error;
+-	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+-		goto xdr_error;
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	num = be32_to_cpup(p);
+ 
+-	fsinfo->rtmult = fsinfo->wtmult = 512;	/* ??? */
++	/* pNFS is not supported by the underlying file system */
++	if (num == 0) {
++		*layoutclass = 0;
++		return 0;
++	}
+ 
+-	if ((status = decode_attr_lease_time(xdr, bitmap, &fsinfo->lease_time)) != 0)
+-		goto xdr_error;
++	/* TODO: We will eventually support multiple layout drivers ? */
++	if (num > 1)
++		printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers "
++			"per filesystem not supported\n", __func__);
++
++	/* Decode and set first layout type */
++	p = xdr_inline_decode(xdr, num * 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	*layoutclass = be32_to_cpup(p);
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++/*
++ * The type of file system exported
++ */
++static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
++				uint32_t *layoutclass)
++{
++	int status = 0;
++
++	dprintk("%s: bitmap is %x\n", __func__, bitmap[1]);
++	if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
++		return -EIO;
++	if (likely(bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES)) {
++		status = decode_pnfs_list(xdr, layoutclass);
++		bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
++	}
++	return status;
++}
++
++/*
++ * The preferred block size for layout-directed I/O
++ */
++static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
++				      uint32_t *res)
++{
++	__be32 *p;
++
++	dprintk("%s: bitmap is %x\n", __func__, bitmap[2]);
++	*res = 0;
++	if (bitmap[2] & FATTR4_WORD2_LAYOUT_BLKSIZE) {
++		p = xdr_inline_decode(xdr, 4);
++		if (unlikely(!p)) {
++			print_overflow_msg(__func__, xdr);
++			return -EIO;
++		}
++		*res = be32_to_cpup(p);
++		bitmap[2] &= ~FATTR4_WORD2_LAYOUT_BLKSIZE;
++	}
++	return 0;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
++static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
++{
++	__be32 *savep;
++	uint32_t attrlen, bitmap[3];
++	int status;
++
++	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
++		goto xdr_error;
++	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
++		goto xdr_error;
++	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
++		goto xdr_error;
++
++	fsinfo->rtmult = fsinfo->wtmult = 512;	/* ??? */
++
++	if ((status = decode_attr_lease_time(xdr, bitmap, &fsinfo->lease_time)) != 0)
++		goto xdr_error;
+ 	if ((status = decode_attr_maxfilesize(xdr, bitmap, &fsinfo->maxfilesize)) != 0)
+ 		goto xdr_error;
+ 	if ((status = decode_attr_maxread(xdr, bitmap, &fsinfo->rtmax)) != 0)
+@@ -3850,6 +4407,14 @@ static int decode_fsinfo(struct xdr_stre
+ 	if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0)
+ 		goto xdr_error;
+ 	fsinfo->wtpref = fsinfo->wtmax;
++#if defined(CONFIG_NFS_V4_1)
++	status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
++	if (status)
++		goto xdr_error;
++	status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize);
++	if (status)
++		goto xdr_error;
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ 	status = verify_attr_len(xdr, savep, attrlen);
+ xdr_error:
+@@ -3973,6 +4538,11 @@ static int decode_locku(struct xdr_strea
+ 	return status;
+ }
+ 
++static int decode_release_lockowner(struct xdr_stream *xdr)
++{
++	return decode_op_hdr(xdr, OP_RELEASE_LOCKOWNER);
++}
++
+ static int decode_lookup(struct xdr_stream *xdr)
+ {
+ 	return decode_op_hdr(xdr, OP_LOOKUP);
+@@ -4333,7 +4903,7 @@ static int decode_getacl(struct xdr_stre
+ {
+ 	__be32 *savep;
+ 	uint32_t attrlen,
+-		 bitmap[2] = {0};
++		 bitmap[3] = {0};
+ 	struct kvec *iov = req->rq_rcv_buf.head;
+ 	int status;
+ 
+@@ -4682,6 +5252,226 @@ out_overflow:
+ #endif /* CONFIG_NFS_V4_1 */
+ }
+ 
++#if defined(CONFIG_NFS_V4_1)
++/*
++ * TODO: Need to handle case when EOF != true;
++ */
++static int decode_getdevicelist(struct xdr_stream *xdr,
++				struct pnfs_devicelist *res)
++{
++	__be32 *p;
++	int status, i;
++	struct nfs_writeverf verftemp;
++
++	status = decode_op_hdr(xdr, OP_GETDEVICELIST);
++	if (status)
++		return status;
++
++	p = xdr_inline_decode(xdr, 8 + 8 + 4);
++	if (unlikely(!p))
++		goto out_overflow;
++
++	/* TODO: Skip cookie for now */
++	p += 2;
++
++	/* Read verifier */
++	p = xdr_decode_opaque_fixed(p, verftemp.verifier, 8);
++
++	res->num_devs = be32_to_cpup(p);
++
++	dprintk("%s: num_dev %d\n", __func__, res->num_devs);
++
++	if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM)
++		return -NFS4ERR_REP_TOO_BIG;
++
++	p = xdr_inline_decode(xdr,
++			      res->num_devs * NFS4_PNFS_DEVICEID4_SIZE + 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	for (i = 0; i < res->num_devs; i++)
++		p = xdr_decode_opaque_fixed(p, res->dev_id[i].data,
++					    NFS4_PNFS_DEVICEID4_SIZE);
++	res->eof = be32_to_cpup(p);
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_getdeviceinfo(struct xdr_stream *xdr,
++				struct pnfs_device *pdev)
++{
++	__be32 *p;
++	uint32_t len, type;
++	int status;
++
++	status = decode_op_hdr(xdr, OP_GETDEVICEINFO);
++	if (status) {
++		if (status == -ETOOSMALL) {
++			p = xdr_inline_decode(xdr, 4);
++			if (unlikely(!p))
++				goto out_overflow;
++			pdev->mincount = be32_to_cpup(p);
++			dprintk("%s: Min count too small. mincnt = %u\n",
++				__func__, pdev->mincount);
++		}
++		return status;
++	}
++
++	p = xdr_inline_decode(xdr, 8);
++	if (unlikely(!p))
++		goto out_overflow;
++	type = be32_to_cpup(p++);
++	if (type != pdev->layout_type) {
++		dprintk("%s: layout mismatch req: %u pdev: %u\n",
++			__func__, pdev->layout_type, type);
++		return -EINVAL;
++	}
++	/*
++	 * Get the length of the opaque device_addr4. xdr_read_pages places
++	 * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages)
++	 * and places the remaining xdr data in xdr_buf->tail
++	 */
++	pdev->mincount = be32_to_cpup(p);
++	xdr_read_pages(xdr, pdev->mincount); /* include space for the length */
++
++	/* At most one bitmap word */
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	len = be32_to_cpup(p);
++	if (len) {
++		p = xdr_inline_decode(xdr, 4);
++		if (unlikely(!p))
++			goto out_overflow;
++		pdev->dev_notify_types = be32_to_cpup(p);
++	} else
++		pdev->dev_notify_types = 0;
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
++			    struct nfs4_layoutget_res *res)
++{
++	__be32 *p;
++	int status;
++	u32 layout_count, dummy;
++
++	status = decode_op_hdr(xdr, OP_LAYOUTGET);
++	if (status)
++		return status;
++	p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE);
++	if (unlikely(!p))
++		goto out_overflow;
++	res->return_on_close = be32_to_cpup(p++);
++	p = xdr_decode_opaque_fixed(p, res->stateid.u.data, NFS4_STATEID_SIZE);
++	layout_count = be32_to_cpup(p);
++	if (!layout_count) {
++		dprintk("%s: server responded with empty layout array\n",
++			__func__);
++		return -EINVAL;
++	}
++
++	p = xdr_inline_decode(xdr, 24);
++	if (unlikely(!p))
++		goto out_overflow;
++	p = xdr_decode_hyper(p, &res->range.offset);
++	p = xdr_decode_hyper(p, &res->range.length);
++	res->range.iomode = be32_to_cpup(p++);
++	res->type = be32_to_cpup(p++);
++
++	status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
++	if (unlikely(status))
++		return status;
++
++	dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
++		__func__,
++		(unsigned long)res->range.offset,
++		(unsigned long)res->range.length,
++		res->range.iomode,
++		res->type,
++		res->layout.len);
++
++	/* presumably, nfs4_proc_layoutget allocated a single page */
++	if (res->layout.len > PAGE_SIZE)
++		return -ENOMEM;
++	memcpy(res->layout.buf, p, res->layout.len);
++
++	/* FIXME: the whole layout array should be passed up to the pnfs
++	 * client */
++	if (layout_count > 1) {
++		dprintk("%s: server responded with %d layouts, dropping tail\n",
++			__func__, layout_count);
++
++		while (--layout_count) {
++			p = xdr_inline_decode(xdr, 24);
++			if (unlikely(!p))
++				goto out_overflow;
++			status = decode_opaque_inline(xdr, &dummy, (char **)&p);
++			if (unlikely(status))
++				return status;
++		}
++	}
++
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_layoutreturn(struct xdr_stream *xdr,
++			       struct nfs4_layoutreturn_res *res)
++{
++	__be32 *p;
++	int status;
++
++	status = decode_op_hdr(xdr, OP_LAYOUTRETURN);
++	if (status)
++		return status;
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	res->lrs_present = be32_to_cpup(p);
++	if (res->lrs_present)
++		status = decode_stateid(xdr, &res->stateid);
++	return status;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++
++static int decode_layoutcommit(struct xdr_stream *xdr,
++				    struct rpc_rqst *req,
++				    struct nfs4_layoutcommit_res *res)
++{
++	__be32 *p;
++	int status;
++
++	status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT);
++	if (status)
++		return status;
++
++	p = xdr_inline_decode(xdr, 4);
++	if (unlikely(!p))
++		goto out_overflow;
++	res->sizechanged = be32_to_cpup(p);
++
++	if (res->sizechanged) {
++		p = xdr_inline_decode(xdr, 8);
++		if (unlikely(!p))
++			goto out_overflow;
++		xdr_decode_hyper(p, &res->newsize);
++	}
++	return 0;
++out_overflow:
++	print_overflow_msg(__func__, xdr);
++	return -EIO;
++}
++#endif /* CONFIG_NFS_V4_1 */
++
+ /*
+  * END OF "GENERIC" DECODE ROUTINES.
+  */
+@@ -5259,6 +6049,19 @@ out:
+ 	return status;
+ }
+ 
++static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (!status)
++		status = decode_release_lockowner(&xdr);
++	return status;
++}
++
+ /*
+  * Decode READLINK response
+  */
+@@ -5696,6 +6499,186 @@ static int nfs4_xdr_dec_reclaim_complete
+ 		status = decode_reclaim_complete(&xdr, (void *)NULL);
+ 	return status;
+ }
++
++/*
++ * Decode GETDEVICELIST response (compound: SEQUENCE, PUTFH, GETDEVICELIST)
++ */
++static int nfs4_xdr_dec_getdevicelist(struct rpc_rqst *rqstp, uint32_t *p,
++				      struct nfs4_getdevicelist_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	dprintk("decoding getdevicelist!\n");
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status != 0)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status != 0)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status != 0)
++		goto out;
++	status = decode_getdevicelist(&xdr, res->devlist);
++out:
++	return status;
++}
++
++/*
++ * Decode GETDEVICEINFO response
++ */
++static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
++				      struct nfs4_getdeviceinfo_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status != 0)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status != 0)
++		goto out;
++	status = decode_getdeviceinfo(&xdr, res->pdev);
++out:
++	return status;
++}
++
++/*
++ * Decode LAYOUTGET response
++ */
++static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
++				  struct nfs4_layoutget_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutget(&xdr, rqstp, res);
++out:
++	return status;
++}
++
++/*
++ * Decode LAYOUTRETURN response
++ */
++static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp, uint32_t *p,
++				     struct nfs4_layoutreturn_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutreturn(&xdr, res);
++out:
++	return status;
++}
++
++/*
++ * Decode LAYOUTCOMMIT response
++ */
++static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, uint32_t *p,
++				     struct nfs4_layoutcommit_res *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_layoutcommit(&xdr, rqstp, res);
++	if (status)
++		goto out;
++	decode_getfattr(&xdr, res->fattr, res->server,
++			!RPC_IS_ASYNC(rqstp->rq_task));
++out:
++	return status;
++}
++
++/*
++ * Decode pNFS File Layout Data Server WRITE response
++ */
++static int nfs4_xdr_dec_dswrite(struct rpc_rqst *rqstp, uint32_t *p,
++				struct nfs_writeres *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_write(&xdr, res);
++	if (!status)
++		return res->count;
++out:
++	return status;
++}
++
++/*
++ * Decode pNFS File Layout Data Server COMMIT response
++ */
++static int nfs4_xdr_dec_dscommit(struct rpc_rqst *rqstp, uint32_t *p,
++				 struct nfs_writeres *res)
++{
++	struct xdr_stream xdr;
++	struct compound_hdr hdr;
++	int status;
++
++	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++	status = decode_compound_hdr(&xdr, &hdr);
++	if (status)
++		goto out;
++	status = decode_sequence(&xdr, &res->seq_res, rqstp);
++	if (status)
++		goto out;
++	status = decode_putfh(&xdr);
++	if (status)
++		goto out;
++	status = decode_commit(&xdr, res);
++out:
++	return status;
++}
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
+@@ -5866,6 +6849,7 @@ struct rpc_procinfo	nfs4_procedures[] = 
+   PROC(GETACL,		enc_getacl,	dec_getacl),
+   PROC(SETACL,		enc_setacl,	dec_setacl),
+   PROC(FS_LOCATIONS,	enc_fs_locations, dec_fs_locations),
++  PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
+ #if defined(CONFIG_NFS_V4_1)
+   PROC(EXCHANGE_ID,	enc_exchange_id,	dec_exchange_id),
+   PROC(CREATE_SESSION,	enc_create_session,	dec_create_session),
+@@ -5873,6 +6857,13 @@ struct rpc_procinfo	nfs4_procedures[] = 
+   PROC(SEQUENCE,	enc_sequence,	dec_sequence),
+   PROC(GET_LEASE_TIME,	enc_get_lease_time,	dec_get_lease_time),
+   PROC(RECLAIM_COMPLETE, enc_reclaim_complete,  dec_reclaim_complete),
++  PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist),
++  PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
++  PROC(LAYOUTGET,  enc_layoutget,     dec_layoutget),
++  PROC(LAYOUTCOMMIT, enc_layoutcommit,  dec_layoutcommit),
++  PROC(LAYOUTRETURN, enc_layoutreturn,  dec_layoutreturn),
++  PROC(PNFS_WRITE, enc_dswrite,  dec_dswrite),
++  PROC(PNFS_COMMIT, enc_dscommit,  dec_dscommit),
+ #endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild
+--- linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild.orig	2010-09-30 10:17:08.713997000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/Kbuild	2010-09-30 10:17:08.715994000 -0400
+@@ -0,0 +1,11 @@
++#
++# Makefile for the pNFS Objects Layout Driver kernel module
++#
++objlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o objio_osd.o
++obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o
++
++#
++# Panasas pNFS Layout Driver kernel module
++#
++panlayoutdriver-y := pnfs_osd_xdr_cli.o objlayout.o panfs_shim.o
++obj-$(CONFIG_PNFS_PANLAYOUT) += panlayoutdriver.o
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c.orig	2010-09-30 10:17:08.717999000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objio_osd.c	2010-09-30 10:17:08.719998000 -0400
+@@ -0,0 +1,1087 @@
++/*
++ *  objio_osd.c
++ *
++ *  pNFS Objects layout implementation over open-osd initiator library
++ *
++ *  Copyright (C) 2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bharrosh@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/module.h>
++#include <scsi/scsi_device.h>
++#include <scsi/osd_attributes.h>
++#include <scsi/osd_initiator.h>
++#include <scsi/osd_sec.h>
++#include <scsi/osd_sense.h>
++
++#include "objlayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++#define _LLU(x) ((unsigned long long)x)
++
++enum { BIO_MAX_PAGES_KMALLOC =
++		(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
++};
++
++/* A per mountpoint struct currently for device cache */
++struct objio_mount_type {
++	struct list_head dev_list;
++	spinlock_t dev_list_lock;
++};
++
++struct _dev_ent {
++	struct list_head list;
++	struct pnfs_deviceid d_id;
++	struct osd_dev *od;
++};
++
++static void _dev_list_remove_all(struct objio_mount_type *omt)
++{
++	spin_lock(&omt->dev_list_lock);
++
++	while (!list_empty(&omt->dev_list)) {
++		struct _dev_ent *de = list_entry(omt->dev_list.next,
++				 struct _dev_ent, list);
++
++		list_del_init(&de->list);
++		osduld_put_device(de->od);
++		kfree(de);
++	}
++
++	spin_unlock(&omt->dev_list_lock);
++}
++
++static struct osd_dev *___dev_list_find(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id)
++{
++	struct list_head *le;
++
++	list_for_each(le, &omt->dev_list) {
++		struct _dev_ent *de = list_entry(le, struct _dev_ent, list);
++
++		if (0 == memcmp(&de->d_id, d_id, sizeof(*d_id)))
++			return de->od;
++	}
++
++	return NULL;
++}
++
++static struct osd_dev *_dev_list_find(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id)
++{
++	struct osd_dev *od;
++
++	spin_lock(&omt->dev_list_lock);
++	od = ___dev_list_find(omt, d_id);
++	spin_unlock(&omt->dev_list_lock);
++	return od;
++}
++
++static int _dev_list_add(struct objio_mount_type *omt,
++	struct pnfs_deviceid *d_id, struct osd_dev *od)
++{
++	struct _dev_ent *de = kzalloc(sizeof(*de), GFP_KERNEL);
++
++	if (!de)
++		return -ENOMEM;
++
++	spin_lock(&omt->dev_list_lock);
++
++	if (___dev_list_find(omt, d_id)) {
++		kfree(de);
++		goto out;
++	}
++
++	de->d_id = *d_id;
++	de->od = od;
++	list_add(&de->list, &omt->dev_list);
++
++out:
++	spin_unlock(&omt->dev_list_lock);
++	return 0;
++}
++
++struct objio_segment {
++	struct pnfs_osd_layout *layout;
++
++	unsigned mirrors_p1;
++	unsigned stripe_unit;
++	unsigned group_width;	/* Data stripe_units without integrity comps */
++	u64 group_depth;
++	unsigned group_count;
++
++	unsigned num_comps;
++	/* variable length */
++	struct osd_dev	*ods[1];
++};
++
++struct objio_state;
++typedef ssize_t (*objio_done_fn)(struct objio_state *ios);
++
++struct objio_state {
++	/* Generic layer */
++	struct objlayout_io_state ol_state;
++
++	struct objio_segment *objio_seg;
++
++	struct kref kref;
++	objio_done_fn done;
++	void *private;
++
++	unsigned long length;
++	unsigned numdevs; /* Actually used devs in this IO */
++	/* A per-device variable array of size numdevs */
++	struct _objio_per_comp {
++		struct bio *bio;
++		struct osd_request *or;
++		unsigned long length;
++		u64 offset;
++		unsigned dev;
++	} per_dev[];
++};
++
++/* Send and wait for a get_device_info of devices in the layout,
++   then look them up with the osd_initiator library */
++static struct osd_dev *_device_lookup(struct pnfs_layout_hdr *pnfslay,
++			       struct objio_segment *objio_seg, unsigned comp)
++{
++	struct pnfs_osd_layout *layout = objio_seg->layout;
++	struct pnfs_osd_deviceaddr *deviceaddr;
++	struct pnfs_deviceid *d_id;
++	struct osd_dev *od;
++	struct osd_dev_info odi;
++	struct objio_mount_type *omt = PNFS_NFS_SERVER(pnfslay)->pnfs_ld_data;
++	int err;
++
++	d_id = &layout->olo_comps[comp].oc_object_id.oid_device_id;
++
++	od = _dev_list_find(omt, d_id);
++	if (od)
++		return od;
++
++	err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr);
++	if (unlikely(err)) {
++		dprintk("%s: objlayout_get_deviceinfo=>%d\n", __func__, err);
++		return ERR_PTR(err);
++	}
++
++	odi.systemid_len = deviceaddr->oda_systemid.len;
++	if (odi.systemid_len > sizeof(odi.systemid)) {
++		err = -EINVAL;
++		goto out;
++	} else if (odi.systemid_len)
++		memcpy(odi.systemid, deviceaddr->oda_systemid.data,
++		       odi.systemid_len);
++	odi.osdname_len	 = deviceaddr->oda_osdname.len;
++	odi.osdname	 = (u8 *)deviceaddr->oda_osdname.data;
++
++	if (!odi.osdname_len && !odi.systemid_len) {
++		dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
++			__func__);
++		err = -ENODEV;
++		goto out;
++	}
++
++	od = osduld_info_lookup(&odi);
++	if (unlikely(IS_ERR(od))) {
++		err = PTR_ERR(od);
++		dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
++		goto out;
++	}
++
++	_dev_list_add(omt, d_id, od);
++
++out:
++	dprintk("%s: return=%d\n", __func__, err);
++	objlayout_put_deviceinfo(deviceaddr);
++	return err ? ERR_PTR(err) : od;
++}
++
++static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
++	struct objio_segment *objio_seg)
++{
++	struct pnfs_osd_layout *layout = objio_seg->layout;
++	unsigned i, num_comps = layout->olo_num_comps;
++	int err;
++
++	/* lookup all devices */
++	for (i = 0; i < num_comps; i++) {
++		struct osd_dev *od;
++
++		od = _device_lookup(pnfslay, objio_seg, i);
++		if (unlikely(IS_ERR(od))) {
++			err = PTR_ERR(od);
++			goto out;
++		}
++		objio_seg->ods[i] = od;
++	}
++	objio_seg->num_comps = num_comps;
++	err = 0;
++
++out:
++	dprintk("%s: return=%d\n", __func__, err);
++	return err;
++}
++
++static int _verify_data_map(struct pnfs_osd_layout *layout)
++{
++	struct pnfs_osd_data_map *data_map = &layout->olo_map;
++	u64 stripe_length;
++	u32 group_width;
++
++/* FIXME: Only raid0 for now. if not go through MDS */
++	if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) {
++		printk(KERN_ERR "Only RAID_0 for now\n");
++		return -ENOTSUPP;
++	}
++	if (0 != (data_map->odm_num_comps % (data_map->odm_mirror_cnt + 1))) {
++		printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n",
++			  data_map->odm_num_comps, data_map->odm_mirror_cnt);
++		return -EINVAL;
++	}
++
++	if (data_map->odm_group_width)
++		group_width = data_map->odm_group_width;
++	else
++		group_width = data_map->odm_num_comps /
++						(data_map->odm_mirror_cnt + 1);
++
++	stripe_length = (u64)data_map->odm_stripe_unit * group_width;
++	if (stripe_length >= (1ULL << 32)) {
++		printk(KERN_ERR "Total Stripe length(0x%llx)"
++			  " >= 32bit is not supported\n", _LLU(stripe_length));
++		return -ENOTSUPP;
++	}
++
++	if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) {
++		printk(KERN_ERR "Stripe Unit(0x%llx)"
++			  " must be Multples of PAGE_SIZE(0x%lx)\n",
++			  _LLU(data_map->odm_stripe_unit), PAGE_SIZE);
++		return -ENOTSUPP;
++	}
++
++	return 0;
++}
++
++int objio_alloc_lseg(void **outp,
++	struct pnfs_layout_hdr *pnfslay,
++	struct pnfs_layout_segment *lseg,
++	struct pnfs_osd_layout *layout)
++{
++	struct objio_segment *objio_seg;
++	int err;
++
++	err = _verify_data_map(layout);
++	if (unlikely(err))
++		return err;
++
++	objio_seg = kzalloc(sizeof(*objio_seg) +
++			(layout->olo_num_comps - 1) * sizeof(objio_seg->ods[0]),
++			GFP_KERNEL);
++	if (!objio_seg)
++		return -ENOMEM;
++
++	objio_seg->layout = layout;
++	err = objio_devices_lookup(pnfslay, objio_seg);
++	if (err)
++		goto free_seg;
++
++	objio_seg->mirrors_p1 = layout->olo_map.odm_mirror_cnt + 1;
++	objio_seg->stripe_unit = layout->olo_map.odm_stripe_unit;
++	if (layout->olo_map.odm_group_width) {
++		objio_seg->group_width = layout->olo_map.odm_group_width;
++		objio_seg->group_depth = layout->olo_map.odm_group_depth;
++		objio_seg->group_count = layout->olo_map.odm_num_comps /
++						objio_seg->mirrors_p1 /
++						objio_seg->group_width;
++	} else {
++		objio_seg->group_width = layout->olo_map.odm_num_comps /
++						objio_seg->mirrors_p1;
++		objio_seg->group_depth = -1;
++		objio_seg->group_count = 1;
++	}
++
++	*outp = objio_seg;
++	return 0;
++
++free_seg:
++	dprintk("%s: Error: return %d\n", __func__, err);
++	kfree(objio_seg);
++	*outp = NULL;
++	return err;
++}
++
++void objio_free_lseg(void *p)
++{
++	struct objio_segment *objio_seg = p;
++
++	kfree(objio_seg);
++}
++
++int objio_alloc_io_state(void *seg, struct objlayout_io_state **outp)
++{
++	struct objio_segment *objio_seg = seg;
++	struct objio_state *ios;
++	const unsigned first_size = sizeof(*ios) +
++				objio_seg->num_comps * sizeof(ios->per_dev[0]);
++	const unsigned sec_size = objio_seg->num_comps *
++						sizeof(ios->ol_state.ioerrs[0]);
++
++	dprintk("%s: num_comps=%d\n", __func__, objio_seg->num_comps);
++	ios = kzalloc(first_size + sec_size, GFP_KERNEL);
++	if (unlikely(!ios))
++		return -ENOMEM;
++
++	ios->objio_seg = objio_seg;
++	ios->ol_state.ioerrs = ((void *)ios) + first_size;
++	ios->ol_state.num_comps = objio_seg->num_comps;
++
++	*outp = &ios->ol_state;
++	return 0;
++}
++
++void objio_free_io_state(struct objlayout_io_state *ol_state)
++{
++	struct objio_state *ios = container_of(ol_state, struct objio_state,
++					       ol_state);
++
++	kfree(ios);
++}
++
++enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
++{
++	switch (oep) {
++	case OSD_ERR_PRI_NO_ERROR:
++		return (enum pnfs_osd_errno)0;
++
++	case OSD_ERR_PRI_CLEAR_PAGES:
++		BUG_ON(1);
++		return 0;
++
++	case OSD_ERR_PRI_RESOURCE:
++		return PNFS_OSD_ERR_RESOURCE;
++	case OSD_ERR_PRI_BAD_CRED:
++		return PNFS_OSD_ERR_BAD_CRED;
++	case OSD_ERR_PRI_NO_ACCESS:
++		return PNFS_OSD_ERR_NO_ACCESS;
++	case OSD_ERR_PRI_UNREACHABLE:
++		return PNFS_OSD_ERR_UNREACHABLE;
++	case OSD_ERR_PRI_NOT_FOUND:
++		return PNFS_OSD_ERR_NOT_FOUND;
++	case OSD_ERR_PRI_NO_SPACE:
++		return PNFS_OSD_ERR_NO_SPACE;
++	default:
++		WARN_ON(1);
++		/* fallthrough */
++	case OSD_ERR_PRI_EIO:
++		return PNFS_OSD_ERR_EIO;
++	}
++}
++
++static void _clear_bio(struct bio *bio)
++{
++	struct bio_vec *bv;
++	unsigned i;
++
++	__bio_for_each_segment(bv, bio, i, 0) {
++		unsigned this_count = bv->bv_len;
++
++		if (likely(PAGE_SIZE == this_count))
++			clear_highpage(bv->bv_page);
++		else
++			zero_user(bv->bv_page, bv->bv_offset, this_count);
++	}
++}
++
++static int _io_check(struct objio_state *ios, bool is_write)
++{
++	enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR;
++	int lin_ret = 0;
++	int i;
++
++	for (i = 0; i <  ios->numdevs; i++) {
++		struct osd_sense_info osi;
++		struct osd_request *or = ios->per_dev[i].or;
++		int ret;
++
++		if (!or)
++			continue;
++
++		ret = osd_req_decode_sense(or, &osi);
++		if (likely(!ret))
++			continue;
++
++		if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
++			/* start read offset is past end of file */
++			BUG_ON(is_write);
++			_clear_bio(ios->per_dev[i].bio);
++			dprintk("%s: start read offset passed end of file "
++				"offset=0x%llx, length=0x%lx\n", __func__,
++				_LLU(ios->per_dev[i].offset),
++				ios->per_dev[i].length);
++
++			continue; /* we recovered */
++		}
++		objlayout_io_set_result(&ios->ol_state, ios->per_dev[i].dev,
++					osd_pri_2_pnfs_err(osi.osd_err_pri),
++					ios->per_dev[i].offset,
++					ios->per_dev[i].length,
++					is_write);
++
++		if (osi.osd_err_pri >= oep) {
++			oep = osi.osd_err_pri;
++			lin_ret = ret;
++		}
++	}
++
++	return lin_ret;
++}
++
++/*
++ * Common IO state helpers.
++ */
++static void _io_free(struct objio_state *ios)
++{
++	unsigned i;
++
++	for (i = 0; i < ios->numdevs; i++) {
++		struct _objio_per_comp *per_dev = &ios->per_dev[i];
++
++		if (per_dev->or) {
++			osd_end_request(per_dev->or);
++			per_dev->or = NULL;
++		}
++
++		if (per_dev->bio) {
++			bio_put(per_dev->bio);
++			per_dev->bio = NULL;
++		}
++	}
++}
++
++struct osd_dev * _io_od(struct objio_state *ios, unsigned dev)
++{
++	unsigned min_dev = ios->objio_seg->layout->olo_comps_index;
++	unsigned max_dev = min_dev + ios->ol_state.num_comps;
++
++	BUG_ON(dev < min_dev || max_dev <= dev);
++	return ios->objio_seg->ods[dev - min_dev];
++}
++
++struct _striping_info {
++	u64 obj_offset;
++	u64 group_length;
++	u64 total_group_length;
++	u64 Major;
++	unsigned dev;
++	unsigned unit_off;
++};
++
++static void _calc_stripe_info(struct objio_state *ios, u64 file_offset,
++			      struct _striping_info *si)
++{
++	u32	stripe_unit = ios->objio_seg->stripe_unit;
++	u32	group_width = ios->objio_seg->group_width;
++	u64	group_depth = ios->objio_seg->group_depth;
++	u32	U = stripe_unit * group_width;
++
++	u64	T = U * group_depth;
++	u64	S = T * ios->objio_seg->group_count;
++	u64	M = div64_u64(file_offset, S);
++
++	/*
++	G = (L - (M * S)) / T
++	H = (L - (M * S)) % T
++	*/
++	u64	LmodU = file_offset - M * S;
++	u32	G = div64_u64(LmodU, T);
++	u64	H = LmodU - G * T;
++
++	u32	N = div_u64(H, U);
++
++	div_u64_rem(file_offset, stripe_unit, &si->unit_off);
++	si->obj_offset = si->unit_off + (N * stripe_unit) +
++				  (M * group_depth * stripe_unit);
++
++	/* "H - (N * U)" is just "H % U" so it's bound to u32 */
++	si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
++	si->dev *= ios->objio_seg->mirrors_p1;
++
++	si->group_length = T - H;
++	si->total_group_length = T;
++	si->Major = M;
++}
++
++static int _add_stripe_unit(struct objio_state *ios,  unsigned *cur_pg,
++		unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len)
++{
++	unsigned pg = *cur_pg;
++	struct request_queue *q =
++			osd_request_queue(_io_od(ios, per_dev->dev));
++
++	per_dev->length += cur_len;
++
++	if (per_dev->bio == NULL) {
++		unsigned stripes = ios->ol_state.num_comps /
++						     ios->objio_seg->mirrors_p1;
++		unsigned pages_in_stripe = stripes *
++				      (ios->objio_seg->stripe_unit / PAGE_SIZE);
++		unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) /
++				    stripes;
++
++		per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
++		if (unlikely(!per_dev->bio)) {
++			dprintk("Faild to allocate BIO size=%u\n", bio_size);
++			return -ENOMEM;
++		}
++	}
++
++	while (cur_len > 0) {
++		unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
++		unsigned added_len;
++
++		BUG_ON(ios->ol_state.nr_pages <= pg);
++		cur_len -= pglen;
++
++		added_len = bio_add_pc_page(q, per_dev->bio,
++					ios->ol_state.pages[pg], pglen, pgbase);
++		if (unlikely(pglen != added_len))
++			return -ENOMEM;
++		pgbase = 0;
++		++pg;
++	}
++	BUG_ON(cur_len);
++
++	*cur_pg = pg;
++	return 0;
++}
++
++static int _prepare_one_group(struct objio_state *ios, u64 length,
++			      struct _striping_info *si, unsigned first_comp,
++			      unsigned *last_pg)
++{
++	unsigned stripe_unit = ios->objio_seg->stripe_unit;
++	unsigned mirrors_p1 = ios->objio_seg->mirrors_p1;
++	unsigned devs_in_group = ios->objio_seg->group_width * mirrors_p1;
++	unsigned dev = si->dev;
++	unsigned first_dev = dev - (dev % devs_in_group);
++	unsigned comp = first_comp + (dev - first_dev);
++	unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
++	unsigned cur_pg = *last_pg;
++	int ret = 0;
++
++	while (length) {
++		struct _objio_per_comp *per_dev = &ios->per_dev[comp];
++		unsigned cur_len, page_off = 0;
++
++		if (!per_dev->length) {
++			per_dev->dev = dev;
++			if (dev < si->dev) {
++				per_dev->offset = si->obj_offset + stripe_unit -
++								   si->unit_off;
++				cur_len = stripe_unit;
++			} else if (dev == si->dev) {
++				per_dev->offset = si->obj_offset;
++				cur_len = stripe_unit - si->unit_off;
++				page_off = si->unit_off & ~PAGE_MASK;
++				BUG_ON(page_off &&
++				      (page_off != ios->ol_state.pgbase));
++			} else { /* dev > si->dev */
++				per_dev->offset = si->obj_offset - si->unit_off;
++				cur_len = stripe_unit;
++			}
++
++			if (max_comp < comp)
++				max_comp = comp;
++
++			dev += mirrors_p1;
++			dev = (dev % devs_in_group) + first_dev;
++		} else {
++			cur_len = stripe_unit;
++		}
++		if (cur_len >= length)
++			cur_len = length;
++
++		ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
++				       cur_len);
++		if (unlikely(ret))
++			goto out;
++
++		comp += mirrors_p1;
++		comp = (comp % devs_in_group) + first_comp;
++
++		length -= cur_len;
++		ios->length += cur_len;
++	}
++out:
++	ios->numdevs = max_comp + mirrors_p1;
++	*last_pg = cur_pg;
++	return ret;
++}
++
++static int _io_rw_pagelist(struct objio_state *ios)
++{
++	u64 length = ios->ol_state.count;
++	struct _striping_info si;
++	unsigned devs_in_group = ios->objio_seg->group_width *
++				 ios->objio_seg->mirrors_p1;
++	unsigned first_comp = 0;
++	unsigned num_comps = ios->objio_seg->layout->olo_map.odm_num_comps;
++	unsigned last_pg = 0;
++	int ret = 0;
++
++	_calc_stripe_info(ios, ios->ol_state.offset, &si);
++	while (length) {
++		if (length < si.group_length)
++			si.group_length = length;
++
++		ret = _prepare_one_group(ios, si.group_length, &si, first_comp,
++					 &last_pg);
++		if (unlikely(ret))
++			goto out;
++
++		length -= si.group_length;
++
++		si.group_length = si.total_group_length;
++		si.unit_off = 0;
++		++si.Major;
++		si.obj_offset = si.Major * ios->objio_seg->stripe_unit *
++						ios->objio_seg->group_depth;
++
++		si.dev = (si.dev - (si.dev % devs_in_group)) + devs_in_group;
++		si.dev %= num_comps;
++
++		first_comp += devs_in_group;
++		first_comp %= num_comps;
++	}
++
++out:
++	if (!ios->length)
++		return ret;
++
++	return 0;
++}
++
++static ssize_t _sync_done(struct objio_state *ios)
++{
++	struct completion *waiting = ios->private;
++
++	complete(waiting);
++	return 0;
++}
++
++static void _last_io(struct kref *kref)
++{
++	struct objio_state *ios = container_of(kref, struct objio_state, kref);
++
++	ios->done(ios);
++}
++
++static void _done_io(struct osd_request *or, void *p)
++{
++	struct objio_state *ios = p;
++
++	kref_put(&ios->kref, _last_io);
++}
++
++static ssize_t _io_exec(struct objio_state *ios)
++{
++	DECLARE_COMPLETION_ONSTACK(wait);
++	ssize_t status = 0; /* sync status */
++	unsigned i;
++	objio_done_fn saved_done_fn = ios->done;
++	bool sync = ios->ol_state.sync;
++
++	if (sync) {	/* caller waits: ->done only signals the completion */
++		ios->done = _sync_done;
++		ios->private = &wait;
++	}
++
++	kref_init(&ios->kref);	/* submitter's ref, dropped after the loop */
++
++	for (i = 0; i < ios->numdevs; i++) {
++		struct osd_request *or = ios->per_dev[i].or;
++
++		if (!or)
++			continue;
++
++		kref_get(&ios->kref);	/* one ref per submitted request */
++		osd_execute_request_async(or, _done_io, ios);
++	}
++
++	kref_put(&ios->kref, _last_io);	/* final put invokes ios->done */
++
++	if (sync) {
++		wait_for_completion(&wait);
++		status = saved_done_fn(ios); /* run the real completion */
++	}
++
++	return status;
++}
++
++/*
++ * read
++ */
++static ssize_t _read_done(struct objio_state *ios)
++{
++	ssize_t status;
++	int ret = _io_check(ios, false);
++
++	_io_free(ios);
++
++	if (likely(!ret))
++		status = ios->length;
++	else
++		status = ret;
++
++	objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync);
++	return status;
++}
++
++static int _read_mirrors(struct objio_state *ios, unsigned cur_comp)
++{
++	struct osd_request *or = NULL;
++	struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
++	unsigned dev = per_dev->dev;
++	struct pnfs_osd_object_cred *cred =
++			&ios->objio_seg->layout->olo_comps[dev];
++	struct osd_obj_id obj = {
++		.partition = cred->oc_object_id.oid_partition_id,
++		.id = cred->oc_object_id.oid_object_id,
++	};
++	int ret;
++
++	or = osd_start_request(_io_od(ios, dev), GFP_KERNEL);
++	if (unlikely(!or)) {
++		ret = -ENOMEM;
++		goto err;
++	}
++	per_dev->or = or;
++
++	osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length);
++
++	ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
++	if (ret) {
++		dprintk("%s: Faild to osd_finalize_request() => %d\n",
++			__func__, ret);
++		goto err;
++	}
++
++	dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
++		__func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
++		per_dev->length);
++
++err:
++	return ret;
++}
++
++static ssize_t _read_exec(struct objio_state *ios)
++{
++	unsigned i;
++	int ret;
++
++	for (i = 0; i < ios->numdevs; i += ios->objio_seg->mirrors_p1) {
++		if (!ios->per_dev[i].length)
++			continue;
++		ret = _read_mirrors(ios, i);
++		if (unlikely(ret))
++			goto err;
++	}
++
++	ios->done = _read_done;
++	return _io_exec(ios); /* In sync mode exec returns the io status */
++
++err:
++	_io_free(ios);
++	return ret;
++}
++
++ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state)
++{
++	struct objio_state *ios = container_of(ol_state, struct objio_state,
++					       ol_state);
++	int ret;
++
++	ret = _io_rw_pagelist(ios);
++	if (unlikely(ret))
++		return ret;
++
++	return _read_exec(ios);
++}
++
++/*
++ * write
++ */
++static ssize_t _write_done(struct objio_state *ios)
++{
++	ssize_t status;
++	int ret = _io_check(ios, true);
++
++	_io_free(ios);
++
++	if (likely(!ret)) {
++		/* FIXME: should be based on the OSD's persistence model
++		 * See OSD2r05 Section 4.13 Data persistence model */
++		ios->ol_state.committed = NFS_UNSTABLE; //NFS_FILE_SYNC;
++		status = ios->length;
++	} else {
++		status = ret;
++	}
++
++	objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync);
++	return status;
++}
++
++static int _write_mirrors(struct objio_state *ios, unsigned cur_comp)
++{
++	struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp];
++	unsigned dev = ios->per_dev[cur_comp].dev;
++	unsigned last_comp = cur_comp + ios->objio_seg->mirrors_p1;
++	int ret;
++
++	for (; cur_comp < last_comp; ++cur_comp, ++dev) {
++		struct osd_request *or = NULL;
++		struct pnfs_osd_object_cred *cred =
++					&ios->objio_seg->layout->olo_comps[dev];
++		struct osd_obj_id obj = {
++			.partition = cred->oc_object_id.oid_partition_id,
++			.id = cred->oc_object_id.oid_object_id,
++		};
++		struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
++		struct bio *bio;
++
++		or = osd_start_request(_io_od(ios, dev), GFP_KERNEL);
++		if (unlikely(!or)) {
++			ret = -ENOMEM;
++			goto err;
++		}
++		per_dev->or = or;
++
++		if (per_dev != master_dev) {
++			bio = bio_kmalloc(GFP_KERNEL,
++					  master_dev->bio->bi_max_vecs);
++			if (unlikely(!bio)) {
++				dprintk("Faild to allocate BIO size=%u\n",
++					master_dev->bio->bi_max_vecs);
++				ret = -ENOMEM;
++				goto err;
++			}
++
++			__bio_clone(bio, master_dev->bio);
++			bio->bi_bdev = NULL;
++			bio->bi_next = NULL;
++			per_dev->bio = bio;
++			per_dev->dev = dev;
++			per_dev->length = master_dev->length;
++			per_dev->offset =  master_dev->offset;
++		} else {
++			bio = master_dev->bio;
++			/* FIXME: bio_set_dir() */
++			bio->bi_rw |= (1 << BIO_RW);
++		}
++
++		osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length);
++
++		ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
++		if (ret) {
++			dprintk("%s: Faild to osd_finalize_request() => %d\n",
++				__func__, ret);
++			goto err;
++		}
++
++		dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
++			__func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
++			per_dev->length);
++	}
++
++err:
++	return ret;
++}
++
++static ssize_t _write_exec(struct objio_state *ios)
++{
++	unsigned i;
++	int ret;
++
++	for (i = 0; i < ios->numdevs; i += ios->objio_seg->mirrors_p1) {
++		if (!ios->per_dev[i].length)
++			continue;
++		ret = _write_mirrors(ios, i);
++		if (unlikely(ret))
++			goto err;
++	}
++
++	ios->done = _write_done;
++	return _io_exec(ios); /* In sync mode exec returns the io->status */
++
++err:
++	_io_free(ios);
++	return ret;
++}
++
++ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable)
++{
++	struct objio_state *ios = container_of(ol_state, struct objio_state,
++					       ol_state);
++	int ret;
++
++	/* TODO: ios->stable = stable; */
++	ret = _io_rw_pagelist(ios);
++	if (unlikely(ret))
++		return ret;
++
++	return _write_exec(ios);
++}
++
++/*
++ * Policy Operations
++ */
++
++/*
++ * Return the stripe size for the specified file
++ */
++ssize_t
++objlayout_get_stripesize(struct pnfs_layout_hdr *pnfslay)
++{
++	ssize_t sz, maxsz = -1;
++	struct pnfs_layout_segment *lseg;
++
++	list_for_each_entry(lseg, &pnfslay->segs, fi_list) {
++		int n;
++		struct objlayout_segment *objlseg = LSEG_LD_DATA(lseg);
++		struct pnfs_osd_layout *lo =
++			(struct pnfs_osd_layout *)objlseg->pnfs_osd_layout;
++		struct pnfs_osd_data_map *map = &lo->olo_map;
++
++		n = map->odm_group_width;
++		if (n == 0)
++			n = map->odm_num_comps / (map->odm_mirror_cnt + 1);
++
++		switch (map->odm_raid_algorithm) {
++		case PNFS_OSD_RAID_0:
++			break;
++
++		case PNFS_OSD_RAID_4:
++		case PNFS_OSD_RAID_5:
++			n -= 1;
++			break;
++
++		case PNFS_OSD_RAID_PQ:
++			n -= 2;
++			break;
++
++		default:
++			BUG_ON(1);
++		}
++		sz = map->odm_stripe_unit * n;
++		if (sz > maxsz)
++			maxsz = sz;
++	}
++	dprintk("%s: Return %Zx\n", __func__, maxsz);
++	return maxsz;
++}
++
++/*
++ * Get the max [rw]size
++ */
++static ssize_t
++objlayout_get_blocksize(void)
++{
++	/* Max [rw]size: the byte size of
++	 * BIO_MAX_PAGES_KMALLOC pages. */
++	return BIO_MAX_PAGES_KMALLOC * PAGE_SIZE;
++}
++
++static struct layoutdriver_policy_operations objlayout_policy_operations = {
++/*
++ * Don't gather across stripes, but rather gather (coalesce) up to
++ * the stripe size.
++ *
++ * FIXME: change interface to use merge_align, merge_count
++ */
++	.flags                 = PNFS_LAYOUTRET_ON_SETATTR,
++	.get_stripesize        = objlayout_get_stripesize,
++	.get_blocksize         = objlayout_get_blocksize,
++};
++
++static struct pnfs_layoutdriver_type objlayout_type = {
++	.id = LAYOUT_OSD2_OBJECTS,
++	.name = "LAYOUT_OSD2_OBJECTS",
++	.ld_io_ops = &objlayout_io_operations,
++	.ld_policy_ops = &objlayout_policy_operations,
++};
++
++void *objio_init_mt(void)
++{
++	struct objio_mount_type *mt = kzalloc(sizeof(*mt), GFP_KERNEL);
++
++	/* on success start with an empty device list and its lock */
++	if (mt) {
++		INIT_LIST_HEAD(&mt->dev_list);
++		spin_lock_init(&mt->dev_list_lock);
++	}
++	return mt ? mt : ERR_PTR(-ENOMEM);
++}
++
++void objio_fini_mt(void *mountid)
++{
++	_dev_list_remove_all(mountid);
++	kfree(mountid);
++}
++
++MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
++MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
++MODULE_LICENSE("GPL");
++
++static int __init
++objlayout_init(void)
++{
++	pnfs_client_ops = pnfs_register_layoutdriver(&objlayout_type);
++	printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n",
++	       __func__);
++	return 0;
++}
++
++static void __exit
++objlayout_exit(void)
++{
++	pnfs_unregister_layoutdriver(&objlayout_type);
++	printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n",
++	       __func__);
++}
++
++module_init(objlayout_init);
++module_exit(objlayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c.orig	2010-09-30 10:17:08.722997000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.c	2010-09-30 10:17:08.724995000 -0400
+@@ -0,0 +1,790 @@
++/*
++ *  objlayout.c
++ *
++ *  pNFS layout driver for Panasas OSDs
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <scsi/osd_initiator.h>
++#include "objlayout.h"
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++struct pnfs_client_operations *pnfs_client_ops;
++
++/*
++ * Create an objlayout layout structure for the given inode and return it.
++ */
++static struct pnfs_layout_hdr *
++objlayout_alloc_layout(struct inode *inode)
++{
++	struct objlayout *objlay = kzalloc(sizeof(*objlay), GFP_KERNEL);
++
++	dprintk("%s: Return %p\n", __func__, objlay);
++	if (!objlay)	/* do not rely on pnfs_layout being at offset 0 */
++		return NULL;
++
++	spin_lock_init(&objlay->lock);
++	INIT_LIST_HEAD(&objlay->err_list);
++	return &objlay->pnfs_layout;
++}
++
++/*
++ * Free an objlayout layout structure
++ */
++static void
++objlayout_free_layout(struct pnfs_layout_hdr *lo)
++{
++	struct objlayout *objlay = OBJLAYOUT(lo);
++
++	dprintk("%s: objlay %p\n", __func__, objlay);
++
++	WARN_ON(!list_empty(&objlay->err_list));
++	kfree(objlay);
++}
++
++/*
++ * Unmarshall layout and store it in pnfslay.
++ */
++static struct pnfs_layout_segment *
++objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
++		     struct nfs4_layoutget_res *lgr)
++{
++	int status;
++	void *layout = lgr->layout.buf;
++	struct pnfs_layout_segment *lseg;
++	struct objlayout_segment *objlseg;
++	struct pnfs_osd_layout *pnfs_osd_layout;
++
++	dprintk("%s: Begin pnfslay %p layout %p\n", __func__, pnfslay, layout);
++
++	BUG_ON(!layout);
++
++	status = -ENOMEM;
++	lseg = kzalloc(sizeof(*lseg) + sizeof(*objlseg) +
++		       pnfs_osd_layout_incore_sz(layout), GFP_KERNEL);
++	if (!lseg)
++		goto err;
++
++	objlseg = LSEG_LD_DATA(lseg);
++	pnfs_osd_layout = (struct pnfs_osd_layout *)objlseg->pnfs_osd_layout;
++	pnfs_osd_xdr_decode_layout(pnfs_osd_layout, layout);
++
++	status = objio_alloc_lseg(&objlseg->internal, pnfslay, lseg,
++				  pnfs_osd_layout);
++	if (status)
++		goto err;
++
++	dprintk("%s: Return %p\n", __func__, lseg);
++	return lseg;
++
++ err:
++	kfree(lseg);
++	return ERR_PTR(status);
++}
++
++/*
++ * Free a layout segment
++ */
++static void
++objlayout_free_lseg(struct pnfs_layout_segment *lseg)
++{
++	dprintk("%s: freeing layout segment %p\n", __func__, lseg);
++
++	/* nothing to release for a NULL segment */
++	if (likely(lseg)) {
++		struct objlayout_segment *seg_data = LSEG_LD_DATA(lseg);
++
++		/* release the objio part before the segment pointing at it */
++		objio_free_lseg(seg_data->internal);
++		kfree(lseg);
++	}
++}
++
++/*
++ * I/O Operations
++ */
++static inline u64
++end_offset(u64 start, u64 len)
++{
++	u64 sum = start + len;
++
++	/* a wrapped sum saturates at the largest 64-bit offset */
++	return sum < start ? NFS4_MAX_UINT64 : sum;
++}
++
++/* last octet in a range */
++static inline u64
++last_byte_offset(u64 start, u64 len)
++{
++	BUG_ON(!len);
++	/* a wrapped start + len means the range runs to the top offset */
++	if (start + len <= start)
++		return NFS4_MAX_UINT64;
++	return start + len - 1;
++}
++
++/* Set up an io_state for @count bytes at @offset of @lseg; NULL on failure */
++static struct objlayout_io_state *
++objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type,
++			struct page **pages,
++			unsigned pgbase,
++			unsigned nr_pages,
++			loff_t offset,
++			size_t count,
++			struct pnfs_layout_segment *lseg,
++			void *rpcdata)
++{
++	struct objlayout_segment *objlseg = LSEG_LD_DATA(lseg);
++	struct objlayout_io_state *state;
++	u64 lseg_end_offset;
++	size_t size_nr_pages;
++
++	dprintk("%s: allocating io_state\n", __func__);
++	if (objio_alloc_io_state(objlseg->internal, &state))
++		return NULL;
++
++	BUG_ON(offset < lseg->range.offset);
++	lseg_end_offset = end_offset(lseg->range.offset, lseg->range.length);
++	BUG_ON(offset >= lseg_end_offset);
++	if (offset + count > lseg_end_offset) {
++		count = lseg->range.length - (offset - lseg->range.offset);
++		dprintk("%s: truncated count %Zd\n", __func__, count);
++	}
++
++	if (pgbase >= PAGE_SIZE) {	/* skip pages wholly before the data */
++		unsigned n = pgbase >> PAGE_SHIFT;
++
++		pgbase &= ~PAGE_MASK;
++		pages += n;
++		nr_pages -= n;
++	}
++
++	size_nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
++	BUG_ON(nr_pages < size_nr_pages);
++	if (nr_pages > size_nr_pages)
++		nr_pages = size_nr_pages;
++
++	INIT_LIST_HEAD(&state->err_list);
++	state->lseg = lseg;
++	state->rpcdata = rpcdata;
++	state->pages = pages;
++	state->pgbase = pgbase;
++	state->nr_pages = nr_pages;
++	state->offset = offset;
++	state->count = count;
++	state->sync = 0;
++	return state;
++}
++
++static void
++objlayout_free_io_state(struct objlayout_io_state *state)
++{
++	dprintk("%s: freeing io_state\n", __func__);
++
++	/* tolerate NULL so callers need not check before freeing */
++	if (likely(state))
++		objio_free_io_state(state);
++}
++
++/*
++ * I/O done common code
++ */
++static void
++objlayout_iodone(struct objlayout_io_state *state)
++{
++	dprintk("%s: state %p status\n", __func__, state);
++
++	if (likely(state->status >= 0)) {
++		objlayout_free_io_state(state);
++	} else {	/* failed: park state for layout-return reporting */
++		struct objlayout *objlay = OBJLAYOUT(state->lseg->layout);
++
++		spin_lock(&objlay->lock);
++		objlay->delta_space_valid = OBJ_DSU_INVALID;
++		list_add(&state->err_list, &objlay->err_list);
++		spin_unlock(&objlay->lock);
++	}
++}
++
++/*
++ * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
++ *
++ * The @index component IO failed (error returned from target). Register
++ * the error for later reporting at layout-return.
++ */
++void
++objlayout_io_set_result(struct objlayout_io_state *state, unsigned index,
++			int osd_error, u64 offset, u64 length, bool is_write)
++{
++	struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index];
++
++	BUG_ON(index >= state->num_comps);
++	if (osd_error) {
++		struct objlayout_segment *objlseg = LSEG_LD_DATA(state->lseg);
++		struct pnfs_osd_layout *layout =
++				(typeof(layout))objlseg->pnfs_osd_layout;
++
++		ioerr->oer_component = layout->olo_comps[index].oc_object_id;
++		ioerr->oer_comp_offset = offset;
++		ioerr->oer_comp_length = length;
++		ioerr->oer_iswrite = is_write;
++		ioerr->oer_errno = osd_error;
++
++		dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
++			"par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
++			__func__, index, ioerr->oer_errno,
++			ioerr->oer_iswrite,
++			_DEVID_LO(&ioerr->oer_component.oid_device_id),
++			_DEVID_HI(&ioerr->oer_component.oid_device_id),
++			ioerr->oer_component.oid_partition_id,
++			ioerr->oer_component.oid_object_id,
++			ioerr->oer_comp_offset,
++			ioerr->oer_comp_length);
++	} else {
++		/* User need not call if no error is reported */
++		ioerr->oer_errno = 0;
++	}
++}
++
++static void _rpc_commit_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++
++	pnfs_client_ops->nfs_commit_complete(wdata);
++}
++
++/*
++ * Commit data remotely on OSDs
++ */
++enum pnfs_try_status
++objlayout_commit(struct nfs_write_data *wdata, int how)
++{
++	int status = PNFS_ATTEMPTED;
++
++	INIT_WORK(&wdata->task.u.tk_work, _rpc_commit_complete);
++	schedule_work(&wdata->task.u.tk_work);
++	dprintk("%s: Return %d\n", __func__, status);
++	return status;
++}
++
++/* Deferred via schedule_work() to call ->nfs_readlist_complete() from
++ * process context, because the osd completion is called with ints-off
++ * from the block layer.
++ */
++static void _rpc_read_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_read_data *rdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	rdata = container_of(task, struct nfs_read_data, task);
++
++	pnfs_client_ops->nfs_readlist_complete(rdata);
++}
++
++void
++objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
++{
++	int eof = state->eof;
++	struct nfs_read_data *rdata;
++
++	state->status = status;
++	dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof);
++	rdata = state->rpcdata;
++	rdata->task.tk_status = status;
++	if (status >= 0) {
++		rdata->res.count = status;
++		rdata->res.eof = eof;
++	}
++	objlayout_iodone(state);
++	/* must not use state after this point */
++
++	if (sync)
++		pnfs_client_ops->nfs_readlist_complete(rdata);
++	else {
++		INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
++		schedule_work(&rdata->task.u.tk_work);
++	}
++}
++
++/*
++ * Perform sync or async reads.
++ */
++enum pnfs_try_status
++objlayout_read_pagelist(struct nfs_read_data *rdata, unsigned nr_pages)
++{
++	loff_t offset = rdata->args.offset;
++	size_t count = rdata->args.count;
++	struct objlayout_io_state *state;
++	ssize_t status = 0;
++	loff_t eof;
++
++	dprintk("%s: Begin inode %p offset %llu count %d\n",
++		__func__, rdata->inode, offset, (int)count);
++
++	eof = i_size_read(rdata->inode);
++	if (unlikely(offset + count > eof)) {
++		if (offset >= eof) {
++			status = 0;
++			rdata->res.count = 0;
++			rdata->res.eof = 1;
++			goto out;
++		}
++		count = eof - offset;
++	}
++
++	state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout,
++					 rdata->args.pages, rdata->args.pgbase,
++					 nr_pages, offset, count,
++					 rdata->pdata.lseg, rdata);
++	if (unlikely(!state)) {
++		status = -ENOMEM;
++		goto out;
++	}
++
++	state->eof = state->offset + state->count >= eof;
++
++	status = objio_read_pagelist(state);
++ out:
++	dprintk("%s: Return status %Zd\n", __func__, status);
++	rdata->pdata.pnfs_error = status;
++	return PNFS_ATTEMPTED;
++}
++
++/* Deferred via schedule_work() to call ->nfs_writelist_complete() from
++ * process context, because the osd completion is called with ints-off
++ * from the block layer.
++ */
++static void _rpc_write_complete(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++
++	pnfs_client_ops->nfs_writelist_complete(wdata);
++}
++
++void
++objlayout_write_done(struct objlayout_io_state *state, ssize_t status,
++		     bool sync)
++{
++	struct nfs_write_data *wdata;
++
++	dprintk("%s: Begin\n", __func__);
++	wdata = state->rpcdata;
++	state->status = status;
++	wdata->task.tk_status = status;
++	if (status >= 0) {
++		wdata->res.count = status;
++		wdata->verf.committed = state->committed;
++		dprintk("%s: Return status %d committed %d\n",
++			__func__, wdata->task.tk_status,
++			wdata->verf.committed);
++	} else
++		dprintk("%s: Return status %d\n",
++			__func__, wdata->task.tk_status);
++	objlayout_iodone(state);
++	/* must not use state after this point */
++
++	if (sync)
++		pnfs_client_ops->nfs_writelist_complete(wdata);
++	else {
++		INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
++		schedule_work(&wdata->task.u.tk_work);
++	}
++}
++
++/*
++ * Perform sync or async writes.
++ */
++enum pnfs_try_status
++objlayout_write_pagelist(struct nfs_write_data *wdata,
++			 unsigned nr_pages,
++			 int how)
++{
++	struct objlayout_io_state *state;
++	ssize_t status;
++
++	dprintk("%s: Begin inode %p offset %llu count %u\n",
++		__func__, wdata->inode, wdata->args.offset, wdata->args.count);
++
++	state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout,
++					 wdata->args.pages,
++					 wdata->args.pgbase,
++					 nr_pages,
++					 wdata->args.offset,
++					 wdata->args.count,
++					 wdata->pdata.lseg, wdata);
++	if (unlikely(!state)) {
++		status = -ENOMEM;
++		goto out;
++	}
++
++	state->sync = how & FLUSH_SYNC;
++
++	status = objio_write_pagelist(state, how & FLUSH_STABLE);
++ out:
++	dprintk("%s: Return status %Zd\n", __func__, status);
++	wdata->pdata.pnfs_error = status;
++	return PNFS_ATTEMPTED;
++}
++
++/* Encode the objects layoutupdate for LAYOUTCOMMIT, preceded by its length */
++void
++objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay,
++			      struct xdr_stream *xdr,
++			      const struct nfs4_layoutcommit_args *args)
++{
++	struct objlayout *objlay = OBJLAYOUT(pnfslay);
++	struct pnfs_osd_layoutupdate lou;
++	__be32 *start;
++
++	dprintk("%s: Begin\n", __func__);
++
++	spin_lock(&objlay->lock);
++	lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
++	lou.dsu_delta = objlay->delta_space_used;
++	objlay->delta_space_used = 0;
++	objlay->delta_space_valid = OBJ_DSU_INIT;
++	lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
++	spin_unlock(&objlay->lock);
++
++	start = xdr_reserve_space(xdr, 4);	/* length, set below */
++	BUG_ON(!start);
++	BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));
++	*start = cpu_to_be32((xdr->p - start - 1) * 4);
++
++	dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
++		lou.dsu_delta, lou.olu_ioerr_flag);
++}
++
++static int
++err_prio(u32 oer_errno)
++{
++	switch (oer_errno) {
++	case 0:
++		return 0;
++
++	case PNFS_OSD_ERR_RESOURCE:
++		return OSD_ERR_PRI_RESOURCE;
++	case PNFS_OSD_ERR_BAD_CRED:
++		return OSD_ERR_PRI_BAD_CRED;
++	case PNFS_OSD_ERR_NO_ACCESS:
++		return OSD_ERR_PRI_NO_ACCESS;
++	case PNFS_OSD_ERR_UNREACHABLE:
++		return OSD_ERR_PRI_UNREACHABLE;
++	case PNFS_OSD_ERR_NOT_FOUND:
++		return OSD_ERR_PRI_NOT_FOUND;
++	case PNFS_OSD_ERR_NO_SPACE:
++		return OSD_ERR_PRI_NO_SPACE;
++	default:
++		WARN_ON(1);
++		/* fallthrough */
++	case PNFS_OSD_ERR_EIO:
++		return OSD_ERR_PRI_EIO;
++	}
++}
++
++/* Fold @src_err into @dest_err, the union of the errors merged so far. */
++static void
++merge_ioerr(struct pnfs_osd_ioerr *dest_err,
++	    const struct pnfs_osd_ioerr *src_err)
++{
++	u64 dest_end, src_end;
++
++	if (!dest_err->oer_errno) {
++		*dest_err = *src_err;
++		/* accumulated device must be blank */
++		memset(&dest_err->oer_component.oid_device_id, 0,
++			sizeof(dest_err->oer_component.oid_device_id));
++		return;
++	}
++
++	if (dest_err->oer_component.oid_partition_id !=
++				src_err->oer_component.oid_partition_id)
++		dest_err->oer_component.oid_partition_id = 0;
++
++	if (dest_err->oer_component.oid_object_id !=
++				src_err->oer_component.oid_object_id)
++		dest_err->oer_component.oid_object_id = 0;
++
++	/* take both ends before the start moves, or the old end is lost */
++	dest_end = end_offset(dest_err->oer_comp_offset,
++			      dest_err->oer_comp_length);
++	src_end = end_offset(src_err->oer_comp_offset,
++			     src_err->oer_comp_length);
++	if (dest_end < src_end)
++		dest_end = src_end;
++	if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
++		dest_err->oer_comp_offset = src_err->oer_comp_offset;
++	dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;
++
++	/* same direction: higher-priority errno wins; write beats read */
++	if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
++	    (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
++		dest_err->oer_errno = src_err->oer_errno;
++	} else if (src_err->oer_iswrite && !dest_err->oer_iswrite) {
++		dest_err->oer_iswrite = true;
++		dest_err->oer_errno = src_err->oer_errno;
++	}
++}
++
++static void
++encode_accumulated_error(struct objlayout *objlay, struct xdr_stream *xdr)
++{
++	struct objlayout_io_state *state, *tmp;
++	struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
++
++	list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
++		unsigned i;
++
++		for (i = 0; i < state->num_comps; i++) {
++			struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
++
++			if (!ioerr->oer_errno)
++				continue;
++
++			printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d "
++				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
++				"offset=0x%llx length=0x%llx\n",
++				__func__, i, ioerr->oer_errno,
++				ioerr->oer_iswrite,
++				_DEVID_LO(&ioerr->oer_component.oid_device_id),
++				_DEVID_HI(&ioerr->oer_component.oid_device_id),
++				ioerr->oer_component.oid_partition_id,
++				ioerr->oer_component.oid_object_id,
++				ioerr->oer_comp_offset,
++				ioerr->oer_comp_length);
++
++			merge_ioerr(&accumulated_err, ioerr);
++		}
++		list_del(&state->err_list);
++		objlayout_free_io_state(state);
++	}
++
++	BUG_ON(pnfs_osd_xdr_encode_ioerr(xdr, &accumulated_err));
++}
++
++void
++objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
++			      struct xdr_stream *xdr,
++			      const struct nfs4_layoutreturn_args *args)
++{
++	struct objlayout *objlay = OBJLAYOUT(pnfslay);
++	struct objlayout_io_state *state, *tmp;
++	__be32 *start, *uninitialized_var(last_xdr);
++
++	dprintk("%s: Begin\n", __func__);
++	start = xdr_reserve_space(xdr, 4);
++	BUG_ON(!start);
++
++	spin_lock(&objlay->lock);
++
++	list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
++		unsigned i;
++		int res = 0;
++
++		for (i = 0; i < state->num_comps && !res; i++) {
++			struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
++
++			if (!ioerr->oer_errno)
++				continue;
++
++			dprintk("%s: err[%d]: errno=%d is_write=%d "
++				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
++				"offset=0x%llx length=0x%llx\n",
++				__func__, i, ioerr->oer_errno,
++				ioerr->oer_iswrite,
++				_DEVID_LO(&ioerr->oer_component.oid_device_id),
++				_DEVID_HI(&ioerr->oer_component.oid_device_id),
++				ioerr->oer_component.oid_partition_id,
++				ioerr->oer_component.oid_object_id,
++				ioerr->oer_comp_offset,
++				ioerr->oer_comp_length);
++
++			last_xdr = xdr->p;
++			res = pnfs_osd_xdr_encode_ioerr(xdr, &state->ioerrs[i]);
++		}
++		if (unlikely(res)) {
++			/* no space for even one error descriptor */
++			BUG_ON(last_xdr == start + 1);
++
++			/* we've encountered a situation with lots and lots of
++			 * errors and no space to encode them all. Use the last
++			 * available slot to report the union of all the
++			 * remaining errors.
++			 */
++			xdr_rewind_stream(xdr, last_xdr -
++					       pnfs_osd_ioerr_xdr_sz() / 4);
++			encode_accumulated_error(objlay, xdr);
++			goto loop_done;
++		}
++		list_del(&state->err_list);
++		objlayout_free_io_state(state);
++	}
++loop_done:
++	spin_unlock(&objlay->lock);
++
++	*start = cpu_to_be32((xdr->p - start - 1) * 4);
++	dprintk("%s: Return\n", __func__);
++}
++
++struct objlayout_deviceinfo {
++	struct page *page;
++	struct pnfs_osd_deviceaddr da; /* This must be last */
++};
++
++/* Initialize and call nfs_getdeviceinfo, then decode and return a
++ * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo()
++ * should be called.
++ */
++int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
++	struct pnfs_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr)
++{
++	struct objlayout_deviceinfo *odi;
++	struct pnfs_device pd;
++	struct super_block *sb;
++	struct page *page;
++	size_t sz;
++	u32 *p;
++	int err;
++
++	page = alloc_page(GFP_KERNEL);
++	if (!page)
++		return -ENOMEM;
++
++	pd.area = page_address(page);
++
++	memcpy(&pd.dev_id, d_id, sizeof(*d_id));
++	pd.layout_type = LAYOUT_OSD2_OBJECTS;
++	pd.dev_notify_types = 0;
++	pd.pages = &page;
++	pd.pgbase = 0;
++	pd.pglen = PAGE_SIZE;
++	pd.mincount = 0;
++
++	sb = PNFS_INODE(pnfslay)->i_sb;
++	err = pnfs_client_ops->nfs_getdeviceinfo(PNFS_NFS_SERVER(pnfslay), &pd);
++	dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
++	if (err)
++		goto err_out;
++
++	p = pd.area;
++	sz = pnfs_osd_xdr_deviceaddr_incore_sz(p);
++	odi = kzalloc(sz + (sizeof(*odi) - sizeof(odi->da)), GFP_KERNEL);
++	if (!odi) {
++		err = -ENOMEM;
++		goto err_out;
++	}
++	pnfs_osd_xdr_decode_deviceaddr(&odi->da, p);
++	odi->page = page;
++	*deviceaddr = &odi->da;
++	return 0;
++
++err_out:
++	__free_page(page);
++	return err;
++}
++
++void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
++{
++	struct objlayout_deviceinfo *odi = container_of(deviceaddr,
++						struct objlayout_deviceinfo,
++						da);
++
++	__free_page(odi->page);
++	kfree(odi);
++}
++
++/*
++ * Initialize a mountpoint: allocate the per-mount objio state with
++ * objio_init_mt() and store it in server->pnfs_ld_data, where
++ * objlayout_uninitialize_mountpoint() later finds it.
++ * Returns 0 on success or the negative errno from objio_init_mt().
++ */
++static int
++objlayout_initialize_mountpoint(struct nfs_server *server,
++				const struct nfs_fh *mntfh)
++{
++	void *data;
++
++	data = objio_init_mt();
++	if (IS_ERR(data)) {
++		printk(KERN_INFO "%s: objlayout lib not ready err=%ld\n",
++		       __func__, PTR_ERR(data));
++		return PTR_ERR(data);
++	}
++	server->pnfs_ld_data = data;
++
++	dprintk("%s: Return data=%p\n", __func__, data);
++	return 0;
++}
++
++/*
++ * Uninitialize a mountpoint
++ */
++static int
++objlayout_uninitialize_mountpoint(struct nfs_server *server)
++{
++	dprintk("%s: Begin %p\n", __func__, server->pnfs_ld_data);
++	objio_fini_mt(server->pnfs_ld_data);
++	return 0;
++}
++
++struct layoutdriver_io_operations objlayout_io_operations = {
++	.commit                  = objlayout_commit,
++	.read_pagelist           = objlayout_read_pagelist,
++	.write_pagelist          = objlayout_write_pagelist,
++	.alloc_layout            = objlayout_alloc_layout,
++	.free_layout             = objlayout_free_layout,
++	.alloc_lseg              = objlayout_alloc_lseg,
++	.free_lseg               = objlayout_free_lseg,
++	.encode_layoutcommit	 = objlayout_encode_layoutcommit,
++	.encode_layoutreturn     = objlayout_encode_layoutreturn,
++	.initialize_mountpoint   = objlayout_initialize_mountpoint,
++	.uninitialize_mountpoint = objlayout_uninitialize_mountpoint,
++};
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h
+--- linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h.orig	2010-09-30 10:17:08.727996000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/objlayout.h	2010-09-30 10:17:08.729004000 -0400
+@@ -0,0 +1,171 @@
++/*
++ *  objlayout.h
++ *
++ *  Data types and function declarations for interfacing with the
++ *  pNFS standard object layout driver.
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *  Boaz Harrosh <bharrosh@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#ifndef _OBJLAYOUT_H
++#define _OBJLAYOUT_H
++
++#include <linux/nfs_fs.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/pnfs_osd_xdr.h>
++
++/*
++ * in-core layout segment
++ */
++struct objlayout_segment {
++	void *internal;    /* for provider internal use */
++	u8 pnfs_osd_layout[];
++};
++
++/*
++ * per-inode layout
++ */
++struct objlayout {
++	struct pnfs_layout_hdr pnfs_layout;
++
++	 /* for layout_commit */
++	enum osd_delta_space_valid_enum {
++		OBJ_DSU_INIT = 0,
++		OBJ_DSU_VALID,
++		OBJ_DSU_INVALID,
++	} delta_space_valid;
++	s64 delta_space_used;  /* consumed by write ops */
++
++	 /* for layout_return */
++	spinlock_t lock;
++	struct list_head err_list;
++};
++
++static inline struct objlayout *
++OBJLAYOUT(struct pnfs_layout_hdr *lo)
++{
++	return container_of(lo, struct objlayout, pnfs_layout);
++}
++
++/*
++ * per-I/O operation state
++ * embedded in objects provider io_state data structure
++ */
++struct objlayout_io_state {
++	struct pnfs_layout_segment *lseg;
++
++	struct page **pages;
++	unsigned pgbase;
++	unsigned nr_pages;
++	unsigned long count;
++	loff_t offset;
++	bool sync;
++
++	void *rpcdata;
++	int status;             /* res */
++	int eof;                /* res */
++	int committed;          /* res */
++
++	/* Error reporting (layout_return) */
++	struct list_head err_list;
++	unsigned num_comps;
++	/* Pointer to array of error descriptors of size num_comps.
++	 * It should contain as many entries as devices in the osd_layout
++	 * that participate in the I/O. It is up to the io_engine to allocate
++	 * needed space and set num_comps.
++	 */
++	struct pnfs_osd_ioerr *ioerrs;
++};
++
++/*
++ * Raid engine I/O API
++ */
++extern void *objio_init_mt(void);
++extern void objio_fini_mt(void *mt);
++
++extern int objio_alloc_lseg(void **outp,
++	struct pnfs_layout_hdr *pnfslay,
++	struct pnfs_layout_segment *lseg,
++	struct pnfs_osd_layout *layout);
++extern void objio_free_lseg(void *p);
++
++extern int objio_alloc_io_state(void *seg, struct objlayout_io_state **outp);
++extern void objio_free_io_state(struct objlayout_io_state *state);
++
++extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state);
++extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state,
++				    bool stable);
++
++/*
++ * callback API
++ */
++extern void objlayout_io_set_result(struct objlayout_io_state *state,
++				    unsigned index, int osd_error,
++				    u64 offset, u64 length, bool is_write);
++
++static inline void
++objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used)
++{
++	struct objlayout *objlay = OBJLAYOUT(state->lseg->layout);
++
++	/* Accumulate @space_used under objlay->lock. Once the report was
++	 * marked OBJ_DSU_INVALID (an I/O errored out) it stays invalid: the
++	 * protocol mandates that the DSU be accurate or not reported.
++	 */
++	spin_lock(&objlay->lock);
++	if (objlay->delta_space_valid != OBJ_DSU_INVALID) {
++		objlay->delta_space_valid = OBJ_DSU_VALID;
++		objlay->delta_space_used += space_used;
++	}
++	spin_unlock(&objlay->lock);
++}
++
++extern void objlayout_read_done(struct objlayout_io_state *state,
++				ssize_t status, bool sync);
++extern void objlayout_write_done(struct objlayout_io_state *state,
++				 ssize_t status, bool sync);
++
++extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
++	struct pnfs_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr);
++extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr);
++
++/*
++ * exported generic objects function vectors
++ */
++extern struct layoutdriver_io_operations objlayout_io_operations;
++extern struct pnfs_client_operations *pnfs_client_ops;
++
++#endif /* _OBJLAYOUT_H */
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c.orig	2010-09-30 10:17:08.731997000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.c	2010-09-30 10:17:08.733995000 -0400
+@@ -0,0 +1,734 @@
++/*
++ *  panfs_shim.c
++ *
++ *  Shim layer for interfacing with the Panasas DirectFlow module I/O stack
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ */
++
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <asm/byteorder.h>
++
++#include "objlayout.h"
++#include "panfs_shim.h"
++
++#include <linux/panfs_shim_api.h>
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++struct panfs_export_operations *panfs_export_ops;
++
++void *
++objio_init_mt(void)
++{
++	return panfs_export_ops == NULL ? ERR_PTR(-EAGAIN) : NULL;
++}
++
++void objio_fini_mt(void *mountid)
++{
++}
++
++static int
++panfs_shim_conv_raid01(struct pnfs_osd_layout *layout,
++		       struct pnfs_osd_data_map *lo_map,
++		       pan_agg_layout_hdr_t *hdr)
++{
++	if (lo_map->odm_mirror_cnt) {
++		hdr->type = PAN_AGG_RAID1;
++		hdr->hdr.raid1.num_comps = lo_map->odm_mirror_cnt + 1;
++	} else if (layout->olo_num_comps > 1) {
++		hdr->type = PAN_AGG_RAID0;
++		hdr->hdr.raid0.num_comps = layout->olo_num_comps;
++		hdr->hdr.raid0.stripe_unit = lo_map->odm_stripe_unit;
++	} else
++		hdr->type = PAN_AGG_SIMPLE;
++	return 0;
++}
++
++static int
++panfs_shim_conv_raid5(struct pnfs_osd_layout *layout,
++		      struct pnfs_osd_data_map *lo_map,
++		      pan_agg_layout_hdr_t *hdr)
++{
++	if (lo_map->odm_mirror_cnt)
++		goto err;
++
++	if (lo_map->odm_group_width || lo_map->odm_group_depth) {
++		if (!lo_map->odm_group_width || !lo_map->odm_group_depth)
++			goto err;
++
++		hdr->type = PAN_AGG_GRP_RAID5_LEFT;
++		hdr->hdr.grp_raid5_left.num_comps = lo_map->odm_num_comps;
++		if (hdr->hdr.grp_raid5_left.num_comps != lo_map->odm_num_comps)
++			goto err;
++		hdr->hdr.grp_raid5_left.stripe_unit = lo_map->odm_stripe_unit;
++		hdr->hdr.grp_raid5_left.rg_width = lo_map->odm_group_width;
++		hdr->hdr.grp_raid5_left.rg_depth = lo_map->odm_group_depth;
++		/* this is a guess, panasas server is not supposed to
++		   hand out layouts otherwise */
++		hdr->hdr.grp_raid5_left.group_layout_policy =
++			PAN_AGG_GRP_RAID5_LEFT_POLICY_ROUND_ROBIN;
++	} else {
++		hdr->type = PAN_AGG_RAID5_LEFT;
++		hdr->hdr.raid5_left.num_comps = lo_map->odm_num_comps;
++		if (hdr->hdr.raid5_left.num_comps != lo_map->odm_num_comps)
++			goto err;
++		hdr->hdr.raid5_left.stripe_unit2 =
++		hdr->hdr.raid5_left.stripe_unit1 =
++		hdr->hdr.raid5_left.stripe_unit0 = lo_map->odm_stripe_unit;
++	}
++
++	return 0;
++err:
++	return -EINVAL;
++}
++
++/* Convert a pnfs_osd data map into a Panasas aggregation layout header.
++ * Returns 0 on success, -EINVAL for unsupported or inconsistent layouts.
++ */
++static int
++panfs_shim_conv_pnfs_osd_data_map(
++	struct pnfs_osd_layout *layout,
++	pan_agg_layout_hdr_t *hdr)
++{
++	int status = -EINVAL;
++	struct pnfs_osd_data_map *lo_map = &layout->olo_map;
++
++	if (!layout->olo_num_comps) {
++		dprintk("%s: !!layout.n_comps(%u)\n", __func__,
++			layout->olo_num_comps);
++		goto err;
++	}
++
++	switch (lo_map->odm_raid_algorithm) {
++	case PNFS_OSD_RAID_0:
++		if (layout->olo_num_comps != lo_map->odm_num_comps ||
++		    layout->olo_comps_index) {
++			dprintk("%s: !!PNFS_OSD_RAID_0 "
++				"layout.n_comps(%u) map.n_comps(%u) "
++				"comps_index(%u)\n", __func__,
++				layout->olo_num_comps,
++				lo_map->odm_num_comps,
++				layout->olo_comps_index);
++			goto err;
++		}
++		status = panfs_shim_conv_raid01(layout, lo_map, hdr);
++		break;
++
++	case PNFS_OSD_RAID_5:
++		if (!lo_map->odm_group_width) {
++			if (layout->olo_num_comps != lo_map->odm_num_comps ||
++			    layout->olo_comps_index) {
++				dprintk("%s: !!PNFS_OSD_RAID_5 !group_width "
++					"layout.n_comps(%u)!=map.n_comps(%u) "
++					"|| comps_index(%u)\n", __func__,
++					layout->olo_num_comps,
++					lo_map->odm_num_comps,
++					layout->olo_comps_index);
++				goto err;
++			}
++		} else if ((layout->olo_num_comps != lo_map->odm_num_comps &&
++			    layout->olo_num_comps > lo_map->odm_group_width) ||
++			   (layout->olo_comps_index % lo_map->odm_group_width)){
++				dprintk("%s: !!PNFS_OSD_RAID_5 group_width(%u) "
++					"layout.n_comps(%u) map.n_comps(%u) "
++					"comps_index(%u)\n", __func__,
++					lo_map->odm_group_width,
++					layout->olo_num_comps,
++					lo_map->odm_num_comps,
++					layout->olo_comps_index);
++				goto err;
++			}
++		status = panfs_shim_conv_raid5(layout, lo_map, hdr);
++		break;
++
++	case PNFS_OSD_RAID_4:
++	case PNFS_OSD_RAID_PQ:
++	default:
++		dprintk("%s: !!PNFS_OSD_RAID_(%d)\n", __func__,
++			lo_map->odm_raid_algorithm);
++		goto err;
++	}
++
++	return status;	/* result of the RAID-specific converter */
++
++err:
++	return status;
++}
++
++/* Convert a pnfs_osd layout into a Panasas map-and-caps (pan_sm_map_cap_t),
++ * kzalloc'ed and returned in *outp; returns 0 or a negative errno.
++ */
++int
++objio_alloc_lseg(void **outp,
++	struct pnfs_layout_hdr *pnfslay,
++	struct pnfs_layout_segment *lseg,
++	struct pnfs_osd_layout *layout)
++{
++	int i, total_comps;
++	int status;
++	struct pnfs_osd_object_cred *lo_comp;
++	pan_size_t alloc_sz, local_sz;
++	pan_sm_map_cap_t *mcs = NULL;
++	u8 *buf;
++	pan_agg_comp_obj_t *pan_comp;
++	pan_sm_sec_t *pan_sec;
++
++	status = -EINVAL;
++	if (layout->olo_num_comps < layout->olo_map.odm_group_width) {
++		total_comps = layout->olo_comps_index + layout->olo_num_comps;
++	} else {
++		/* allocate full map, otherwise SAM gets confused */
++		total_comps = layout->olo_map.odm_num_comps;
++	}
++	alloc_sz = total_comps *
++		   (sizeof(pan_agg_comp_obj_t) + sizeof(pan_sm_sec_t));
++	for (i = 0; i < layout->olo_num_comps; i++) {
++		void *p = layout->olo_comps[i].oc_cap.cred;
++		if (panfs_export_ops->sm_sec_t_get_size_otw(
++			(pan_sm_sec_otw_t *)&p, &local_sz, NULL, NULL))
++			goto err;
++		alloc_sz += local_sz;
++	}
++
++	status = -ENOMEM;
++	mcs = kzalloc(sizeof(*mcs) + alloc_sz, GFP_KERNEL);
++	if (!mcs)
++		goto err;
++	buf = (u8 *)&mcs[1];
++
++	mcs->offset = lseg->range.offset;
++	mcs->length = lseg->range.length;
++#if 0
++	/* FIXME: for now */
++	mcs->expiration_time.ts_sec  = 0;
++	mcs->expiration_time.ts_nsec = 0;
++#endif
++	mcs->full_map.map_hdr.avail_state = PAN_AGG_OBJ_STATE_NORMAL;
++	status = panfs_shim_conv_pnfs_osd_data_map(layout,
++						   &mcs->full_map.layout_hdr);
++	if (status)
++		goto err;
++
++	mcs->full_map.components.size = total_comps;
++	mcs->full_map.components.data = (pan_agg_comp_obj_t *)buf;
++	buf += total_comps * sizeof(pan_agg_comp_obj_t);
++
++	mcs->secs.size = total_comps;
++	mcs->secs.data = (pan_sm_sec_t *)buf;
++	buf += total_comps * sizeof(pan_sm_sec_t);
++
++	lo_comp = layout->olo_comps;
++	pan_comp = mcs->full_map.components.data + layout->olo_comps_index;
++	pan_sec = mcs->secs.data + layout->olo_comps_index;
++	for (i = 0; i < layout->olo_num_comps; i++) {
++		void *p;
++		pan_stor_obj_id_t *obj_id = &mcs->full_map.map_hdr.obj_id;
++		struct pnfs_osd_objid *oc_obj_id = &lo_comp->oc_object_id;
++		u64 dev_id = __be64_to_cpup(
++			(__be64 *)oc_obj_id->oid_device_id.data + 1);
++
++		dprintk("%s: i=%d deviceid=%Lx:%Lx partition=%Lx object=%Lx\n",
++			__func__, i,
++			__be64_to_cpup((__be64 *)oc_obj_id->oid_device_id.data),
++			__be64_to_cpup((__be64 *)oc_obj_id->oid_device_id.data + 1),
++			oc_obj_id->oid_partition_id, oc_obj_id->oid_object_id);
++
++		if (i == 0) {
++			/* make up mgr_id to calm sam down */
++			pan_mgr_id_construct_artificial(PAN_MGR_SM, 0,
++							&obj_id->dev_id);
++			obj_id->grp_id = oc_obj_id->oid_partition_id;
++			obj_id->obj_id = oc_obj_id->oid_object_id;
++		}
++
++		if (obj_id->grp_id != lo_comp->oc_object_id.oid_partition_id) {
++			dprintk("%s: i=%d grp_id=0x%Lx oid_partition_id=0x%Lx\n",
++				__func__, i, (u64)obj_id->grp_id,
++				lo_comp->oc_object_id.oid_partition_id);
++			status = -EINVAL;
++			goto err;
++		}
++
++		if (obj_id->obj_id != lo_comp->oc_object_id.oid_object_id) {
++			dprintk("%s: i=%d obj_id=0x%Lx oid_object_id=0x%Lx\n",
++				__func__, i, obj_id->obj_id,
++				lo_comp->oc_object_id.oid_object_id);
++			status = -EINVAL;
++			goto err;
++		}
++
++		pan_comp->dev_id = dev_id;
++		if (!pan_stor_is_device_id_an_obsd_id(pan_comp->dev_id)) {
++			dprintk("%s: i=%d dev_id=0x%Lx not an obsd_id\n",
++				__func__, i, pan_comp->dev_id);
++			status = -EINVAL;
++			goto err;
++		}
++		if (lo_comp->oc_osd_version == PNFS_OSD_MISSING) {
++			dprintk("%s: degraded maps not supported yet\n",
++				__func__);
++			status = -ENOTSUPP;
++			goto err;
++		}
++		pan_comp->avail_state = PAN_AGG_COMP_STATE_NORMAL;
++		if (lo_comp->oc_cap_key_sec != PNFS_OSD_CAP_KEY_SEC_NONE) {
++			dprintk("%s: cap key security not supported yet\n",
++				__func__);
++			status = -ENOTSUPP;
++			goto err;
++		}
++
++		p = lo_comp->oc_cap.cred;
++		panfs_export_ops->sm_sec_t_unmarshall(
++			(pan_sm_sec_otw_t *)&p,
++			pan_sec,
++			buf,
++			alloc_sz,
++			NULL,
++			&local_sz);
++		buf += local_sz;
++		alloc_sz -= local_sz;
++
++		lo_comp++;
++		pan_comp++;
++		pan_sec++;
++	}
++
++	*outp = mcs;
++	dprintk("%s:Return mcs=%p\n", __func__, mcs);
++	return 0;
++
++err:
++	objio_free_lseg(mcs);
++	dprintk("%s:Error %d\n", __func__, status);
++	return status;
++}
++
++/*
++ * Free a Panasas map and caps type
++ */
++void
++objio_free_lseg(void *p)
++{
++	kfree(p);
++}
++
++/*
++ * I/O routines
++ */
++int
++objio_alloc_io_state(void *seg, struct objlayout_io_state **outp)
++{
++	struct panfs_shim_io_state *p;
++
++	dprintk("%s: allocating io_state\n", __func__);
++	p = kzalloc(sizeof(*p), GFP_KERNEL);
++	if (!p)
++		return -ENOMEM;
++
++	*outp = &p->ol_state;
++	return 0;
++}
++
++/*
++ * Free an I/O state
++ */
++void
++objio_free_io_state(struct objlayout_io_state *ol_state)
++{
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	int i;
++
++	dprintk("%s: freeing io_state\n", __func__);
++	for (i = 0; i < state->ol_state.nr_pages; i++)
++		kunmap(state->ol_state.pages[i]);
++
++	if (state->ucreds)
++		panfs_export_ops->ucreds_put(state->ucreds);
++	kfree(state->sg_list);
++	kfree(state);
++}
++
++static int
++panfs_shim_pages_to_sg(
++	struct panfs_shim_io_state *state,
++	struct page **pages,
++	unsigned int pgbase,
++	unsigned nr_pages,
++	size_t count)
++{
++	unsigned i, n;
++	pan_sg_entry_t *sg;
++
++	dprintk("%s pgbase %u nr_pages %u count %d "
++		"pg0 %p flags 0x%x index %llu\n",
++		__func__, pgbase, nr_pages, (int)count, pages[0],
++		(unsigned)pages[0]->flags, (unsigned long long)pages[0]->index);
++
++	sg = kmalloc(nr_pages * sizeof(*sg), GFP_KERNEL);
++	if (sg == NULL)
++		return -ENOMEM;
++
++	dprintk("%s sg_list %p pages %p pgbase %u nr_pages %u\n",
++		__func__, sg, pages, pgbase, nr_pages);
++
++	for (i = 0; i < nr_pages; i++) {
++		sg[i].buffer = (char *)kmap(pages[i]) + pgbase;
++		n = PAGE_SIZE - pgbase;
++		pgbase = 0;
++		if (n > count)
++			n = count;
++		sg[i].chunk_size = n;
++		count -= n;
++		if (likely(count)) {
++			sg[i].next = &sg[i+1];
++		} else {
++			/* we're done */
++			sg[i].next = NULL;
++			break;
++		}
++	}
++	BUG_ON(count);
++
++	state->sg_list = sg;
++	return 0;
++}
++
++/*
++ * Callback function for async reads
++ */
++static void
++panfs_shim_read_done(
++	void *arg1,
++	void *arg2,
++	pan_sam_read_res_t *res_p,
++	pan_status_t rc)
++{
++	struct panfs_shim_io_state *state = arg1;
++	ssize_t status;
++
++	dprintk("%s: Begin\n", __func__);
++	if (!res_p)
++		res_p = &state->u.read.res;
++	if (rc == PAN_SUCCESS)
++		rc = res_p->result;
++	if (rc == PAN_SUCCESS) {
++		status = res_p->length;
++		WARN_ON(status < 0);
++	} else {
++		status = -panfs_export_ops->convert_rc(rc);
++		dprintk("%s: pan_sam_read rc %d: status %Zd\n",
++			__func__, rc, status);
++	}
++	dprintk("%s: Return status %Zd rc %d\n", __func__, status, rc);
++	objlayout_read_done(&state->ol_state, status, true);
++}
++
++ssize_t
++objio_read_pagelist(struct objlayout_io_state *ol_state)
++{
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	struct objlayout_segment *lseg = LSEG_LD_DATA(ol_state->lseg);
++	pan_sm_map_cap_t *mcs = (pan_sm_map_cap_t *)lseg->internal;
++	ssize_t status = 0;
++	pan_status_t rc = PAN_SUCCESS;
++
++	dprintk("%s: Begin\n", __func__);
++
++	status = panfs_shim_pages_to_sg(state, ol_state->pages,
++					ol_state->pgbase, ol_state->nr_pages,
++					ol_state->count);
++	if (unlikely(status))
++		goto err;
++
++	state->obj_sec.min_security = 0;
++	state->obj_sec.map_ccaps = mcs;
++
++	rc = panfs_export_ops->ucreds_get(&state->ucreds);
++	if (unlikely(rc)) {
++		status = -EACCES;
++		goto err;
++	}
++
++	state->u.read.args.obj_id = mcs->full_map.map_hdr.obj_id;
++	state->u.read.args.offset = ol_state->offset;
++	rc = panfs_export_ops->sam_read(PAN_SAM_ACCESS_BYPASS_TIMESTAMP,
++					&state->u.read.args,
++					&state->obj_sec,
++					state->sg_list,
++					state->ucreds,
++					ol_state->sync ?
++						NULL : panfs_shim_read_done,
++					state, NULL,
++					&state->u.read.res);
++	if (rc != PAN_ERR_IN_PROGRESS)
++		panfs_shim_read_done(state, NULL, &state->u.read.res, rc);
++ err:
++	dprintk("%s: Return %Zd\n", __func__, status);
++	return status;
++}
++
++/*
++ * Callback function for async writes
++ */
++static void
++panfs_shim_write_done(
++	void *arg1,
++	void *arg2,
++	pan_sam_write_res_t *res_p,
++	pan_status_t rc)
++{
++	struct panfs_shim_io_state *state = arg1;
++	ssize_t status;
++
++	dprintk("%s: Begin\n", __func__);
++	if (!res_p)
++		res_p = &state->u.write.res;
++	if (rc == PAN_SUCCESS)
++		rc = res_p->result;
++	if (rc == PAN_SUCCESS) {
++/*		state->ol_state.committed = NFS_FILE_SYNC;*/
++		state->ol_state.committed = NFS_UNSTABLE;
++		status = res_p->length;
++		WARN_ON(status < 0);
++
++		objlayout_add_delta_space_used(&state->ol_state,
++					       res_p->delta_capacity_used);
++	} else {
++		status = -panfs_export_ops->convert_rc(rc);
++		dprintk("%s: pan_sam_write rc %u: status %Zd\n",
++			__func__, rc, status);
++	}
++	dprintk("%s: Return status %Zd rc %d\n", __func__, status, rc);
++	objlayout_write_done(&state->ol_state, status, true);
++}
++
++ssize_t
++objio_write_pagelist(struct objlayout_io_state *ol_state,
++		     bool stable /* unused, PanOSD writes are stable */)
++{
++	struct panfs_shim_io_state *state = container_of(ol_state,
++					struct panfs_shim_io_state, ol_state);
++	struct objlayout_segment *lseg = LSEG_LD_DATA(ol_state->lseg);
++	pan_sm_map_cap_t *mcs = (pan_sm_map_cap_t *)lseg->internal;
++	ssize_t status = 0;
++	pan_status_t rc = PAN_SUCCESS;
++
++	dprintk("%s: Begin\n", __func__);
++
++	status = panfs_shim_pages_to_sg(state, ol_state->pages,
++					ol_state->pgbase, ol_state->nr_pages,
++					ol_state->count);
++	if (unlikely(status))
++		goto err;
++
++	state->obj_sec.min_security = 0;
++	state->obj_sec.map_ccaps = mcs;
++
++	rc = panfs_export_ops->ucreds_get(&state->ucreds);
++	if (unlikely(rc)) {
++		status = -EACCES;
++		goto err;
++	}
++
++	state->u.write.args.obj_id = mcs->full_map.map_hdr.obj_id;
++	state->u.write.args.offset = ol_state->offset;
++	rc = panfs_export_ops->sam_write(PAN_SAM_ACCESS_NONE,
++					 &state->u.write.args,
++					 &state->obj_sec,
++					 state->sg_list,
++					 state->ucreds,
++					 ol_state->sync ?
++						NULL : panfs_shim_write_done,
++					 state,
++					 NULL,
++					 &state->u.write.res);
++	if (rc != PAN_ERR_IN_PROGRESS)
++		panfs_shim_write_done(state, NULL, &state->u.write.res, rc);
++ err:
++	dprintk("%s: Return %Zd\n", __func__, status);
++	return status;
++}
++
++int
++panfs_shim_register(struct panfs_export_operations *ops)
++{
++	if (panfs_export_ops) {
++		printk(KERN_INFO
++		       "%s: panfs already registered (panfs ops %p)\n",
++		       __func__, panfs_export_ops);
++		return -EINVAL;
++	}
++
++	printk(KERN_INFO "%s: registering panfs ops %p\n",
++	       __func__, ops);
++
++	panfs_export_ops = ops;
++	return 0;
++}
++EXPORT_SYMBOL(panfs_shim_register);
++
++int
++panfs_shim_unregister(void)
++{
++	if (!panfs_export_ops) {
++		printk(KERN_INFO "%s: panfs is not registered\n", __func__);
++		return -EINVAL;
++	}
++
++	printk(KERN_INFO "%s: unregistering panfs ops %p\n",
++	       __func__, panfs_export_ops);
++
++	panfs_export_ops = NULL;
++	return 0;
++}
++EXPORT_SYMBOL(panfs_shim_unregister);
++
++/*
++ * Policy Operations
++ */
++
++/*
++ * Return the stripe size for the specified file
++ */
++ssize_t
++panlayout_get_stripesize(struct pnfs_layout_hdr *pnfslay)
++{
++	ssize_t sz, maxsz = -1;
++	struct pnfs_layout_segment *lseg;
++
++	dprintk("%s: Begin\n", __func__);
++
++	list_for_each_entry(lseg, &pnfslay->segs, fi_list) {
++		int n;
++		struct objlayout_segment *panlseg = LSEG_LD_DATA(lseg);
++		struct pnfs_osd_layout *lo =
++			(struct pnfs_osd_layout *)panlseg->pnfs_osd_layout;
++		struct pnfs_osd_data_map *map = &lo->olo_map;
++
++		n = map->odm_group_width;
++		if (n == 0)
++			n = map->odm_num_comps / (map->odm_mirror_cnt + 1);
++
++		switch (map->odm_raid_algorithm) {
++		case PNFS_OSD_RAID_0:
++			break;
++
++		case PNFS_OSD_RAID_4:
++		case PNFS_OSD_RAID_5:
++			n -= 1;
++			n *= 8;	/* FIXME: until we have 2-D coalescing */
++			break;
++
++		case PNFS_OSD_RAID_PQ:
++			n -= 2;
++			break;
++
++		default:
++			BUG_ON(1);
++		}
++		sz = map->odm_stripe_unit * n;
++		if (sz > maxsz)
++			maxsz = sz;
++	}
++	dprintk("%s: Return %Zd\n", __func__, maxsz);
++	return maxsz;
++}
++
++#define PANLAYOUT_DEF_STRIPE_UNIT    (64*1024)
++#define PANLAYOUT_DEF_STRIPE_WIDTH   9
++#define PANLAYOUT_MAX_STRIPE_WIDTH   11
++#define PANLAYOUT_MAX_GATHER_STRIPES 8
++
++/*
++ * Get the max [rw]size
++ */
++static ssize_t
++panlayout_get_blocksize(void)
++{
++	ssize_t sz = (PANLAYOUT_MAX_STRIPE_WIDTH-1) *
++		      PANLAYOUT_DEF_STRIPE_UNIT *
++		      PANLAYOUT_MAX_GATHER_STRIPES;
++	dprintk("%s: Return %Zd\n", __func__, sz);
++	return sz;
++}
++
++static struct layoutdriver_policy_operations panlayout_policy_operations = {
++/*
++ * Don't gather across stripes, but rather gather (coalesce) up to
++ * the stripe size.
++ *
++ * FIXME: change interface to use merge_align, merge_count
++ */
++	.flags                 = PNFS_LAYOUTRET_ON_SETATTR,
++	.get_stripesize        = panlayout_get_stripesize,
++	.get_blocksize         = panlayout_get_blocksize,
++};
++
++#define PNFS_LAYOUT_PANOSD (NFS4_PNFS_PRIVATE_LAYOUT | LAYOUT_OSD2_OBJECTS)
++
++static struct pnfs_layoutdriver_type panlayout_type = {
++	.id = PNFS_LAYOUT_PANOSD,
++	.name = "PNFS_LAYOUT_PANOSD",
++	.ld_io_ops = &objlayout_io_operations,
++	.ld_policy_ops = &panlayout_policy_operations,
++};
++
++MODULE_DESCRIPTION("pNFS Layout Driver for Panasas OSDs");
++MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
++MODULE_LICENSE("GPL");
++
++static int __init
++panlayout_init(void)
++{
++	pnfs_client_ops = pnfs_register_layoutdriver(&panlayout_type);
++	printk(KERN_INFO "%s: Registered Panasas OSD pNFS Layout Driver\n",
++	       __func__);
++	return 0;
++}
++
++static void __exit
++panlayout_exit(void)
++{
++	pnfs_unregister_layoutdriver(&panlayout_type);
++	printk(KERN_INFO "%s: Unregistered Panasas OSD pNFS Layout Driver\n",
++	       __func__);
++}
++
++module_init(panlayout_init);
++module_exit(panlayout_exit);
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h
+--- linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h.orig	2010-09-30 10:17:08.736995000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/panfs_shim.h	2010-09-30 10:17:08.738995000 -0400
+@@ -0,0 +1,482 @@
++/*
++ *  panfs_shim.h
++ *
++ *  Data types and external function declarations for interfacing with
++ *  panfs (Panasas DirectFlow) I/O stack
++ *
++ *  Copyright (C) 2007 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ * See the file COPYING included with this distribution for more details.
++ *
++ */
++
++#ifndef _PANLAYOUT_PANFS_SHIM_H
++#define _PANLAYOUT_PANFS_SHIM_H
++
++typedef s8 pan_int8_t;
++typedef u8 pan_uint8_t;
++typedef s16 pan_int16_t;
++typedef u16 pan_uint16_t;
++typedef s32 pan_int32_t;
++typedef u32 pan_uint32_t;
++typedef s64 pan_int64_t;
++typedef u64 pan_uint64_t;
++
++/*
++ * from pan_base_types.h
++ */
++typedef  pan_uint64_t pan_rpc_none_t;
++typedef pan_uint32_t  pan_rpc_arrdim_t;
++typedef pan_uint32_t  pan_status_t;
++typedef pan_uint8_t   pan_otw_t;
++typedef pan_uint8_t   pan_pad_t;
++
++typedef pan_uint32_t  pan_timespec_sec_t;
++typedef pan_uint32_t  pan_timespec_nsec_t;
++
++typedef  struct pan_timespec_s  pan_timespec_t;
++struct pan_timespec_s {
++  pan_timespec_sec_t   ts_sec;
++  pan_timespec_nsec_t  ts_nsec;
++};
++
++/*
++ * from pan_std_types.h
++ */
++typedef pan_uint32_t pan_size_t;
++typedef  int  pan_bool_t;
++
++/*
++ * from pan_common_error.h
++ */
++#define PAN_SUCCESS                                         ((pan_status_t)0)
++#define PAN_ERR_IN_PROGRESS                                 ((pan_status_t)55)
++
++/*
++ * from pan_sg.h
++ */
++typedef struct pan_sg_entry_s pan_sg_entry_t;
++struct pan_sg_entry_s {
++  void                  *buffer;       /* pointer to memory */
++  pan_uint32_t           chunk_size;   /* size of each chunk (bytes) */
++  pan_sg_entry_t        *next;
++};
++
++/*
++ * from pan_storage.h
++ */
++typedef pan_uint64_t pan_stor_dev_id_t;
++typedef pan_uint32_t pan_stor_obj_grp_id_t;
++typedef pan_uint64_t pan_stor_obj_uniq_t;
++typedef pan_uint32_t pan_stor_action_t;
++typedef pan_uint8_t pan_stor_cap_key_t[20];
++
++typedef pan_uint8_t pan_stor_key_type_t;
++typedef pan_uint64_t pan_stor_len_t;
++typedef pan_int64_t pan_stor_delta_len_t;
++typedef pan_uint64_t pan_stor_offset_t;
++typedef pan_uint16_t pan_stor_op_t;
++
++typedef pan_uint16_t pan_stor_sec_level_t;
++
++struct pan_stor_obj_id_s {
++  pan_stor_dev_id_t      dev_id;
++  pan_stor_obj_uniq_t    obj_id;
++  pan_stor_obj_grp_id_t  grp_id;
++};
++
++typedef struct pan_stor_obj_id_s pan_stor_obj_id_t;
++
++#define PAN_STOR_OP_NONE ((pan_stor_op_t) 0U)
++#define PAN_STOR_OP_READ ((pan_stor_op_t) 8U)
++#define PAN_STOR_OP_WRITE ((pan_stor_op_t) 9U)
++#define PAN_STOR_OP_APPEND ((pan_stor_op_t) 10U)
++#define PAN_STOR_OP_GETATTR ((pan_stor_op_t) 11U)
++#define PAN_STOR_OP_SETATTR ((pan_stor_op_t) 12U)
++#define PAN_STOR_OP_FLUSH ((pan_stor_op_t) 13U)
++#define PAN_STOR_OP_CLEAR ((pan_stor_op_t) 14U)
++
++/*
++ * from pan_aggregation_map.h
++ */
++typedef pan_uint8_t pan_agg_type_t;
++typedef pan_uint64_t pan_agg_map_version_t;
++typedef pan_uint8_t pan_agg_obj_state_t;
++typedef pan_uint8_t pan_agg_comp_state_t;
++typedef pan_uint8_t pan_agg_comp_flag_t;
++
++#define PAN_AGG_OBJ_STATE_INVALID ((pan_agg_obj_state_t) 0x00)
++#define PAN_AGG_OBJ_STATE_NORMAL ((pan_agg_obj_state_t) 0x01)
++#define PAN_AGG_OBJ_STATE_DEGRADED ((pan_agg_obj_state_t) 0x02)
++#define PAN_AGG_OBJ_STATE_RECONSTRUCT ((pan_agg_obj_state_t) 0x03)
++#define PAN_AGG_OBJ_STATE_COPYBACK ((pan_agg_obj_state_t) 0x04)
++#define PAN_AGG_OBJ_STATE_UNAVAILABLE ((pan_agg_obj_state_t) 0x05)
++#define PAN_AGG_OBJ_STATE_CREATING ((pan_agg_obj_state_t) 0x06)
++#define PAN_AGG_OBJ_STATE_DELETED ((pan_agg_obj_state_t) 0x07)
++#define PAN_AGG_COMP_STATE_INVALID ((pan_agg_comp_state_t) 0x00)
++#define PAN_AGG_COMP_STATE_NORMAL ((pan_agg_comp_state_t) 0x01)
++#define PAN_AGG_COMP_STATE_UNAVAILABLE ((pan_agg_comp_state_t) 0x02)
++#define PAN_AGG_COMP_STATE_COPYBACK ((pan_agg_comp_state_t) 0x03)
++#define PAN_AGG_COMP_F_NONE ((pan_agg_comp_flag_t) 0x00)
++#define PAN_AGG_COMP_F_ATTR_STORING ((pan_agg_comp_flag_t) 0x01)
++#define PAN_AGG_COMP_F_OBJ_CORRUPT_OBS ((pan_agg_comp_flag_t) 0x02)
++#define PAN_AGG_COMP_F_TEMP ((pan_agg_comp_flag_t) 0x04)
++
++struct pan_aggregation_map_s {
++  pan_agg_map_version_t  version;
++  pan_agg_obj_state_t    avail_state;
++  pan_stor_obj_id_t      obj_id;
++};
++
++typedef struct pan_aggregation_map_s pan_aggregation_map_t;
++
++struct pan_agg_comp_obj_s {
++  pan_stor_dev_id_t     dev_id;
++  pan_agg_comp_state_t  avail_state;
++  pan_agg_comp_flag_t   comp_flags;
++};
++
++typedef struct pan_agg_comp_obj_s pan_agg_comp_obj_t;
++
++struct pan_agg_simple_header_s {
++  pan_uint8_t  unused;
++};
++
++typedef struct pan_agg_simple_header_s pan_agg_simple_header_t;
++
++struct pan_agg_raid1_header_s {
++  pan_uint16_t  num_comps;
++};
++
++typedef struct pan_agg_raid1_header_s pan_agg_raid1_header_t;
++
++struct pan_agg_raid0_header_s {
++  pan_uint16_t  num_comps;
++  pan_uint32_t  stripe_unit;
++};
++
++typedef struct pan_agg_raid0_header_s pan_agg_raid0_header_t;
++
++struct pan_agg_raid5_left_header_s {
++  pan_uint16_t  num_comps;
++  pan_uint32_t  stripe_unit0;
++  pan_uint32_t  stripe_unit1;
++  pan_uint32_t  stripe_unit2;
++};
++
++typedef struct pan_agg_raid5_left_header_s pan_agg_raid5_left_header_t;
++
++typedef struct pan_agg_grp_raid5_left_header_s pan_agg_grp_raid5_left_header_t;
++
++struct pan_agg_grp_raid5_left_header_s {
++  pan_uint16_t  num_comps;
++  pan_uint32_t  stripe_unit;
++  pan_uint16_t  rg_width;
++  pan_uint16_t  rg_depth;
++  pan_uint8_t   group_layout_policy;
++};
++
++#define PAN_AGG_GRP_RAID5_LEFT_POLICY_INVALID ((pan_uint8_t) 0x00)
++#define PAN_AGG_GRP_RAID5_LEFT_POLICY_ROUND_ROBIN ((pan_uint8_t) 0x01)
++
++#define PAN_AGG_NULL_MAP ((pan_agg_type_t) 0x00)
++#define PAN_AGG_SIMPLE ((pan_agg_type_t) 0x01)
++#define PAN_AGG_RAID1 ((pan_agg_type_t) 0x02)
++#define PAN_AGG_RAID0 ((pan_agg_type_t) 0x03)
++#define PAN_AGG_RAID5_LEFT ((pan_agg_type_t) 0x04)
++#define PAN_AGG_GRP_RAID5_LEFT ((pan_agg_type_t) 0x06)
++#define PAN_AGG_MINTYPE ((pan_agg_type_t) 0x01)
++#define PAN_AGG_MAXTYPE ((pan_agg_type_t) 0x06)
++
++struct pan_agg_layout_hdr_s {
++  pan_agg_type_t type;
++  pan_pad_t pad[3];
++  union {
++    pan_uint64_t                        null;
++    pan_agg_simple_header_t             simple;
++    pan_agg_raid1_header_t              raid1;
++    pan_agg_raid0_header_t              raid0;
++    pan_agg_raid5_left_header_t         raid5_left;
++    pan_agg_grp_raid5_left_header_t     grp_raid5_left;
++  } hdr;
++};
++
++typedef struct pan_agg_layout_hdr_s pan_agg_layout_hdr_t;
++
++struct pan_agg_comp_obj_a_s {
++  pan_rpc_arrdim_t size;
++  pan_agg_comp_obj_t *data;
++};
++typedef struct pan_agg_comp_obj_a_s pan_agg_comp_obj_a;
++
++struct pan_agg_full_map_s {
++  pan_aggregation_map_t  map_hdr;
++  pan_agg_layout_hdr_t   layout_hdr;
++  pan_agg_comp_obj_a     components;
++};
++
++typedef struct pan_agg_full_map_s pan_agg_full_map_t;
++
++/*
++ * from pan_obsd_rpc_types.h
++ */
++typedef pan_uint8_t pan_obsd_security_key_a[16];
++
++typedef pan_uint8_t pan_obsd_capability_key_a[20];
++
++typedef pan_uint8_t pan_obsd_key_holder_id_t;
++
++#define PAN_OBSD_KEY_HOLDER_BASIS_KEY ((pan_obsd_key_holder_id_t) 0x01)
++#define PAN_OBSD_KEY_HOLDER_CAP_KEY ((pan_obsd_key_holder_id_t) 0x02)
++
++struct pan_obsd_key_holder_s {
++  pan_obsd_key_holder_id_t select;
++  pan_pad_t pad[3];
++  union {
++    pan_obsd_security_key_a    basis_key;
++    pan_obsd_capability_key_a  cap_key;
++  } key;
++};
++
++typedef struct pan_obsd_key_holder_s pan_obsd_key_holder_t;
++
++/*
++ * from pan_sm_sec.h
++ */
++typedef pan_uint8_t pan_sm_sec_type_t;
++typedef pan_uint8_t pan_sm_sec_otw_allo_mode_t;
++
++struct pan_obsd_capability_generic_otw_t_s {
++  pan_rpc_arrdim_t size;
++  pan_uint8_t *data;
++};
++typedef struct pan_obsd_capability_generic_otw_t_s
++				pan_obsd_capability_generic_otw_t;
++
++struct pan_sm_sec_obsd_s {
++  pan_obsd_key_holder_t              key;
++  pan_obsd_capability_generic_otw_t  cap_otw;
++  pan_sm_sec_otw_allo_mode_t         allo_mode;
++};
++
++typedef struct pan_sm_sec_obsd_s pan_sm_sec_obsd_t;
++
++struct pan_sm_sec_s {
++  pan_sm_sec_type_t type;
++  pan_pad_t pad[3];
++  union {
++    pan_rpc_none_t     none;
++    pan_sm_sec_obsd_t  obsd;
++  } variant;
++};
++
++typedef struct pan_sm_sec_s pan_sm_sec_t;
++
++struct pan_sm_sec_a_s {
++  pan_rpc_arrdim_t size;
++  pan_sm_sec_t *data;
++};
++typedef struct pan_sm_sec_a_s pan_sm_sec_a;
++typedef pan_otw_t *pan_sm_sec_otw_t;
++
++/*
++ * from pan_sm_types.h
++ */
++typedef pan_uint64_t pan_sm_cap_handle_t;
++
++struct pan_sm_map_cap_s {
++  pan_agg_full_map_t   full_map;
++  pan_stor_offset_t    offset;
++  pan_stor_len_t       length;
++  pan_sm_sec_a         secs;
++  pan_sm_cap_handle_t  handle;
++  pan_timespec_t       expiration_time;
++  pan_stor_action_t    action_mask;
++  pan_uint32_t         flags;
++};
++
++typedef struct pan_sm_map_cap_s pan_sm_map_cap_t;
++
++/*
++ * from pan_sm_ops.h
++ */
++typedef pan_rpc_none_t pan_sm_cache_ptr_t;
++
++/*
++ * from pan_sam_api.h
++ */
++typedef pan_uint32_t    pan_sam_access_flags_t;
++
++typedef struct pan_sam_dev_error_s  pan_sam_dev_error_t;
++struct pan_sam_dev_error_s {
++    pan_stor_dev_id_t       dev_id;
++    pan_stor_op_t           stor_op;
++    pan_status_t            error;
++};
++
++typedef struct pan_sam_ext_status_s pan_sam_ext_status_t;
++struct pan_sam_ext_status_s {
++    pan_uint32_t        available;
++    pan_uint32_t        size;
++    pan_sam_dev_error_t *errors;
++};
++
++enum pan_sam_rpc_sec_sel_e {
++    PAN_SAM_RPC_SEC_DEFAULT,
++    PAN_SAM_RPC_SEC_ATLEAST,
++    PAN_SAM_RPC_SEC_EXACTLY
++};
++typedef enum pan_sam_rpc_sec_sel_e pan_sam_rpc_sec_sel_t;
++
++typedef struct pan_sam_obj_sec_s pan_sam_obj_sec_t;
++struct pan_sam_obj_sec_s {
++    pan_stor_sec_level_t    min_security;
++    pan_sm_map_cap_t        *map_ccaps;
++};
++
++typedef struct  pan_sam_rpc_sec_s   pan_sam_rpc_sec_t;
++struct pan_sam_rpc_sec_s {
++    pan_sam_rpc_sec_sel_t   selector;
++};
++
++typedef struct pan_sam_read_args_s pan_sam_read_args_t;
++struct pan_sam_read_args_s {
++    pan_stor_obj_id_t                obj_id;
++    pan_sm_cache_ptr_t               obj_ent;
++    void                            *return_attr;
++    void                            *checksum;
++    pan_stor_offset_t                offset;
++    pan_uint16_t                     sm_options;
++    void                            *callout;
++    void                            *callout_arg;
++};
++
++typedef struct pan_sam_read_res_s pan_sam_read_res_t;
++struct pan_sam_read_res_s {
++    pan_status_t             result;
++    pan_sam_ext_status_t     ext_status;
++    pan_stor_len_t           length;
++    void                    *attr;
++    void                    *checksum;
++};
++
++typedef void (*pan_sam_read_cb_t)(
++    void                *user_arg1,
++    void                *user_arg2,
++    pan_sam_read_res_t  *res_p,
++    pan_status_t        status);
++
++#define PAN_SAM_ACCESS_NONE                             0x0000
++#define PAN_SAM_ACCESS_BYPASS_TIMESTAMP                 0x0020
++
++typedef struct pan_sam_write_args_s pan_sam_write_args_t;
++struct pan_sam_write_args_s {
++    pan_stor_obj_id_t   obj_id;
++    pan_sm_cache_ptr_t  obj_ent;
++    pan_stor_offset_t   offset;
++    void                *attr;
++    void                *return_attr;
++};
++
++typedef struct pan_sam_write_res_s pan_sam_write_res_t;
++struct pan_sam_write_res_s {
++    pan_status_t            result;
++    pan_sam_ext_status_t    ext_status;
++    pan_stor_len_t          length;
++    pan_stor_delta_len_t    delta_capacity_used;
++    pan_bool_t              parity_dirty;
++    void                   *attr;
++};
++
++typedef void (*pan_sam_write_cb_t)(
++    void                *user_arg1,
++    void                *user_arg2,
++    pan_sam_write_res_t *res_p,
++    pan_status_t        status);
++
++/*
++ * from pan_mgr_types.h
++ */
++#define PAN_MGR_ID_TYPE_SHIFT 56
++#define PAN_MGR_ID_TYPE_MASK ((pan_mgr_id_t)18374686479671623680ULL)
++#define PAN_MGR_ID_UNIQ_MASK ((pan_mgr_id_t)72057594037927935ULL)
++
++typedef pan_uint16_t pan_mgr_type_t;
++typedef pan_uint64_t pan_mgr_id_t;
++
++#define PAN_MGR_SM ((pan_mgr_type_t) 2U)
++#define PAN_MGR_OBSD ((pan_mgr_type_t) 6U)
++
++/*
++ * from pan_mgr_types_c.h; do/while(0) makes the expansion one statement
++ */
++#define pan_mgr_id_construct_artificial(_mgr_type_, _mgr_uniq_, _mgr_id_p_) \
++do { \
++  pan_mgr_id_t  _id1, _id2; \
++  _id1 = (_mgr_type_); \
++  _id1 <<= PAN_MGR_ID_TYPE_SHIFT; \
++  _id1 &= PAN_MGR_ID_TYPE_MASK; \
++  _id2 = (_mgr_uniq_); \
++  _id2 &= PAN_MGR_ID_UNIQ_MASK; \
++  _id1 |= _id2; \
++  *(_mgr_id_p_) = _id1; \
++} while (0)
++
++/*
++ * from pan_storage_c.h
++ */
++#define pan_stor_is_device_id_an_obsd_id(_device_id_) \
++    ((((_device_id_) & PAN_MGR_ID_TYPE_MASK) >> PAN_MGR_ID_TYPE_SHIFT) \
++	== PAN_MGR_OBSD)
++
++/*
++ * pnfs_shim internal definitions
++ */
++
++struct panfs_shim_io_state {
++	struct objlayout_io_state ol_state;
++
++	pan_sg_entry_t *sg_list;
++	pan_sam_obj_sec_t obj_sec;
++	void *ucreds;
++	union {
++		struct {
++			pan_sam_read_args_t args;
++			pan_sam_read_res_t res;
++		} read;
++		struct {
++			pan_sam_write_args_t args;
++			pan_sam_write_res_t res;
++		} write;
++	} u;
++};
++
++#endif /* _PANLAYOUT_PANFS_SHIM_H */
+diff -up linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+--- linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c.orig	2010-09-30 10:17:08.741996000 -0400
++++ linux-2.6.34.noarch/fs/nfs/objlayout/pnfs_osd_xdr_cli.c	2010-09-30 10:17:08.743002000 -0400
+@@ -0,0 +1,435 @@
++/*
++ *  pnfs_osd_xdr_cli.c
++ *
++ *  Object-Based pNFS Layout XDR layer
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/pnfs_osd_xdr.h>
++
++#define NFSDBG_FACILITY         NFSDBG_PNFS_LD
++
++/*
++ * The following implementation is based on these Internet Drafts:
++ *
++ * draft-ietf-nfsv4-minorversion-21
++ * draft-ietf-nfsv4-pnfs-obj-12
++ */
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_objid(u32 *p, struct pnfs_osd_objid *objid)
++{
++	COPYMEM(objid->oid_device_id.data, sizeof(objid->oid_device_id.data));
++	READ64(objid->oid_partition_id);
++	READ64(objid->oid_object_id);
++	return p;
++}
++
++static inline u32 *
++pnfs_osd_xdr_decode_opaque_cred(u32 *p,
++				struct pnfs_osd_opaque_cred *opaque_cred)
++{
++	READ32(opaque_cred->cred_len);
++	COPYMEM(opaque_cred->cred, opaque_cred->cred_len);
++	return p;
++}
++
++/*
++ * struct pnfs_osd_object_cred {
++ * 	struct pnfs_osd_objid		oc_object_id;
++ * 	u32				oc_osd_version;
++ * 	u32				oc_cap_key_sec;
++ * 	struct pnfs_osd_opaque_cred	oc_cap_key;
++ * 	struct pnfs_osd_opaque_cred	oc_cap;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_object_cred(u32 *p, struct pnfs_osd_object_cred *comp,
++				u8 **credp)
++{
++	u8 *cred;
++
++	p = pnfs_osd_xdr_decode_objid(p, &comp->oc_object_id);
++	READ32(comp->oc_osd_version);
++	READ32(comp->oc_cap_key_sec);
++	/* both creds land back to back at *credp, stepped in u32 units */
++	cred = *credp;
++	comp->oc_cap_key.cred = cred;
++	p = pnfs_osd_xdr_decode_opaque_cred(p, &comp->oc_cap_key);
++	cred = (u8 *)((u32 *)cred + XDR_QUADLEN(comp->oc_cap_key.cred_len));
++	comp->oc_cap.cred = cred;
++	p = pnfs_osd_xdr_decode_opaque_cred(p, &comp->oc_cap);
++	cred = (u8 *)((u32 *)cred + XDR_QUADLEN(comp->oc_cap.cred_len));
++	*credp = cred;	/* hand the next free byte back to the caller */
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_data_map {
++ * 	u32	odm_num_comps;
++ * 	u64	odm_stripe_unit;
++ * 	u32	odm_group_width;
++ * 	u32	odm_group_depth;
++ * 	u32	odm_mirror_cnt;
++ * 	u32	odm_raid_algorithm;
++ * };
++ */
++static inline u32 *
++pnfs_osd_xdr_decode_data_map(u32 *p, struct pnfs_osd_data_map *data_map)
++{
++	READ32(data_map->odm_num_comps);
++	READ64(data_map->odm_stripe_unit);
++	READ32(data_map->odm_group_width);
++	READ32(data_map->odm_group_depth);
++	READ32(data_map->odm_mirror_cnt);
++	READ32(data_map->odm_raid_algorithm);
++	dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u "
++		"odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n",
++		__func__,
++		data_map->odm_num_comps,
++		(unsigned long long)data_map->odm_stripe_unit,
++		data_map->odm_group_width,
++		data_map->odm_group_depth,
++		data_map->odm_mirror_cnt,
++		data_map->odm_raid_algorithm);
++	return p;
++}
++
++struct pnfs_osd_layout *
++pnfs_osd_xdr_decode_layout(struct pnfs_osd_layout *layout, u32 *p)
++{
++	int i;
++	u32 *start = p;
++	struct pnfs_osd_object_cred *comp;
++	u8 *cred;
++	/* comps array, then cred bytes, are placed right behind *layout */
++	p = pnfs_osd_xdr_decode_data_map(p, &layout->olo_map);
++	READ32(layout->olo_comps_index);
++	READ32(layout->olo_num_comps);	/* NOTE(review): not range-checked here */
++	layout->olo_comps = (struct pnfs_osd_object_cred *)(layout + 1);
++	comp = layout->olo_comps;
++	cred = (u8 *)(comp + layout->olo_num_comps);
++	dprintk("%s: comps_index=%u num_comps=%u\n",
++		__func__, layout->olo_comps_index, layout->olo_num_comps);
++	for (i = 0; i < layout->olo_num_comps; i++) {
++		p = pnfs_osd_xdr_decode_object_cred(p, comp, &cred);
++		dprintk("%s: comp[%d]=dev(%llx:%llx) par=0x%llx obj=0x%llx "
++			"key_len=%u cap_len=%u\n",
++			__func__, i,
++			_DEVID_LO(&comp->oc_object_id.oid_device_id),
++			_DEVID_HI(&comp->oc_object_id.oid_device_id),
++			comp->oc_object_id.oid_partition_id,
++			comp->oc_object_id.oid_object_id,
++			comp->oc_cap_key.cred_len, comp->oc_cap.cred_len);
++		comp++;
++	}
++	dprintk("%s: xdr_size=%Zd end=%p in_core_size=%Zd\n", __func__,
++	       (char *)p - (char *)start, cred, (char *)cred - (char *)layout);
++	return layout;
++}
++
++/*
++ * Get Device Information Decoding
++ *
++ * Note: since Device Information is currently done synchronously, most
++ *       of the actual fields are left inside the rpc buffer and are only
++ *       pointed to by the pnfs_osd_deviceaddr members. So the read buffer
++ *       should not be freed while the returned information is in use.
++ */
++/* Read one counted string; copy it to *freespace only when no NUL follows */
++u32 *__xdr_read_calc_nfs4_string(
++	u32 *p, struct nfs4_string *str, u8 **freespace)
++{
++	u32 len;
++	char *data;
++	bool need_copy;
++
++	READ32(len);
++	data = (char *)p;
++	/* NOTE(review): data[len] lies just past the string; confirm valid */
++	if (data[len]) { /* Not null terminated we'll need extra space */
++		data = *freespace;
++		*freespace += len + 1;
++		need_copy = true;
++	} else {
++		need_copy = false;
++	}
++	/* with str == NULL only the *freespace accounting above happens */
++	if (str) {
++		str->len = len;
++		str->data = data;
++		if (need_copy) {
++			memcpy(data, p, len);
++			data[len] = 0;
++		}
++	}
++
++	p += XDR_QUADLEN(len);
++	return p;
++}
++
++u32 *__xdr_read_calc_u8_opaque(
++	u32 *p, struct nfs4_string *str)
++{
++	u32 len;
++
++	READ32(len);
++
++	if (str) {
++		str->len = len;
++		str->data = (char *)p;
++	}
++
++	p += XDR_QUADLEN(len);
++	return p;
++}
++
++/*
++ * struct pnfs_osd_targetid {
++ * 	u32			oti_type;
++ * 	struct nfs4_string	oti_scsi_device_id;
++ * };
++ */
++u32 *__xdr_read_calc_targetid(
++	u32 *p, struct pnfs_osd_targetid *targetid, u8 **freespace)
++{
++	struct nfs4_string *dev_id = NULL;
++	u32 oti_type;
++
++	READ32(oti_type);
++	if (targetid) {
++		targetid->oti_type = oti_type;
++		dev_id = &targetid->oti_scsi_device_id;
++	}
++
++	/* only the two SCSI target types are followed by an opaque id */
++	if (oti_type == OBJ_TARGET_SCSI_NAME ||
++	    oti_type == OBJ_TARGET_SCSI_DEVICE_ID)
++		p = __xdr_read_calc_u8_opaque(p, dev_id);
++	return p;
++}
++
++/*
++ * struct pnfs_osd_net_addr {
++ * 	struct nfs4_string	r_netid;
++ * 	struct nfs4_string	r_addr;
++ * };
++ */
++u32 *__xdr_read_calc_net_addr(
++	u32 *p, struct pnfs_osd_net_addr* netaddr, u8 **freespace)
++{
++
++	p = __xdr_read_calc_nfs4_string(p,
++			netaddr ? &netaddr->r_netid : NULL,
++			freespace);
++
++	p = __xdr_read_calc_nfs4_string(p,
++			netaddr ? &netaddr->r_addr : NULL,
++			freespace);
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_targetaddr {
++ * 	u32				ota_available;
++ * 	struct pnfs_osd_net_addr	ota_netaddr;
++ * };
++ */
++u32 *__xdr_read_calc_targetaddr(
++	u32 *p, struct pnfs_osd_targetaddr *targetaddr, u8 **freespace)
++{
++	u32 ota_available;
++
++	READ32(ota_available);
++	if (targetaddr)
++		targetaddr->ota_available = ota_available;
++
++	if (ota_available) {
++		p = __xdr_read_calc_net_addr(p,
++				targetaddr ? &targetaddr->ota_netaddr : NULL,
++				freespace);
++	}
++
++	return p;
++}
++
++/*
++ * struct pnfs_osd_deviceaddr {
++ * 	struct pnfs_osd_targetid	oda_targetid;
++ * 	struct pnfs_osd_targetaddr	oda_targetaddr;
++ * 	u8				oda_lun[8];
++ * 	struct nfs4_string		oda_systemid;
++ * 	struct pnfs_osd_object_cred	oda_root_obj_cred;
++ * 	struct nfs4_string		oda_osdname;
++ * };
++ */
++u32 *__xdr_read_calc_deviceaddr(
++	u32 *p, struct pnfs_osd_deviceaddr *deviceaddr, u8 **freespace)
++{
++	p = __xdr_read_calc_targetid(p,
++			deviceaddr ? &deviceaddr->oda_targetid : NULL,
++			freespace);
++
++	p = __xdr_read_calc_targetaddr(p,
++			deviceaddr ? &deviceaddr->oda_targetaddr : NULL,
++			freespace);
++
++	if (deviceaddr)
++		COPYMEM(deviceaddr->oda_lun, sizeof(deviceaddr->oda_lun));
++	else
++		p += XDR_QUADLEN(sizeof(deviceaddr->oda_lun));
++
++	p = __xdr_read_calc_u8_opaque(p,
++			deviceaddr ? &deviceaddr->oda_systemid : NULL);
++
++	if (deviceaddr) {
++		p = pnfs_osd_xdr_decode_object_cred(p,
++				&deviceaddr->oda_root_obj_cred, freespace);
++	} else {
++		*freespace += pnfs_osd_object_cred_incore_sz(p);
++		p += pnfs_osd_object_cred_xdr_sz(p);
++	}
++
++	p = __xdr_read_calc_u8_opaque(p,
++			deviceaddr ? &deviceaddr->oda_osdname : NULL);
++
++	return p;
++}
++
++size_t pnfs_osd_xdr_deviceaddr_incore_sz(u32 *p)
++{
++	u8 *null_freespace = NULL;
++	size_t sz;
++
++	__xdr_read_calc_deviceaddr(p, NULL, &null_freespace);
++	sz = sizeof(struct pnfs_osd_deviceaddr) + (size_t)null_freespace;
++	/* null_freespace started at NULL, so its value is the extra bytes */
++	return sz;
++}
++
++void pnfs_osd_xdr_decode_deviceaddr(
++	struct pnfs_osd_deviceaddr *deviceaddr, u32 *p)
++{
++	u8 *freespace = (u8 *)(deviceaddr + 1);
++
++	__xdr_read_calc_deviceaddr(p, deviceaddr, &freespace);
++}
++
++/*
++ * Encode struct pnfs_osd_layoutupdate { u32 dsu_valid; s64 dsu_delta;
++ * u32 olu_ioerr_flag; } into @xdr.  dsu_delta is written only when
++ * dsu_valid is set, so reserve 16 bytes in that case and 8 otherwise;
++ * reserving 16 unconditionally left 8 unwritten bytes in the stream.
++ * Returns 0, or -E2BIG when xdr_reserve_space() fails.
++ */
++int
++pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
++				 struct pnfs_osd_layoutupdate *lou)
++{
++	__be32 *p = xdr_reserve_space(xdr, lou->dsu_valid ? 16 : 8);
++
++	if (!p)
++		return -E2BIG;
++
++	*p++ = cpu_to_be32(lou->dsu_valid);
++	if (lou->dsu_valid)
++		p = xdr_encode_hyper(p, lou->dsu_delta);
++	*p++ = cpu_to_be32(lou->olu_ioerr_flag);
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_objid {
++ * 	struct pnfs_deviceid	oid_device_id;
++ * 	u64			oid_partition_id;
++ * 	u64			oid_object_id;
++ * };
++ */
++static inline int pnfs_osd_xdr_encode_objid(struct xdr_stream *xdr,
++					    struct pnfs_osd_objid *object_id)
++{
++	__be32 *p = xdr_reserve_space(xdr, 32);
++
++	if (p == NULL)
++		return -E2BIG;
++
++	p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data,
++				    sizeof(object_id->oid_device_id.data));
++	p = xdr_encode_hyper(p, object_id->oid_partition_id);
++	xdr_encode_hyper(p, object_id->oid_object_id);
++
++	return 0;
++}
++
++/*
++ * struct pnfs_osd_ioerr {
++ * 	struct pnfs_osd_objid	oer_component;
++ * 	u64			oer_comp_offset;
++ * 	u64			oer_comp_length;
++ * 	u32			oer_iswrite;
++ * 	u32			oer_errno;
++ * };
++ */
++int pnfs_osd_xdr_encode_ioerr(struct xdr_stream *xdr,
++			      struct pnfs_osd_ioerr *ioerr)
++{
++	__be32 *p;
++	int ret = pnfs_osd_xdr_encode_objid(xdr, &ioerr->oer_component);
++
++	if (ret)
++		return ret;
++
++	/* two hypers plus two words follow the component id: 8+8+4+4 */
++	p = xdr_reserve_space(xdr, 24);
++	if (p == NULL)
++		return -E2BIG;
++
++	p = xdr_encode_hyper(p, ioerr->oer_comp_offset);
++	p = xdr_encode_hyper(p, ioerr->oer_comp_length);
++	p[0] = cpu_to_be32(ioerr->oer_iswrite);
++	p[1] = cpu_to_be32(ioerr->oer_errno);
++
++	return 0;
++}
+diff -up linux-2.6.34.noarch/fs/nfs/pagelist.c.orig linux-2.6.34.noarch/fs/nfs/pagelist.c
+--- linux-2.6.34.noarch/fs/nfs/pagelist.c.orig	2010-09-30 10:15:17.899715000 -0400
++++ linux-2.6.34.noarch/fs/nfs/pagelist.c	2010-09-30 10:17:08.748995000 -0400
+@@ -20,6 +20,7 @@
+ #include <linux/nfs_mount.h>
+ 
+ #include "internal.h"
++#include "pnfs.h"
+ 
+ static struct kmem_cache *nfs_page_cachep;
+ 
+@@ -56,7 +57,8 @@ nfs_page_free(struct nfs_page *p)
+ struct nfs_page *
+ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
+ 		   struct page *page,
+-		   unsigned int offset, unsigned int count)
++		   unsigned int offset, unsigned int count,
++		   struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_page		*req;
+ 
+@@ -79,7 +81,11 @@ nfs_create_request(struct nfs_open_conte
+ 	req->wb_pgbase	= offset;
+ 	req->wb_bytes   = count;
+ 	req->wb_context = get_nfs_open_context(ctx);
++	req->wb_lock_context = nfs_get_lock_context(ctx);
+ 	kref_init(&req->wb_kref);
++	req->wb_lseg    = lseg;
++	if (lseg)
++		get_lseg(lseg);
+ 	return req;
+ }
+ 
+@@ -141,18 +147,26 @@ void nfs_clear_request(struct nfs_page *
+ {
+ 	struct page *page = req->wb_page;
+ 	struct nfs_open_context *ctx = req->wb_context;
++	struct nfs_lock_context *l_ctx = req->wb_lock_context;
+ 
+ 	if (page != NULL) {
+ 		page_cache_release(page);
+ 		req->wb_page = NULL;
+ 	}
++	if (l_ctx != NULL) {
++		nfs_put_lock_context(l_ctx);
++		req->wb_lock_context = NULL;
++	}
+ 	if (ctx != NULL) {
+ 		put_nfs_open_context(ctx);
+ 		req->wb_context = NULL;
+ 	}
++	if (req->wb_lseg != NULL) {
++		put_lseg(req->wb_lseg);
++		req->wb_lseg = NULL;
++	}
+ }
+ 
+-
+ /**
+  * nfs_release_request - Release the count on an NFS read/write request
+  * @req: request to release
+@@ -231,11 +245,12 @@ void nfs_pageio_init(struct nfs_pageio_d
+  * Return 'true' if this is the case, else return 'false'.
+  */
+ static int nfs_can_coalesce_requests(struct nfs_page *prev,
+-				     struct nfs_page *req)
++				     struct nfs_page *req,
++				     struct nfs_pageio_descriptor *pgio)
+ {
+ 	if (req->wb_context->cred != prev->wb_context->cred)
+ 		return 0;
+-	if (req->wb_context->lockowner != prev->wb_context->lockowner)
++	if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
+ 		return 0;
+ 	if (req->wb_context->state != prev->wb_context->state)
+ 		return 0;
+@@ -245,6 +260,12 @@ static int nfs_can_coalesce_requests(str
+ 		return 0;
+ 	if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
+ 		return 0;
++	if (req->wb_lseg != prev->wb_lseg)
++		return 0;
++#ifdef CONFIG_NFS_V4_1
++	if (pgio->pg_test && !pgio->pg_test(pgio, prev, req))
++		return 0;
++#endif /* CONFIG_NFS_V4_1 */
+ 	return 1;
+ }
+ 
+@@ -277,7 +298,7 @@ static int nfs_pageio_do_add_request(str
+ 		if (newlen > desc->pg_bsize)
+ 			return 0;
+ 		prev = nfs_list_entry(desc->pg_list.prev);
+-		if (!nfs_can_coalesce_requests(prev, req))
++		if (!nfs_can_coalesce_requests(prev, req, desc))
+ 			return 0;
+ 	} else
+ 		desc->pg_base = req->wb_pgbase;
+@@ -366,6 +387,7 @@ void nfs_pageio_cond_complete(struct nfs
+  * @idx_start: lower bound of page->index to scan
+  * @npages: idx_start + npages sets the upper bound to scan.
+  * @tag: tag to scan for
++ * @use_pnfs: will be set TRUE if commit needs to be handled by layout driver
+  *
+  * Moves elements from one of the inode request lists.
+  * If the number of requests is set to 0, the entire address_space
+@@ -375,7 +397,7 @@ void nfs_pageio_cond_complete(struct nfs
+  */
+ int nfs_scan_list(struct nfs_inode *nfsi,
+ 		struct list_head *dst, pgoff_t idx_start,
+-		unsigned int npages, int tag)
++		  unsigned int npages, int tag, int *use_pnfs)
+ {
+ 	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
+ 	struct nfs_page *req;
+@@ -406,6 +428,8 @@ int nfs_scan_list(struct nfs_inode *nfsi
+ 				radix_tree_tag_clear(&nfsi->nfs_page_tree,
+ 						req->wb_index, tag);
+ 				nfs_list_add_request(req, dst);
++				if (req->wb_lseg)
++					*use_pnfs = 1;
+ 				res++;
+ 				if (res == INT_MAX)
+ 					goto out;
+diff -up linux-2.6.34.noarch/fs/nfs/pnfs.c.orig linux-2.6.34.noarch/fs/nfs/pnfs.c
+--- linux-2.6.34.noarch/fs/nfs/pnfs.c.orig	2010-09-30 10:17:08.752997000 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.c	2010-09-30 10:17:08.754995000 -0400
+@@ -0,0 +1,2039 @@
++/*
++ *  linux/fs/nfs/pnfs.c
++ *
++ *  pNFS functions to call and manage layout drivers.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand <dhildebz@eecs.umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/kernel.h>
++#include <linux/errno.h>
++#include <linux/fs.h>
++#include <linux/module.h>
++#include <linux/smp_lock.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_mount.h>
++#include <linux/nfs_page.h>
++#include <linux/nfs4.h>
++#include <linux/nfs4_pnfs.h>
++#include <linux/rculist.h>
++
++#include "internal.h"
++#include "nfs4_fs.h"
++#include "pnfs.h"
++
++#define NFSDBG_FACILITY		NFSDBG_PNFS
++
++#define MIN_POOL_LC		(4)
++
++static int pnfs_initialized;
++
++static void pnfs_free_layout(struct pnfs_layout_hdr *lo,
++			     struct pnfs_layout_range *range);
++static inline void get_layout(struct pnfs_layout_hdr *lo);
++
++/* Locking:
++ *
++ * pnfs_spinlock:
++ * 	protects pnfs_modules_tbl.
++ */
++static spinlock_t pnfs_spinlock = __SPIN_LOCK_UNLOCKED(pnfs_spinlock);
++
++/*
++ * pnfs_modules_tbl holds all pnfs modules
++ */
++static struct list_head	pnfs_modules_tbl;
++static struct kmem_cache *pnfs_cachep;
++static mempool_t *pnfs_layoutcommit_mempool;
++/* Zeroed nfs4_layoutcommit_data from the mempool (GFP_NOFS), or NULL */
++static inline struct nfs4_layoutcommit_data *pnfs_layoutcommit_alloc(void)
++{
++	struct nfs4_layoutcommit_data *data;
++
++	data = mempool_alloc(pnfs_layoutcommit_mempool, GFP_NOFS);
++	if (data != NULL)
++		memset(data, 0, sizeof(struct nfs4_layoutcommit_data));
++	return data;
++}
++
++void pnfs_layoutcommit_free(struct nfs4_layoutcommit_data *p)
++{
++	mempool_free(p, pnfs_layoutcommit_mempool);
++}
++
++/*
++ * struct pnfs_module - One per pNFS device module.
++ */
++struct pnfs_module {
++	struct pnfs_layoutdriver_type *pnfs_ld_type;
++	struct list_head        pnfs_tblid;
++};
++/* Set up the module list, slab cache and mempool; returns 0 or -ENOMEM */
++int
++pnfs_initialize(void)
++{
++	INIT_LIST_HEAD(&pnfs_modules_tbl);
++	/* slab cache that backs the layoutcommit mempool created below */
++	pnfs_cachep = kmem_cache_create("nfs4_layoutcommit_data",
++					sizeof(struct nfs4_layoutcommit_data),
++					0, SLAB_HWCACHE_ALIGN, NULL);
++	if (pnfs_cachep == NULL)
++		return -ENOMEM;
++	/* on failure the cache is torn down again before returning */
++	pnfs_layoutcommit_mempool = mempool_create(MIN_POOL_LC,
++						   mempool_alloc_slab,
++						   mempool_free_slab,
++						   pnfs_cachep);
++	if (pnfs_layoutcommit_mempool == NULL) {
++		kmem_cache_destroy(pnfs_cachep);
++		return -ENOMEM;
++	}
++
++	pnfs_initialized = 1;
++	return 0;
++}
++
++void pnfs_uninitialize(void)
++{
++	mempool_destroy(pnfs_layoutcommit_mempool);
++	kmem_cache_destroy(pnfs_cachep);
++}
++
++/* Search pnfs_modules_tbl for @id: returns 1 and sets *module on a match */
++static int find_pnfs(u32 id, struct pnfs_module **module)
++{
++	struct pnfs_module *mod;
++
++	dprintk("PNFS: %s: Searching for %u\n", __func__, id);
++	list_for_each_entry(mod, &pnfs_modules_tbl, pnfs_tblid) {
++		if (mod->pnfs_ld_type->id != id)
++			continue;
++		*module = mod;
++		return 1;
++	}
++	return 0;
++}
++
++/*
++ * Flag the layout as needing a layoutcommit and take a reference on the
++ * open owner's cred.  No-op without a layout or if the flag is already set.
++ */
++void
++pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx)
++{
++	dprintk("%s: has_layout=%d ctx=%p\n", __func__, has_layout(nfsi), ctx);
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	if (!has_layout(nfsi) ||
++	    test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->state)) {
++		spin_unlock(&nfsi->vfs_inode.i_lock);
++		return;
++	}
++	nfsi->layout->cred = get_rpccred(ctx->state->owner->so_cred);
++	__set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->state);
++	nfsi->change_attr++;
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	dprintk("%s: Set layoutcommit\n", __func__);
++}
++
++/* Update last_write_offset for layoutcommit.
++ * TODO: We should only use committed extents, but the current nfs
++ * implementation does not calculate the written range in nfs_commit_done.
++ * We therefore update this field in writeback_done.
++ */
++void
++pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
++{
++	loff_t end_pos;
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	if (offset < nfsi->layout->write_begin_pos)
++		nfsi->layout->write_begin_pos = offset;
++	end_pos = offset + extent - 1; /* I'm being inclusive */
++	if (end_pos > nfsi->layout->write_end_pos)
++		nfsi->layout->write_end_pos = end_pos;
++	dprintk("%s: Wrote %lu@%lu bpos %lu, epos: %lu\n",
++		__func__,
++		(unsigned long) extent,
++		(unsigned long) offset ,
++		(unsigned long) nfsi->layout->write_begin_pos,
++		(unsigned long) nfsi->layout->write_end_pos);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++}
++
++/* Uninitialize a mountpoint in a layout driver */
++void
++unmount_pnfs_layoutdriver(struct nfs_server *nfss)
++{
++	if (PNFS_EXISTS_LDIO_OP(nfss, uninitialize_mountpoint))
++		nfss->pnfs_curr_ld->ld_io_ops->uninitialize_mountpoint(nfss);
++}
++
++/*
++ * Set the server pnfs module to the first registered pnfs_type.
++ * Only one pNFS layout driver is supported.
++ */
++void
++set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
++		      u32 id)
++{
++	struct pnfs_module *mod = NULL;
++
++	if (server->pnfs_curr_ld)
++		return;
++
++	if (!find_pnfs(id, &mod)) {
++		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
++		find_pnfs(id, &mod);
++	}
++
++	if (!mod) {
++		dprintk("%s: No pNFS module found for %u. ", __func__, id);
++		goto out_err;
++	}
++
++	server->pnfs_curr_ld = mod->pnfs_ld_type;
++	if (mod->pnfs_ld_type->ld_io_ops->initialize_mountpoint(
++							server, mntfh)) {
++		printk(KERN_ERR "%s: Error initializing mount point "
++		       "for layout driver %u. ", __func__, id);
++		goto out_err;
++	}
++
++	dprintk("%s: pNFS module for %u set\n", __func__, id);
++	return;
++
++out_err:
++	dprintk("Using NFSv4 I/O\n");
++	server->pnfs_curr_ld = NULL;
++}
++
++/* Allow I/O module to set its functions structure.
++ * Returns the pNFS client ops, or NULL if the driver was not registered. */
++struct pnfs_client_operations*
++pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
++{
++	struct pnfs_module *pnfs_mod;
++	struct layoutdriver_io_operations *io_ops = ld_type->ld_io_ops;
++
++	if (!pnfs_initialized) {
++		printk(KERN_ERR "%s Registration failure. "
++		       "pNFS not initialized.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops || !io_ops->alloc_layout || !io_ops->free_layout) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "alloc_layout and free_layout.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops->alloc_lseg || !io_ops->free_lseg) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "alloc_lseg and free_lseg.\n", __func__);
++		return NULL;
++	}
++
++	if (!io_ops->read_pagelist || !io_ops->write_pagelist ||
++	    !io_ops->commit) {
++		printk(KERN_ERR "%s Layout driver must provide "
++		       "read_pagelist, write_pagelist, and commit.\n",
++		       __func__);
++		return NULL;
++	}
++
++	pnfs_mod = kmalloc(sizeof(*pnfs_mod), GFP_KERNEL);
++	if (pnfs_mod == NULL)
++		return NULL;	/* not on the list: don't hand out the ops */
++
++	dprintk("%s Registering id:%u name:%s\n",
++		__func__, ld_type->id, ld_type->name);
++	pnfs_mod->pnfs_ld_type = ld_type;
++	INIT_LIST_HEAD(&pnfs_mod->pnfs_tblid);
++
++	spin_lock(&pnfs_spinlock);
++	list_add(&pnfs_mod->pnfs_tblid, &pnfs_modules_tbl);
++	spin_unlock(&pnfs_spinlock);
++
++	return &pnfs_ops;
++}
++
++/* Unlink and free a registered layout driver; lookup runs under the lock */
++void
++pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
++{
++	struct pnfs_module *pnfs_mod = NULL;
++
++	spin_lock(&pnfs_spinlock);
++	if (find_pnfs(ld_type->id, &pnfs_mod))
++		list_del(&pnfs_mod->pnfs_tblid);
++	spin_unlock(&pnfs_spinlock);
++	if (pnfs_mod)
++		dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
++	kfree(pnfs_mod);	/* kfree(NULL) is a no-op when nothing matched */
++}
++
++/*
++ * pNFS client layout cache
++ */
++#if defined(CONFIG_SMP)
++#define BUG_ON_UNLOCKED_INO(ino) \
++	BUG_ON(!spin_is_locked(&ino->i_lock))
++#define BUG_ON_UNLOCKED_LO(lo) \
++	BUG_ON_UNLOCKED_INO(PNFS_INODE(lo))
++#else /* CONFIG_SMP */
++#define BUG_ON_UNLOCKED_INO(lo) do {} while (0)
++#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
++#endif /* CONFIG_SMP */
++
++static inline void
++get_layout(struct pnfs_layout_hdr *lo)
++{
++	BUG_ON_UNLOCKED_LO(lo);
++	lo->refcount++;
++}
++
++static inline void
++put_layout_locked(struct pnfs_layout_hdr *lo)
++{
++	BUG_ON_UNLOCKED_LO(lo);
++	BUG_ON(lo->refcount <= 0);
++
++	lo->refcount--;
++	if (!lo->refcount) {
++		struct layoutdriver_io_operations *io_ops = PNFS_LD_IO_OPS(lo);
++		struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
++
++		dprintk("%s: freeing layout cache %p\n", __func__, lo);
++		WARN_ON(!list_empty(&lo->layouts));
++		io_ops->free_layout(lo);
++		nfsi->layout = NULL;
++	}
++}
++
++void
++put_layout(struct inode *inode)
++{
++	spin_lock(&inode->i_lock);
++	put_layout_locked(NFS_I(inode)->layout);
++	spin_unlock(&inode->i_lock);
++
++}
++
++void
++pnfs_layout_release(struct pnfs_layout_hdr *lo,
++		    struct pnfs_layout_range *range)
++{
++	struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	if (range)
++		pnfs_free_layout(lo, range);
++	/*
++	 * Matched in _pnfs_update_layout for layoutget
++	 * and by get_layout in _pnfs_return_layout for layoutreturn
++	 */
++	put_layout_locked(lo);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	wake_up_all(&nfsi->lo_waitq);
++}
++
++void
++pnfs_destroy_layout(struct nfs_inode *nfsi)
++{
++	struct pnfs_layout_hdr *lo;
++	struct pnfs_layout_range range = {
++		.iomode = IOMODE_ANY,
++		.offset = 0,
++		.length = NFS4_MAX_UINT64,
++	};
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	lo = nfsi->layout;
++	if (lo) {
++		pnfs_free_layout(lo, &range);
++		WARN_ON(!list_empty(&nfsi->layout->segs));
++		WARN_ON(!list_empty(&nfsi->layout->layouts));
++
++		if (nfsi->layout->refcount != 1)
++			printk(KERN_WARNING "%s: layout refcount not=1 %d\n",
++				__func__, nfsi->layout->refcount);
++		WARN_ON(nfsi->layout->refcount != 1);
++
++		/* Matched by refcount set to 1 in alloc_init_layout */
++		put_layout_locked(lo);
++	}
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++}
++
++/*
++ * Called by the state manager to remove all layouts established under an
++ * expired lease.
++ */
++void
++pnfs_destroy_all_layouts(struct nfs_client *clp)
++{
++	struct pnfs_layout_hdr *lo;
++
++	while (!list_empty(&clp->cl_layouts)) {
++		lo = list_entry(clp->cl_layouts.next, struct pnfs_layout_hdr,
++				layouts);
++		dprintk("%s freeing layout for inode %lu\n", __func__,
++			lo->inode->i_ino);
++		pnfs_destroy_layout(NFS_I(lo->inode));
++	}
++}
++
++static inline void
++init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
++{
++	INIT_LIST_HEAD(&lseg->fi_list);
++	kref_init(&lseg->kref);
++	lseg->valid = true;
++	lseg->layout = lo;
++}
++
++/* kref release for an lseg, i_lock held: free it, then unpin its layout. */
++static void destroy_lseg(struct kref *kref)
++{
++	struct pnfs_layout_segment *lseg =
++		container_of(kref, struct pnfs_layout_segment, kref);
++	struct pnfs_layout_hdr *lo = lseg->layout;
++
++	dprintk("--> %s\n", __func__);
++	PNFS_LD_IO_OPS(lo)->free_lseg(lseg);	/* while lo is still pinned */
++	put_layout_locked(lo);	/* ref taken in pnfs_insert_layout */
++}
++
++static void
++put_lseg_locked(struct pnfs_layout_segment *lseg)
++{
++	bool do_wake_up;
++	struct nfs_inode *nfsi;
++
++	if (!lseg)
++		return;
++
++	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
++		atomic_read(&lseg->kref.refcount), lseg->valid);
++	do_wake_up = !lseg->valid;
++	nfsi = PNFS_NFS_INODE(lseg->layout);
++	kref_put(&lseg->kref, destroy_lseg);
++	if (do_wake_up)
++		wake_up(&nfsi->lo_waitq);
++}
++
++void
++put_lseg(struct pnfs_layout_segment *lseg)
++{
++	bool do_wake_up;
++	struct nfs_inode *nfsi;
++
++	if (!lseg)
++		return;
++
++	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
++		atomic_read(&lseg->kref.refcount), lseg->valid);
++	do_wake_up = !lseg->valid;
++	nfsi = PNFS_NFS_INODE(lseg->layout);
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	kref_put(&lseg->kref, destroy_lseg);
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	if (do_wake_up)
++		wake_up(&nfsi->lo_waitq);
++}
++EXPORT_SYMBOL(put_lseg);
++
++void get_lseg(struct pnfs_layout_segment *lseg)
++{
++	kref_get(&lseg->kref);
++}
++EXPORT_SYMBOL(get_lseg);
++
++static inline u64
++end_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	end = start + len;
++	return end >= start ? end: NFS4_MAX_UINT64;
++}
++
++/* last octet in a range */
++static inline u64
++last_byte_offset(u64 start, u64 len)
++{
++	u64 end;
++
++	BUG_ON(!len);
++	end = start + len;
++	return end > start ? end - 1: NFS4_MAX_UINT64;
++}
++
++/*
++ * is l2 fully contained in l1?
++ *   start1                             end1
++ *   [----------------------------------)
++ *           start2           end2
++ *           [----------------)
++ */
++static inline int
++lo_seg_contained(struct pnfs_layout_range *l1,
++		 struct pnfs_layout_range *l2)
++{
++	u64 start1 = l1->offset;
++	u64 end1 = end_offset(start1, l1->length);
++	u64 start2 = l2->offset;
++	u64 end2 = end_offset(start2, l2->length);
++
++	return (start1 <= start2) && (end1 >= end2);
++}
++
++/*
++ * is l1 and l2 intersecting?
++ *   start1                             end1
++ *   [----------------------------------)
++ *                              start2           end2
++ *                              [----------------)
++ */
++static inline int
++lo_seg_intersecting(struct pnfs_layout_range *l1,
++		    struct pnfs_layout_range *l2)
++{
++	u64 start1 = l1->offset;
++	u64 end1 = end_offset(start1, l1->length);
++	u64 start2 = l2->offset;
++	u64 end2 = end_offset(start2, l2->length);
++
++	return (end1 == NFS4_MAX_UINT64 || end1 > start2) &&
++	       (end2 == NFS4_MAX_UINT64 || end2 > start1);
++}
++
++void
++pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
++			const nfs4_stateid *stateid)
++{
++	write_seqlock(&lo->seqlock);
++	memcpy(lo->stateid.u.data, stateid->u.data, sizeof(lo->stateid.u.data));
++	write_sequnlock(&lo->seqlock);
++}
++
++void
++pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo)
++{
++	int seq;
++
++	dprintk("--> %s\n", __func__);
++
++	do {
++		seq = read_seqbegin(&lo->seqlock);
++		memcpy(dst->u.data, lo->stateid.u.data,
++		       sizeof(lo->stateid.u.data));
++	} while (read_seqretry(&lo->seqlock, seq));
++
++	dprintk("<-- %s\n", __func__);
++}
++
++static void
++pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
++			      struct nfs4_state *state)
++{
++	int seq;
++
++	dprintk("--> %s\n", __func__);
++
++	write_seqlock(&lo->seqlock);
++	if (!memcmp(lo->stateid.u.data, &zero_stateid, NFS4_STATEID_SIZE))
++		do {
++			seq = read_seqbegin(&state->seqlock);
++			memcpy(lo->stateid.u.data, state->stateid.u.data,
++					sizeof(state->stateid.u.data));
++		} while (read_seqretry(&state->seqlock, seq));
++	write_sequnlock(&lo->seqlock);
++	dprintk("<-- %s\n", __func__);
++}
++
++/*
++* Get layout from server.
++*    for now, assume that whole file layouts are requested.
++*    arg->offset: 0
++*    arg->length: all ones
++*/
++static int
++send_layoutget(struct inode *ino,
++	   struct nfs_open_context *ctx,
++	   struct pnfs_layout_range *range,
++	   struct pnfs_layout_segment **lsegpp,
++	   struct pnfs_layout_hdr *lo)
++{
++	int status;
++	struct nfs_server *server = NFS_SERVER(ino);
++	struct nfs4_layoutget *lgp;
++
++	dprintk("--> %s\n", __func__);
++
++	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
++	if (lgp == NULL) {
++		pnfs_layout_release(lo, NULL);
++		return -ENOMEM;
++	}
++	lgp->args.minlength = NFS4_MAX_UINT64;
++	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
++	lgp->args.range.iomode = range->iomode;
++	lgp->args.range.offset = 0;
++	lgp->args.range.length = NFS4_MAX_UINT64;
++	lgp->args.type = server->pnfs_curr_ld->id;
++	lgp->args.inode = ino;
++	lgp->lsegpp = lsegpp;
++
++	if (!memcmp(lo->stateid.u.data, &zero_stateid, NFS4_STATEID_SIZE)) {
++		struct nfs_open_context *oldctx = ctx;
++
++		if (!oldctx) {
++			ctx = nfs_find_open_context(ino, NULL,
++					(range->iomode == IOMODE_READ) ?
++					FMODE_READ: FMODE_WRITE);
++			BUG_ON(!ctx);
++		}
++		/* Set the layout stateid from the open stateid */
++		pnfs_layout_from_open_stateid(NFS_I(ino)->layout, ctx->state);
++		if (!oldctx)
++			put_nfs_open_context(ctx);
++	}
++
++	/* Retrieve layout information from server */
++	status = nfs4_proc_layoutget(lgp);
++
++	dprintk("<-- %s status %d\n", __func__, status);
++	return status;
++}
++
++/*
++ * iomode matching rules:
++ * range	lseg	match
++ * -----	-----	-----
++ * ANY		READ	true
++ * ANY		RW	true
++ * RW		READ	false
++ * RW		RW	true
++ * READ		READ	true
++ * READ		RW	false
++ */
++static inline int
++should_free_lseg(struct pnfs_layout_segment *lseg,
++		   struct pnfs_layout_range *range)
++{
++	return (range->iomode == IOMODE_ANY ||
++		lseg->range.iomode == range->iomode) &&
++	       lo_seg_intersecting(&lseg->range, range);
++}
++
++static struct pnfs_layout_segment *
++has_layout_to_return(struct pnfs_layout_hdr *lo,
++		     struct pnfs_layout_range *range)
++{
++	struct pnfs_layout_segment *out = NULL, *lseg;
++	dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
++		__func__, lo, range->offset, range->length, range->iomode);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry (lseg, &lo->segs, fi_list)
++		if (should_free_lseg(lseg, range)) {
++			out = lseg;
++			break;
++		}
++
++	dprintk("%s:Return lseg=%p\n", __func__, out);
++	return out;
++}
++
++static inline bool
++_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
++{
++	return atomic_read(&lseg->kref.refcount) == 1;
++}
++
++
++static void
++pnfs_free_layout(struct pnfs_layout_hdr *lo,
++		 struct pnfs_layout_range *range)
++{
++	struct pnfs_layout_segment *lseg, *next;
++	dprintk("%s:Begin lo %p offset %llu length %llu iomode %d\n",
++		__func__, lo, range->offset, range->length, range->iomode);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry_safe (lseg, next, &lo->segs, fi_list) {
++		if (!should_free_lseg(lseg, range) ||
++		    !_pnfs_can_return_lseg(lseg))
++			continue;
++		dprintk("%s: freeing lseg %p iomode %d "
++			"offset %llu length %llu\n", __func__,
++			lseg, lseg->range.iomode, lseg->range.offset,
++			lseg->range.length);
++		list_del(&lseg->fi_list);
++		put_lseg_locked(lseg);
++	}
++	if (list_empty(&lo->segs)) {
++		struct nfs_client *clp;
++
++		clp = PNFS_NFS_SERVER(lo)->nfs_client;
++		spin_lock(&clp->cl_lock);
++		list_del_init(&lo->layouts);
++		spin_unlock(&clp->cl_lock);
++		pnfs_set_layout_stateid(lo, &zero_stateid);
++	}
++
++	dprintk("%s:Return\n", __func__);
++}
++
++static bool
++pnfs_return_layout_barrier(struct nfs_inode *nfsi,
++			   struct pnfs_layout_range *range)
++{
++	struct pnfs_layout_segment *lseg;
++	bool ret = false;
++
++	spin_lock(&nfsi->vfs_inode.i_lock);
++	list_for_each_entry(lseg, &nfsi->layout->segs, fi_list) {
++		if (!should_free_lseg(lseg, range))
++			continue;
++		lseg->valid = false;
++		if (!_pnfs_can_return_lseg(lseg)) {
++			dprintk("%s: wait on lseg %p refcount %d\n",
++				__func__, lseg,
++				atomic_read(&lseg->kref.refcount));
++			ret = true;
++		}
++	}
++	spin_unlock(&nfsi->vfs_inode.i_lock);
++	dprintk("%s:Return %d\n", __func__, ret);
++	return ret;
++}
++
++static int
++return_layout(struct inode *ino, struct pnfs_layout_range *range,
++	      enum pnfs_layoutreturn_type type, struct pnfs_layout_hdr *lo,
++	      bool wait)
++{
++	struct nfs4_layoutreturn *lrp;
++	struct nfs_server *server = NFS_SERVER(ino);
++	int status = -ENOMEM;
++
++	dprintk("--> %s\n", __func__);
++
++	BUG_ON(type != RETURN_FILE);
++
++	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
++	if (lrp == NULL) {
++		if (lo && (type == RETURN_FILE))
++			pnfs_layout_release(lo, NULL);
++		goto out;
++	}
++	lrp->args.reclaim = 0;
++	lrp->args.layout_type = server->pnfs_curr_ld->id;
++	lrp->args.return_type = type;
++	lrp->args.range = *range;
++	lrp->args.inode = ino;
++
++	status = nfs4_proc_layoutreturn(lrp, wait);
++out:
++	dprintk("<-- %s status: %d\n", __func__, status);
++	return status;
++}
++
++int
++_pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
++		    const nfs4_stateid *stateid, /* optional */
++		    enum pnfs_layoutreturn_type type,
++		    bool wait)
++{
++	struct pnfs_layout_hdr *lo = NULL;
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct pnfs_layout_range arg;
++	int status = 0;
++
++	dprintk("--> %s type %d\n", __func__, type);
++
++
++	arg.iomode = range ? range->iomode : IOMODE_ANY;
++	arg.offset = 0;
++	arg.length = NFS4_MAX_UINT64;
++
++	if (type == RETURN_FILE) {
++		spin_lock(&ino->i_lock);
++		lo = nfsi->layout;
++		if (lo && !has_layout_to_return(lo, &arg)) {
++			lo = NULL;
++		}
++		if (!lo) {
++			spin_unlock(&ino->i_lock);
++			dprintk("%s: no layout segments to return\n", __func__);
++			goto out;
++		}
++
++		/* Reference for layoutreturn matched in pnfs_layout_release */
++		get_layout(lo);
++
++		spin_unlock(&ino->i_lock);
++
++		if (pnfs_return_layout_barrier(nfsi, &arg)) {
++			if (stateid) { /* callback */
++				status = -EAGAIN;
++				goto out_put;
++			}
++			dprintk("%s: waiting\n", __func__);
++			wait_event(nfsi->lo_waitq,
++				   !pnfs_return_layout_barrier(nfsi, &arg));
++		}
++
++		if (layoutcommit_needed(nfsi)) {
++			if (stateid && !wait) { /* callback */
++				dprintk("%s: layoutcommit pending\n", __func__);
++				status = -EAGAIN;
++				goto out_put;
++			}
++			status = pnfs_layoutcommit_inode(ino, wait);
++			if (status) {
++				/* Return layout even if layoutcommit fails */
++				dprintk("%s: layoutcommit failed, status=%d. "
++					"Returning layout anyway\n",
++					__func__, status);
++			}
++		}
++
++		if (!stateid)
++			status = return_layout(ino, &arg, type, lo, wait);
++		else
++			pnfs_layout_release(lo, &arg);
++	}
++out:
++	dprintk("<-- %s status: %d\n", __func__, status);
++	return status;
++out_put:
++	put_layout(ino);
++	goto out;
++}
++
++/*
++ * cmp two layout segments for sorting into layout cache
++ */
++static inline s64
++cmp_layout(struct pnfs_layout_range *l1,
++	   struct pnfs_layout_range *l2)
++{
++	/*
++	 * Callers only test the sign, so compare the u64 fields rather than
++	 * subtract them: a difference >= 2^63 changes sign as an s64.
++	 */
++	/* higher offset > lower offset */
++	if (l1->offset != l2->offset)
++		return l1->offset > l2->offset ? 1 : -1;
++
++	/* longer length > shorter length */
++	if (l1->length != l2->length)
++		return l1->length > l2->length ? 1 : -1;
++
++	/* read > read/write */
++	return (int)(l1->iomode == IOMODE_READ) -
++	(int)(l2->iomode == IOMODE_READ);
++}
++
++static void
++pnfs_insert_layout(struct pnfs_layout_hdr *lo,
++		   struct pnfs_layout_segment *lseg)
++{
++	struct pnfs_layout_segment *lp;
++	int found = 0;
++
++	dprintk("%s:Begin\n", __func__);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	if (list_empty(&lo->segs)) {
++		struct nfs_client *clp = PNFS_NFS_SERVER(lo)->nfs_client;
++
++		spin_lock(&clp->cl_lock);
++		BUG_ON(!list_empty(&lo->layouts));
++		list_add_tail(&lo->layouts, &clp->cl_layouts);
++		spin_unlock(&clp->cl_lock);
++	}
++	list_for_each_entry (lp, &lo->segs, fi_list) {
++		if (cmp_layout(&lp->range, &lseg->range) > 0)
++			continue;
++		list_add_tail(&lseg->fi_list, &lp->fi_list);
++		dprintk("%s: inserted lseg %p "
++			"iomode %d offset %llu length %llu before "
++			"lp %p iomode %d offset %llu length %llu\n",
++			__func__, lseg, lseg->range.iomode,
++			lseg->range.offset, lseg->range.length,
++			lp, lp->range.iomode, lp->range.offset,
++			lp->range.length);
++		found = 1;
++		break;
++	}
++	if (!found) {
++		list_add_tail(&lseg->fi_list, &lo->segs);
++		dprintk("%s: inserted lseg %p "
++			"iomode %d offset %llu length %llu at tail\n",
++			__func__, lseg, lseg->range.iomode,
++			lseg->range.offset, lseg->range.length);
++	}
++	get_layout(lo);
++
++	dprintk("%s:Return\n", __func__);
++}
++
++/*
++ * Each layoutdriver embeds pnfs_layout_hdr as the first field in its
++ * per-layout type layout cache structure and returns it ZEROed
++ * from layoutdriver_io_ops->alloc_layout
++ */
++static struct pnfs_layout_hdr *
++alloc_init_layout(struct inode *ino)
++{
++	struct pnfs_layout_hdr *lo;
++	struct layoutdriver_io_operations *io_ops;
++
++	io_ops = NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops;
++	lo = io_ops->alloc_layout(ino);
++	if (!lo) {
++		printk(KERN_ERR
++			"%s: out of memory: io_ops->alloc_layout failed\n",
++			__func__);
++		return NULL;
++	}
++	lo->refcount = 1;
++	INIT_LIST_HEAD(&lo->layouts);
++	INIT_LIST_HEAD(&lo->segs);
++	seqlock_init(&lo->seqlock);
++	lo->inode = ino;
++	return lo;
++}
++
++/*
++ * Retrieve and possibly allocate the inode layout
++ *
++ * ino->i_lock must be taken by the caller.
++ */
++static struct pnfs_layout_hdr *
++pnfs_alloc_layout(struct inode *ino)
++{
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct pnfs_layout_hdr *new = NULL;
++
++	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
++
++	BUG_ON_UNLOCKED_INO(ino);
++	if (likely(nfsi->layout))
++		return nfsi->layout;
++
++	spin_unlock(&ino->i_lock);
++	new = alloc_init_layout(ino);
++	spin_lock(&ino->i_lock);
++
++	if (likely(nfsi->layout == NULL)) {	/* Won the race? */
++		nfsi->layout = new;
++	} else if (new) {
++		/* Reference the layout across i_lock release and grab */
++		get_layout(nfsi->layout);
++		spin_unlock(&ino->i_lock);
++		NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops->free_layout(new);
++		spin_lock(&ino->i_lock);
++		put_layout_locked(nfsi->layout);
++	}
++	return nfsi->layout;
++}
++
++/*
++ * iomode matching rules:
++ * range	lseg	match
++ * -----	-----	-----
++ * ANY		READ	true
++ * ANY		RW	true
++ * RW		READ	false
++ * RW		RW	true
++ * READ		READ	true
++ * READ		RW	true
++ */
++static inline int
++has_matching_lseg(struct pnfs_layout_segment *lseg,
++		  struct pnfs_layout_range *range)
++{
++	struct pnfs_layout_range range1;
++
++	if ((range->iomode == IOMODE_RW && lseg->range.iomode != IOMODE_RW) ||
++	    !lo_seg_intersecting(&lseg->range, range))
++		return 0;
++
++	/* range1 covers only the first byte in the range */
++	range1 = *range;
++	range1.length = 1;
++	return lo_seg_contained(&lseg->range, &range1);
++}
++
++/*
++ * lookup range in layout
++ */
++static struct pnfs_layout_segment *
++pnfs_has_layout(struct pnfs_layout_hdr *lo,
++		struct pnfs_layout_range *range,
++		bool take_ref,
++		bool only_valid)
++{
++	struct pnfs_layout_segment *lseg, *ret = NULL;
++
++	dprintk("%s:Begin\n", __func__);
++
++	BUG_ON_UNLOCKED_LO(lo);
++	list_for_each_entry (lseg, &lo->segs, fi_list) {
++		if (has_matching_lseg(lseg, range) &&
++		    (lseg->valid || !only_valid)) {
++			ret = lseg;
++			if (take_ref)
++				get_lseg(ret);
++			break;
++		}
++		if (cmp_layout(range, &lseg->range) > 0)
++			break;
++	}
++
++	dprintk("%s:Return lseg %p take_ref %d ref %d valid %d\n",
++		__func__, ret, take_ref,
++		ret ? atomic_read(&ret->kref.refcount) : 0,
++		ret ? ret->valid : 0);
++	return ret;
++}
++
++/* Update the file's layout for the given range and iomode.
++ * Layout is retrieved from the server if needed.
++ * If lsegpp is given, the appropriate layout segment is referenced and
++ * returned to the caller.
++ *
++ * pos and count are currently unused: the lookup and the LAYOUTGET both
++ * cover the whole file (offset 0, length NFS4_MAX_UINT64).  *lsegpp is
++ * cleared up front, so it stays NULL unless a segment is found or obtained.
++ */
++void
++_pnfs_update_layout(struct inode *ino,
++		   struct nfs_open_context *ctx,
++		   loff_t pos,
++		   u64 count,
++		   enum pnfs_iomode iomode,
++		   struct pnfs_layout_segment **lsegpp)
++{
++	struct pnfs_layout_range arg = {
++		.iomode = iomode,
++		.offset = 0,
++		.length = NFS4_MAX_UINT64,
++	};
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct pnfs_layout_hdr *lo;
++	struct pnfs_layout_segment *lseg = NULL;
++	bool take_ref = (lsegpp != NULL);
++
++	if (take_ref)
++		*lsegpp = NULL;
++	spin_lock(&ino->i_lock);
++	lo = pnfs_alloc_layout(ino);
++	if (lo == NULL) {
++		dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
++		goto out_unlock;	/* nfsi->layout is NULL on this path */
++	}
++
++	/* Check to see if the layout for the given range already exists */
++	lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
++	if (lseg && !lseg->valid) {
++		if (take_ref)
++			put_lseg_locked(lseg);
++		/* someone is cleaning the layout */
++		lseg = NULL;
++		goto out_unlock;
++	}
++
++	if (lseg) {
++		dprintk("%s: Using cached lseg %p for %llu@%llu iomode %d)\n",
++			__func__, lseg, arg.length, arg.offset, arg.iomode);
++		goto out_unlock;
++	}
++
++	/* if get layout already failed once goto out */
++	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state)) {
++		if (unlikely(nfsi->pnfs_layout_suspend &&
++		    get_seconds() >= nfsi->pnfs_layout_suspend)) {
++			dprintk("%s: layout_get resumed\n", __func__);
++			clear_bit(lo_fail_bit(iomode),
++				  &nfsi->layout->state);
++			nfsi->pnfs_layout_suspend = 0;
++		} else
++			goto out_unlock;
++	}
++
++	/* Reference the layout for layoutget matched in pnfs_layout_release */
++	get_layout(lo);
++	spin_unlock(&ino->i_lock);
++
++	send_layoutget(ino, ctx, &arg, lsegpp, lo);
++out:
++	/* The header may be missing (allocation failed above): don't deref it */
++	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
++		nfsi->layout ? nfsi->layout->state : 0UL, lseg);
++	return;
++out_unlock:
++	if (lsegpp)
++		*lsegpp = lseg;
++	spin_unlock(&ino->i_lock);
++	goto out;
++}
++
++void
++pnfs_get_layout_done(struct nfs4_layoutget *lgp, int rpc_status)
++{
++	struct pnfs_layout_segment *lseg = NULL;
++	struct nfs_inode *nfsi = NFS_I(lgp->args.inode);
++	time_t suspend = 0;
++
++	dprintk("-->%s\n", __func__);
++
++	lgp->status = rpc_status;
++	if (likely(!rpc_status)) {
++		if (unlikely(lgp->res.layout.len < 0)) {
++			printk(KERN_ERR
++			       "%s: ERROR Returned layout size is ZERO\n", __func__);
++			lgp->status = -EIO;
++		}
++		goto out;
++	}
++
++	dprintk("%s: ERROR retrieving layout %d\n", __func__, rpc_status);
++	switch (rpc_status) {
++	case -NFS4ERR_BADLAYOUT:
++		lgp->status = -ENOENT;
++		/* FALLTHROUGH */
++	case -EACCES:	/* NFS4ERR_ACCESS */
++		/* transient error, don't mark with NFS_INO_LAYOUT_FAILED */
++		goto out;
++
++	case -NFS4ERR_LAYOUTTRYLATER:
++	case -NFS4ERR_RECALLCONFLICT:
++	case -NFS4ERR_OLD_STATEID:
++	case -EAGAIN:	/* NFS4ERR_LOCKED */
++		lgp->status = -NFS4ERR_DELAY;	/* for nfs4_handle_exception */
++		/* FALLTHROUGH */
++	case -NFS4ERR_GRACE:
++	case -NFS4ERR_DELAY:
++		goto out;
++
++	case -NFS4ERR_ADMIN_REVOKED:
++	case -NFS4ERR_DELEG_REVOKED:
++		/* The layout is expected to be returned at this point.
++		 * This should clear the layout stateid as well */
++		suspend = get_seconds() + 1;
++		break;
++
++	case -NFS4ERR_LAYOUTUNAVAILABLE:
++		lgp->status = -ENOTSUPP;
++		break;
++
++	case -NFS4ERR_REP_TOO_BIG:
++	case -NFS4ERR_REP_TOO_BIG_TO_CACHE:
++		lgp->status = -E2BIG;
++		break;
++
++	/* Leave the following errors untranslated */
++	case -NFS4ERR_DEADSESSION:
++	case -NFS4ERR_DQUOT:
++	case -EINVAL:		/* NFS4ERR_INVAL */
++	case -EIO:		/* NFS4ERR_IO */
++	case -NFS4ERR_FHEXPIRED:
++	case -NFS4ERR_MOVED:
++	case -NFS4ERR_NOSPC:
++	case -ESERVERFAULT:	/* NFS4ERR_SERVERFAULT */
++	case -ESTALE:		/* NFS4ERR_STALE */
++	case -ETOOSMALL:	/* NFS4ERR_TOOSMALL */
++		break;
++
++	/* The following errors are our fault and should never happen */
++	case -NFS4ERR_BADIOMODE:
++	case -NFS4ERR_BADXDR:
++	case -NFS4ERR_REQ_TOO_BIG:
++	case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
++	case -NFS4ERR_WRONG_TYPE:
++		lgp->status = -EINVAL;
++		/* FALLTHROUGH */
++	case -NFS4ERR_BAD_STATEID:
++	case -NFS4ERR_NOFILEHANDLE:
++	case -ENOTSUPP:	/* NFS4ERR_NOTSUPP */
++	case -NFS4ERR_OPENMODE:
++	case -NFS4ERR_OP_NOT_IN_SESSION:
++	case -NFS4ERR_TOO_MANY_OPS:
++		dprintk("%s: error %d: should never happen\n", __func__,
++			rpc_status);
++		break;
++
++	/* The following errors are the server's fault */
++	default:
++		dprintk("%s: illegal error %d\n", __func__, rpc_status);
++		lgp->status = -EIO;
++		break;
++	}
++
++	/* remember that get layout failed and suspend trying */
++	nfsi->pnfs_layout_suspend = suspend;
++	set_bit(lo_fail_bit(lgp->args.range.iomode),
++		&nfsi->layout->state);
++	dprintk("%s: layout_get suspended until %ld\n",
++		__func__, suspend);
++out:
++	dprintk("%s end (err:%d) state 0x%lx lseg %p\n",
++		__func__, lgp->status, nfsi->layout->state, lseg);
++	return;
++}
++
++int
++pnfs_layout_process(struct nfs4_layoutget *lgp)
++{
++	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
++	struct nfs4_layoutget_res *res = &lgp->res;
++	struct pnfs_layout_segment *lseg;
++	struct inode *ino = PNFS_INODE(lo);
++	int status = 0;
++
++	/* Inject layout blob into I/O device driver */
++	lseg = PNFS_LD_IO_OPS(lo)->alloc_lseg(lo, res);
++	if (!lseg || IS_ERR(lseg)) {
++		if (!lseg)
++			status = -ENOMEM;
++		else
++			status = PTR_ERR(lseg);
++		dprintk("%s: Could not allocate layout: error %d\n",
++		       __func__, status);
++		goto out;
++	}
++
++	spin_lock(&ino->i_lock);
++	init_lseg(lo, lseg);
++	lseg->range = res->range;
++	if (lgp->lsegpp) {
++		get_lseg(lseg);
++		*lgp->lsegpp = lseg;
++	}
++	pnfs_insert_layout(lo, lseg);
++
++	if (res->return_on_close) {
++		lo->roc_iomode |= res->range.iomode;
++		if (!lo->roc_iomode)
++			lo->roc_iomode = IOMODE_ANY;
++	}
++
++	/* Done processing layoutget. Set the layout stateid */
++	pnfs_set_layout_stateid(lo, &res->stateid);
++	spin_unlock(&ino->i_lock);
++out:
++	return status;
++}
++
++void
++readahead_range(struct inode *inode, struct list_head *pages, loff_t *offset,
++		size_t *count)
++{
++	struct page *first, *last;
++	loff_t foff, i_size = i_size_read(inode);
++	pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
++	size_t range;
++
++
++	first = list_entry((pages)->prev, struct page, lru);
++	last = list_entry((pages)->next, struct page, lru);
++
++	foff = (loff_t)first->index << PAGE_CACHE_SHIFT;
++
++	range = (last->index - first->index) * PAGE_CACHE_SIZE;
++	if (last->index == end_index)
++		range += ((i_size - 1) & ~PAGE_CACHE_MASK) + 1;
++	else
++		range += PAGE_CACHE_SIZE;
++	dprintk("%s foff %lu, range %Zu\n", __func__, (unsigned long)foff,
++		range);
++	*offset = foff;
++	*count = range;
++}
++
++void
++pnfs_set_pg_test(struct inode *inode, struct nfs_pageio_descriptor *pgio)
++{
++	struct pnfs_layout_hdr *lo;
++	struct pnfs_layoutdriver_type *ld;
++
++	pgio->pg_test = NULL;
++
++	lo = NFS_I(inode)->layout;
++	ld = NFS_SERVER(inode)->pnfs_curr_ld;
++	if (!pnfs_enabled_sb(NFS_SERVER(inode)) || !lo)
++		return;
++
++	if (ld->ld_policy_ops)
++		pgio->pg_test = ld->ld_policy_ops->pg_test;
++}
++
++static u32
++pnfs_getboundary(struct inode *inode)
++{
++	u32 stripe_size = 0;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct layoutdriver_policy_operations *policy_ops;
++
++	if (!nfss->pnfs_curr_ld)
++		goto out;
++
++	policy_ops = nfss->pnfs_curr_ld->ld_policy_ops;
++	if (!policy_ops || !policy_ops->get_stripesize)
++		goto out;
++
++	/* The default is to not gather across stripes */
++	if (pnfs_ld_gather_across_stripes(nfss->pnfs_curr_ld))
++		goto out;
++
++	spin_lock(&inode->i_lock);
++	if (NFS_I(inode)->layout)
++		stripe_size = policy_ops->get_stripesize(NFS_I(inode)->layout);
++	spin_unlock(&inode->i_lock);
++out:
++	return stripe_size;
++}
++
++/*
++ * Prepare pgio for a pNFS read; rsize is already set by caller to MDS rsize.
++ */
++void
++pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
++		  struct inode *inode,
++		  struct nfs_open_context *ctx,
++		  struct list_head *pages,
++		  size_t *rsize)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	size_t count = 0;
++	loff_t loff;
++
++	pgio->pg_iswrite = 0;
++	pgio->pg_boundary = 0;
++	pgio->pg_test = NULL;
++	pgio->pg_lseg = NULL;
++
++	if (!pnfs_enabled_sb(nfss))
++		return;
++
++	/* Calculate the total read-ahead count */
++	readahead_range(inode, pages, &loff, &count);
++
++	if (count > 0) {
++		_pnfs_update_layout(inode, ctx, loff, count, IOMODE_READ,
++				    &pgio->pg_lseg);
++		if (!pgio->pg_lseg)
++			return;	/* no layout segment: *rsize left untouched */
++
++		*rsize = NFS_SERVER(inode)->ds_rsize;	/* switch to DS size */
++		pgio->pg_boundary = pnfs_getboundary(inode);
++		if (pgio->pg_boundary)	/* pg_test only matters with a boundary */
++			pnfs_set_pg_test(inode, pgio);
++	}
++}
++
++void
++pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
++		       size_t *wsize)
++{
++	struct nfs_server *server = NFS_SERVER(inode);
++
++	pgio->pg_iswrite = 1;	/* NOTE(review): pg_lseg not reset, unlike read */
++	if (!pnfs_enabled_sb(server)) {
++		pgio->pg_boundary = 0;	/* no layout driver: *wsize unchanged */
++		pgio->pg_test = NULL;
++		return;
++	}
++	pgio->pg_boundary = pnfs_getboundary(inode);
++	pnfs_set_pg_test(inode, pgio);
++	*wsize = server->ds_wsize;	/* use the data-server write size */
++}
++
++/* Return the layout driver's I/O buffer size, or 0 if it has no
++ * get_blocksize policy op.  This value determines the size that reads
++ * and writes are gathered into before being sent to the data servers.
++ * The blocksize must be a multiple of the page cache size.
++ */
++unsigned int
++pnfs_getiosize(struct nfs_server *server)
++{
++	if (!PNFS_EXISTS_LDPOLICY_OP(server, get_blocksize))
++		return 0;
++	return server->pnfs_curr_ld->ld_policy_ops->get_blocksize();
++}
++
++void
++pnfs_set_ds_iosize(struct nfs_server *server)
++{
++	unsigned dssize = pnfs_getiosize(server);
++
++	/* Data-server sizes: driver's size if given, else server rsize/wsize */
++	if (dssize > 0) {
++		server->ds_rsize = server->ds_wsize =
++			nfs_block_size(dssize, NULL);
++	} else {
++		server->ds_wsize = server->wsize;
++		server->ds_rsize = server->rsize;
++	}
++}
++
++static int
++pnfs_call_done(struct pnfs_call_data *pdata, struct rpc_task *task, void *data)
++{
++	put_lseg(pdata->lseg);	/* drop the reference stored when I/O started */
++	pdata->lseg = NULL;
++	pdata->call_ops->rpc_call_done(task, data);
++	if (pdata->pnfs_error == -EAGAIN || task->tk_status == -EAGAIN)
++		return -EAGAIN;	/* the *_done callers retry on this value */
++	if (pdata->pnfsflags & PNFS_NO_RPC) {
++		pdata->call_ops->rpc_release(data);	/* released here */
++	} else {
++		/*
++		 * just restore original rpc call ops
++		 * rpc_release will be called later by the rpc scheduling layer.
++		 */
++		task->tk_ops = pdata->call_ops;
++	}
++	return 0;
++}
++
++/* Post-write completion: pnfs_writeback_done() below is invoked by all
++ * layout drivers when write_pagelist is done.
++ * NOTE: callers set data->pnfsflags PNFS_NO_RPC so that the NFS cleanup
++ * routines perform only the page cache cleanup.
++ * pnfs_write_retry() is the work item it queues on -EAGAIN: it returns the
++ * layout for the written range, then calls pnfs_initiate_write().
++ */
++static void
++pnfs_write_retry(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_write_data *wdata;
++	struct pnfs_layout_range range;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	wdata = container_of(task, struct nfs_write_data, task);
++	range.iomode = IOMODE_RW;
++	range.offset = wdata->args.offset;
++	range.length = wdata->args.count;
++	_pnfs_return_layout(wdata->inode, &range, NULL, RETURN_FILE, true);
++	pnfs_initiate_write(wdata, NFS_CLIENT(wdata->inode),
++			    wdata->pdata.call_ops, wdata->pdata.how);
++}
++
++static void
++pnfs_writeback_done(struct nfs_write_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	/* Record the last write offset and flag that a layoutcommit is
++	 * needed.  Done here only when PNFS_NO_RPC is set (non-files layout
++	 * types); the files layout calls pnfs4_write_done for this.
++	 */
++	if ((pdata->pnfsflags & PNFS_NO_RPC) &&
++	    data->task.tk_status >= 0 && data->res.count > 0) {
++		struct nfs_inode *nfsi = NFS_I(data->inode);
++
++		pnfs_update_last_write(nfsi, data->args.offset, data->res.count);
++		pnfs_need_layoutcommit(nfsi, data->args.context);
++	}
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		INIT_WORK(&data->task.u.tk_work, pnfs_write_retry);
++		queue_work(nfsiod_workqueue, &data->task.u.tk_work); /* retry */
++	}
++}
++
++static void _pnfs_clear_lseg_from_pages(struct list_head *head)
++{
++	struct nfs_page *req;
++
++	list_for_each_entry(req, head, wb_list) {
++		put_lseg(req->wb_lseg);	/* drop each request's lseg reference */
++		req->wb_lseg = NULL;
++	}
++}
++
++/*
++ * Call the layout driver's write_pagelist function.
++ * If the driver returns PNFS_NOT_ATTEMPTED, the pNFS state set up here is
++ * undone and that status is returned for regular NFS processing.
++ *
++ * TODO: Is wdata->how and wdata->args.stable always the same value?
++ * TODO: It seems in NFS, the server may not do a stable write even
++ * though it was requested (and vice-versa?).  To check, it looks
++ * in data->res.verf->committed.  Do we need this ability
++ * for non-file layout drivers?
++ */
++enum pnfs_try_status
++pnfs_try_to_write_data(struct nfs_write_data *wdata,
++			const struct rpc_call_ops *call_ops, int how)
++{
++	struct inode *inode = wdata->inode;
++	enum pnfs_try_status trypnfs;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct pnfs_layout_segment *lseg = wdata->req->wb_lseg;
++
++	wdata->pdata.call_ops = call_ops;
++	wdata->pdata.pnfs_error = 0;
++	wdata->pdata.how = how;
++
++	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
++		inode->i_ino, wdata->args.count, wdata->args.offset, how);
++
++	get_lseg(lseg);	/* kept in pdata.lseg; dropped in pnfs_call_done() */
++
++	if (!pnfs_use_rpc(nfss))
++		wdata->pdata.pnfsflags |= PNFS_NO_RPC;
++	wdata->pdata.lseg = lseg;
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->write_pagelist(wdata,
++		nfs_page_array_len(wdata->args.pgbase, wdata->args.count),
++								how);
++
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {
++		wdata->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		wdata->pdata.lseg = NULL;
++		put_lseg(lseg);	/* undo the get_lseg() above */
++		_pnfs_clear_lseg_from_pages(&wdata->pages);
++	} else {
++		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
++	}
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++
++/* Post-read completion (pnfs_read_done(), invoked by all layout drivers when
++ * read_pagelist is done) queues this on -EAGAIN to return the layout and retry
++ */
++static void
++pnfs_read_retry(struct work_struct *work)
++{
++	struct rpc_task *task;
++	struct nfs_read_data *rdata;
++	struct pnfs_layout_range range;
++
++	dprintk("%s enter\n", __func__);
++	task = container_of(work, struct rpc_task, u.tk_work);
++	rdata = container_of(task, struct nfs_read_data, task);
++	range.iomode = IOMODE_RW;	/* NOTE(review): RW on a read retry? */
++	range.offset = rdata->args.offset;
++	range.length = rdata->args.count;
++	_pnfs_return_layout(rdata->inode, &range, NULL, RETURN_FILE, true);
++	pnfs_initiate_read(rdata, NFS_CLIENT(rdata->inode),
++			   rdata->pdata.call_ops);
++}
++
++static void
++pnfs_read_done(struct nfs_read_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		INIT_WORK(&data->task.u.tk_work, pnfs_read_retry);
++		queue_work(nfsiod_workqueue, &data->task.u.tk_work); /* retry */
++	}
++}
++
++/*
++ * Call the layout driver's read_pagelist function.
++ * If the driver returns PNFS_NOT_ATTEMPTED, the pNFS state set up here is
++ * undone and that status is returned for regular NFS processing.
++ */
++enum pnfs_try_status
++pnfs_try_to_read_data(struct nfs_read_data *rdata,
++		       const struct rpc_call_ops *call_ops)
++{
++	struct inode *inode = rdata->inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	struct pnfs_layout_segment *lseg = rdata->req->wb_lseg;
++	enum pnfs_try_status trypnfs;
++
++	rdata->pdata.call_ops = call_ops;
++	rdata->pdata.pnfs_error = 0;
++
++	dprintk("%s: Reading ino:%lu %u@%llu\n",
++		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
++
++	get_lseg(lseg);	/* kept in pdata.lseg; dropped in pnfs_call_done() */
++
++	if (!pnfs_use_rpc(nfss))
++		rdata->pdata.pnfsflags |= PNFS_NO_RPC;
++	rdata->pdata.lseg = lseg;
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->read_pagelist(rdata,
++		nfs_page_array_len(rdata->args.pgbase, rdata->args.count));
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {
++		rdata->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		rdata->pdata.lseg = NULL;
++		put_lseg(lseg);	/* undo the get_lseg() above */
++		_pnfs_clear_lseg_from_pages(&rdata->pages);
++	} else {
++		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
++	}
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++
++/*
++ * Give the layout driver an opportunity to read in pages "around" the
++ * data to be written.  Returns 0 on success with *fsdata pointing to a new
++ * pnfs_fsdata; otherwise *fsdata is NULL and an error code is returned,
++ * which is either passed up to the user or ignored if some previous part
++ * of the write succeeded.  The range [pos, pos+len-1] lies within the page.
++ */
++int _pnfs_write_begin(struct inode *inode, struct page *page,
++		      loff_t pos, unsigned len,
++		      struct pnfs_layout_segment *lseg,
++		      struct pnfs_fsdata **fsdata)
++{
++	struct pnfs_fsdata *data;
++	int status = 0;
++
++	dprintk("--> %s: pos=%llu len=%u\n",
++		__func__, (unsigned long long)pos, len);
++	data = kzalloc(sizeof(struct pnfs_fsdata), GFP_KERNEL);
++	if (!data) {
++		status = -ENOMEM;
++		goto out;
++	}
++	data->lseg = lseg; /* refcount passed into data to be managed there */
++	status = NFS_SERVER(inode)->pnfs_curr_ld->ld_io_ops->write_begin(
++						lseg, page, pos, len, data);
++	if (status) {
++		kfree(data);	/* driver failed: hand back NULL fsdata */
++		data = NULL;
++	}
++out:
++	*fsdata = data;
++	dprintk("<-- %s: status=%d\n", __func__, status);
++	return status;
++}
++
++/* Return 0 on success, negative on failure */
++/* CAREFUL - what happens if copied < len??? */
++int _pnfs_write_end(struct inode *inode, struct page *page,
++		    loff_t pos, unsigned len, unsigned copied,
++		    struct pnfs_layout_segment *lseg)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int status;
++
++	status = nfss->pnfs_curr_ld->ld_io_ops->write_end(inode, page,
++						pos, len, copied, lseg);
++	return status;	/* driver's write_end result, passed through as-is */
++}
++
++/* pNFS commit callback for all layout drivers; retries inline on -EAGAIN */
++static void
++pnfs_commit_done(struct nfs_write_data *data)
++{
++	struct pnfs_call_data *pdata = &data->pdata;
++
++	dprintk("%s: Begin (status %d)\n", __func__, data->task.tk_status);
++
++	if (pnfs_call_done(pdata, &data->task, data) == -EAGAIN) {
++		struct pnfs_layout_range range = {
++			.iomode = IOMODE_RW,
++			.offset = data->args.offset,
++			.length = data->args.count,
++		};
++		dprintk("%s: retrying\n", __func__);
++		_pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE,
++				    true);
++		pnfs_initiate_commit(data, NFS_CLIENT(data->inode),
++				     pdata->call_ops, pdata->how, 1);
++	}
++}
++
++enum pnfs_try_status
++pnfs_try_to_commit(struct nfs_write_data *data,
++		    const struct rpc_call_ops *call_ops, int sync)
++{
++	struct inode *inode = data->inode;
++	struct nfs_server *nfss = NFS_SERVER(data->inode);
++	enum pnfs_try_status trypnfs;
++
++	dprintk("%s: Begin\n", __func__);
++
++	if (!pnfs_use_rpc(nfss))
++		data->pdata.pnfsflags |= PNFS_NO_RPC;
++	/* We need to account for possibility that
++	 * each nfs_page can point to a different lseg (or be NULL).
++	 * For the immediate case of whole-file-only layouts, we at
++	 * least know there can be only a single lseg.
++	 * We still have to account for the possibility of some being NULL.
++	 * This will be done by passing the buck to the layout driver.
++	 */
++	data->pdata.call_ops = call_ops;
++	data->pdata.pnfs_error = 0;
++	data->pdata.how = sync;
++	data->pdata.lseg = NULL;	/* no single lseg is pinned for a commit */
++	trypnfs = nfss->pnfs_curr_ld->ld_io_ops->commit(data, sync);
++	if (trypnfs == PNFS_NOT_ATTEMPTED) {
++		data->pdata.pnfsflags &= ~PNFS_NO_RPC;
++		_pnfs_clear_lseg_from_pages(&data->pages);
++	} else
++		nfs_inc_stats(inode, NFSIOS_PNFS_COMMIT);
++	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
++	return trypnfs;
++}
++
++void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
++{
++	struct nfs_server *nfss = NFS_SERVER(data->args.inode);
++
++	/* TODO: Maybe we should avoid this by allowing the layout driver
++	 * to directly xdr its layout on the wire.
++	 */
++	if (nfss->pnfs_curr_ld->ld_io_ops->cleanup_layoutcommit) /* optional */
++		nfss->pnfs_curr_ld->ld_io_ops->cleanup_layoutcommit(
++					NFS_I(data->args.inode)->layout,
++					&data->args, data->status);
++}
++
++/* Set up the argument/result storage required for the layoutcommit RPC.
++ * Returns 0, or the status of the driver's optional setup_layoutcommit().
++ */
++static int
++pnfs_layoutcommit_setup(struct inode *inode,
++			struct nfs4_layoutcommit_data *data,
++			loff_t write_begin_pos, loff_t write_end_pos)
++{
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int result = 0;
++
++	dprintk("--> %s\n", __func__);
++
++	data->args.inode = inode;
++	data->args.fh = NFS_FH(inode);
++	data->args.layout_type = nfss->pnfs_curr_ld->id;
++	data->res.fattr = &data->fattr;
++	nfs_fattr_init(&data->fattr);
++
++	/* TODO: Need to determine the correct values */
++	data->args.time_modify_changed = 0;
++
++	/* Commit range from the caller-supplied write positions; the last
++	 * byte written is clamped to the current file size */
++	data->args.range.iomode = IOMODE_RW;
++	data->args.range.offset = write_begin_pos;
++	data->args.range.length = write_end_pos - write_begin_pos + 1;
++	data->args.lastbytewritten =  min(write_end_pos,
++					  i_size_read(inode) - 1);
++	data->args.bitmask = nfss->attr_bitmask;
++	data->res.server = nfss;
++
++	/* Call layout driver to set the arguments */
++	if (nfss->pnfs_curr_ld->ld_io_ops->setup_layoutcommit)
++		result = nfss->pnfs_curr_ld->ld_io_ops->setup_layoutcommit(
++				NFS_I(inode)->layout, &data->args);
++
++	dprintk("<-- %s Status %d\n", __func__, result);
++	return result;
++}
++
++/* Issue a layoutcommit for an inode; sync is passed on to
++ * nfs4_proc_layoutcommit().  Returns 0 if no layoutcommit was needed. */
++int
++pnfs_layoutcommit_inode(struct inode *inode, int sync)
++{
++	struct nfs4_layoutcommit_data *data;
++	struct nfs_inode *nfsi = NFS_I(inode);
++	loff_t write_begin_pos;
++	loff_t write_end_pos;
++
++	int status = 0;
++
++	dprintk("%s Begin (sync:%d)\n", __func__, sync);
++
++	BUG_ON(!has_layout(nfsi));
++
++	data = pnfs_layoutcommit_alloc();
++	if (!data)
++		return -ENOMEM;
++
++	spin_lock(&inode->i_lock);
++	if (!layoutcommit_needed(nfsi)) {
++		spin_unlock(&inode->i_lock);
++		goto out_free;	/* nothing to commit: status is still 0 */
++	}
++
++	/* Snapshot, then clear, the layoutcommit properties in the layout
++	 * so new lc info can be generated
++	 */
++	write_begin_pos = nfsi->layout->write_begin_pos;
++	write_end_pos = nfsi->layout->write_end_pos;
++	data->cred = nfsi->layout->cred;	/* cred moves from layout to data */
++	nfsi->layout->write_begin_pos = 0;
++	nfsi->layout->write_end_pos = 0;
++	nfsi->layout->cred = NULL;
++	__clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->state);
++	pnfs_get_layout_stateid(&data->args.stateid, nfsi->layout);
++
++	/* Reference for layoutcommit matched in pnfs_layoutcommit_release */
++	get_layout(NFS_I(inode)->layout);
++
++	spin_unlock(&inode->i_lock);
++
++	/* Set up layout commit args */
++	status = pnfs_layoutcommit_setup(inode, data, write_begin_pos,
++					 write_end_pos);
++	if (status) {
++		/* The layout driver failed to setup the layoutcommit */
++		put_rpccred(data->cred);
++		put_layout(inode);	/* undo the get_layout() above */
++		goto out_free;
++	}
++	status = nfs4_proc_layoutcommit(data, sync);
++out:
++	dprintk("%s end (err:%d)\n", __func__, status);
++	return status;
++out_free:
++	pnfs_layoutcommit_free(data);
++	goto out;
++}
++
++void pnfs_free_fsdata(struct pnfs_fsdata *fsdata)
++{
++	if (fsdata) {
++		/* lseg refcounting handled directly in nfs_write_end */
++		kfree(fsdata);
++	}
++}
++
++/* Callback operations for layout drivers: device lookups go to the
++ * nfs4_proc_* calls, I/O completions to the *_done handlers above. */
++struct pnfs_client_operations pnfs_ops = {
++	.nfs_getdevicelist = nfs4_proc_getdevicelist,
++	.nfs_getdeviceinfo = nfs4_proc_getdeviceinfo,
++	.nfs_readlist_complete = pnfs_read_done,
++	.nfs_writelist_complete = pnfs_writeback_done,
++	.nfs_commit_complete = pnfs_commit_done,
++};
++
++EXPORT_SYMBOL(pnfs_unregister_layoutdriver);
++EXPORT_SYMBOL(pnfs_register_layoutdriver);
++
++
++/* Device ID cache. Supports one layout type per struct nfs_client */
++int
++nfs4_alloc_init_deviceid_cache(struct nfs_client *clp,
++			 void (*free_callback)(struct kref *))
++{
++	struct nfs4_deviceid_cache *c;
++
++	c = kzalloc(sizeof(struct nfs4_deviceid_cache), GFP_KERNEL);
++	if (!c)		/* allocated up front, before cl_lock is taken */
++		return -ENOMEM;
++	spin_lock(&clp->cl_lock);
++	if (clp->cl_devid_cache != NULL) {
++		kref_get(&clp->cl_devid_cache->dc_kref); /* share existing */
++		spin_unlock(&clp->cl_lock);
++		dprintk("%s [kref [%d]]\n", __func__,
++			atomic_read(&clp->cl_devid_cache->dc_kref.refcount));
++		kfree(c);	/* spare allocation not needed */
++	} else {
++		int i;
++
++		spin_lock_init(&c->dc_lock);
++		for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE ; i++)
++			INIT_HLIST_HEAD(&c->dc_deviceids[i]);
++		kref_init(&c->dc_kref);
++		c->dc_free_callback = free_callback;
++		clp->cl_devid_cache = c;	/* publish while holding cl_lock */
++		spin_unlock(&clp->cl_lock);
++		dprintk("%s [new]\n", __func__);
++	}
++	return 0;
++}
++EXPORT_SYMBOL(nfs4_alloc_init_deviceid_cache);
++
++void
++nfs4_init_deviceid_node(struct nfs4_deviceid *d)
++{
++	INIT_HLIST_NODE(&d->de_node);	/* not yet on any hash chain */
++	kref_init(&d->de_kref);		/* starts with a single reference */
++}
++EXPORT_SYMBOL(nfs4_init_deviceid_node);
++
++/* Called from layoutdriver_io_operations->alloc_lseg */
++void
++nfs4_set_layout_deviceid(struct pnfs_layout_segment *l, struct nfs4_deviceid *d)
++{
++	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
++	l->deviceid = d;	/* no kref is taken here */
++}
++EXPORT_SYMBOL(nfs4_set_layout_deviceid);
++
++/* Called from layoutdriver_io_operations->free_lseg */
++void
++nfs4_put_unset_layout_deviceid(struct pnfs_layout_segment *l,
++			   struct nfs4_deviceid *d,
++			   void (*free_callback)(struct kref *))
++{
++	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
++	l->deviceid = NULL;
++	kref_put(&d->de_kref, free_callback);	/* may invoke free_callback */
++}
++EXPORT_SYMBOL(nfs4_put_unset_layout_deviceid);
++
++/* Find and reference a deviceid; NULL if absent or its refcount hit zero */
++struct nfs4_deviceid *
++nfs4_find_get_deviceid(struct nfs4_deviceid_cache *c, struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++	long hash = nfs4_deviceid_hash(id);
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	rcu_read_lock();	/* lookup runs without taking dc_lock */
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (!memcmp(&d->de_id, id, NFS4_PNFS_DEVICEID4_SIZE)) {
++			if (!atomic_inc_not_zero(&d->de_kref.refcount)) {
++				goto fail;	/* match found but ref is 0 */
++			} else {
++				rcu_read_unlock();
++				return d;
++			}
++		}
++	}
++fail:
++	rcu_read_unlock();
++	return NULL;
++}
++EXPORT_SYMBOL(nfs4_find_get_deviceid);
++
++/*
++ * Add and kref_get a deviceid.  GETDEVICEINFOs for the same deviceid can
++ * race: if found, new is passed to dc_free_callback and the old returned.
++ */
++struct nfs4_deviceid *
++nfs4_add_get_deviceid(struct nfs4_deviceid_cache *c, struct nfs4_deviceid *new)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++	long hash = nfs4_deviceid_hash(&new->de_id);
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	spin_lock(&c->dc_lock);
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (!memcmp(&d->de_id, &new->de_id, NFS4_PNFS_DEVICEID4_SIZE)) {
++			kref_get(&d->de_kref);	/* caller's ref on cached entry */
++			spin_unlock(&c->dc_lock);
++			dprintk("%s [discard]\n", __func__);
++			c->dc_free_callback(&new->de_kref);
++			return d;
++		}
++	}
++	hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
++	kref_get(&new->de_kref);	/* extra ref on top of kref_init()'s */
++	spin_unlock(&c->dc_lock);
++	dprintk("%s [new]\n", __func__);
++	return new;
++}
++EXPORT_SYMBOL(nfs4_add_get_deviceid);
++
++/*
++ * Unhash and put the first deviceid in a bucket that matches id (any entry
++ * if id is NULL).  Returns 1 if one was removed, 0 if none matched.
++ */
++static int
++nfs4_remove_deviceid(struct nfs4_deviceid_cache *c, long hash,
++		     struct pnfs_deviceid *id)
++{
++	struct nfs4_deviceid *d;
++	struct hlist_node *n;
++
++	dprintk("--> %s hash %ld\n", __func__, hash);
++	spin_lock(&c->dc_lock);
++	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
++		if (id && memcmp(id, &d->de_id, NFS4_PNFS_DEVICEID4_SIZE))
++			continue;
++		hlist_del_rcu(&d->de_node);
++		spin_unlock(&c->dc_lock);
++		synchronize_rcu();	/* called only after dc_lock is dropped */
++		dprintk("%s [%d]\n", __func__,
++			atomic_read(&d->de_kref.refcount));
++		kref_put(&d->de_kref, c->dc_free_callback);
++		return 1;
++	}
++	spin_unlock(&c->dc_lock);
++	return 0;
++}
++
++void
++nfs4_delete_device(struct nfs4_deviceid_cache *c, struct pnfs_deviceid *id)
++{
++	long hash = nfs4_deviceid_hash(id);
++
++	nfs4_remove_deviceid(c, hash, id);	/* result (found or not) ignored */
++}
++EXPORT_SYMBOL(nfs4_delete_device);
++
++static void
++nfs4_free_deviceid_cache(struct kref *kref)
++{
++	struct nfs4_deviceid_cache *cache =
++		container_of(kref, struct nfs4_deviceid_cache, dc_kref);
++	long i;
++
++	for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
++		while (nfs4_remove_deviceid(cache, i, NULL))
++			;	/* one entry removed per call until bucket is empty */
++	kfree(cache);
++}
++
++void
++nfs4_put_deviceid_cache(struct nfs_client *clp)
++{
++	struct nfs4_deviceid_cache *tmp = clp->cl_devid_cache;
++
++	dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
++	dprintk("%s [%d]\n", __func__, atomic_read(&tmp->dc_kref.refcount));
++	/* Only the final put returns true, with cl_lock held, so the pointer
++	 * is cleared before a concurrent alloc_init can take a stale ref. */
++	if (!atomic_dec_and_lock(&tmp->dc_kref.refcount, &clp->cl_lock))
++		return;
++	clp->cl_devid_cache = NULL;
++	spin_unlock(&clp->cl_lock);
++	nfs4_free_deviceid_cache(&tmp->dc_kref); /* may synchronize_rcu() */
++}
++EXPORT_SYMBOL(nfs4_put_deviceid_cache);
+diff -up linux-2.6.34.noarch/fs/nfs/pnfs.h.orig linux-2.6.34.noarch/fs/nfs/pnfs.h
+--- linux-2.6.34.noarch/fs/nfs/pnfs.h.orig	2010-09-30 10:17:08.757998000 -0400
++++ linux-2.6.34.noarch/fs/nfs/pnfs.h	2010-09-30 10:17:08.759996000 -0400
+@@ -0,0 +1,354 @@
++/*
++ *  fs/nfs/pnfs.h
++ *
++ *  pNFS client data structures.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef FS_NFS_PNFS_H
++#define FS_NFS_PNFS_H
++
++#include <linux/nfs4_pnfs.h>
++
++#ifdef CONFIG_NFS_V4_1
++
++#include <linux/nfs_page.h>
++#include <linux/nfs_iostat.h>
++#include "iostat.h"
++
++/* nfs4proc.c */
++extern int nfs4_proc_getdevicelist(struct nfs_server *server,
++				   const struct nfs_fh *fh,
++				   struct pnfs_devicelist *devlist);
++extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
++				   struct pnfs_device *dev);
++extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
++extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
++				   int issync);
++extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool wait);
++
++/* pnfs.c */
++extern const nfs4_stateid zero_stateid;
++
++void _pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp);
++
++int _pnfs_return_layout(struct inode *, struct pnfs_layout_range *,
++			const nfs4_stateid *stateid, /* optional */
++			enum pnfs_layoutreturn_type, bool wait);
++void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *mntfh, u32 id);
++void unmount_pnfs_layoutdriver(struct nfs_server *);
++enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
++					     const struct rpc_call_ops *, int);
++enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
++					    const struct rpc_call_ops *);
++int pnfs_initialize(void);
++void pnfs_uninitialize(void);
++void pnfs_layoutcommit_free(struct nfs4_layoutcommit_data *data);
++void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
++int pnfs_layoutcommit_inode(struct inode *inode, int sync);
++void pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent);
++void pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx);
++unsigned int pnfs_getiosize(struct nfs_server *server);
++void pnfs_set_ds_iosize(struct nfs_server *server);
++enum pnfs_try_status pnfs_try_to_commit(struct nfs_write_data *,
++					 const struct rpc_call_ops *, int);
++void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
++			   struct nfs_open_context *, struct list_head *,
++			   size_t *);
++void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
++			    size_t *);
++void pnfs_free_fsdata(struct pnfs_fsdata *fsdata);
++void pnfs_get_layout_done(struct nfs4_layoutget *, int rpc_status);
++int pnfs_layout_process(struct nfs4_layoutget *lgp);
++void pnfs_layout_release(struct pnfs_layout_hdr *, struct pnfs_layout_range *range);
++void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
++			     const nfs4_stateid *stateid);
++void pnfs_destroy_layout(struct nfs_inode *);
++void pnfs_destroy_all_layouts(struct nfs_client *);
++void put_layout(struct inode *inode);
++void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo);
++int _pnfs_write_begin(struct inode *inode, struct page *page,
++		      loff_t pos, unsigned len,
++		      struct pnfs_layout_segment *lseg,
++		      struct pnfs_fsdata **fsdata);
++int _pnfs_write_end(struct inode *inode, struct page *page,
++		    loff_t pos, unsigned len, unsigned copied,
++		    struct pnfs_layout_segment *lseg);
++
++#define PNFS_EXISTS_LDIO_OP(srv, opname) ((srv)->pnfs_curr_ld &&	\
++				     (srv)->pnfs_curr_ld->ld_io_ops &&	\
++				     (srv)->pnfs_curr_ld->ld_io_ops->opname)
++#define PNFS_EXISTS_LDPOLICY_OP(srv, opname) ((srv)->pnfs_curr_ld &&	\
++				     (srv)->pnfs_curr_ld->ld_policy_ops && \
++				     (srv)->pnfs_curr_ld->ld_policy_ops->opname)
++
++#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
++
++static inline int lo_fail_bit(u32 iomode)
++{
++	return iomode == IOMODE_RW ?	/* any other iomode maps to the RO bit */
++			 NFS_INO_RW_LAYOUT_FAILED : NFS_INO_RO_LAYOUT_FAILED;
++}
++
++/* Return true if a layout driver is being used for this mountpoint */
++static inline int pnfs_enabled_sb(struct nfs_server *nfss)
++{
++	return nfss->pnfs_curr_ld != NULL;
++}
++
++static inline int pnfs_grow_ok(struct pnfs_layout_segment *lseg,
++			       struct pnfs_fsdata *fsdata)
++{
++	return !fsdata  || ((struct pnfs_layout_segment *)fsdata == lseg) ||
++		!fsdata->bypass_eof;	/* fsdata may alias lseg, see write_begin */
++}
++
++/* Should the pNFS client commit and return the layout upon a setattr */
++static inline bool
++pnfs_ld_layoutret_on_setattr(struct inode *inode)
++{
++	if (!PNFS_EXISTS_LDPOLICY_OP(NFS_SERVER(inode), flags))
++		return false;	/* no driver, no policy ops, or no flags set */
++	return NFS_SERVER(inode)->pnfs_curr_ld->ld_policy_ops->flags &
++		PNFS_LAYOUTRET_ON_SETATTR;
++}
++
++/* Should the pNFS client commit and return the layout on close?  Returns
++ * the layout's roc_iomode; nfsi->layout is dereferenced without a check. */
++static inline int
++pnfs_layout_roc_iomode(struct nfs_inode *nfsi)
++{
++	return nfsi->layout->roc_iomode;
++}
++
++static inline int pnfs_write_begin(struct file *filp, struct page *page,
++				   loff_t pos, unsigned len,
++				   struct pnfs_layout_segment *lseg,
++				   void **fsdata)
++{
++	struct inode *inode = filp->f_dentry->d_inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++	int status = 0;
++
++	*fsdata = lseg;	/* default: fsdata carries the bare lseg pointer */
++	if (lseg && PNFS_EXISTS_LDIO_OP(nfss, write_begin)) /* sets *fsdata */
++		status = _pnfs_write_begin(inode, page, pos, len, lseg,
++					   (struct pnfs_fsdata **) fsdata);
++	return status;
++}
++
++static inline int pnfs_write_end(struct file *filp, struct page *page,
++				 loff_t pos, unsigned len, unsigned copied,
++				 struct pnfs_layout_segment *lseg)
++{
++	struct inode *inode = filp->f_dentry->d_inode;
++	struct nfs_server *nfss = NFS_SERVER(inode);
++
++	if (PNFS_EXISTS_LDIO_OP(nfss, write_end))
++		return _pnfs_write_end(inode, page, pos, len, copied, lseg);
++	else
++		return 0;	/* no driver write_end op: nothing to do */
++}
++
++static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
++{
++	if (fsdata) {
++		struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode);
++
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_end_cleanup))
++			nfss->pnfs_curr_ld->ld_io_ops->write_end_cleanup(filp, fsdata);
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_begin))
++			pnfs_free_fsdata(fsdata); /* was kzalloc'd in write_begin */
++	}
++}
++
++static inline int pnfs_return_layout(struct inode *ino,
++				     struct pnfs_layout_range *range,
++				     const nfs4_stateid *stateid, /* optional */
++				     enum pnfs_layoutreturn_type type,
++				     bool wait)
++{
++	struct nfs_inode *nfsi = NFS_I(ino);
++	struct nfs_server *nfss = NFS_SERVER(ino);
++
++	if (pnfs_enabled_sb(nfss) &&	/* RETURN_FILE also needs a layout */
++	    (type != RETURN_FILE || has_layout(nfsi)))
++		return _pnfs_return_layout(ino, range, stateid, type, wait);
++
++	return 0;	/* nothing to return */
++}
++
++static inline void pnfs_update_layout(struct inode *ino,
++	struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp)
++{
++	struct nfs_server *nfss = NFS_SERVER(ino);
++
++	if (pnfs_enabled_sb(nfss))
++		_pnfs_update_layout(ino, ctx, pos, count, access_type, lsegpp);
++	else {
++		if (lsegpp)	/* lsegpp is optional */
++			*lsegpp = NULL;	/* no layout driver: no segment */
++	}
++}
++
++static inline int pnfs_get_write_status(struct nfs_write_data *data)
++{
++	return data->pdata.pnfs_error;
++}
++
++static inline int pnfs_get_read_status(struct nfs_read_data *data)
++{
++	return data->pdata.pnfs_error;
++}
++
++static inline int pnfs_use_rpc(struct nfs_server *nfss)
++{
++	if (pnfs_enabled_sb(nfss))	/* the layout driver decides */
++		return pnfs_ld_use_rpc_code(nfss->pnfs_curr_ld);
++
++	return 1;	/* no layout driver: always use RPC */
++}
++
++static inline struct pnfs_layout_segment *
++nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata)
++{
++	if (fsdata) {
++		struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode);
++
++		if (PNFS_EXISTS_LDIO_OP(nfss, write_begin)) /* real pnfs_fsdata */
++			return ((struct pnfs_fsdata *) fsdata)->lseg;
++	}
++	return fsdata;	/* otherwise fsdata is the lseg itself (or NULL) */
++}
++#else  /* CONFIG_NFS_V4_1 */
++
++static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
++{
++}
++
++static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
++{
++}
++
++static inline void get_lseg(struct pnfs_layout_segment *lseg)
++{
++}
++
++static inline void put_lseg(struct pnfs_layout_segment *lseg)
++{
++}
++
++static inline void
++pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
++	loff_t pos, u64 count, enum pnfs_iomode access_type,
++	struct pnfs_layout_segment **lsegpp)
++{
++	if (lsegpp)
++		*lsegpp = NULL;
++}
++
++static inline int pnfs_grow_ok(struct pnfs_layout_segment *lseg,
++			       struct pnfs_fsdata *fsdata)
++{
++	return 1;
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_read_data(struct nfs_read_data *data,
++		      const struct rpc_call_ops *call_ops)
++{
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_write_data(struct nfs_write_data *data,
++		       const struct rpc_call_ops *call_ops, int how)
++{
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static inline enum pnfs_try_status
++pnfs_try_to_commit(struct nfs_write_data *data,
++		   const struct rpc_call_ops *call_ops, int how)
++{
++	return PNFS_NOT_ATTEMPTED;
++}
++
++static inline int pnfs_write_begin(struct file *filp, struct page *page,
++				   loff_t pos, unsigned len,
++				   struct pnfs_layout_segment *lseg,
++				   void **fsdata)
++{
++	*fsdata = NULL;
++	return 0;
++}
++
++static inline int pnfs_write_end(struct file *filp, struct page *page,
++				 loff_t pos, unsigned len, unsigned copied,
++				 struct pnfs_layout_segment *lseg)
++{
++	return 0;
++}
++
++static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
++{
++}
++
++static inline int pnfs_get_write_status(struct nfs_write_data *data)
++{
++	return 0;
++}
++
++static inline int pnfs_get_read_status(struct nfs_read_data *data)
++{
++	return 0;
++}
++
++static inline int pnfs_use_rpc(struct nfs_server *nfss)
++{
++	return 1;
++}
++
++static inline int pnfs_layoutcommit_inode(struct inode *inode, int sync)
++{
++	return 0;
++}
++
++static inline bool
++pnfs_ld_layoutret_on_setattr(struct inode *inode)
++{
++	return false;
++}
++
++static inline int
++pnfs_layout_roc_iomode(struct nfs_inode *nfsi)
++{
++	return 0;
++}
++
++static inline int pnfs_return_layout(struct inode *ino,
++				     struct pnfs_layout_range *range,
++				     const nfs4_stateid *stateid, /* optional */
++				     enum pnfs_layoutreturn_type type,
++				     bool wait)
++{
++	return 0;
++}
++
++static inline struct pnfs_layout_segment *
++nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata)
++{
++	return NULL;
++}
++
++#endif /* CONFIG_NFS_V4_1 */
++
++#endif /* FS_NFS_PNFS_H */
+diff -up linux-2.6.34.noarch/fs/nfs/proc.c.orig linux-2.6.34.noarch/fs/nfs/proc.c
+--- linux-2.6.34.noarch/fs/nfs/proc.c.orig	2010-09-30 10:15:17.904725000 -0400
++++ linux-2.6.34.noarch/fs/nfs/proc.c	2010-09-30 10:17:08.764996000 -0400
+@@ -443,7 +443,7 @@ nfs_proc_symlink(struct inode *dir, stru
+ 	fattr = nfs_alloc_fattr();
+ 	status = -ENOMEM;
+ 	if (fh == NULL || fattr == NULL)
+-		goto out;
++		goto out_free;
+ 
+ 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ 	nfs_mark_for_revalidate(dir);
+@@ -455,7 +455,7 @@ nfs_proc_symlink(struct inode *dir, stru
+ 	 */
+ 	if (status == 0)
+ 		status = nfs_instantiate(dentry, fh, fattr);
+-
++out_free:
+ 	nfs_free_fattr(fattr);
+ 	nfs_free_fhandle(fh);
+ out:
+@@ -694,6 +694,7 @@ const struct nfs_rpc_ops nfs_v2_clientop
+ 	.dentry_ops	= &nfs_dentry_operations,
+ 	.dir_inode_ops	= &nfs_dir_inode_operations,
+ 	.file_inode_ops	= &nfs_file_inode_operations,
++	.file_ops	= &nfs_file_operations,
+ 	.getroot	= nfs_proc_get_root,
+ 	.getattr	= nfs_proc_getattr,
+ 	.setattr	= nfs_proc_setattr,
+diff -up linux-2.6.34.noarch/fs/nfs/read.c.orig linux-2.6.34.noarch/fs/nfs/read.c
+--- linux-2.6.34.noarch/fs/nfs/read.c.orig	2010-09-30 10:15:17.910723000 -0400
++++ linux-2.6.34.noarch/fs/nfs/read.c	2010-09-30 10:17:08.770996000 -0400
+@@ -18,8 +18,12 @@
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/nfs_page.h>
++#include <linux/smp_lock.h>
++#include <linux/module.h>
+ 
+ #include <asm/system.h>
++#include <linux/module.h>
++#include "pnfs.h"
+ 
+ #include "nfs4_fs.h"
+ #include "internal.h"
+@@ -117,11 +121,14 @@ int nfs_readpage_async(struct nfs_open_c
+ 	LIST_HEAD(one_request);
+ 	struct nfs_page	*new;
+ 	unsigned int len;
++	struct pnfs_layout_segment *lseg;
+ 
+ 	len = nfs_page_length(page);
+ 	if (len == 0)
+ 		return nfs_return_empty_page(page);
+-	new = nfs_create_request(ctx, inode, page, 0, len);
++	pnfs_update_layout(inode, ctx, 0, NFS4_MAX_UINT64, IOMODE_READ, &lseg);
++	new = nfs_create_request(ctx, inode, page, 0, len, lseg);
++	put_lseg(lseg);
+ 	if (IS_ERR(new)) {
+ 		unlock_page(page);
+ 		return PTR_ERR(new);
+@@ -155,24 +162,20 @@ static void nfs_readpage_release(struct 
+ 	nfs_release_request(req);
+ }
+ 
+-/*
+- * Set up the NFS read request struct
+- */
+-static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+-		const struct rpc_call_ops *call_ops,
+-		unsigned int count, unsigned int offset)
++int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++		      const struct rpc_call_ops *call_ops)
+ {
+-	struct inode *inode = req->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = req->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+ 		.task = &data->task,
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.rpc_message = &msg,
+ 		.callback_ops = call_ops,
+ 		.callback_data = data,
+@@ -180,9 +183,46 @@ static int nfs_read_rpcsetup(struct nfs_
+ 		.flags = RPC_TASK_ASYNC | swap_flags,
+ 	};
+ 
++	/* Set up the initial task struct. */
++	NFS_PROTO(inode)->read_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
++			data->task.tk_pid,
++			inode->i_sb->s_id,
++			(long long)NFS_FILEID(inode),
++			data->args.count,
++			(unsigned long long)data->args.offset);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	rpc_put_task(task);
++	return 0;
++}
++EXPORT_SYMBOL(nfs_initiate_read);
++
++/* Use pNFS for requests carrying an lseg; else fall back to the RPC path. */
++int pnfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
++		       const struct rpc_call_ops *call_ops)
++{
++	if (data->req->wb_lseg == NULL ||
++	    pnfs_try_to_read_data(data, call_ops) != PNFS_ATTEMPTED)
++		return nfs_initiate_read(data, clnt, call_ops);
++	return pnfs_get_read_status(data);
++}
++
++/*
++ * Set up the NFS read request struct
++ */
++static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
++		const struct rpc_call_ops *call_ops,
++		unsigned int count, unsigned int offset)
++{
++	struct inode *inode = req->wb_context->path.dentry->d_inode;
++
+ 	data->req	  = req;
+ 	data->inode	  = inode;
+-	data->cred	  = msg.rpc_cred;
++	data->cred	  = req->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(inode);
+ 	data->args.offset = req_offset(req) + offset;
+@@ -190,27 +230,14 @@ static int nfs_read_rpcsetup(struct nfs_
+ 	data->args.pages  = data->pagevec;
+ 	data->args.count  = count;
+ 	data->args.context = get_nfs_open_context(req->wb_context);
++	data->args.lock_context = req->wb_lock_context;
+ 
+ 	data->res.fattr   = &data->fattr;
+ 	data->res.count   = count;
+ 	data->res.eof     = 0;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	/* Set up the initial task struct. */
+-	NFS_PROTO(inode)->read_setup(data, &msg);
+-
+-	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+-			data->task.tk_pid,
+-			inode->i_sb->s_id,
+-			(long long)NFS_FILEID(inode),
+-			count,
+-			(unsigned long long)data->args.offset);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task))
+-		return PTR_ERR(task);
+-	rpc_put_task(task);
+-	return 0;
++	return pnfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
+ }
+ 
+ static void
+@@ -354,7 +381,14 @@ static void nfs_readpage_retry(struct rp
+ {
+ 	struct nfs_readargs *argp = &data->args;
+ 	struct nfs_readres *resp = &data->res;
++	struct nfs_client *clp = NFS_SERVER(data->inode)->nfs_client;
+ 
++#ifdef CONFIG_NFS_V4_1
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		clp = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 	if (resp->eof || resp->count == argp->count)
+ 		return;
+ 
+@@ -368,7 +402,10 @@ static void nfs_readpage_retry(struct rp
+ 	argp->offset += resp->count;
+ 	argp->pgbase += resp->count;
+ 	argp->count -= resp->count;
+-	nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client);
++#ifdef CONFIG_NFS_V4_1
++	data->pdata.pnfs_error = -EAGAIN;
++#endif /* CONFIG_NFS_V4_1 */
++	nfs_restart_rpc(task, clp);
+ }
+ 
+ /*
+@@ -409,13 +446,19 @@ static void nfs_readpage_release_partial
+ void nfs_read_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs_read_data *data = calldata;
++	struct nfs4_session *ds_session = NULL;
+ 
+-	if (nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client,
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS read\n", __func__);
++		ds_session = data->fldata.ds_nfs_client->cl_session;
++	}
++	if (nfs4_setup_sequence(NFS_SERVER(data->inode), ds_session,
+ 				&data->args.seq_args, &data->res.seq_res,
+ 				0, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
++EXPORT_SYMBOL(nfs_read_prepare);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static const struct rpc_call_ops nfs_read_partial_ops = {
+@@ -568,7 +611,8 @@ readpage_async_filler(void *data, struct
+ 	if (len == 0)
+ 		return nfs_return_empty_page(page);
+ 
+-	new = nfs_create_request(desc->ctx, inode, page, 0, len);
++	new = nfs_create_request(desc->ctx, inode, page, 0, len,
++				 desc->pgio->pg_lseg);
+ 	if (IS_ERR(new))
+ 		goto out_error;
+ 
+@@ -624,6 +668,9 @@ int nfs_readpages(struct file *filp, str
+ 	if (ret == 0)
+ 		goto read_complete; /* all pages were read */
+ 
++#ifdef CONFIG_NFS_V4_1
++	pnfs_pageio_init_read(&pgio, inode, desc.ctx, pages, &rsize);
++#endif /* CONFIG_NFS_V4_1 */
+ 	if (rsize < PAGE_CACHE_SIZE)
+ 		nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
+ 	else
+@@ -632,6 +679,7 @@ int nfs_readpages(struct file *filp, str
+ 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
+ 
+ 	nfs_pageio_complete(&pgio);
++	put_lseg(pgio.pg_lseg);
+ 	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ 	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
+ read_complete:
+diff -up linux-2.6.34.noarch/fs/nfs/super.c.orig linux-2.6.34.noarch/fs/nfs/super.c
+--- linux-2.6.34.noarch/fs/nfs/super.c.orig	2010-09-30 10:15:17.918722000 -0400
++++ linux-2.6.34.noarch/fs/nfs/super.c	2010-09-30 10:17:08.777998000 -0400
+@@ -64,6 +64,7 @@
+ #include "iostat.h"
+ #include "internal.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_VFS
+ 
+@@ -676,6 +677,28 @@ static int nfs_show_options(struct seq_f
+ 
+ 	return 0;
+ }
++#ifdef CONFIG_NFS_V4_1
++void show_sessions(struct seq_file *m, struct nfs_server *server)
++{
++	if (nfs4_has_session(server->nfs_client))
++		seq_printf(m, ",sessions");
++}
++#else
++void show_sessions(struct seq_file *m, struct nfs_server *server) {}
++#endif
++
++#ifdef CONFIG_NFS_V4_1
++void show_pnfs(struct seq_file *m, struct nfs_server *server)
++{
++	seq_printf(m, ",pnfs=");
++	if (server->pnfs_curr_ld)
++		seq_printf(m, "%s", server->pnfs_curr_ld->name);
++	else
++		seq_printf(m, "not configured");
++}
++#else  /* CONFIG_NFS_V4_1 */
++void show_pnfs(struct seq_file *m, struct nfs_server *server) {}
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ /*
+  * Present statistical information for this VFS mountpoint
+@@ -714,6 +737,8 @@ static int nfs_show_stats(struct seq_fil
+ 		seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
+ 		seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
+ 		seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
++		show_sessions(m, nfss);
++		show_pnfs(m, nfss);
+ 	}
+ #endif
+ 
+diff -up linux-2.6.34.noarch/fs/nfs/unlink.c.orig linux-2.6.34.noarch/fs/nfs/unlink.c
+--- linux-2.6.34.noarch/fs/nfs/unlink.c.orig	2010-09-30 10:15:17.932726000 -0400
++++ linux-2.6.34.noarch/fs/nfs/unlink.c	2010-09-30 10:17:08.783003000 -0400
+@@ -110,7 +110,7 @@ void nfs_unlink_prepare(struct rpc_task 
+ 	struct nfs_unlinkdata *data = calldata;
+ 	struct nfs_server *server = NFS_SERVER(data->dir);
+ 
+-	if (nfs4_setup_sequence(server->nfs_client, &data->args.seq_args,
++	if (nfs4_setup_sequence(server, NULL, &data->args.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+diff -up linux-2.6.34.noarch/fs/nfs/write.c.orig linux-2.6.34.noarch/fs/nfs/write.c
+--- linux-2.6.34.noarch/fs/nfs/write.c.orig	2010-09-30 10:15:05.044337000 -0400
++++ linux-2.6.34.noarch/fs/nfs/write.c	2010-09-30 10:17:08.789996000 -0400
+@@ -20,6 +20,7 @@
+ #include <linux/nfs_mount.h>
+ #include <linux/nfs_page.h>
+ #include <linux/backing-dev.h>
++#include <linux/module.h>
+ 
+ #include <asm/uaccess.h>
+ 
+@@ -28,6 +29,7 @@
+ #include "iostat.h"
+ #include "nfs4_fs.h"
+ #include "fscache.h"
++#include "pnfs.h"
+ 
+ #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
+ 
+@@ -59,6 +61,7 @@ struct nfs_write_data *nfs_commitdata_al
+ 	}
+ 	return p;
+ }
++EXPORT_SYMBOL(nfs_commitdata_alloc);
+ 
+ void nfs_commit_free(struct nfs_write_data *p)
+ {
+@@ -66,6 +69,7 @@ void nfs_commit_free(struct nfs_write_da
+ 		kfree(p->pagevec);
+ 	mempool_free(p, nfs_commit_mempool);
+ }
++EXPORT_SYMBOL(nfs_commit_free);
+ 
+ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+ {
+@@ -418,6 +422,17 @@ static void nfs_inode_remove_request(str
+ 	nfs_clear_request(req);
+ 	nfs_release_request(req);
+ }
++/* Detach @req from its layout segment so a retry goes through the MDS. */
++static void nfs_mark_request_nopnfs(struct nfs_page *req)
++{
++	struct pnfs_layout_segment *lseg = req->wb_lseg;
++
++	if (lseg == NULL)
++		return;
++	req->wb_lseg = NULL;
++	put_lseg(lseg);
++	dprintk(" retry through MDS\n");
++}
+ 
+ static void
+ nfs_mark_request_dirty(struct nfs_page *req)
+@@ -523,7 +538,7 @@ nfs_need_commit(struct nfs_inode *nfsi)
+  * The requests are *not* checked to ensure that they form a contiguous set.
+  */
+ static int
+-nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
++nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages, int *use_pnfs)
+ {
+ 	struct nfs_inode *nfsi = NFS_I(inode);
+ 	int ret;
+@@ -531,7 +546,8 @@ nfs_scan_commit(struct inode *inode, str
+ 	if (!nfs_need_commit(nfsi))
+ 		return 0;
+ 
+-	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
++	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT,
++			    use_pnfs);
+ 	if (ret > 0)
+ 		nfsi->ncommit -= ret;
+ 	if (nfs_need_commit(NFS_I(inode)))
+@@ -560,7 +576,8 @@ static inline int nfs_scan_commit(struct
+ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
+ 		struct page *page,
+ 		unsigned int offset,
+-		unsigned int bytes)
++		unsigned int bytes,
++		struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_page *req;
+ 	unsigned int rqend;
+@@ -585,8 +602,8 @@ static struct nfs_page *nfs_try_to_updat
+ 		 * Note: nfs_flush_incompatible() will already
+ 		 * have flushed out requests having wrong owners.
+ 		 */
+-		if (offset > rqend
+-		    || end < req->wb_offset)
++		if (offset > rqend || end < req->wb_offset ||
++		    req->wb_lseg != lseg)
+ 			goto out_flushme;
+ 
+ 		if (nfs_set_page_tag_locked(req))
+@@ -634,16 +651,17 @@ out_err:
+  * already called nfs_flush_incompatible() if necessary.
+  */
+ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
+-		struct page *page, unsigned int offset, unsigned int bytes)
++		struct page *page, unsigned int offset, unsigned int bytes,
++		struct pnfs_layout_segment *lseg)
+ {
+ 	struct inode *inode = page->mapping->host;
+ 	struct nfs_page	*req;
+ 	int error;
+ 
+-	req = nfs_try_to_update_request(inode, page, offset, bytes);
++	req = nfs_try_to_update_request(inode, page, offset, bytes, lseg);
+ 	if (req != NULL)
+ 		goto out;
+-	req = nfs_create_request(ctx, inode, page, offset, bytes);
++	req = nfs_create_request(ctx, inode, page, offset, bytes, lseg);
+ 	if (IS_ERR(req))
+ 		goto out;
+ 	error = nfs_inode_add_request(inode, req);
+@@ -656,23 +674,27 @@ out:
+ }
+ 
+ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
+-		unsigned int offset, unsigned int count)
++			       unsigned int offset, unsigned int count,
++			       struct pnfs_layout_segment *lseg,
++			       void *fsdata)
+ {
+ 	struct nfs_page	*req;
+ 
+-	req = nfs_setup_write_request(ctx, page, offset, count);
++	req = nfs_setup_write_request(ctx, page, offset, count, lseg);
+ 	if (IS_ERR(req))
+ 		return PTR_ERR(req);
+ 	nfs_mark_request_dirty(req);
+ 	/* Update file length */
+-	nfs_grow_file(page, offset, count);
++	if (pnfs_grow_ok(lseg, fsdata))
++		nfs_grow_file(page, offset, count);
+ 	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+ 	nfs_mark_request_dirty(req);
+ 	nfs_clear_page_tag_locked(req);
+ 	return 0;
+ }
+ 
+-int nfs_flush_incompatible(struct file *file, struct page *page)
++int nfs_flush_incompatible(struct file *file, struct page *page,
++			   struct pnfs_layout_segment *lseg)
+ {
+ 	struct nfs_open_context *ctx = nfs_file_open_context(file);
+ 	struct nfs_page	*req;
+@@ -689,7 +711,10 @@ int nfs_flush_incompatible(struct file *
+ 		req = nfs_page_find_request(page);
+ 		if (req == NULL)
+ 			return 0;
+-		do_flush = req->wb_page != page || req->wb_context != ctx;
++		do_flush = req->wb_page != page || req->wb_context != ctx ||
++			req->wb_lock_context->lockowner != current->files ||
++			req->wb_lock_context->pid != current->tgid ||
++			req->wb_lseg != lseg;
+ 		nfs_release_request(req);
+ 		if (!do_flush)
+ 			return 0;
+@@ -716,7 +741,8 @@ static int nfs_write_pageuptodate(struct
+  * things with a page scheduled for an RPC call (e.g. invalidate it).
+  */
+ int nfs_updatepage(struct file *file, struct page *page,
+-		unsigned int offset, unsigned int count)
++		   unsigned int offset, unsigned int count,
++		   struct pnfs_layout_segment *lseg, void *fsdata)
+ {
+ 	struct nfs_open_context *ctx = nfs_file_open_context(file);
+ 	struct inode	*inode = page->mapping->host;
+@@ -741,7 +767,7 @@ int nfs_updatepage(struct file *file, st
+ 		offset = 0;
+ 	}
+ 
+-	status = nfs_writepage_setup(ctx, page, offset, count);
++	status = nfs_writepage_setup(ctx, page, offset, count, lseg, fsdata);
+ 	if (status < 0)
+ 		nfs_set_pageerror(page);
+ 
+@@ -771,25 +797,21 @@ static int flush_task_priority(int how)
+ 	return RPC_PRIORITY_NORMAL;
+ }
+ 
+-/*
+- * Set up the argument/result storage required for the RPC call.
+- */
+-static int nfs_write_rpcsetup(struct nfs_page *req,
+-		struct nfs_write_data *data,
+-		const struct rpc_call_ops *call_ops,
+-		unsigned int count, unsigned int offset,
+-		int how)
++int nfs_initiate_write(struct nfs_write_data *data,
++		       struct rpc_clnt *clnt,
++		       const struct rpc_call_ops *call_ops,
++		       int how)
+ {
+-	struct inode *inode = req->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int priority = flush_task_priority(how);
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = req->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.task = &data->task,
+ 		.rpc_message = &msg,
+ 		.callback_ops = call_ops,
+@@ -800,12 +822,62 @@ static int nfs_write_rpcsetup(struct nfs
+ 	};
+ 	int ret = 0;
+ 
++	/* Set up the initial task struct.  */
++	NFS_PROTO(inode)->write_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated write call "
++		"(req %s/%lld, %u bytes @ offset %llu)\n",
++		data->task.tk_pid,
++		inode->i_sb->s_id,
++		(long long)NFS_FILEID(inode),
++		data->args.count,
++		(unsigned long long)data->args.offset);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task)) {
++		ret = PTR_ERR(task);
++		goto out;
++	}
++	if (how & FLUSH_SYNC) {
++		ret = rpc_wait_for_completion_task(task);
++		if (ret == 0)
++			ret = task->tk_status;
++	}
++	rpc_put_task(task);
++out:
++	return ret;
++}
++EXPORT_SYMBOL(nfs_initiate_write);
++
++/* Use pNFS for requests carrying an lseg; else fall back to the RPC path. */
++int pnfs_initiate_write(struct nfs_write_data *data, struct rpc_clnt *clnt,
++			const struct rpc_call_ops *call_ops, int how)
++{
++	if (data->req->wb_lseg == NULL ||
++	    pnfs_try_to_write_data(data, call_ops, how) != PNFS_ATTEMPTED)
++		return nfs_initiate_write(data, clnt, call_ops, how);
++
++	/* PNFS_ATTEMPTED: hand back what pnfs_get_write_status() reports */
++	return pnfs_get_write_status(data);
++}
++
++/*
++ * Set up the argument/result storage required for the RPC call.
++ */
++static int nfs_write_rpcsetup(struct nfs_page *req,
++		struct nfs_write_data *data,
++		const struct rpc_call_ops *call_ops,
++		unsigned int count, unsigned int offset,
++		int how)
++{
++	struct inode *inode = req->wb_context->path.dentry->d_inode;
++
+ 	/* Set up the RPC argument and reply structs
+ 	 * NB: take care not to mess about with data->commit et al. */
+ 
+ 	data->req = req;
+ 	data->inode = inode = req->wb_context->path.dentry->d_inode;
+-	data->cred = msg.rpc_cred;
++	data->cred = req->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(inode);
+ 	data->args.offset = req_offset(req) + offset;
+@@ -813,6 +885,7 @@ static int nfs_write_rpcsetup(struct nfs
+ 	data->args.pages  = data->pagevec;
+ 	data->args.count  = count;
+ 	data->args.context = get_nfs_open_context(req->wb_context);
++	data->args.lock_context = req->wb_lock_context;
+ 	data->args.stable  = NFS_UNSTABLE;
+ 	if (how & FLUSH_STABLE) {
+ 		data->args.stable = NFS_DATA_SYNC;
+@@ -825,30 +898,7 @@ static int nfs_write_rpcsetup(struct nfs
+ 	data->res.verf    = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
+ 
+-	/* Set up the initial task struct.  */
+-	NFS_PROTO(inode)->write_setup(data, &msg);
+-
+-	dprintk("NFS: %5u initiated write call "
+-		"(req %s/%lld, %u bytes @ offset %llu)\n",
+-		data->task.tk_pid,
+-		inode->i_sb->s_id,
+-		(long long)NFS_FILEID(inode),
+-		count,
+-		(unsigned long long)data->args.offset);
+-
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task)) {
+-		ret = PTR_ERR(task);
+-		goto out;
+-	}
+-	if (how & FLUSH_SYNC) {
+-		ret = rpc_wait_for_completion_task(task);
+-		if (ret == 0)
+-			ret = task->tk_status;
+-	}
+-	rpc_put_task(task);
+-out:
+-	return ret;
++	return pnfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
+ }
+ 
+ /* If a nfs_flush_* function fails, it should remove reqs from @head and
+@@ -859,6 +909,7 @@ static void nfs_redirty_request(struct n
+ {
+ 	struct page *page = req->wb_page;
+ 
++	nfs_mark_request_nopnfs(req);
+ 	nfs_mark_request_dirty(req);
+ 	nfs_clear_page_tag_locked(req);
+ 	nfs_end_page_writeback(page);
+@@ -971,6 +1022,10 @@ static void nfs_pageio_init_write(struct
+ {
+ 	size_t wsize = NFS_SERVER(inode)->wsize;
+ 
++#ifdef CONFIG_NFS_V4_1
++	pnfs_pageio_init_write(pgio, inode, &wsize);
++#endif /* CONFIG_NFS_V4_1 */
++
+ 	if (wsize < PAGE_CACHE_SIZE)
+ 		nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
+ 	else
+@@ -1036,13 +1091,27 @@ out:
+ void nfs_write_prepare(struct rpc_task *task, void *calldata)
+ {
+ 	struct nfs_write_data *data = calldata;
+-	struct nfs_client *clp = (NFS_SERVER(data->inode))->nfs_client;
++	struct nfs4_session *ds_session = NULL;
+ 
+-	if (nfs4_setup_sequence(clp, &data->args.seq_args,
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		ds_session = data->fldata.ds_nfs_client->cl_session;
++	} else if (data->args.count > NFS_SERVER(data->inode)->wsize) {
++		/* no DS client and count > server wsize (retrying via MDS?) */
++		data->pdata.orig_count = data->args.count;
++		data->args.count = NFS_SERVER(data->inode)->wsize;
++		dprintk("%s: trimmed count %u to wsize %u\n", __func__,
++			data->pdata.orig_count, data->args.count);
++	} else
++		data->pdata.orig_count = 0;
++
++	if (nfs4_setup_sequence(NFS_SERVER(data->inode), ds_session,
++				&data->args.seq_args,
+ 				&data->res.seq_res, 1, task))
+ 		return;
+ 	rpc_call_start(task);
+ }
++EXPORT_SYMBOL(nfs_write_prepare);
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ static const struct rpc_call_ops nfs_write_partial_ops = {
+@@ -1126,10 +1195,11 @@ int nfs_writeback_done(struct rpc_task *
+ 	struct nfs_writeargs	*argp = &data->args;
+ 	struct nfs_writeres	*resp = &data->res;
+ 	struct nfs_server	*server = NFS_SERVER(data->inode);
++	struct nfs_client	*clp = server->nfs_client;
+ 	int status;
+ 
+-	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
+-		task->tk_pid, task->tk_status);
++	dprintk("NFS: %5u nfs_writeback_done (status %d count %u)\n",
++		task->tk_pid, task->tk_status, resp->count);
+ 
+ 	/*
+ 	 * ->write_done will attempt to use post-op attributes to detect
+@@ -1142,6 +1212,13 @@ int nfs_writeback_done(struct rpc_task *
+ 	if (status != 0)
+ 		return status;
+ 	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
++#ifdef CONFIG_NFS_V4_1
++	/* Is this a DS session */
++	if (data->fldata.ds_nfs_client) {
++		dprintk("%s DS write\n", __func__);
++		clp = data->fldata.ds_nfs_client;
++	}
++#endif /* CONFIG_NFS_V4_1 */
+ 
+ #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+ 	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
+@@ -1158,7 +1235,7 @@ int nfs_writeback_done(struct rpc_task *
+ 		if (time_before(complain, jiffies)) {
+ 			dprintk("NFS:       faulty NFS server %s:"
+ 				" (committed = %d) != (stable = %d)\n",
+-				server->nfs_client->cl_hostname,
++				clp->cl_hostname,
+ 				resp->verf->committed, argp->stable);
+ 			complain = jiffies + 300 * HZ;
+ 		}
+@@ -1168,6 +1245,9 @@ int nfs_writeback_done(struct rpc_task *
+ 	if (task->tk_status >= 0 && resp->count < argp->count) {
+ 		static unsigned long    complain;
+ 
++		dprintk("NFS:       short write:"
++			" (resp->count %u) < (argp->count = %u)\n",
++			resp->count, argp->count);
+ 		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+ 
+ 		/* Has the server at least made some progress? */
+@@ -1184,7 +1264,10 @@ int nfs_writeback_done(struct rpc_task *
+ 				 */
+ 				argp->stable = NFS_FILE_SYNC;
+ 			}
+-			nfs_restart_rpc(task, server->nfs_client);
++#ifdef CONFIG_NFS_V4_1
++			data->pdata.pnfs_error = -EAGAIN;
++#endif /* CONFIG_NFS_V4_1 */
++			nfs_restart_rpc(task, clp);
+ 			return -EAGAIN;
+ 		}
+ 		if (time_before(complain, jiffies)) {
+@@ -1228,40 +1311,73 @@ static void nfs_commitdata_release(void 
+ 	nfs_commit_free(wdata);
+ }
+ 
+-/*
+- * Set up the argument/result storage required for the RPC call.
+- */
+-static int nfs_commit_rpcsetup(struct list_head *head,
+-		struct nfs_write_data *data,
+-		int how)
++int nfs_initiate_commit(struct nfs_write_data *data,
++			struct rpc_clnt *clnt,
++			const struct rpc_call_ops *call_ops,
++			int how)
+ {
+-	struct nfs_page *first = nfs_list_entry(head->next);
+-	struct inode *inode = first->wb_context->path.dentry->d_inode;
++	struct inode *inode = data->inode;
+ 	int priority = flush_task_priority(how);
+ 	struct rpc_task *task;
+ 	struct rpc_message msg = {
+ 		.rpc_argp = &data->args,
+ 		.rpc_resp = &data->res,
+-		.rpc_cred = first->wb_context->cred,
++		.rpc_cred = data->cred,
+ 	};
+ 	struct rpc_task_setup task_setup_data = {
+ 		.task = &data->task,
+-		.rpc_client = NFS_CLIENT(inode),
++		.rpc_client = clnt,
+ 		.rpc_message = &msg,
+-		.callback_ops = &nfs_commit_ops,
++		.callback_ops = call_ops,
+ 		.callback_data = data,
+ 		.workqueue = nfsiod_workqueue,
+ 		.flags = RPC_TASK_ASYNC,
+ 		.priority = priority,
+ 	};
+ 
++	/* Set up the initial task struct.  */
++	NFS_PROTO(inode)->commit_setup(data, &msg);
++
++	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
++
++	task = rpc_run_task(&task_setup_data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	rpc_put_task(task);
++	return 0;
++}
++EXPORT_SYMBOL(nfs_initiate_commit);
++
++
++/* Commit through pNFS when @pnfs is set; otherwise send an RPC via @clnt. */
++int pnfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
++			 const struct rpc_call_ops *call_ops,
++			 int how, int pnfs)
++{
++	/* Honour the caller's @call_ops rather than assuming nfs_commit_ops */
++	if (pnfs && pnfs_try_to_commit(data, call_ops, how) == PNFS_ATTEMPTED)
++		return pnfs_get_write_status(data);
++
++	return nfs_initiate_commit(data, clnt, call_ops, how);
++}
++
++/*
++ * Set up the argument/result storage required for the RPC call.
++ */
++static int nfs_commit_rpcsetup(struct list_head *head,
++		struct nfs_write_data *data,
++		int how, int pnfs)
++{
++	struct nfs_page *first = nfs_list_entry(head->next);
++	struct inode *inode = first->wb_context->path.dentry->d_inode;
++
+ 	/* Set up the RPC argument and reply structs
+ 	 * NB: take care not to mess about with data->commit et al. */
+ 
+ 	list_splice_init(head, &data->pages);
+ 
+ 	data->inode	  = inode;
+-	data->cred	  = msg.rpc_cred;
++	data->cred	  = first->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(data->inode);
+ 	/* Note: we always request a commit of the entire inode */
+@@ -1272,45 +1388,47 @@ static int nfs_commit_rpcsetup(struct li
+ 	data->res.fattr   = &data->fattr;
+ 	data->res.verf    = &data->verf;
+ 	nfs_fattr_init(&data->fattr);
++	kref_init(&data->refcount);
++	data->parent      = NULL;
++	data->args.context = first->wb_context;  /* used by commit done */
+ 
+-	/* Set up the initial task struct.  */
+-	NFS_PROTO(inode)->commit_setup(data, &msg);
++	return pnfs_initiate_commit(data, NFS_CLIENT(inode), &nfs_commit_ops,
++				    how, pnfs);
++}
+ 
+-	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
++/* Handle memory error during commit */
++void nfs_mark_list_commit(struct list_head *head)
++{
++	struct nfs_page         *req;
+ 
+-	task = rpc_run_task(&task_setup_data);
+-	if (IS_ERR(task))
+-		return PTR_ERR(task);
+-	rpc_put_task(task);
+-	return 0;
++	while (!list_empty(head)) {
++		req = nfs_list_entry(head->next);
++		nfs_list_remove_request(req);
++		nfs_mark_request_commit(req);
++		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
++		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
++				BDI_RECLAIMABLE);
++		nfs_clear_page_tag_locked(req);
++	}
+ }
++EXPORT_SYMBOL(nfs_mark_list_commit);
+ 
+ /*
+  * Commit dirty pages
+  */
+ static int
+-nfs_commit_list(struct inode *inode, struct list_head *head, int how)
++nfs_commit_list(struct inode *inode, struct list_head *head, int how, int pnfs)
+ {
+ 	struct nfs_write_data	*data;
+-	struct nfs_page         *req;
+ 
+ 	data = nfs_commitdata_alloc();
+-
+ 	if (!data)
+ 		goto out_bad;
+ 
+ 	/* Set up the argument struct */
+-	return nfs_commit_rpcsetup(head, data, how);
++	return nfs_commit_rpcsetup(head, data, how, pnfs);
+  out_bad:
+-	while (!list_empty(head)) {
+-		req = nfs_list_entry(head->next);
+-		nfs_list_remove_request(req);
+-		nfs_mark_request_commit(req);
+-		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+-		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
+-				BDI_RECLAIMABLE);
+-		nfs_clear_page_tag_locked(req);
+-	}
++	nfs_mark_list_commit(head);
+ 	nfs_commit_clear_lock(NFS_I(inode));
+ 	return -ENOMEM;
+ }
+@@ -1330,6 +1448,19 @@ static void nfs_commit_done(struct rpc_t
+ 		return;
+ }
+ 
++static inline void nfs_commit_cleanup(struct kref *kref)
++{
++	struct nfs_write_data *data =
++		container_of(kref, struct nfs_write_data, refcount);
++
++	/* Clones put their parent; only parentless data clears the lock */
++	if (data->parent == NULL)
++		nfs_commit_clear_lock(NFS_I(data->inode));
++	else
++		kref_put(&data->parent->refcount, nfs_commit_cleanup);
++	nfs_commitdata_release(data);
++}
++
+ static void nfs_commit_release(void *calldata)
+ {
+ 	struct nfs_write_data	*data = calldata;
+@@ -1347,6 +1478,11 @@ static void nfs_commit_release(void *cal
+ 			req->wb_bytes,
+ 			(long long)req_offset(req));
+ 		if (status < 0) {
++			if (req->wb_lseg) {
++				nfs_mark_request_nopnfs(req);
++				nfs_mark_request_dirty(req);
++				goto next;
++			}
+ 			nfs_context_set_write_error(req->wb_context, status);
+ 			nfs_inode_remove_request(req);
+ 			dprintk(", error = %d\n", status);
+@@ -1363,12 +1499,12 @@ static void nfs_commit_release(void *cal
+ 		}
+ 		/* We have a mismatch. Write the page again */
+ 		dprintk(" mismatch\n");
++		nfs_mark_request_nopnfs(req);
+ 		nfs_mark_request_dirty(req);
+ 	next:
+ 		nfs_clear_page_tag_locked(req);
+ 	}
+-	nfs_commit_clear_lock(NFS_I(data->inode));
+-	nfs_commitdata_release(calldata);
++	kref_put(&data->refcount, nfs_commit_cleanup);
+ }
+ 
+ static const struct rpc_call_ops nfs_commit_ops = {
+@@ -1384,21 +1520,22 @@ int nfs_commit_inode(struct inode *inode
+ 	LIST_HEAD(head);
+ 	int may_wait = how & FLUSH_SYNC;
+ 	int res = 0;
++	int use_pnfs = 0;
+ 
+ 	if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
+ 		goto out_mark_dirty;
+ 	spin_lock(&inode->i_lock);
+-	res = nfs_scan_commit(inode, &head, 0, 0);
++	res = nfs_scan_commit(inode, &head, 0, 0, &use_pnfs);
+ 	spin_unlock(&inode->i_lock);
+ 	if (res) {
+-		int error = nfs_commit_list(inode, &head, how);
++		int error = nfs_commit_list(inode, &head, how, use_pnfs);
+ 		if (error < 0)
+ 			return error;
+-		if (may_wait)
++		if (may_wait) {
+ 			wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
+ 					nfs_wait_bit_killable,
+ 					TASK_KILLABLE);
+-		else
++		} else
+ 			goto out_mark_dirty;
+ 	} else
+ 		nfs_commit_clear_lock(NFS_I(inode));
+@@ -1451,7 +1588,18 @@ static int nfs_commit_unstable_pages(str
+ 
+ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+ {
+-	return nfs_commit_unstable_pages(inode, wbc);
++	int ret;
++	ret = nfs_commit_unstable_pages(inode, wbc);
++	if (ret >= 0 && layoutcommit_needed(NFS_I(inode))) {
++		int err, sync = wbc->sync_mode;
++
++		if (wbc->nonblocking || wbc->for_background)
++			sync = 0;
++		err = pnfs_layoutcommit_inode(inode, sync);
++		if (err < 0)
++			ret = err;
++	}
++	return ret;
+ }
+ 
+ /*
+@@ -1459,6 +1607,7 @@ int nfs_write_inode(struct inode *inode,
+  */
+ int nfs_wb_all(struct inode *inode)
+ {
++	int ret;
+ 	struct writeback_control wbc = {
+ 		.sync_mode = WB_SYNC_ALL,
+ 		.nr_to_write = LONG_MAX,
+@@ -1466,7 +1615,8 @@ int nfs_wb_all(struct inode *inode)
+ 		.range_end = LLONG_MAX,
+ 	};
+ 
+-	return sync_inode(inode, &wbc);
++	ret = sync_inode(inode, &wbc);
++	return ret;
+ }
+ 
+ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
+diff -up linux-2.6.34.noarch/include/linux/exportfs.h.orig linux-2.6.34.noarch/include/linux/exportfs.h
+--- linux-2.6.34.noarch/include/linux/exportfs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/exportfs.h	2010-09-30 10:17:09.002005000 -0400
+@@ -2,6 +2,7 @@
+ #define LINUX_EXPORTFS_H 1
+ 
+ #include <linux/types.h>
++#include <linux/exp_xdr.h>
+ 
+ struct dentry;
+ struct inode;
+@@ -175,4 +176,62 @@ extern struct dentry *generic_fh_to_pare
+ 	struct fid *fid, int fh_len, int fh_type,
+ 	struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen));
+ 
++#if defined(CONFIG_EXPORTFS_FILE_LAYOUT)
++struct pnfs_filelayout_device;
++struct pnfs_filelayout_layout;
++
++extern int filelayout_encode_devinfo(struct exp_xdr_stream *xdr,
++				     const struct pnfs_filelayout_device *fdev);
++extern enum nfsstat4 filelayout_encode_layout(struct exp_xdr_stream *xdr,
++				      const struct pnfs_filelayout_layout *flp);
++#endif /* defined(CONFIG_EXPORTFS_FILE_LAYOUT) */
++
++#if defined(CONFIG_EXPORTFS_FILE_LAYOUT)
++struct list_head;
++
++extern int blocklayout_encode_devinfo(struct exp_xdr_stream *xdr,
++				      const struct list_head *volumes);
++
++extern enum nfsstat4 blocklayout_encode_layout(struct exp_xdr_stream *xdr,
++					       const struct list_head *layouts);
++#endif /* defined(CONFIG_EXPORTFS_FILE_LAYOUT) */
++
++#if defined(CONFIG_PNFSD)
++#include <linux/module.h>
++
++struct pnfsd_cb_operations;
++
++struct pnfsd_cb_ctl {
++	spinlock_t lock;
++	struct module *module;
++	const struct pnfsd_cb_operations *cb_op;
++};
++
++/* in expfs.c so that file systems can depend on it */
++extern struct pnfsd_cb_ctl pnfsd_cb_ctl;
++
++static inline int
++pnfsd_get_cb_op(struct pnfsd_cb_ctl *ctl)
++{
++	int ret = -ENOENT;
++
++	spin_lock(&pnfsd_cb_ctl.lock);
++	if (!pnfsd_cb_ctl.cb_op)
++		goto out;
++	if (!try_module_get(pnfsd_cb_ctl.module))
++		goto out;
++	ctl->cb_op = pnfsd_cb_ctl.cb_op;
++	ctl->module = pnfsd_cb_ctl.module;
++	ret = 0;
++out:
++	spin_unlock(&pnfsd_cb_ctl.lock);
++	return ret;
++}
++
++static inline void
++pnfsd_put_cb_op(struct pnfsd_cb_ctl *ctl)
++{
++	module_put(ctl->module);
++}
++#endif /* CONFIG_PNFSD */
+ #endif /* LINUX_EXPORTFS_H */
+diff -up linux-2.6.34.noarch/include/linux/exp_xdr.h.orig linux-2.6.34.noarch/include/linux/exp_xdr.h
+--- linux-2.6.34.noarch/include/linux/exp_xdr.h.orig	2010-09-30 10:17:08.988005000 -0400
++++ linux-2.6.34.noarch/include/linux/exp_xdr.h	2010-09-30 10:17:08.990007000 -0400
+@@ -0,0 +1,141 @@
++#ifndef _LINUX_EXP_XDR_H
++#define _LINUX_EXP_XDR_H
++
++#include <asm/byteorder.h>
++#include <asm/unaligned.h>
++#include <linux/string.h>
++
++struct exp_xdr_stream {
++	__be32 *p;
++	__be32 *end;
++};
++
++/**
++ * exp_xdr_qwords - Calculate the number of quad-words holding nbytes
++ * @nbytes: number of bytes to encode
++ */
++static inline size_t
++exp_xdr_qwords(__u32 nbytes)
++{
++	return DIV_ROUND_UP(nbytes, 4);
++}
++
++/**
++ * exp_xdr_qbytes - Calculate the number of bytes holding qwords
++ * @qwords: number of quad-words to encode
++ */
++static inline size_t
++exp_xdr_qbytes(size_t qwords)
++{
++	return qwords << 2;
++}
++
++/**
++ * exp_xdr_reserve_space - Reserve buffer space for sending
++ * @xdr: pointer to exp_xdr_stream
++ * @nbytes: number of bytes to reserve
++ *
++ * Checks that we have enough buffer space to encode 'nbytes' more
++ * bytes of data. If so, update the xdr stream.
++ */
++static inline __be32 *
++exp_xdr_reserve_space(struct exp_xdr_stream *xdr, size_t nbytes)
++{
++	__be32 *p = xdr->p;
++	__be32 *q;
++
++	/* align nbytes on the next 32-bit boundary */
++	q = p + exp_xdr_qwords(nbytes);
++	if (unlikely(q > xdr->end || q < p))
++		return NULL;
++	xdr->p = q;
++	return p;
++}
++
++/**
++ * exp_xdr_reserve_qwords - Reserve buffer space for sending
++ * @xdr: pointer to exp_xdr_stream
++ * @qwords: number of quad words (u32's) to reserve
++ */
++static inline __be32 *
++exp_xdr_reserve_qwords(struct exp_xdr_stream *xdr, size_t qwords)
++{
++	return exp_xdr_reserve_space(xdr, exp_xdr_qbytes(qwords));
++}
++
++/**
++ * exp_xdr_encode_u32 - Encode an unsigned 32-bit value onto a xdr stream
++ * @p: pointer to encoding destination
++ * @val: value to encode
++ */
++static inline __be32 *
++exp_xdr_encode_u32(__be32 *p, __u32 val)
++{
++	*p = cpu_to_be32(val);
++	return p + 1;
++}
++
++/**
++ * exp_xdr_encode_u64 - Encode an unsigned 64-bit value onto a xdr stream
++ * @p: pointer to encoding destination
++ * @val: value to encode
++ */
++static inline __be32 *
++exp_xdr_encode_u64(__be32 *p, __u64 val)
++{
++	put_unaligned_be64(val, p);
++	return p + 2;
++}
++
++/**
++ * exp_xdr_encode_bytes - Encode an array of bytes onto a xdr stream
++ * @p: pointer to encoding destination
++ * @ptr: pointer to the array of bytes
++ * @nbytes: number of bytes to encode
++ */
++static inline __be32 *
++exp_xdr_encode_bytes(__be32 *p, const void *ptr, __u32 nbytes)
++{
++	if (likely(nbytes != 0)) {
++		unsigned int qwords = exp_xdr_qwords(nbytes);
++		unsigned int padding = exp_xdr_qbytes(qwords) - nbytes;
++
++		memcpy(p, ptr, nbytes);
++		if (padding != 0)
++			memset((char *)p + nbytes, 0, padding);
++		p += qwords;
++	}
++	return p;
++}
++
++/**
++ * exp_xdr_encode_opaque - Encode an opaque type onto a xdr stream
++ * @p: pointer to encoding destination
++ * @ptr: pointer to the opaque array
++ * @nbytes: number of bytes to encode
++ *
++ * Encodes the 32-bit opaque size in bytes followed by the opaque value.
++ */
++static inline __be32 *
++exp_xdr_encode_opaque(__be32 *p, const void *ptr, __u32 nbytes)
++{
++	p = exp_xdr_encode_u32(p, nbytes);
++	return exp_xdr_encode_bytes(p, ptr, nbytes);
++}
++
++/**
++ * exp_xdr_encode_opaque_len - Encode the opaque length onto a xdr stream
++ * @lenp: pointer to the opaque length destination
++ * @endp: pointer to the end of the opaque array
++ *
++ * Encodes the 32-bit opaque size in bytes given the start and end pointers
++ */
++static inline __be32 *
++exp_xdr_encode_opaque_len(__be32 *lenp, const void *endp)
++{
++	size_t nbytes = (char *)endp - (char *)(lenp + 1);
++
++	exp_xdr_encode_u32(lenp, nbytes);
++	return lenp + 1 + exp_xdr_qwords(nbytes);
++}
++#endif /* _LINUX_EXP_XDR_H */
+diff -up linux-2.6.34.noarch/include/linux/fs.h.orig linux-2.6.34.noarch/include/linux/fs.h
+--- linux-2.6.34.noarch/include/linux/fs.h.orig	2010-09-30 10:15:16.980690000 -0400
++++ linux-2.6.34.noarch/include/linux/fs.h	2010-09-30 10:17:09.015004000 -0400
+@@ -387,6 +387,7 @@ struct inodes_stat_t {
+ #include <asm/byteorder.h>
+ 
+ struct export_operations;
++struct pnfs_export_operations;
+ struct hd_geometry;
+ struct iovec;
+ struct nameidata;
+@@ -1329,6 +1330,7 @@ struct super_block {
+ 	const struct dquot_operations	*dq_op;
+ 	const struct quotactl_ops	*s_qcop;
+ 	const struct export_operations *s_export_op;
++	const struct pnfs_export_operations *s_pnfs_op;
+ 	unsigned long		s_flags;
+ 	unsigned long		s_magic;
+ 	struct dentry		*s_root;
+diff -up linux-2.6.34.noarch/include/linux/nfs4.h.orig linux-2.6.34.noarch/include/linux/nfs4.h
+--- linux-2.6.34.noarch/include/linux/nfs4.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4.h	2010-09-30 10:17:09.047005000 -0400
+@@ -17,7 +17,10 @@
+ 
+ #define NFS4_BITMAP_SIZE	2
+ #define NFS4_VERIFIER_SIZE	8
+-#define NFS4_STATEID_SIZE	16
++#define NFS4_CLIENTID_SIZE	8
++#define NFS4_STATEID_SEQID_SIZE 4
++#define NFS4_STATEID_OTHER_SIZE 12
++#define NFS4_STATEID_SIZE	(NFS4_STATEID_SEQID_SIZE + NFS4_STATEID_OTHER_SIZE)
+ #define NFS4_FHSIZE		128
+ #define NFS4_MAXPATHLEN		PATH_MAX
+ #define NFS4_MAXNAMLEN		NAME_MAX
+@@ -119,6 +122,13 @@
+ #define EXCHGID4_FLAG_MASK_A			0x40070003
+ #define EXCHGID4_FLAG_MASK_R			0x80070003
+ 
++static inline bool
++is_ds_only_session(u32 exchange_flags)
++{
++	u32 mask = EXCHGID4_FLAG_USE_PNFS_DS | EXCHGID4_FLAG_USE_PNFS_MDS;
++	return (exchange_flags & mask) == EXCHGID4_FLAG_USE_PNFS_DS;
++}
++
+ #define SEQ4_STATUS_CB_PATH_DOWN		0x00000001
+ #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING	0x00000002
+ #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED	0x00000004
+@@ -166,8 +176,25 @@ struct nfs4_acl {
+ 	struct nfs4_ace	aces[0];
+ };
+ 
++struct nfs4_fsid {
++	u64	major;
++	u64	minor;
++};
++
+ typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier;
+-typedef struct { char data[NFS4_STATEID_SIZE]; } nfs4_stateid;
++typedef struct { char data[NFS4_CLIENTID_SIZE]; } nfs4_clientid;
++
++struct nfs41_stateid {
++	__be32 seqid;
++	char other[NFS4_STATEID_OTHER_SIZE];
++} __attribute__ ((packed));
++
++typedef struct {
++	union {
++		char data[NFS4_STATEID_SIZE];
++		struct nfs41_stateid stateid;
++	} u;
++} nfs4_stateid;
+ 
+ enum nfs_opnum4 {
+ 	OP_ACCESS = 3,
+@@ -471,6 +498,8 @@ enum lock_type4 {
+ #define FATTR4_WORD1_TIME_MODIFY        (1UL << 21)
+ #define FATTR4_WORD1_TIME_MODIFY_SET    (1UL << 22)
+ #define FATTR4_WORD1_MOUNTED_ON_FILEID  (1UL << 23)
++#define FATTR4_WORD1_FS_LAYOUT_TYPES    (1UL << 30)
++#define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
+ 
+ #define NFSPROC4_NULL 0
+ #define NFSPROC4_COMPOUND 1
+@@ -523,6 +552,7 @@ enum {
+ 	NFSPROC4_CLNT_GETACL,
+ 	NFSPROC4_CLNT_SETACL,
+ 	NFSPROC4_CLNT_FS_LOCATIONS,
++	NFSPROC4_CLNT_RELEASE_LOCKOWNER,
+ 
+ 	/* nfs41 */
+ 	NFSPROC4_CLNT_EXCHANGE_ID,
+@@ -531,6 +561,13 @@ enum {
+ 	NFSPROC4_CLNT_SEQUENCE,
+ 	NFSPROC4_CLNT_GET_LEASE_TIME,
+ 	NFSPROC4_CLNT_RECLAIM_COMPLETE,
++	NFSPROC4_CLNT_LAYOUTGET,
++	NFSPROC4_CLNT_LAYOUTCOMMIT,
++	NFSPROC4_CLNT_LAYOUTRETURN,
++	NFSPROC4_CLNT_GETDEVICELIST,
++	NFSPROC4_CLNT_GETDEVICEINFO,
++	NFSPROC4_CLNT_PNFS_WRITE,
++	NFSPROC4_CLNT_PNFS_COMMIT,
+ };
+ 
+ /* nfs41 types */
+@@ -549,6 +586,43 @@ enum state_protect_how4 {
+ 	SP4_SSV		= 2
+ };
+ 
++enum pnfs_layouttype {
++	LAYOUT_NFSV4_1_FILES  = 1,
++	LAYOUT_OSD2_OBJECTS = 2,
++	LAYOUT_BLOCK_VOLUME = 3,
++};
++
++/* used for both layout return and recall */
++enum pnfs_layoutreturn_type {
++	RETURN_FILE = 1,
++	RETURN_FSID = 2,
++	RETURN_ALL  = 3
++};
++
++enum pnfs_iomode {
++	IOMODE_READ = 1,
++	IOMODE_RW = 2,
++	IOMODE_ANY = 3,
++};
++
++enum pnfs_notify_deviceid_type4 {
++	NOTIFY_DEVICEID4_CHANGE = 1 << 1,
++	NOTIFY_DEVICEID4_DELETE = 1 << 2,
++};
++
++#define NFL4_UFLG_MASK			0x0000003F
++#define NFL4_UFLG_DENSE			0x00000001
++#define NFL4_UFLG_COMMIT_THRU_MDS	0x00000002
++#define NFL4_UFLG_STRIPE_UNIT_SIZE_MASK	0xFFFFFFC0
++
++/* Encoded in the loh_body field of type layouthint4 */
++enum filelayout_hint_care4 {
++	NFLH4_CARE_DENSE		= NFL4_UFLG_DENSE,
++	NFLH4_CARE_COMMIT_THRU_MDS	= NFL4_UFLG_COMMIT_THRU_MDS,
++	NFLH4_CARE_STRIPE_UNIT_SIZE	= 0x00000040,
++	NFLH4_CARE_STRIPE_COUNT		= 0x00000080
++};
++
+ #endif
+ #endif
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfs4_pnfs.h
+--- linux-2.6.34.noarch/include/linux/nfs4_pnfs.h.orig	2010-09-30 10:17:09.057007000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs4_pnfs.h	2010-09-30 10:17:09.059005000 -0400
+@@ -0,0 +1,329 @@
++/*
++ *  include/linux/nfs4_pnfs.h
++ *
++ *  Common data structures needed by the pnfs client and pnfs layout driver.
++ *
++ *  Copyright (c) 2002 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Dean Hildebrand   <dhildebz@eecs.umich.edu>
++ */
++
++#ifndef LINUX_NFS4_PNFS_H
++#define LINUX_NFS4_PNFS_H
++
++#include <linux/nfs_page.h>
++
++enum pnfs_try_status {
++	PNFS_ATTEMPTED     = 0,
++	PNFS_NOT_ATTEMPTED = 1,
++};
++
++#define NFS4_PNFS_GETDEVLIST_MAXNUM 16
++
++/* Per-layout driver specific registration structure */
++struct pnfs_layoutdriver_type {
++	const u32 id;
++	const char *name;
++	struct layoutdriver_io_operations *ld_io_ops;
++	struct layoutdriver_policy_operations *ld_policy_ops;
++};
++
++struct pnfs_fsdata {
++	int bypass_eof;
++	struct pnfs_layout_segment *lseg;
++	void *private;
++};
++
++#if defined(CONFIG_NFS_V4_1)
++
++static inline struct nfs_inode *
++PNFS_NFS_INODE(struct pnfs_layout_hdr *lo)
++{
++	return NFS_I(lo->inode);
++}
++
++static inline struct inode *
++PNFS_INODE(struct pnfs_layout_hdr *lo)
++{
++	return lo->inode;
++}
++
++static inline struct nfs_server *
++PNFS_NFS_SERVER(struct pnfs_layout_hdr *lo)
++{
++	return NFS_SERVER(PNFS_INODE(lo));
++}
++
++static inline struct pnfs_layoutdriver_type *
++PNFS_LD(struct pnfs_layout_hdr *lo)
++{
++	return NFS_SERVER(PNFS_INODE(lo))->pnfs_curr_ld;
++}
++
++static inline struct layoutdriver_io_operations *
++PNFS_LD_IO_OPS(struct pnfs_layout_hdr *lo)
++{
++	return PNFS_LD(lo)->ld_io_ops;
++}
++
++static inline struct layoutdriver_policy_operations *
++PNFS_LD_POLICY_OPS(struct pnfs_layout_hdr *lo)
++{
++	return PNFS_LD(lo)->ld_policy_ops;
++}
++
++static inline bool
++has_layout(struct nfs_inode *nfsi)
++{
++	return nfsi->layout != NULL;
++}
++
++static inline bool
++layoutcommit_needed(struct nfs_inode *nfsi)
++{
++	return has_layout(nfsi) &&
++	       test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->layout->state);
++}
++
++extern void put_lseg(struct pnfs_layout_segment *lseg);
++extern void get_lseg(struct pnfs_layout_segment *lseg);
++
++#else /* CONFIG_NFS_V4_1 */
++
++static inline bool
++has_layout(struct nfs_inode *nfsi)
++{
++	return false;
++}
++
++static inline bool
++layoutcommit_needed(struct nfs_inode *nfsi)
++{
++	return 0;
++}
++
++#endif /* CONFIG_NFS_V4_1 */
++
++struct pnfs_layout_segment {
++	struct list_head fi_list;
++	struct pnfs_layout_range range;
++	struct kref kref;
++	bool valid;
++	struct pnfs_layout_hdr *layout;
++	struct nfs4_deviceid *deviceid;
++	u8 ld_data[];			/* layout driver private data */
++};
++
++static inline void *
++LSEG_LD_DATA(struct pnfs_layout_segment *lseg)
++{
++	return lseg->ld_data;
++}
++
++/* Layout driver I/O operations.
++ * Either the pagecache or non-pagecache read/write operations must be implemented
++ */
++struct layoutdriver_io_operations {
++	/* Functions that use the pagecache.
++	 * If use_pagecache == 1, then these functions must be implemented.
++	 */
++	/* read and write pagelist should return just 0 (to indicate that
++	 * the layout code has taken control) or 1 (to indicate that the
++	 * layout code wishes to fall back to normal nfs.)  If 0 is returned,
++	 * information can be passed back through nfs_data->res and
++	 * nfs_data->task.tk_status, and the appropriate pnfs done function
++	 * MUST be called.
++	 */
++	enum pnfs_try_status
++	(*read_pagelist) (struct nfs_read_data *nfs_data, unsigned nr_pages);
++	enum pnfs_try_status
++	(*write_pagelist) (struct nfs_write_data *nfs_data, unsigned nr_pages, int how);
++	int (*write_begin) (struct pnfs_layout_segment *lseg, struct page *page,
++			    loff_t pos, unsigned count,
++			    struct pnfs_fsdata *fsdata);
++	int (*write_end)(struct inode *inode, struct page *page, loff_t pos,
++			 unsigned count, unsigned copied,
++			 struct pnfs_layout_segment *lseg);
++	void (*write_end_cleanup)(struct file *filp,
++				  struct pnfs_fsdata *fsdata);
++
++	/* Consistency ops */
++	/* 2 problems:
++	 * 1) the page list contains nfs_pages, NOT pages
++	 * 2) currently the NFS code doesn't create a page array (as it does with read/write)
++	 */
++	enum pnfs_try_status
++	(*commit) (struct nfs_write_data *nfs_data, int how);
++
++	/* Layout information. For each inode, alloc_layout is executed once to retrieve an
++	 * inode specific layout structure.  Each subsequent layoutget operation results in
++	 * a set_layout call to set the opaque layout in the layout driver.*/
++	struct pnfs_layout_hdr * (*alloc_layout) (struct inode *inode);
++	void (*free_layout) (struct pnfs_layout_hdr *);
++	struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
++	void (*free_lseg) (struct pnfs_layout_segment *lseg);
++
++	int (*setup_layoutcommit) (struct pnfs_layout_hdr *layoutid,
++				   struct nfs4_layoutcommit_args *args);
++	void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
++				     struct xdr_stream *xdr,
++				     const struct nfs4_layoutcommit_args *args);
++	void (*cleanup_layoutcommit) (struct pnfs_layout_hdr *layoutid,
++				      struct nfs4_layoutcommit_args *args,
++				      int status);
++	void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid,
++				struct xdr_stream *xdr,
++				const struct nfs4_layoutreturn_args *args);
++
++	/* Registration information for a new mounted file system
++	 */
++	int (*initialize_mountpoint) (struct nfs_server *,
++				      const struct nfs_fh * mntfh);
++	int (*uninitialize_mountpoint) (struct nfs_server *server);
++};
++
++enum layoutdriver_policy_flags {
++	/* Should the full nfs rpc cleanup code be used after io */
++	PNFS_USE_RPC_CODE		= 1 << 0,
++
++	/* Should the NFS req. gather algorithm cross stripe boundaries? */
++	PNFS_GATHER_ACROSS_STRIPES	= 1 << 1,
++
++	/* Should the pNFS client commit and return the layout upon a setattr */
++	PNFS_LAYOUTRET_ON_SETATTR	= 1 << 3,
++};
++
++struct layoutdriver_policy_operations {
++	unsigned flags;
++
++	/* The stripe size of the file system */
++	ssize_t (*get_stripesize) (struct pnfs_layout_hdr *layoutid);
++
++	/* test for nfs page cache coalescing */
++	int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
++
++	/* Retrieve the block size of the file system.
++	 * If gather_across_stripes == 1, then the file system will gather
++	 * requests into the block size.
++	 * TODO: Where will the layout driver get this info?  It is hard
++	 * coded in PVFS2.
++	 */
++	ssize_t (*get_blocksize) (void);
++};
++
++/* Should the full nfs rpc cleanup code be used after io */
++static inline int
++pnfs_ld_use_rpc_code(struct pnfs_layoutdriver_type *ld)
++{
++	return ld->ld_policy_ops->flags & PNFS_USE_RPC_CODE;
++}
++
++/* Should the NFS req. gather algorithm cross stripe boundaries? */
++static inline int
++pnfs_ld_gather_across_stripes(struct pnfs_layoutdriver_type *ld)
++{
++	return ld->ld_policy_ops->flags & PNFS_GATHER_ACROSS_STRIPES;
++}
++
++struct pnfs_device {
++	struct pnfs_deviceid dev_id;
++	unsigned int  layout_type;
++	unsigned int  mincount;
++	struct page **pages;
++	void          *area;
++	unsigned int  pgbase;
++	unsigned int  pglen;
++	unsigned int  dev_notify_types;
++};
++
++struct pnfs_devicelist {
++	unsigned int		eof;
++	unsigned int		num_devs;
++	struct pnfs_deviceid	dev_id[NFS4_PNFS_GETDEVLIST_MAXNUM];
++};
++
++/*
++ * Device ID RCU cache. A device ID is unique per client ID and layout type.
++ */
++#define NFS4_DEVICE_ID_HASH_BITS	5
++#define NFS4_DEVICE_ID_HASH_SIZE	(1 << NFS4_DEVICE_ID_HASH_BITS)
++#define NFS4_DEVICE_ID_HASH_MASK	(NFS4_DEVICE_ID_HASH_SIZE - 1)
++
++static inline u32
++nfs4_deviceid_hash(struct pnfs_deviceid *id)
++{
++	unsigned char *cptr = (unsigned char *)id->data;
++	unsigned int nbytes = NFS4_PNFS_DEVICEID4_SIZE;
++	u32 x = 0;
++
++	while (nbytes--) {
++		x *= 37;
++		x += *cptr++;
++	}
++	return x & NFS4_DEVICE_ID_HASH_MASK;
++}
++
++struct nfs4_deviceid_cache {
++	spinlock_t		dc_lock;
++	struct kref		dc_kref;
++	void			(*dc_free_callback)(struct kref *);
++	struct hlist_head	dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
++};
++
++/* Device ID cache node */
++struct nfs4_deviceid {
++	struct hlist_node	de_node;
++	struct pnfs_deviceid	de_id;
++	struct kref		de_kref;
++};
++
++extern int nfs4_alloc_init_deviceid_cache(struct nfs_client *,
++				void (*free_callback)(struct kref *));
++extern void nfs4_put_deviceid_cache(struct nfs_client *);
++extern void nfs4_init_deviceid_node(struct nfs4_deviceid *);
++extern struct nfs4_deviceid *nfs4_find_get_deviceid(
++				struct nfs4_deviceid_cache *,
++				struct pnfs_deviceid *);
++extern struct nfs4_deviceid *nfs4_add_get_deviceid(struct nfs4_deviceid_cache *,
++				struct nfs4_deviceid *);
++extern void nfs4_set_layout_deviceid(struct pnfs_layout_segment *,
++				struct nfs4_deviceid *);
++extern void nfs4_put_unset_layout_deviceid(struct pnfs_layout_segment *,
++				struct nfs4_deviceid *,
++				void (*free_callback)(struct kref *));
++extern void nfs4_delete_device(struct nfs4_deviceid_cache *,
++				struct pnfs_deviceid *);
++
++/* pNFS client callback functions.
++ * These operations allow the layout driver to access pNFS client
++ * specific information or call pNFS client->server operations.
++ * E.g., getdeviceinfo, I/O callbacks, etc
++ */
++struct pnfs_client_operations {
++	int (*nfs_getdevicelist) (struct nfs_server *,
++				  const struct nfs_fh *fh,
++				  struct pnfs_devicelist *devlist);
++	int (*nfs_getdeviceinfo) (struct nfs_server *,
++				  struct pnfs_device *dev);
++
++	/* Post read callback. */
++	void (*nfs_readlist_complete) (struct nfs_read_data *nfs_data);
++
++	/* Post write callback. */
++	void (*nfs_writelist_complete) (struct nfs_write_data *nfs_data);
++
++	/* Post commit callback. */
++	void (*nfs_commit_complete) (struct nfs_write_data *nfs_data);
++	void (*nfs_return_layout) (struct inode *);
++};
++
++extern struct pnfs_client_operations pnfs_ops;
++
++extern struct pnfs_client_operations *pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
++extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
++
++#define NFS4_PNFS_MAX_LAYOUTS 4
++#define NFS4_PNFS_PRIVATE_LAYOUT 0x80000000
++
++#endif /* LINUX_NFS4_PNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig linux-2.6.34.noarch/include/linux/nfsd4_block.h
+--- linux-2.6.34.noarch/include/linux/nfsd4_block.h.orig	2010-09-30 10:17:09.178011000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_block.h	2010-09-30 10:17:09.180010000 -0400
+@@ -0,0 +1,101 @@
++#ifndef NFSD4_BLOCK
++#define NFSD4_BLOCK
++
++#include <linux/sunrpc/svc.h>
++#include <linux/sunrpc/svcauth.h>
++#include <linux/nfsd/nfsfh.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++#define PNFS_BLOCK_SUCCESS		1
++#define PNFS_BLOCK_FAILURE		0
++
++#define PNFS_BLOCK_CTL_START		1
++#define PNFS_BLOCK_CTL_STOP		2
++#define PNFS_BLOCK_CTL_VERS		3 /* Allows daemon to request current
++					   * version from kernel via an upcall.
++					   */
++
++#define PNFS_UPCALL_MSG_STOP	0
++#define PNFS_UPCALL_MSG_GETSIG	1
++#define PNFS_UPCALL_MSG_GETSLICE	2
++#define PNFS_UPCALL_MSG_DMCHK	3	// See if dev_t is a DM volume
++#define PNFS_UPCALL_MSG_DMGET	4
++#define PNFS_UPCALL_MSG_VERS	5
++
++#define PNFS_UPCALL_VERS		8
++
++typedef struct stripe_dev {
++	int	major,
++		minor,
++		offset;
++} stripe_dev_t;
++
++typedef struct bl_comm_res {
++	int				res_status;
++	union {
++		struct {
++			long long	start,
++					length;
++		} slice;
++		struct {
++			int		num_stripes,
++					stripe_size;
++			stripe_dev_t	devs[];
++		} stripe;
++		struct {
++			long long	sector;
++			int		offset,
++					len;
++			char		sig[];
++		} sig;
++		int			vers,
++					dm_vol;
++	} u;
++} bl_comm_res_t;
++
++typedef struct bl_comm_msg {
++	int		msg_type,
++			msg_status;
++	union {
++		dev_t	msg_dev;
++		int	msg_vers;
++	} u;
++	bl_comm_res_t	*msg_res;
++} bl_comm_msg_t;
++
++#ifdef __KERNEL__
++
++typedef struct bl_comm {
++	/* ---- protects access to this structure ---- */
++	struct mutex		lock;
++	/* ---- protects access to rpc pipe ---- */
++	struct mutex		pipe_lock;
++	struct dentry		*pipe_dentry;
++	wait_queue_head_t	pipe_wq;
++	bl_comm_msg_t		msg;
++} bl_comm_t;
++
++int pnfs_block_enabled(struct inode *, int);
++int bl_layout_type(struct super_block *sb);
++int bl_getdeviceiter(struct super_block *, u32 layout_type,
++		     struct nfsd4_pnfs_dev_iter_res *);
++int bl_getdeviceinfo(struct super_block *, struct exp_xdr_stream *,
++		     u32 layout_type,
++		     const struct nfsd4_pnfs_deviceid *);
++enum nfsstat4 bl_layoutget(struct inode *, struct exp_xdr_stream *,
++			   const struct nfsd4_pnfs_layoutget_arg *,
++			   struct nfsd4_pnfs_layoutget_res *);
++int bl_layoutcommit(struct inode *,
++		    const struct nfsd4_pnfs_layoutcommit_arg *,
++		    struct nfsd4_pnfs_layoutcommit_res *);
++int bl_layoutreturn(struct inode *,
++		    const struct nfsd4_pnfs_layoutreturn_arg *);
++int bl_layoutrecall(struct inode *inode, int type, u64 offset, u64 len);
++int bl_init_proc(void);
++int bl_upcall(bl_comm_t *, bl_comm_msg_t *, bl_comm_res_t **);
++
++extern bl_comm_t	*bl_comm_global;	// Ugly...
++#endif /* __KERNEL__ */
++
++#endif /* NFSD4_BLOCK */
++
+diff -up linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h
+--- linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h.orig	2010-09-30 10:17:09.190013000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd4_spnfs.h	2010-09-30 10:17:09.192012000 -0400
+@@ -0,0 +1,345 @@
++/*
++ * include/linux/nfsd4_spnfs.h
++ *
++ * spNFS - simple pNFS implementation with userspace daemon
++ *
++ */
++
++/******************************************************************************
++
++(c) 2007 Network Appliance, Inc.  All Rights Reserved.
++
++Network Appliance provides this source code under the GPL v2 License.
++The GPL v2 license is available at
++http://opensource.org/licenses/gpl-license.php.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++******************************************************************************/
++
++#ifndef NFS_SPNFS_H
++#define NFS_SPNFS_H
++
++
++#ifdef __KERNEL__
++#include "exportfs.h"
++#include "sunrpc/svc.h"
++#include "nfsd/nfsfh.h"
++#else
++#include <sys/types.h>
++#endif /* __KERNEL__ */
++
++#define SPNFS_STATUS_INVALIDMSG		0x01
++#define SPNFS_STATUS_AGAIN		0x02
++#define SPNFS_STATUS_FAIL		0x04
++#define SPNFS_STATUS_SUCCESS		0x08
++
++#define SPNFS_TYPE_LAYOUTGET		0x01
++#define SPNFS_TYPE_LAYOUTCOMMIT		0x02
++#define SPNFS_TYPE_LAYOUTRETURN		0x03
++#define SPNFS_TYPE_GETDEVICEITER	0x04
++#define SPNFS_TYPE_GETDEVICEINFO	0x05
++#define SPNFS_TYPE_SETATTR		0x06
++#define SPNFS_TYPE_OPEN			0x07
++#define	SPNFS_TYPE_CLOSE		0x08
++#define SPNFS_TYPE_CREATE		0x09
++#define SPNFS_TYPE_REMOVE		0x0a
++#define SPNFS_TYPE_COMMIT		0x0b
++#define SPNFS_TYPE_READ			0x0c
++#define SPNFS_TYPE_WRITE		0x0d
++
++#define	SPNFS_MAX_DEVICES		1
++#define	SPNFS_MAX_DATA_SERVERS		16
++#define SPNFS_MAX_IO			512
++
++/* layout */
++struct spnfs_msg_layoutget_args {
++	unsigned long inode;
++	unsigned long generation;
++};
++
++struct spnfs_filelayout_list {
++	u_int32_t       fh_len;
++	unsigned char   fh_val[128]; /* DMXXX fix this const */
++};
++
++struct spnfs_msg_layoutget_res {
++	int status;
++	u_int64_t devid;
++	u_int64_t stripe_size;
++	u_int32_t stripe_type;
++	u_int32_t stripe_count;
++	struct spnfs_filelayout_list flist[SPNFS_MAX_DATA_SERVERS];
++};
++
++/* layoutcommit */
++struct spnfs_msg_layoutcommit_args {
++	unsigned long inode;
++	unsigned long generation;
++	u_int64_t file_size;
++};
++
++struct spnfs_msg_layoutcommit_res {
++	int status;
++};
++
++/* layoutreturn */
++/* No op for the daemon */
++/*
++struct spnfs_msg_layoutreturn_args {
++};
++
++struct spnfs_msg_layoutreturn_res {
++};
++*/
++
++/* getdeviceiter */
++struct spnfs_msg_getdeviceiter_args {
++	unsigned long inode;
++	u_int64_t cookie;
++	u_int64_t verf;
++};
++
++struct spnfs_msg_getdeviceiter_res {
++	int status;
++	u_int64_t devid;
++	u_int64_t cookie;
++	u_int64_t verf;
++	u_int32_t eof;
++};
++
++/* getdeviceinfo */
++struct spnfs_data_server {
++	u_int32_t dsid;
++	char netid[5];
++	char addr[29];
++};
++
++struct spnfs_device {
++	u_int64_t devid;
++	int dscount;
++	struct spnfs_data_server dslist[SPNFS_MAX_DATA_SERVERS];
++};
++
++struct spnfs_msg_getdeviceinfo_args {
++	u_int64_t devid;
++};
++
++struct spnfs_msg_getdeviceinfo_res {
++	int status;
++	struct spnfs_device devinfo;
++};
++
++/* setattr */
++struct spnfs_msg_setattr_args {
++	unsigned long inode;
++	unsigned long generation;
++	int file_size;
++};
++
++struct spnfs_msg_setattr_res {
++	int status;
++};
++
++/* open */
++struct spnfs_msg_open_args {
++	unsigned long inode;
++	unsigned long generation;
++	int create;
++	int createmode;
++	int truncate;
++};
++
++struct spnfs_msg_open_res {
++	int status;
++};
++
++/* close */
++/* No op for daemon */
++struct spnfs_msg_close_args {
++	int x;
++};
++
++struct spnfs_msg_close_res {
++	int y;
++};
++
++/* create */
++/*
++struct spnfs_msg_create_args {
++	int x;
++};
++
++struct spnfs_msg_create_res {
++	int y;
++};
++*/
++
++/* remove */
++struct spnfs_msg_remove_args {
++	unsigned long inode;
++	unsigned long generation;
++};
++
++struct spnfs_msg_remove_res {
++	int status;
++};
++
++/* commit */
++/*
++struct spnfs_msg_commit_args {
++	int x;
++};
++
++struct spnfs_msg_commit_res {
++	int y;
++};
++*/
++
++/* read */
++struct spnfs_msg_read_args {
++	unsigned long inode;
++	unsigned long generation;
++	loff_t offset;
++	unsigned long len;
++};
++
++struct spnfs_msg_read_res {
++	int status;
++	char data[SPNFS_MAX_IO];
++};
++
++/* write */
++struct spnfs_msg_write_args {
++	unsigned long inode;
++	unsigned long generation;
++	loff_t offset;
++	unsigned long len;
++	char data[SPNFS_MAX_IO];
++};
++
++struct spnfs_msg_write_res {
++	int status;
++};
++
++/* bundle args and responses */
++union spnfs_msg_args {
++	struct spnfs_msg_layoutget_args		layoutget_args;
++	struct spnfs_msg_layoutcommit_args	layoutcommit_args;
++/*
++	struct spnfs_msg_layoutreturn_args	layoutreturn_args;
++*/
++	struct spnfs_msg_getdeviceiter_args     getdeviceiter_args;
++	struct spnfs_msg_getdeviceinfo_args     getdeviceinfo_args;
++	struct spnfs_msg_setattr_args		setattr_args;
++	struct spnfs_msg_open_args		open_args;
++	struct spnfs_msg_close_args		close_args;
++/*
++	struct spnfs_msg_create_args		create_args;
++*/
++	struct spnfs_msg_remove_args		remove_args;
++/*
++	struct spnfs_msg_commit_args		commit_args;
++*/
++	struct spnfs_msg_read_args		read_args;
++	struct spnfs_msg_write_args		write_args;
++};
++
++union spnfs_msg_res {
++	struct spnfs_msg_layoutget_res		layoutget_res;
++	struct spnfs_msg_layoutcommit_res	layoutcommit_res;
++/*
++	struct spnfs_msg_layoutreturn_res	layoutreturn_res;
++*/
++	struct spnfs_msg_getdeviceiter_res      getdeviceiter_res;
++	struct spnfs_msg_getdeviceinfo_res      getdeviceinfo_res;
++	struct spnfs_msg_setattr_res		setattr_res;
++	struct spnfs_msg_open_res		open_res;
++	struct spnfs_msg_close_res		close_res;
++/*
++	struct spnfs_msg_create_res		create_res;
++*/
++	struct spnfs_msg_remove_res		remove_res;
++/*
++	struct spnfs_msg_commit_res		commit_res;
++*/
++	struct spnfs_msg_read_res		read_res;
++	struct spnfs_msg_write_res		write_res;
++};
++
++/* a spnfs message, args and response */
++struct spnfs_msg {
++	unsigned char		im_type;
++	unsigned char		im_status;
++	union spnfs_msg_args	im_args;
++	union spnfs_msg_res	im_res;
++};
++
++/* spnfs configuration info */
++struct spnfs_config {
++	unsigned char		dense_striping;
++	int			stripe_size;
++	int			num_ds;
++	char			ds_dir[SPNFS_MAX_DATA_SERVERS][80];  /* XXX */
++};
++
++#if defined(__KERNEL__) && defined(CONFIG_SPNFS)
++
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++/* pipe mgmt structure.  messages flow through here */
++struct spnfs {
++	struct dentry		*spnfs_dentry;    /* dentry for pipe */
++	wait_queue_head_t	spnfs_wq;
++	struct spnfs_msg	spnfs_im;         /* spnfs message */
++	struct mutex		spnfs_lock;       /* Serializes upcalls */
++	struct mutex		spnfs_plock;
++};
++
++struct nfsd4_open;
++
++int spnfs_layout_type(struct super_block *);
++enum nfsstat4 spnfs_layoutget(struct inode *, struct exp_xdr_stream *xdr,
++			      const struct nfsd4_pnfs_layoutget_arg *,
++			      struct nfsd4_pnfs_layoutget_res *);
++int spnfs_layoutcommit(void);
++int spnfs_layoutreturn(struct inode *,
++		       const struct nfsd4_pnfs_layoutreturn_arg *);
++int spnfs_getdeviceiter(struct super_block *,
++			u32 layout_type,
++			struct nfsd4_pnfs_dev_iter_res *);
++int spnfs_getdeviceinfo(struct super_block *, struct exp_xdr_stream *,
++			u32 layout_type,
++			const struct nfsd4_pnfs_deviceid *);
++int spnfs_setattr(void);
++int spnfs_open(struct inode *, struct nfsd4_open *);
++int spnfs_get_state(struct inode *, struct knfsd_fh *, struct pnfs_get_state *);
++int spnfs_remove(unsigned long, unsigned long);
++__be32 spnfs_read(struct inode *, loff_t, unsigned long *,
++		  int, struct svc_rqst *);
++__be32 spnfs_write(struct inode *, loff_t, size_t, int, struct svc_rqst *);
++int spnfs_getfh(int, struct nfs_fh *);
++int spnfs_test_layoutrecall(char *, u64, u64);
++int spnfs_layoutrecall(struct inode *, int, u64, u64);
++
++int nfsd_spnfs_new(void);
++void nfsd_spnfs_delete(void);
++int spnfs_upcall(struct spnfs *, struct spnfs_msg *, union spnfs_msg_res *);
++int spnfs_enabled(void);
++int spnfs_init_proc(void);
++
++extern struct spnfs_config *spnfs_config;
++
++#endif /* __KERNEL__ && CONFIG_SPNFS */
++
++#endif /* NFS_SPNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/const.h.orig linux-2.6.34.noarch/include/linux/nfsd/const.h
+--- linux-2.6.34.noarch/include/linux/nfsd/const.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/const.h	2010-09-30 10:17:09.139009000 -0400
+@@ -29,6 +29,7 @@
+ #ifdef __KERNEL__
+ 
+ #include <linux/sunrpc/msg_prot.h>
++#include <linux/sunrpc/svc.h>
+ 
+ /*
+  * Largest number of bytes we need to allocate for an NFS
+diff -up linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig linux-2.6.34.noarch/include/linux/nfsd/debug.h
+--- linux-2.6.34.noarch/include/linux/nfsd/debug.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/debug.h	2010-09-30 10:17:09.144010000 -0400
+@@ -32,6 +32,8 @@
+ #define NFSDDBG_REPCACHE	0x0080
+ #define NFSDDBG_XDR		0x0100
+ #define NFSDDBG_LOCKD		0x0200
++#define NFSDDBG_PNFS		0x0400
++#define NFSDDBG_FILELAYOUT	0x0800
+ #define NFSDDBG_ALL		0x7FFF
+ #define NFSDDBG_NOCHANGE	0xFFFF
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfsd/export.h.orig linux-2.6.34.noarch/include/linux/nfsd/export.h
+--- linux-2.6.34.noarch/include/linux/nfsd/export.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/export.h	2010-09-30 10:17:09.149010000 -0400
+@@ -100,6 +100,7 @@ struct svc_export {
+ 	uid_t			ex_anon_uid;
+ 	gid_t			ex_anon_gid;
+ 	int			ex_fsid;
++	int			ex_pnfs;
+ 	unsigned char *		ex_uuid; /* 16 byte fsid */
+ 	struct nfsd4_fs_locations ex_fslocs;
+ 	int			ex_nflavors;
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h.orig	2010-09-30 10:17:09.153006000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4layoutxdr.h	2010-09-30 10:17:09.154012000 -0400
+@@ -0,0 +1,132 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef NFSD_NFS4LAYOUTXDR_H
++#define NFSD_NFS4LAYOUTXDR_H
++
++#include <linux/sunrpc/xdr.h>
++#include <linux/nfsd/nfsd4_pnfs.h>
++
++/* the nfsd4_pnfs_devlist dev_addr for the file layout type */
++struct pnfs_filelayout_devaddr {
++	struct xdr_netobj	r_netid;
++	struct xdr_netobj	r_addr;
++};
++
++/* list of multipath servers */
++struct pnfs_filelayout_multipath {
++	u32				fl_multipath_length;
++	struct pnfs_filelayout_devaddr 	*fl_multipath_list;
++};
++
++struct pnfs_filelayout_device {
++	u32					fl_stripeindices_length;
++	u32       		 		*fl_stripeindices_list;
++	u32					fl_device_length;
++	struct pnfs_filelayout_multipath 	*fl_device_list;
++};
++
++struct pnfs_filelayout_layout {
++	u32                             lg_layout_type; /* response */
++	u32                             lg_stripe_type; /* response */
++	u32                             lg_commit_through_mds; /* response */
++	u64                             lg_stripe_unit; /* response */
++	u64                             lg_pattern_offset; /* response */
++	u32                             lg_first_stripe_index;	/* response */
++	struct nfsd4_pnfs_deviceid	device_id;		/* response */
++	u32                             lg_fh_length;		/* response */
++	struct knfsd_fh                 *lg_fh_list;		/* response */
++};
++
++enum stripetype4 {
++	STRIPE_SPARSE = 1,
++	STRIPE_DENSE = 2
++};
++
++enum pnfs_block_extent_state4 {
++        PNFS_BLOCK_READWRITE_DATA       = 0,
++        PNFS_BLOCK_READ_DATA            = 1,
++        PNFS_BLOCK_INVALID_DATA         = 2,
++        PNFS_BLOCK_NONE_DATA            = 3
++};
++
++enum pnfs_block_volume_type4 {
++        PNFS_BLOCK_VOLUME_SIMPLE = 0,
++        PNFS_BLOCK_VOLUME_SLICE = 1,
++        PNFS_BLOCK_VOLUME_CONCAT = 2,
++        PNFS_BLOCK_VOLUME_STRIPE = 3,
++};
++typedef enum pnfs_block_volume_type4 pnfs_block_volume_type4;
++
++enum bl_cache_state {
++	BLOCK_LAYOUT_NEW	= 0,
++	BLOCK_LAYOUT_CACHE	= 1,
++	BLOCK_LAYOUT_UPDATE	= 2,
++};
++
++typedef struct pnfs_blocklayout_layout {
++        struct list_head                bll_list;
++        struct nfsd4_pnfs_deviceid      bll_vol_id;
++        u64                             bll_foff;	// file offset
++        u64                             bll_len;
++        u64                             bll_soff;	// storage offset
++	int				bll_recalled;
++        enum pnfs_block_extent_state4   bll_es;
++	enum bl_cache_state		bll_cache_state;
++} pnfs_blocklayout_layout_t;
++
++typedef struct pnfs_blocklayout_devinfo {
++        struct list_head                bld_list;
++        pnfs_block_volume_type4         bld_type;
++        struct nfsd4_pnfs_deviceid      bld_devid;
++        int                             bld_index_loc;
++        union {
++                struct {
++                        u64             bld_offset;
++                        u32             bld_sig_len,
++                                        *bld_sig;
++                } simple;
++                struct {
++                        u64             bld_start,
++                                        bld_len;
++                        u32             bld_index;      /* Index of Simple Volume */
++                } slice;
++                struct {
++                        u32             bld_stripes;
++                        u64             bld_chunk_size;
++                        u32             *bld_stripe_indexs;
++                } stripe;
++        } u;
++} pnfs_blocklayout_devinfo_t;
++
++#endif /* NFSD_NFS4LAYOUTXDR_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h.orig	2010-09-30 10:17:09.157010000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfs4pnfsdlm.h	2010-09-30 10:17:09.159008000 -0400
+@@ -0,0 +1,54 @@
++/******************************************************************************
++ *
++ * (c) 2007 Network Appliance, Inc.  All Rights Reserved.
++ * (c) 2009 NetApp.  All Rights Reserved.
++ *
++ * NetApp provides this source code under the GPL v2 License.
++ * The GPL v2 license is available at
++ * http://opensource.org/licenses/gpl-license.php.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
++ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ ******************************************************************************/
++#include <linux/genhd.h>
++
++/*
++ * Length of comma separated pnfs data server IPv4 addresses. Enough room for
++ * 32 addresses.
++ */
++#define NFSD_DLM_DS_LIST_MAX   512
++/*
++ * Length of colon separated pnfs dlm device of the form
++ * disk_name:comma separated data server IPv4 address
++ */
++#define NFSD_PNFS_DLM_DEVICE_MAX (NFSD_DLM_DS_LIST_MAX + DISK_NAME_LEN + 1)
++
++#ifdef CONFIG_PNFSD
++
++/* For use by DLM cluster file systems exported by pNFSD */
++extern const struct pnfs_export_operations pnfs_dlm_export_ops;
++
++int nfsd4_set_pnfs_dlm_device(char *pnfs_dlm_device, int len);
++
++void nfsd4_pnfs_dlm_shutdown(void);
++
++ssize_t nfsd4_get_pnfs_dlm_device_list(char *buf, ssize_t buflen);
++
++#else /* CONFIG_PNFSD */
++
++static inline void nfsd4_pnfs_dlm_shutdown(void)
++{
++	return;
++}
++
++#endif /* CONFIG_PNFSD */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h
+--- linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h.orig	2010-09-30 10:17:09.162007000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/nfsd4_pnfs.h	2010-09-30 10:17:09.163012000 -0400
+@@ -0,0 +1,271 @@
++/*
++ *  Copyright (c) 2006 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  Andy Adamson <andros@umich.edu>
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ */
++
++#ifndef _LINUX_NFSD_NFSD4_PNFS_H
++#define _LINUX_NFSD_NFSD4_PNFS_H
++
++#include <linux/exportfs.h>
++#include <linux/exp_xdr.h>
++#include <linux/nfs_xdr.h>
++
++struct nfsd4_pnfs_deviceid {
++	u64	sbid;			/* per-superblock unique ID */
++	u64	devid;			/* filesystem-wide unique device ID */
++};
++
++struct nfsd4_pnfs_dev_iter_res {
++	u64		gd_cookie;	/* request/response */
++	u64		gd_verf;	/* request/response */
++	u64		gd_devid;	/* response */
++	u32		gd_eof;		/* response */
++};
++
++/* Arguments for set_device_notify */
++struct pnfs_devnotify_arg {
++	struct nfsd4_pnfs_deviceid dn_devid;	/* request */
++	u32 dn_layout_type;			/* request */
++	u32 dn_notify_types;			/* request/response */
++};
++
++struct nfsd4_layout_seg {
++	u64	clientid;
++	u32	layout_type;
++	u32	iomode;
++	u64	offset;
++	u64	length;
++};
++
++/* Used by layout_get to encode layout (loc_body var in spec)
++ * Args:
++ * minlength - min number of accessible bytes given by layout
++ * fsid - Major part of struct pnfs_deviceid.  File system uses this
++ * to build the deviceid returned in the layout.
++ * fh - fs can modify the file handle for use on data servers
++ * seg - layout info requested and layout info returned
++ * xdr - xdr info
++ * return_on_close - true if layout to be returned on file close
++ */
++
++struct nfsd4_pnfs_layoutget_arg {
++	u64			lg_minlength;
++	u64			lg_sbid;
++	const struct knfsd_fh	*lg_fh;
++};
++
++struct nfsd4_pnfs_layoutget_res {
++	struct nfsd4_layout_seg	lg_seg;	/* request/response */
++	u32			lg_return_on_close;
++};
++
++struct nfsd4_pnfs_layoutcommit_arg {
++	struct nfsd4_layout_seg	lc_seg;		/* request */
++	u32			lc_reclaim;	/* request */
++	u32			lc_newoffset;	/* request */
++	u64			lc_last_wr;	/* request */
++	struct nfstime4		lc_mtime;	/* request */
++	u32			lc_up_len;	/* layout length */
++	void			*lc_up_layout;	/* decoded by callback */
++};
++
++struct nfsd4_pnfs_layoutcommit_res {
++	u32			lc_size_chg;	/* boolean for response */
++	u64			lc_newsize;	/* response */
++};
++
++#define PNFS_LAST_LAYOUT_NO_RECALLS ((void *)-1) /* used with lr_cookie below */
++
++struct nfsd4_pnfs_layoutreturn_arg {
++	u32			lr_return_type;	/* request */
++	struct nfsd4_layout_seg	lr_seg;		/* request */
++	u32			lr_reclaim;	/* request */
++	u32			lrf_body_len;	/* request */
++	void			*lrf_body;	/* request */
++	void			*lr_cookie;	/* fs private */
++};
++
++/* pNFS Metadata to Data server state communication */
++struct pnfs_get_state {
++	u32			dsid;    /* request */
++	u64			ino;      /* request */
++	nfs4_stateid		stid;     /* request/response */
++	nfs4_clientid		clid;     /* response */
++	u32			access;    /* response */
++	u32			stid_gen;    /* response */
++	u32			verifier[2]; /* response */
++};
++
++/*
++ * pNFS export operations vector.
++ *
++ * The filesystem must implement the following methods:
++ *   layout_type
++ *   get_device_info
++ *   layout_get
++ *
++ * All other methods are optional and can be set to NULL if not implemented.
++ */
++struct pnfs_export_operations {
++	/* Returns the supported pnfs_layouttype4. */
++	int (*layout_type) (struct super_block *);
++
++	/* Encode device info onto the xdr stream. */
++	int (*get_device_info) (struct super_block *,
++				struct exp_xdr_stream *,
++				u32 layout_type,
++				const struct nfsd4_pnfs_deviceid *);
++
++	/* Retrieve all available devices via an iterator.
++	 * arg->cookie == 0 indicates the beginning of the list,
++	 * otherwise arg->verf is used to verify that the list hasn't changed
++	 * while being retrieved.
++	 *
++	 * On output, the filesystem sets the devid based on the current cookie
++	 * and sets res->cookie and res->verf corresponding to the next entry.
++	 * When the last entry in the list is retrieved, res->eof is set to 1.
++	 */
++	int (*get_device_iter) (struct super_block *,
++				u32 layout_type,
++				struct nfsd4_pnfs_dev_iter_res *);
++
++	int (*set_device_notify) (struct super_block *,
++				  struct pnfs_devnotify_arg *);
++
++	/* Retrieve and encode a layout for inode onto the xdr stream.
++	 * arg->minlength is the minimum number of accessible bytes required
++	 *   by the client.
++	 * The maximum number of bytes to encode the layout is given by
++	 *   the xdr stream end pointer.
++	 * arg->fsid contains the major part of struct pnfs_deviceid.
++	 *   The file system uses this to build the deviceid returned
++	 *   in the layout.
++	 * res->seg - layout segment requested and layout info returned.
++	 * res->fh - fs can modify the file handle for use on data servers
++	 * res->return_on_close - true if layout to be returned on file close
++	 *
++	 * return one of the following nfs errors:
++	 * NFS_OK			Success
++	 * NFS4ERR_ACCESS		Permission error
++	 * NFS4ERR_BADIOMODE		Server does not support requested iomode
++	 * NFS4ERR_BADLAYOUT		No layout matching loga_minlength rules
++	 * NFS4ERR_INVAL		Parameter other than layout is invalid
++	 * NFS4ERR_IO			I/O error
++	 * NFS4ERR_LAYOUTTRYLATER	Layout may be retrieved later
++	 * NFS4ERR_LAYOUTUNAVAILABLE	Layout unavailable for this file
++	 * NFS4ERR_LOCKED		Lock conflict
++	 * NFS4ERR_NOSPC		Out-of-space error occurred
++	 * NFS4ERR_RECALLCONFLICT	Layout currently unavailable due to
++	 *				a conflicting CB_LAYOUTRECALL
++	 * NFS4ERR_SERVERFAULT		Server went berserk
++	 * NFS4ERR_TOOSMALL		loga_maxcount too small to fit layout
++	 * NFS4ERR_WRONG_TYPE		Wrong file type (not a regular file)
++	 */
++	enum nfsstat4 (*layout_get) (struct inode *,
++				     struct exp_xdr_stream *xdr,
++				     const struct nfsd4_pnfs_layoutget_arg *,
++				     struct nfsd4_pnfs_layoutget_res *);
++
++	/* Commit changes to layout */
++	int (*layout_commit) (struct inode *,
++			      const struct nfsd4_pnfs_layoutcommit_arg *,
++			      struct nfsd4_pnfs_layoutcommit_res *);
++
++	/* Returns the layout */
++	int (*layout_return) (struct inode *,
++			      const struct nfsd4_pnfs_layoutreturn_arg *);
++
++	/* Can layout segments be merged for this layout type? */
++	int (*can_merge_layouts) (u32 layout_type);
++
++	/* pNFS Files layout specific operations */
++
++	/* Get the write verifier for DS (called on MDS only) */
++	void (*get_verifier) (struct super_block *, u32 *p);
++	/* Call fs on DS only */
++	int (*get_state) (struct inode *, struct knfsd_fh *,
++			  struct pnfs_get_state *);
++};
++
++struct nfsd4_pnfs_cb_layout {
++	u32			cbl_recall_type;	/* request */
++	struct nfsd4_layout_seg cbl_seg;		/* request */
++	u32			cbl_layoutchanged;	/* request */
++	nfs4_stateid		cbl_sid;		/* request */
++	struct nfs4_fsid	cbl_fsid;
++	void			*cbl_cookie;		/* fs private */
++};
++
++/* layoutrecall request (from exported filesystem) */
++struct nfs4_layoutrecall {
++	struct kref			clr_ref;
++	struct nfsd4_pnfs_cb_layout	cb;	/* request */
++	struct list_head		clr_perclnt; /* on cl_layoutrecalls */
++	struct nfs4_client	       *clr_client;
++	struct nfs4_file	       *clr_file;
++	struct timespec			clr_time;	/* last activity */
++	struct super_block 		*clr_sb; /* We might not have a file */
++	struct nfs4_layoutrecall	*parent; /* The initiating recall */
++
++	void				*clr_args;	/* nfsd internal */
++};
++
++struct nfsd4_pnfs_cb_dev_item {
++	u32			cbd_notify_type;	/* request */
++	u32			cbd_layout_type;	/* request */
++	struct nfsd4_pnfs_deviceid cbd_devid;		/* request */
++	u32			cbd_immediate;		/* request */
++};
++
++struct nfsd4_pnfs_cb_dev_list {
++	u32				cbd_len;  /* request */
++	struct nfsd4_pnfs_cb_dev_item  *cbd_list; /* request */
++};
++
++/*
++ * callbacks provided by the nfsd
++ */
++struct pnfsd_cb_operations {
++	/* Generic callbacks */
++	int (*cb_layout_recall) (struct super_block *, struct inode *,
++				 struct nfsd4_pnfs_cb_layout *);
++	int (*cb_device_notify) (struct super_block *,
++				 struct nfsd4_pnfs_cb_dev_list *);
++
++	/* pNFS Files layout specific callbacks */
++
++	/* Callback from fs on MDS only */
++	int (*cb_get_state) (struct super_block *, struct pnfs_get_state *);
++	/* Callback from fs on DS only */
++	int (*cb_change_state) (struct pnfs_get_state *);
++};
++
++#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
+diff -up linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig linux-2.6.34.noarch/include/linux/nfsd/syscall.h
+--- linux-2.6.34.noarch/include/linux/nfsd/syscall.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfsd/syscall.h	2010-09-30 10:17:09.168010000 -0400
+@@ -29,6 +29,7 @@
+ /*#define NFSCTL_GETFH		6	/ * get an fh by ino DISCARDED */
+ #define NFSCTL_GETFD		7	/* get an fh by path (used by mountd) */
+ #define	NFSCTL_GETFS		8	/* get an fh by path with max FH len */
++#define	NFSCTL_FD2FH		9	/* get a fh from a fd */
+ 
+ /* SVC */
+ struct nfsctl_svc {
+@@ -71,6 +72,11 @@ struct nfsctl_fsparm {
+ 	int			gd_maxlen;
+ };
+ 
++/* FD2FH */
++struct nfsctl_fd2fh {
++	int			fd;
++};
++
+ /*
+  * This is the argument union.
+  */
+@@ -82,6 +88,7 @@ struct nfsctl_arg {
+ 		struct nfsctl_export	u_export;
+ 		struct nfsctl_fdparm	u_getfd;
+ 		struct nfsctl_fsparm	u_getfs;
++		struct nfsctl_fd2fh	u_fd2fh;
+ 		/*
+ 		 * The following dummy member is needed to preserve binary compatibility
+ 		 * on platforms where alignof(void*)>alignof(int).  It's needed because
+@@ -95,6 +102,7 @@ struct nfsctl_arg {
+ #define ca_export	u.u_export
+ #define ca_getfd	u.u_getfd
+ #define	ca_getfs	u.u_getfs
++#define	ca_fd2fh	u.u_fd2fh
+ };
+ 
+ union nfsctl_res {
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs.h.orig linux-2.6.34.noarch/include/linux/nfs_fs.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs.h.orig	2010-09-30 10:15:17.949718000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs.h	2010-09-30 10:17:09.071005000 -0400
+@@ -72,13 +72,20 @@ struct nfs_access_entry {
+ 	int			mask;
+ };
+ 
++struct nfs_lock_context {
++	atomic_t count;
++	struct list_head list;
++	struct nfs_open_context *open_context;
++	fl_owner_t lockowner;
++	pid_t pid;
++};
++
+ struct nfs4_state;
+ struct nfs_open_context {
+-	atomic_t count;
++	struct nfs_lock_context lock_context;
+ 	struct path path;
+ 	struct rpc_cred *cred;
+ 	struct nfs4_state *state;
+-	fl_owner_t lockowner;
+ 	fmode_t mode;
+ 
+ 	unsigned long flags;
+@@ -97,6 +104,27 @@ struct nfs_delegation;
+ 
+ struct posix_acl;
+ 
++struct pnfs_layout_hdr {
++	int			refcount;
++	struct list_head	layouts;   /* other client layouts */
++	struct list_head	segs;      /* layout segments list */
++	int			roc_iomode;/* return on close iomode, 0=none */
++	seqlock_t		seqlock;   /* Protects the stateid */
++	nfs4_stateid		stateid;
++	unsigned long		state;
++#define NFS_INO_RO_LAYOUT_FAILED 0         /* ro layoutget failed stop trying */
++#define NFS_INO_RW_LAYOUT_FAILED 1         /* rw layoutget failed stop trying */
++#define NFS_INO_LAYOUTCOMMIT     2         /* LAYOUTCOMMIT needed */
++
++	struct rpc_cred		*cred;     /* layoutcommit credential */
++	/* DH: These vars keep track of the maximum write range
++	 * so the values can be used for layoutcommit.
++	 */
++	loff_t			write_begin_pos;
++	loff_t			write_end_pos;
++	struct inode		*inode;
++};
++
+ /*
+  * nfs fs inode data in memory
+  */
+@@ -181,6 +209,13 @@ struct nfs_inode {
+ 	struct nfs_delegation	*delegation;
+ 	fmode_t			 delegation_state;
+ 	struct rw_semaphore	rwsem;
++
++	/* pNFS layout information */
++#if defined(CONFIG_NFS_V4_1)
++	wait_queue_head_t lo_waitq;
++	struct pnfs_layout_hdr *layout;
++	time_t pnfs_layout_suspend;
++#endif /* CONFIG_NFS_V4_1 */
+ #endif /* CONFIG_NFS_V4*/
+ #ifdef CONFIG_NFS_FSCACHE
+ 	struct fscache_cookie	*fscache;
+@@ -353,6 +388,8 @@ extern void nfs_setattr_update_inode(str
+ extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
+ extern void put_nfs_open_context(struct nfs_open_context *ctx);
+ extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode);
++extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx);
++extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx);
+ extern u64 nfs_compat_user_ino64(u64 fileid);
+ extern void nfs_fattr_init(struct nfs_fattr *fattr);
+ 
+@@ -481,8 +518,12 @@ extern void nfs_unblock_sillyrename(stru
+ extern int  nfs_congestion_kb;
+ extern int  nfs_writepage(struct page *page, struct writeback_control *wbc);
+ extern int  nfs_writepages(struct address_space *, struct writeback_control *);
+-extern int  nfs_flush_incompatible(struct file *file, struct page *page);
+-extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
++struct pnfs_layout_segment;
++extern int  nfs_flush_incompatible(struct file *file, struct page *page,
++				   struct pnfs_layout_segment *lseg);
++extern int  nfs_updatepage(struct file *, struct page *,
++			   unsigned int offset, unsigned int count,
++			   struct pnfs_layout_segment *lseg, void *fsdata);
+ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
+ 
+ /*
+@@ -604,6 +645,8 @@ extern void * nfs_root_data(void);
+ #define NFSDBG_CLIENT		0x0200
+ #define NFSDBG_MOUNT		0x0400
+ #define NFSDBG_FSCACHE		0x0800
++#define NFSDBG_PNFS		0x1000
++#define NFSDBG_PNFS_LD		0x2000
+ #define NFSDBG_ALL		0xFFFF
+ 
+ #ifdef __KERNEL__
+diff -up linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig linux-2.6.34.noarch/include/linux/nfs_fs_sb.h
+--- linux-2.6.34.noarch/include/linux/nfs_fs_sb.h.orig	2010-09-30 10:15:17.959722000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_fs_sb.h	2010-09-30 10:17:09.083008000 -0400
+@@ -15,6 +15,7 @@ struct nlm_host;
+ struct nfs4_sequence_args;
+ struct nfs4_sequence_res;
+ struct nfs_server;
++struct nfs4_minor_version_ops;
+ 
+ /*
+  * The nfs_client identifies our client state to the server.
+@@ -70,11 +71,7 @@ struct nfs_client {
+ 	 */
+ 	char			cl_ipaddr[48];
+ 	unsigned char		cl_id_uniquifier;
+-	int		     (* cl_call_sync)(struct nfs_server *server,
+-					      struct rpc_message *msg,
+-					      struct nfs4_sequence_args *args,
+-					      struct nfs4_sequence_res *res,
+-					      int cache_reply);
++	const struct nfs4_minor_version_ops *cl_mvops;
+ #endif /* CONFIG_NFS_V4 */
+ 
+ #ifdef CONFIG_NFS_V4_1
+@@ -85,6 +82,8 @@ struct nfs_client {
+ 	/* The flags used for obtaining the clientid during EXCHANGE_ID */
+ 	u32			cl_exchange_flags;
+ 	struct nfs4_session	*cl_session; 	/* sharred session */
++	struct list_head	cl_layouts;
++	struct nfs4_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
+ #endif /* CONFIG_NFS_V4_1 */
+ 
+ #ifdef CONFIG_NFS_FSCACHE
+@@ -92,6 +91,16 @@ struct nfs_client {
+ #endif
+ };
+ 
++static inline bool
++is_ds_only_client(struct nfs_client *clp)
++{
++#ifdef CONFIG_NFS_V4_1
++	return is_ds_only_session(clp->cl_exchange_flags);
++#else
++	return false;
++#endif
++}
++
+ /*
+  * NFS client parameters stored in the superblock.
+  */
+@@ -136,7 +145,7 @@ struct nfs_server {
+ #endif
+ 
+ #ifdef CONFIG_NFS_V4
+-	u32			attr_bitmask[2];/* V4 bitmask representing the set
++	u32			attr_bitmask[3];/* V4 bitmask representing the set
+ 						   of attributes supported on this
+ 						   filesystem */
+ 	u32			cache_consistency_bitmask[2];
+@@ -148,6 +157,15 @@ struct nfs_server {
+ 						   that are supported on this
+ 						   filesystem */
+ #endif
++
++#ifdef CONFIG_NFS_V4_1
++	u32				pnfs_blksize; /* layout_blksize attr */
++	struct pnfs_layoutdriver_type  *pnfs_curr_ld; /* Active layout driver */
++	void			       *pnfs_ld_data; /* Per-mount data */
++	unsigned int			ds_rsize;  /* Data server read size */
++	unsigned int			ds_wsize;  /* Data server write size */
++#endif /* CONFIG_NFS_V4_1 */
++
+ 	void (*destroy)(struct nfs_server *);
+ 
+ 	atomic_t active; /* Keep trace of any activity to this server */
+diff -up linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig linux-2.6.34.noarch/include/linux/nfs_iostat.h
+--- linux-2.6.34.noarch/include/linux/nfs_iostat.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_iostat.h	2010-09-30 10:17:09.110005000 -0400
+@@ -113,6 +113,9 @@ enum nfs_stat_eventcounters {
+ 	NFSIOS_SHORTREAD,
+ 	NFSIOS_SHORTWRITE,
+ 	NFSIOS_DELAY,
++	NFSIOS_PNFS_READ,
++	NFSIOS_PNFS_WRITE,
++	NFSIOS_PNFS_COMMIT,
+ 	__NFSIOS_COUNTSMAX,
+ };
+ 
+diff -up linux-2.6.34.noarch/include/linux/nfs_page.h.orig linux-2.6.34.noarch/include/linux/nfs_page.h
+--- linux-2.6.34.noarch/include/linux/nfs_page.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_page.h	2010-09-30 10:17:09.122008000 -0400
+@@ -39,6 +39,7 @@ struct nfs_page {
+ 	struct list_head	wb_list;	/* Defines state of page: */
+ 	struct page		*wb_page;	/* page to read in/write out */
+ 	struct nfs_open_context	*wb_context;	/* File state context info */
++	struct nfs_lock_context	*wb_lock_context;	/* lock context info */
+ 	atomic_t		wb_complete;	/* i/os we're waiting for */
+ 	pgoff_t			wb_index;	/* Offset >> PAGE_CACHE_SHIFT */
+ 	unsigned int		wb_offset,	/* Offset & ~PAGE_CACHE_MASK */
+@@ -47,6 +48,7 @@ struct nfs_page {
+ 	struct kref		wb_kref;	/* reference count */
+ 	unsigned long		wb_flags;
+ 	struct nfs_writeverf	wb_verf;	/* Commit cookie */
++	struct pnfs_layout_segment *wb_lseg;	/* Pnfs layout info */
+ };
+ 
+ struct nfs_pageio_descriptor {
+@@ -60,6 +62,12 @@ struct nfs_pageio_descriptor {
+ 	int			(*pg_doio)(struct inode *, struct list_head *, unsigned int, size_t, int);
+ 	int 			pg_ioflags;
+ 	int			pg_error;
++	struct pnfs_layout_segment *pg_lseg;
++#ifdef CONFIG_NFS_V4_1
++	int			pg_iswrite;
++	int			pg_boundary;
++	int			(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
++#endif /* CONFIG_NFS_V4_1 */
+ };
+ 
+ #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
+@@ -68,13 +76,15 @@ extern	struct nfs_page *nfs_create_reque
+ 					    struct inode *inode,
+ 					    struct page *page,
+ 					    unsigned int offset,
+-					    unsigned int count);
++					    unsigned int count,
++					    struct pnfs_layout_segment *lseg);
+ extern	void nfs_clear_request(struct nfs_page *req);
+ extern	void nfs_release_request(struct nfs_page *req);
+ 
+ 
+ extern	int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
+-			  pgoff_t idx_start, unsigned int npages, int tag);
++			  pgoff_t idx_start, unsigned int npages, int tag,
++			  int *use_pnfs);
+ extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
+ 			     struct inode *inode,
+ 			     int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
+diff -up linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig linux-2.6.34.noarch/include/linux/nfs_xdr.h
+--- linux-2.6.34.noarch/include/linux/nfs_xdr.h.orig	2010-09-30 10:15:17.965727000 -0400
++++ linux-2.6.34.noarch/include/linux/nfs_xdr.h	2010-09-30 10:17:09.134006000 -0400
+@@ -3,6 +3,8 @@
+ 
+ #include <linux/nfsacl.h>
+ #include <linux/nfs3.h>
++#include <linux/nfs4.h>
++#include <linux/sunrpc/sched.h>
+ 
+ /*
+  * To change the maximum rsize and wsize supported by the NFS client, adjust
+@@ -10,7 +12,7 @@
+  * support a megabyte or more.  The default is left at 4096 bytes, which is
+  * reasonable for NFS over UDP.
+  */
+-#define NFS_MAX_FILE_IO_SIZE	(1048576U)
++#define NFS_MAX_FILE_IO_SIZE	(4U * 1048576U)
+ #define NFS_DEF_FILE_IO_SIZE	(4096U)
+ #define NFS_MIN_FILE_IO_SIZE	(1024U)
+ 
+@@ -113,6 +115,10 @@ struct nfs_fsinfo {
+ 	__u32			dtpref;	/* pref. readdir transfer size */
+ 	__u64			maxfilesize;
+ 	__u32			lease_time; /* in seconds */
++#if defined(CONFIG_NFS_V4_1)
++	__u32			layouttype; /* supported pnfs layout driver */
++	__u32			blksize; /* preferred pnfs io block size */
++#endif
+ };
+ 
+ struct nfs_fsstat {
+@@ -185,6 +191,125 @@ struct nfs4_get_lease_time_res {
+ 	struct nfs4_sequence_res	lr_seq_res;
+ };
+ 
++#define PNFS_LAYOUT_MAXSIZE 4096
++#define NFS4_PNFS_DEVICEID4_SIZE 16
++
++struct pnfs_deviceid {
++	char data[NFS4_PNFS_DEVICEID4_SIZE];
++};
++
++struct nfs4_layoutdriver_data {
++	__u32 len;
++	void *buf;
++};
++
++struct pnfs_layout_range {
++	u32 iomode;
++	u64 offset;
++	u64 length;
++};
++
++struct nfs4_layoutget_args {
++	__u32 type;
++	struct pnfs_layout_range range;
++	__u64 minlength;
++	__u32 maxcount;
++	struct inode *inode;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_layoutget_res {
++	__u32 return_on_close;
++	struct pnfs_layout_range range;
++	__u32 type;
++	nfs4_stateid stateid;
++	struct nfs4_layoutdriver_data layout;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct nfs4_layoutget {
++	struct nfs4_layoutget_args args;
++	struct nfs4_layoutget_res res;
++	struct pnfs_layout_segment **lsegpp;
++	int status;
++};
++
++struct nfs4_layoutcommit_args {
++	nfs4_stateid stateid;
++	__u64 lastbytewritten;
++	__u32 time_modify_changed;
++	struct timespec time_modify;
++	const u32 *bitmask;
++	struct nfs_fh *fh;
++	struct inode *inode;
++
++	/* Values set by layout driver */
++	struct pnfs_layout_range range;
++	__u32 layout_type;
++	void *layoutdriver_data;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_layoutcommit_res {
++	__u32 sizechanged;
++	__u64 newsize;
++	struct nfs_fattr *fattr;
++	const struct nfs_server *server;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct nfs4_layoutcommit_data {
++	struct rpc_task task;
++	struct rpc_cred *cred;
++	struct nfs_fattr fattr;
++	struct nfs4_layoutcommit_args args;
++	struct nfs4_layoutcommit_res res;
++	int status;
++};
++
++struct nfs4_layoutreturn_args {
++	__u32   reclaim;
++	__u32   layout_type;
++	__u32   return_type;
++	struct pnfs_layout_range range;
++	struct inode *inode;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_layoutreturn_res {
++	struct nfs4_sequence_res seq_res;
++	u32 lrs_present;
++	nfs4_stateid stateid;
++};
++
++struct nfs4_layoutreturn {
++	struct nfs4_layoutreturn_args args;
++	struct nfs4_layoutreturn_res res;
++	struct rpc_cred *cred;
++	int rpc_status;
++};
++
++struct nfs4_getdevicelist_args {
++	const struct nfs_fh *fh;
++	u32 layoutclass;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_getdevicelist_res {
++	struct pnfs_devicelist *devlist;
++	struct nfs4_sequence_res seq_res;
++};
++
++struct nfs4_getdeviceinfo_args {
++	struct pnfs_device *pdev;
++	struct nfs4_sequence_args seq_args;
++};
++
++struct nfs4_getdeviceinfo_res {
++	struct pnfs_device *pdev;
++	struct nfs4_sequence_res seq_res;
++};
++
+ /*
+  * Arguments to the open call.
+  */
+@@ -196,8 +321,10 @@ struct nfs_openargs {
+ 	__u64                   clientid;
+ 	__u64                   id;
+ 	union {
+-		struct iattr *  attrs;    /* UNCHECKED, GUARDED */
+-		nfs4_verifier   verifier; /* EXCLUSIVE */
++		struct {
++			struct iattr *  attrs;    /* UNCHECKED, GUARDED */
++			nfs4_verifier   verifier; /* EXCLUSIVE */
++		};
+ 		nfs4_stateid	delegation;		/* CLAIM_DELEGATE_CUR */
+ 		fmode_t		delegation_type;	/* CLAIM_PREVIOUS */
+ 	} u;
+@@ -313,6 +440,10 @@ struct nfs_lockt_res {
+ 	struct nfs4_sequence_res	seq_res;
+ };
+ 
++struct nfs_release_lockowner_args {
++	struct nfs_lowner	lock_owner;
++};
++
+ struct nfs4_delegreturnargs {
+ 	const struct nfs_fh *fhandle;
+ 	const nfs4_stateid *stateid;
+@@ -332,6 +463,7 @@ struct nfs4_delegreturnres {
+ struct nfs_readargs {
+ 	struct nfs_fh *		fh;
+ 	struct nfs_open_context *context;
++	struct nfs_lock_context *lock_context;
+ 	__u64			offset;
+ 	__u32			count;
+ 	unsigned int		pgbase;
+@@ -352,6 +484,7 @@ struct nfs_readres {
+ struct nfs_writeargs {
+ 	struct nfs_fh *		fh;
+ 	struct nfs_open_context *context;
++	struct nfs_lock_context *lock_context;
+ 	__u64			offset;
+ 	__u32			count;
+ 	enum nfs3_stable_how	stable;
+@@ -846,7 +979,7 @@ struct nfs4_server_caps_arg {
+ };
+ 
+ struct nfs4_server_caps_res {
+-	u32				attr_bitmask[2];
++	u32				attr_bitmask[3];
+ 	u32				acl_bitmask;
+ 	u32				has_links;
+ 	u32				has_symlinks;
+@@ -961,6 +1094,27 @@ struct nfs_page;
+ 
+ #define NFS_PAGEVEC_SIZE	(8U)
+ 
++#if defined(CONFIG_NFS_V4_1)
++/* pnfsflag values */
++#define PNFS_NO_RPC		0x0001   /* non rpc result callback switch */
++
++/* pnfs-specific data needed for read, write, and commit calls */
++struct pnfs_call_data {
++	struct pnfs_layout_segment *lseg;
++	const struct rpc_call_ops *call_ops;
++	u32			orig_count;	/* for retry via MDS */
++	int			pnfs_error;
++	u8			pnfsflags;
++	u8			how;		/* for FLUSH_STABLE */
++};
++
++/* files layout-type specific data for read, write, and commit */
++struct pnfs_fl_call_data {
++	struct nfs_client	*ds_nfs_client;
++	__u64			orig_offset;
++};
++#endif /* CONFIG_NFS_V4_1 */
++
+ struct nfs_read_data {
+ 	int			flags;
+ 	struct rpc_task		task;
+@@ -976,10 +1130,16 @@ struct nfs_read_data {
+ #ifdef CONFIG_NFS_V4
+ 	unsigned long		timestamp;	/* For lease renewal */
+ #endif
++#if defined(CONFIG_NFS_V4_1)
++	struct pnfs_call_data	pdata;
++	struct pnfs_fl_call_data fldata;
++#endif /* CONFIG_NFS_V4_1 */
+ 	struct page		*page_array[NFS_PAGEVEC_SIZE];
+ };
+ 
+ struct nfs_write_data {
++	struct kref		refcount;	/* For pnfs commit splitting */
++	struct nfs_write_data	*parent;	/* For pnfs commit splitting */
+ 	int			flags;
+ 	struct rpc_task		task;
+ 	struct inode		*inode;
+@@ -995,6 +1155,10 @@ struct nfs_write_data {
+ #ifdef CONFIG_NFS_V4
+ 	unsigned long		timestamp;	/* For lease renewal */
+ #endif
++#if defined(CONFIG_NFS_V4_1)
++	struct pnfs_call_data	pdata;
++	struct pnfs_fl_call_data fldata;
++#endif /* CONFIG_NFS_V4_1 */
+ 	struct page		*page_array[NFS_PAGEVEC_SIZE];
+ };
+ 
+@@ -1008,6 +1172,7 @@ struct nfs_rpc_ops {
+ 	const struct dentry_operations *dentry_ops;
+ 	const struct inode_operations *dir_inode_ops;
+ 	const struct inode_operations *file_inode_ops;
++	const struct file_operations *file_ops;
+ 
+ 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
+ 			    struct nfs_fsinfo *);
+@@ -1072,6 +1237,7 @@ struct nfs_rpc_ops {
+ extern const struct nfs_rpc_ops	nfs_v2_clientops;
+ extern const struct nfs_rpc_ops	nfs_v3_clientops;
+ extern const struct nfs_rpc_ops	nfs_v4_clientops;
++extern const struct nfs_rpc_ops	pnfs_v4_clientops;
+ extern struct rpc_version	nfs_version2;
+ extern struct rpc_version	nfs_version3;
+ extern struct rpc_version	nfs_version4;
+diff -up linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig linux-2.6.34.noarch/include/linux/panfs_shim_api.h
+--- linux-2.6.34.noarch/include/linux/panfs_shim_api.h.orig	2010-09-30 10:17:09.202009000 -0400
++++ linux-2.6.34.noarch/include/linux/panfs_shim_api.h	2010-09-30 10:17:09.204008000 -0400
+@@ -0,0 +1,57 @@
++#ifndef _PANFS_SHIM_API_H
++#define _PANFS_SHIM_API_H
++
++/*
++ * imported panfs functions
++ */
++struct panfs_export_operations {
++	int (*convert_rc)(pan_status_t rc);
++
++	int (*sm_sec_t_get_size_otw)(
++		pan_sm_sec_otw_t *var,
++		pan_size_t *core_sizep,
++		pan_size_t *wire_size,
++		void *buf_end);
++
++	int (*sm_sec_t_unmarshall)(
++		pan_sm_sec_otw_t *in,
++		pan_sm_sec_t *out,
++		void *buf,
++		pan_size_t size,
++		pan_size_t *otw_consumed,
++		pan_size_t *in_core_consumed);
++
++	int (*ucreds_get)(void **ucreds_pp);
++
++	void (*ucreds_put)(void *ucreds);
++
++	int (*sam_read)(
++		pan_sam_access_flags_t  flags,
++		pan_sam_read_args_t    *args_p,
++		pan_sam_obj_sec_t      *obj_sec_p,
++		pan_sg_entry_t         *data_p,
++		void                   *ucreds,
++		pan_sam_read_cb_t       closure,
++		void                   *user_arg1,
++		void                   *user_arg2,
++		pan_sam_read_res_t     *res_p);
++
++	int (*sam_write)(
++		pan_sam_access_flags_t  flags,
++		pan_sam_write_args_t   *args_p,
++		pan_sam_obj_sec_t      *obj_sec_p,
++		pan_sg_entry_t         *data_p,
++		void                   *ucreds,
++		pan_sam_write_cb_t      closure,
++		void                   *user_arg1,
++		void                   *user_arg2,
++		pan_sam_write_res_t    *res_p);
++};
++
++extern int
++panfs_shim_register(struct panfs_export_operations *ops);
++
++extern int
++panfs_shim_unregister(void);
++
++#endif /* _PANFS_SHIM_API_H */
+diff -up linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h
+--- linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h.orig	2010-09-30 10:17:09.214010000 -0400
++++ linux-2.6.34.noarch/include/linux/pnfs_osd_xdr.h	2010-09-30 10:17:09.215014000 -0400
+@@ -0,0 +1,439 @@
++/*
++ *  pnfs_osd_xdr.h
++ *
++ *  pNFS-osd on-the-wire data structures
++ *
++ *  Copyright (C) 2007-2009 Panasas Inc.
++ *  All rights reserved.
++ *
++ *  Benny Halevy <bhalevy@panasas.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License version 2
++ *  See the file COPYING included with this distribution for more details.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the Panasas company nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#ifndef __PNFS_OSD_XDR_H__
++#define __PNFS_OSD_XDR_H__
++
++#include <linux/nfs_fs.h>
++#include <linux/nfs_page.h>
++#include <linux/exp_xdr.h>
++#include <scsi/osd_protocol.h>
++
++#define PNFS_OSD_OSDNAME_MAXSIZE 256
++
++/*
++ * START OF "GENERIC" DECODE ROUTINES.
++ *   These may look a little ugly since they are imported from a "generic"
++ * set of XDR encode/decode routines which are intended to be shared by
++ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
++ *
++ * If the pain of reading these is too great, it should be a straightforward
++ * task to translate them into Linux-specific versions which are more
++ * consistent with the style used in NFSv2/v3...
++ */
++#define READ32(x)         ((x) = ntohl(*p++))
++#define READ64(x)         do {			\
++	(x) = (u64)ntohl(*p++) << 32;		\
++	(x) |= ntohl(*p++);			\
++} while (0)
++#define COPYMEM(x, nbytes) do {			\
++	memcpy((x), p, nbytes);			\
++	p += XDR_QUADLEN(nbytes);		\
++} while (0)
++
++/*
++ * draft-ietf-nfsv4-minorversion-22
++ * draft-ietf-nfsv4-pnfs-obj-12
++ */
++
++/* Layout Structure */
++
++enum pnfs_osd_raid_algorithm4 {
++	PNFS_OSD_RAID_0		= 1,
++	PNFS_OSD_RAID_4		= 2,
++	PNFS_OSD_RAID_5		= 3,
++	PNFS_OSD_RAID_PQ	= 4     /* Reed-Solomon P+Q */
++};
++
++/*   struct pnfs_osd_data_map4 {
++ *       uint32_t                    odm_num_comps;
++ *       length4                     odm_stripe_unit;
++ *       uint32_t                    odm_group_width;
++ *       uint32_t                    odm_group_depth;
++ *       uint32_t                    odm_mirror_cnt;
++ *       pnfs_osd_raid_algorithm4    odm_raid_algorithm;
++ *   };
++ */
++struct pnfs_osd_data_map {
++	u32	odm_num_comps;
++	u64	odm_stripe_unit;
++	u32	odm_group_width;
++	u32	odm_group_depth;
++	u32	odm_mirror_cnt;
++	u32	odm_raid_algorithm;
++};
++
++static inline int
++pnfs_osd_data_map_xdr_sz(void)
++{
++	return 1 + 2 + 1 + 1 + 1 + 1;
++}
++
++static inline size_t
++pnfs_osd_data_map_incore_sz(void)
++{
++	return sizeof(struct pnfs_osd_data_map);
++}
++
++/*   struct pnfs_osd_objid4 {
++ *       deviceid4       oid_device_id;
++ *       uint64_t        oid_partition_id;
++ *       uint64_t        oid_object_id;
++ *   };
++ */
++struct pnfs_osd_objid {
++	struct pnfs_deviceid	oid_device_id;
++	u64			oid_partition_id;
++	u64			oid_object_id;
++};
++
++/* For printout. I use "dev(%llx:%llx)", _DEVID_LO(), _DEVID_HI BE style */
++#define _DEVID_LO(oid_device_id) \
++	((unsigned long long)be64_to_cpup((__be64 *)(oid_device_id).data))
++
++#define _DEVID_HI(oid_device_id) \
++	((unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id).data) + 1))
++
++static inline int
++pnfs_osd_objid_xdr_sz(void)
++{
++	return (NFS4_PNFS_DEVICEID4_SIZE / 4) + 2 + 2;
++}
++
++static inline size_t
++pnfs_osd_objid_incore_sz(void)
++{
++	return sizeof(struct pnfs_osd_objid);
++}
++
++enum pnfs_osd_version {
++	PNFS_OSD_MISSING              = 0,
++	PNFS_OSD_VERSION_1            = 1,
++	PNFS_OSD_VERSION_2            = 2
++};
++
++struct pnfs_osd_opaque_cred {
++	u32 cred_len;
++	u8 *cred;
++};
++
++static inline int
++pnfs_osd_opaque_cred_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++	u32 n;
++
++	READ32(n);
++	p += XDR_QUADLEN(n);
++	return p - start;
++}
++
++static inline size_t
++pnfs_osd_opaque_cred_incore_sz(u32 *p)
++{
++	u32 n;
++
++	READ32(n);
++	return XDR_QUADLEN(n) * 4;
++}
++
++enum pnfs_osd_cap_key_sec {
++	PNFS_OSD_CAP_KEY_SEC_NONE     = 0,
++	PNFS_OSD_CAP_KEY_SEC_SSV      = 1,
++};
++
++/*   struct pnfs_osd_object_cred4 {
++ *       pnfs_osd_objid4         oc_object_id;
++ *       pnfs_osd_version4       oc_osd_version;
++ *       pnfs_osd_cap_key_sec4   oc_cap_key_sec;
++ *       opaque                  oc_capability_key<>;
++ *       opaque                  oc_capability<>;
++ *   };
++ */
++struct pnfs_osd_object_cred {
++	struct pnfs_osd_objid		oc_object_id;
++	u32				oc_osd_version;
++	u32				oc_cap_key_sec;
++	struct pnfs_osd_opaque_cred	oc_cap_key;
++	struct pnfs_osd_opaque_cred	oc_cap;
++};
++
++static inline int
++pnfs_osd_object_cred_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++
++	p += pnfs_osd_objid_xdr_sz() + 2;
++	p += pnfs_osd_opaque_cred_xdr_sz(p);
++	p += pnfs_osd_opaque_cred_xdr_sz(p);
++	return p - start;
++}
++
++static inline size_t
++pnfs_osd_object_cred_incore_sz(u32 *p)
++{
++	size_t sz = sizeof(struct pnfs_osd_object_cred);
++
++	p += pnfs_osd_objid_xdr_sz() + 2;
++	sz += pnfs_osd_opaque_cred_incore_sz(p);
++	p += pnfs_osd_opaque_cred_xdr_sz(p);
++	sz += pnfs_osd_opaque_cred_incore_sz(p);
++	return sz;
++}
++
++/*   struct pnfs_osd_layout4 {
++ *       pnfs_osd_data_map4      olo_map;
++ *       uint32_t                olo_comps_index;
++ *       pnfs_osd_object_cred4   olo_components<>;
++ *   };
++ */
++struct pnfs_osd_layout {
++	struct pnfs_osd_data_map	olo_map;
++	u32				olo_comps_index;
++	u32				olo_num_comps;
++	struct pnfs_osd_object_cred	*olo_comps;
++};
++
++static inline int
++pnfs_osd_layout_xdr_sz(u32 *p)
++{
++	u32 *start = p;
++	u32 n;
++
++	p += pnfs_osd_data_map_xdr_sz() + 1;
++	READ32(n);
++	while ((int)(n--) > 0)
++		p += pnfs_osd_object_cred_xdr_sz(p);
++	return p - start;
++}
++
++static inline size_t
++pnfs_osd_layout_incore_sz(u32 *p)
++{
++	u32 n;
++	size_t sz;
++
++	p += pnfs_osd_data_map_xdr_sz() + 1;
++	READ32(n);
++	sz = sizeof(struct pnfs_osd_layout);
++	while ((int)(n--) > 0) {
++		sz += pnfs_osd_object_cred_incore_sz(p);
++		p += pnfs_osd_object_cred_xdr_sz(p);
++	}
++	return sz;
++}
++
++/* Device Address */
++
++enum pnfs_osd_targetid_type {
++	OBJ_TARGET_ANON = 1,
++	OBJ_TARGET_SCSI_NAME = 2,
++	OBJ_TARGET_SCSI_DEVICE_ID = 3,
++};
++
++/*   union pnfs_osd_targetid4 switch (pnfs_osd_targetid_type4 oti_type) {
++ *       case OBJ_TARGET_SCSI_NAME:
++ *           string              oti_scsi_name<>;
++ *
++ *       case OBJ_TARGET_SCSI_DEVICE_ID:
++ *           opaque              oti_scsi_device_id<>;
++ *
++ *       default:
++ *           void;
++ *   };
++ *
++ *   union pnfs_osd_targetaddr4 switch (bool ota_available) {
++ *       case TRUE:
++ *           netaddr4            ota_netaddr;
++ *       case FALSE:
++ *           void;
++ *   };
++ *
++ *   struct pnfs_osd_deviceaddr4 {
++ *       pnfs_osd_targetid4      oda_targetid;
++ *       pnfs_osd_targetaddr4    oda_targetaddr;
++ *       uint64_t                oda_lun;
++ *       opaque                  oda_systemid<>;
++ *       pnfs_osd_object_cred4   oda_root_obj_cred;
++ *       opaque                  oda_osdname<>;
++ *   };
++ */
++struct pnfs_osd_targetid {
++	u32				oti_type;
++	struct nfs4_string		oti_scsi_device_id;
++};
++
++enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 };
++
++/*   struct netaddr4 {
++ *       // see struct rpcb in RFC1833
++ *       string r_netid<>;    // network id
++ *       string r_addr<>;     // universal address
++ *   };
++ */
++struct pnfs_osd_net_addr {
++	struct nfs4_string	r_netid;
++	struct nfs4_string	r_addr;
++};
++
++struct pnfs_osd_targetaddr {
++	u32				ota_available;
++	struct pnfs_osd_net_addr	ota_netaddr;
++};
++
++enum {
++	NETWORK_ID_MAX = 16 / 4,
++	UNIVERSAL_ADDRESS_MAX = 64 / 4,
++	PNFS_OSD_TARGETADDR_MAX = 3 +  NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX,
++};
++
++struct pnfs_osd_deviceaddr {
++	struct pnfs_osd_targetid	oda_targetid;
++	struct pnfs_osd_targetaddr	oda_targetaddr;
++	u8				oda_lun[8];
++	struct nfs4_string		oda_systemid;
++	struct pnfs_osd_object_cred	oda_root_obj_cred;
++	struct nfs4_string		oda_osdname;
++};
++
++enum {
++	ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4,
++	PNFS_OSD_DEVICEADDR_MAX =
++		PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX +
++		2 /*oda_lun*/ +
++		1 + OSD_SYSTEMID_LEN +
++		1 + ODA_OSDNAME_MAX,
++};
++
++/* LAYOUTCOMMIT: layoutupdate */
++
++/*   union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) {
++ *       case TRUE:
++ *           int64_t     dsu_delta;
++ *       case FALSE:
++ *           void;
++ *   };
++ *
++ *   struct pnfs_osd_layoutupdate4 {
++ *       pnfs_osd_deltaspaceused4    olu_delta_space_used;
++ *       bool                        olu_ioerr_flag;
++ *   };
++ */
++struct pnfs_osd_layoutupdate {
++	u32	dsu_valid;
++	s64	dsu_delta;
++	u32	olu_ioerr_flag;
++};
++
++/* LAYOUTRETURN: I/O Error Report */
++
++enum pnfs_osd_errno {
++	PNFS_OSD_ERR_EIO		= 1,
++	PNFS_OSD_ERR_NOT_FOUND		= 2,
++	PNFS_OSD_ERR_NO_SPACE		= 3,
++	PNFS_OSD_ERR_BAD_CRED		= 4,
++	PNFS_OSD_ERR_NO_ACCESS		= 5,
++	PNFS_OSD_ERR_UNREACHABLE	= 6,
++	PNFS_OSD_ERR_RESOURCE		= 7
++};
++
++/*   struct pnfs_osd_ioerr4 {
++ *       pnfs_osd_objid4     oer_component;
++ *       length4             oer_comp_offset;
++ *       length4             oer_comp_length;
++ *       bool                oer_iswrite;
++ *       pnfs_osd_errno4     oer_errno;
++ *   };
++ */
++struct pnfs_osd_ioerr {
++	struct pnfs_osd_objid	oer_component;
++	u64			oer_comp_offset;
++	u64			oer_comp_length;
++	u32			oer_iswrite;
++	u32			oer_errno;
++};
++
++static inline unsigned
++pnfs_osd_ioerr_xdr_sz(void)
++{
++	return pnfs_osd_objid_xdr_sz() + 2 + 2 + 1 + 1;
++}
++
++/* OSD XDR API */
++
++/* Layout helpers */
++extern struct pnfs_osd_layout *pnfs_osd_xdr_decode_layout(
++	struct pnfs_osd_layout *layout, u32 *p);
++
++extern int pnfs_osd_xdr_encode_layout(
++	struct exp_xdr_stream *xdr,
++	struct pnfs_osd_layout *layout);
++
++/* Device Info helpers */
++
++/* First pass calculate total size for space needed */
++extern size_t pnfs_osd_xdr_deviceaddr_incore_sz(u32 *p);
++
++/* Note: some strings pointed to inside @deviceaddr might point
++ * to space inside @p. @p should stay valid while @deviceaddr
++ * is in use.
++ * It is assumed that @deviceaddr points to bigger memory of size
++ * calculated in first pass by pnfs_osd_xdr_deviceaddr_incore_sz()
++ */
++extern void pnfs_osd_xdr_decode_deviceaddr(
++	struct pnfs_osd_deviceaddr *deviceaddr, u32 *p);
++
++/* For Servers */
++extern int pnfs_osd_xdr_encode_deviceaddr(
++	struct exp_xdr_stream *xdr, struct pnfs_osd_deviceaddr *devaddr);
++
++/* layoutupdate (layout_commit) xdr helpers */
++extern int
++pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
++				 struct pnfs_osd_layoutupdate *lou);
++extern __be32 *
++pnfs_osd_xdr_decode_layoutupdate(struct pnfs_osd_layoutupdate *lou, __be32 *p);
++
++/* osd_ioerror encoding/decoding (layout_return) */
++extern int
++pnfs_osd_xdr_encode_ioerr(struct xdr_stream *xdr, struct pnfs_osd_ioerr *ioerr);
++extern __be32 *
++pnfs_osd_xdr_decode_ioerr(struct pnfs_osd_ioerr *ioerr, __be32 *p);
++
++#endif /* __PNFS_OSD_XDR_H__ */
+diff -up linux-2.6.34.noarch/include/linux/posix_acl.h.orig linux-2.6.34.noarch/include/linux/posix_acl.h
+--- linux-2.6.34.noarch/include/linux/posix_acl.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/posix_acl.h	2010-09-30 10:17:09.227023000 -0400
+@@ -8,6 +8,7 @@
+ #ifndef __LINUX_POSIX_ACL_H
+ #define __LINUX_POSIX_ACL_H
+ 
++#include <linux/fs.h>
+ #include <linux/slab.h>
+ 
+ #define ACL_UNDEFINED_ID	(-1)
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/msg_prot.h	2010-09-30 10:17:09.233014000 -0400
+@@ -14,6 +14,8 @@
+ /* size of an XDR encoding unit in bytes, i.e. 32bit */
+ #define XDR_UNIT	(4)
+ 
++#include <linux/types.h>
++
+ /* spec defines authentication flavor as an unsigned 32 bit integer */
+ typedef u32	rpc_authflavor_t;
+ 
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/rpc_pipe_fs.h	2010-09-30 10:17:09.238025000 -0400
+@@ -3,6 +3,7 @@
+ 
+ #ifdef __KERNEL__
+ 
++#include <linux/fs.h>
+ #include <linux/workqueue.h>
+ 
+ struct rpc_pipe_msg {
+@@ -11,6 +12,10 @@ struct rpc_pipe_msg {
+ 	size_t len;
+ 	size_t copied;
+ 	int errno;
++#define PIPEFS_AUTOFREE_RPCMSG       0x01 /* frees rpc_pipe_msg */
++#define PIPEFS_AUTOFREE_RPCMSG_DATA  0x02 /* frees rpc_pipe_msg->data */
++#define PIPEFS_AUTOFREE_UPCALL_MSG   PIPEFS_AUTOFREE_RPCMSG_DATA
++	u8 flags;
+ };
+ 
+ struct rpc_pipe_ops {
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h.orig	2010-09-30 10:17:09.242015000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/simple_rpc_pipefs.h	2010-09-30 10:17:09.244014000 -0400
+@@ -0,0 +1,111 @@
++/*
++ *  Copyright (c) 2008 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  David M. Richter <richterd@citi.umich.edu>
++ *
++ *  Drawing on work done by Andy Adamson <andros@citi.umich.edu> and
++ *  Marius Eriksen <marius@monkey.org>.  Thanks for the help over the
++ *  years, guys.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *  With thanks to CITI's project sponsor and partner, IBM.
++ */
++
++#ifndef _SIMPLE_RPC_PIPEFS_H_
++#define _SIMPLE_RPC_PIPEFS_H_
++
++#include <linux/fs.h>
++#include <linux/list.h>
++#include <linux/mount.h>
++#include <linux/sched.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/sunrpc/rpc_pipe_fs.h>
++
++
++#define payload_of(headerp)  ((void *)((headerp) + 1))
++
++/*
++ * struct pipefs_hdr -- the generic message format for simple_rpc_pipefs.
++ * Messages may simply be the header itself, although having an optional
++ * data payload follow the header allows much more flexibility.
++ *
++ * Messages are created using pipefs_alloc_init_msg() and
++ * pipefs_alloc_init_msg_padded(), both of which accept a pointer to an
++ * (optional) data payload.
++ *
++ * Given a struct pipefs_hdr *msg that has a struct foo payload, the data
++ * can be accessed using: struct foo *foop = payload_of(msg)
++ */
++struct pipefs_hdr {
++	u32 msgid;
++	u8  type;
++	u8  flags;
++	u16 totallen; /* length of entire message, including hdr itself */
++	u32 status;
++};
++
++/*
++ * struct pipefs_list -- a type of list used for tracking callers who've made an
++ * upcall and are blocked waiting for a reply.
++ *
++ * See pipefs_queue_upcall_waitreply() and pipefs_assign_upcall_reply().
++ */
++struct pipefs_list {
++	struct list_head list;
++	spinlock_t list_lock;
++};
++
++
++/* See net/sunrpc/simple_rpc_pipefs.c for more info on using these functions. */
++extern struct dentry *pipefs_mkpipe(const char *name,
++				    const struct rpc_pipe_ops *ops,
++				    int wait_for_open);
++extern void pipefs_closepipe(struct dentry *pipe);
++extern void pipefs_init_list(struct pipefs_list *list);
++extern struct pipefs_hdr *pipefs_alloc_init_msg(u32 msgid, u8 type, u8 flags,
++						void *data, u16 datalen);
++extern struct pipefs_hdr *pipefs_alloc_init_msg_padded(u32 msgid, u8 type,
++						       u8 flags, void *data,
++						       u16 datalen, u16 padlen);
++extern struct pipefs_hdr *pipefs_queue_upcall_waitreply(struct dentry *pipe,
++							struct pipefs_hdr *msg,
++							struct pipefs_list
++							*uplist, u8 upflags,
++							u32 timeout);
++extern int pipefs_queue_upcall_noreply(struct dentry *pipe,
++				       struct pipefs_hdr *msg, u8 upflags);
++extern int pipefs_assign_upcall_reply(struct pipefs_hdr *reply,
++				      struct pipefs_list *uplist);
++extern struct pipefs_hdr *pipefs_readmsg(struct file *filp,
++					 const char __user *src, size_t len);
++extern ssize_t pipefs_generic_upcall(struct file *filp,
++				     struct rpc_pipe_msg *rpcmsg,
++				     char __user *dst, size_t buflen);
++extern void pipefs_generic_destroy_msg(struct rpc_pipe_msg *rpcmsg);
++
++#endif /* _SIMPLE_RPC_PIPEFS_H_ */
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/svc_xprt.h	2010-09-30 10:17:09.249016000 -0400
+@@ -166,4 +166,41 @@ static inline char *__svc_print_addr(con
+ 
+ 	return buf;
+ }
++
++/*
++ * Print a network address in a universal format (see rfc1833 and nfsv4.1)
++ */
++static inline int __svc_print_netaddr(struct sockaddr *addr,
++				      struct xdr_netobj *na)
++{
++	u16 port;
++	ssize_t len;
++
++	switch (addr->sa_family) {
++	case AF_INET: {
++		struct sockaddr_in *sin = (struct sockaddr_in *)addr;
++		port = ntohs(sin->sin_port);
++
++		len = snprintf(na->data, na->len, "%pI4.%u.%u",
++				&sin->sin_addr,
++				port >> 8, port & 0xff);
++		break;
++	}
++	case AF_INET6: {
++		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
++		port = ntohs(sin6->sin6_port);
++
++		len = snprintf(na->data, na->len, "%pI6.%u.%u",
++				&sin6->sin6_addr,
++				port >> 8, port & 0xff);
++		break;
++	}
++	default:
++		snprintf(na->data, na->len, "unknown address type: %d",
++			 addr->sa_family);
++		len = -EINVAL;
++		break;
++	}
++	return len;
++}
+ #endif /* SUNRPC_SVC_XPRT_H */
+diff -up linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig linux-2.6.34.noarch/include/linux/sunrpc/xdr.h
+--- linux-2.6.34.noarch/include/linux/sunrpc/xdr.h.orig	2010-09-30 10:15:18.029721000 -0400
++++ linux-2.6.34.noarch/include/linux/sunrpc/xdr.h	2010-09-30 10:17:09.254021000 -0400
+@@ -131,6 +131,13 @@ xdr_decode_hyper(__be32 *p, __u64 *valp)
+ 	return p + 2;
+ }
+ 
++static inline __be32 *
++xdr_decode_opaque_fixed(__be32 *p, void *ptr, unsigned int len)
++{
++	memcpy(ptr, p, len);
++	return p + XDR_QUADLEN(len);
++}
++
+ /*
+  * Adjust kvec to reflect end of xdr'ed data (RPC client XDR)
+  */
+@@ -197,6 +204,7 @@ struct xdr_stream {
+ 
+ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
++extern __be32 *xdr_rewind_stream(struct xdr_stream *xdr, __be32 *q);
+ extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
+ 		unsigned int base, unsigned int len);
+ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+diff -up linux-2.6.34.noarch/net/sunrpc/Makefile.orig linux-2.6.34.noarch/net/sunrpc/Makefile
+--- linux-2.6.34.noarch/net/sunrpc/Makefile.orig	2010-05-16 17:17:36.000000000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/Makefile	2010-09-30 10:17:09.263013000 -0400
+@@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprt
+ 	    svc.o svcsock.o svcauth.o svcauth_unix.o \
+ 	    addr.o rpcb_clnt.o timer.o xdr.o \
+ 	    sunrpc_syms.o cache.o rpc_pipe.o \
+-	    svc_xprt.o
++	    svc_xprt.o simple_rpc_pipefs.o
+ sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o
+ sunrpc-$(CONFIG_PROC_FS) += stats.o
+ sunrpc-$(CONFIG_SYSCTL) += sysctl.o
+diff -up linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c
+--- linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c.orig	2010-09-30 10:17:09.267010000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/simple_rpc_pipefs.c	2010-09-30 10:17:09.268015000 -0400
+@@ -0,0 +1,424 @@
++/*
++ *  net/sunrpc/simple_rpc_pipefs.c
++ *
++ *  Copyright (c) 2008 The Regents of the University of Michigan.
++ *  All rights reserved.
++ *
++ *  David M. Richter <richterd@citi.umich.edu>
++ *
++ *  Drawing on work done by Andy Adamson <andros@citi.umich.edu> and
++ *  Marius Eriksen <marius@monkey.org>.  Thanks for the help over the
++ *  years, guys.
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in the
++ *     documentation and/or other materials provided with the distribution.
++ *  3. Neither the name of the University nor the names of its
++ *     contributors may be used to endorse or promote products derived
++ *     from this software without specific prior written permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *  With thanks to CITI's project sponsor and partner, IBM.
++ */
++
++#include <linux/completion.h>
++#include <linux/uaccess.h>
++#include <linux/module.h>
++#include <linux/sunrpc/simple_rpc_pipefs.h>
++
++
++/*
++ * Make an rpc_pipefs pipe named @name at the root of the mounted rpc_pipefs
++ * filesystem.  Returns the pipe's dentry, or an ERR_PTR() on failure.
++ *
++ * If @wait_for_open is non-zero and an upcall is later queued but the userland
++ * end of the pipe has not yet been opened, the upcall will remain queued until
++ * the pipe is opened; otherwise, the upcall queueing will return with -EPIPE.
++ */
++struct dentry *pipefs_mkpipe(const char *name, const struct rpc_pipe_ops *ops,
++			     int wait_for_open)
++{
++	struct dentry *dir, *pipe;
++	struct vfsmount *mnt;
++
++	mnt = rpc_get_mount();
++	if (IS_ERR(mnt))
++		return ERR_CAST(mnt);
++	dir = mnt->mnt_root;
++	if (!dir) {
++		pipe = ERR_PTR(-ENOENT);
++		goto out;
++	}
++	pipe = rpc_mkpipe(dir, name, NULL, ops,
++			  wait_for_open ? RPC_PIPE_WAIT_FOR_OPEN : 0);
++out:
++	if (IS_ERR(pipe))
++		rpc_put_mount();	/* no pipe, so pipefs_closepipe() won't do it */
++	return pipe;
++}
++EXPORT_SYMBOL(pipefs_mkpipe);
++
++/*
++ * Shutdown a pipe made by pipefs_mkpipe(): unlink it and drop the mount ref.
++ * XXX: do we need to retain an extra reference on the mount?
++ */
++void pipefs_closepipe(struct dentry *pipe)
++{
++	rpc_unlink(pipe);
++	rpc_put_mount();	/* pairs with rpc_get_mount() in pipefs_mkpipe() */
++}
++EXPORT_SYMBOL(pipefs_closepipe);
++
++/*
++ * Initialize a struct pipefs_list, which is a way to keep track of callers
++ * who are blocked having made an upcall and are awaiting a reply.
++ *
++ * See pipefs_queue_upcall_waitreply() and pipefs_assign_upcall_reply() for
++ * how to use them.
++ */
++inline void pipefs_init_list(struct pipefs_list *list)
++{
++	INIT_LIST_HEAD(&list->list);
++	spin_lock_init(&list->list_lock);
++}
++EXPORT_SYMBOL(pipefs_init_list);
++
++/*
++ * Alloc/init a generic pipefs message header and copy into its message body
++ * an arbitrary data payload.  Returns the message or an ERR_PTR() on failure.
++ *
++ * struct pipefs_hdr's are meant to serve as generic, general-purpose message
++ * headers for easy rpc_pipefs I/O.  When an upcall is made, the
++ * struct pipefs_hdr is assigned to a struct rpc_pipe_msg and delivered
++ * therein.  --And yes, the naming can seem a little confusing at first:
++ *
++ * When one thinks of an upcall "message", in simple_rpc_pipefs that's a
++ * struct pipefs_hdr (possibly with an attached message body).  A
++ * struct rpc_pipe_msg is actually only the -vehicle- by which the "real"
++ * message is delivered and processed.
++ */
++struct pipefs_hdr *pipefs_alloc_init_msg_padded(u32 msgid, u8 type, u8 flags,
++					   void *data, u16 datalen, u16 padlen)
++{
++	size_t totallen;	/* wider than u16 so the sum below cannot wrap */
++	struct pipefs_hdr *msg = NULL;
++
++	totallen = sizeof(*msg) + datalen + padlen;
++	if (totallen > PAGE_SIZE) {
++		msg = ERR_PTR(-E2BIG);
++		goto out;
++	}
++
++	msg = kzalloc(totallen, GFP_KERNEL);	/* zeroed, so the padding is zero */
++	if (!msg) {
++		msg = ERR_PTR(-ENOMEM);
++		goto out;
++	}
++
++	msg->msgid = msgid;
++	msg->type = type;
++	msg->flags = flags;
++	msg->totallen = totallen;
++	memcpy(payload_of(msg), data, datalen);
++out:
++	return msg;
++}
++EXPORT_SYMBOL(pipefs_alloc_init_msg_padded);
++
++/*
++ * Same as pipefs_alloc_init_msg_padded(), called with a padlen of 0.
++ */
++struct pipefs_hdr *pipefs_alloc_init_msg(u32 msgid, u8 type, u8 flags,
++				    void *data, u16 datalen)
++{
++	return pipefs_alloc_init_msg_padded(msgid, type, flags, data,
++					    datalen, 0);
++}
++EXPORT_SYMBOL(pipefs_alloc_init_msg);
++
++/* Reset @rpcmsg and make it carry @msg (msg->totallen bytes) with @upflags. */
++static void pipefs_init_rpcmsg(struct rpc_pipe_msg *rpcmsg,
++			       struct pipefs_hdr *msg, u8 upflags)
++{
++	memset(rpcmsg, 0, sizeof(*rpcmsg));	/* also clears ->copied and ->errno */
++	rpcmsg->data = msg;
++	rpcmsg->len = msg->totallen;
++	rpcmsg->flags = upflags;
++}
++
++static struct rpc_pipe_msg *pipefs_alloc_init_rpcmsg(struct pipefs_hdr *msg,
++						     u8 upflags)
++{
++	struct rpc_pipe_msg *rpcmsg;	/* heap-allocated vehicle for @msg */
++
++	rpcmsg = kmalloc(sizeof(*rpcmsg), GFP_KERNEL);
++	if (!rpcmsg)
++		return ERR_PTR(-ENOMEM);	/* callers test with IS_ERR() */
++
++	pipefs_init_rpcmsg(rpcmsg, msg, upflags);	/* memset()s it first */
++	return rpcmsg;
++}
++
++
++/* represents an upcall that'll block and wait for a reply */
++struct pipefs_upcall {
++	u32 msgid;			/* copied from the upcall msg; replies match on it */
++	struct rpc_pipe_msg rpcmsg;	/* what is handed to rpc_queue_upcall() */
++	struct list_head list;		/* entry on the caller's struct pipefs_list */
++	wait_queue_head_t waitq;	/* the upcalling thread waits here */
++	struct pipefs_hdr *reply;	/* NULL until pipefs_assign_upcall_reply() */
++};
++
++/* Prepare @upcall for @msg: no reply yet, same msgid, empty list and waitq. */
++static void pipefs_init_upcall_waitreply(struct pipefs_upcall *upcall,
++					 struct pipefs_hdr *msg, u8 upflags)
++{
++	upcall->reply = NULL;
++	upcall->msgid = msg->msgid;	/* key used to match the eventual reply */
++	INIT_LIST_HEAD(&upcall->list);
++	init_waitqueue_head(&upcall->waitq);
++	pipefs_init_rpcmsg(&upcall->rpcmsg, msg, upflags);
++}
++
++static int __pipefs_queue_upcall_waitreply(struct dentry *pipe,
++					   struct pipefs_upcall *upcall,
++					   struct pipefs_list *uplist,
++					   u32 timeout)
++{
++	int err = 0;
++
++	/* be findable by pipefs_assign_upcall_reply() before userland can reply */
++	spin_lock(&uplist->list_lock);
++	list_add(&upcall->list, &uplist->list);
++	spin_unlock(&uplist->list_lock);
++
++	err = rpc_queue_upcall(pipe->d_inode, &upcall->rpcmsg);
++	if (err < 0)
++		goto out;
++
++	/*
++	 * wait_event*() re-test ->reply after changing the task state, so a
++	 * reply that lands before we sleep is not a lost wakeup.
++	 * NOTE(review): after a timeout ->rpcmsg may still be queued -- confirm.
++	 */
++	if (timeout) {
++		if (!wait_event_timeout(upcall->waitq, upcall->reply != NULL,
++					timeout) && upcall->reply == NULL)
++			err = -ETIMEDOUT;
++	} else {
++		wait_event(upcall->waitq, upcall->reply != NULL);
++	}
++
++out:
++	spin_lock(&uplist->list_lock);
++	list_del_init(&upcall->list);
++	spin_unlock(&uplist->list_lock);
++	return err;
++}
++
++/*
++ * Queue a pipefs msg for an upcall to userspace, place the calling thread
++ * on @uplist, and block the thread to wait for a reply.  If @timeout is
++ * nonzero, the thread will be blocked for at most @timeout jiffies.
++ *
++ * (To convert time units into jiffies, consider the functions
++ *  msecs_to_jiffies(), usecs_to_jiffies(), timeval_to_jiffies(), and
++ *  timespec_to_jiffies().)
++ *
++ * Once a reply is received by your downcall handler, call
++ * pipefs_assign_upcall_reply() with @uplist to find the corresponding upcall,
++ * assign the reply, and wake the waiting thread.
++ *
++ * Returns the reply, or an ERR_PTR() (-ETIMEDOUT, or the error from
++ * rpc_queue_upcall()); check with IS_ERR() before accessing the reply.
++ *
++ * Callers are responsible for freeing @msg, unless pipefs_generic_destroy_msg()
++ * is used as the ->destroy_msg() callback and the PIPEFS_AUTOFREE_UPCALL_MSG
++ * flag is set in @upflags.  See also rpc_pipe_fs.h.
++ */
++struct pipefs_hdr *pipefs_queue_upcall_waitreply(struct dentry *pipe,
++					    struct pipefs_hdr *msg,
++					    struct pipefs_list *uplist,
++					    u8 upflags, u32 timeout)
++{
++	int err = 0;
++	struct pipefs_upcall upcall;	/* local to this call; not heap-allocated */
++
++	pipefs_init_upcall_waitreply(&upcall, msg, upflags);
++	err = __pipefs_queue_upcall_waitreply(pipe, &upcall, uplist, timeout);
++	if (err < 0) {
++		kfree(upcall.reply);	/* frees a reply assigned despite the error, if any */
++		upcall.reply = ERR_PTR(err);
++	}
++
++	return upcall.reply;
++}
++EXPORT_SYMBOL(pipefs_queue_upcall_waitreply);
++
++/*
++ * Queue a pipefs msg for an upcall to userspace and immediately return (i.e.,
++ * no reply is expected).  Returns 0 or a negative errno.
++ *
++ * Callers are responsible for freeing @msg, unless pipefs_generic_destroy_msg()
++ * is used as the ->destroy_msg() callback and the PIPEFS_AUTOFREE_UPCALL_MSG
++ * flag is set in @upflags.  See also rpc_pipe_fs.h.
++ */
++int pipefs_queue_upcall_noreply(struct dentry *pipe, struct pipefs_hdr *msg,
++				u8 upflags)
++{
++	int err;
++	struct rpc_pipe_msg *rpcmsg;
++
++	upflags |= PIPEFS_AUTOFREE_RPCMSG;
++	rpcmsg = pipefs_alloc_init_rpcmsg(msg, upflags);
++	if (IS_ERR(rpcmsg))
++		return PTR_ERR(rpcmsg);
++	err = rpc_queue_upcall(pipe->d_inode, rpcmsg);
++	/* a message that was never queued never reaches ->destroy_msg() */
++	if (err < 0)
++		kfree(rpcmsg);
++	return err;
++}
++EXPORT_SYMBOL(pipefs_queue_upcall_noreply);
++
++/* Return the first upcall on @uplist whose msgid equals @msgid, or NULL. */
++static struct pipefs_upcall *pipefs_find_upcall_msgid(u32 msgid,
++						 struct pipefs_list *uplist)
++{
++	struct pipefs_upcall *upcall;
++
++	spin_lock(&uplist->list_lock);
++	list_for_each_entry(upcall, &uplist->list, list)
++		if (upcall->msgid == msgid)
++			goto out;
++	upcall = NULL;	/* loop ran to completion: no match */
++out:
++	spin_unlock(&uplist->list_lock);	/* NOTE(review): result outlives the lock */
++	return upcall;
++}
++
++/*
++ * In your rpc_pipe_ops->downcall() handler, once you've read in a downcall
++ * message and have determined that it is a reply to a waiting upcall,
++ * you can use this function to find the appropriate upcall, assign the result,
++ * and wake the upcall thread.
++ *
++ * The reply message must have the same msgid as the original upcall message's.
++ * Returns 0 on success, or -ENOENT if no waiting upcall has that msgid.
++ * See also pipefs_queue_upcall_waitreply() and pipefs_readmsg().
++ */
++int pipefs_assign_upcall_reply(struct pipefs_hdr *reply,
++			       struct pipefs_list *uplist)
++{
++	int err = 0;
++	struct pipefs_upcall *upcall;
++
++	upcall = pipefs_find_upcall_msgid(reply->msgid, uplist);
++	if (!upcall) {
++		printk(KERN_ERR "%s: ERROR: have reply but no matching upcall "
++			"for msgid %d\n", __func__, reply->msgid);
++		err = -ENOENT;	/* @reply is not stored anywhere on this path */
++		goto out;
++	}
++	upcall->reply = reply;	/* set before the wake-up so the waiter sees it */
++	wake_up(&upcall->waitq);
++out:
++	return err;
++}
++EXPORT_SYMBOL(pipefs_assign_upcall_reply);
++
++/*
++ * Copy @len bytes from userspace @src into a newly-allocated message that
++ * begins with a struct pipefs_hdr.  Returns the message or an ERR_PTR().
++ */
++struct pipefs_hdr *pipefs_readmsg(struct file *filp, const char __user *src,
++			     size_t len)
++{
++	int err = 0, hdrsize;
++	struct pipefs_hdr *msg = NULL;
++
++	hdrsize = sizeof(*msg);
++	if (len < hdrsize) {	/* must at least hold a complete header */
++		printk(KERN_ERR "%s: ERROR: header is too short (%d vs %d)\n",
++		       __func__, (int) len, hdrsize);
++		err = -EINVAL;
++		goto out;
++	}
++
++	msg = kzalloc(len, GFP_KERNEL);	/* @len is used as given; no upper bound here */
++	if (!msg) {
++		err = -ENOMEM;
++		goto out;
++	}
++	if (copy_from_user(msg, src, len))	/* NOTE(review): hdr fields not checked vs @len */
++		err = -EFAULT;
++out:
++	if (err) {
++		kfree(msg);	/* msg is still NULL on the -EINVAL and -ENOMEM paths */
++		msg = ERR_PTR(err);
++	}
++	return msg;
++}
++EXPORT_SYMBOL(pipefs_readmsg);
++
++/*
++ * Generic rpc_pipe_ops->upcall() handler: copies up to @buflen not-yet-copied
++ * bytes of @rpcmsg to @dst.  Returns the byte count, or -EFAULT if none of a
++ * non-empty copy succeeded.  Don't call this directly: to make an upcall, use
++ * pipefs_queue_upcall_waitreply() or pipefs_queue_upcall_noreply().
++ */
++ssize_t pipefs_generic_upcall(struct file *filp, struct rpc_pipe_msg *rpcmsg,
++			      char __user *dst, size_t buflen)
++{
++	char *data;
++	ssize_t len, left;
++
++	data = (char *)rpcmsg->data + rpcmsg->copied;
++	len = rpcmsg->len - rpcmsg->copied;
++	if (len > buflen)
++		len = buflen;
++
++	left = copy_to_user(dst, data, len);	/* bytes NOT copied; never < 0 */
++	if (len > 0 && left == len) {
++		rpcmsg->errno = -EFAULT;
++		return -EFAULT;
++	}
++
++	len -= left;	/* a partial copy is reported as a short read */
++	rpcmsg->copied += len;
++	rpcmsg->errno = 0;
++	return len;
++}
++EXPORT_SYMBOL(pipefs_generic_upcall);
++
++/*
++ * Generic rpc_pipe_ops->destroy_msg() handler implementation.
++ *
++ * Items are only freed if @rpcmsg->flags has been set appropriately.
++ * See pipefs_queue_upcall_noreply() and rpc_pipe_fs.h.
++ */
++void pipefs_generic_destroy_msg(struct rpc_pipe_msg *rpcmsg)
++{
++	if (rpcmsg->flags & PIPEFS_AUTOFREE_UPCALL_MSG)
++		kfree(rpcmsg->data);	/* the struct pipefs_hdr message itself */
++	if (rpcmsg->flags & PIPEFS_AUTOFREE_RPCMSG)
++		kfree(rpcmsg);	/* done last: ->flags and ->data were read above */
++}
++EXPORT_SYMBOL(pipefs_generic_destroy_msg);
+diff -up linux-2.6.34.noarch/net/sunrpc/xdr.c.orig linux-2.6.34.noarch/net/sunrpc/xdr.c
+--- linux-2.6.34.noarch/net/sunrpc/xdr.c.orig	2010-09-30 10:15:18.189725000 -0400
++++ linux-2.6.34.noarch/net/sunrpc/xdr.c	2010-09-30 10:17:09.274010000 -0400
+@@ -395,24 +395,29 @@ xdr_shrink_pagelen(struct xdr_buf *buf, 
+ {
+ 	struct kvec *tail;
+ 	size_t copy;
+-	char *p;
+ 	unsigned int pglen = buf->page_len;
++	unsigned int tailbuf_len;
+ 
+ 	tail = buf->tail;
+ 	BUG_ON (len > pglen);
+ 
++	tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len;
++
+ 	/* Shift the tail first */
+-	if (tail->iov_len != 0) {
+-		p = (char *)tail->iov_base + len;
++	if (tailbuf_len != 0) {
++		unsigned int free_space = tailbuf_len - tail->iov_len;
++
++		if (len < free_space)
++			free_space = len;
++		tail->iov_len += free_space;
++
++		copy = len;
+ 		if (tail->iov_len > len) {
+-			copy = tail->iov_len - len;
+-			memmove(p, tail->iov_base, copy);
++			char *p = (char *)tail->iov_base + len;
++			memmove(p, tail->iov_base, tail->iov_len - len);
+ 		} else
+-			buf->buflen -= len;
+-		/* Copy from the inlined pages into the tail */
+-		copy = len;
+-		if (copy > tail->iov_len)
+ 			copy = tail->iov_len;
++		/* Copy from the inlined pages into the tail */
+ 		_copy_from_pages((char *)tail->iov_base,
+ 				buf->pages, buf->page_base + pglen - len,
+ 				copy);
+@@ -496,6 +501,27 @@ __be32 * xdr_reserve_space(struct xdr_st
+ EXPORT_SYMBOL_GPL(xdr_reserve_space);
+ 
+ /**
++ * xdr_rewind_stream - rewind a stream back to some checkpoint
++ * @xdr: pointer to xdr_stream
++ * @q: some checkpoint at historical place of @xdr
++ *
++ * Restores an xdr stream to some historical point and returns @q.  @q must
++ * be a logical xdr point in the past that was sampled by @q = @xdr->p.
++ */
++__be32 *xdr_rewind_stream(struct xdr_stream *xdr, __be32 *q)
++{
++	size_t nbytes = (xdr->p - q) << 2;	/* 32-bit words -> bytes */
++
++	BUG_ON(xdr->p < q);	/* @q must not lie ahead of the current position */
++	BUG_ON(nbytes > xdr->iov->iov_len || nbytes > xdr->buf->len);
++	xdr->p = q;
++	xdr->iov->iov_len -= nbytes;
++	xdr->buf->len -= nbytes;
++	return q;
++}
++EXPORT_SYMBOL_GPL(xdr_rewind_stream);
++
++/**
+  * xdr_write_pages - Insert a list of pages into an XDR buffer for sending
+  * @xdr: pointer to xdr_stream
+  * @pages: list of pages